author     Russ Cox <rsc@golang.org>  2014-09-08 00:08:51 -0400
committer  Russ Cox <rsc@golang.org>  2014-09-08 00:08:51 -0400
commit     8528da672cc093d4dd06732819abc1f7b6b5a46e (patch)
tree       334be80d4a4c85b77db6f6fdb67cbf0528cba5f5 /src/runtime
parent     73bcb69f272cbf34ddcc9daa56427a8683b5a95d (diff)
download   go-8528da672cc093d4dd06732819abc1f7b6b5a46e.tar.gz
build: move package sources from src/pkg to src
Preparation was in CL 134570043. This CL contains only the effect of 'hg mv src/pkg/* src'. For more about the move, see golang.org/s/go14nopkg.
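For orientation: the move changes only where the package sources live on disk; import paths are unchanged, since they never included "pkg". A minimal sketch of the effect for one runtime source file, assuming the standard GOROOT layout (illustrative only, not taken from the CL itself):

    // Before this CL:  $GOROOT/src/pkg/runtime/alg.go
    // After this CL:   $GOROOT/src/runtime/alg.go
    //
    // User code is unaffected; the import path stays the same:
    package main

    import "runtime"

    func main() { println(runtime.GOOS) }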
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/Makefile5
-rw-r--r--src/runtime/alg.go352
-rw-r--r--src/runtime/append_test.go190
-rw-r--r--src/runtime/arch_386.go12
-rw-r--r--src/runtime/arch_386.h17
-rw-r--r--src/runtime/arch_amd64.go12
-rw-r--r--src/runtime/arch_amd64.h25
-rw-r--r--src/runtime/arch_amd64p32.go12
-rw-r--r--src/runtime/arch_amd64p32.h17
-rw-r--r--src/runtime/arch_arm.go12
-rw-r--r--src/runtime/arch_arm.h17
-rw-r--r--src/runtime/asm_386.s2292
-rw-r--r--src/runtime/asm_amd64.s2343
-rw-r--r--src/runtime/asm_amd64p32.s1227
-rw-r--r--src/runtime/asm_arm.s1309
-rw-r--r--src/runtime/atomic.go51
-rw-r--r--src/runtime/atomic_386.c46
-rw-r--r--src/runtime/atomic_amd64x.c29
-rw-r--r--src/runtime/atomic_arm.go155
-rw-r--r--src/runtime/cgo/asm_386.s31
-rw-r--r--src/runtime/cgo/asm_amd64.s47
-rw-r--r--src/runtime/cgo/asm_arm.s24
-rw-r--r--src/runtime/cgo/asm_nacl_amd64p32.s13
-rw-r--r--src/runtime/cgo/callbacks.c102
-rw-r--r--src/runtime/cgo/cgo.go33
-rw-r--r--src/runtime/cgo/dragonfly.c13
-rw-r--r--src/runtime/cgo/freebsd.c13
-rw-r--r--src/runtime/cgo/gcc_386.S45
-rw-r--r--src/runtime/cgo/gcc_amd64.S48
-rw-r--r--src/runtime/cgo/gcc_android.c31
-rw-r--r--src/runtime/cgo/gcc_android_arm.c43
-rw-r--r--src/runtime/cgo/gcc_arm.S45
-rw-r--r--src/runtime/cgo/gcc_darwin_386.c155
-rw-r--r--src/runtime/cgo/gcc_darwin_amd64.c126
-rw-r--r--src/runtime/cgo/gcc_dragonfly_386.c77
-rw-r--r--src/runtime/cgo/gcc_dragonfly_amd64.c77
-rw-r--r--src/runtime/cgo/gcc_fatalf.c23
-rw-r--r--src/runtime/cgo/gcc_freebsd_386.c77
-rw-r--r--src/runtime/cgo/gcc_freebsd_amd64.c77
-rw-r--r--src/runtime/cgo/gcc_freebsd_arm.c89
-rw-r--r--src/runtime/cgo/gcc_linux_386.c79
-rw-r--r--src/runtime/cgo/gcc_linux_amd64.c74
-rw-r--r--src/runtime/cgo/gcc_linux_arm.c80
-rw-r--r--src/runtime/cgo/gcc_netbsd_386.c76
-rw-r--r--src/runtime/cgo/gcc_netbsd_amd64.c77
-rw-r--r--src/runtime/cgo/gcc_netbsd_arm.c73
-rw-r--r--src/runtime/cgo/gcc_openbsd_386.c165
-rw-r--r--src/runtime/cgo/gcc_openbsd_amd64.c166
-rw-r--r--src/runtime/cgo/gcc_setenv.c16
-rw-r--r--src/runtime/cgo/gcc_util.c47
-rw-r--r--src/runtime/cgo/gcc_windows_386.c61
-rw-r--r--src/runtime/cgo/gcc_windows_amd64.c61
-rw-r--r--src/runtime/cgo/iscgo.c15
-rw-r--r--src/runtime/cgo/libcgo.h65
-rw-r--r--src/runtime/cgo/netbsd.c13
-rw-r--r--src/runtime/cgo/openbsd.c21
-rw-r--r--src/runtime/cgo/setenv.c10
-rw-r--r--src/runtime/cgocall.go270
-rw-r--r--src/runtime/cgocall.h13
-rw-r--r--src/runtime/chan.go644
-rw-r--r--src/runtime/chan.h68
-rw-r--r--src/runtime/chan_test.go791
-rw-r--r--src/runtime/closure_test.go53
-rw-r--r--src/runtime/compiler.go13
-rw-r--r--src/runtime/complex.go52
-rw-r--r--src/runtime/complex_test.go67
-rw-r--r--src/runtime/cpuprof.go425
-rw-r--r--src/runtime/crash_cgo_test.go119
-rw-r--r--src/runtime/crash_test.go382
-rw-r--r--src/runtime/debug.go56
-rw-r--r--src/runtime/debug/debug.c9
-rw-r--r--src/runtime/debug/garbage.go147
-rw-r--r--src/runtime/debug/garbage_test.go102
-rw-r--r--src/runtime/debug/heapdump_test.go33
-rw-r--r--src/runtime/debug/stack.go98
-rw-r--r--src/runtime/debug/stack_test.go62
-rw-r--r--src/runtime/debug/stubs.go20
-rw-r--r--src/runtime/debug/stubs.s21
-rw-r--r--src/runtime/defs.c15
-rw-r--r--src/runtime/defs1_linux.go37
-rw-r--r--src/runtime/defs2_linux.go146
-rw-r--r--src/runtime/defs_android_arm.h3
-rw-r--r--src/runtime/defs_arm_linux.go124
-rw-r--r--src/runtime/defs_darwin.go179
-rw-r--r--src/runtime/defs_darwin_386.h392
-rw-r--r--src/runtime/defs_darwin_amd64.h395
-rw-r--r--src/runtime/defs_dragonfly.go126
-rw-r--r--src/runtime/defs_dragonfly_386.h198
-rw-r--r--src/runtime/defs_dragonfly_amd64.h208
-rw-r--r--src/runtime/defs_freebsd.go133
-rw-r--r--src/runtime/defs_freebsd_386.h213
-rw-r--r--src/runtime/defs_freebsd_amd64.h224
-rw-r--r--src/runtime/defs_freebsd_arm.h186
-rw-r--r--src/runtime/defs_linux.go124
-rw-r--r--src/runtime/defs_linux_386.h211
-rw-r--r--src/runtime/defs_linux_amd64.h254
-rw-r--r--src/runtime/defs_linux_arm.h168
-rw-r--r--src/runtime/defs_nacl_386.h63
-rw-r--r--src/runtime/defs_nacl_amd64p32.h90
-rw-r--r--src/runtime/defs_nacl_arm.h70
-rw-r--r--src/runtime/defs_netbsd.go125
-rw-r--r--src/runtime/defs_netbsd_386.go41
-rw-r--r--src/runtime/defs_netbsd_386.h182
-rw-r--r--src/runtime/defs_netbsd_amd64.go48
-rw-r--r--src/runtime/defs_netbsd_amd64.h194
-rw-r--r--src/runtime/defs_netbsd_arm.go39
-rw-r--r--src/runtime/defs_netbsd_arm.h184
-rw-r--r--src/runtime/defs_openbsd.go121
-rw-r--r--src/runtime/defs_openbsd_386.h168
-rw-r--r--src/runtime/defs_openbsd_amd64.h179
-rw-r--r--src/runtime/defs_plan9_386.h26
-rw-r--r--src/runtime/defs_plan9_amd64.h34
-rw-r--r--src/runtime/defs_solaris.go156
-rw-r--r--src/runtime/defs_solaris_amd64.go48
-rw-r--r--src/runtime/defs_solaris_amd64.h254
-rw-r--r--src/runtime/defs_windows.go70
-rw-r--r--src/runtime/defs_windows_386.h113
-rw-r--r--src/runtime/defs_windows_amd64.h128
-rw-r--r--src/runtime/env_plan9.go52
-rw-r--r--src/runtime/env_posix.go52
-rw-r--r--src/runtime/error.go129
-rw-r--r--src/runtime/export_futex_test.go10
-rw-r--r--src/runtime/export_test.go163
-rw-r--r--src/runtime/extern.go155
-rw-r--r--src/runtime/float.c10
-rw-r--r--src/runtime/funcdata.h37
-rw-r--r--src/runtime/futex_test.go77
-rw-r--r--src/runtime/gc_test.go292
-rw-r--r--src/runtime/gcinfo_test.go193
-rw-r--r--src/runtime/hash_test.go572
-rw-r--r--src/runtime/hashmap.go944
-rw-r--r--src/runtime/hashmap_fast.go379
-rw-r--r--src/runtime/heapdump.c854
-rw-r--r--src/runtime/iface.go439
-rw-r--r--src/runtime/iface_test.go138
-rw-r--r--src/runtime/lfstack.c87
-rw-r--r--src/runtime/lfstack_test.go136
-rw-r--r--src/runtime/lock_futex.go205
-rw-r--r--src/runtime/lock_sema.go270
-rw-r--r--src/runtime/malloc.c439
-rw-r--r--src/runtime/malloc.go814
-rw-r--r--src/runtime/malloc.h618
-rw-r--r--src/runtime/malloc_test.go189
-rw-r--r--src/runtime/map_test.go477
-rw-r--r--src/runtime/mapspeed_test.go300
-rw-r--r--src/runtime/mcache.c115
-rw-r--r--src/runtime/mcentral.c214
-rw-r--r--src/runtime/mem.go72
-rw-r--r--src/runtime/mem_darwin.c82
-rw-r--r--src/runtime/mem_dragonfly.c105
-rw-r--r--src/runtime/mem_freebsd.c100
-rw-r--r--src/runtime/mem_linux.c162
-rw-r--r--src/runtime/mem_nacl.c120
-rw-r--r--src/runtime/mem_netbsd.c100
-rw-r--r--src/runtime/mem_openbsd.c100
-rw-r--r--src/runtime/mem_plan9.c120
-rw-r--r--src/runtime/mem_solaris.c101
-rw-r--r--src/runtime/mem_windows.c120
-rw-r--r--src/runtime/memclr_386.s128
-rw-r--r--src/runtime/memclr_amd64.s117
-rw-r--r--src/runtime/memclr_arm.s87
-rw-r--r--src/runtime/memclr_plan9_386.s51
-rw-r--r--src/runtime/memclr_plan9_amd64.s21
-rw-r--r--src/runtime/memmove_386.s176
-rw-r--r--src/runtime/memmove_amd64.s252
-rw-r--r--src/runtime/memmove_arm.s261
-rw-r--r--src/runtime/memmove_linux_amd64_test.go61
-rw-r--r--src/runtime/memmove_nacl_amd64p32.s46
-rw-r--r--src/runtime/memmove_plan9_386.s128
-rw-r--r--src/runtime/memmove_plan9_amd64.s127
-rw-r--r--src/runtime/memmove_test.go295
-rw-r--r--src/runtime/mfinal_test.go239
-rw-r--r--src/runtime/mfixalloc.c64
-rw-r--r--src/runtime/mgc0.c1910
-rw-r--r--src/runtime/mgc0.go70
-rw-r--r--src/runtime/mgc0.h78
-rw-r--r--src/runtime/mheap.c885
-rw-r--r--src/runtime/mknacl.sh15
-rw-r--r--src/runtime/mprof.go660
-rw-r--r--src/runtime/msize.c184
-rw-r--r--src/runtime/netpoll.go455
-rw-r--r--src/runtime/netpoll_epoll.go97
-rw-r--r--src/runtime/netpoll_kqueue.go101
-rw-r--r--src/runtime/netpoll_nacl.go26
-rw-r--r--src/runtime/netpoll_solaris.c264
-rw-r--r--src/runtime/netpoll_stub.c18
-rw-r--r--src/runtime/netpoll_windows.c163
-rw-r--r--src/runtime/noasm_arm.go54
-rw-r--r--src/runtime/norace_test.go46
-rw-r--r--src/runtime/os_android.c16
-rw-r--r--src/runtime/os_android.h1
-rw-r--r--src/runtime/os_darwin.c595
-rw-r--r--src/runtime/os_darwin.go26
-rw-r--r--src/runtime/os_darwin.h43
-rw-r--r--src/runtime/os_dragonfly.c340
-rw-r--r--src/runtime/os_dragonfly.go20
-rw-r--r--src/runtime/os_dragonfly.h30
-rw-r--r--src/runtime/os_freebsd.c348
-rw-r--r--src/runtime/os_freebsd.go19
-rw-r--r--src/runtime/os_freebsd.h29
-rw-r--r--src/runtime/os_freebsd_arm.c24
-rw-r--r--src/runtime/os_linux.c370
-rw-r--r--src/runtime/os_linux.go19
-rw-r--r--src/runtime/os_linux.h41
-rw-r--r--src/runtime/os_linux_386.c38
-rw-r--r--src/runtime/os_linux_arm.c80
-rw-r--r--src/runtime/os_nacl.c325
-rw-r--r--src/runtime/os_nacl.go30
-rw-r--r--src/runtime/os_nacl.h162
-rw-r--r--src/runtime/os_nacl_arm.c24
-rw-r--r--src/runtime/os_netbsd.c396
-rw-r--r--src/runtime/os_netbsd.go22
-rw-r--r--src/runtime/os_netbsd.h31
-rw-r--r--src/runtime/os_netbsd_386.c17
-rw-r--r--src/runtime/os_netbsd_amd64.c18
-rw-r--r--src/runtime/os_netbsd_arm.c34
-rw-r--r--src/runtime/os_openbsd.c348
-rw-r--r--src/runtime/os_openbsd.go19
-rw-r--r--src/runtime/os_openbsd.h26
-rw-r--r--src/runtime/os_plan9.c437
-rw-r--r--src/runtime/os_plan9.go34
-rw-r--r--src/runtime/os_plan9.h92
-rw-r--r--src/runtime/os_plan9_386.c150
-rw-r--r--src/runtime/os_plan9_amd64.c158
-rw-r--r--src/runtime/os_solaris.c582
-rw-r--r--src/runtime/os_solaris.go101
-rw-r--r--src/runtime/os_solaris.h55
-rw-r--r--src/runtime/os_windows.c619
-rw-r--r--src/runtime/os_windows.go33
-rw-r--r--src/runtime/os_windows.h42
-rw-r--r--src/runtime/os_windows_386.c138
-rw-r--r--src/runtime/os_windows_386.go11
-rw-r--r--src/runtime/os_windows_amd64.c144
-rw-r--r--src/runtime/os_windows_amd64.go11
-rw-r--r--src/runtime/panic.c219
-rw-r--r--src/runtime/panic.go216
-rw-r--r--src/runtime/panic1.go209
-rw-r--r--src/runtime/parfor.c238
-rw-r--r--src/runtime/parfor_test.go139
-rw-r--r--src/runtime/pprof/pprof.go673
-rw-r--r--src/runtime/pprof/pprof_test.go452
-rw-r--r--src/runtime/print1.go341
-rw-r--r--src/runtime/proc.c3663
-rw-r--r--src/runtime/proc.go123
-rw-r--r--src/runtime/proc_test.go480
-rw-r--r--src/runtime/race.c314
-rw-r--r--src/runtime/race.go125
-rw-r--r--src/runtime/race.h34
-rw-r--r--src/runtime/race/README12
-rw-r--r--src/runtime/race/doc.go9
-rw-r--r--src/runtime/race/output_test.go156
-rw-r--r--src/runtime/race/race.go15
-rw-r--r--src/runtime/race/race_darwin_amd64.syso    bin 0 -> 278328 bytes
-rw-r--r--src/runtime/race/race_freebsd_amd64.syso   bin 0 -> 294224 bytes
-rw-r--r--src/runtime/race/race_linux_amd64.syso     bin 0 -> 298064 bytes
-rw-r--r--src/runtime/race/race_test.go172
-rw-r--r--src/runtime/race/race_windows_amd64.syso   bin 0 -> 292311 bytes
-rw-r--r--src/runtime/race/testdata/atomic_test.go288
-rw-r--r--src/runtime/race/testdata/cgo_test.go20
-rw-r--r--src/runtime/race/testdata/cgo_test_main.go30
-rw-r--r--src/runtime/race/testdata/chan_test.go659
-rw-r--r--src/runtime/race/testdata/comp_test.go186
-rw-r--r--src/runtime/race/testdata/finalizer_test.go67
-rw-r--r--src/runtime/race/testdata/io_test.go69
-rw-r--r--src/runtime/race/testdata/map_test.go333
-rw-r--r--src/runtime/race/testdata/mop_test.go1957
-rw-r--r--src/runtime/race/testdata/mutex_test.go138
-rw-r--r--src/runtime/race/testdata/regression_test.go194
-rw-r--r--src/runtime/race/testdata/rwmutex_test.go134
-rw-r--r--src/runtime/race/testdata/select_test.go286
-rw-r--r--src/runtime/race/testdata/slice_test.go485
-rw-r--r--src/runtime/race/testdata/sync_test.go216
-rw-r--r--src/runtime/race/testdata/waitgroup_test.go352
-rw-r--r--src/runtime/race0.go37
-rw-r--r--src/runtime/race_amd64.s383
-rw-r--r--src/runtime/rdebug.go37
-rw-r--r--src/runtime/rt0_android_arm.s11
-rw-r--r--src/runtime/rt0_darwin_386.s16
-rw-r--r--src/runtime/rt0_darwin_amd64.s15
-rw-r--r--src/runtime/rt0_dragonfly_386.s16
-rw-r--r--src/runtime/rt0_dragonfly_amd64.s15
-rw-r--r--src/runtime/rt0_freebsd_386.s16
-rw-r--r--src/runtime/rt0_freebsd_amd64.s15
-rw-r--r--src/runtime/rt0_freebsd_arm.s18
-rw-r--r--src/runtime/rt0_linux_386.s25
-rw-r--r--src/runtime/rt0_linux_amd64.s15
-rw-r--r--src/runtime/rt0_linux_arm.s91
-rw-r--r--src/runtime/rt0_nacl_386.s22
-rw-r--r--src/runtime/rt0_nacl_amd64p32.s30
-rw-r--r--src/runtime/rt0_nacl_arm.s20
-rw-r--r--src/runtime/rt0_netbsd_386.s16
-rw-r--r--src/runtime/rt0_netbsd_amd64.s15
-rw-r--r--src/runtime/rt0_netbsd_arm.s13
-rw-r--r--src/runtime/rt0_openbsd_386.s16
-rw-r--r--src/runtime/rt0_openbsd_amd64.s15
-rw-r--r--src/runtime/rt0_plan9_386.s23
-rw-r--r--src/runtime/rt0_plan9_amd64.s21
-rw-r--r--src/runtime/rt0_solaris_amd64.s18
-rw-r--r--src/runtime/rt0_windows_386.s20
-rw-r--r--src/runtime/rt0_windows_amd64.s19
-rw-r--r--src/runtime/rune.go219
-rw-r--r--src/runtime/runtime-gdb.py478
-rw-r--r--src/runtime/runtime.c391
-rw-r--r--src/runtime/runtime.go37
-rw-r--r--src/runtime/runtime.h1079
-rw-r--r--src/runtime/runtime_linux_test.go29
-rw-r--r--src/runtime/runtime_test.go243
-rw-r--r--src/runtime/runtime_unix_test.go56
-rw-r--r--src/runtime/select.go636
-rw-r--r--src/runtime/sema.go266
-rw-r--r--src/runtime/signal.c25
-rw-r--r--src/runtime/signal_386.c122
-rw-r--r--src/runtime/signal_amd64x.c156
-rw-r--r--src/runtime/signal_android_386.h1
-rw-r--r--src/runtime/signal_android_arm.h1
-rw-r--r--src/runtime/signal_arm.c121
-rw-r--r--src/runtime/signal_darwin_386.h23
-rw-r--r--src/runtime/signal_darwin_amd64.h31
-rw-r--r--src/runtime/signal_dragonfly_386.h23
-rw-r--r--src/runtime/signal_dragonfly_amd64.h31
-rw-r--r--src/runtime/signal_freebsd_386.h23
-rw-r--r--src/runtime/signal_freebsd_amd64.h31
-rw-r--r--src/runtime/signal_freebsd_arm.h28
-rw-r--r--src/runtime/signal_linux_386.h24
-rw-r--r--src/runtime/signal_linux_amd64.h32
-rw-r--r--src/runtime/signal_linux_arm.h28
-rw-r--r--src/runtime/signal_nacl_386.h23
-rw-r--r--src/runtime/signal_nacl_amd64p32.h31
-rw-r--r--src/runtime/signal_nacl_arm.h28
-rw-r--r--src/runtime/signal_netbsd_386.h23
-rw-r--r--src/runtime/signal_netbsd_amd64.h31
-rw-r--r--src/runtime/signal_netbsd_arm.h30
-rw-r--r--src/runtime/signal_openbsd_386.h23
-rw-r--r--src/runtime/signal_openbsd_amd64.h31
-rw-r--r--src/runtime/signal_solaris_amd64.h31
-rw-r--r--src/runtime/signal_unix.c119
-rw-r--r--src/runtime/signal_unix.go13
-rw-r--r--src/runtime/signal_unix.h14
-rw-r--r--src/runtime/signals_android.h1
-rw-r--r--src/runtime/signals_darwin.h50
-rw-r--r--src/runtime/signals_dragonfly.h51
-rw-r--r--src/runtime/signals_freebsd.h51
-rw-r--r--src/runtime/signals_linux.h83
-rw-r--r--src/runtime/signals_nacl.h50
-rw-r--r--src/runtime/signals_netbsd.h51
-rw-r--r--src/runtime/signals_openbsd.h51
-rw-r--r--src/runtime/signals_plan9.h60
-rw-r--r--src/runtime/signals_solaris.h94
-rw-r--r--src/runtime/signals_windows.h3
-rw-r--r--src/runtime/sigqueue.go173
-rw-r--r--src/runtime/slice.go139
-rw-r--r--src/runtime/softfloat64.go498
-rw-r--r--src/runtime/softfloat64_test.go198
-rw-r--r--src/runtime/softfloat_arm.c687
-rw-r--r--src/runtime/sqrt.go150
-rw-r--r--src/runtime/stack.c1128
-rw-r--r--src/runtime/stack.go13
-rw-r--r--src/runtime/stack.h119
-rw-r--r--src/runtime/stack_gen_test.go1473
-rw-r--r--src/runtime/stack_test.go367
-rw-r--r--src/runtime/string.c288
-rw-r--r--src/runtime/string.go241
-rw-r--r--src/runtime/string_test.go147
-rw-r--r--src/runtime/stubs.go240
-rw-r--r--src/runtime/symtab.go292
-rw-r--r--src/runtime/symtab_test.go47
-rw-r--r--src/runtime/sys_arm.c35
-rw-r--r--src/runtime/sys_darwin_386.s541
-rw-r--r--src/runtime/sys_darwin_amd64.s505
-rw-r--r--src/runtime/sys_dragonfly_386.s381
-rw-r--r--src/runtime/sys_dragonfly_amd64.s344
-rw-r--r--src/runtime/sys_freebsd_386.s391
-rw-r--r--src/runtime/sys_freebsd_amd64.s357
-rw-r--r--src/runtime/sys_freebsd_arm.s382
-rw-r--r--src/runtime/sys_linux_386.s489
-rw-r--r--src/runtime/sys_linux_amd64.s410
-rw-r--r--src/runtime/sys_linux_arm.s461
-rw-r--r--src/runtime/sys_nacl_386.s363
-rw-r--r--src/runtime/sys_nacl_amd64p32.s459
-rw-r--r--src/runtime/sys_nacl_arm.s320
-rw-r--r--src/runtime/sys_netbsd_386.s384
-rw-r--r--src/runtime/sys_netbsd_amd64.s358
-rw-r--r--src/runtime/sys_netbsd_arm.s351
-rw-r--r--src/runtime/sys_openbsd_386.s398
-rw-r--r--src/runtime/sys_openbsd_amd64.s350
-rw-r--r--src/runtime/sys_plan9_386.s258
-rw-r--r--src/runtime/sys_plan9_amd64.s260
-rw-r--r--src/runtime/sys_solaris_amd64.s349
-rw-r--r--src/runtime/sys_windows_386.s380
-rw-r--r--src/runtime/sys_windows_amd64.s407
-rw-r--r--src/runtime/sys_x86.c57
-rw-r--r--src/runtime/syscall_nacl.h71
-rw-r--r--src/runtime/syscall_solaris.c23
-rw-r--r--src/runtime/syscall_solaris.go322
-rw-r--r--src/runtime/syscall_windows.c166
-rw-r--r--src/runtime/syscall_windows.go88
-rw-r--r--src/runtime/syscall_windows_test.go451
-rw-r--r--src/runtime/thunk.s150
-rw-r--r--src/runtime/thunk_solaris_amd64.s88
-rw-r--r--src/runtime/time.go266
-rw-r--r--src/runtime/tls_arm.s65
-rw-r--r--src/runtime/traceback.go639
-rw-r--r--src/runtime/traceback_windows.go63
-rw-r--r--src/runtime/type.h113
-rw-r--r--src/runtime/typekind.go44
-rw-r--r--src/runtime/typekind.h41
-rw-r--r--src/runtime/vdso_linux_amd64.c366
-rw-r--r--src/runtime/vlop_386.s56
-rw-r--r--src/runtime/vlop_arm.s317
-rw-r--r--src/runtime/vlop_arm_test.go70
-rw-r--r--src/runtime/vlrt.c914
-rw-r--r--src/runtime/vlrt.go258
412 files changed, 81043 insertions, 0 deletions
diff --git a/src/runtime/Makefile b/src/runtime/Makefile
new file mode 100644
index 000000000..55087def0
--- /dev/null
+++ b/src/runtime/Makefile
@@ -0,0 +1,5 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../Make.dist
diff --git a/src/runtime/alg.go b/src/runtime/alg.go
new file mode 100644
index 000000000..e9ed59503
--- /dev/null
+++ b/src/runtime/alg.go
@@ -0,0 +1,352 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+ c0 = uintptr((8-ptrSize)/4*2860486313 + (ptrSize-4)/4*33054211828000289)
+ c1 = uintptr((8-ptrSize)/4*3267000013 + (ptrSize-4)/4*23344194077549503)
+)
+
+// type algorithms - known to compiler
+const (
+ alg_MEM = iota
+ alg_MEM0
+ alg_MEM8
+ alg_MEM16
+ alg_MEM32
+ alg_MEM64
+ alg_MEM128
+ alg_NOEQ
+ alg_NOEQ0
+ alg_NOEQ8
+ alg_NOEQ16
+ alg_NOEQ32
+ alg_NOEQ64
+ alg_NOEQ128
+ alg_STRING
+ alg_INTER
+ alg_NILINTER
+ alg_SLICE
+ alg_FLOAT32
+ alg_FLOAT64
+ alg_CPLX64
+ alg_CPLX128
+ alg_max
+)
+
+type typeAlg struct {
+ // function for hashing objects of this type
+ // (ptr to object, size, seed) -> hash
+ hash func(unsafe.Pointer, uintptr, uintptr) uintptr
+ // function for comparing objects of this type
+ // (ptr to object A, ptr to object B, size) -> ==?
+ equal func(unsafe.Pointer, unsafe.Pointer, uintptr) bool
+}
+
+var algarray = [alg_max]typeAlg{
+ alg_MEM: {memhash, memequal},
+ alg_MEM0: {memhash, memequal0},
+ alg_MEM8: {memhash, memequal8},
+ alg_MEM16: {memhash, memequal16},
+ alg_MEM32: {memhash, memequal32},
+ alg_MEM64: {memhash, memequal64},
+ alg_MEM128: {memhash, memequal128},
+ alg_NOEQ: {nil, nil},
+ alg_NOEQ0: {nil, nil},
+ alg_NOEQ8: {nil, nil},
+ alg_NOEQ16: {nil, nil},
+ alg_NOEQ32: {nil, nil},
+ alg_NOEQ64: {nil, nil},
+ alg_NOEQ128: {nil, nil},
+ alg_STRING: {strhash, strequal},
+ alg_INTER: {interhash, interequal},
+ alg_NILINTER: {nilinterhash, nilinterequal},
+ alg_SLICE: {nil, nil},
+ alg_FLOAT32: {f32hash, f32equal},
+ alg_FLOAT64: {f64hash, f64equal},
+ alg_CPLX64: {c64hash, c64equal},
+ alg_CPLX128: {c128hash, c128equal},
+}
+
+const nacl = GOOS == "nacl"
+
+var useAeshash bool
+
+// in asm_*.s
+func aeshash(p unsafe.Pointer, s, h uintptr) uintptr
+func aeshash32(p unsafe.Pointer, s, h uintptr) uintptr
+func aeshash64(p unsafe.Pointer, s, h uintptr) uintptr
+func aeshashstr(p unsafe.Pointer, s, h uintptr) uintptr
+
+func memhash(p unsafe.Pointer, s, h uintptr) uintptr {
+ if !nacl && useAeshash {
+ return aeshash(p, s, h)
+ }
+
+ h ^= c0
+ for s > 0 {
+ h = (h ^ uintptr(*(*byte)(p))) * c1
+ p = add(p, 1)
+ s--
+ }
+ return h
+}
+
+func strhash(a unsafe.Pointer, s, h uintptr) uintptr {
+ return memhash((*stringStruct)(a).str, uintptr(len(*(*string)(a))), h)
+}
+
+// NOTE: Because NaN != NaN, a map can contain any
+// number of (mostly useless) entries keyed with NaNs.
+// To avoid long hash chains, we assign a random number
+// as the hash value for a NaN.
+
+func f32hash(p unsafe.Pointer, s, h uintptr) uintptr {
+ f := *(*float32)(p)
+ switch {
+ case f == 0:
+ return c1 * (c0 ^ h) // +0, -0
+ case f != f:
+ return c1 * (c0 ^ h ^ uintptr(fastrand1())) // any kind of NaN
+ default:
+ return memhash(p, 4, h)
+ }
+}
+
+func f64hash(p unsafe.Pointer, s, h uintptr) uintptr {
+ f := *(*float64)(p)
+ switch {
+ case f == 0:
+ return c1 * (c0 ^ h) // +0, -0
+ case f != f:
+ return c1 * (c0 ^ h ^ uintptr(fastrand1())) // any kind of NaN
+ default:
+ return memhash(p, 8, h)
+ }
+}
+
+func c64hash(p unsafe.Pointer, s, h uintptr) uintptr {
+ x := (*[2]float32)(p)
+ return f32hash(unsafe.Pointer(&x[1]), 4, f32hash(unsafe.Pointer(&x[0]), 4, h))
+}
+
+func c128hash(p unsafe.Pointer, s, h uintptr) uintptr {
+ x := (*[2]float64)(p)
+ return f64hash(unsafe.Pointer(&x[1]), 8, f64hash(unsafe.Pointer(&x[0]), 8, h))
+}
+
+func interhash(p unsafe.Pointer, s, h uintptr) uintptr {
+ a := (*iface)(p)
+ tab := a.tab
+ if tab == nil {
+ return h
+ }
+ t := tab._type
+ fn := goalg(t.alg).hash
+ if fn == nil {
+ panic(errorString("hash of unhashable type " + *t._string))
+ }
+ if isDirectIface(t) {
+ return c1 * fn(unsafe.Pointer(&a.data), uintptr(t.size), h^c0)
+ } else {
+ return c1 * fn(a.data, uintptr(t.size), h^c0)
+ }
+}
+
+func nilinterhash(p unsafe.Pointer, s, h uintptr) uintptr {
+ a := (*eface)(p)
+ t := a._type
+ if t == nil {
+ return h
+ }
+ fn := goalg(t.alg).hash
+ if fn == nil {
+ panic(errorString("hash of unhashable type " + *t._string))
+ }
+ if isDirectIface(t) {
+ return c1 * fn(unsafe.Pointer(&a.data), uintptr(t.size), h^c0)
+ } else {
+ return c1 * fn(a.data, uintptr(t.size), h^c0)
+ }
+}
+
+func memequal(p, q unsafe.Pointer, size uintptr) bool {
+ if p == q {
+ return true
+ }
+ return memeq(p, q, size)
+}
+
+func memequal0(p, q unsafe.Pointer, size uintptr) bool {
+ return true
+}
+func memequal8(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*int8)(p) == *(*int8)(q)
+}
+func memequal16(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*int16)(p) == *(*int16)(q)
+}
+func memequal32(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*int32)(p) == *(*int32)(q)
+}
+func memequal64(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*int64)(p) == *(*int64)(q)
+}
+func memequal128(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*[2]int64)(p) == *(*[2]int64)(q)
+}
+func f32equal(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*float32)(p) == *(*float32)(q)
+}
+func f64equal(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*float64)(p) == *(*float64)(q)
+}
+func c64equal(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*complex64)(p) == *(*complex64)(q)
+}
+func c128equal(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*complex128)(p) == *(*complex128)(q)
+}
+func strequal(p, q unsafe.Pointer, size uintptr) bool {
+ return *(*string)(p) == *(*string)(q)
+}
+func interequal(p, q unsafe.Pointer, size uintptr) bool {
+ return ifaceeq(*(*interface {
+ f()
+ })(p), *(*interface {
+ f()
+ })(q))
+}
+func nilinterequal(p, q unsafe.Pointer, size uintptr) bool {
+ return efaceeq(*(*interface{})(p), *(*interface{})(q))
+}
+func efaceeq(p, q interface{}) bool {
+ x := (*eface)(unsafe.Pointer(&p))
+ y := (*eface)(unsafe.Pointer(&q))
+ t := x._type
+ if t != y._type {
+ return false
+ }
+ if t == nil {
+ return true
+ }
+ eq := goalg(t.alg).equal
+ if eq == nil {
+ panic(errorString("comparing uncomparable type " + *t._string))
+ }
+ if isDirectIface(t) {
+ return eq(noescape(unsafe.Pointer(&x.data)), noescape(unsafe.Pointer(&y.data)), uintptr(t.size))
+ }
+ return eq(x.data, y.data, uintptr(t.size))
+}
+func ifaceeq(p, q interface {
+ f()
+}) bool {
+ x := (*iface)(unsafe.Pointer(&p))
+ y := (*iface)(unsafe.Pointer(&q))
+ xtab := x.tab
+ if xtab != y.tab {
+ return false
+ }
+ if xtab == nil {
+ return true
+ }
+ t := xtab._type
+ eq := goalg(t.alg).equal
+ if eq == nil {
+ panic(errorString("comparing uncomparable type " + *t._string))
+ }
+ if isDirectIface(t) {
+ return eq(noescape(unsafe.Pointer(&x.data)), noescape(unsafe.Pointer(&y.data)), uintptr(t.size))
+ }
+ return eq(x.data, y.data, uintptr(t.size))
+}
+
+// Testing adapters for hash quality tests (see hash_test.go)
+func haveGoodHash() bool {
+ return useAeshash
+}
+
+func stringHash(s string, seed uintptr) uintptr {
+ return algarray[alg_STRING].hash(noescape(unsafe.Pointer(&s)), unsafe.Sizeof(s), seed)
+}
+
+func bytesHash(b []byte, seed uintptr) uintptr {
+ s := (*sliceStruct)(unsafe.Pointer(&b))
+ return algarray[alg_MEM].hash(s.array, uintptr(s.len), seed)
+}
+
+func int32Hash(i uint32, seed uintptr) uintptr {
+ return algarray[alg_MEM32].hash(noescape(unsafe.Pointer(&i)), 4, seed)
+}
+
+func int64Hash(i uint64, seed uintptr) uintptr {
+ return algarray[alg_MEM64].hash(noescape(unsafe.Pointer(&i)), 8, seed)
+}
+
+func efaceHash(i interface{}, seed uintptr) uintptr {
+ return algarray[alg_NILINTER].hash(noescape(unsafe.Pointer(&i)), unsafe.Sizeof(i), seed)
+}
+
+func ifaceHash(i interface {
+ F()
+}, seed uintptr) uintptr {
+ return algarray[alg_INTER].hash(noescape(unsafe.Pointer(&i)), unsafe.Sizeof(i), seed)
+}
+
+// Testing adapter for memclr
+func memclrBytes(b []byte) {
+ s := (*sliceStruct)(unsafe.Pointer(&b))
+ memclr(s.array, uintptr(s.len))
+}
+
+// TODO(dvyukov): remove when Type is converted to Go and contains *typeAlg.
+func goalg(a unsafe.Pointer) *typeAlg {
+ return (*typeAlg)(a)
+}
+
+// used in asm_{386,amd64}.s
+const hashRandomBytes = 32
+
+var aeskeysched [hashRandomBytes]byte
+
+//go:noescape
+func get_random_data(rnd *unsafe.Pointer, n *int32)
+
+func init() {
+ if theGoos == "nacl" {
+ return
+ }
+
+ // Install aes hash algorithm if we have the instructions we need
+ if (cpuid_ecx&(1<<25)) != 0 && // aes (aesenc)
+ (cpuid_ecx&(1<<9)) != 0 && // sse3 (pshufb)
+ (cpuid_ecx&(1<<19)) != 0 { // sse4.1 (pinsr{d,q})
+ useAeshash = true
+ algarray[alg_MEM].hash = aeshash
+ algarray[alg_MEM8].hash = aeshash
+ algarray[alg_MEM16].hash = aeshash
+ algarray[alg_MEM32].hash = aeshash32
+ algarray[alg_MEM64].hash = aeshash64
+ algarray[alg_MEM128].hash = aeshash
+ algarray[alg_STRING].hash = aeshashstr
+ // Initialize with random data so hash collisions will be hard to engineer.
+ var rnd unsafe.Pointer
+ var n int32
+ get_random_data(&rnd, &n)
+ if n > hashRandomBytes {
+ n = hashRandomBytes
+ }
+ memmove(unsafe.Pointer(&aeskeysched[0]), rnd, uintptr(n))
+ if n < hashRandomBytes {
+ // Not very random, but better than nothing.
+ for t := nanotime(); n < hashRandomBytes; n++ {
+ aeskeysched[n] = byte(t >> uint(8*(n%8)))
+ }
+ }
+ }
+}
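The NOTE about NaN keys in f32hash/f64hash above can be observed from ordinary map code: because NaN != NaN, every insertion under a NaN key creates a fresh entry that can never be looked up again, which is why the runtime randomizes the hash to spread those entries across buckets instead of forming one long chain. A minimal illustration using only the standard library (not part of this CL):

    package main

    import (
    	"fmt"
    	"math"
    )

    func main() {
    	m := make(map[float64]int)
    	for i := 0; i < 3; i++ {
    		m[math.NaN()] = i // each NaN compares unequal to every key already present
    	}
    	fmt.Println(len(m)) // 3: three distinct, unreachable NaN-keyed entries
    	_, ok := m[math.NaN()]
    	fmt.Println(ok) // false: a NaN key can never be found again
    }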
diff --git a/src/runtime/append_test.go b/src/runtime/append_test.go
new file mode 100644
index 000000000..a67dc9b49
--- /dev/null
+++ b/src/runtime/append_test.go
@@ -0,0 +1,190 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package runtime_test
+
+import "testing"
+
+const N = 20
+
+func BenchmarkAppend(b *testing.B) {
+ b.StopTimer()
+ x := make([]int, 0, N)
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ x = x[0:0]
+ for j := 0; j < N; j++ {
+ x = append(x, j)
+ }
+ }
+}
+
+func BenchmarkAppendGrowByte(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x []byte
+ for j := 0; j < 1<<20; j++ {
+ x = append(x, byte(j))
+ }
+ }
+}
+
+func BenchmarkAppendGrowString(b *testing.B) {
+ var s string
+ for i := 0; i < b.N; i++ {
+ var x []string
+ for j := 0; j < 1<<20; j++ {
+ x = append(x, s)
+ }
+ }
+}
+
+func benchmarkAppendBytes(b *testing.B, length int) {
+ b.StopTimer()
+ x := make([]byte, 0, N)
+ y := make([]byte, length)
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ x = x[0:0]
+ x = append(x, y...)
+ }
+}
+
+func BenchmarkAppend1Byte(b *testing.B) {
+ benchmarkAppendBytes(b, 1)
+}
+
+func BenchmarkAppend4Bytes(b *testing.B) {
+ benchmarkAppendBytes(b, 4)
+}
+
+func BenchmarkAppend7Bytes(b *testing.B) {
+ benchmarkAppendBytes(b, 7)
+}
+
+func BenchmarkAppend8Bytes(b *testing.B) {
+ benchmarkAppendBytes(b, 8)
+}
+
+func BenchmarkAppend15Bytes(b *testing.B) {
+ benchmarkAppendBytes(b, 15)
+}
+
+func BenchmarkAppend16Bytes(b *testing.B) {
+ benchmarkAppendBytes(b, 16)
+}
+
+func BenchmarkAppend32Bytes(b *testing.B) {
+ benchmarkAppendBytes(b, 32)
+}
+
+func benchmarkAppendStr(b *testing.B, str string) {
+ b.StopTimer()
+ x := make([]byte, 0, N)
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ x = x[0:0]
+ x = append(x, str...)
+ }
+}
+
+func BenchmarkAppendStr1Byte(b *testing.B) {
+ benchmarkAppendStr(b, "1")
+}
+
+func BenchmarkAppendStr4Bytes(b *testing.B) {
+ benchmarkAppendStr(b, "1234")
+}
+
+func BenchmarkAppendStr8Bytes(b *testing.B) {
+ benchmarkAppendStr(b, "12345678")
+}
+
+func BenchmarkAppendStr16Bytes(b *testing.B) {
+ benchmarkAppendStr(b, "1234567890123456")
+}
+
+func BenchmarkAppendStr32Bytes(b *testing.B) {
+ benchmarkAppendStr(b, "12345678901234567890123456789012")
+}
+
+func BenchmarkAppendSpecialCase(b *testing.B) {
+ b.StopTimer()
+ x := make([]int, 0, N)
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ x = x[0:0]
+ for j := 0; j < N; j++ {
+ if len(x) < cap(x) {
+ x = x[:len(x)+1]
+ x[len(x)-1] = j
+ } else {
+ x = append(x, j)
+ }
+ }
+ }
+}
+
+var x []int
+
+func f() int {
+ x[:1][0] = 3
+ return 2
+}
+
+func TestSideEffectOrder(t *testing.T) {
+ x = make([]int, 0, 10)
+ x = append(x, 1, f())
+ if x[0] != 1 || x[1] != 2 {
+ t.Error("append failed: ", x[0], x[1])
+ }
+}
+
+func TestAppendOverlap(t *testing.T) {
+ x := []byte("1234")
+ x = append(x[1:], x...) // p > q in runtime·appendslice.
+ got := string(x)
+ want := "2341234"
+ if got != want {
+ t.Errorf("overlap failed: got %q want %q", got, want)
+ }
+}
+
+func benchmarkCopySlice(b *testing.B, l int) {
+ s := make([]byte, l)
+ buf := make([]byte, 4096)
+ var n int
+ for i := 0; i < b.N; i++ {
+ n = copy(buf, s)
+ }
+ b.SetBytes(int64(n))
+}
+
+func benchmarkCopyStr(b *testing.B, l int) {
+ s := string(make([]byte, l))
+ buf := make([]byte, 4096)
+ var n int
+ for i := 0; i < b.N; i++ {
+ n = copy(buf, s)
+ }
+ b.SetBytes(int64(n))
+}
+
+func BenchmarkCopy1Byte(b *testing.B) { benchmarkCopySlice(b, 1) }
+func BenchmarkCopy2Byte(b *testing.B) { benchmarkCopySlice(b, 2) }
+func BenchmarkCopy4Byte(b *testing.B) { benchmarkCopySlice(b, 4) }
+func BenchmarkCopy8Byte(b *testing.B) { benchmarkCopySlice(b, 8) }
+func BenchmarkCopy12Byte(b *testing.B) { benchmarkCopySlice(b, 12) }
+func BenchmarkCopy16Byte(b *testing.B) { benchmarkCopySlice(b, 16) }
+func BenchmarkCopy32Byte(b *testing.B) { benchmarkCopySlice(b, 32) }
+func BenchmarkCopy128Byte(b *testing.B) { benchmarkCopySlice(b, 128) }
+func BenchmarkCopy1024Byte(b *testing.B) { benchmarkCopySlice(b, 1024) }
+
+func BenchmarkCopy1String(b *testing.B) { benchmarkCopyStr(b, 1) }
+func BenchmarkCopy2String(b *testing.B) { benchmarkCopyStr(b, 2) }
+func BenchmarkCopy4String(b *testing.B) { benchmarkCopyStr(b, 4) }
+func BenchmarkCopy8String(b *testing.B) { benchmarkCopyStr(b, 8) }
+func BenchmarkCopy12String(b *testing.B) { benchmarkCopyStr(b, 12) }
+func BenchmarkCopy16String(b *testing.B) { benchmarkCopyStr(b, 16) }
+func BenchmarkCopy32String(b *testing.B) { benchmarkCopyStr(b, 32) }
+func BenchmarkCopy128String(b *testing.B) { benchmarkCopyStr(b, 128) }
+func BenchmarkCopy1024String(b *testing.B) { benchmarkCopyStr(b, 1024) }
diff --git a/src/runtime/arch_386.go b/src/runtime/arch_386.go
new file mode 100644
index 000000000..287b67e27
--- /dev/null
+++ b/src/runtime/arch_386.go
@@ -0,0 +1,12 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ cacheLineSize = 64
+)
+
+type uintreg uint32
+type intptr int32 // TODO(rsc): remove
diff --git a/src/runtime/arch_386.h b/src/runtime/arch_386.h
new file mode 100644
index 000000000..75a5ba77f
--- /dev/null
+++ b/src/runtime/arch_386.h
@@ -0,0 +1,17 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+enum {
+ thechar = '8',
+ BigEndian = 0,
+ CacheLineSize = 64,
+ RuntimeGogoBytes = 64,
+#ifdef GOOS_nacl
+ PhysPageSize = 65536,
+#else
+ PhysPageSize = 4096,
+#endif
+ PCQuantum = 1,
+ Int64Align = 4
+};
diff --git a/src/runtime/arch_amd64.go b/src/runtime/arch_amd64.go
new file mode 100644
index 000000000..fe60c7066
--- /dev/null
+++ b/src/runtime/arch_amd64.go
@@ -0,0 +1,12 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ cacheLineSize = 64
+)
+
+type uintreg uint64
+type intptr int64 // TODO(rsc): remove
diff --git a/src/runtime/arch_amd64.h b/src/runtime/arch_amd64.h
new file mode 100644
index 000000000..d7b81ee90
--- /dev/null
+++ b/src/runtime/arch_amd64.h
@@ -0,0 +1,25 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+enum {
+ thechar = '6',
+ BigEndian = 0,
+ CacheLineSize = 64,
+#ifdef GOOS_solaris
+ RuntimeGogoBytes = 80,
+#else
+#ifdef GOOS_windows
+ RuntimeGogoBytes = 80,
+#else
+#ifdef GOOS_plan9
+ RuntimeGogoBytes = 80,
+#else
+ RuntimeGogoBytes = 64,
+#endif // Plan 9
+#endif // Windows
+#endif // Solaris
+ PhysPageSize = 4096,
+ PCQuantum = 1,
+ Int64Align = 8
+};
diff --git a/src/runtime/arch_amd64p32.go b/src/runtime/arch_amd64p32.go
new file mode 100644
index 000000000..90766b404
--- /dev/null
+++ b/src/runtime/arch_amd64p32.go
@@ -0,0 +1,12 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ cacheLineSize = 64
+)
+
+type uintreg uint64
+type intptr int32 // TODO(rsc): remove
diff --git a/src/runtime/arch_amd64p32.h b/src/runtime/arch_amd64p32.h
new file mode 100644
index 000000000..d3e864987
--- /dev/null
+++ b/src/runtime/arch_amd64p32.h
@@ -0,0 +1,17 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+enum {
+ thechar = '6',
+ BigEndian = 0,
+ CacheLineSize = 64,
+ RuntimeGogoBytes = 64,
+#ifdef GOOS_nacl
+ PhysPageSize = 65536,
+#else
+ PhysPageSize = 4096,
+#endif
+ PCQuantum = 1,
+ Int64Align = 8
+};
diff --git a/src/runtime/arch_arm.go b/src/runtime/arch_arm.go
new file mode 100644
index 000000000..23f2711f6
--- /dev/null
+++ b/src/runtime/arch_arm.go
@@ -0,0 +1,12 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ cacheLineSize = 32
+)
+
+type uintreg uint32
+type intptr int32 // TODO(rsc): remove
diff --git a/src/runtime/arch_arm.h b/src/runtime/arch_arm.h
new file mode 100644
index 000000000..637a334a0
--- /dev/null
+++ b/src/runtime/arch_arm.h
@@ -0,0 +1,17 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+enum {
+ thechar = '5',
+ BigEndian = 0,
+ CacheLineSize = 32,
+ RuntimeGogoBytes = 60,
+#ifdef GOOS_nacl
+ PhysPageSize = 65536,
+#else
+ PhysPageSize = 4096,
+#endif
+ PCQuantum = 4,
+ Int64Align = 4
+};
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
new file mode 100644
index 000000000..25026417b
--- /dev/null
+++ b/src/runtime/asm_386.s
@@ -0,0 +1,2292 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+TEXT runtime·rt0_go(SB),NOSPLIT,$0
+ // copy arguments forward on an even stack
+ MOVL argc+0(FP), AX
+ MOVL argv+4(FP), BX
+ SUBL $128, SP // plenty of scratch
+ ANDL $~15, SP
+ MOVL AX, 120(SP) // save argc, argv away
+ MOVL BX, 124(SP)
+
+ // set default stack bounds.
+ // _cgo_init may update stackguard.
+ MOVL $runtime·g0(SB), BP
+ LEAL (-64*1024+104)(SP), BX
+ MOVL BX, g_stackguard(BP)
+ MOVL BX, g_stackguard0(BP)
+ MOVL SP, g_stackbase(BP)
+
+ // find out information about the processor we're on
+ MOVL $0, AX
+ CPUID
+ CMPL AX, $0
+ JE nocpuinfo
+ MOVL $1, AX
+ CPUID
+ MOVL CX, runtime·cpuid_ecx(SB)
+ MOVL DX, runtime·cpuid_edx(SB)
+nocpuinfo:
+
+ // if there is an _cgo_init, call it to let it
+ // initialize and to set up GS. if not,
+ // we set up GS ourselves.
+ MOVL _cgo_init(SB), AX
+ TESTL AX, AX
+ JZ needtls
+ MOVL $setg_gcc<>(SB), BX
+ MOVL BX, 4(SP)
+ MOVL BP, 0(SP)
+ CALL AX
+ // update stackguard after _cgo_init
+ MOVL $runtime·g0(SB), CX
+ MOVL g_stackguard0(CX), AX
+ MOVL AX, g_stackguard(CX)
+ // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
+ CMPL runtime·iswindows(SB), $0
+ JEQ ok
+needtls:
+ // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
+ CMPL runtime·isplan9(SB), $1
+ JEQ ok
+
+ // set up %gs
+ CALL runtime·ldt0setup(SB)
+
+ // store through it, to make sure it works
+ get_tls(BX)
+ MOVL $0x123, g(BX)
+ MOVL runtime·tls0(SB), AX
+ CMPL AX, $0x123
+ JEQ ok
+ MOVL AX, 0 // abort
+ok:
+ // set up m and g "registers"
+ get_tls(BX)
+ LEAL runtime·g0(SB), CX
+ MOVL CX, g(BX)
+ LEAL runtime·m0(SB), AX
+
+ // save m->g0 = g0
+ MOVL CX, m_g0(AX)
+ // save g0->m = m0
+ MOVL AX, g_m(CX)
+
+ CALL runtime·emptyfunc(SB) // fault if stack check is wrong
+
+ // convention is D is always cleared
+ CLD
+
+ CALL runtime·check(SB)
+
+ // saved argc, argv
+ MOVL 120(SP), AX
+ MOVL AX, 0(SP)
+ MOVL 124(SP), AX
+ MOVL AX, 4(SP)
+ CALL runtime·args(SB)
+ CALL runtime·osinit(SB)
+ CALL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ PUSHL $runtime·main·f(SB) // entry
+ PUSHL $0 // arg size
+ ARGSIZE(8)
+ CALL runtime·newproc(SB)
+ ARGSIZE(-1)
+ POPL AX
+ POPL AX
+
+ // start this M
+ CALL runtime·mstart(SB)
+
+ INT $3
+ RET
+
+DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
+GLOBL runtime·main·f(SB),RODATA,$4
+
+TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
+ INT $3
+ RET
+
+TEXT runtime·asminit(SB),NOSPLIT,$0-0
+ // Linux and MinGW start the FPU in extended double precision.
+ // Other operating systems use double precision.
+ // Change to double precision to match them,
+ // and to match other hardware that only has double.
+ PUSHL $0x27F
+ FLDCW 0(SP)
+ POPL AX
+ RET
+
+/*
+ * go-routine
+ */
+
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB), NOSPLIT, $0-4
+ MOVL buf+0(FP), AX // gobuf
+ LEAL buf+0(FP), BX // caller's SP
+ MOVL BX, gobuf_sp(AX)
+ MOVL 0(SP), BX // caller's PC
+ MOVL BX, gobuf_pc(AX)
+ MOVL $0, gobuf_ret(AX)
+ MOVL $0, gobuf_ctxt(AX)
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL BX, gobuf_g(AX)
+ RET
+
+// void gogo(Gobuf*)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB), NOSPLIT, $0-4
+ MOVL buf+0(FP), BX // gobuf
+ MOVL gobuf_g(BX), DX
+ MOVL 0(DX), CX // make sure g != nil
+ get_tls(CX)
+ MOVL DX, g(CX)
+ MOVL gobuf_sp(BX), SP // restore SP
+ MOVL gobuf_ret(BX), AX
+ MOVL gobuf_ctxt(BX), DX
+ MOVL $0, gobuf_sp(BX) // clear to help garbage collector
+ MOVL $0, gobuf_ret(BX)
+ MOVL $0, gobuf_ctxt(BX)
+ MOVL gobuf_pc(BX), BX
+ JMP BX
+
+// func mcall(fn func(*g))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB), NOSPLIT, $0-4
+ MOVL fn+0(FP), DI
+
+ get_tls(CX)
+ MOVL g(CX), AX // save state in g->sched
+ MOVL 0(SP), BX // caller's PC
+ MOVL BX, (g_sched+gobuf_pc)(AX)
+ LEAL fn+0(FP), BX // caller's SP
+ MOVL BX, (g_sched+gobuf_sp)(AX)
+ MOVL AX, (g_sched+gobuf_g)(AX)
+
+ // switch to m->g0 & its stack, call fn
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVL m_g0(BX), SI
+ CMPL SI, AX // if g == m->g0 call badmcall
+ JNE 3(PC)
+ MOVL $runtime·badmcall(SB), AX
+ JMP AX
+ MOVL SI, g(CX) // g = m->g0
+ MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
+ PUSHL AX
+ MOVL DI, DX
+ MOVL 0(DI), DI
+ CALL DI
+ POPL AX
+ MOVL $runtime·badmcall2(SB), AX
+ JMP AX
+ RET
+
+// switchtoM is a dummy routine that onM leaves at the bottom
+// of the G stack. We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the M stack because the one at the top of
+// the M stack terminates the stack walk (see topofstack()).
+TEXT runtime·switchtoM(SB), NOSPLIT, $0-4
+ RET
+
+// func onM(fn func())
+TEXT runtime·onM(SB), NOSPLIT, $0-4
+ MOVL fn+0(FP), DI // DI = fn
+ get_tls(CX)
+ MOVL g(CX), AX // AX = g
+ MOVL g_m(AX), BX // BX = m
+
+ MOVL m_g0(BX), DX // DX = g0
+ CMPL AX, DX
+ JEQ onm
+
+ MOVL m_curg(BX), BP
+ CMPL AX, BP
+ JEQ oncurg
+
+ // Not g0, not curg. Must be gsignal, but that's not allowed.
+ // Hide call from linker nosplit analysis.
+ MOVL $runtime·badonm(SB), AX
+ CALL AX
+
+oncurg:
+ // save our state in g->sched. Pretend to
+ // be switchtoM if the G stack is scanned.
+ MOVL $runtime·switchtoM(SB), (g_sched+gobuf_pc)(AX)
+ MOVL SP, (g_sched+gobuf_sp)(AX)
+ MOVL AX, (g_sched+gobuf_g)(AX)
+
+ // switch to g0
+ MOVL DX, g(CX)
+ MOVL (g_sched+gobuf_sp)(DX), BX
+ // make it look like mstart called onM on g0, to stop traceback
+ SUBL $4, BX
+ MOVL $runtime·mstart(SB), DX
+ MOVL DX, 0(BX)
+ MOVL BX, SP
+
+ // call target function
+ ARGSIZE(0)
+ MOVL DI, DX
+ MOVL 0(DI), DI
+ CALL DI
+
+ // switch back to g
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), BX
+ MOVL m_curg(BX), AX
+ MOVL AX, g(CX)
+ MOVL (g_sched+gobuf_sp)(AX), SP
+ MOVL $0, (g_sched+gobuf_sp)(AX)
+ RET
+
+onm:
+ // already on m stack, just call directly
+ MOVL DI, DX
+ MOVL 0(DI), DI
+ CALL DI
+ RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+TEXT runtime·morestack(SB),NOSPLIT,$0-0
+ // Cannot grow scheduler stack (m->g0).
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVL m_g0(BX), SI
+ CMPL g(CX), SI
+ JNE 2(PC)
+ INT $3
+
+ // Cannot grow signal stack.
+ MOVL m_gsignal(BX), SI
+ CMPL g(CX), SI
+ JNE 2(PC)
+ INT $3
+
+ // frame size in DI
+ // arg size in AX
+ // Save in m.
+ MOVL DI, m_moreframesize(BX)
+ MOVL AX, m_moreargsize(BX)
+
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOVL 4(SP), DI // f's caller's PC
+ MOVL DI, (m_morebuf+gobuf_pc)(BX)
+ LEAL 8(SP), CX // f's caller's SP
+ MOVL CX, (m_morebuf+gobuf_sp)(BX)
+ MOVL CX, m_moreargp(BX)
+ get_tls(CX)
+ MOVL g(CX), SI
+ MOVL SI, (m_morebuf+gobuf_g)(BX)
+
+ // Set g->sched to context in f.
+ MOVL 0(SP), AX // f's PC
+ MOVL AX, (g_sched+gobuf_pc)(SI)
+ MOVL SI, (g_sched+gobuf_g)(SI)
+ LEAL 4(SP), AX // f's SP
+ MOVL AX, (g_sched+gobuf_sp)(SI)
+ MOVL DX, (g_sched+gobuf_ctxt)(SI)
+
+ // Call newstack on m->g0's stack.
+ MOVL m_g0(BX), BP
+ MOVL BP, g(CX)
+ MOVL (g_sched+gobuf_sp)(BP), AX
+ MOVL -4(AX), BX // fault if CALL would, before smashing SP
+ MOVL AX, SP
+ CALL runtime·newstack(SB)
+ MOVL $0, 0x1003 // crash if newstack returns
+ RET
+
+TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
+ MOVL $0, DX
+ JMP runtime·morestack(SB)
+
+// reflect·call: call a function with the given argument list
+// func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE) \
+ CMPL CX, $MAXSIZE; \
+ JA 3(PC); \
+ MOVL $NAME(SB), AX; \
+ JMP AX
+// Note: can't just "JMP NAME(SB)" - bad inlining results.
+
+TEXT reflect·call(SB), NOSPLIT, $0-16
+ MOVL argsize+8(FP), CX
+ DISPATCH(runtime·call16, 16)
+ DISPATCH(runtime·call32, 32)
+ DISPATCH(runtime·call64, 64)
+ DISPATCH(runtime·call128, 128)
+ DISPATCH(runtime·call256, 256)
+ DISPATCH(runtime·call512, 512)
+ DISPATCH(runtime·call1024, 1024)
+ DISPATCH(runtime·call2048, 2048)
+ DISPATCH(runtime·call4096, 4096)
+ DISPATCH(runtime·call8192, 8192)
+ DISPATCH(runtime·call16384, 16384)
+ DISPATCH(runtime·call32768, 32768)
+ DISPATCH(runtime·call65536, 65536)
+ DISPATCH(runtime·call131072, 131072)
+ DISPATCH(runtime·call262144, 262144)
+ DISPATCH(runtime·call524288, 524288)
+ DISPATCH(runtime·call1048576, 1048576)
+ DISPATCH(runtime·call2097152, 2097152)
+ DISPATCH(runtime·call4194304, 4194304)
+ DISPATCH(runtime·call8388608, 8388608)
+ DISPATCH(runtime·call16777216, 16777216)
+ DISPATCH(runtime·call33554432, 33554432)
+ DISPATCH(runtime·call67108864, 67108864)
+ DISPATCH(runtime·call134217728, 134217728)
+ DISPATCH(runtime·call268435456, 268435456)
+ DISPATCH(runtime·call536870912, 536870912)
+ DISPATCH(runtime·call1073741824, 1073741824)
+ MOVL $runtime·badreflectcall(SB), AX
+ JMP AX
+
+// Argument map for the callXX frames. Each has one stack map.
+DATA gcargs_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gcargs_reflectcall<>+0x04(SB)/4, $8 // 4 words
+DATA gcargs_reflectcall<>+0x08(SB)/1, $(const_BitsPointer+(const_BitsPointer<<2)+(const_BitsScalar<<4)+(const_BitsScalar<<6))
+GLOBL gcargs_reflectcall<>(SB),RODATA,$12
+
+// callXX frames have no locals
+DATA gclocals_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gclocals_reflectcall<>+0x04(SB)/4, $0 // 0 locals
+GLOBL gclocals_reflectcall<>(SB),RODATA,$8
+
+#define CALLFN(NAME,MAXSIZE) \
+TEXT NAME(SB), WRAPPER, $MAXSIZE-16; \
+ FUNCDATA $FUNCDATA_ArgsPointerMaps,gcargs_reflectcall<>(SB); \
+ FUNCDATA $FUNCDATA_LocalsPointerMaps,gclocals_reflectcall<>(SB);\
+ /* copy arguments to stack */ \
+ MOVL argptr+4(FP), SI; \
+ MOVL argsize+8(FP), CX; \
+ MOVL SP, DI; \
+ REP;MOVSB; \
+ /* call function */ \
+ MOVL f+0(FP), DX; \
+ MOVL (DX), AX; \
+ PCDATA $PCDATA_StackMapIndex, $0; \
+ CALL AX; \
+ /* copy return values back */ \
+ MOVL argptr+4(FP), DI; \
+ MOVL argsize+8(FP), CX; \
+ MOVL retoffset+12(FP), BX; \
+ MOVL SP, SI; \
+ ADDL BX, DI; \
+ ADDL BX, SI; \
+ SUBL BX, CX; \
+ REP;MOVSB; \
+ RET
+
+CALLFN(runtime·call16, 16)
+CALLFN(runtime·call32, 32)
+CALLFN(runtime·call64, 64)
+CALLFN(runtime·call128, 128)
+CALLFN(runtime·call256, 256)
+CALLFN(runtime·call512, 512)
+CALLFN(runtime·call1024, 1024)
+CALLFN(runtime·call2048, 2048)
+CALLFN(runtime·call4096, 4096)
+CALLFN(runtime·call8192, 8192)
+CALLFN(runtime·call16384, 16384)
+CALLFN(runtime·call32768, 32768)
+CALLFN(runtime·call65536, 65536)
+CALLFN(runtime·call131072, 131072)
+CALLFN(runtime·call262144, 262144)
+CALLFN(runtime·call524288, 524288)
+CALLFN(runtime·call1048576, 1048576)
+CALLFN(runtime·call2097152, 2097152)
+CALLFN(runtime·call4194304, 4194304)
+CALLFN(runtime·call8388608, 8388608)
+CALLFN(runtime·call16777216, 16777216)
+CALLFN(runtime·call33554432, 33554432)
+CALLFN(runtime·call67108864, 67108864)
+CALLFN(runtime·call134217728, 134217728)
+CALLFN(runtime·call268435456, 268435456)
+CALLFN(runtime·call536870912, 536870912)
+CALLFN(runtime·call1073741824, 1073741824)
+
+// Return point when leaving stack.
+//
+// Lessstack can appear in stack traces for the same reason
+// as morestack; in that context, it has 0 arguments.
+TEXT runtime·lessstack(SB), NOSPLIT, $0-0
+ // Save return value in m->cret
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVL AX, m_cret(BX)
+
+ // Call oldstack on m->g0's stack.
+ MOVL m_g0(BX), BP
+ MOVL BP, g(CX)
+ MOVL (g_sched+gobuf_sp)(BP), SP
+ CALL runtime·oldstack(SB)
+ MOVL $0, 0x1004 // crash if oldstack returns
+ RET
+
+// bool cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+TEXT runtime·cas(SB), NOSPLIT, $0-13
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ JZ 4(PC)
+ MOVL $0, AX
+ MOVB AX, ret+12(FP)
+ RET
+ MOVL $1, AX
+ MOVB AX, ret+12(FP)
+ RET
+
+TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
+ JMP runtime·cas(SB)
+
+TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
+ JMP runtime·atomicload(SB)
+
+TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
+ JMP runtime·atomicload(SB)
+
+// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
+// Atomically:
+// if(*val == *old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT runtime·cas64(SB), NOSPLIT, $0-21
+ MOVL ptr+0(FP), BP
+ MOVL old_lo+4(FP), AX
+ MOVL old_hi+8(FP), DX
+ MOVL new_lo+12(FP), BX
+ MOVL new_hi+16(FP), CX
+ LOCK
+ CMPXCHG8B 0(BP)
+ JNZ cas64_fail
+ MOVL $1, AX
+ MOVB AX, ret+20(FP)
+ RET
+cas64_fail:
+ MOVL $0, AX
+ MOVB AX, ret+20(FP)
+ RET
+
+// bool casp(void **p, void *old, void *new)
+// Atomically:
+// if(*p == old){
+// *p = new;
+// return 1;
+// }else
+// return 0;
+TEXT runtime·casp(SB), NOSPLIT, $0-13
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ JZ 4(PC)
+ MOVL $0, AX
+ MOVB AX, ret+12(FP)
+ RET
+ MOVL $1, AX
+ MOVB AX, ret+12(FP)
+ RET
+
+// uint32 xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT runtime·xadd(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL delta+4(FP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·xchg(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL new+4(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·xchgp(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL new+4(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
+ JMP runtime·xchg(SB)
+
+TEXT runtime·procyield(SB),NOSPLIT,$0-0
+ MOVL cycles+0(FP), AX
+again:
+ PAUSE
+ SUBL $1, AX
+ JNZ again
+ RET
+
+TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+// uint64 atomicload64(uint64 volatile* addr);
+TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), AX
+ LEAL ret_lo+4(FP), BX
+ // MOVQ (%EAX), %MM0
+ BYTE $0x0f; BYTE $0x6f; BYTE $0x00
+ // MOVQ %MM0, 0(%EBX)
+ BYTE $0x0f; BYTE $0x7f; BYTE $0x03
+ // EMMS
+ BYTE $0x0F; BYTE $0x77
+ RET
+
+// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
+TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), AX
+ // MOVQ and EMMS were introduced on the Pentium MMX.
+ // MOVQ 0x8(%ESP), %MM0
+ BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
+ // MOVQ %MM0, (%EAX)
+ BYTE $0x0f; BYTE $0x7f; BYTE $0x00
+ // EMMS
+ BYTE $0x0F; BYTE $0x77
+ // This is essentially a no-op, but it provides required memory fencing.
+ // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
+ MOVL $0, AX
+ LOCK
+ XADDL AX, (SP)
+ RET
+
+// void runtime·atomicor8(byte volatile*, byte);
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), AX
+ MOVB val+4(FP), BX
+ LOCK
+ ORB BX, (AX)
+ RET
+
+// void jmpdefer(fn, sp);
+// called from deferreturn.
+// 1. pop the caller
+// 2. sub 5 bytes from the callers return
+// 3. jmp to the argument
+TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
+ MOVL fv+0(FP), DX // fn
+ MOVL argp+4(FP), BX // caller sp
+ LEAL -4(BX), SP // caller sp after CALL
+ SUBL $5, (SP) // return to CALL again
+ MOVL 0(DX), BX
+ JMP BX // but first run the deferred function
+
+// Save state of caller into g->sched.
+TEXT gosave<>(SB),NOSPLIT,$0
+ PUSHL AX
+ PUSHL BX
+ get_tls(BX)
+ MOVL g(BX), BX
+ LEAL arg+0(FP), AX
+ MOVL AX, (g_sched+gobuf_sp)(BX)
+ MOVL -4(AX), AX
+ MOVL AX, (g_sched+gobuf_pc)(BX)
+ MOVL $0, (g_sched+gobuf_ret)(BX)
+ MOVL $0, (g_sched+gobuf_ctxt)(BX)
+ POPL BX
+ POPL AX
+ RET
+
+// asmcgocall(void(*fn)(void*), void *arg)
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.c for more details.
+TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
+ MOVL fn+0(FP), AX
+ MOVL arg+4(FP), BX
+ CALL asmcgocall<>(SB)
+ RET
+
+TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
+ MOVL fn+0(FP), AX
+ MOVL arg+4(FP), BX
+ CALL asmcgocall<>(SB)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT asmcgocall<>(SB),NOSPLIT,$0-12
+ // fn in AX, arg in BX
+ MOVL SP, DX
+
+ // Figure out if we need to switch to m->g0 stack.
+ // We get called to create new OS threads too, and those
+ // come in on the m->g0 stack already.
+ get_tls(CX)
+ MOVL g(CX), BP
+ MOVL g_m(BP), BP
+ MOVL m_g0(BP), SI
+ MOVL g(CX), DI
+ CMPL SI, DI
+ JEQ 4(PC)
+ CALL gosave<>(SB)
+ MOVL SI, g(CX)
+ MOVL (g_sched+gobuf_sp)(SI), SP
+
+ // Now on a scheduling stack (a pthread-created stack).
+ SUBL $32, SP
+ ANDL $~15, SP // alignment, perhaps unnecessary
+ MOVL DI, 8(SP) // save g
+ MOVL DX, 4(SP) // save SP
+ MOVL BX, 0(SP) // first argument in x86-32 ABI
+ CALL AX
+
+ // Restore registers, g, stack pointer.
+ get_tls(CX)
+ MOVL 8(SP), DI
+ MOVL DI, g(CX)
+ MOVL 4(SP), SP
+ RET
+
+// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
+// Turn the fn into a Go func (by taking its address) and call
+// cgocallback_gofunc.
+TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
+ LEAL fn+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL frame+4(FP), AX
+ MOVL AX, 4(SP)
+ MOVL framesize+8(FP), AX
+ MOVL AX, 8(SP)
+ MOVL $runtime·cgocallback_gofunc(SB), AX
+ CALL AX
+ RET
+
+// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
+// See cgocall.c for more details.
+TEXT runtime·cgocallback_gofunc(SB),NOSPLIT,$12-12
+ // If g is nil, Go did not create the current thread.
+ // Call needm to obtain one for temporary use.
+ // In this case, we're running on the thread stack, so there's
+ // lots of space, but the linker doesn't know. Hide the call from
+ // the linker analysis by using an indirect call through AX.
+ get_tls(CX)
+#ifdef GOOS_windows
+ MOVL $0, BP
+ CMPL CX, $0
+ JEQ 2(PC) // TODO
+#endif
+ MOVL g(CX), BP
+ CMPL BP, $0
+ JEQ needm
+ MOVL g_m(BP), BP
+ MOVL BP, DX // saved copy of oldm
+ JMP havem
+needm:
+ MOVL $0, 0(SP)
+ MOVL $runtime·needm(SB), AX
+ CALL AX
+ MOVL 0(SP), DX
+ get_tls(CX)
+ MOVL g(CX), BP
+ MOVL g_m(BP), BP
+
+havem:
+ // Now there's a valid m, and we're running on its m->g0.
+ // Save current m->g0->sched.sp on stack and then set it to SP.
+ // Save current sp in m->g0->sched.sp in preparation for
+ // switch back to m->curg stack.
+ // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
+ MOVL m_g0(BP), SI
+ MOVL (g_sched+gobuf_sp)(SI), AX
+ MOVL AX, 0(SP)
+ MOVL SP, (g_sched+gobuf_sp)(SI)
+
+ // Switch to m->curg stack and call runtime.cgocallbackg.
+ // Because we are taking over the execution of m->curg
+ // but *not* resuming what had been running, we need to
+ // save that information (m->curg->sched) so we can restore it.
+ // We can restore m->curg->sched.sp easily, because calling
+ // runtime.cgocallbackg leaves SP unchanged upon return.
+ // To save m->curg->sched.pc, we push it onto the stack.
+ // This has the added benefit that it looks to the traceback
+ // routine like cgocallbackg is going to return to that
+ // PC (because the frame we allocate below has the same
+ // size as cgocallback_gofunc's frame declared above)
+ // so that the traceback will seamlessly trace back into
+ // the earlier calls.
+ //
+ // In the new goroutine, 0(SP) holds the saved oldm (DX) register.
+ // 4(SP) and 8(SP) are unused.
+ MOVL m_curg(BP), SI
+ MOVL SI, g(CX)
+ MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
+ MOVL (g_sched+gobuf_pc)(SI), BP
+ MOVL BP, -4(DI)
+ LEAL -(4+12)(DI), SP
+ MOVL DX, 0(SP)
+ CALL runtime·cgocallbackg(SB)
+ MOVL 0(SP), DX
+
+ // Restore g->sched (== m->curg->sched) from saved values.
+ get_tls(CX)
+ MOVL g(CX), SI
+ MOVL 12(SP), BP
+ MOVL BP, (g_sched+gobuf_pc)(SI)
+ LEAL (12+4)(SP), DI
+ MOVL DI, (g_sched+gobuf_sp)(SI)
+
+ // Switch back to m->g0's stack and restore m->g0->sched.sp.
+ // (Unlike m->curg, the g0 goroutine never uses sched.pc,
+ // so we do not have to restore it.)
+ MOVL g(CX), BP
+ MOVL g_m(BP), BP
+ MOVL m_g0(BP), SI
+ MOVL SI, g(CX)
+ MOVL (g_sched+gobuf_sp)(SI), SP
+ MOVL 0(SP), AX
+ MOVL AX, (g_sched+gobuf_sp)(SI)
+
+ // If the m on entry was nil, we called needm above to borrow an m
+ // for the duration of the call. Since the call is over, return it with dropm.
+ CMPL DX, $0
+ JNE 3(PC)
+ MOVL $runtime·dropm(SB), AX
+ CALL AX
+
+ // Done!
+ RET
+
+// void setg(G*); set g. for use by needm.
+TEXT runtime·setg(SB), NOSPLIT, $0-4
+ MOVL gg+0(FP), BX
+#ifdef GOOS_windows
+ CMPL BX, $0
+ JNE settls
+ MOVL $0, 0x14(FS)
+ RET
+settls:
+ MOVL g_m(BX), AX
+ LEAL m_tls(AX), AX
+ MOVL AX, 0x14(FS)
+#endif
+ get_tls(CX)
+ MOVL BX, g(CX)
+ RET
+
+// void setg_gcc(G*); set g. for use by gcc
+TEXT setg_gcc<>(SB), NOSPLIT, $0
+ get_tls(AX)
+ MOVL gg+0(FP), DX
+ MOVL DX, g(AX)
+ RET
+
+// check that SP is in range [g->stackbase, g->stackguard)
+TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
+ get_tls(CX)
+ MOVL g(CX), AX
+ CMPL g_stackbase(AX), SP
+ JHI 2(PC)
+ INT $3
+ CMPL SP, g_stackguard(AX)
+ JHI 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
+ MOVL argp+0(FP),AX // addr of first arg
+ MOVL -4(AX),AX // get calling pc
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
+ MOVL p+0(FP),AX // addr of first arg
+ MOVL -4(AX),AX // get calling pc
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
+ MOVL argp+0(FP),AX // addr of first arg
+ MOVL pc+4(FP), BX
+ MOVL BX, -4(AX) // set calling pc
+ RET
+
+TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
+ MOVL argp+0(FP), AX
+ MOVL AX, ret+4(FP)
+ RET
+
+// func gogetcallersp(p unsafe.Pointer) uintptr
+TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-8
+ MOVL p+0(FP),AX // addr of first arg
+ MOVL AX, ret+4(FP)
+ RET
+
+// int64 runtime·cputicks(void), so really
+// void runtime·cputicks(int64 *ticks)
+TEXT runtime·cputicks(SB),NOSPLIT,$0-8
+ RDTSC
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+TEXT runtime·gocputicks(SB),NOSPLIT,$0-8
+ RDTSC
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
+ // set up ldt 7 to point at tls0
+ // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
+ // the entry number is just a hint. setldt will set up GS with what it used.
+ MOVL $7, 0(SP)
+ LEAL runtime·tls0(SB), AX
+ MOVL AX, 4(SP)
+ MOVL $32, 8(SP) // sizeof(tls array)
+ CALL runtime·setldt(SB)
+ RET
+
+TEXT runtime·emptyfunc(SB),0,$0-0
+ RET
+
+TEXT runtime·abort(SB),NOSPLIT,$0-0
+ INT $0x3
+
+TEXT runtime·stackguard(SB),NOSPLIT,$0-8
+ MOVL SP, DX
+ MOVL DX, sp+0(FP)
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_stackguard(BX), DX
+ MOVL DX, limit+4(FP)
+ RET
+
+GLOBL runtime·tls0(SB), $32
+
+// hash function using AES hardware instructions
+TEXT runtime·aeshash(SB),NOSPLIT,$0-16
+ MOVL p+0(FP), AX // ptr to data
+ MOVL s+4(FP), CX // size
+ JMP runtime·aeshashbody(SB)
+
+TEXT runtime·aeshashstr(SB),NOSPLIT,$0-16
+ MOVL p+0(FP), AX // ptr to string object
+ // s+4(FP) is ignored, it is always sizeof(String)
+ MOVL 4(AX), CX // length of string
+ MOVL (AX), AX // string data
+ JMP runtime·aeshashbody(SB)
+
+// AX: data
+// CX: length
+TEXT runtime·aeshashbody(SB),NOSPLIT,$0-16
+ MOVL h+8(FP), X0 // seed to low 32 bits of xmm0
+ PINSRD $1, CX, X0 // size to next 32 bits of xmm0
+ MOVO runtime·aeskeysched+0(SB), X2
+ MOVO runtime·aeskeysched+16(SB), X3
+ CMPL CX, $16
+ JB aessmall
+aesloop:
+ CMPL CX, $16
+ JBE aesloopend
+ MOVOU (AX), X1
+ AESENC X2, X0
+ AESENC X1, X0
+ SUBL $16, CX
+ ADDL $16, AX
+ JMP aesloop
+// 1-16 bytes remaining
+aesloopend:
+ // This load may overlap with the previous load above.
+ // We'll hash some bytes twice, but that's ok.
+ MOVOU -16(AX)(CX*1), X1
+ JMP partial
+// 0-15 bytes
+aessmall:
+ TESTL CX, CX
+ JE finalize // 0 bytes
+
+ CMPB AX, $0xf0
+ JA highpartial
+
+ // 16 bytes loaded at this address won't cross
+ // a page boundary, so we can load it directly.
+ MOVOU (AX), X1
+ ADDL CX, CX
+ PAND masks<>(SB)(CX*8), X1
+ JMP partial
+highpartial:
+ // address ends in 1111xxxx. Might be up against
+ // a page boundary, so load ending at last byte.
+ // Then shift bytes down using pshufb.
+ MOVOU -16(AX)(CX*1), X1
+ ADDL CX, CX
+ PSHUFB shifts<>(SB)(CX*8), X1
+partial:
+ // incorporate partial block into hash
+ AESENC X3, X0
+ AESENC X1, X0
+finalize:
+ // finalize hash
+ AESENC X2, X0
+ AESENC X3, X0
+ AESENC X2, X0
+ MOVL X0, ret+12(FP)
+ RET
+
+TEXT runtime·aeshash32(SB),NOSPLIT,$0-16
+ MOVL p+0(FP), AX // ptr to data
+ // s+4(FP) is ignored, it is always sizeof(int32)
+ MOVL h+8(FP), X0 // seed
+ PINSRD $1, (AX), X0 // data
+ AESENC runtime·aeskeysched+0(SB), X0
+ AESENC runtime·aeskeysched+16(SB), X0
+ AESENC runtime·aeskeysched+0(SB), X0
+ MOVL X0, ret+12(FP)
+ RET
+
+TEXT runtime·aeshash64(SB),NOSPLIT,$0-16
+ MOVL p+0(FP), AX // ptr to data
+ // s+4(FP) is ignored, it is always sizeof(int64)
+ MOVQ (AX), X0 // data
+ PINSRD $2, h+8(FP), X0 // seed
+ AESENC runtime·aeskeysched+0(SB), X0
+ AESENC runtime·aeskeysched+16(SB), X0
+ AESENC runtime·aeskeysched+0(SB), X0
+ MOVL X0, ret+12(FP)
+ RET
+
+// simple mask to get rid of data in the high part of the register.
+DATA masks<>+0x00(SB)/4, $0x00000000
+DATA masks<>+0x04(SB)/4, $0x00000000
+DATA masks<>+0x08(SB)/4, $0x00000000
+DATA masks<>+0x0c(SB)/4, $0x00000000
+
+DATA masks<>+0x10(SB)/4, $0x000000ff
+DATA masks<>+0x14(SB)/4, $0x00000000
+DATA masks<>+0x18(SB)/4, $0x00000000
+DATA masks<>+0x1c(SB)/4, $0x00000000
+
+DATA masks<>+0x20(SB)/4, $0x0000ffff
+DATA masks<>+0x24(SB)/4, $0x00000000
+DATA masks<>+0x28(SB)/4, $0x00000000
+DATA masks<>+0x2c(SB)/4, $0x00000000
+
+DATA masks<>+0x30(SB)/4, $0x00ffffff
+DATA masks<>+0x34(SB)/4, $0x00000000
+DATA masks<>+0x38(SB)/4, $0x00000000
+DATA masks<>+0x3c(SB)/4, $0x00000000
+
+DATA masks<>+0x40(SB)/4, $0xffffffff
+DATA masks<>+0x44(SB)/4, $0x00000000
+DATA masks<>+0x48(SB)/4, $0x00000000
+DATA masks<>+0x4c(SB)/4, $0x00000000
+
+DATA masks<>+0x50(SB)/4, $0xffffffff
+DATA masks<>+0x54(SB)/4, $0x000000ff
+DATA masks<>+0x58(SB)/4, $0x00000000
+DATA masks<>+0x5c(SB)/4, $0x00000000
+
+DATA masks<>+0x60(SB)/4, $0xffffffff
+DATA masks<>+0x64(SB)/4, $0x0000ffff
+DATA masks<>+0x68(SB)/4, $0x00000000
+DATA masks<>+0x6c(SB)/4, $0x00000000
+
+DATA masks<>+0x70(SB)/4, $0xffffffff
+DATA masks<>+0x74(SB)/4, $0x00ffffff
+DATA masks<>+0x78(SB)/4, $0x00000000
+DATA masks<>+0x7c(SB)/4, $0x00000000
+
+DATA masks<>+0x80(SB)/4, $0xffffffff
+DATA masks<>+0x84(SB)/4, $0xffffffff
+DATA masks<>+0x88(SB)/4, $0x00000000
+DATA masks<>+0x8c(SB)/4, $0x00000000
+
+DATA masks<>+0x90(SB)/4, $0xffffffff
+DATA masks<>+0x94(SB)/4, $0xffffffff
+DATA masks<>+0x98(SB)/4, $0x000000ff
+DATA masks<>+0x9c(SB)/4, $0x00000000
+
+DATA masks<>+0xa0(SB)/4, $0xffffffff
+DATA masks<>+0xa4(SB)/4, $0xffffffff
+DATA masks<>+0xa8(SB)/4, $0x0000ffff
+DATA masks<>+0xac(SB)/4, $0x00000000
+
+DATA masks<>+0xb0(SB)/4, $0xffffffff
+DATA masks<>+0xb4(SB)/4, $0xffffffff
+DATA masks<>+0xb8(SB)/4, $0x00ffffff
+DATA masks<>+0xbc(SB)/4, $0x00000000
+
+DATA masks<>+0xc0(SB)/4, $0xffffffff
+DATA masks<>+0xc4(SB)/4, $0xffffffff
+DATA masks<>+0xc8(SB)/4, $0xffffffff
+DATA masks<>+0xcc(SB)/4, $0x00000000
+
+DATA masks<>+0xd0(SB)/4, $0xffffffff
+DATA masks<>+0xd4(SB)/4, $0xffffffff
+DATA masks<>+0xd8(SB)/4, $0xffffffff
+DATA masks<>+0xdc(SB)/4, $0x000000ff
+
+DATA masks<>+0xe0(SB)/4, $0xffffffff
+DATA masks<>+0xe4(SB)/4, $0xffffffff
+DATA masks<>+0xe8(SB)/4, $0xffffffff
+DATA masks<>+0xec(SB)/4, $0x0000ffff
+
+DATA masks<>+0xf0(SB)/4, $0xffffffff
+DATA masks<>+0xf4(SB)/4, $0xffffffff
+DATA masks<>+0xf8(SB)/4, $0xffffffff
+DATA masks<>+0xfc(SB)/4, $0x00ffffff
+
+GLOBL masks<>(SB),RODATA,$256
+
+// these are arguments to pshufb. They move data down from
+// the high bytes of the register to the low bytes of the register.
+// index is how many bytes to move.
+DATA shifts<>+0x00(SB)/4, $0x00000000
+DATA shifts<>+0x04(SB)/4, $0x00000000
+DATA shifts<>+0x08(SB)/4, $0x00000000
+DATA shifts<>+0x0c(SB)/4, $0x00000000
+
+DATA shifts<>+0x10(SB)/4, $0xffffff0f
+DATA shifts<>+0x14(SB)/4, $0xffffffff
+DATA shifts<>+0x18(SB)/4, $0xffffffff
+DATA shifts<>+0x1c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x20(SB)/4, $0xffff0f0e
+DATA shifts<>+0x24(SB)/4, $0xffffffff
+DATA shifts<>+0x28(SB)/4, $0xffffffff
+DATA shifts<>+0x2c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
+DATA shifts<>+0x34(SB)/4, $0xffffffff
+DATA shifts<>+0x38(SB)/4, $0xffffffff
+DATA shifts<>+0x3c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
+DATA shifts<>+0x44(SB)/4, $0xffffffff
+DATA shifts<>+0x48(SB)/4, $0xffffffff
+DATA shifts<>+0x4c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
+DATA shifts<>+0x54(SB)/4, $0xffffff0f
+DATA shifts<>+0x58(SB)/4, $0xffffffff
+DATA shifts<>+0x5c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
+DATA shifts<>+0x64(SB)/4, $0xffff0f0e
+DATA shifts<>+0x68(SB)/4, $0xffffffff
+DATA shifts<>+0x6c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
+DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
+DATA shifts<>+0x78(SB)/4, $0xffffffff
+DATA shifts<>+0x7c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x80(SB)/4, $0x0b0a0908
+DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
+DATA shifts<>+0x88(SB)/4, $0xffffffff
+DATA shifts<>+0x8c(SB)/4, $0xffffffff
+
+DATA shifts<>+0x90(SB)/4, $0x0a090807
+DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
+DATA shifts<>+0x98(SB)/4, $0xffffff0f
+DATA shifts<>+0x9c(SB)/4, $0xffffffff
+
+DATA shifts<>+0xa0(SB)/4, $0x09080706
+DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
+DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
+DATA shifts<>+0xac(SB)/4, $0xffffffff
+
+DATA shifts<>+0xb0(SB)/4, $0x08070605
+DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
+DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
+DATA shifts<>+0xbc(SB)/4, $0xffffffff
+
+DATA shifts<>+0xc0(SB)/4, $0x07060504
+DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
+DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
+DATA shifts<>+0xcc(SB)/4, $0xffffffff
+
+DATA shifts<>+0xd0(SB)/4, $0x06050403
+DATA shifts<>+0xd4(SB)/4, $0x0a090807
+DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
+DATA shifts<>+0xdc(SB)/4, $0xffffff0f
+
+DATA shifts<>+0xe0(SB)/4, $0x05040302
+DATA shifts<>+0xe4(SB)/4, $0x09080706
+DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
+DATA shifts<>+0xec(SB)/4, $0xffff0f0e
+
+DATA shifts<>+0xf0(SB)/4, $0x04030201
+DATA shifts<>+0xf4(SB)/4, $0x08070605
+DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
+DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
+
+GLOBL shifts<>(SB),RODATA,$256
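
For reference, a rough Go sketch of what these two 256-byte tables encode, derived from the DATA statements above (the function name is illustrative and not part of the runtime):

	// masks[n] keeps only the low n bytes of a 16-byte block;
	// shifts[n] is a PSHUFB control that moves the last n bytes of a block
	// down to offset 0 (lanes set to 0xff make PSHUFB produce zero).
	func buildTables() (masks, shifts [16][16]byte) {
		for n := 0; n < 16; n++ {
			for i := 0; i < 16; i++ {
				if i < n {
					masks[n][i] = 0xff
					shifts[n][i] = byte(16 - n + i)
				} else {
					shifts[n][i] = 0xff
				}
			}
		}
		return
	}
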
+
+TEXT runtime·memeq(SB),NOSPLIT,$0-13
+ MOVL a+0(FP), SI
+ MOVL b+4(FP), DI
+ MOVL size+8(FP), BX
+ CALL runtime·memeqbody(SB)
+ MOVB AX, ret+12(FP)
+ RET
+
+// eqstring tests whether two strings are equal.
+// See runtime_test.go:eqstring_generic for
+// equivalent Go code.
+TEXT runtime·eqstring(SB),NOSPLIT,$0-17
+ MOVL s1len+4(FP), AX
+ MOVL s2len+12(FP), BX
+ CMPL AX, BX
+ JNE different
+ MOVL s1str+0(FP), SI
+ MOVL s2str+8(FP), DI
+ CMPL SI, DI
+ JEQ same
+ CALL runtime·memeqbody(SB)
+ MOVB AX, v+16(FP)
+ RET
+same:
+ MOVB $1, v+16(FP)
+ RET
+different:
+ MOVB $0, v+16(FP)
+ RET
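
The comment above points at runtime_test.go:eqstring_generic; the sketch below shows what such a reference implementation looks like (illustrative only, the authoritative version is the one in runtime_test.go):

	// eqstringGeneric mirrors the fast path above: unequal lengths differ,
	// otherwise compare byte by byte (the assembly also short-circuits
	// when the two string pointers are identical).
	func eqstringGeneric(s1, s2 string) bool {
		if len(s1) != len(s2) {
			return false
		}
		for i := 0; i < len(s1); i++ {
			if s1[i] != s2[i] {
				return false
			}
		}
		return true
	}
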
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-25
+ MOVL a_len+4(FP), BX
+ MOVL b_len+16(FP), CX
+ XORL AX, AX
+ CMPL BX, CX
+ JNE eqret
+ MOVL a+0(FP), SI
+ MOVL b+12(FP), DI
+ CALL runtime·memeqbody(SB)
+eqret:
+ MOVB AX, ret+24(FP)
+ RET
+
+// a in SI
+// b in DI
+// count in BX
+TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
+ XORL AX, AX
+
+ CMPL BX, $4
+ JB small
+
+ // 64 bytes at a time using xmm registers
+hugeloop:
+ CMPL BX, $64
+ JB bigloop
+ TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
+ JE bigloop
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ MOVOU 16(SI), X2
+ MOVOU 16(DI), X3
+ MOVOU 32(SI), X4
+ MOVOU 32(DI), X5
+ MOVOU 48(SI), X6
+ MOVOU 48(DI), X7
+ PCMPEQB X1, X0
+ PCMPEQB X3, X2
+ PCMPEQB X5, X4
+ PCMPEQB X7, X6
+ PAND X2, X0
+ PAND X6, X4
+ PAND X4, X0
+ PMOVMSKB X0, DX
+ ADDL $64, SI
+ ADDL $64, DI
+ SUBL $64, BX
+ CMPL DX, $0xffff
+ JEQ hugeloop
+ RET
+
+ // 4 bytes at a time using 32-bit register
+bigloop:
+ CMPL BX, $4
+ JBE leftover
+ MOVL (SI), CX
+ MOVL (DI), DX
+ ADDL $4, SI
+ ADDL $4, DI
+ SUBL $4, BX
+ CMPL CX, DX
+ JEQ bigloop
+ RET
+
+ // remaining 0-4 bytes
+leftover:
+ MOVL -4(SI)(BX*1), CX
+ MOVL -4(DI)(BX*1), DX
+ CMPL CX, DX
+ SETEQ AX
+ RET
+
+small:
+ CMPL BX, $0
+ JEQ equal
+
+ LEAL 0(BX*8), CX
+ NEGL CX
+
+ MOVL SI, DX
+ CMPB DX, $0xfc
+ JA si_high
+
+ // load at SI won't cross a page boundary.
+ MOVL (SI), SI
+ JMP si_finish
+si_high:
+	// address ends in 111111xx, so a 4-byte load here could cross a page
+	// boundary. Load the 4 bytes ending at the last byte we need, then
+	// shift them into the correct position.
+ MOVL -4(SI)(BX*1), SI
+ SHRL CX, SI
+si_finish:
+
+ // same for DI.
+ MOVL DI, DX
+ CMPB DX, $0xfc
+ JA di_high
+ MOVL (DI), DI
+ JMP di_finish
+di_high:
+ MOVL -4(DI)(BX*1), DI
+ SHRL CX, DI
+di_finish:
+
+ SUBL SI, DI
+ SHLL CX, DI
+equal:
+ SETEQ AX
+ RET
+
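
The small case above avoids reading past the end of a page with a conservative check on the low byte of the address; in Go terms (illustrative sketch):

	// mayCross4 reports whether a 4-byte load at addr could cross a 4KB page.
	// If the low byte is at most 0xfc, addr mod 4096 is at most 4092, so the
	// load stays within the page; otherwise the check errs on the safe side.
	func mayCross4(addr uintptr) bool {
		return byte(addr) > 0xfc
	}
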
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
+ MOVL s1_base+0(FP), SI
+ MOVL s1_len+4(FP), BX
+ MOVL s2_base+8(FP), DI
+ MOVL s2_len+12(FP), DX
+ CALL runtime·cmpbody(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
+ MOVL s1+0(FP), SI
+ MOVL s1+4(FP), BX
+ MOVL s2+12(FP), DI
+ MOVL s2+16(FP), DX
+ CALL runtime·cmpbody(SB)
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT bytes·IndexByte(SB),NOSPLIT,$0
+ MOVL s+0(FP), SI
+ MOVL s_len+4(FP), CX
+ MOVB c+12(FP), AL
+ MOVL SI, DI
+ CLD; REPN; SCASB
+ JZ 3(PC)
+ MOVL $-1, ret+16(FP)
+ RET
+ SUBL SI, DI
+ SUBL $1, DI
+ MOVL DI, ret+16(FP)
+ RET
+
+TEXT strings·IndexByte(SB),NOSPLIT,$0
+ MOVL s+0(FP), SI
+ MOVL s_len+4(FP), CX
+ MOVB c+8(FP), AL
+ MOVL SI, DI
+ CLD; REPN; SCASB
+ JZ 3(PC)
+ MOVL $-1, ret+12(FP)
+ RET
+ SUBL SI, DI
+ SUBL $1, DI
+ MOVL DI, ret+12(FP)
+ RET
+
+// input:
+// SI = a
+// DI = b
+// BX = alen
+// DX = blen
+// output:
+// AX = 1/0/-1
+TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
+ CMPL SI, DI
+ JEQ cmp_allsame
+ CMPL BX, DX
+ MOVL DX, BP
+ CMOVLLT BX, BP // BP = min(alen, blen)
+ CMPL BP, $4
+ JB cmp_small
+ TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
+ JE cmp_mediumloop
+cmp_largeloop:
+ CMPL BP, $16
+ JB cmp_mediumloop
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ PCMPEQB X0, X1
+ PMOVMSKB X1, AX
+ XORL $0xffff, AX // convert EQ to NE
+ JNE cmp_diff16 // branch if at least one byte is not equal
+ ADDL $16, SI
+ ADDL $16, DI
+ SUBL $16, BP
+ JMP cmp_largeloop
+
+cmp_diff16:
+ BSFL AX, BX // index of first byte that differs
+ XORL AX, AX
+ MOVB (SI)(BX*1), CX
+ CMPB CX, (DI)(BX*1)
+ SETHI AX
+ LEAL -1(AX*2), AX // convert 1/0 to +1/-1
+ RET
+
+cmp_mediumloop:
+ CMPL BP, $4
+ JBE cmp_0through4
+ MOVL (SI), AX
+ MOVL (DI), CX
+ CMPL AX, CX
+ JNE cmp_diff4
+ ADDL $4, SI
+ ADDL $4, DI
+ SUBL $4, BP
+ JMP cmp_mediumloop
+
+cmp_0through4:
+ MOVL -4(SI)(BP*1), AX
+ MOVL -4(DI)(BP*1), CX
+ CMPL AX, CX
+ JEQ cmp_allsame
+
+cmp_diff4:
+ BSWAPL AX // reverse order of bytes
+ BSWAPL CX
+ XORL AX, CX // find bit differences
+ BSRL CX, CX // index of highest bit difference
+ SHRL CX, AX // move a's bit to bottom
+ ANDL $1, AX // mask bit
+ LEAL -1(AX*2), AX // 1/0 => +1/-1
+ RET
+
+ // 0-3 bytes in common
+cmp_small:
+ LEAL (BP*8), CX
+ NEGL CX
+ JEQ cmp_allsame
+
+ // load si
+ CMPB SI, $0xfc
+ JA cmp_si_high
+ MOVL (SI), SI
+ JMP cmp_si_finish
+cmp_si_high:
+ MOVL -4(SI)(BP*1), SI
+ SHRL CX, SI
+cmp_si_finish:
+ SHLL CX, SI
+
+ // same for di
+ CMPB DI, $0xfc
+ JA cmp_di_high
+ MOVL (DI), DI
+ JMP cmp_di_finish
+cmp_di_high:
+ MOVL -4(DI)(BP*1), DI
+ SHRL CX, DI
+cmp_di_finish:
+ SHLL CX, DI
+
+ BSWAPL SI // reverse order of bytes
+ BSWAPL DI
+ XORL SI, DI // find bit differences
+ JEQ cmp_allsame
+ BSRL DI, CX // index of highest bit difference
+ SHRL CX, SI // move a's bit to bottom
+ ANDL $1, SI // mask bit
+ LEAL -1(SI*2), AX // 1/0 => +1/-1
+ RET
+
+ // all the bytes in common are the same, so we just need
+ // to compare the lengths.
+cmp_allsame:
+ XORL AX, AX
+ XORL CX, CX
+ CMPL BX, DX
+ SETGT AX // 1 if alen > blen
+ SETEQ CX // 1 if alen == blen
+ LEAL -1(CX)(AX*2), AX // 1,0,-1 result
+ RET
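
In Go terms, cmpbody performs the usual three-way comparison over the common prefix and falls back to comparing lengths; a rough, purely illustrative equivalent:

	// cmpGeneric returns +1, 0 or -1, matching the contract described above.
	func cmpGeneric(a, b []byte) int {
		n := len(a)
		if len(b) < n {
			n = len(b)
		}
		for i := 0; i < n; i++ {
			if a[i] != b[i] {
				if a[i] > b[i] {
					return +1
				}
				return -1
			}
		}
		if len(a) > len(b) {
			return +1
		}
		if len(a) < len(b) {
			return -1
		}
		return 0
	}
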
+
+// A Duff's device for zeroing memory.
+// The compiler jumps to computed addresses within
+// this routine to zero chunks of memory. Do not
+// change this code without also changing the code
+// in ../../cmd/8g/ggen.c:clearfat.
+// AX: zero
+// DI: ptr to memory to be zeroed
+// DI is updated as a side effect.
+TEXT runtime·duffzero(SB), NOSPLIT, $0-0
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ STOSL
+ RET
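
The compiler never enters duffzero at the top; to zero n words it jumps to the point that leaves n STOSL instructions before the final RET, so only that many 4-byte stores run. Roughly, the effect is (illustrative sketch):

	// Entering duffzero with n STOSL instructions remaining, with AX already
	// zero and DI pointing at the target, behaves like this loop and leaves
	// DI advanced past the zeroed words.
	func duffzeroEffect(p []uint32, n int) {
		for i := 0; i < n; i++ {
			p[i] = 0
		}
	}
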
+
+// A Duff's device for copying memory.
+// The compiler jumps to computed addresses within
+// this routine to copy chunks of memory. Source
+// and destination must not overlap. Do not
+// change this code without also changing the code
+// in ../../cmd/8g/cgen.c:sgen.
+// SI: ptr to source memory
+// DI: ptr to destination memory
+// SI and DI are updated as a side effect.
+
+// NOTE: this is equivalent to a sequence of MOVSL but
+// for some reason MOVSL is really slow.
+TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ MOVL (SI),CX
+ ADDL $4,SI
+ MOVL CX,(DI)
+ ADDL $4,DI
+
+ RET
+
+TEXT runtime·timenow(SB), NOSPLIT, $0-0
+ JMP time·now(SB)
+
+TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), AX
+ MOVL m_fastrand(AX), DX
+ ADDL DX, DX
+ MOVL DX, BX
+ XORL $0x88888eef, DX
+ CMOVLMI BX, DX
+ MOVL DX, m_fastrand(AX)
+ MOVL DX, ret+0(FP)
+ RET
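
fastrand1 advances m->fastrand with a doubling step followed by a conditional XOR against a fixed constant; the same update in Go (sketch, constant taken from the code above):

	// fastrandStep performs the update applied to m->fastrand above.
	func fastrandStep(x uint32) uint32 {
		x += x // shift left by one (top bit drops off)
		if x&0x80000000 != 0 {
			x ^= 0x88888eef // feedback constant, as in the assembly
		}
		return x
	}
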
+
+TEXT runtime·return0(SB), NOSPLIT, $0
+ MOVL $0, AX
+ RET
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
new file mode 100644
index 000000000..cc32ad8a1
--- /dev/null
+++ b/src/runtime/asm_amd64.s
@@ -0,0 +1,2343 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+TEXT runtime·rt0_go(SB),NOSPLIT,$0
+ // copy arguments forward on an even stack
+ MOVQ DI, AX // argc
+ MOVQ SI, BX // argv
+ SUBQ $(4*8+7), SP // 2args 2auto
+ ANDQ $~15, SP
+ MOVQ AX, 16(SP)
+ MOVQ BX, 24(SP)
+
+ // create istack out of the given (operating system) stack.
+ // _cgo_init may update stackguard.
+ MOVQ $runtime·g0(SB), DI
+ LEAQ (-64*1024+104)(SP), BX
+ MOVQ BX, g_stackguard(DI)
+ MOVQ BX, g_stackguard0(DI)
+ MOVQ SP, g_stackbase(DI)
+
+ // find out information about the processor we're on
+ MOVQ $0, AX
+ CPUID
+ CMPQ AX, $0
+ JE nocpuinfo
+ MOVQ $1, AX
+ CPUID
+ MOVL CX, runtime·cpuid_ecx(SB)
+ MOVL DX, runtime·cpuid_edx(SB)
+nocpuinfo:
+
+ // if there is an _cgo_init, call it.
+ MOVQ _cgo_init(SB), AX
+ TESTQ AX, AX
+ JZ needtls
+ // g0 already in DI
+ MOVQ DI, CX // Win64 uses CX for first parameter
+ MOVQ $setg_gcc<>(SB), SI
+ CALL AX
+ // update stackguard after _cgo_init
+ MOVQ $runtime·g0(SB), CX
+ MOVQ g_stackguard0(CX), AX
+ MOVQ AX, g_stackguard(CX)
+ CMPL runtime·iswindows(SB), $0
+ JEQ ok
+
+needtls:
+ // skip TLS setup on Plan 9
+ CMPL runtime·isplan9(SB), $1
+ JEQ ok
+ // skip TLS setup on Solaris
+ CMPL runtime·issolaris(SB), $1
+ JEQ ok
+
+ LEAQ runtime·tls0(SB), DI
+ CALL runtime·settls(SB)
+
+ // store through it, to make sure it works
+ get_tls(BX)
+ MOVQ $0x123, g(BX)
+ MOVQ runtime·tls0(SB), AX
+ CMPQ AX, $0x123
+ JEQ 2(PC)
+ MOVL AX, 0 // abort
+ok:
+ // set the per-goroutine and per-mach "registers"
+ get_tls(BX)
+ LEAQ runtime·g0(SB), CX
+ MOVQ CX, g(BX)
+ LEAQ runtime·m0(SB), AX
+
+ // save m->g0 = g0
+ MOVQ CX, m_g0(AX)
+ // save m0 to g0->m
+ MOVQ AX, g_m(CX)
+
+ CLD // convention is D is always left cleared
+ CALL runtime·check(SB)
+
+ MOVL 16(SP), AX // copy argc
+ MOVL AX, 0(SP)
+ MOVQ 24(SP), AX // copy argv
+ MOVQ AX, 8(SP)
+ CALL runtime·args(SB)
+ CALL runtime·osinit(SB)
+ CALL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ MOVQ $runtime·main·f(SB), BP // entry
+ PUSHQ BP
+ PUSHQ $0 // arg size
+ ARGSIZE(16)
+ CALL runtime·newproc(SB)
+ ARGSIZE(-1)
+ POPQ AX
+ POPQ AX
+
+ // start this M
+ CALL runtime·mstart(SB)
+
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+DATA runtime·main·f+0(SB)/8,$runtime·main(SB)
+GLOBL runtime·main·f(SB),RODATA,$8
+
+TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
+ BYTE $0xcc
+ RET
+
+TEXT runtime·asminit(SB),NOSPLIT,$0-0
+ // No per-thread init.
+ RET
+
+/*
+ * go-routine
+ */
+
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB), NOSPLIT, $0-8
+ MOVQ buf+0(FP), AX // gobuf
+ LEAQ buf+0(FP), BX // caller's SP
+ MOVQ BX, gobuf_sp(AX)
+ MOVQ 0(SP), BX // caller's PC
+ MOVQ BX, gobuf_pc(AX)
+ MOVQ $0, gobuf_ret(AX)
+ MOVQ $0, gobuf_ctxt(AX)
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ BX, gobuf_g(AX)
+ RET
+
+// void gogo(Gobuf*)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB), NOSPLIT, $0-8
+ MOVQ buf+0(FP), BX // gobuf
+ MOVQ gobuf_g(BX), DX
+ MOVQ 0(DX), CX // make sure g != nil
+ get_tls(CX)
+ MOVQ DX, g(CX)
+ MOVQ gobuf_sp(BX), SP // restore SP
+ MOVQ gobuf_ret(BX), AX
+ MOVQ gobuf_ctxt(BX), DX
+ MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
+ MOVQ $0, gobuf_ret(BX)
+ MOVQ $0, gobuf_ctxt(BX)
+ MOVQ gobuf_pc(BX), BX
+ JMP BX
+
+// func mcall(fn func(*g))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB), NOSPLIT, $0-8
+ MOVQ fn+0(FP), DI
+
+ get_tls(CX)
+ MOVQ g(CX), AX // save state in g->sched
+ MOVQ 0(SP), BX // caller's PC
+ MOVQ BX, (g_sched+gobuf_pc)(AX)
+ LEAQ fn+0(FP), BX // caller's SP
+ MOVQ BX, (g_sched+gobuf_sp)(AX)
+ MOVQ AX, (g_sched+gobuf_g)(AX)
+
+ // switch to m->g0 & its stack, call fn
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVQ m_g0(BX), SI
+ CMPQ SI, AX // if g == m->g0 call badmcall
+ JNE 3(PC)
+ MOVQ $runtime·badmcall(SB), AX
+ JMP AX
+ MOVQ SI, g(CX) // g = m->g0
+ MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
+ PUSHQ AX
+ ARGSIZE(8)
+ MOVQ DI, DX
+ MOVQ 0(DI), DI
+ CALL DI
+ POPQ AX
+ MOVQ $runtime·badmcall2(SB), AX
+ JMP AX
+ RET
+
+// switchtoM is a dummy routine that onM leaves at the bottom
+// of the G stack. We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the M stack because the one at the top of
+// the M stack terminates the stack walk (see topofstack()).
+TEXT runtime·switchtoM(SB), NOSPLIT, $0-8
+ RET
+
+// func onM(fn func())
+TEXT runtime·onM(SB), NOSPLIT, $0-8
+ MOVQ fn+0(FP), DI // DI = fn
+ get_tls(CX)
+ MOVQ g(CX), AX // AX = g
+ MOVQ g_m(AX), BX // BX = m
+
+ MOVQ m_g0(BX), DX // DX = g0
+ CMPQ AX, DX
+ JEQ onm
+
+ MOVQ m_curg(BX), BP
+ CMPQ AX, BP
+ JEQ oncurg
+
+ // Not g0, not curg. Must be gsignal, but that's not allowed.
+ // Hide call from linker nosplit analysis.
+ MOVQ $runtime·badonm(SB), AX
+ CALL AX
+
+oncurg:
+ // save our state in g->sched. Pretend to
+ // be switchtoM if the G stack is scanned.
+ MOVQ $runtime·switchtoM(SB), BP
+ MOVQ BP, (g_sched+gobuf_pc)(AX)
+ MOVQ SP, (g_sched+gobuf_sp)(AX)
+ MOVQ AX, (g_sched+gobuf_g)(AX)
+
+ // switch to g0
+ MOVQ DX, g(CX)
+ MOVQ (g_sched+gobuf_sp)(DX), BX
+ // make it look like mstart called onM on g0, to stop traceback
+ SUBQ $8, BX
+ MOVQ $runtime·mstart(SB), DX
+ MOVQ DX, 0(BX)
+ MOVQ BX, SP
+
+ // call target function
+ ARGSIZE(0)
+ MOVQ DI, DX
+ MOVQ 0(DI), DI
+ CALL DI
+
+ // switch back to g
+ get_tls(CX)
+ MOVQ g(CX), AX
+ MOVQ g_m(AX), BX
+ MOVQ m_curg(BX), AX
+ MOVQ AX, g(CX)
+ MOVQ (g_sched+gobuf_sp)(AX), SP
+ MOVQ $0, (g_sched+gobuf_sp)(AX)
+ RET
+
+onm:
+ // already on m stack, just call directly
+ MOVQ DI, DX
+ MOVQ 0(DI), DI
+ CALL DI
+ RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// Caller has already done get_tls(CX); MOVQ m(CX), BX.
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+TEXT runtime·morestack(SB),NOSPLIT,$0-0
+ // Cannot grow scheduler stack (m->g0).
+ MOVQ m_g0(BX), SI
+ CMPQ g(CX), SI
+ JNE 2(PC)
+ INT $3
+
+ // Cannot grow signal stack (m->gsignal).
+ MOVQ m_gsignal(BX), SI
+ CMPQ g(CX), SI
+ JNE 2(PC)
+ INT $3
+
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOVQ 8(SP), AX // f's caller's PC
+ MOVQ AX, (m_morebuf+gobuf_pc)(BX)
+ LEAQ 16(SP), AX // f's caller's SP
+ MOVQ AX, (m_morebuf+gobuf_sp)(BX)
+ MOVQ AX, m_moreargp(BX)
+ get_tls(CX)
+ MOVQ g(CX), SI
+ MOVQ SI, (m_morebuf+gobuf_g)(BX)
+
+ // Set g->sched to context in f.
+ MOVQ 0(SP), AX // f's PC
+ MOVQ AX, (g_sched+gobuf_pc)(SI)
+ MOVQ SI, (g_sched+gobuf_g)(SI)
+ LEAQ 8(SP), AX // f's SP
+ MOVQ AX, (g_sched+gobuf_sp)(SI)
+ MOVQ DX, (g_sched+gobuf_ctxt)(SI)
+
+ // Call newstack on m->g0's stack.
+ MOVQ m_g0(BX), BP
+ MOVQ BP, g(CX)
+ MOVQ (g_sched+gobuf_sp)(BP), SP
+ CALL runtime·newstack(SB)
+ MOVQ $0, 0x1003 // crash if newstack returns
+ RET
+
+// reflect·call: call a function with the given argument list
+// func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE) \
+ CMPQ CX, $MAXSIZE; \
+ JA 3(PC); \
+ MOVQ $NAME(SB), AX; \
+ JMP AX
+// Note: can't just "JMP NAME(SB)" - bad inlining results.
+
+TEXT reflect·call(SB), NOSPLIT, $0-24
+ MOVLQZX argsize+16(FP), CX
+ DISPATCH(runtime·call16, 16)
+ DISPATCH(runtime·call32, 32)
+ DISPATCH(runtime·call64, 64)
+ DISPATCH(runtime·call128, 128)
+ DISPATCH(runtime·call256, 256)
+ DISPATCH(runtime·call512, 512)
+ DISPATCH(runtime·call1024, 1024)
+ DISPATCH(runtime·call2048, 2048)
+ DISPATCH(runtime·call4096, 4096)
+ DISPATCH(runtime·call8192, 8192)
+ DISPATCH(runtime·call16384, 16384)
+ DISPATCH(runtime·call32768, 32768)
+ DISPATCH(runtime·call65536, 65536)
+ DISPATCH(runtime·call131072, 131072)
+ DISPATCH(runtime·call262144, 262144)
+ DISPATCH(runtime·call524288, 524288)
+ DISPATCH(runtime·call1048576, 1048576)
+ DISPATCH(runtime·call2097152, 2097152)
+ DISPATCH(runtime·call4194304, 4194304)
+ DISPATCH(runtime·call8388608, 8388608)
+ DISPATCH(runtime·call16777216, 16777216)
+ DISPATCH(runtime·call33554432, 33554432)
+ DISPATCH(runtime·call67108864, 67108864)
+ DISPATCH(runtime·call134217728, 134217728)
+ DISPATCH(runtime·call268435456, 268435456)
+ DISPATCH(runtime·call536870912, 536870912)
+ DISPATCH(runtime·call1073741824, 1073741824)
+ MOVQ $runtime·badreflectcall(SB), AX
+ JMP AX
+
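
Conceptually the DISPATCH chain rounds argsize up to the next power of two between 16 and 1<<30 and jumps to the matching callNN routine; an illustrative, non-authoritative sketch:

	// dispatchFrame picks the callNN frame size for a given argument size;
	// anything above 1<<30 goes to runtime·badreflectcall in the real code.
	func dispatchFrame(argsize uint32) uint32 {
		n := uint32(16)
		for n < argsize && n < 1<<30 {
			n <<= 1
		}
		return n // e.g. argsize 100 selects runtime·call128
	}
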
+// Argument map for the callXX frames. Each has one stack map.
+DATA gcargs_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gcargs_reflectcall<>+0x04(SB)/4, $6 // 3 words
+DATA gcargs_reflectcall<>+0x08(SB)/1, $(const_BitsPointer+(const_BitsPointer<<2)+(const_BitsScalar<<4))
+GLOBL gcargs_reflectcall<>(SB),RODATA,$12
+
+// callXX frames have no locals
+DATA gclocals_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gclocals_reflectcall<>+0x04(SB)/4, $0 // 0 locals
+GLOBL gclocals_reflectcall<>(SB),RODATA,$8
+
+#define CALLFN(NAME,MAXSIZE) \
+TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
+ FUNCDATA $FUNCDATA_ArgsPointerMaps,gcargs_reflectcall<>(SB); \
+ FUNCDATA $FUNCDATA_LocalsPointerMaps,gclocals_reflectcall<>(SB);\
+ /* copy arguments to stack */ \
+ MOVQ argptr+8(FP), SI; \
+ MOVLQZX argsize+16(FP), CX; \
+ MOVQ SP, DI; \
+ REP;MOVSB; \
+ /* call function */ \
+ MOVQ f+0(FP), DX; \
+ PCDATA $PCDATA_StackMapIndex, $0; \
+ CALL (DX); \
+ /* copy return values back */ \
+ MOVQ argptr+8(FP), DI; \
+ MOVLQZX argsize+16(FP), CX; \
+ MOVLQZX retoffset+20(FP), BX; \
+ MOVQ SP, SI; \
+ ADDQ BX, DI; \
+ ADDQ BX, SI; \
+ SUBQ BX, CX; \
+ REP;MOVSB; \
+ RET
+
+CALLFN(runtime·call16, 16)
+CALLFN(runtime·call32, 32)
+CALLFN(runtime·call64, 64)
+CALLFN(runtime·call128, 128)
+CALLFN(runtime·call256, 256)
+CALLFN(runtime·call512, 512)
+CALLFN(runtime·call1024, 1024)
+CALLFN(runtime·call2048, 2048)
+CALLFN(runtime·call4096, 4096)
+CALLFN(runtime·call8192, 8192)
+CALLFN(runtime·call16384, 16384)
+CALLFN(runtime·call32768, 32768)
+CALLFN(runtime·call65536, 65536)
+CALLFN(runtime·call131072, 131072)
+CALLFN(runtime·call262144, 262144)
+CALLFN(runtime·call524288, 524288)
+CALLFN(runtime·call1048576, 1048576)
+CALLFN(runtime·call2097152, 2097152)
+CALLFN(runtime·call4194304, 4194304)
+CALLFN(runtime·call8388608, 8388608)
+CALLFN(runtime·call16777216, 16777216)
+CALLFN(runtime·call33554432, 33554432)
+CALLFN(runtime·call67108864, 67108864)
+CALLFN(runtime·call134217728, 134217728)
+CALLFN(runtime·call268435456, 268435456)
+CALLFN(runtime·call536870912, 536870912)
+CALLFN(runtime·call1073741824, 1073741824)
+
+// Return point when leaving stack.
+//
+// Lessstack can appear in stack traces for the same reason
+// as morestack; in that context, it has 0 arguments.
+TEXT runtime·lessstack(SB), NOSPLIT, $0-0
+ // Save return value in m->cret
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVQ AX, m_cret(BX)
+
+ // Call oldstack on m->g0's stack.
+ MOVQ m_g0(BX), BP
+ MOVQ BP, g(CX)
+ MOVQ (g_sched+gobuf_sp)(BP), SP
+ CALL runtime·oldstack(SB)
+ MOVQ $0, 0x1004 // crash if oldstack returns
+ RET
+
+// morestack trampolines
+TEXT runtime·morestack00(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVQ $0, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack01(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ SHLQ $32, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack10(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVLQZX AX, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack11(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVQ AX, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+// subcases of morestack01
+// with const of 8,16,...48
+TEXT runtime·morestack8(SB),NOSPLIT,$0
+ MOVQ $1, R8
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack16(SB),NOSPLIT,$0
+ MOVQ $2, R8
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack24(SB),NOSPLIT,$0
+ MOVQ $3, R8
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack32(SB),NOSPLIT,$0
+ MOVQ $4, R8
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack40(SB),NOSPLIT,$0
+ MOVQ $5, R8
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack48(SB),NOSPLIT,$0
+ MOVQ $6, R8
+ MOVQ $morestack<>(SB), AX
+ JMP AX
+
+TEXT morestack<>(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ SHLQ $35, R8
+ MOVQ R8, m_moreframesize(BX)
+ MOVQ $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack00_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack00(SB)
+
+TEXT runtime·morestack01_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack01(SB)
+
+TEXT runtime·morestack10_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack10(SB)
+
+TEXT runtime·morestack11_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack11(SB)
+
+TEXT runtime·morestack8_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack8(SB)
+
+TEXT runtime·morestack16_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack16(SB)
+
+TEXT runtime·morestack24_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack24(SB)
+
+TEXT runtime·morestack32_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack32(SB)
+
+TEXT runtime·morestack40_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack40(SB)
+
+TEXT runtime·morestack48_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack48(SB)
+
+// bool cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·cas(SB), NOSPLIT, $0-17
+ MOVQ ptr+0(FP), BX
+ MOVL old+8(FP), AX
+ MOVL new+12(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ JZ 4(PC)
+ MOVL $0, AX
+ MOVB AX, ret+16(FP)
+ RET
+ MOVL $1, AX
+ MOVB AX, ret+16(FP)
+ RET
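
The pseudocode above is the standard compare-and-swap contract; for readers more comfortable with Go, it corresponds to sync/atomic (shown only to restate the semantics, not how the runtime itself uses it):

	package sketch

	import "sync/atomic"

	// casLike restates the contract of the routine above using sync/atomic.
	func casLike(val *int32, old, new int32) bool {
		return atomic.CompareAndSwapInt32(val, old, new)
	}
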
+
+// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
+// Atomically:
+//	if(*val == old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT runtime·cas64(SB), NOSPLIT, $0-25
+ MOVQ ptr+0(FP), BX
+ MOVQ old+8(FP), AX
+ MOVQ new+16(FP), CX
+ LOCK
+ CMPXCHGQ CX, 0(BX)
+ JNZ cas64_fail
+ MOVL $1, AX
+ MOVB AX, ret+24(FP)
+ RET
+cas64_fail:
+ MOVL $0, AX
+ MOVB AX, ret+24(FP)
+ RET
+
+TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
+ JMP runtime·cas64(SB)
+
+TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16
+ JMP runtime·atomicload64(SB)
+
+TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16
+ JMP runtime·atomicload64(SB)
+
+// bool casp(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·casp(SB), NOSPLIT, $0-25
+ MOVQ ptr+0(FP), BX
+ MOVQ old+8(FP), AX
+ MOVQ new+16(FP), CX
+ LOCK
+ CMPXCHGQ CX, 0(BX)
+ JZ 4(PC)
+ MOVL $0, AX
+ MOVB AX, ret+24(FP)
+ RET
+ MOVL $1, AX
+ MOVB AX, ret+24(FP)
+ RET
+
+// uint32 xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT runtime·xadd(SB), NOSPLIT, $0-20
+ MOVQ ptr+0(FP), BX
+ MOVL delta+8(FP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·xadd64(SB), NOSPLIT, $0-24
+ MOVQ ptr+0(FP), BX
+ MOVQ delta+8(FP), AX
+ MOVQ AX, CX
+ LOCK
+ XADDQ AX, 0(BX)
+ ADDQ CX, AX
+ MOVQ AX, ret+16(FP)
+ RET
+
+TEXT runtime·xchg(SB), NOSPLIT, $0-20
+ MOVQ ptr+0(FP), BX
+ MOVL new+8(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·xchg64(SB), NOSPLIT, $0-24
+ MOVQ ptr+0(FP), BX
+ MOVQ new+8(FP), AX
+ XCHGQ AX, 0(BX)
+ MOVQ AX, ret+16(FP)
+ RET
+
+TEXT runtime·xchgp(SB), NOSPLIT, $0-24
+ MOVQ ptr+0(FP), BX
+ MOVQ new+8(FP), AX
+ XCHGQ AX, 0(BX)
+ MOVQ AX, ret+16(FP)
+ RET
+
+TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
+ JMP runtime·xchg64(SB)
+
+TEXT runtime·procyield(SB),NOSPLIT,$0-0
+ MOVL cycles+0(FP), AX
+again:
+ PAUSE
+ SUBL $1, AX
+ JNZ again
+ RET
+
+TEXT runtime·atomicstorep(SB), NOSPLIT, $0-16
+ MOVQ ptr+0(FP), BX
+ MOVQ val+8(FP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
+TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
+ MOVQ ptr+0(FP), BX
+ MOVL val+8(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
+ MOVQ ptr+0(FP), BX
+ MOVQ val+8(FP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
+// void runtime·atomicor8(byte volatile*, byte);
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
+ MOVQ ptr+0(FP), AX
+ MOVB val+8(FP), BX
+ LOCK
+ ORB BX, (AX)
+ RET
+
+// void jmpdefer(fn, sp);
+// called from deferreturn.
+// 1. pop the caller
+// 2. sub 5 bytes (the size of the CALL instruction) from the caller's return address
+// 3. jmp to the argument
+TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
+ MOVQ fv+0(FP), DX // fn
+ MOVQ argp+8(FP), BX // caller sp
+ LEAQ -8(BX), SP // caller sp after CALL
+ SUBQ $5, (SP) // return to CALL again
+ MOVQ 0(DX), BX
+ JMP BX // but first run the deferred function
+
+// Save state of caller into g->sched. Smashes R8, R9.
+TEXT gosave<>(SB),NOSPLIT,$0
+ get_tls(R8)
+ MOVQ g(R8), R8
+ MOVQ 0(SP), R9
+ MOVQ R9, (g_sched+gobuf_pc)(R8)
+ LEAQ 8(SP), R9
+ MOVQ R9, (g_sched+gobuf_sp)(R8)
+ MOVQ $0, (g_sched+gobuf_ret)(R8)
+ MOVQ $0, (g_sched+gobuf_ctxt)(R8)
+ RET
+
+// asmcgocall(void(*fn)(void*), void *arg)
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.c for more details.
+TEXT runtime·asmcgocall(SB),NOSPLIT,$0-16
+ MOVQ fn+0(FP), AX
+ MOVQ arg+8(FP), BX
+ CALL asmcgocall<>(SB)
+ RET
+
+TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-20
+ MOVQ fn+0(FP), AX
+ MOVQ arg+8(FP), BX
+ CALL asmcgocall<>(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+// asmcgocall common code. fn in AX, arg in BX. returns errno in AX.
+TEXT asmcgocall<>(SB),NOSPLIT,$0-0
+ MOVQ SP, DX
+
+ // Figure out if we need to switch to m->g0 stack.
+ // We get called to create new OS threads too, and those
+ // come in on the m->g0 stack already.
+ get_tls(CX)
+ MOVQ g(CX), BP
+ MOVQ g_m(BP), BP
+ MOVQ m_g0(BP), SI
+ MOVQ g(CX), DI
+ CMPQ SI, DI
+ JEQ nosave
+ MOVQ m_gsignal(BP), SI
+ CMPQ SI, DI
+ JEQ nosave
+
+ MOVQ m_g0(BP), SI
+ CALL gosave<>(SB)
+ MOVQ SI, g(CX)
+ MOVQ (g_sched+gobuf_sp)(SI), SP
+nosave:
+
+ // Now on a scheduling stack (a pthread-created stack).
+ // Make sure we have enough room for 4 stack-backed fast-call
+ // registers as per windows amd64 calling convention.
+ SUBQ $64, SP
+ ANDQ $~15, SP // alignment for gcc ABI
+ MOVQ DI, 48(SP) // save g
+ MOVQ DX, 40(SP) // save SP
+ MOVQ BX, DI // DI = first argument in AMD64 ABI
+ MOVQ BX, CX // CX = first argument in Win64
+ CALL AX
+
+ // Restore registers, g, stack pointer.
+ get_tls(CX)
+ MOVQ 48(SP), DI
+ MOVQ DI, g(CX)
+ MOVQ 40(SP), SP
+ RET
+
+// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
+// Turn the fn into a Go func (by taking its address) and call
+// cgocallback_gofunc.
+TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
+ LEAQ fn+0(FP), AX
+ MOVQ AX, 0(SP)
+ MOVQ frame+8(FP), AX
+ MOVQ AX, 8(SP)
+ MOVQ framesize+16(FP), AX
+ MOVQ AX, 16(SP)
+ MOVQ $runtime·cgocallback_gofunc(SB), AX
+ CALL AX
+ RET
+
+// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
+// See cgocall.c for more details.
+TEXT runtime·cgocallback_gofunc(SB),NOSPLIT,$8-24
+ // If g is nil, Go did not create the current thread.
+ // Call needm to obtain one m for temporary use.
+ // In this case, we're running on the thread stack, so there's
+ // lots of space, but the linker doesn't know. Hide the call from
+ // the linker analysis by using an indirect call through AX.
+ get_tls(CX)
+#ifdef GOOS_windows
+ MOVL $0, BP
+ CMPQ CX, $0
+ JEQ 2(PC)
+#endif
+ MOVQ g(CX), BP
+ CMPQ BP, $0
+ JEQ needm
+ MOVQ g_m(BP), BP
+ MOVQ BP, R8 // holds oldm until end of function
+ JMP havem
+needm:
+ MOVQ $0, 0(SP)
+ MOVQ $runtime·needm(SB), AX
+ CALL AX
+ MOVQ 0(SP), R8
+ get_tls(CX)
+ MOVQ g(CX), BP
+ MOVQ g_m(BP), BP
+
+havem:
+ // Now there's a valid m, and we're running on its m->g0.
+ // Save current m->g0->sched.sp on stack and then set it to SP.
+ // Save current sp in m->g0->sched.sp in preparation for
+ // switch back to m->curg stack.
+ // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
+ MOVQ m_g0(BP), SI
+ MOVQ (g_sched+gobuf_sp)(SI), AX
+ MOVQ AX, 0(SP)
+ MOVQ SP, (g_sched+gobuf_sp)(SI)
+
+ // Switch to m->curg stack and call runtime.cgocallbackg.
+ // Because we are taking over the execution of m->curg
+ // but *not* resuming what had been running, we need to
+ // save that information (m->curg->sched) so we can restore it.
+ // We can restore m->curg->sched.sp easily, because calling
+ // runtime.cgocallbackg leaves SP unchanged upon return.
+ // To save m->curg->sched.pc, we push it onto the stack.
+ // This has the added benefit that it looks to the traceback
+ // routine like cgocallbackg is going to return to that
+ // PC (because the frame we allocate below has the same
+ // size as cgocallback_gofunc's frame declared above)
+ // so that the traceback will seamlessly trace back into
+ // the earlier calls.
+ //
+ // In the new goroutine, 0(SP) holds the saved R8.
+ MOVQ m_curg(BP), SI
+ MOVQ SI, g(CX)
+ MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
+ MOVQ (g_sched+gobuf_pc)(SI), BP
+ MOVQ BP, -8(DI)
+ LEAQ -(8+8)(DI), SP
+ MOVQ R8, 0(SP)
+ CALL runtime·cgocallbackg(SB)
+ MOVQ 0(SP), R8
+
+ // Restore g->sched (== m->curg->sched) from saved values.
+ get_tls(CX)
+ MOVQ g(CX), SI
+ MOVQ 8(SP), BP
+ MOVQ BP, (g_sched+gobuf_pc)(SI)
+ LEAQ (8+8)(SP), DI
+ MOVQ DI, (g_sched+gobuf_sp)(SI)
+
+ // Switch back to m->g0's stack and restore m->g0->sched.sp.
+ // (Unlike m->curg, the g0 goroutine never uses sched.pc,
+ // so we do not have to restore it.)
+ MOVQ g(CX), BP
+ MOVQ g_m(BP), BP
+ MOVQ m_g0(BP), SI
+ MOVQ SI, g(CX)
+ MOVQ (g_sched+gobuf_sp)(SI), SP
+ MOVQ 0(SP), AX
+ MOVQ AX, (g_sched+gobuf_sp)(SI)
+
+ // If the m on entry was nil, we called needm above to borrow an m
+ // for the duration of the call. Since the call is over, return it with dropm.
+ CMPQ R8, $0
+ JNE 3(PC)
+ MOVQ $runtime·dropm(SB), AX
+ CALL AX
+
+ // Done!
+ RET
+
+// void setg(G*); set g. for use by needm.
+TEXT runtime·setg(SB), NOSPLIT, $0-8
+ MOVQ gg+0(FP), BX
+#ifdef GOOS_windows
+ CMPQ BX, $0
+ JNE settls
+ MOVQ $0, 0x28(GS)
+ RET
+settls:
+ MOVQ g_m(BX), AX
+ LEAQ m_tls(AX), AX
+ MOVQ AX, 0x28(GS)
+#endif
+ get_tls(CX)
+ MOVQ BX, g(CX)
+ RET
+
+// void setg_gcc(G*); set g called from gcc.
+TEXT setg_gcc<>(SB),NOSPLIT,$0
+ get_tls(AX)
+ MOVQ DI, g(AX)
+ RET
+
+// check that SP is in range [g->stackbase, g->stackguard)
+TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
+ get_tls(CX)
+ MOVQ g(CX), AX
+ CMPQ g_stackbase(AX), SP
+ JHI 2(PC)
+ INT $3
+ CMPQ SP, g_stackguard(AX)
+ JHI 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·getcallerpc(SB),NOSPLIT,$0-16
+ MOVQ argp+0(FP),AX // addr of first arg
+ MOVQ -8(AX),AX // get calling pc
+ MOVQ AX, ret+8(FP)
+ RET
+
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16
+ MOVQ p+0(FP),AX // addr of first arg
+ MOVQ -8(AX),AX // get calling pc
+ MOVQ AX,ret+8(FP)
+ RET
+
+TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
+ MOVQ argp+0(FP),AX // addr of first arg
+ MOVQ pc+8(FP), BX
+ MOVQ BX, -8(AX) // set calling pc
+ RET
+
+TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
+ MOVQ argp+0(FP), AX
+ MOVQ AX, ret+8(FP)
+ RET
+
+// func gogetcallersp(p unsafe.Pointer) uintptr
+TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-16
+ MOVQ p+0(FP),AX // addr of first arg
+ MOVQ AX, ret+8(FP)
+ RET
+
+// int64 runtime·cputicks(void)
+TEXT runtime·cputicks(SB),NOSPLIT,$0-0
+ RDTSC
+ SHLQ $32, DX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·gocputicks(SB),NOSPLIT,$0-8
+ RDTSC
+ SHLQ $32, DX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·stackguard(SB),NOSPLIT,$0-16
+ MOVQ SP, DX
+ MOVQ DX, sp+0(FP)
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_stackguard(BX), DX
+ MOVQ DX, limit+8(FP)
+ RET
+
+GLOBL runtime·tls0(SB), $64
+
+// hash function using AES hardware instructions
+TEXT runtime·aeshash(SB),NOSPLIT,$0-32
+ MOVQ p+0(FP), AX // ptr to data
+ MOVQ s+8(FP), CX // size
+ JMP runtime·aeshashbody(SB)
+
+TEXT runtime·aeshashstr(SB),NOSPLIT,$0-32
+ MOVQ p+0(FP), AX // ptr to string struct
+ // s+8(FP) is ignored, it is always sizeof(String)
+ MOVQ 8(AX), CX // length of string
+ MOVQ (AX), AX // string data
+ JMP runtime·aeshashbody(SB)
+
+// AX: data
+// CX: length
+TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32
+ MOVQ h+16(FP), X0 // seed to low 64 bits of xmm0
+ PINSRQ $1, CX, X0 // size to high 64 bits of xmm0
+ MOVO runtime·aeskeysched+0(SB), X2
+ MOVO runtime·aeskeysched+16(SB), X3
+ CMPQ CX, $16
+ JB aessmall
+aesloop:
+ CMPQ CX, $16
+ JBE aesloopend
+ MOVOU (AX), X1
+ AESENC X2, X0
+ AESENC X1, X0
+ SUBQ $16, CX
+ ADDQ $16, AX
+ JMP aesloop
+// 1-16 bytes remaining
+aesloopend:
+ // This load may overlap with the previous load above.
+ // We'll hash some bytes twice, but that's ok.
+ MOVOU -16(AX)(CX*1), X1
+ JMP partial
+// 0-15 bytes
+aessmall:
+ TESTQ CX, CX
+ JE finalize // 0 bytes
+
+ CMPB AX, $0xf0
+ JA highpartial
+
+ // 16 bytes loaded at this address won't cross
+ // a page boundary, so we can load it directly.
+ MOVOU (AX), X1
+ ADDQ CX, CX
+ MOVQ $masks<>(SB), BP
+ PAND (BP)(CX*8), X1
+ JMP partial
+highpartial:
+ // address ends in 1111xxxx. Might be up against
+ // a page boundary, so load ending at last byte.
+ // Then shift bytes down using pshufb.
+ MOVOU -16(AX)(CX*1), X1
+ ADDQ CX, CX
+ MOVQ $shifts<>(SB), BP
+ PSHUFB (BP)(CX*8), X1
+partial:
+ // incorporate partial block into hash
+ AESENC X3, X0
+ AESENC X1, X0
+finalize:
+ // finalize hash
+ AESENC X2, X0
+ AESENC X3, X0
+ AESENC X2, X0
+ MOVQ X0, res+24(FP)
+ RET
+
+TEXT runtime·aeshash32(SB),NOSPLIT,$0-32
+ MOVQ p+0(FP), AX // ptr to data
+ // s+8(FP) is ignored, it is always sizeof(int32)
+ MOVQ h+16(FP), X0 // seed
+ PINSRD $2, (AX), X0 // data
+ AESENC runtime·aeskeysched+0(SB), X0
+ AESENC runtime·aeskeysched+16(SB), X0
+ AESENC runtime·aeskeysched+0(SB), X0
+ MOVQ X0, ret+24(FP)
+ RET
+
+TEXT runtime·aeshash64(SB),NOSPLIT,$0-32
+ MOVQ p+0(FP), AX // ptr to data
+ // s+8(FP) is ignored, it is always sizeof(int64)
+ MOVQ h+16(FP), X0 // seed
+ PINSRQ $1, (AX), X0 // data
+ AESENC runtime·aeskeysched+0(SB), X0
+ AESENC runtime·aeskeysched+16(SB), X0
+ AESENC runtime·aeskeysched+0(SB), X0
+ MOVQ X0, ret+24(FP)
+ RET
+
+// simple mask to get rid of data in the high part of the register.
+DATA masks<>+0x00(SB)/8, $0x0000000000000000
+DATA masks<>+0x08(SB)/8, $0x0000000000000000
+DATA masks<>+0x10(SB)/8, $0x00000000000000ff
+DATA masks<>+0x18(SB)/8, $0x0000000000000000
+DATA masks<>+0x20(SB)/8, $0x000000000000ffff
+DATA masks<>+0x28(SB)/8, $0x0000000000000000
+DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
+DATA masks<>+0x38(SB)/8, $0x0000000000000000
+DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
+DATA masks<>+0x48(SB)/8, $0x0000000000000000
+DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
+DATA masks<>+0x58(SB)/8, $0x0000000000000000
+DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
+DATA masks<>+0x68(SB)/8, $0x0000000000000000
+DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
+DATA masks<>+0x78(SB)/8, $0x0000000000000000
+DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
+DATA masks<>+0x88(SB)/8, $0x0000000000000000
+DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
+DATA masks<>+0x98(SB)/8, $0x00000000000000ff
+DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
+DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
+DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
+DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
+DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
+DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
+DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
+GLOBL masks<>(SB),RODATA,$256
+
+// these are arguments to pshufb. They move data down from
+// the high bytes of the register to the low bytes of the register.
+// index is how many bytes to move.
+DATA shifts<>+0x00(SB)/8, $0x0000000000000000
+DATA shifts<>+0x08(SB)/8, $0x0000000000000000
+DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
+DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
+DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
+DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
+DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
+DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
+DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
+DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
+DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
+DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
+DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
+DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
+DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
+DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
+DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
+DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
+DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
+DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
+DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
+DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
+DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
+DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
+DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
+GLOBL shifts<>(SB),RODATA,$256
+
+TEXT runtime·memeq(SB),NOSPLIT,$0-25
+ MOVQ a+0(FP), SI
+ MOVQ b+8(FP), DI
+ MOVQ size+16(FP), BX
+ CALL runtime·memeqbody(SB)
+ MOVB AX, ret+24(FP)
+ RET
+
+// eqstring tests whether two strings are equal.
+// See runtime_test.go:eqstring_generic for
+// equivalent Go code.
+TEXT runtime·eqstring(SB),NOSPLIT,$0-33
+ MOVQ s1len+8(FP), AX
+ MOVQ s2len+24(FP), BX
+ CMPQ AX, BX
+ JNE different
+ MOVQ s1str+0(FP), SI
+ MOVQ s2str+16(FP), DI
+ CMPQ SI, DI
+ JEQ same
+ CALL runtime·memeqbody(SB)
+ MOVB AX, v+32(FP)
+ RET
+same:
+ MOVB $1, v+32(FP)
+ RET
+different:
+ MOVB $0, v+32(FP)
+ RET
+
+// a in SI
+// b in DI
+// count in BX
+TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
+ XORQ AX, AX
+
+ CMPQ BX, $8
+ JB small
+
+ // 64 bytes at a time using xmm registers
+hugeloop:
+ CMPQ BX, $64
+ JB bigloop
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ MOVOU 16(SI), X2
+ MOVOU 16(DI), X3
+ MOVOU 32(SI), X4
+ MOVOU 32(DI), X5
+ MOVOU 48(SI), X6
+ MOVOU 48(DI), X7
+ PCMPEQB X1, X0
+ PCMPEQB X3, X2
+ PCMPEQB X5, X4
+ PCMPEQB X7, X6
+ PAND X2, X0
+ PAND X6, X4
+ PAND X4, X0
+ PMOVMSKB X0, DX
+ ADDQ $64, SI
+ ADDQ $64, DI
+ SUBQ $64, BX
+ CMPL DX, $0xffff
+ JEQ hugeloop
+ RET
+
+ // 8 bytes at a time using 64-bit register
+bigloop:
+ CMPQ BX, $8
+ JBE leftover
+ MOVQ (SI), CX
+ MOVQ (DI), DX
+ ADDQ $8, SI
+ ADDQ $8, DI
+ SUBQ $8, BX
+ CMPQ CX, DX
+ JEQ bigloop
+ RET
+
+ // remaining 0-8 bytes
+leftover:
+ MOVQ -8(SI)(BX*1), CX
+ MOVQ -8(DI)(BX*1), DX
+ CMPQ CX, DX
+ SETEQ AX
+ RET
+
+small:
+ CMPQ BX, $0
+ JEQ equal
+
+ LEAQ 0(BX*8), CX
+ NEGQ CX
+
+ CMPB SI, $0xf8
+ JA si_high
+
+ // load at SI won't cross a page boundary.
+ MOVQ (SI), SI
+ JMP si_finish
+si_high:
+	// address ends in 11111xxx. Load the 8 bytes ending at the last data byte, then shift the wanted bytes into the low positions.
+ MOVQ -8(SI)(BX*1), SI
+ SHRQ CX, SI
+si_finish:
+
+ // same for DI.
+ CMPB DI, $0xf8
+ JA di_high
+ MOVQ (DI), DI
+ JMP di_finish
+di_high:
+ MOVQ -8(DI)(BX*1), DI
+ SHRQ CX, DI
+di_finish:
+
+ SUBQ SI, DI
+ SHLQ CX, DI
+equal:
+ SETEQ AX
+ RET
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
+ MOVQ s1_base+0(FP), SI
+ MOVQ s1_len+8(FP), BX
+ MOVQ s2_base+16(FP), DI
+ MOVQ s2_len+24(FP), DX
+ CALL runtime·cmpbody(SB)
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56
+ MOVQ s1+0(FP), SI
+ MOVQ s1+8(FP), BX
+ MOVQ s2+24(FP), DI
+ MOVQ s2+32(FP), DX
+ CALL runtime·cmpbody(SB)
+ MOVQ AX, res+48(FP)
+ RET
+
+// input:
+// SI = a
+// DI = b
+// BX = alen
+// DX = blen
+// output:
+// AX = 1/0/-1
+TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
+ CMPQ SI, DI
+ JEQ cmp_allsame
+ CMPQ BX, DX
+ MOVQ DX, BP
+ CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare
+ CMPQ BP, $8
+ JB cmp_small
+
+cmp_loop:
+ CMPQ BP, $16
+ JBE cmp_0through16
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ PCMPEQB X0, X1
+ PMOVMSKB X1, AX
+ XORQ $0xffff, AX // convert EQ to NE
+ JNE cmp_diff16 // branch if at least one byte is not equal
+ ADDQ $16, SI
+ ADDQ $16, DI
+ SUBQ $16, BP
+ JMP cmp_loop
+
+ // AX = bit mask of differences
+cmp_diff16:
+ BSFQ AX, BX // index of first byte that differs
+ XORQ AX, AX
+ MOVB (SI)(BX*1), CX
+ CMPB CX, (DI)(BX*1)
+ SETHI AX
+ LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
+ RET
+
+ // 0 through 16 bytes left, alen>=8, blen>=8
+cmp_0through16:
+ CMPQ BP, $8
+ JBE cmp_0through8
+ MOVQ (SI), AX
+ MOVQ (DI), CX
+ CMPQ AX, CX
+ JNE cmp_diff8
+cmp_0through8:
+ MOVQ -8(SI)(BP*1), AX
+ MOVQ -8(DI)(BP*1), CX
+ CMPQ AX, CX
+ JEQ cmp_allsame
+
+ // AX and CX contain parts of a and b that differ.
+cmp_diff8:
+ BSWAPQ AX // reverse order of bytes
+ BSWAPQ CX
+ XORQ AX, CX
+ BSRQ CX, CX // index of highest bit difference
+ SHRQ CX, AX // move a's bit to bottom
+ ANDQ $1, AX // mask bit
+ LEAQ -1(AX*2), AX // 1/0 => +1/-1
+ RET
+
+ // 0-7 bytes in common
+cmp_small:
+ LEAQ (BP*8), CX // bytes left -> bits left
+	NEGQ	CX	// - bits left (== 64 - bits left mod 64)
+ JEQ cmp_allsame
+
+	// load bytes of a into high bytes of SI
+ CMPB SI, $0xf8
+ JA cmp_si_high
+ MOVQ (SI), SI
+ JMP cmp_si_finish
+cmp_si_high:
+ MOVQ -8(SI)(BP*1), SI
+ SHRQ CX, SI
+cmp_si_finish:
+ SHLQ CX, SI
+
+	// load bytes of b into high bytes of DI
+ CMPB DI, $0xf8
+ JA cmp_di_high
+ MOVQ (DI), DI
+ JMP cmp_di_finish
+cmp_di_high:
+ MOVQ -8(DI)(BP*1), DI
+ SHRQ CX, DI
+cmp_di_finish:
+ SHLQ CX, DI
+
+ BSWAPQ SI // reverse order of bytes
+ BSWAPQ DI
+ XORQ SI, DI // find bit differences
+ JEQ cmp_allsame
+ BSRQ DI, CX // index of highest bit difference
+ SHRQ CX, SI // move a's bit to bottom
+ ANDQ $1, SI // mask bit
+ LEAQ -1(SI*2), AX // 1/0 => +1/-1
+ RET
+
+cmp_allsame:
+ XORQ AX, AX
+ XORQ CX, CX
+ CMPQ BX, DX
+ SETGT AX // 1 if alen > blen
+ SETEQ CX // 1 if alen == blen
+ LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
+ RET
+
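cmpbody implements the usual three-way comparison behind runtime·cmpstring and runtime·cmpbytes: compare the common prefix, then break ties on length. Stripped of the SSE and word-at-a-time fast paths, the contract is this Go sketch (name illustrative):

	func cmpGeneric(a, b []byte) int {
		n := len(a)
		if len(b) < n {
			n = len(b)
		}
		for i := 0; i < n; i++ {
			if a[i] != b[i] {
				if a[i] < b[i] {
					return -1
				}
				return +1
			}
		}
		switch {
		case len(a) < len(b):
			return -1
		case len(a) > len(b):
			return +1
		}
		return 0
	}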
+TEXT bytes·IndexByte(SB),NOSPLIT,$0
+ MOVQ s+0(FP), SI
+ MOVQ s_len+8(FP), BX
+ MOVB c+24(FP), AL
+ CALL runtime·indexbytebody(SB)
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT strings·IndexByte(SB),NOSPLIT,$0
+ MOVQ s+0(FP), SI
+ MOVQ s_len+8(FP), BX
+ MOVB c+16(FP), AL
+ CALL runtime·indexbytebody(SB)
+ MOVQ AX, ret+24(FP)
+ RET
+
+// input:
+// SI: data
+// BX: data len
+// AL: byte sought
+// output:
+// AX
+TEXT runtime·indexbytebody(SB),NOSPLIT,$0
+ MOVQ SI, DI
+
+ CMPQ BX, $16
+ JLT indexbyte_small
+
+ // round up to first 16-byte boundary
+ TESTQ $15, SI
+ JZ aligned
+ MOVQ SI, CX
+ ANDQ $~15, CX
+ ADDQ $16, CX
+
+ // search the beginning
+ SUBQ SI, CX
+ REPN; SCASB
+ JZ success
+
+// DI is 16-byte aligned; get ready to search using SSE instructions
+aligned:
+ // round down to last 16-byte boundary
+ MOVQ BX, R11
+ ADDQ SI, R11
+ ANDQ $~15, R11
+
+ // shuffle X0 around so that each byte contains c
+ MOVD AX, X0
+ PUNPCKLBW X0, X0
+ PUNPCKLBW X0, X0
+ PSHUFL $0, X0, X0
+ JMP condition
+
+sse:
+ // move the next 16-byte chunk of the buffer into X1
+ MOVO (DI), X1
+ // compare bytes in X0 to X1
+ PCMPEQB X0, X1
+ // take the top bit of each byte in X1 and put the result in DX
+ PMOVMSKB X1, DX
+ TESTL DX, DX
+ JNZ ssesuccess
+ ADDQ $16, DI
+
+condition:
+ CMPQ DI, R11
+ JLT sse
+
+ // search the end
+ MOVQ SI, CX
+ ADDQ BX, CX
+ SUBQ R11, CX
+ // if CX == 0, the zero flag will be set and we'll end up
+ // returning a false success
+ JZ failure
+ REPN; SCASB
+ JZ success
+
+failure:
+ MOVQ $-1, AX
+ RET
+
+// handle for lengths < 16
+indexbyte_small:
+ MOVQ BX, CX
+ REPN; SCASB
+ JZ success
+ MOVQ $-1, AX
+ RET
+
+// we've found the chunk containing the byte
+// now just figure out which specific byte it is
+ssesuccess:
+ // get the index of the least significant set bit
+ BSFW DX, DX
+ SUBQ SI, DI
+ ADDQ DI, DX
+ MOVQ DX, AX
+ RET
+
+success:
+ SUBQ SI, DI
+ SUBL $1, DI
+ MOVQ DI, AX
+ RET
+
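indexbytebody uses REPN SCASB for the unaligned head and tail and a PCMPEQB/PMOVMSKB scan for the aligned middle, but its observable behavior is just the scalar search below (sketch, name illustrative):

	// indexByteGeneric returns the index of the first c in s, or -1.
	func indexByteGeneric(s []byte, c byte) int {
		for i, b := range s {
			if b == c {
				return i
			}
		}
		return -1
	}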
+TEXT bytes·Equal(SB),NOSPLIT,$0-49
+ MOVQ a_len+8(FP), BX
+ MOVQ b_len+32(FP), CX
+ XORQ AX, AX
+ CMPQ BX, CX
+ JNE eqret
+ MOVQ a+0(FP), SI
+ MOVQ b+24(FP), DI
+ CALL runtime·memeqbody(SB)
+eqret:
+ MOVB AX, ret+48(FP)
+ RET
+
+// A Duff's device for zeroing memory.
+// The compiler jumps to computed addresses within
+// this routine to zero chunks of memory. Do not
+// change this code without also changing the code
+// in ../../cmd/6g/ggen.c:clearfat.
+// AX: zero
+// DI: ptr to memory to be zeroed
+// DI is updated as a side effect.
+TEXT runtime·duffzero(SB), NOSPLIT, $0-0
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ STOSQ
+ RET
+
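Each STOSQ stores the 8 zero bytes held in AX at DI and advances DI, so the 128 copies above can clear up to 1024 bytes; the compiler jumps partway into the routine so that exactly n/8 stores run. A Go sketch of the net effect, assuming n is a multiple of 8 and at most 1024:

	func duffzeroModel(p []byte) {
		// the generated code enters the routine past 128-len(p)/8 STOSQ instructions
		for i := range p {
			p[i] = 0
		}
	}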
+// A Duff's device for copying memory.
+// The compiler jumps to computed addresses within
+// this routine to copy chunks of memory. Source
+// and destination must not overlap. Do not
+// change this code without also changing the code
+// in ../../cmd/6g/cgen.c:sgen.
+// SI: ptr to source memory
+// DI: ptr to destination memory
+// SI and DI are updated as a side effect.
+
+// NOTE: this is equivalent to a sequence of MOVSQ but
+// for some reason that is 3.5x slower than this code.
+// The STOSQ above seem fine, though.
+TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ MOVQ (SI),CX
+ ADDQ $8,SI
+ MOVQ CX,(DI)
+ ADDQ $8,DI
+
+ RET
+
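duffcopy is the same idea for copying: each four-instruction group moves 8 bytes from SI to DI, and there are 128 groups, so one computed entry point covers any copy up to 1024 bytes. In Go terms (sketch; assumes non-overlapping regions and a length that is a multiple of 8):

	func duffcopyModel(dst, src []uint64) {
		// one element here corresponds to one unrolled 8-byte group above
		for i := range src {
			dst[i] = src[i]
		}
	}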
+TEXT runtime·timenow(SB), NOSPLIT, $0-0
+ JMP time·now(SB)
+
+TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
+ get_tls(CX)
+ MOVQ g(CX), AX
+ MOVQ g_m(AX), AX
+ MOVL m_fastrand(AX), DX
+ ADDL DX, DX
+ MOVL DX, BX
+ XORL $0x88888eef, DX
+ CMOVLMI BX, DX
+ MOVL DX, m_fastrand(AX)
+ MOVL DX, ret+0(FP)
+ RET
+
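fastrand1 advances m->fastrand with a simple 32-bit shift-register step: double the state and, when the doubled value has its high bit set, fold in the constant 0x88888eef; the CMOVLMI keeps the un-xored value in the other case. Equivalent Go sketch:

	func fastrandStep(x uint32) uint32 {
		x += x
		if x&0x80000000 != 0 {
			x ^= 0x88888eef
		}
		return x
	}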
+TEXT runtime·return0(SB), NOSPLIT, $0
+ MOVL $0, AX
+ RET
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
new file mode 100644
index 000000000..6c10bec5c
--- /dev/null
+++ b/src/runtime/asm_amd64p32.s
@@ -0,0 +1,1227 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+TEXT runtime·rt0_go(SB),NOSPLIT,$0
+ // copy arguments forward on an even stack
+ MOVL argc+0(FP), AX
+ MOVL argv+4(FP), BX
+ MOVL SP, CX
+	SUBL	$128, CX		// plenty of scratch
+ ANDL $~15, CX
+ MOVL CX, SP
+
+ MOVL AX, 16(SP)
+ MOVL BX, 24(SP)
+
+ // create istack out of the given (operating system) stack.
+ MOVL $runtime·g0(SB), DI
+	LEAL	(-64*1024+104)(SP), BX
+ MOVL BX, g_stackguard(DI)
+ MOVL BX, g_stackguard0(DI)
+ MOVL SP, g_stackbase(DI)
+
+ // find out information about the processor we're on
+ MOVQ $0, AX
+ CPUID
+ CMPQ AX, $0
+ JE nocpuinfo
+ MOVQ $1, AX
+ CPUID
+ MOVL CX, runtime·cpuid_ecx(SB)
+ MOVL DX, runtime·cpuid_edx(SB)
+nocpuinfo:
+
+needtls:
+ LEAL runtime·tls0(SB), DI
+ CALL runtime·settls(SB)
+
+ // store through it, to make sure it works
+ get_tls(BX)
+ MOVQ $0x123, g(BX)
+ MOVQ runtime·tls0(SB), AX
+ CMPQ AX, $0x123
+ JEQ 2(PC)
+ MOVL AX, 0 // abort
+ok:
+ // set the per-goroutine and per-mach "registers"
+ get_tls(BX)
+ LEAL runtime·g0(SB), CX
+ MOVL CX, g(BX)
+ LEAL runtime·m0(SB), AX
+
+ // save m->g0 = g0
+ MOVL CX, m_g0(AX)
+ // save m0 to g0->m
+ MOVL AX, g_m(CX)
+
+ CLD // convention is D is always left cleared
+ CALL runtime·check(SB)
+
+ MOVL 16(SP), AX // copy argc
+ MOVL AX, 0(SP)
+ MOVL 24(SP), AX // copy argv
+ MOVL AX, 4(SP)
+ CALL runtime·args(SB)
+ CALL runtime·osinit(SB)
+ CALL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ MOVL $runtime·main·f(SB), AX // entry
+ MOVL $0, 0(SP)
+ MOVL AX, 4(SP)
+ ARGSIZE(8)
+ CALL runtime·newproc(SB)
+ ARGSIZE(-1)
+
+ // start this M
+ CALL runtime·mstart(SB)
+
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
+GLOBL runtime·main·f(SB),RODATA,$4
+
+TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
+ INT $3
+ RET
+
+TEXT runtime·asminit(SB),NOSPLIT,$0-0
+ // No per-thread init.
+ RET
+
+/*
+ * go-routine
+ */
+
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB), NOSPLIT, $0-4
+ MOVL buf+0(FP), AX // gobuf
+ LEAL buf+0(FP), BX // caller's SP
+ MOVL BX, gobuf_sp(AX)
+ MOVL 0(SP), BX // caller's PC
+ MOVL BX, gobuf_pc(AX)
+ MOVL $0, gobuf_ctxt(AX)
+ MOVQ $0, gobuf_ret(AX)
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL BX, gobuf_g(AX)
+ RET
+
+// void gogo(Gobuf*)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB), NOSPLIT, $0-4
+ MOVL buf+0(FP), BX // gobuf
+ MOVL gobuf_g(BX), DX
+ MOVL 0(DX), CX // make sure g != nil
+ get_tls(CX)
+ MOVL DX, g(CX)
+ MOVL gobuf_sp(BX), SP // restore SP
+ MOVL gobuf_ctxt(BX), DX
+ MOVQ gobuf_ret(BX), AX
+ MOVL $0, gobuf_sp(BX) // clear to help garbage collector
+ MOVQ $0, gobuf_ret(BX)
+ MOVL $0, gobuf_ctxt(BX)
+ MOVL gobuf_pc(BX), BX
+ JMP BX
+
+// func mcall(fn func(*g))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB), NOSPLIT, $0-4
+ MOVL fn+0(FP), DI
+
+ get_tls(CX)
+ MOVL g(CX), AX // save state in g->sched
+ MOVL 0(SP), BX // caller's PC
+ MOVL BX, (g_sched+gobuf_pc)(AX)
+ LEAL fn+0(FP), BX // caller's SP
+ MOVL BX, (g_sched+gobuf_sp)(AX)
+ MOVL AX, (g_sched+gobuf_g)(AX)
+
+ // switch to m->g0 & its stack, call fn
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVL m_g0(BX), SI
+ CMPL SI, AX // if g == m->g0 call badmcall
+ JNE 3(PC)
+ MOVL $runtime·badmcall(SB), AX
+ JMP AX
+ MOVL SI, g(CX) // g = m->g0
+ MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
+ PUSHQ AX
+ ARGSIZE(8)
+ MOVL DI, DX
+ MOVL 0(DI), DI
+ CALL DI
+ POPQ AX
+ MOVL $runtime·badmcall2(SB), AX
+ JMP AX
+ RET
+
+// switchtoM is a dummy routine that onM leaves at the bottom
+// of the G stack. We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the M stack because the one at the top of
+// the M stack terminates the stack walk (see topofstack()).
+TEXT runtime·switchtoM(SB), NOSPLIT, $0-4
+ RET
+
+// func onM(fn func())
+TEXT runtime·onM(SB), NOSPLIT, $0-4
+ MOVL fn+0(FP), DI // DI = fn
+ get_tls(CX)
+ MOVL g(CX), AX // AX = g
+ MOVL g_m(AX), BX // BX = m
+
+ MOVL m_g0(BX), DX // DX = g0
+ CMPL AX, DX
+ JEQ onm
+
+ MOVL m_curg(BX), R8
+ CMPL AX, R8
+ JEQ oncurg
+
+ // Not g0, not curg. Must be gsignal, but that's not allowed.
+ // Hide call from linker nosplit analysis.
+ MOVL $runtime·badonm(SB), AX
+ CALL AX
+
+oncurg:
+ // save our state in g->sched. Pretend to
+ // be switchtoM if the G stack is scanned.
+ MOVL $runtime·switchtoM(SB), SI
+ MOVL SI, (g_sched+gobuf_pc)(AX)
+ MOVL SP, (g_sched+gobuf_sp)(AX)
+ MOVL AX, (g_sched+gobuf_g)(AX)
+
+ // switch to g0
+ MOVL DX, g(CX)
+ MOVL (g_sched+gobuf_sp)(DX), SP
+
+ // call target function
+ ARGSIZE(0)
+ MOVL DI, DX
+ MOVL 0(DI), DI
+ CALL DI
+
+ // switch back to g
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), BX
+ MOVL m_curg(BX), AX
+ MOVL AX, g(CX)
+ MOVL (g_sched+gobuf_sp)(AX), SP
+ MOVL $0, (g_sched+gobuf_sp)(AX)
+ RET
+
+onm:
+ // already on m stack, just call directly
+ MOVL DI, DX
+ MOVL 0(DI), DI
+ CALL DI
+ RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// Caller has already done get_tls(CX); MOVQ m(CX), BX.
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+TEXT runtime·morestack(SB),NOSPLIT,$0-0
+ // Cannot grow scheduler stack (m->g0).
+ MOVL m_g0(BX), SI
+ CMPL g(CX), SI
+ JNE 2(PC)
+ MOVL 0, AX
+
+ // Cannot grow signal stack (m->gsignal).
+ MOVL m_gsignal(BX), SI
+ CMPL g(CX), SI
+ JNE 2(PC)
+ MOVL 0, AX
+
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOVL 8(SP), AX // f's caller's PC
+ MOVL AX, (m_morebuf+gobuf_pc)(BX)
+ LEAL 16(SP), AX // f's caller's SP
+ MOVL AX, (m_morebuf+gobuf_sp)(BX)
+ MOVL AX, m_moreargp(BX)
+ get_tls(CX)
+ MOVL g(CX), SI
+ MOVL SI, (m_morebuf+gobuf_g)(BX)
+
+ // Set g->sched to context in f.
+ MOVL 0(SP), AX // f's PC
+ MOVL AX, (g_sched+gobuf_pc)(SI)
+ MOVL SI, (g_sched+gobuf_g)(SI)
+ LEAL 8(SP), AX // f's SP
+ MOVL AX, (g_sched+gobuf_sp)(SI)
+ MOVL DX, (g_sched+gobuf_ctxt)(SI)
+
+ // Call newstack on m->g0's stack.
+ MOVL m_g0(BX), BX
+ MOVL BX, g(CX)
+ MOVL (g_sched+gobuf_sp)(BX), SP
+ CALL runtime·newstack(SB)
+ MOVL $0, 0x1003 // crash if newstack returns
+ RET
+
+// reflect·call: call a function with the given argument list
+// func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE) \
+ CMPL CX, $MAXSIZE; \
+ JA 3(PC); \
+ MOVL $NAME(SB), AX; \
+ JMP AX
+// Note: can't just "JMP NAME(SB)" - bad inlining results.
+
+TEXT reflect·call(SB), NOSPLIT, $0-16
+ MOVLQZX argsize+8(FP), CX
+ DISPATCH(runtime·call16, 16)
+ DISPATCH(runtime·call32, 32)
+ DISPATCH(runtime·call64, 64)
+ DISPATCH(runtime·call128, 128)
+ DISPATCH(runtime·call256, 256)
+ DISPATCH(runtime·call512, 512)
+ DISPATCH(runtime·call1024, 1024)
+ DISPATCH(runtime·call2048, 2048)
+ DISPATCH(runtime·call4096, 4096)
+ DISPATCH(runtime·call8192, 8192)
+ DISPATCH(runtime·call16384, 16384)
+ DISPATCH(runtime·call32768, 32768)
+ DISPATCH(runtime·call65536, 65536)
+ DISPATCH(runtime·call131072, 131072)
+ DISPATCH(runtime·call262144, 262144)
+ DISPATCH(runtime·call524288, 524288)
+ DISPATCH(runtime·call1048576, 1048576)
+ DISPATCH(runtime·call2097152, 2097152)
+ DISPATCH(runtime·call4194304, 4194304)
+ DISPATCH(runtime·call8388608, 8388608)
+ DISPATCH(runtime·call16777216, 16777216)
+ DISPATCH(runtime·call33554432, 33554432)
+ DISPATCH(runtime·call67108864, 67108864)
+ DISPATCH(runtime·call134217728, 134217728)
+ DISPATCH(runtime·call268435456, 268435456)
+ DISPATCH(runtime·call536870912, 536870912)
+ DISPATCH(runtime·call1073741824, 1073741824)
+ MOVL $runtime·badreflectcall(SB), AX
+ JMP AX
+
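The DISPATCH chain falls through until it reaches the first helper whose constant frame size is at least argsize, so reflect·call always lands in the smallest power-of-two call helper that fits. A Go sketch of that selection (illustrative only):

	func frameClass(argsize uint32) uint32 {
		for size := uint32(16); size <= 1<<30; size <<= 1 {
			if argsize <= size {
				return size // dispatch to runtime·call<size>
			}
		}
		return 0 // too large: falls through to runtime·badreflectcall
	}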
+// Argument map for the callXX frames. Each has one stack map.
+DATA gcargs_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gcargs_reflectcall<>+0x04(SB)/4, $10 // 5 words
+DATA gcargs_reflectcall<>+0x08(SB)/1, $(const_BitsPointer+(const_BitsPointer<<2)+(const_BitsScalar<<4)+(const_BitsScalar<<6))
+DATA gcargs_reflectcall<>+0x09(SB)/1, $(const_BitsPointer)
+GLOBL gcargs_reflectcall<>(SB),RODATA,$12
+
+// callXX frames have no locals
+DATA gclocals_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gclocals_reflectcall<>+0x04(SB)/4, $0 // 0 locals
+GLOBL gclocals_reflectcall<>(SB),RODATA,$8
+
+#define CALLFN(NAME,MAXSIZE) \
+TEXT NAME(SB), WRAPPER, $MAXSIZE-16; \
+ FUNCDATA $FUNCDATA_ArgsPointerMaps,gcargs_reflectcall<>(SB); \
+ FUNCDATA $FUNCDATA_LocalsPointerMaps,gclocals_reflectcall<>(SB);\
+ /* copy arguments to stack */ \
+ MOVL argptr+4(FP), SI; \
+ MOVL argsize+8(FP), CX; \
+ MOVL SP, DI; \
+ REP;MOVSB; \
+ /* call function */ \
+ MOVL f+0(FP), DX; \
+ MOVL (DX), AX; \
+ CALL AX; \
+ /* copy return values back */ \
+ MOVL argptr+4(FP), DI; \
+ MOVL argsize+8(FP), CX; \
+ MOVL retoffset+12(FP), BX; \
+ MOVL SP, SI; \
+ ADDL BX, DI; \
+ ADDL BX, SI; \
+ SUBL BX, CX; \
+ REP;MOVSB; \
+ RET
+
+CALLFN(runtime·call16, 16)
+CALLFN(runtime·call32, 32)
+CALLFN(runtime·call64, 64)
+CALLFN(runtime·call128, 128)
+CALLFN(runtime·call256, 256)
+CALLFN(runtime·call512, 512)
+CALLFN(runtime·call1024, 1024)
+CALLFN(runtime·call2048, 2048)
+CALLFN(runtime·call4096, 4096)
+CALLFN(runtime·call8192, 8192)
+CALLFN(runtime·call16384, 16384)
+CALLFN(runtime·call32768, 32768)
+CALLFN(runtime·call65536, 65536)
+CALLFN(runtime·call131072, 131072)
+CALLFN(runtime·call262144, 262144)
+CALLFN(runtime·call524288, 524288)
+CALLFN(runtime·call1048576, 1048576)
+CALLFN(runtime·call2097152, 2097152)
+CALLFN(runtime·call4194304, 4194304)
+CALLFN(runtime·call8388608, 8388608)
+CALLFN(runtime·call16777216, 16777216)
+CALLFN(runtime·call33554432, 33554432)
+CALLFN(runtime·call67108864, 67108864)
+CALLFN(runtime·call134217728, 134217728)
+CALLFN(runtime·call268435456, 268435456)
+CALLFN(runtime·call536870912, 536870912)
+CALLFN(runtime·call1073741824, 1073741824)
+
+// Return point when leaving stack.
+//
+// Lessstack can appear in stack traces for the same reason
+// as morestack; in that context, it has 0 arguments.
+TEXT runtime·lessstack(SB), NOSPLIT, $0-0
+ // Save return value in m->cret
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVQ AX, m_cret(BX) // MOVQ, to save all 64 bits
+
+ // Call oldstack on m->g0's stack.
+ MOVL m_g0(BX), BX
+ MOVL BX, g(CX)
+ MOVL (g_sched+gobuf_sp)(BX), SP
+ CALL runtime·oldstack(SB)
+ MOVL $0, 0x1004 // crash if oldstack returns
+ RET
+
+// morestack trampolines
+TEXT runtime·morestack00(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVQ $0, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVL $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack01(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ SHLQ $32, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVL $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack10(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVLQZX AX, AX
+ MOVQ AX, m_moreframesize(BX)
+ MOVL $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack11(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ MOVQ AX, m_moreframesize(BX)
+ MOVL $runtime·morestack(SB), AX
+ JMP AX
+
+// subcases of morestack01
+// with const of 8,16,...48
+TEXT runtime·morestack8(SB),NOSPLIT,$0
+ MOVQ $1, R8
+ MOVL $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack16(SB),NOSPLIT,$0
+ MOVQ $2, R8
+ MOVL $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack24(SB),NOSPLIT,$0
+ MOVQ $3, R8
+ MOVL $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack32(SB),NOSPLIT,$0
+ MOVQ $4, R8
+ MOVL $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack40(SB),NOSPLIT,$0
+ MOVQ $5, R8
+ MOVL $morestack<>(SB), AX
+ JMP AX
+
+TEXT runtime·morestack48(SB),NOSPLIT,$0
+ MOVQ $6, R8
+ MOVL $morestack<>(SB), AX
+ JMP AX
+
+TEXT morestack<>(SB),NOSPLIT,$0
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_m(BX), BX
+ SHLQ $35, R8
+ MOVQ R8, m_moreframesize(BX)
+ MOVL $runtime·morestack(SB), AX
+ JMP AX
+
+TEXT runtime·morestack00_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack00(SB)
+
+TEXT runtime·morestack01_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack01(SB)
+
+TEXT runtime·morestack10_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack10(SB)
+
+TEXT runtime·morestack11_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack11(SB)
+
+TEXT runtime·morestack8_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack8(SB)
+
+TEXT runtime·morestack16_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack16(SB)
+
+TEXT runtime·morestack24_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack24(SB)
+
+TEXT runtime·morestack32_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack32(SB)
+
+TEXT runtime·morestack40_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack40(SB)
+
+TEXT runtime·morestack48_noctxt(SB),NOSPLIT,$0
+ MOVL $0, DX
+ JMP runtime·morestack48(SB)
+
+// bool cas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·cas(SB), NOSPLIT, $0-17
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ JZ 4(PC)
+ MOVL $0, AX
+ MOVB AX, ret+16(FP)
+ RET
+ MOVL $1, AX
+ MOVB AX, ret+16(FP)
+ RET
+
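Apart from the LOCK CMPXCHGL that makes it atomic, runtime·cas has the standard compare-and-swap contract, shown here as a non-atomic Go sketch (name illustrative):

	func casModel(val *uint32, old, new uint32) bool {
		if *val == old {
			*val = new
			return true
		}
		return false
	}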
+TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
+ JMP runtime·cas(SB)
+
+TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
+ JMP runtime·atomicload(SB)
+
+TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
+ JMP runtime·atomicload(SB)
+
+// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
+// Atomically:
+// if(*val == *old){
+// *val = new;
+// return 1;
+// } else {
+// return 0;
+// }
+TEXT runtime·cas64(SB), NOSPLIT, $0-25
+ MOVL ptr+0(FP), BX
+ MOVQ old+8(FP), AX
+ MOVQ new+16(FP), CX
+ LOCK
+ CMPXCHGQ CX, 0(BX)
+ JNZ cas64_fail
+ MOVL $1, AX
+ MOVB AX, ret+24(FP)
+ RET
+cas64_fail:
+ MOVL $0, AX
+ MOVB AX, ret+24(FP)
+ RET
+
+// bool casp(void **val, void *old, void *new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// } else
+// return 0;
+TEXT runtime·casp(SB), NOSPLIT, $0-17
+ MOVL ptr+0(FP), BX
+ MOVL old+4(FP), AX
+ MOVL new+8(FP), CX
+ LOCK
+ CMPXCHGL CX, 0(BX)
+ JZ 4(PC)
+ MOVL $0, AX
+ MOVB AX, ret+16(FP)
+ RET
+ MOVL $1, AX
+ MOVB AX, ret+16(FP)
+ RET
+
+// uint32 xadd(uint32 volatile *val, int32 delta)
+// Atomically:
+// *val += delta;
+// return *val;
+TEXT runtime·xadd(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL delta+4(FP), AX
+ MOVL AX, CX
+ LOCK
+ XADDL AX, 0(BX)
+ ADDL CX, AX
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·xadd64(SB), NOSPLIT, $0-24
+ MOVL ptr+0(FP), BX
+ MOVQ delta+8(FP), AX
+ MOVQ AX, CX
+ LOCK
+ XADDQ AX, 0(BX)
+ ADDQ CX, AX
+ MOVQ AX, ret+16(FP)
+ RET
+
+TEXT runtime·xchg(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL new+4(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·xchg64(SB), NOSPLIT, $0-24
+ MOVL ptr+0(FP), BX
+ MOVQ new+8(FP), AX
+ XCHGQ AX, 0(BX)
+ MOVQ AX, ret+16(FP)
+ RET
+
+TEXT runtime·xchgp(SB), NOSPLIT, $0-12
+ MOVL ptr+0(FP), BX
+ MOVL new+4(FP), AX
+ XCHGL AX, 0(BX)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
+ JMP runtime·xchg(SB)
+
+TEXT runtime·procyield(SB),NOSPLIT,$0-0
+ MOVL cycles+0(FP), AX
+again:
+ PAUSE
+ SUBL $1, AX
+ JNZ again
+ RET
+
+TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), BX
+ MOVL val+4(FP), AX
+ XCHGL AX, 0(BX)
+ RET
+
+TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
+ MOVL ptr+0(FP), BX
+ MOVQ val+8(FP), AX
+ XCHGQ AX, 0(BX)
+ RET
+
+// void runtime·atomicor8(byte volatile*, byte);
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
+ MOVL ptr+0(FP), BX
+ MOVB val+4(FP), AX
+ LOCK
+ ORB AX, 0(BX)
+ RET
+
+// void jmpdefer(fn, sp);
+// called from deferreturn.
+// 1. pop the caller
+// 2. sub 5 bytes from the caller's return
+// 3. jmp to the argument
+TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
+ MOVL fv+0(FP), DX
+ MOVL argp+4(FP), BX
+ LEAL -8(BX), SP // caller sp after CALL
+ SUBL $5, (SP) // return to CALL again
+ MOVL 0(DX), BX
+ JMP BX // but first run the deferred function
+
+// asmcgocall(void(*fn)(void*), void *arg)
+// Not implemented.
+TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
+ MOVL 0, AX
+ RET
+
+// asmcgocall(void(*fn)(void*), void *arg)
+// Not implemented.
+TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
+ MOVL 0, AX
+ RET
+
+// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
+// Not implemented.
+TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
+ MOVL 0, AX
+ RET
+
+// void setg(G*); set g. for use by needm.
+// Not implemented.
+TEXT runtime·setg(SB), NOSPLIT, $0-4
+ MOVL 0, AX
+ RET
+
+// check that SP is in range [g->stackbase, g->stackguard)
+TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
+ get_tls(CX)
+ MOVL g(CX), AX
+ CMPL g_stackbase(AX), SP
+ JHI 2(PC)
+ MOVL 0, AX
+ CMPL SP, g_stackguard(AX)
+ JHI 2(PC)
+ MOVL 0, AX
+ RET
+
+TEXT runtime·memclr(SB),NOSPLIT,$0-8
+ MOVL ptr+0(FP), DI
+ MOVL n+4(FP), CX
+ MOVQ CX, BX
+ ANDQ $7, BX
+ SHRQ $3, CX
+ MOVQ $0, AX
+ CLD
+ REP
+ STOSQ
+ MOVQ BX, CX
+ REP
+ STOSB
+ RET
+
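memclr clears n bytes at ptr, handling the 8-byte words with REP STOSQ and the 0-7 byte tail with REP STOSB. Behaviorally it is just the following (sketch):

	func memclrModel(p []byte) {
		for i := range p {
			p[i] = 0 // words via STOSQ, tail via STOSB in the assembly
		}
	}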
+TEXT runtime·getcallerpc(SB),NOSPLIT,$0-12
+ MOVL argp+0(FP),AX // addr of first arg
+ MOVL -8(AX),AX // get calling pc
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-12
+ MOVL p+0(FP),AX // addr of first arg
+ MOVL -8(AX),AX // get calling pc
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
+ MOVL argp+0(FP),AX // addr of first arg
+ MOVL pc+4(FP), BX // pc to set
+ MOVQ BX, -8(AX) // set calling pc
+ RET
+
+TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
+ MOVL argp+0(FP), AX
+ MOVL AX, ret+8(FP)
+ RET
+
+// func gogetcallersp(p unsafe.Pointer) uintptr
+TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-12
+ MOVL p+0(FP),AX // addr of first arg
+ MOVL AX, ret+8(FP)
+ RET
+
+// int64 runtime·cputicks(void)
+TEXT runtime·cputicks(SB),NOSPLIT,$0-0
+ RDTSC
+ SHLQ $32, DX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·gocputicks(SB),NOSPLIT,$0-8
+ RDTSC
+ SHLQ $32, DX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
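RDTSC returns the time-stamp counter split across DX (high 32 bits) and AX (low 32 bits); the SHLQ/ADDQ pair reassembles the two halves into one 64-bit value. In Go terms (sketch):

	func cputicksModel(dx, ax uint32) int64 {
		return int64(uint64(dx)<<32 + uint64(ax))
	}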
+TEXT runtime·stackguard(SB),NOSPLIT,$0-8
+ MOVL SP, DX
+ MOVL DX, sp+0(FP)
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL g_stackguard(BX), DX
+ MOVL DX, limit+4(FP)
+ RET
+
+GLOBL runtime·tls0(SB), $64
+
+// hash function using AES hardware instructions
+// For now, our one amd64p32 system (NaCl) does not
+// support using AES instructions, so have not bothered to
+// write the implementations. Can copy and adjust the ones
+// in asm_amd64.s when the time comes.
+
+TEXT runtime·aeshash(SB),NOSPLIT,$0-20
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·memeq(SB),NOSPLIT,$0-17
+ MOVL a+0(FP), SI
+ MOVL b+4(FP), DI
+ MOVL size+8(FP), BX
+ CALL runtime·memeqbody(SB)
+ MOVB AX, ret+16(FP)
+ RET
+
+// eqstring tests whether two strings are equal.
+// See runtime_test.go:eqstring_generic for
+// equivalent Go code.
+TEXT runtime·eqstring(SB),NOSPLIT,$0-17
+ MOVL s1len+4(FP), AX
+ MOVL s2len+12(FP), BX
+ CMPL AX, BX
+ JNE different
+ MOVL s1str+0(FP), SI
+ MOVL s2str+8(FP), DI
+ CMPL SI, DI
+ JEQ same
+ CALL runtime·memeqbody(SB)
+ MOVB AX, v+16(FP)
+ RET
+same:
+ MOVB $1, v+16(FP)
+ RET
+different:
+ MOVB $0, v+16(FP)
+ RET
+
+// a in SI
+// b in DI
+// count in BX
+TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
+ XORQ AX, AX
+
+ CMPQ BX, $8
+ JB small
+
+ // 64 bytes at a time using xmm registers
+hugeloop:
+ CMPQ BX, $64
+ JB bigloop
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ MOVOU 16(SI), X2
+ MOVOU 16(DI), X3
+ MOVOU 32(SI), X4
+ MOVOU 32(DI), X5
+ MOVOU 48(SI), X6
+ MOVOU 48(DI), X7
+ PCMPEQB X1, X0
+ PCMPEQB X3, X2
+ PCMPEQB X5, X4
+ PCMPEQB X7, X6
+ PAND X2, X0
+ PAND X6, X4
+ PAND X4, X0
+ PMOVMSKB X0, DX
+ ADDQ $64, SI
+ ADDQ $64, DI
+ SUBQ $64, BX
+ CMPL DX, $0xffff
+ JEQ hugeloop
+ RET
+
+ // 8 bytes at a time using 64-bit register
+bigloop:
+ CMPQ BX, $8
+ JBE leftover
+ MOVQ (SI), CX
+ MOVQ (DI), DX
+ ADDQ $8, SI
+ ADDQ $8, DI
+ SUBQ $8, BX
+ CMPQ CX, DX
+ JEQ bigloop
+ RET
+
+ // remaining 0-8 bytes
+leftover:
+ ADDQ BX, SI
+ ADDQ BX, DI
+ MOVQ -8(SI), CX
+ MOVQ -8(DI), DX
+ CMPQ CX, DX
+ SETEQ AX
+ RET
+
+small:
+ CMPQ BX, $0
+ JEQ equal
+
+ LEAQ 0(BX*8), CX
+ NEGQ CX
+
+ CMPB SI, $0xf8
+ JA si_high
+
+ // load at SI won't cross a page boundary.
+ MOVQ (SI), SI
+ JMP si_finish
+si_high:
+	// address ends in 11111xxx. Load the 8 bytes ending at the last data byte, then shift the wanted bytes into the low positions.
+ MOVQ BX, DX
+ ADDQ SI, DX
+ MOVQ -8(DX), SI
+ SHRQ CX, SI
+si_finish:
+
+ // same for DI.
+ CMPB DI, $0xf8
+ JA di_high
+ MOVQ (DI), DI
+ JMP di_finish
+di_high:
+ MOVQ BX, DX
+ ADDQ DI, DX
+ MOVQ -8(DX), DI
+ SHRQ CX, DI
+di_finish:
+
+ SUBQ SI, DI
+ SHLQ CX, DI
+equal:
+ SETEQ AX
+ RET
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
+ MOVL s1_base+0(FP), SI
+ MOVL s1_len+4(FP), BX
+ MOVL s2_base+8(FP), DI
+ MOVL s2_len+12(FP), DX
+ CALL runtime·cmpbody(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
+ MOVL s1+0(FP), SI
+ MOVL s1+4(FP), BX
+ MOVL s2+12(FP), DI
+ MOVL s2+16(FP), DX
+ CALL runtime·cmpbody(SB)
+ MOVQ AX, res+24(FP)
+ RET
+
+// input:
+// SI = a
+// DI = b
+// BX = alen
+// DX = blen
+// output:
+// AX = 1/0/-1
+TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
+ CMPQ SI, DI
+ JEQ cmp_allsame
+ CMPQ BX, DX
+ MOVQ DX, R8
+ CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
+ CMPQ R8, $8
+ JB cmp_small
+
+cmp_loop:
+ CMPQ R8, $16
+ JBE cmp_0through16
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ PCMPEQB X0, X1
+ PMOVMSKB X1, AX
+ XORQ $0xffff, AX // convert EQ to NE
+ JNE cmp_diff16 // branch if at least one byte is not equal
+ ADDQ $16, SI
+ ADDQ $16, DI
+ SUBQ $16, R8
+ JMP cmp_loop
+
+ // AX = bit mask of differences
+cmp_diff16:
+ BSFQ AX, BX // index of first byte that differs
+ XORQ AX, AX
+ ADDQ BX, SI
+ MOVB (SI), CX
+ ADDQ BX, DI
+ CMPB CX, (DI)
+ SETHI AX
+ LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
+ RET
+
+ // 0 through 16 bytes left, alen>=8, blen>=8
+cmp_0through16:
+ CMPQ R8, $8
+ JBE cmp_0through8
+ MOVQ (SI), AX
+ MOVQ (DI), CX
+ CMPQ AX, CX
+ JNE cmp_diff8
+cmp_0through8:
+ ADDQ R8, SI
+ ADDQ R8, DI
+ MOVQ -8(SI), AX
+ MOVQ -8(DI), CX
+ CMPQ AX, CX
+ JEQ cmp_allsame
+
+ // AX and CX contain parts of a and b that differ.
+cmp_diff8:
+ BSWAPQ AX // reverse order of bytes
+ BSWAPQ CX
+ XORQ AX, CX
+ BSRQ CX, CX // index of highest bit difference
+ SHRQ CX, AX // move a's bit to bottom
+ ANDQ $1, AX // mask bit
+ LEAQ -1(AX*2), AX // 1/0 => +1/-1
+ RET
+
+ // 0-7 bytes in common
+cmp_small:
+ LEAQ (R8*8), CX // bytes left -> bits left
+	NEGQ	CX	// - bits left (== 64 - bits left mod 64)
+ JEQ cmp_allsame
+
+	// load bytes of a into high bytes of SI
+ CMPB SI, $0xf8
+ JA cmp_si_high
+ MOVQ (SI), SI
+ JMP cmp_si_finish
+cmp_si_high:
+ ADDQ R8, SI
+ MOVQ -8(SI), SI
+ SHRQ CX, SI
+cmp_si_finish:
+ SHLQ CX, SI
+
+	// load bytes of b into high bytes of DI
+ CMPB DI, $0xf8
+ JA cmp_di_high
+ MOVQ (DI), DI
+ JMP cmp_di_finish
+cmp_di_high:
+ ADDQ R8, DI
+ MOVQ -8(DI), DI
+ SHRQ CX, DI
+cmp_di_finish:
+ SHLQ CX, DI
+
+ BSWAPQ SI // reverse order of bytes
+ BSWAPQ DI
+ XORQ SI, DI // find bit differences
+ JEQ cmp_allsame
+ BSRQ DI, CX // index of highest bit difference
+ SHRQ CX, SI // move a's bit to bottom
+ ANDQ $1, SI // mask bit
+ LEAQ -1(SI*2), AX // 1/0 => +1/-1
+ RET
+
+cmp_allsame:
+ XORQ AX, AX
+ XORQ CX, CX
+ CMPQ BX, DX
+ SETGT AX // 1 if alen > blen
+ SETEQ CX // 1 if alen == blen
+ LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
+ RET
+
+TEXT bytes·IndexByte(SB),NOSPLIT,$0
+ MOVL s+0(FP), SI
+ MOVL s_len+4(FP), BX
+ MOVB c+12(FP), AL
+ CALL runtime·indexbytebody(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT strings·IndexByte(SB),NOSPLIT,$0
+ MOVL s+0(FP), SI
+ MOVL s_len+4(FP), BX
+ MOVB c+8(FP), AL
+ CALL runtime·indexbytebody(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+// input:
+// SI: data
+// BX: data len
+// AL: byte sought
+// output:
+// AX
+TEXT runtime·indexbytebody(SB),NOSPLIT,$0
+ MOVL SI, DI
+
+ CMPL BX, $16
+ JLT indexbyte_small
+
+ // round up to first 16-byte boundary
+ TESTL $15, SI
+ JZ aligned
+ MOVL SI, CX
+ ANDL $~15, CX
+ ADDL $16, CX
+
+ // search the beginning
+ SUBL SI, CX
+ REPN; SCASB
+ JZ success
+
+// DI is 16-byte aligned; get ready to search using SSE instructions
+aligned:
+ // round down to last 16-byte boundary
+ MOVL BX, R11
+ ADDL SI, R11
+ ANDL $~15, R11
+
+ // shuffle X0 around so that each byte contains c
+ MOVD AX, X0
+ PUNPCKLBW X0, X0
+ PUNPCKLBW X0, X0
+ PSHUFL $0, X0, X0
+ JMP condition
+
+sse:
+ // move the next 16-byte chunk of the buffer into X1
+ MOVO (DI), X1
+ // compare bytes in X0 to X1
+ PCMPEQB X0, X1
+ // take the top bit of each byte in X1 and put the result in DX
+ PMOVMSKB X1, DX
+ TESTL DX, DX
+ JNZ ssesuccess
+ ADDL $16, DI
+
+condition:
+ CMPL DI, R11
+ JLT sse
+
+ // search the end
+ MOVL SI, CX
+ ADDL BX, CX
+ SUBL R11, CX
+ // if CX == 0, the zero flag will be set and we'll end up
+ // returning a false success
+ JZ failure
+ REPN; SCASB
+ JZ success
+
+failure:
+ MOVL $-1, AX
+ RET
+
+// handle for lengths < 16
+indexbyte_small:
+ MOVL BX, CX
+ REPN; SCASB
+ JZ success
+ MOVL $-1, AX
+ RET
+
+// we've found the chunk containing the byte
+// now just figure out which specific byte it is
+ssesuccess:
+ // get the index of the least significant set bit
+ BSFW DX, DX
+ SUBL SI, DI
+ ADDL DI, DX
+ MOVL DX, AX
+ RET
+
+success:
+ SUBL SI, DI
+ SUBL $1, DI
+ MOVL DI, AX
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-25
+ MOVL a_len+4(FP), BX
+ MOVL b_len+16(FP), CX
+ XORL AX, AX
+ CMPL BX, CX
+ JNE eqret
+ MOVL a+0(FP), SI
+ MOVL b+12(FP), DI
+ CALL runtime·memeqbody(SB)
+eqret:
+ MOVB AX, ret+24(FP)
+ RET
+
+TEXT runtime·timenow(SB), NOSPLIT, $0-0
+ JMP time·now(SB)
+
+TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), AX
+ MOVL m_fastrand(AX), DX
+ ADDL DX, DX
+ MOVL DX, BX
+ XORL $0x88888eef, DX
+ CMOVLMI BX, DX
+ MOVL DX, m_fastrand(AX)
+ MOVL DX, ret+0(FP)
+ RET
+
+TEXT runtime·return0(SB), NOSPLIT, $0
+ MOVL $0, AX
+ RET
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
new file mode 100644
index 000000000..3db907945
--- /dev/null
+++ b/src/runtime/asm_arm.s
@@ -0,0 +1,1309 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// using frame size $-4 means do not save LR on stack.
+TEXT runtime·rt0_go(SB),NOSPLIT,$-4
+ MOVW $0xcafebabe, R12
+
+ // copy arguments forward on an even stack
+ // use R13 instead of SP to avoid linker rewriting the offsets
+ MOVW 0(R13), R0 // argc
+ MOVW 4(R13), R1 // argv
+ SUB $64, R13 // plenty of scratch
+ AND $~7, R13
+ MOVW R0, 60(R13) // save argc, argv away
+ MOVW R1, 64(R13)
+
+ // set up g register
+ // g is R10
+ MOVW $runtime·g0(SB), g
+ MOVW $runtime·m0(SB), R8
+
+ // save m->g0 = g0
+ MOVW g, m_g0(R8)
+ // save g->m = m0
+ MOVW R8, g_m(g)
+
+ // create istack out of the OS stack
+ MOVW $(-8192+104)(R13), R0
+ MOVW R0, g_stackguard(g) // (w 104b guard)
+ MOVW R0, g_stackguard0(g)
+ MOVW R13, g_stackbase(g)
+ BL runtime·emptyfunc(SB) // fault if stack check is wrong
+
+#ifndef GOOS_nacl
+ // if there is an _cgo_init, call it.
+ MOVW _cgo_init(SB), R4
+ CMP $0, R4
+ B.EQ nocgo
+ MRC 15, 0, R0, C13, C0, 3 // load TLS base pointer
+ MOVW R0, R3 // arg 3: TLS base pointer
+ MOVW $runtime·tlsg(SB), R2 // arg 2: tlsg
+ MOVW $setg_gcc<>(SB), R1 // arg 1: setg
+ MOVW g, R0 // arg 0: G
+ BL (R4) // will clobber R0-R3
+#endif
+
+nocgo:
+ // update stackguard after _cgo_init
+ MOVW g_stackguard0(g), R0
+ MOVW R0, g_stackguard(g)
+
+ BL runtime·checkgoarm(SB)
+ BL runtime·check(SB)
+
+ // saved argc, argv
+ MOVW 60(R13), R0
+ MOVW R0, 4(R13)
+ MOVW 64(R13), R1
+ MOVW R1, 8(R13)
+ BL runtime·args(SB)
+ BL runtime·osinit(SB)
+ BL runtime·schedinit(SB)
+
+ // create a new goroutine to start program
+ MOVW $runtime·main·f(SB), R0
+ MOVW.W R0, -4(R13)
+ MOVW $8, R0
+ MOVW.W R0, -4(R13)
+ MOVW $0, R0
+ MOVW.W R0, -4(R13) // push $0 as guard
+ ARGSIZE(12)
+ BL runtime·newproc(SB)
+ ARGSIZE(-1)
+ MOVW $12(R13), R13 // pop args and LR
+
+ // start this M
+ BL runtime·mstart(SB)
+
+ MOVW $1234, R0
+ MOVW $1000, R1
+ MOVW R0, (R1) // fail hard
+
+DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
+GLOBL runtime·main·f(SB),RODATA,$4
+
+TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
+ // gdb won't skip this breakpoint instruction automatically,
+ // so you must manually "set $pc+=4" to skip it and continue.
+#ifdef GOOS_nacl
+ WORD $0xe125be7f // BKPT 0x5bef, NACL_INSTR_ARM_BREAKPOINT
+#else
+ WORD $0xe1200071 // BKPT 0x0001
+#endif
+ RET
+
+TEXT runtime·asminit(SB),NOSPLIT,$0-0
+ // disable runfast (flush-to-zero) mode of vfp if runtime.goarm > 5
+ MOVB runtime·goarm(SB), R11
+ CMP $5, R11
+ BLE 4(PC)
+ WORD $0xeef1ba10 // vmrs r11, fpscr
+ BIC $(1<<24), R11
+ WORD $0xeee1ba10 // vmsr fpscr, r11
+ RET
+
+/*
+ * go-routine
+ */
+
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+TEXT runtime·gosave(SB),NOSPLIT,$-4-4
+ MOVW 0(FP), R0 // gobuf
+ MOVW SP, gobuf_sp(R0)
+ MOVW LR, gobuf_pc(R0)
+ MOVW g, gobuf_g(R0)
+ MOVW $0, R11
+ MOVW R11, gobuf_lr(R0)
+ MOVW R11, gobuf_ret(R0)
+ MOVW R11, gobuf_ctxt(R0)
+ RET
+
+// void gogo(Gobuf*)
+// restore state from Gobuf; longjmp
+TEXT runtime·gogo(SB),NOSPLIT,$-4-4
+ MOVW 0(FP), R1 // gobuf
+ MOVW gobuf_g(R1), R0
+ BL setg<>(SB)
+
+ // NOTE: We updated g above, and we are about to update SP.
+ // Until LR and PC are also updated, the g/SP/LR/PC quadruple
+ // are out of sync and must not be used as the basis of a traceback.
+ // Sigprof skips the traceback when SP is not within g's bounds,
+ // and when the PC is inside this function, runtime.gogo.
+ // Since we are about to update SP, until we complete runtime.gogo
+ // we must not leave this function. In particular, no calls
+ // after this point: it must be straight-line code until the
+ // final B instruction.
+ // See large comment in sigprof for more details.
+ MOVW gobuf_sp(R1), SP // restore SP
+ MOVW gobuf_lr(R1), LR
+ MOVW gobuf_ret(R1), R0
+ MOVW gobuf_ctxt(R1), R7
+ MOVW $0, R11
+ MOVW R11, gobuf_sp(R1) // clear to help garbage collector
+ MOVW R11, gobuf_ret(R1)
+ MOVW R11, gobuf_lr(R1)
+ MOVW R11, gobuf_ctxt(R1)
+ MOVW gobuf_pc(R1), R11
+ CMP R11, R11 // set condition codes for == test, needed by stack split
+ B (R11)
+
+// func mcall(fn func(*g))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return. It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB),NOSPLIT,$-4-4
+ // Save caller state in g->sched.
+ MOVW SP, (g_sched+gobuf_sp)(g)
+ MOVW LR, (g_sched+gobuf_pc)(g)
+ MOVW $0, R11
+ MOVW R11, (g_sched+gobuf_lr)(g)
+ MOVW g, (g_sched+gobuf_g)(g)
+
+ // Switch to m->g0 & its stack, call fn.
+ MOVW g, R1
+ MOVW g_m(g), R8
+ MOVW m_g0(R8), R0
+ BL setg<>(SB)
+ CMP g, R1
+ B.NE 2(PC)
+ B runtime·badmcall(SB)
+ MOVB runtime·iscgo(SB), R11
+ CMP $0, R11
+ BL.NE runtime·save_g(SB)
+ MOVW fn+0(FP), R0
+ MOVW (g_sched+gobuf_sp)(g), SP
+ SUB $8, SP
+ MOVW R1, 4(SP)
+ MOVW R0, R7
+ MOVW 0(R0), R0
+ BL (R0)
+ B runtime·badmcall2(SB)
+ RET
+
+// switchtoM is a dummy routine that onM leaves at the bottom
+// of the G stack. We need to distinguish the routine that
+// lives at the bottom of the G stack from the one that lives
+// at the top of the M stack because the one at the top of
+// the M stack terminates the stack walk (see topofstack()).
+TEXT runtime·switchtoM(SB),NOSPLIT,$0-4
+ MOVW $0, R0
+ BL (R0) // clobber lr to ensure push {lr} is kept
+ RET
+
+// func onM(fn func())
+TEXT runtime·onM(SB),NOSPLIT,$0-4
+ MOVW fn+0(FP), R0 // R0 = fn
+ MOVW g_m(g), R1 // R1 = m
+
+ MOVW m_g0(R1), R2 // R2 = g0
+ CMP g, R2
+ B.EQ onm
+
+ MOVW m_curg(R1), R3
+ CMP g, R3
+ B.EQ oncurg
+
+ // Not g0, not curg. Must be gsignal, but that's not allowed.
+ // Hide call from linker nosplit analysis.
+ MOVW $runtime·badonm(SB), R0
+ BL (R0)
+
+oncurg:
+ // save our state in g->sched. Pretend to
+ // be switchtoM if the G stack is scanned.
+ MOVW $runtime·switchtoM(SB), R3
+ ADD $4, R3, R3 // get past push {lr}
+ MOVW R3, (g_sched+gobuf_pc)(g)
+ MOVW SP, (g_sched+gobuf_sp)(g)
+ MOVW LR, (g_sched+gobuf_lr)(g)
+ MOVW g, (g_sched+gobuf_g)(g)
+
+ // switch to g0
+ MOVW R0, R5
+ MOVW R2, R0
+ BL setg<>(SB)
+ MOVW R5, R0
+ MOVW (g_sched+gobuf_sp)(R2), R3
+ // make it look like mstart called onM on g0, to stop traceback
+ SUB $4, R3, R3
+ MOVW $runtime·mstart(SB), R4
+ MOVW R4, 0(R3)
+ MOVW R3, SP
+
+ // call target function
+ ARGSIZE(0)
+ MOVW R0, R7
+ MOVW 0(R0), R0
+ BL (R0)
+
+ // switch back to g
+ MOVW g_m(g), R1
+ MOVW m_curg(R1), R0
+ BL setg<>(SB)
+ MOVW (g_sched+gobuf_sp)(g), SP
+ MOVW $0, R3
+ MOVW R3, (g_sched+gobuf_sp)(g)
+ RET
+
+onm:
+ MOVW R0, R7
+ MOVW 0(R0), R0
+ BL (R0)
+ RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// R1 frame size
+// R2 arg size
+// R3 prolog's LR
+// NB. we do not save R0 because we've forced 5c to pass all arguments
+// on the stack.
+// using frame size $-4 means do not save LR on stack.
+//
+// The traceback routines see morestack on a g0 as being
+// the top of a stack (for example, morestack calling newstack
+// calling the scheduler calling newm calling gc), so we must
+// record an argument size. For that purpose, it has no arguments.
+TEXT runtime·morestack(SB),NOSPLIT,$-4-0
+ // Cannot grow scheduler stack (m->g0).
+ MOVW g_m(g), R8
+ MOVW m_g0(R8), R4
+ CMP g, R4
+ BL.EQ runtime·abort(SB)
+
+ // Cannot grow signal stack (m->gsignal).
+ MOVW m_gsignal(R8), R4
+ CMP g, R4
+ BL.EQ runtime·abort(SB)
+
+ MOVW R1, m_moreframesize(R8)
+ MOVW R2, m_moreargsize(R8)
+
+ // Called from f.
+ // Set g->sched to context in f.
+ MOVW R7, (g_sched+gobuf_ctxt)(g)
+ MOVW SP, (g_sched+gobuf_sp)(g)
+ MOVW LR, (g_sched+gobuf_pc)(g)
+ MOVW R3, (g_sched+gobuf_lr)(g)
+
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOVW R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
+ MOVW SP, (m_morebuf+gobuf_sp)(R8) // f's caller's SP
+ MOVW $4(SP), R3 // f's argument pointer
+ MOVW R3, m_moreargp(R8)
+ MOVW g, (m_morebuf+gobuf_g)(R8)
+
+ // Call newstack on m->g0's stack.
+ MOVW m_g0(R8), R0
+ BL setg<>(SB)
+ MOVW (g_sched+gobuf_sp)(g), SP
+ BL runtime·newstack(SB)
+
+ // Not reached, but make sure the return PC from the call to newstack
+ // is still in this function, and not the beginning of the next.
+ RET
+
+TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-4-0
+ MOVW $0, R7
+ B runtime·morestack(SB)
+
+// reflect·call: call a function with the given argument list
+// func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
+// we don't have variable-sized frames, so we use a small number
+// of constant-sized-frame functions to encode a few bits of size in the pc.
+// Caution: ugly multiline assembly macros in your future!
+
+#define DISPATCH(NAME,MAXSIZE) \
+ CMP $MAXSIZE, R0; \
+ B.HI 3(PC); \
+ MOVW $NAME(SB), R1; \
+ B (R1)
+
+TEXT reflect·call(SB),NOSPLIT,$-4-16
+ MOVW argsize+8(FP), R0
+ DISPATCH(runtime·call16, 16)
+ DISPATCH(runtime·call32, 32)
+ DISPATCH(runtime·call64, 64)
+ DISPATCH(runtime·call128, 128)
+ DISPATCH(runtime·call256, 256)
+ DISPATCH(runtime·call512, 512)
+ DISPATCH(runtime·call1024, 1024)
+ DISPATCH(runtime·call2048, 2048)
+ DISPATCH(runtime·call4096, 4096)
+ DISPATCH(runtime·call8192, 8192)
+ DISPATCH(runtime·call16384, 16384)
+ DISPATCH(runtime·call32768, 32768)
+ DISPATCH(runtime·call65536, 65536)
+ DISPATCH(runtime·call131072, 131072)
+ DISPATCH(runtime·call262144, 262144)
+ DISPATCH(runtime·call524288, 524288)
+ DISPATCH(runtime·call1048576, 1048576)
+ DISPATCH(runtime·call2097152, 2097152)
+ DISPATCH(runtime·call4194304, 4194304)
+ DISPATCH(runtime·call8388608, 8388608)
+ DISPATCH(runtime·call16777216, 16777216)
+ DISPATCH(runtime·call33554432, 33554432)
+ DISPATCH(runtime·call67108864, 67108864)
+ DISPATCH(runtime·call134217728, 134217728)
+ DISPATCH(runtime·call268435456, 268435456)
+ DISPATCH(runtime·call536870912, 536870912)
+ DISPATCH(runtime·call1073741824, 1073741824)
+ MOVW $runtime·badreflectcall(SB), R1
+ B (R1)
+
+// Argument map for the callXX frames. Each has one stack map.
+DATA gcargs_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gcargs_reflectcall<>+0x04(SB)/4, $8 // 4 words
+DATA gcargs_reflectcall<>+0x08(SB)/1, $(const_BitsPointer+(const_BitsPointer<<2)+(const_BitsScalar<<4)+(const_BitsScalar<<6))
+GLOBL gcargs_reflectcall<>(SB),RODATA,$12
+
+// callXX frames have no locals
+DATA gclocals_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
+DATA gclocals_reflectcall<>+0x04(SB)/4, $0 // 0 locals
+GLOBL gclocals_reflectcall<>(SB),RODATA,$8
+
+#define CALLFN(NAME,MAXSIZE) \
+TEXT NAME(SB), WRAPPER, $MAXSIZE-16; \
+ FUNCDATA $FUNCDATA_ArgsPointerMaps,gcargs_reflectcall<>(SB); \
+ FUNCDATA $FUNCDATA_LocalsPointerMaps,gclocals_reflectcall<>(SB);\
+ /* copy arguments to stack */ \
+ MOVW argptr+4(FP), R0; \
+ MOVW argsize+8(FP), R2; \
+ ADD $4, SP, R1; \
+ CMP $0, R2; \
+ B.EQ 5(PC); \
+ MOVBU.P 1(R0), R5; \
+ MOVBU.P R5, 1(R1); \
+ SUB $1, R2, R2; \
+ B -5(PC); \
+ /* call function */ \
+ MOVW f+0(FP), R7; \
+ MOVW (R7), R0; \
+ PCDATA $PCDATA_StackMapIndex, $0; \
+ BL (R0); \
+ /* copy return values back */ \
+ MOVW argptr+4(FP), R0; \
+ MOVW argsize+8(FP), R2; \
+ MOVW retoffset+12(FP), R3; \
+ ADD $4, SP, R1; \
+ ADD R3, R1; \
+ ADD R3, R0; \
+ SUB R3, R2; \
+ CMP $0, R2; \
+ RET.EQ ; \
+ MOVBU.P 1(R1), R5; \
+ MOVBU.P R5, 1(R0); \
+ SUB $1, R2, R2; \
+ B -5(PC) \
+
+CALLFN(runtime·call16, 16)
+CALLFN(runtime·call32, 32)
+CALLFN(runtime·call64, 64)
+CALLFN(runtime·call128, 128)
+CALLFN(runtime·call256, 256)
+CALLFN(runtime·call512, 512)
+CALLFN(runtime·call1024, 1024)
+CALLFN(runtime·call2048, 2048)
+CALLFN(runtime·call4096, 4096)
+CALLFN(runtime·call8192, 8192)
+CALLFN(runtime·call16384, 16384)
+CALLFN(runtime·call32768, 32768)
+CALLFN(runtime·call65536, 65536)
+CALLFN(runtime·call131072, 131072)
+CALLFN(runtime·call262144, 262144)
+CALLFN(runtime·call524288, 524288)
+CALLFN(runtime·call1048576, 1048576)
+CALLFN(runtime·call2097152, 2097152)
+CALLFN(runtime·call4194304, 4194304)
+CALLFN(runtime·call8388608, 8388608)
+CALLFN(runtime·call16777216, 16777216)
+CALLFN(runtime·call33554432, 33554432)
+CALLFN(runtime·call67108864, 67108864)
+CALLFN(runtime·call134217728, 134217728)
+CALLFN(runtime·call268435456, 268435456)
+CALLFN(runtime·call536870912, 536870912)
+CALLFN(runtime·call1073741824, 1073741824)
+
+// Return point when leaving stack.
+// using frame size $-4 means do not save LR on stack.
+//
+// Lessstack can appear in stack traces for the same reason
+// as morestack; in that context, it has 0 arguments.
+TEXT runtime·lessstack(SB),NOSPLIT,$-4-0
+ // Save return value in m->cret
+ MOVW g_m(g), R8
+ MOVW R0, m_cret(R8)
+
+ // Call oldstack on m->g0's stack.
+ MOVW m_g0(R8), R0
+ BL setg<>(SB)
+ MOVW (g_sched+gobuf_sp)(g), SP
+ BL runtime·oldstack(SB)
+
+// void jmpdefer(fn, sp);
+// called from deferreturn.
+// 1. grab stored LR for caller
+// 2. sub 4 bytes to get back to BL deferreturn
+// 3. B to fn
+// TODO(rsc): Push things on stack and then use pop
+// to load all registers simultaneously, so that a profiling
+// interrupt can never see mismatched SP/LR/PC.
+// (And double-check that pop is atomic in that way.)
+TEXT runtime·jmpdefer(SB),NOSPLIT,$0-8
+ MOVW 0(SP), LR
+ MOVW $-4(LR), LR // BL deferreturn
+ MOVW fv+0(FP), R7
+ MOVW argp+4(FP), SP
+ MOVW $-4(SP), SP // SP is 4 below argp, due to saved LR
+ MOVW 0(R7), R1
+ B (R1)
+
+// Save state of caller into g->sched. Smashes R11.
+TEXT gosave<>(SB),NOSPLIT,$0
+ MOVW LR, (g_sched+gobuf_pc)(g)
+ MOVW R13, (g_sched+gobuf_sp)(g)
+ MOVW $0, R11
+ MOVW R11, (g_sched+gobuf_lr)(g)
+ MOVW R11, (g_sched+gobuf_ret)(g)
+ MOVW R11, (g_sched+gobuf_ctxt)(g)
+ RET
+
+// asmcgocall(void(*fn)(void*), void *arg)
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.c for more details.
+TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
+ MOVW fn+0(FP), R1
+ MOVW arg+4(FP), R0
+ BL asmcgocall<>(SB)
+ RET
+
+TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
+ MOVW fn+0(FP), R1
+ MOVW arg+4(FP), R0
+ BL asmcgocall<>(SB)
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT asmcgocall<>(SB),NOSPLIT,$0-0
+ // fn in R1, arg in R0.
+ MOVW R13, R2
+ MOVW g, R4
+
+ // Figure out if we need to switch to m->g0 stack.
+ // We get called to create new OS threads too, and those
+ // come in on the m->g0 stack already.
+ MOVW g_m(g), R8
+ MOVW m_g0(R8), R3
+ CMP R3, g
+ BEQ asmcgocall_g0
+ BL gosave<>(SB)
+ MOVW R0, R5
+ MOVW R3, R0
+ BL setg<>(SB)
+ MOVW R5, R0
+ MOVW (g_sched+gobuf_sp)(g), R13
+
+ // Now on a scheduling stack (a pthread-created stack).
+asmcgocall_g0:
+ SUB $24, R13
+ BIC $0x7, R13 // alignment for gcc ABI
+ MOVW R4, 20(R13) // save old g
+ MOVW R2, 16(R13) // save old SP
+ BL (R1)
+
+ // Restore registers, g, stack pointer.
+ MOVW R0, R5
+ MOVW 20(R13), R0
+ BL setg<>(SB)
+ MOVW R5, R0
+ MOVW 16(R13), R13
+ RET
+
+// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
+// Turn the fn into a Go func (by taking its address) and call
+// cgocallback_gofunc.
+TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
+ MOVW $fn+0(FP), R0
+ MOVW R0, 4(R13)
+ MOVW frame+4(FP), R0
+ MOVW R0, 8(R13)
+ MOVW framesize+8(FP), R0
+ MOVW R0, 12(R13)
+ MOVW $runtime·cgocallback_gofunc(SB), R0
+ BL (R0)
+ RET
+
+// cgocallback_gofunc(void (*fn)(void*), void *frame, uintptr framesize)
+// See cgocall.c for more details.
+TEXT runtime·cgocallback_gofunc(SB),NOSPLIT,$8-12
+ // Load m and g from thread-local storage.
+ MOVB runtime·iscgo(SB), R0
+ CMP $0, R0
+ BL.NE runtime·load_g(SB)
+
+ // If g is nil, Go did not create the current thread.
+ // Call needm to obtain one for temporary use.
+ // In this case, we're running on the thread stack, so there's
+ // lots of space, but the linker doesn't know. Hide the call from
+ // the linker analysis by using an indirect call.
+ CMP $0, g
+ B.NE havem
+ MOVW g, savedm-4(SP) // g is zero, so is m.
+ MOVW $runtime·needm(SB), R0
+ BL (R0)
+
+havem:
+ MOVW g_m(g), R8
+ MOVW R8, savedm-4(SP)
+ // Now there's a valid m, and we're running on its m->g0.
+ // Save current m->g0->sched.sp on stack and then set it to SP.
+ // Save current sp in m->g0->sched.sp in preparation for
+ // switch back to m->curg stack.
+ // NOTE: unwindm knows that the saved g->sched.sp is at 4(R13) aka savedsp-8(SP).
+ MOVW m_g0(R8), R3
+ MOVW (g_sched+gobuf_sp)(R3), R4
+ MOVW R4, savedsp-8(SP)
+ MOVW R13, (g_sched+gobuf_sp)(R3)
+
+ // Switch to m->curg stack and call runtime.cgocallbackg.
+ // Because we are taking over the execution of m->curg
+ // but *not* resuming what had been running, we need to
+ // save that information (m->curg->sched) so we can restore it.
+ // We can restore m->curg->sched.sp easily, because calling
+ // runtime.cgocallbackg leaves SP unchanged upon return.
+ // To save m->curg->sched.pc, we push it onto the stack.
+ // This has the added benefit that it looks to the traceback
+ // routine like cgocallbackg is going to return to that
+ // PC (because the frame we allocate below has the same
+ // size as cgocallback_gofunc's frame declared above)
+ // so that the traceback will seamlessly trace back into
+ // the earlier calls.
+ //
+ // In the new goroutine, -8(SP) and -4(SP) are unused.
+ MOVW m_curg(R8), R0
+ BL setg<>(SB)
+ MOVW (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
+ MOVW (g_sched+gobuf_pc)(g), R5
+ MOVW R5, -12(R4)
+ MOVW $-12(R4), R13
+ BL runtime·cgocallbackg(SB)
+
+ // Restore g->sched (== m->curg->sched) from saved values.
+ MOVW 0(R13), R5
+ MOVW R5, (g_sched+gobuf_pc)(g)
+ MOVW $12(R13), R4
+ MOVW R4, (g_sched+gobuf_sp)(g)
+
+ // Switch back to m->g0's stack and restore m->g0->sched.sp.
+ // (Unlike m->curg, the g0 goroutine never uses sched.pc,
+ // so we do not have to restore it.)
+ MOVW g_m(g), R8
+ MOVW m_g0(R8), R0
+ BL setg<>(SB)
+ MOVW (g_sched+gobuf_sp)(g), R13
+ MOVW savedsp-8(SP), R4
+ MOVW R4, (g_sched+gobuf_sp)(g)
+
+ // If the m on entry was nil, we called needm above to borrow an m
+ // for the duration of the call. Since the call is over, return it with dropm.
+ MOVW savedm-4(SP), R6
+ CMP $0, R6
+ B.NE 3(PC)
+ MOVW $runtime·dropm(SB), R0
+ BL (R0)
+
+ // Done!
+ RET
+
+// void setg(G*); set g. For use by needm.
+TEXT runtime·setg(SB),NOSPLIT,$-4-4
+ MOVW gg+0(FP), R0
+ B setg<>(SB)
+
+TEXT setg<>(SB),NOSPLIT,$-4-0
+ MOVW R0, g
+
+ // Save g to thread-local storage.
+ MOVB runtime·iscgo(SB), R0
+ CMP $0, R0
+ B.EQ 2(PC)
+ B runtime·save_g(SB)
+
+ MOVW g, R0
+ RET
+
+TEXT runtime·getcallerpc(SB),NOSPLIT,$-4-4
+ MOVW 0(SP), R0
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$-4-8
+ MOVW R14, ret+4(FP)
+ RET
+
+TEXT runtime·setcallerpc(SB),NOSPLIT,$-4-8
+ MOVW pc+4(FP), R0
+ MOVW R0, 0(SP)
+ RET
+
+TEXT runtime·getcallersp(SB),NOSPLIT,$-4-4
+ MOVW 0(FP), R0
+ MOVW $-4(R0), R0
+ MOVW R0, ret+4(FP)
+ RET
+
+// func gogetcallersp(p unsafe.Pointer) uintptr
+TEXT runtime·gogetcallersp(SB),NOSPLIT,$-4-8
+ MOVW 0(FP), R0
+ MOVW $-4(R0), R0
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·emptyfunc(SB),0,$0-0
+ RET
+
+TEXT runtime·abort(SB),NOSPLIT,$-4-0
+ MOVW $0, R0
+ MOVW (R0), R1
+
+// bool armcas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+//
+// To implement runtime·cas in sys_$GOOS_arm.s
+// using the native instructions, use:
+//
+// TEXT runtime·cas(SB),NOSPLIT,$0
+// B runtime·armcas(SB)
+//
+TEXT runtime·armcas(SB),NOSPLIT,$0-13
+ MOVW valptr+0(FP), R1
+ MOVW old+4(FP), R2
+ MOVW new+8(FP), R3
+casl:
+ LDREX (R1), R0
+ CMP R0, R2
+ BNE casfail
+ STREX R3, (R1), R0
+ CMP $0, R0
+ BNE casl
+ MOVW $1, R0
+ MOVB R0, ret+12(FP)
+ RET
+casfail:
+ MOVW $0, R0
+ MOVB R0, ret+12(FP)
+ RET
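+
+// The LDREX/STREX loop above implements exactly the compare-and-swap described
+// in the comment. For reference, the same contract expressed with the standard
+// library (a sketch only, not the runtime's internal cas):
+//
+//	package sketch
+//
+//	import "sync/atomic"
+//
+//	// casSketch mirrors the documented semantics of runtime·armcas:
+//	// if *val still holds old, replace it with new and report success.
+//	func casSketch(val *uint32, old, new uint32) bool {
+//		return atomic.CompareAndSwapUint32(val, old, new)
+//	}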
+
+TEXT runtime·casuintptr(SB),NOSPLIT,$0-13
+ B runtime·cas(SB)
+
+TEXT runtime·atomicloaduintptr(SB),NOSPLIT,$0-8
+ B runtime·atomicload(SB)
+
+TEXT runtime·atomicloaduint(SB),NOSPLIT,$0-8
+ B runtime·atomicload(SB)
+
+TEXT runtime·stackguard(SB),NOSPLIT,$0-8
+ MOVW R13, R1
+ MOVW g_stackguard(g), R2
+ MOVW R1, sp+0(FP)
+ MOVW R2, limit+4(FP)
+ RET
+
+// AES hashing not implemented for ARM
+TEXT runtime·aeshash(SB),NOSPLIT,$-4-0
+ MOVW $0, R0
+ MOVW (R0), R1
+TEXT runtime·aeshash32(SB),NOSPLIT,$-4-0
+ MOVW $0, R0
+ MOVW (R0), R1
+TEXT runtime·aeshash64(SB),NOSPLIT,$-4-0
+ MOVW $0, R0
+ MOVW (R0), R1
+TEXT runtime·aeshashstr(SB),NOSPLIT,$-4-0
+ MOVW $0, R0
+ MOVW (R0), R1
+
+TEXT runtime·memeq(SB),NOSPLIT,$-4-13
+ MOVW a+0(FP), R1
+ MOVW b+4(FP), R2
+ MOVW size+8(FP), R3
+ ADD R1, R3, R6
+ MOVW $1, R0
+ MOVB R0, ret+12(FP)
+_next2:
+ CMP R1, R6
+ RET.EQ
+ MOVBU.P 1(R1), R4
+ MOVBU.P 1(R2), R5
+ CMP R4, R5
+ BEQ _next2
+
+ MOVW $0, R0
+ MOVB R0, ret+12(FP)
+ RET
+
+// eqstring tests whether two strings are equal.
+// See runtime_test.go:eqstring_generic for
+// equivalent Go code.
+TEXT runtime·eqstring(SB),NOSPLIT,$-4-17
+ MOVW s1len+4(FP), R0
+ MOVW s2len+12(FP), R1
+ MOVW $0, R7
+ CMP R0, R1
+ MOVB.NE R7, v+16(FP)
+ RET.NE
+ MOVW s1str+0(FP), R2
+ MOVW s2str+8(FP), R3
+ MOVW $1, R8
+ MOVB R8, v+16(FP)
+ CMP R2, R3
+ RET.EQ
+ ADD R2, R0, R6
+_eqnext:
+ CMP R2, R6
+ RET.EQ
+ MOVBU.P 1(R2), R4
+ MOVBU.P 1(R3), R5
+ CMP R4, R5
+ BEQ _eqnext
+ MOVB R7, v+16(FP)
+ RET
+
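+// The memeq and eqstring routines above both reduce to a length/size check
+// followed by a byte-by-byte scan (eqstring also short-circuits when the two
+// strings share the same backing pointer). A plain-Go sketch of that generic
+// comparison, in the spirit of the eqstring_generic reference the comment
+// points to:
+//
+//	package sketch
+//
+//	// eqstringSketch renders the comparison the assembly performs:
+//	// equal lengths, then equal bytes.
+//	func eqstringSketch(s1, s2 string) bool {
+//		if len(s1) != len(s2) {
+//			return false
+//		}
+//		for i := 0; i < len(s1); i++ {
+//			if s1[i] != s2[i] {
+//				return false
+//			}
+//		}
+//		return true
+//	}
+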
+// void setg_gcc(G*); set g, called from gcc.
+TEXT setg_gcc<>(SB),NOSPLIT,$0
+ MOVW R0, g
+ B runtime·save_g(SB)
+
+// TODO: share code with memeq?
+TEXT bytes·Equal(SB),NOSPLIT,$0
+ MOVW a_len+4(FP), R1
+ MOVW b_len+16(FP), R3
+
+ CMP R1, R3 // unequal lengths are not equal
+ B.NE _notequal
+
+ MOVW a+0(FP), R0
+ MOVW b+12(FP), R2
+ ADD R0, R1 // end
+
+_byteseq_next:
+ CMP R0, R1
+ B.EQ _equal // reached the end
+ MOVBU.P 1(R0), R4
+ MOVBU.P 1(R2), R5
+ CMP R4, R5
+ B.EQ _byteseq_next
+
+_notequal:
+ MOVW $0, R0
+ MOVBU R0, ret+24(FP)
+ RET
+
+_equal:
+ MOVW $1, R0
+ MOVBU R0, ret+24(FP)
+ RET
+
+TEXT bytes·IndexByte(SB),NOSPLIT,$0
+ MOVW s+0(FP), R0
+ MOVW s_len+4(FP), R1
+ MOVBU c+12(FP), R2 // byte to find
+ MOVW R0, R4 // store base for later
+ ADD R0, R1 // end
+
+_loop:
+ CMP R0, R1
+ B.EQ _notfound
+ MOVBU.P 1(R0), R3
+ CMP R2, R3
+ B.NE _loop
+
+ SUB $1, R0 // R0 will be one beyond the position we want
+ SUB R4, R0 // remove base
+ MOVW R0, ret+16(FP)
+ RET
+
+_notfound:
+ MOVW $-1, R0
+ MOVW R0, ret+16(FP)
+ RET
+
+TEXT strings·IndexByte(SB),NOSPLIT,$0
+ MOVW s+0(FP), R0
+ MOVW s_len+4(FP), R1
+ MOVBU c+8(FP), R2 // byte to find
+ MOVW R0, R4 // store base for later
+ ADD R0, R1 // end
+
+_sib_loop:
+ CMP R0, R1
+ B.EQ _sib_notfound
+ MOVBU.P 1(R0), R3
+ CMP R2, R3
+ B.NE _sib_loop
+
+ SUB $1, R0 // R0 will be one beyond the position we want
+ SUB R4, R0 // remove base
+ MOVW R0, ret+12(FP)
+ RET
+
+_sib_notfound:
+ MOVW $-1, R0
+ MOVW R0, ret+12(FP)
+ RET
+
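+// Both IndexByte bodies above are the same scalar scan; there is no vector
+// fast path on ARM here. The loop they implement, as a Go sketch:
+//
+//	package sketch
+//
+//	// indexByteSketch returns the offset of the first occurrence of c in s,
+//	// or -1 if c is not present, matching the assembly above.
+//	func indexByteSketch(s []byte, c byte) int {
+//		for i, b := range s {
+//			if b == c {
+//				return i
+//			}
+//		}
+//		return -1
+//	}
+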
+TEXT runtime·timenow(SB),NOSPLIT,$0-0
+ B time·now(SB)
+
+// A Duff's device for zeroing memory.
+// The compiler jumps to computed addresses within
+// this routine to zero chunks of memory. Do not
+// change this code without also changing the code
+// in ../../cmd/5g/ggen.c:clearfat.
+// R0: zero
+// R1: ptr to memory to be zeroed
+// R1 is updated as a side effect.
+TEXT runtime·duffzero(SB),NOSPLIT,$0-0
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ MOVW.P R0, 4(R1)
+ RET
+
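+// As the comment above notes, callers jump to a computed address inside
+// duffzero rather than to its start. With 128 four-byte MOVW.P stores followed
+// by RET, zeroing n words means entering 4*(128-n) bytes past the label. A
+// sketch of that arithmetic (illustrative only; the real computation lives in
+// cmd/5g's clearfat, and the instruction size assumes ARM mode):
+//
+//	package sketch
+//
+//	// duffzeroEntryOffset returns the byte offset into duffzero at which to
+//	// enter so that exactly nwords stores run before the final RET.
+//	// Assumes 128 stores of 4 bytes each, as in the routine above.
+//	func duffzeroEntryOffset(nwords int) int {
+//		const totalStores = 128
+//		const instrBytes = 4
+//		return instrBytes * (totalStores - nwords)
+//	}
+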
+// A Duff's device for copying memory.
+// The compiler jumps to computed addresses within
+// this routine to copy chunks of memory. Source
+// and destination must not overlap. Do not
+// change this code without also changing the code
+// in ../../cmd/5g/cgen.c:sgen.
+// R0: scratch space
+// R1: ptr to source memory
+// R2: ptr to destination memory
+// R1 and R2 are updated as a side effect
+TEXT runtime·duffcopy(SB),NOSPLIT,$0-0
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ MOVW.P 4(R1), R0
+ MOVW.P R0, 4(R2)
+ RET
+
+TEXT runtime·fastrand1(SB),NOSPLIT,$-4-4
+ MOVW g_m(g), R1
+ MOVW m_fastrand(R1), R0
+ ADD.S R0, R0
+ EOR.MI $0x88888eef, R0
+ MOVW R0, m_fastrand(R1)
+ MOVW R0, ret+0(FP)
+ RET
+
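+// fastrand1 above is a simple shift-and-conditional-XOR step: double the state
+// and, when the doubled value has its high bit set (the MI condition on the
+// ADD.S result), fold in the constant 0x88888eef. The same update in plain Go
+// (sketch; the runtime keeps the state in m.fastrand):
+//
+//	package sketch
+//
+//	// fastrandStep performs one update of the generator implemented above.
+//	func fastrandStep(x uint32) uint32 {
+//		x += x // doubles with 32-bit wraparound, i.e. x << 1
+//		if x&0x80000000 != 0 {
+//			x ^= 0x88888eef
+//		}
+//		return x
+//	}
+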
+TEXT runtime·gocputicks(SB),NOSPLIT,$0
+ B runtime·cputicks(SB)
+
+TEXT runtime·return0(SB),NOSPLIT,$0
+ MOVW $0, R0
+ RET
+
+TEXT runtime·procyield(SB),NOSPLIT,$-4
+ MOVW cycles+0(FP), R1
+ MOVW $0, R0
+yieldloop:
+ CMP R0, R1
+ B.NE 2(PC)
+ RET
+ SUB $1, R1
+ B yieldloop
diff --git a/src/runtime/atomic.go b/src/runtime/atomic.go
new file mode 100644
index 000000000..7e9d9b3aa
--- /dev/null
+++ b/src/runtime/atomic.go
@@ -0,0 +1,51 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !arm
+
+package runtime
+
+import "unsafe"
+
+//go:noescape
+func xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func xchg64(ptr *uint64, new uint64) uint64
+
+//go:noescape
+func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func atomicload(ptr *uint32) uint32
+
+//go:noescape
+func atomicload64(ptr *uint64) uint64
+
+//go:noescape
+func atomicloadp(ptr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func atomicor8(ptr *uint8, val uint8)
+
+//go:noescape
+func cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func atomicstore(ptr *uint32, val uint32)
+
+//go:noescape
+func atomicstore64(ptr *uint64, val uint64)
+
+//go:noescape
+func atomicstorep(ptr unsafe.Pointer, val unsafe.Pointer)
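+
+// These declarations form the runtime's internal atomic API; on non-ARM
+// architectures they are implemented in assembly. For orientation, a rough
+// correspondence with the exported sync/atomic package (sketch only; the
+// internal functions are not interchangeable with the public ones):
+//
+//	package main
+//
+//	import (
+//		"fmt"
+//		"sync/atomic"
+//	)
+//
+//	func main() {
+//		var n uint32
+//		var w uint64
+//		atomic.AddUint32(&n, 1)                     // roughly xadd(&n, 1)
+//		atomic.SwapUint32(&n, 7)                    // roughly xchg(&n, 7)
+//		v := atomic.LoadUint32(&n)                  // roughly atomicload(&n)
+//		atomic.StoreUint32(&n, 42)                  // roughly atomicstore(&n, 42)
+//		ok := atomic.CompareAndSwapUint64(&w, 0, 1) // roughly cas64(&w, 0, 1)
+//		fmt.Println(v, n, ok)
+//	}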
diff --git a/src/runtime/atomic_386.c b/src/runtime/atomic_386.c
new file mode 100644
index 000000000..82d36f2d9
--- /dev/null
+++ b/src/runtime/atomic_386.c
@@ -0,0 +1,46 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "textflag.h"
+
+#pragma textflag NOSPLIT
+uint32
+runtime·atomicload(uint32 volatile* addr)
+{
+ return *addr;
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·atomicloadp(void* volatile* addr)
+{
+ return *addr;
+}
+
+#pragma textflag NOSPLIT
+uint64
+runtime·xadd64(uint64 volatile* addr, int64 v)
+{
+ uint64 old;
+
+ do
+ old = *addr;
+ while(!runtime·cas64(addr, old, old+v));
+
+ return old+v;
+}
+
+#pragma textflag NOSPLIT
+uint64
+runtime·xchg64(uint64 volatile* addr, uint64 v)
+{
+ uint64 old;
+
+ do
+ old = *addr;
+ while(!runtime·cas64(addr, old, v));
+
+ return old;
+}
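+
+// xadd64 and xchg64 above emulate 64-bit read-modify-write on 386 by retrying
+// a cas64 until it lands. The same pattern in Go, using the standard library's
+// 64-bit CAS (a sketch of the technique, not runtime code):
+//
+//	package sketch
+//
+//	import "sync/atomic"
+//
+//	// xadd64Sketch adds delta to *addr and returns the new value, retrying
+//	// the compare-and-swap until no other writer has intervened.
+//	func xadd64Sketch(addr *uint64, delta int64) uint64 {
+//		for {
+//			old := atomic.LoadUint64(addr)
+//			nv := old + uint64(delta)
+//			if atomic.CompareAndSwapUint64(addr, old, nv) {
+//				return nv
+//			}
+//		}
+//	}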
diff --git a/src/runtime/atomic_amd64x.c b/src/runtime/atomic_amd64x.c
new file mode 100644
index 000000000..7be57ac95
--- /dev/null
+++ b/src/runtime/atomic_amd64x.c
@@ -0,0 +1,29 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+
+#include "runtime.h"
+#include "textflag.h"
+
+#pragma textflag NOSPLIT
+uint32
+runtime·atomicload(uint32 volatile* addr)
+{
+ return *addr;
+}
+
+#pragma textflag NOSPLIT
+uint64
+runtime·atomicload64(uint64 volatile* addr)
+{
+ return *addr;
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·atomicloadp(void* volatile* addr)
+{
+ return *addr;
+}
diff --git a/src/runtime/atomic_arm.go b/src/runtime/atomic_arm.go
new file mode 100644
index 000000000..b1632cdd1
--- /dev/null
+++ b/src/runtime/atomic_arm.go
@@ -0,0 +1,155 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+var locktab [57]struct {
+ l mutex
+ pad [_CacheLineSize - unsafe.Sizeof(mutex{})]byte
+}
+
+func addrLock(addr *uint64) *mutex {
+ return &locktab[(uintptr(unsafe.Pointer(addr))>>3)%uintptr(len(locktab))].l
+}
+
+// Atomic add and return new value.
+//go:nosplit
+func xadd(val *uint32, delta int32) uint32 {
+ for {
+ oval := *val
+ nval := oval + uint32(delta)
+ if cas(val, oval, nval) {
+ return nval
+ }
+ }
+}
+
+//go:nosplit
+func xchg(addr *uint32, v uint32) uint32 {
+ for {
+ old := *addr
+ if cas(addr, old, v) {
+ return old
+ }
+ }
+}
+
+//go:nosplit
+func xchgp(addr *unsafe.Pointer, v unsafe.Pointer) unsafe.Pointer {
+ for {
+ old := *addr
+ if casp(addr, old, v) {
+ return old
+ }
+ }
+}
+
+//go:nosplit
+func xchguintptr(addr *uintptr, v uintptr) uintptr {
+ return uintptr(xchg((*uint32)(unsafe.Pointer(addr)), uint32(v)))
+}
+
+//go:nosplit
+func atomicload(addr *uint32) uint32 {
+ return xadd(addr, 0)
+}
+
+//go:nosplit
+func atomicloadp(addr unsafe.Pointer) unsafe.Pointer {
+ return unsafe.Pointer(uintptr(xadd((*uint32)(addr), 0)))
+}
+
+//go:nosplit
+func atomicstorep(addr unsafe.Pointer, v unsafe.Pointer) {
+ for {
+ old := *(*unsafe.Pointer)(addr)
+ if casp((*unsafe.Pointer)(addr), old, v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func atomicstore(addr *uint32, v uint32) {
+ for {
+ old := *addr
+ if cas(addr, old, v) {
+ return
+ }
+ }
+}
+
+//go:nosplit
+func cas64(addr *uint64, old, new uint64) bool {
+ var ok bool
+ onM(func() {
+ lock(addrLock(addr))
+ if *addr == old {
+ *addr = new
+ ok = true
+ }
+ unlock(addrLock(addr))
+ })
+ return ok
+}
+
+//go:nosplit
+func xadd64(addr *uint64, delta int64) uint64 {
+ var r uint64
+ onM(func() {
+ lock(addrLock(addr))
+ r = *addr + uint64(delta)
+ *addr = r
+ unlock(addrLock(addr))
+ })
+ return r
+}
+
+//go:nosplit
+func xchg64(addr *uint64, v uint64) uint64 {
+ var r uint64
+ onM(func() {
+ lock(addrLock(addr))
+ r = *addr
+ *addr = v
+ unlock(addrLock(addr))
+ })
+ return r
+}
+
+//go:nosplit
+func atomicload64(addr *uint64) uint64 {
+ var r uint64
+ onM(func() {
+ lock(addrLock(addr))
+ r = *addr
+ unlock(addrLock(addr))
+ })
+ return r
+}
+
+//go:nosplit
+func atomicstore64(addr *uint64, v uint64) {
+ onM(func() {
+ lock(addrLock(addr))
+ *addr = v
+ unlock(addrLock(addr))
+ })
+}
+
+//go:nosplit
+func atomicor8(addr *uint8, v uint8) {
+ // Align down to 4 bytes and use 32-bit CAS.
+ uaddr := uintptr(unsafe.Pointer(addr))
+ addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
+ word := uint32(v) << ((uaddr & 3) * 8) // little endian
+ for {
+ old := *addr32
+ if cas(addr32, old, old|word) {
+ return
+ }
+ }
+}
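+
+// atomicor8 above has no byte-sized atomic on ARM, so it ORs into the
+// containing 32-bit word: align the address down, shift the byte into position
+// (little endian), and CAS. The same trick with sync/atomic, plus a small
+// demonstration (sketch only; it leans on the same unsafe address arithmetic
+// as the runtime and assumes a little-endian machine):
+//
+//	package main
+//
+//	import (
+//		"fmt"
+//		"sync/atomic"
+//		"unsafe"
+//	)
+//
+//	// or8Sketch mirrors atomicor8: find the aligned word containing addr,
+//	// build a word with v shifted into the right byte lane, and OR it in
+//	// via a CAS retry loop.
+//	func or8Sketch(addr *uint8, v uint8) {
+//		uaddr := uintptr(unsafe.Pointer(addr))
+//		word := (*uint32)(unsafe.Pointer(uaddr &^ 3))
+//		mask := uint32(v) << ((uaddr & 3) * 8) // little endian
+//		for {
+//			old := atomic.LoadUint32(word)
+//			if atomic.CompareAndSwapUint32(word, old, old|mask) {
+//				return
+//			}
+//		}
+//	}
+//
+//	func main() {
+//		var w uint32 // 4-byte aligned, so the word math is safe
+//		b := (*[4]uint8)(unsafe.Pointer(&w))
+//		or8Sketch(&b[2], 0x80)
+//		fmt.Printf("%#010x\n", w) // 0x00800000 on little-endian
+//	}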
diff --git a/src/runtime/cgo/asm_386.s b/src/runtime/cgo/asm_386.s
new file mode 100644
index 000000000..a895083f1
--- /dev/null
+++ b/src/runtime/cgo/asm_386.s
@@ -0,0 +1,31 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+/*
+ * void crosscall2(void (*fn)(void*, int32), void*, int32)
+ * Save registers and call fn with two arguments.
+ */
+TEXT crosscall2(SB),NOSPLIT,$0
+ PUSHL BP
+ MOVL SP, BP
+ PUSHL BX
+ PUSHL SI
+ PUSHL DI
+
+ SUBL $8, SP
+ MOVL 16(BP), AX
+ MOVL AX, 4(SP)
+ MOVL 12(BP), AX
+ MOVL AX, 0(SP)
+ MOVL 8(BP), AX
+ CALL AX
+ ADDL $8, SP
+
+ POPL DI
+ POPL SI
+ POPL BX
+ POPL BP
+ RET
diff --git a/src/runtime/cgo/asm_amd64.s b/src/runtime/cgo/asm_amd64.s
new file mode 100644
index 000000000..6095bd133
--- /dev/null
+++ b/src/runtime/cgo/asm_amd64.s
@@ -0,0 +1,47 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+/*
+ * void crosscall2(void (*fn)(void*, int32), void*, int32)
+ * Save registers and call fn with two arguments.
+ */
+TEXT crosscall2(SB),NOSPLIT,$0
+ SUBQ $0x58, SP /* keeps stack pointer 32-byte aligned */
+ MOVQ BX, 0x10(SP)
+ MOVQ BP, 0x18(SP)
+ MOVQ R12, 0x20(SP)
+ MOVQ R13, 0x28(SP)
+ MOVQ R14, 0x30(SP)
+ MOVQ R15, 0x38(SP)
+
+#ifdef GOOS_windows
+	// Win64 treats RBX, RBP, RDI, RSI, RSP, and R12-R15 as callee-save,
+	// so DI and SI must also be saved here.
+ MOVQ DI, 0x40(SP)
+ MOVQ SI, 0x48(SP)
+
+ MOVQ DX, 0(SP) /* arg */
+ MOVQ R8, 8(SP) /* argsize (includes padding) */
+
+ CALL CX /* fn */
+
+ MOVQ 0x40(SP), DI
+ MOVQ 0x48(SP), SI
+#else
+ MOVQ SI, 0(SP) /* arg */
+ MOVQ DX, 8(SP) /* argsize (includes padding) */
+
+ CALL DI /* fn */
+#endif
+
+ MOVQ 0x10(SP), BX
+ MOVQ 0x18(SP), BP
+ MOVQ 0x20(SP), R12
+ MOVQ 0x28(SP), R13
+ MOVQ 0x30(SP), R14
+ MOVQ 0x38(SP), R15
+
+ ADDQ $0x58, SP
+ RET
diff --git a/src/runtime/cgo/asm_arm.s b/src/runtime/cgo/asm_arm.s
new file mode 100644
index 000000000..6e57432e3
--- /dev/null
+++ b/src/runtime/cgo/asm_arm.s
@@ -0,0 +1,24 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+/*
+ * void crosscall2(void (*fn)(void*, int32), void*, int32)
+ * Save registers and call fn with two arguments.
+ */
+TEXT crosscall2(SB),NOSPLIT,$-4
+ /*
+	 * We still need to save all callee-save registers as before, and then
+ * push 2 args for fn (R1 and R2).
+ * Also note that at procedure entry in 5c/5g world, 4(R13) will be the
+ * first arg, so we must push another dummy reg (R0) for 0(R13).
+ * Additionally, runtime·load_g will clobber R0, so we need to save R0
+ * nevertheless.
+ */
+ MOVM.WP [R0, R1, R2, R4, R5, R6, R7, R8, R9, g, R11, R12, R14], (R13)
+ BL runtime·load_g(SB)
+ MOVW PC, R14
+ MOVW 0(R13), PC
+ MOVM.IAW (R13), [R0, R1, R2, R4, R5, R6, R7, R8, R9, g, R11, R12, PC]
diff --git a/src/runtime/cgo/asm_nacl_amd64p32.s b/src/runtime/cgo/asm_nacl_amd64p32.s
new file mode 100644
index 000000000..eb92014ed
--- /dev/null
+++ b/src/runtime/cgo/asm_nacl_amd64p32.s
@@ -0,0 +1,13 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+/*
+ * void crosscall2(void (*fn)(void*, int32), void*, int32)
+ * Save registers and call fn with two arguments.
+ */
+TEXT crosscall2(SB),NOSPLIT,$0
+ INT $3
+ RET
diff --git a/src/runtime/cgo/callbacks.c b/src/runtime/cgo/callbacks.c
new file mode 100644
index 000000000..f074237d5
--- /dev/null
+++ b/src/runtime/cgo/callbacks.c
@@ -0,0 +1,102 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "../runtime.h"
+#include "../cgocall.h"
+#include "textflag.h"
+
+// These utility functions are available to be called from code
+// compiled with gcc via crosscall2.
+
+// The declaration of crosscall2 is:
+// void crosscall2(void (*fn)(void *, int), void *, int);
+//
+// We need to export the symbol crosscall2 in order to support
+// callbacks from shared libraries. This applies regardless of
+// linking mode.
+#pragma cgo_export_static crosscall2
+#pragma cgo_export_dynamic crosscall2
+
+// Allocate memory. This allocates the requested number of bytes in
+// memory controlled by the Go runtime. The allocated memory will be
+// zeroed. You are responsible for ensuring that the Go garbage
+// collector can see a pointer to the allocated memory for as long as
+// it is valid, e.g., by storing a pointer in a local variable in your
+// C function, or in memory allocated by the Go runtime. If the only
+// pointers are in a C global variable or in memory allocated via
+// malloc, then the Go garbage collector may collect the memory.
+
+// Call like this in code compiled with gcc:
+// struct { size_t len; void *ret; } a;
+// a.len = /* number of bytes to allocate */;
+// crosscall2(_cgo_allocate, &a, sizeof a);
+// /* Here a.ret is a pointer to the allocated memory. */
+
+static void
+_cgo_allocate_internal(uintptr len, byte *ret)
+{
+ CgoMal *c;
+
+ ret = runtime·mallocgc(len, nil, 0);
+ c = runtime·mallocgc(sizeof(*c), nil, 0);
+ c->next = g->m->cgomal;
+ c->alloc = ret;
+ g->m->cgomal = c;
+ FLUSH(&ret);
+}
+
+#pragma cgo_export_static _cgo_allocate
+#pragma cgo_export_dynamic _cgo_allocate
+#pragma textflag NOSPLIT
+void
+_cgo_allocate(void *a, int32 n)
+{
+ runtime·cgocallback((void(*)(void))_cgo_allocate_internal, a, n);
+}
+
+// Panic. The argument is converted into a Go string.
+
+// Call like this in code compiled with gcc:
+// struct { const char *p; } a;
+// a.p = /* string to pass to panic */;
+// crosscall2(_cgo_panic, &a, sizeof a);
+// /* The function call will not return. */
+
+extern void ·cgoStringToEface(String, Eface*);
+
+static void
+_cgo_panic_internal(byte *p)
+{
+ String s;
+ Eface err;
+
+ s = runtime·gostring(p);
+ ·cgoStringToEface(s, &err);
+ runtime·gopanic(err);
+}
+
+#pragma cgo_export_static _cgo_panic
+#pragma cgo_export_dynamic _cgo_panic
+#pragma textflag NOSPLIT
+void
+_cgo_panic(void *a, int32 n)
+{
+ runtime·cgocallback((void(*)(void))_cgo_panic_internal, a, n);
+}
+
+#pragma cgo_import_static x_cgo_init
+extern void x_cgo_init(G*);
+void (*_cgo_init)(G*) = x_cgo_init;
+
+#pragma cgo_import_static x_cgo_malloc
+extern void x_cgo_malloc(void*);
+void (*_cgo_malloc)(void*) = x_cgo_malloc;
+
+#pragma cgo_import_static x_cgo_free
+extern void x_cgo_free(void*);
+void (*_cgo_free)(void*) = x_cgo_free;
+
+#pragma cgo_import_static x_cgo_thread_start
+extern void x_cgo_thread_start(void*);
+void (*_cgo_thread_start)(void*) = x_cgo_thread_start;
diff --git a/src/runtime/cgo/cgo.go b/src/runtime/cgo/cgo.go
new file mode 100644
index 000000000..786ae515c
--- /dev/null
+++ b/src/runtime/cgo/cgo.go
@@ -0,0 +1,33 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package cgo contains runtime support for code generated
+by the cgo tool. See the documentation for the cgo command
+for details on using cgo.
+*/
+package cgo
+
+/*
+
+#cgo darwin LDFLAGS: -lpthread
+#cgo dragonfly LDFLAGS: -lpthread
+#cgo freebsd LDFLAGS: -lpthread
+#cgo android LDFLAGS: -llog
+#cgo !android,linux LDFLAGS: -lpthread
+#cgo netbsd LDFLAGS: -lpthread
+#cgo openbsd LDFLAGS: -lpthread
+#cgo windows LDFLAGS: -lm -mthreads
+
+#cgo CFLAGS: -Wall -Werror
+
+*/
+import "C"
+
+// Supports _cgo_panic by converting a string constant to an empty
+// interface.
+
+func cgoStringToEface(s string, ret *interface{}) {
+ *ret = s
+}
diff --git a/src/runtime/cgo/dragonfly.c b/src/runtime/cgo/dragonfly.c
new file mode 100644
index 000000000..acf53e265
--- /dev/null
+++ b/src/runtime/cgo/dragonfly.c
@@ -0,0 +1,13 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Supply environ and __progname, because we don't
+// link against the standard DragonFly crt0.o and the
+// libc dynamic library needs them.
+
+char *environ[1];
+char *__progname;
+
+#pragma dynexport environ environ
+#pragma dynexport __progname __progname
diff --git a/src/runtime/cgo/freebsd.c b/src/runtime/cgo/freebsd.c
new file mode 100644
index 000000000..dfcfa3a21
--- /dev/null
+++ b/src/runtime/cgo/freebsd.c
@@ -0,0 +1,13 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Supply environ and __progname, because we don't
+// link against the standard FreeBSD crt0.o and the
+// libc dynamic library needs them.
+
+char *environ[1];
+char *__progname;
+
+#pragma dynexport environ environ
+#pragma dynexport __progname __progname
diff --git a/src/runtime/cgo/gcc_386.S b/src/runtime/cgo/gcc_386.S
new file mode 100644
index 000000000..bf4142793
--- /dev/null
+++ b/src/runtime/cgo/gcc_386.S
@@ -0,0 +1,45 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * Apple and Windows still insist on underscore prefixes for C function names.
+ */
+#if defined(__APPLE__) || defined(_WIN32)
+#define EXT(s) _##s
+#else
+#define EXT(s) s
+#endif
+
+/*
+ * void crosscall_386(void (*fn)(void))
+ *
+ * Calling into the 8c tool chain, where all registers are caller save.
+ * Called from standard x86 ABI, where %ebp, %ebx, %esi,
+ * and %edi are callee-save, so they must be saved explicitly.
+ */
+.globl EXT(crosscall_386)
+EXT(crosscall_386):
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+ movl 8(%ebp), %eax /* fn */
+ call *%eax
+
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+
+.globl EXT(__stack_chk_fail_local)
+EXT(__stack_chk_fail_local):
+1:
+ jmp 1b
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/src/runtime/cgo/gcc_amd64.S b/src/runtime/cgo/gcc_amd64.S
new file mode 100644
index 000000000..32d0200cf
--- /dev/null
+++ b/src/runtime/cgo/gcc_amd64.S
@@ -0,0 +1,48 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * Apple still insists on underscore prefixes for C function names.
+ */
+#if defined(__APPLE__)
+#define EXT(s) _##s
+#else
+#define EXT(s) s
+#endif
+
+/*
+ * void crosscall_amd64(void (*fn)(void))
+ *
+ * Calling into the 6c tool chain, where all registers are caller save.
+ * Called from standard x86-64 ABI, where %rbx, %rbp, %r12-%r15
+ * are callee-save so they must be saved explicitly.
+ * The standard x86-64 ABI passes the three arguments m, g, fn
+ * in %rdi, %rsi, %rdx.
+ */
+.globl EXT(crosscall_amd64)
+EXT(crosscall_amd64):
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+#if defined(_WIN64)
+ call *%rcx /* fn */
+#else
+ call *%rdi /* fn */
+#endif
+
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ ret
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/src/runtime/cgo/gcc_android.c b/src/runtime/cgo/gcc_android.c
new file mode 100644
index 000000000..be2772568
--- /dev/null
+++ b/src/runtime/cgo/gcc_android.c
@@ -0,0 +1,31 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdarg.h>
+#include <android/log.h>
+#include "libcgo.h"
+
+void
+fatalf(const char* format, ...)
+{
+ va_list ap;
+
+ // Write to both stderr and logcat.
+ //
+ // When running from an .apk, /dev/stderr and /dev/stdout
+ // redirect to /dev/null. And when running a test binary
+ // via adb shell, it's easy to miss logcat.
+
+ fprintf(stderr, "runtime/cgo: ");
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ fprintf(stderr, "\n");
+
+ va_start(ap, format);
+ __android_log_vprint(ANDROID_LOG_FATAL, "runtime/cgo", format, ap);
+ va_end(ap);
+
+ abort();
+}
diff --git a/src/runtime/cgo/gcc_android_arm.c b/src/runtime/cgo/gcc_android_arm.c
new file mode 100644
index 000000000..07f7e72e3
--- /dev/null
+++ b/src/runtime/cgo/gcc_android_arm.c
@@ -0,0 +1,43 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/limits.h>
+#include "libcgo.h"
+
+#define magic1 (0x23581321U)
+
+// PTHREAD_KEYS_MAX has been added to sys/limits.h at head in bionic:
+// https://android.googlesource.com/platform/bionic/+/master/libc/include/sys/limits.h
+// TODO(crawshaw): remove this definition when a new NDK is released.
+#define PTHREAD_KEYS_MAX 128
+
+// inittls allocates a thread-local storage slot for g.
+//
+// It finds the first available slot using pthread_key_create and uses
+// it as the offset value for runtime.tlsg.
+static void
+inittls(void **tlsg, void **tlsbase)
+{
+ pthread_key_t k;
+ int i, err;
+
+ err = pthread_key_create(&k, nil);
+ if(err != 0) {
+ fatalf("pthread_key_create failed: %d", err);
+ }
+ pthread_setspecific(k, (void*)magic1);
+ for (i=0; i<PTHREAD_KEYS_MAX; i++) {
+ if (*(tlsbase+i) == (void*)magic1) {
+ *tlsg = (void*)(i*sizeof(void *));
+ pthread_setspecific(k, 0);
+ return;
+ }
+ }
+ fatalf("could not find pthread key");
+}
+
+void (*x_cgo_inittls)(void **tlsg, void **tlsbase) = inittls;
diff --git a/src/runtime/cgo/gcc_arm.S b/src/runtime/cgo/gcc_arm.S
new file mode 100644
index 000000000..2e4b3528b
--- /dev/null
+++ b/src/runtime/cgo/gcc_arm.S
@@ -0,0 +1,45 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * Apple still insists on underscore prefixes for C function names.
+ */
+#if defined(__APPLE__)
+#define EXT(s) _##s
+#else
+#define EXT(s) s
+#endif
+
+/*
+ * Because the assembler might target an earlier revision of the ISA
+ * by default, we must explicitly specify the ISA revision to ensure
+ * BLX is recognized as a valid instruction.
+ */
+.arch armv5t
+
+/*
+ * void crosscall_arm1(void (*fn)(void), void (*setg_gcc)(void *g), void *g)
+ *
+ * Calling into the 5c tool chain, where all registers are caller save.
+ * Called from standard ARM EABI, where r4-r11 are callee-save, so they
+ * must be saved explicitly.
+ */
+.globl EXT(crosscall_arm1)
+EXT(crosscall_arm1):
+ push {r4, r5, r6, r7, r8, r9, r10, r11, ip, lr}
+ mov r4, r0
+ mov r5, r1
+ mov r0, r2
+ blx r5 // setg(g)
+ blx r4 // fn()
+ pop {r4, r5, r6, r7, r8, r9, r10, r11, ip, pc}
+
+.globl EXT(__stack_chk_fail_local)
+EXT(__stack_chk_fail_local):
+1:
+ b 1b
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/src/runtime/cgo/gcc_darwin_386.c b/src/runtime/cgo/gcc_darwin_386.c
new file mode 100644
index 000000000..d1ef31ed4
--- /dev/null
+++ b/src/runtime/cgo/gcc_darwin_386.c
@@ -0,0 +1,155 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h> /* for strerror */
+#include <pthread.h>
+#include <signal.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static pthread_key_t k1;
+
+#define magic1 (0x23581321U)
+
+static void
+inittls(void)
+{
+ uint32 x;
+ pthread_key_t tofree[128], k;
+ int i, ntofree;
+
+ /*
+ * Allocate thread-local storage slot for g.
+ * The key numbers start at 0x100, and we expect to be
+ * one of the early calls to pthread_key_create, so we
+ * should be able to get a pretty low number.
+ *
+ * In Darwin/386 pthreads, %gs points at the thread
+ * structure, and each key is an index into the thread-local
+	 * storage array that begins at offset 0x48 within that structure.
+ * It may happen that we are not quite the first function to try
+ * to allocate thread-local storage keys, so instead of depending
+ * on getting 0x100, we try for 0x108, allocating keys until
+ * we get the one we want and then freeing the ones we didn't want.
+ *
+ * Thus the final offset to use in %gs references is
+ * 0x48+4*0x108 = 0x468.
+ *
+ * The linker and runtime hard-code this constant offset
+ * from %gs where we expect to find g.
+ * Known to ../../../liblink/sym.c:/468
+ * and to ../sys_darwin_386.s:/468
+ *
+ * This is truly disgusting and a bit fragile, but taking care
+ * of it here protects the rest of the system from damage.
+ * The alternative would be to use a global variable that
+ * held the offset and refer to that variable each time we
+ * need a %gs variable (g). That approach would
+ * require an extra instruction and memory reference in
+ * every stack growth prolog and would also require
+ * rewriting the code that 8c generates for extern registers.
+ *
+ * Things get more disgusting on OS X 10.7 Lion.
+ * The 0x48 base mentioned above is the offset of the tsd
+ * array within the per-thread structure on Leopard and Snow Leopard.
+ * On Lion, the base moved a little, so while the math above
+ * still applies, the base is different. Thus, we cannot
+ * look for specific key values if we want to build binaries
+ * that run on both systems. Instead, forget about the
+ * specific key values and just allocate and initialize per-thread
+ * storage until we find a key that writes to the memory location
+ * we want. Then keep that key.
+ */
+ ntofree = 0;
+ for(;;) {
+ if(pthread_key_create(&k, nil) < 0) {
+ fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
+ abort();
+ }
+ pthread_setspecific(k, (void*)magic1);
+ asm volatile("movl %%gs:0x468, %0" : "=r"(x));
+ pthread_setspecific(k, 0);
+ if(x == magic1) {
+ k1 = k;
+ break;
+ }
+ if(ntofree >= nelem(tofree)) {
+ fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n");
+ fprintf(stderr, "\ttried");
+ for(i=0; i<ntofree; i++)
+ fprintf(stderr, " %#x", (unsigned)tofree[i]);
+ fprintf(stderr, "\n");
+ abort();
+ }
+ tofree[ntofree++] = k;
+ }
+
+ /*
+ * We got the key we wanted. Free the others.
+ */
+ for(i=0; i<ntofree; i++)
+ pthread_key_delete(tofree[i]);
+}
+
+void
+x_cgo_init(G *g)
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+
+ inittls();
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ pthread_setspecific(k1, (void*)ts.g);
+
+ crosscall_386(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_darwin_amd64.c b/src/runtime/cgo/gcc_darwin_amd64.c
new file mode 100644
index 000000000..358a2816a
--- /dev/null
+++ b/src/runtime/cgo/gcc_darwin_amd64.c
@@ -0,0 +1,126 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h> /* for strerror */
+#include <pthread.h>
+#include <signal.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static pthread_key_t k1;
+
+#define magic1 (0x23581321345589ULL)
+
+static void
+inittls(void)
+{
+ uint64 x;
+ pthread_key_t tofree[128], k;
+ int i, ntofree;
+
+ /*
+	 * Same logic and code as darwin_386.c:/inittls, except that words
+ * are 8 bytes long now, and the thread-local storage starts
+ * at 0x60 on Leopard / Snow Leopard. So the offset is
+ * 0x60+8*0x108 = 0x8a0.
+ *
+ * The linker and runtime hard-code this constant offset
+ * from %gs where we expect to find g.
+ * Known to ../../../liblink/sym.c:/8a0
+ * and to ../sys_darwin_amd64.s:/8a0
+ *
+ * As disgusting as on the 386; same justification.
+ */
+ ntofree = 0;
+ for(;;) {
+ if(pthread_key_create(&k, nil) < 0) {
+ fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
+ abort();
+ }
+ pthread_setspecific(k, (void*)magic1);
+ asm volatile("movq %%gs:0x8a0, %0" : "=r"(x));
+ pthread_setspecific(k, 0);
+ if(x == magic1) {
+ k1 = k;
+ break;
+ }
+ if(ntofree >= nelem(tofree)) {
+ fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n");
+ fprintf(stderr, "\ttried");
+ for(i=0; i<ntofree; i++)
+ fprintf(stderr, " %#x", (unsigned)tofree[i]);
+ fprintf(stderr, "\n");
+ abort();
+ }
+ tofree[ntofree++] = k;
+ }
+
+ /*
+ * We got the key we wanted. Free the others.
+ */
+ for(i=0; i<ntofree; i++)
+ pthread_key_delete(tofree[i]);
+}
+
+void
+x_cgo_init(G *g)
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+
+ inittls();
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ pthread_setspecific(k1, (void*)ts.g);
+
+ crosscall_amd64(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_dragonfly_386.c b/src/runtime/cgo/gcc_dragonfly_386.c
new file mode 100644
index 000000000..6af61ac49
--- /dev/null
+++ b/src/runtime/cgo/gcc_dragonfly_386.c
@@ -0,0 +1,77 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <sys/signalvar.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ SIGFILLSET(ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_386(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_dragonfly_amd64.c b/src/runtime/cgo/gcc_dragonfly_amd64.c
new file mode 100644
index 000000000..a29d52294
--- /dev/null
+++ b/src/runtime/cgo/gcc_dragonfly_amd64.c
@@ -0,0 +1,77 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <sys/signalvar.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ SIGFILLSET(ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_amd64(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_fatalf.c b/src/runtime/cgo/gcc_fatalf.c
new file mode 100644
index 000000000..21c1acfaa
--- /dev/null
+++ b/src/runtime/cgo/gcc_fatalf.c
@@ -0,0 +1,23 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !android,linux
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "libcgo.h"
+
+void
+fatalf(const char* format, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "runtime/cgo: ");
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ fprintf(stderr, "\n");
+ abort();
+}
diff --git a/src/runtime/cgo/gcc_freebsd_386.c b/src/runtime/cgo/gcc_freebsd_386.c
new file mode 100644
index 000000000..6af61ac49
--- /dev/null
+++ b/src/runtime/cgo/gcc_freebsd_386.c
@@ -0,0 +1,77 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <sys/signalvar.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ SIGFILLSET(ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_386(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_freebsd_amd64.c b/src/runtime/cgo/gcc_freebsd_amd64.c
new file mode 100644
index 000000000..a29d52294
--- /dev/null
+++ b/src/runtime/cgo/gcc_freebsd_amd64.c
@@ -0,0 +1,77 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <sys/signalvar.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ SIGFILLSET(ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_amd64(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_freebsd_arm.c b/src/runtime/cgo/gcc_freebsd_arm.c
new file mode 100644
index 000000000..16530f020
--- /dev/null
+++ b/src/runtime/cgo/gcc_freebsd_arm.c
@@ -0,0 +1,89 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <machine/sysarch.h>
+#include <sys/signalvar.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+#ifdef ARM_TP_ADDRESS
+// ARM_TP_ADDRESS is (ARM_VECTORS_HIGH + 0x1000) or 0xffff1000
+// and is known to runtime.read_tls_fallback. Verify it with
+// cpp.
+#if ARM_TP_ADDRESS != 0xffff1000
+#error Wrong ARM_TP_ADDRESS!
+#endif
+#endif
+
+static void *threadentry(void*);
+
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ SIGFILLSET(ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ // Not sure why the memset is necessary here,
+ // but without it, we get a bogus stack size
+ // out of pthread_attr_getstacksize. C'est la Linux.
+ memset(&attr, 0, sizeof attr);
+ pthread_attr_init(&attr);
+ size = 0;
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+extern void crosscall_arm1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2;
+
+ crosscall_arm1(ts.fn, setg_gcc, (void*)ts.g);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_linux_386.c b/src/runtime/cgo/gcc_linux_386.c
new file mode 100644
index 000000000..82b156cbb
--- /dev/null
+++ b/src/runtime/cgo/gcc_linux_386.c
@@ -0,0 +1,79 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <pthread.h>
+#include <string.h>
+#include <signal.h>
+#include "libcgo.h"
+
+static void *threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ // Not sure why the memset is necessary here,
+ // but without it, we get a bogus stack size
+ // out of pthread_attr_getstacksize. C'est la Linux.
+ memset(&attr, 0, sizeof attr);
+ pthread_attr_init(&attr);
+ size = 0;
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fatalf("pthread_create failed: %s", strerror(err));
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_386(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_linux_amd64.c b/src/runtime/cgo/gcc_linux_amd64.c
new file mode 100644
index 000000000..fdbf51c25
--- /dev/null
+++ b/src/runtime/cgo/gcc_linux_amd64.c
@@ -0,0 +1,74 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <pthread.h>
+#include <string.h> // strerror
+#include <signal.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G* g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fatalf("pthread_create failed: %s", strerror(err));
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_amd64(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_linux_arm.c b/src/runtime/cgo/gcc_linux_arm.c
new file mode 100644
index 000000000..ef16d2341
--- /dev/null
+++ b/src/runtime/cgo/gcc_linux_arm.c
@@ -0,0 +1,80 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <pthread.h>
+#include <string.h>
+#include <signal.h>
+#include "libcgo.h"
+
+static void *threadentry(void*);
+
+void (*x_cgo_inittls)(void **tlsg, void **tlsbase);
+void (*setg_gcc)(void*);
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ // Not sure why the memset is necessary here,
+ // but without it, we get a bogus stack size
+ // out of pthread_attr_getstacksize. C'est la Linux.
+ memset(&attr, 0, sizeof attr);
+ pthread_attr_init(&attr);
+ size = 0;
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fatalf("pthread_create failed: %s", strerror(err));
+ }
+}
+
+extern void crosscall_arm1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2;
+
+ crosscall_arm1(ts.fn, setg_gcc, (void*)ts.g);
+ return nil;
+}
+
+void
+x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+
+ if (x_cgo_inittls) {
+ x_cgo_inittls(tlsg, tlsbase);
+ }
+}
diff --git a/src/runtime/cgo/gcc_netbsd_386.c b/src/runtime/cgo/gcc_netbsd_386.c
new file mode 100644
index 000000000..a2b7ef3fc
--- /dev/null
+++ b/src/runtime/cgo/gcc_netbsd_386.c
@@ -0,0 +1,76 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_386(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_netbsd_amd64.c b/src/runtime/cgo/gcc_netbsd_amd64.c
new file mode 100644
index 000000000..ccd08b73c
--- /dev/null
+++ b/src/runtime/cgo/gcc_netbsd_amd64.c
@@ -0,0 +1,77 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_amd64(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_netbsd_arm.c b/src/runtime/cgo/gcc_netbsd_arm.c
new file mode 100644
index 000000000..5c0603dc6
--- /dev/null
+++ b/src/runtime/cgo/gcc_netbsd_arm.c
@@ -0,0 +1,73 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void *threadentry(void*);
+
+static void (*setg_gcc)(void*);
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+extern void crosscall_arm1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096 * 2;
+
+ crosscall_arm1(ts.fn, setg_gcc, (void*)ts.g);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_openbsd_386.c b/src/runtime/cgo/gcc_openbsd_386.c
new file mode 100644
index 000000000..48b4bc739
--- /dev/null
+++ b/src/runtime/cgo/gcc_openbsd_386.c
@@ -0,0 +1,165 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+// TCB_SIZE is sizeof(struct thread_control_block),
+// as defined in /usr/src/lib/librthread/tcb.h
+#define TCB_SIZE (4 * sizeof(void *))
+#define TLS_SIZE (2 * sizeof(void *))
+
+void *__get_tcb(void);
+void __set_tcb(void *);
+
+static int (*sys_pthread_create)(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg);
+
+struct thread_args {
+ void *(*func)(void *);
+ void *arg;
+};
+
+static void
+tcb_fixup(int mainthread)
+{
+ void *newtcb, *oldtcb;
+
+ // The OpenBSD ld.so(1) does not currently support PT_TLS. As a result,
+ // we need to allocate our own TLS space while preserving the existing
+ // TCB that has been setup via librthread.
+
+ newtcb = malloc(TCB_SIZE + TLS_SIZE);
+ if(newtcb == NULL)
+ abort();
+
+ // The signal trampoline expects the TLS slots to be zeroed.
+ bzero(newtcb, TLS_SIZE);
+
+ oldtcb = __get_tcb();
+ bcopy(oldtcb, newtcb + TLS_SIZE, TCB_SIZE);
+ __set_tcb(newtcb + TLS_SIZE);
+
+	// NOTE(jsing, minux): we can't free oldtcb without causing a double-free
+	// problem, so newtcb is leaked. Get rid of this when OpenBSD
+ // has proper support for PT_TLS.
+}
+
+static void *
+thread_start_wrapper(void *arg)
+{
+ struct thread_args args = *(struct thread_args *)arg;
+
+ free(arg);
+ tcb_fixup(0);
+
+ return args.func(args.arg);
+}
+
+int
+pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg)
+{
+ struct thread_args *p;
+
+ p = malloc(sizeof(*p));
+ if(p == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+ p->func = start_routine;
+ p->arg = arg;
+
+ return sys_pthread_create(thread, attr, thread_start_wrapper, p);
+}
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+ void *handle;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+
+ // Locate symbol for the system pthread_create function.
+ handle = dlopen("libpthread.so", RTLD_LAZY);
+ if(handle == NULL) {
+ fprintf(stderr, "dlopen: failed to load libpthread: %s\n", dlerror());
+ abort();
+ }
+ sys_pthread_create = dlsym(handle, "pthread_create");
+ if(sys_pthread_create == NULL) {
+ fprintf(stderr, "dlsym: failed to find pthread_create: %s\n", dlerror());
+ abort();
+ }
+ dlclose(handle);
+
+ tcb_fixup(1);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ ts->g->stackguard = size;
+ err = sys_pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ tcb_fixup(0);
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_386(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_openbsd_amd64.c b/src/runtime/cgo/gcc_openbsd_amd64.c
new file mode 100644
index 000000000..5f0d3bb45
--- /dev/null
+++ b/src/runtime/cgo/gcc_openbsd_amd64.c
@@ -0,0 +1,166 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <sys/types.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include "libcgo.h"
+
+static void* threadentry(void*);
+static void (*setg_gcc)(void*);
+
+// TCB_SIZE is sizeof(struct thread_control_block),
+// as defined in /usr/src/lib/librthread/tcb.h
+#define TCB_SIZE (4 * sizeof(void *))
+#define TLS_SIZE (2 * sizeof(void *))
+
+void *__get_tcb(void);
+void __set_tcb(void *);
+
+static int (*sys_pthread_create)(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg);
+
+struct thread_args {
+ void *(*func)(void *);
+ void *arg;
+};
+
+static void
+tcb_fixup(int mainthread)
+{
+ void *newtcb, *oldtcb;
+
+ // The OpenBSD ld.so(1) does not currently support PT_TLS. As a result,
+ // we need to allocate our own TLS space while preserving the existing
+ // TCB that has been setup via librthread.
+
+ newtcb = malloc(TCB_SIZE + TLS_SIZE);
+ if(newtcb == NULL)
+ abort();
+
+ // The signal trampoline expects the TLS slots to be zeroed.
+ bzero(newtcb, TLS_SIZE);
+
+ oldtcb = __get_tcb();
+ bcopy(oldtcb, newtcb + TLS_SIZE, TCB_SIZE);
+ __set_tcb(newtcb + TLS_SIZE);
+
+	// NOTE(jsing, minux): we can't free oldtcb without causing a double-free
+	// problem, so newtcb is leaked. Get rid of this when OpenBSD
+ // has proper support for PT_TLS.
+}
+
+static void *
+thread_start_wrapper(void *arg)
+{
+ struct thread_args args = *(struct thread_args *)arg;
+
+ free(arg);
+ tcb_fixup(0);
+
+ return args.func(args.arg);
+}
+
+int
+pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg)
+{
+ struct thread_args *p;
+
+ p = malloc(sizeof(*p));
+ if(p == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+ p->func = start_routine;
+ p->arg = arg;
+
+ return sys_pthread_create(thread, attr, thread_start_wrapper, p);
+}
+
+void
+x_cgo_init(G *g, void (*setg)(void*))
+{
+ pthread_attr_t attr;
+ size_t size;
+ void *handle;
+
+ setg_gcc = setg;
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+ g->stackguard = (uintptr)&attr - size + 4096;
+ pthread_attr_destroy(&attr);
+
+ // Locate symbol for the system pthread_create function.
+ handle = dlopen("libpthread.so", RTLD_LAZY);
+ if(handle == NULL) {
+ fprintf(stderr, "dlopen: failed to load libpthread: %s\n", dlerror());
+ abort();
+ }
+ sys_pthread_create = dlsym(handle, "pthread_create");
+ if(sys_pthread_create == NULL) {
+ fprintf(stderr, "dlsym: failed to find pthread_create: %s\n", dlerror());
+ abort();
+ }
+ dlclose(handle);
+
+ tcb_fixup(1);
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ pthread_attr_t attr;
+ sigset_t ign, oset;
+ pthread_t p;
+ size_t size;
+ int err;
+
+ sigfillset(&ign);
+ pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+ pthread_attr_init(&attr);
+ pthread_attr_getstacksize(&attr, &size);
+
+ ts->g->stackguard = size;
+ err = sys_pthread_create(&p, &attr, threadentry, ts);
+
+ pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+ if (err != 0) {
+ fprintf(stderr, "runtime/cgo: pthread_create failed: %s\n", strerror(err));
+ abort();
+ }
+}
+
+static void*
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ tcb_fixup(0);
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+
+ /*
+ * _cgo_sys_thread_start set stackguard to stack size;
+ * change to actual guard pointer.
+ */
+ ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
+
+ /*
+ * Set specific keys.
+ */
+ setg_gcc((void*)ts.g);
+
+ crosscall_amd64(ts.fn);
+ return nil;
+}
diff --git a/src/runtime/cgo/gcc_setenv.c b/src/runtime/cgo/gcc_setenv.c
new file mode 100644
index 000000000..8b128b946
--- /dev/null
+++ b/src/runtime/cgo/gcc_setenv.c
@@ -0,0 +1,16 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+#include "libcgo.h"
+
+#include <stdlib.h>
+
+/* Stub for calling setenv */
+void
+x_cgo_setenv(char **arg)
+{
+ setenv(arg[0], arg[1], 1);
+}
diff --git a/src/runtime/cgo/gcc_util.c b/src/runtime/cgo/gcc_util.c
new file mode 100644
index 000000000..143734e94
--- /dev/null
+++ b/src/runtime/cgo/gcc_util.c
@@ -0,0 +1,47 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "libcgo.h"
+
+/* Stub for calling malloc from Go */
+void
+x_cgo_malloc(void *p)
+{
+ struct a {
+ long long n;
+ void *ret;
+ } *a = p;
+
+ a->ret = malloc(a->n);
+ if(a->ret == NULL && a->n == 0)
+ a->ret = malloc(1);
+}
+
+/* Stub for calling free from Go */
+void
+x_cgo_free(void *p)
+{
+ struct a {
+ void *arg;
+ } *a = p;
+
+ free(a->arg);
+}
+
+/* Stub for creating a new thread */
+void
+x_cgo_thread_start(ThreadStart *arg)
+{
+ ThreadStart *ts;
+
+ /* Make our own copy that can persist after we return. */
+ ts = malloc(sizeof *ts);
+ if(ts == nil) {
+ fprintf(stderr, "runtime/cgo: out of memory in thread_start\n");
+ abort();
+ }
+ *ts = *arg;
+
+ _cgo_sys_thread_start(ts); /* OS-dependent half */
+}
diff --git a/src/runtime/cgo/gcc_windows_386.c b/src/runtime/cgo/gcc_windows_386.c
new file mode 100644
index 000000000..0935b74f2
--- /dev/null
+++ b/src/runtime/cgo/gcc_windows_386.c
@@ -0,0 +1,61 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "libcgo.h"
+
+static void threadentry(void*);
+
+/* 1MB is the default stack size for 32-bit Windows.
+ Allocation granularity on Windows is typically 64 KB.
+ The constant is also hardcoded in cmd/ld/pe.c (keep synchronized). */
+#define STACKSIZE (1*1024*1024)
+
+void
+x_cgo_init(G *g)
+{
+ int tmp;
+ g->stackguard = (uintptr)&tmp - STACKSIZE + 8*1024;
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ uintptr_t thandle;
+
+ thandle = _beginthread(threadentry, 0, ts);
+ if(thandle == -1) {
+ fprintf(stderr, "runtime: failed to create new OS thread (%d)\n", errno);
+ abort();
+ }
+}
+
+static void
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+ ts.g->stackguard = (uintptr)&ts - STACKSIZE + 8*1024;
+
+ /*
+ * Set specific keys in thread local storage.
+ */
+ asm volatile (
+ "movl %0, %%fs:0x14\n" // MOVL tls0, 0x14(FS)
+ "movl %%fs:0x14, %%eax\n" // MOVL 0x14(FS), tmp
+ "movl %1, 0(%%eax)\n" // MOVL g, 0(FS)
+ :: "r"(ts.tls), "r"(ts.g) : "%eax"
+ );
+
+ crosscall_386(ts.fn);
+}
diff --git a/src/runtime/cgo/gcc_windows_amd64.c b/src/runtime/cgo/gcc_windows_amd64.c
new file mode 100644
index 000000000..4a2540a35
--- /dev/null
+++ b/src/runtime/cgo/gcc_windows_amd64.c
@@ -0,0 +1,61 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "libcgo.h"
+
+static void threadentry(void*);
+
+/* 2MB is the default stack size for 64-bit Windows.
+ Allocation granularity on Windows is typically 64 KB.
+ The constant is also hardcoded in cmd/ld/pe.c (keep synchronized). */
+#define STACKSIZE (2*1024*1024)
+
+void
+x_cgo_init(G *g)
+{
+ int tmp;
+ g->stackguard = (uintptr)&tmp - STACKSIZE + 8*1024;
+}
+
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+ uintptr_t thandle;
+
+ thandle = _beginthread(threadentry, 0, ts);
+ if(thandle == -1) {
+ fprintf(stderr, "runtime: failed to create new OS thread (%d)\n", errno);
+ abort();
+ }
+}
+
+static void
+threadentry(void *v)
+{
+ ThreadStart ts;
+
+ ts = *(ThreadStart*)v;
+ free(v);
+
+ ts.g->stackbase = (uintptr)&ts;
+ ts.g->stackguard = (uintptr)&ts - STACKSIZE + 8*1024;
+
+ /*
+ * Set specific keys in thread local storage.
+ */
+	asm volatile (
+	  "movq %0, %%gs:0x28\n"	// MOVQ tls0, 0x28(GS)
+ "movq %%gs:0x28, %%rax\n" // MOVQ 0x28(GS), tmp
+ "movq %1, 0(%%rax)\n" // MOVQ g, 0(GS)
+ :: "r"(ts.tls), "r"(ts.g) : "%rax"
+ );
+
+ crosscall_amd64(ts.fn);
+}
diff --git a/src/runtime/cgo/iscgo.c b/src/runtime/cgo/iscgo.c
new file mode 100644
index 000000000..0907a1958
--- /dev/null
+++ b/src/runtime/cgo/iscgo.c
@@ -0,0 +1,15 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The runtime package contains an uninitialized definition
+// for runtime·iscgo. Override it to tell the runtime we're here.
+// There are various function pointers that should be set too,
+// but those depend on dynamic linker magic to get initialized
+// correctly, and sometimes they break. This variable is a
+// backup: it depends only on old C style static linking rules.
+
+#include "../runtime.h"
+
+bool runtime·iscgo = 1;
+uint32 runtime·needextram = 1; // create an extra M on first cgo call
diff --git a/src/runtime/cgo/libcgo.h b/src/runtime/cgo/libcgo.h
new file mode 100644
index 000000000..799af05ea
--- /dev/null
+++ b/src/runtime/cgo/libcgo.h
@@ -0,0 +1,65 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define nil ((void*)0)
+#define nelem(x) (sizeof(x)/sizeof((x)[0]))
+
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+typedef uintptr_t uintptr;
+
+/*
+ * The beginning of the per-goroutine structure,
+ * as defined in ../pkg/runtime/runtime.h.
+ * Just enough to edit these two fields.
+ */
+typedef struct G G;
+struct G
+{
+ uintptr stackguard;
+ uintptr stackbase;
+};
+
+/*
+ * Arguments to the _cgo_thread_start call.
+ * Also known to ../pkg/runtime/runtime.h.
+ */
+typedef struct ThreadStart ThreadStart;
+struct ThreadStart
+{
+ G *g;
+ uintptr *tls;
+ void (*fn)(void);
+};
+
+/*
+ * Called by 5c/6c/8c world.
+ * Makes a local copy of the ThreadStart and
+ * calls _cgo_sys_thread_start(ts).
+ */
+extern void (*_cgo_thread_start)(ThreadStart *ts);
+
+/*
+ * Creates the new operating system thread (OS, arch dependent).
+ */
+void _cgo_sys_thread_start(ThreadStart *ts);
+
+/*
+ * Call fn in the 6c world.
+ */
+void crosscall_amd64(void (*fn)(void));
+
+/*
+ * Call fn in the 8c world.
+ */
+void crosscall_386(void (*fn)(void));
+
+/*
+ * Prints an error, then calls abort. Used on Linux and Android.
+ */
+void fatalf(const char* format, ...);
diff --git a/src/runtime/cgo/netbsd.c b/src/runtime/cgo/netbsd.c
new file mode 100644
index 000000000..b6403f686
--- /dev/null
+++ b/src/runtime/cgo/netbsd.c
@@ -0,0 +1,13 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Supply environ and __progname, because we don't
+// link against the standard NetBSD crt0.o and the
+// libc dynamic library needs them.
+
+char *environ[1];
+char *__progname;
+
+#pragma dynexport environ environ
+#pragma dynexport __progname __progname
diff --git a/src/runtime/cgo/openbsd.c b/src/runtime/cgo/openbsd.c
new file mode 100644
index 000000000..84e9f9eff
--- /dev/null
+++ b/src/runtime/cgo/openbsd.c
@@ -0,0 +1,21 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Supply environ, __progname and __guard_local, because
+// we don't link against the standard OpenBSD crt0.o and
+// the libc dynamic library needs them.
+
+char *environ[1];
+char *__progname;
+long __guard_local;
+
+#pragma dynexport environ environ
+#pragma dynexport __progname __progname
+
+// This is normally marked as hidden and placed in the
+// .openbsd.randomdata section.
+#pragma dynexport __guard_local __guard_local
+
+// We override pthread_create to support PT_TLS.
+#pragma dynexport pthread_create pthread_create
diff --git a/src/runtime/cgo/setenv.c b/src/runtime/cgo/setenv.c
new file mode 100644
index 000000000..ee529904f
--- /dev/null
+++ b/src/runtime/cgo/setenv.c
@@ -0,0 +1,10 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+#pragma cgo_import_static x_cgo_setenv
+
+void x_cgo_setenv(char**);
+void (*runtime·_cgo_setenv)(char**) = x_cgo_setenv;
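Editor's note: gcc_setenv.c and setenv.c above wire the runtime·_cgo_setenv hook to the C library's setenv, so that in a cgo-enabled build environment changes made from Go are also visible to C code. The following is an illustrative sketch, not part of this commit (the variable name CGO_DEMO is arbitrary):

package main

/*
#include <stdlib.h>
*/
import "C"

import (
	"fmt"
	"os"
	"unsafe"
)

func main() {
	// With cgo in use, os.Setenv is propagated through the
	// runtime·_cgo_setenv hook installed above.
	os.Setenv("CGO_DEMO", "hello")

	name := C.CString("CGO_DEMO")
	defer C.free(unsafe.Pointer(name))

	// The C library's getenv now sees the update made from Go.
	fmt.Println(C.GoString(C.getenv(name)))
}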
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
new file mode 100644
index 000000000..76a533e93
--- /dev/null
+++ b/src/runtime/cgocall.go
@@ -0,0 +1,270 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Cgo call and callback support.
+//
+// To call into the C function f from Go, the cgo-generated code calls
+// runtime.cgocall(_cgo_Cfunc_f, frame), where _cgo_Cfunc_f is a
+// gcc-compiled function written by cgo.
+//
+// runtime.cgocall (below) locks g to m, calls entersyscall
+// so as not to block other goroutines or the garbage collector,
+// and then calls runtime.asmcgocall(_cgo_Cfunc_f, frame).
+//
+// runtime.asmcgocall (in asm_$GOARCH.s) switches to the m->g0 stack
+// (assumed to be an operating system-allocated stack, so safe to run
+// gcc-compiled code on) and calls _cgo_Cfunc_f(frame).
+//
+// _cgo_Cfunc_f invokes the actual C function f with arguments
+// taken from the frame structure, records the results in the frame,
+// and returns to runtime.asmcgocall.
+//
+// After it regains control, runtime.asmcgocall switches back to the
+// original g (m->curg)'s stack and returns to runtime.cgocall.
+//
+// After it regains control, runtime.cgocall calls exitsyscall, which blocks
+// until this m can run Go code without violating the $GOMAXPROCS limit,
+// and then unlocks g from m.
+//
+// The above description skipped over the possibility of the gcc-compiled
+// function f calling back into Go. If that happens, we continue down
+// the rabbit hole during the execution of f.
+//
+// To make it possible for gcc-compiled C code to call a Go function p.GoF,
+// cgo writes a gcc-compiled function named GoF (not p.GoF, since gcc doesn't
+// know about packages). The gcc-compiled C function f calls GoF.
+//
+// GoF calls crosscall2(_cgoexp_GoF, frame, framesize). Crosscall2
+// (in cgo/gcc_$GOARCH.S, a gcc-compiled assembly file) is a two-argument
+// adapter from the gcc function call ABI to the 6c function call ABI.
+// It is called from gcc to call 6c functions. In this case it calls
+// _cgoexp_GoF(frame, framesize), still running on m->g0's stack
+// and outside the $GOMAXPROCS limit. Thus, this code cannot yet
+// call arbitrary Go code directly and must be careful not to allocate
+// memory or use up m->g0's stack.
+//
+// _cgoexp_GoF calls runtime.cgocallback(p.GoF, frame, framesize).
+// (The reason for having _cgoexp_GoF instead of writing a crosscall3
+// to make this call directly is that _cgoexp_GoF, because it is compiled
+// with 6c instead of gcc, can refer to dotted names like
+// runtime.cgocallback and p.GoF.)
+//
+// runtime.cgocallback (in asm_$GOARCH.s) switches from m->g0's
+// stack to the original g (m->curg)'s stack, on which it calls
+// runtime.cgocallbackg(p.GoF, frame, framesize).
+// As part of the stack switch, runtime.cgocallback saves the current
+// SP as m->g0->sched.sp, so that any use of m->g0's stack during the
+// execution of the callback will be done below the existing stack frames.
+// Before overwriting m->g0->sched.sp, it pushes the old value on the
+// m->g0 stack, so that it can be restored later.
+//
+// runtime.cgocallbackg (below) is now running on a real goroutine
+// stack (not an m->g0 stack). First it calls runtime.exitsyscall, which will
+// block until the $GOMAXPROCS limit allows running this goroutine.
+// Once exitsyscall has returned, it is safe to do things like call the memory
+// allocator or invoke the Go callback function p.GoF. runtime.cgocallbackg
+// first defers a function to unwind m->g0.sched.sp, so that if p.GoF
+// panics, m->g0.sched.sp will be restored to its old value: the m->g0 stack
+// and the m->curg stack will be unwound in lock step.
+// Then it calls p.GoF. Finally it pops but does not execute the deferred
+// function, calls runtime.entersyscall, and returns to runtime.cgocallback.
+//
+// After it regains control, runtime.cgocallback switches back to
+// m->g0's stack (the pointer is still in m->g0.sched.sp), restores the old
+// m->g0.sched.sp value from the stack, and returns to _cgoexp_GoF.
+//
+// _cgoexp_GoF immediately returns to crosscall2, which restores the
+// callee-save registers for gcc and returns to GoF, which returns to f.
+
+package runtime
+
+import "unsafe"
+
+// Call from Go to C.
+//go:nosplit
+func cgocall(fn, arg unsafe.Pointer) {
+ cgocall_errno(fn, arg)
+}
+
+//go:nosplit
+func cgocall_errno(fn, arg unsafe.Pointer) int32 {
+ if !iscgo && GOOS != "solaris" && GOOS != "windows" {
+ gothrow("cgocall unavailable")
+ }
+
+ if fn == nil {
+ gothrow("cgocall nil")
+ }
+
+ if raceenabled {
+ racereleasemerge(unsafe.Pointer(&racecgosync))
+ }
+
+	// On the first cgo call, create an extra M for callbacks on threads not created by Go.
+ if needextram == 1 && cas(&needextram, 1, 0) {
+ onM(newextram)
+ }
+
+ /*
+ * Lock g to m to ensure we stay on the same stack if we do a
+ * cgo callback. Add entry to defer stack in case of panic.
+ */
+ lockOSThread()
+ mp := getg().m
+ mp.ncgocall++
+ mp.ncgo++
+ defer endcgo(mp)
+
+ /*
+ * Announce we are entering a system call
+ * so that the scheduler knows to create another
+ * M to run goroutines while we are in the
+ * foreign code.
+ *
+ * The call to asmcgocall is guaranteed not to
+ * split the stack and does not allocate memory,
+ * so it is safe to call while "in a system call", outside
+ * the $GOMAXPROCS accounting.
+ */
+ entersyscall()
+ errno := asmcgocall_errno(fn, arg)
+ exitsyscall()
+
+ return errno
+}
+
+func endcgo(mp *m) {
+ mp.ncgo--
+ if mp.ncgo == 0 {
+ // We are going back to Go and are not in a recursive
+ // call. Let the GC collect any memory allocated via
+ // _cgo_allocate that is no longer referenced.
+ mp.cgomal = nil
+ }
+
+ if raceenabled {
+ raceacquire(unsafe.Pointer(&racecgosync))
+ }
+
+ unlockOSThread() // invalidates mp
+}
+
+// Helper functions for cgo code.
+
+// Filled by schedinit from corresponding C variables,
+// which are in turn filled in by the dynamic linker when cgo is available.
+var cgoMalloc, cgoFree unsafe.Pointer
+
+func cmalloc(n uintptr) unsafe.Pointer {
+ var args struct {
+ n uint64
+ ret unsafe.Pointer
+ }
+ args.n = uint64(n)
+ cgocall(cgoMalloc, unsafe.Pointer(&args))
+ if args.ret == nil {
+ gothrow("C malloc failed")
+ }
+ return args.ret
+}
+
+func cfree(p unsafe.Pointer) {
+ cgocall(cgoFree, p)
+}
+
+// Call from C back to Go.
+//go:nosplit
+func cgocallbackg() {
+ if gp := getg(); gp != gp.m.curg {
+ println("runtime: bad g in cgocallback")
+ exit(2)
+ }
+
+ exitsyscall() // coming out of cgo call
+ cgocallbackg1()
+ entersyscall() // going back to cgo call
+}
+
+func cgocallbackg1() {
+ gp := getg()
+ if gp.m.needextram {
+ gp.m.needextram = false
+ onM(newextram)
+ }
+
+ // Add entry to defer stack in case of panic.
+ restore := true
+ defer unwindm(&restore)
+
+ if raceenabled {
+ raceacquire(unsafe.Pointer(&racecgosync))
+ }
+
+ type args struct {
+ fn *funcval
+ arg unsafe.Pointer
+ argsize uintptr
+ }
+ var cb *args
+
+ // Location of callback arguments depends on stack frame layout
+ // and size of stack frame of cgocallback_gofunc.
+ sp := gp.m.g0.sched.sp
+ switch GOARCH {
+ default:
+ gothrow("cgocallbackg is unimplemented on arch")
+ case "arm":
+ // On arm, stack frame is two words and there's a saved LR between
+ // SP and the stack frame and between the stack frame and the arguments.
+ cb = (*args)(unsafe.Pointer(sp + 4*ptrSize))
+ case "amd64":
+ // On amd64, stack frame is one word, plus caller PC.
+ cb = (*args)(unsafe.Pointer(sp + 2*ptrSize))
+ case "386":
+ // On 386, stack frame is three words, plus caller PC.
+ cb = (*args)(unsafe.Pointer(sp + 4*ptrSize))
+ }
+
+ // Invoke callback.
+ reflectcall(unsafe.Pointer(cb.fn), unsafe.Pointer(cb.arg), uint32(cb.argsize), 0)
+
+ if raceenabled {
+ racereleasemerge(unsafe.Pointer(&racecgosync))
+ }
+
+ // Do not unwind m->g0->sched.sp.
+ // Our caller, cgocallback, will do that.
+ restore = false
+}
+
+func unwindm(restore *bool) {
+ if !*restore {
+ return
+ }
+ // Restore sp saved by cgocallback during
+ // unwind of g's stack (see comment at top of file).
+ mp := acquirem()
+ sched := &mp.g0.sched
+ switch GOARCH {
+ default:
+ gothrow("unwindm not implemented")
+ case "386", "amd64":
+ sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp))
+ case "arm":
+ sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 4))
+ }
+ releasem(mp)
+}
+
+// called from assembly
+func badcgocallback() {
+ gothrow("misaligned stack in cgocallback")
+}
+
+// called from (incomplete) assembly
+func cgounimpl() {
+ gothrow("cgo not implemented")
+}
+
+var racecgosync uint64 // represents possible synchronization in C code
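Editor's note: the following is an illustrative sketch, not part of this commit. It shows the Go -> C direction described in the comment at the top of cgocall.go: each call through a cgo wrapper ends up in runtime.cgocall/asmcgocall as outlined above. (An exported Go function called from C would take the runtime.cgocallback path instead, omitted here for brevity.)

package main

/*
#include <math.h>
*/
import "C"

import "fmt"

func main() {
	// cgo generates a wrapper for sqrt; calling it locks the goroutine
	// to its M, enters a syscall, switches to the g0 stack, runs the
	// gcc-compiled code, and switches back, as described above.
	r := C.sqrt(C.double(2))
	fmt.Println(float64(r))
}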
diff --git a/src/runtime/cgocall.h b/src/runtime/cgocall.h
new file mode 100644
index 000000000..c87a9cdc5
--- /dev/null
+++ b/src/runtime/cgocall.h
@@ -0,0 +1,13 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * Cgo interface.
+ */
+
+void runtime·cgocall(void (*fn)(void*), void*);
+int32 runtime·cgocall_errno(void (*fn)(void*), void*);
+void runtime·cgocallback(void (*fn)(void), void*, uintptr);
+void *runtime·cmalloc(uintptr);
+void runtime·cfree(void*);
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
new file mode 100644
index 000000000..91ade4d37
--- /dev/null
+++ b/src/runtime/chan.go
@@ -0,0 +1,644 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// This file contains the implementation of Go channels.
+
+import "unsafe"
+
+const (
+ maxAlign = 8
+ hchanSize = unsafe.Sizeof(hchan{}) + uintptr(-int(unsafe.Sizeof(hchan{}))&(maxAlign-1))
+ debugChan = false
+)
+
+// TODO(khr): make hchan.buf an unsafe.Pointer, not a *uint8
+
+func makechan(t *chantype, size int64) *hchan {
+ elem := t.elem
+
+	// The compiler checks this, but be safe.
+ if elem.size >= 1<<16 {
+ gothrow("makechan: invalid channel element type")
+ }
+ if hchanSize%maxAlign != 0 || elem.align > maxAlign {
+ gothrow("makechan: bad alignment")
+ }
+ if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (maxMem-hchanSize)/uintptr(elem.size)) {
+ panic("makechan: size out of range")
+ }
+
+ var c *hchan
+ if elem.kind&kindNoPointers != 0 || size == 0 {
+ // Allocate memory in one call.
+ // Hchan does not contain pointers interesting for GC in this case:
+ // buf points into the same allocation, elemtype is persistent.
+ // SudoG's are referenced from their owning thread so they can't be collected.
+ // TODO(dvyukov,rlh): Rethink when collector can move allocated objects.
+ c = (*hchan)(gomallocgc(hchanSize+uintptr(size)*uintptr(elem.size), nil, flagNoScan))
+ if size > 0 && elem.size != 0 {
+ c.buf = (*uint8)(add(unsafe.Pointer(c), hchanSize))
+ } else {
+ c.buf = (*uint8)(unsafe.Pointer(c)) // race detector uses this location for synchronization
+ }
+ } else {
+ c = new(hchan)
+ c.buf = (*uint8)(newarray(elem, uintptr(size)))
+ }
+ c.elemsize = uint16(elem.size)
+ c.elemtype = elem
+ c.dataqsiz = uint(size)
+
+ if debugChan {
+ print("makechan: chan=", c, "; elemsize=", elem.size, "; elemalg=", elem.alg, "; dataqsiz=", size, "\n")
+ }
+ return c
+}
+
+// chanbuf(c, i) is pointer to the i'th slot in the buffer.
+func chanbuf(c *hchan, i uint) unsafe.Pointer {
+ return add(unsafe.Pointer(c.buf), uintptr(i)*uintptr(c.elemsize))
+}
+
+// entry point for c <- x from compiled code
+//go:nosplit
+func chansend1(t *chantype, c *hchan, elem unsafe.Pointer) {
+ chansend(t, c, elem, true, getcallerpc(unsafe.Pointer(&t)))
+}
+
+/*
+ * generic single channel send/recv
+ * If block is false,
+ * then the protocol will not
+ * sleep but return if it could
+ * not complete.
+ *
+ * sleep can wake up with g.param == nil
+ * when a channel involved in the sleep has
+ * been closed. It is easiest to loop and re-run
+ * the operation; we'll see that it's now closed.
+ */
+func chansend(t *chantype, c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
+ if raceenabled {
+ raceReadObjectPC(t.elem, ep, callerpc, funcPC(chansend))
+ }
+
+ if c == nil {
+ if !block {
+ return false
+ }
+ gopark(nil, nil, "chan send (nil chan)")
+ gothrow("unreachable")
+ }
+
+ if debugChan {
+ print("chansend: chan=", c, "\n")
+ }
+
+ if raceenabled {
+ racereadpc(unsafe.Pointer(c), callerpc, funcPC(chansend))
+ }
+
+ // Fast path: check for failed non-blocking operation without acquiring the lock.
+ //
+ // After observing that the channel is not closed, we observe that the channel is
+ // not ready for sending. Each of these observations is a single word-sized read
+ // (first c.closed and second c.recvq.first or c.qcount depending on kind of channel).
+ // Because a closed channel cannot transition from 'ready for sending' to
+ // 'not ready for sending', even if the channel is closed between the two observations,
+ // they imply a moment between the two when the channel was both not yet closed
+ // and not ready for sending. We behave as if we observed the channel at that moment,
+ // and report that the send cannot proceed.
+ //
+ // It is okay if the reads are reordered here: if we observe that the channel is not
+ // ready for sending and then observe that it is not closed, that implies that the
+ // channel wasn't closed during the first observation.
+ if !block && c.closed == 0 && ((c.dataqsiz == 0 && c.recvq.first == nil) ||
+ (c.dataqsiz > 0 && c.qcount == c.dataqsiz)) {
+ return false
+ }
+
+ var t0 int64
+ if blockprofilerate > 0 {
+ t0 = cputicks()
+ }
+
+ lock(&c.lock)
+ if c.closed != 0 {
+ unlock(&c.lock)
+ panic("send on closed channel")
+ }
+
+ if c.dataqsiz == 0 { // synchronous channel
+ sg := c.recvq.dequeue()
+ if sg != nil { // found a waiting receiver
+ if raceenabled {
+ racesync(c, sg)
+ }
+ unlock(&c.lock)
+
+ recvg := sg.g
+ recvg.param = unsafe.Pointer(sg)
+ if sg.elem != nil {
+ memmove(unsafe.Pointer(sg.elem), ep, uintptr(c.elemsize))
+ }
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(recvg)
+ return true
+ }
+
+ if !block {
+ unlock(&c.lock)
+ return false
+ }
+
+ // no receiver available: block on this channel.
+ gp := getg()
+ mysg := acquireSudog()
+ mysg.releasetime = 0
+ if t0 != 0 {
+ mysg.releasetime = -1
+ }
+ mysg.elem = ep
+ mysg.waitlink = nil
+ gp.waiting = mysg
+ mysg.g = gp
+ mysg.selectdone = nil
+ gp.param = nil
+ c.sendq.enqueue(mysg)
+ goparkunlock(&c.lock, "chan send")
+
+ // someone woke us up.
+ if gp.param == nil {
+ if c.closed == 0 {
+ gothrow("chansend: spurious wakeup")
+ }
+ panic("send on closed channel")
+ }
+ if mysg.releasetime > 0 {
+ blockevent(int64(mysg.releasetime)-t0, 2)
+ }
+ if mysg != gp.waiting {
+ gothrow("G waiting list is corrupted!")
+ }
+ gp.waiting = nil
+ releaseSudog(mysg)
+ return true
+ }
+
+ // asynchronous channel
+ // wait for some space to write our data
+ var t1 int64
+ for c.qcount >= c.dataqsiz {
+ if !block {
+ unlock(&c.lock)
+ return false
+ }
+ gp := getg()
+ mysg := acquireSudog()
+ mysg.releasetime = 0
+ if t0 != 0 {
+ mysg.releasetime = -1
+ }
+ mysg.g = gp
+ mysg.elem = nil
+ mysg.selectdone = nil
+ c.sendq.enqueue(mysg)
+ goparkunlock(&c.lock, "chan send")
+
+ // someone woke us up - try again
+ if mysg.releasetime > 0 {
+ t1 = mysg.releasetime
+ }
+ releaseSudog(mysg)
+ lock(&c.lock)
+ if c.closed != 0 {
+ unlock(&c.lock)
+ panic("send on closed channel")
+ }
+ }
+
+ // write our data into the channel buffer
+ if raceenabled {
+ raceacquire(chanbuf(c, c.sendx))
+ racerelease(chanbuf(c, c.sendx))
+ }
+ memmove(chanbuf(c, c.sendx), ep, uintptr(c.elemsize))
+ c.sendx++
+ if c.sendx == c.dataqsiz {
+ c.sendx = 0
+ }
+ c.qcount++
+
+ // wake up a waiting receiver
+ sg := c.recvq.dequeue()
+ if sg != nil {
+ recvg := sg.g
+ unlock(&c.lock)
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(recvg)
+ } else {
+ unlock(&c.lock)
+ }
+ if t1 > 0 {
+ blockevent(t1-t0, 2)
+ }
+ return true
+}
+
+func closechan(c *hchan) {
+ if c == nil {
+ panic("close of nil channel")
+ }
+
+ lock(&c.lock)
+ if c.closed != 0 {
+ unlock(&c.lock)
+ panic("close of closed channel")
+ }
+
+ if raceenabled {
+ callerpc := getcallerpc(unsafe.Pointer(&c))
+ racewritepc(unsafe.Pointer(c), callerpc, funcPC(closechan))
+ racerelease(unsafe.Pointer(c))
+ }
+
+ c.closed = 1
+
+ // release all readers
+ for {
+ sg := c.recvq.dequeue()
+ if sg == nil {
+ break
+ }
+ gp := sg.g
+ gp.param = nil
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+ }
+
+ // release all writers
+ for {
+ sg := c.sendq.dequeue()
+ if sg == nil {
+ break
+ }
+ gp := sg.g
+ gp.param = nil
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+ }
+ unlock(&c.lock)
+}
+
+// entry points for <- c from compiled code
+//go:nosplit
+func chanrecv1(t *chantype, c *hchan, elem unsafe.Pointer) {
+ chanrecv(t, c, elem, true)
+}
+
+//go:nosplit
+func chanrecv2(t *chantype, c *hchan, elem unsafe.Pointer) (received bool) {
+ _, received = chanrecv(t, c, elem, true)
+ return
+}
+
+// chanrecv receives on channel c and writes the received data to ep.
+// ep may be nil, in which case received data is ignored.
+// If block == false and no elements are available, returns (false, false).
+// Otherwise, if c is closed, zeros *ep and returns (true, false).
+// Otherwise, fills in *ep with an element and returns (true, true).
+func chanrecv(t *chantype, c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) {
+ // raceenabled: don't need to check ep, as it is always on the stack.
+
+ if debugChan {
+ print("chanrecv: chan=", c, "\n")
+ }
+
+ if c == nil {
+ if !block {
+ return
+ }
+ gopark(nil, nil, "chan receive (nil chan)")
+ gothrow("unreachable")
+ }
+
+ // Fast path: check for failed non-blocking operation without acquiring the lock.
+ //
+ // After observing that the channel is not ready for receiving, we observe that the
+ // channel is not closed. Each of these observations is a single word-sized read
+ // (first c.sendq.first or c.qcount, and second c.closed).
+ // Because a channel cannot be reopened, the later observation of the channel
+ // being not closed implies that it was also not closed at the moment of the
+ // first observation. We behave as if we observed the channel at that moment
+ // and report that the receive cannot proceed.
+ //
+ // The order of operations is important here: reversing the operations can lead to
+ // incorrect behavior when racing with a close.
+ if !block && (c.dataqsiz == 0 && c.sendq.first == nil ||
+ c.dataqsiz > 0 && atomicloaduint(&c.qcount) == 0) &&
+ atomicload(&c.closed) == 0 {
+ return
+ }
+
+ var t0 int64
+ if blockprofilerate > 0 {
+ t0 = cputicks()
+ }
+
+ lock(&c.lock)
+ if c.dataqsiz == 0 { // synchronous channel
+ if c.closed != 0 {
+ return recvclosed(c, ep)
+ }
+
+ sg := c.sendq.dequeue()
+ if sg != nil {
+ if raceenabled {
+ racesync(c, sg)
+ }
+ unlock(&c.lock)
+
+ if ep != nil {
+ memmove(ep, sg.elem, uintptr(c.elemsize))
+ }
+ gp := sg.g
+ gp.param = unsafe.Pointer(sg)
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+ selected = true
+ received = true
+ return
+ }
+
+ if !block {
+ unlock(&c.lock)
+ return
+ }
+
+ // no sender available: block on this channel.
+ gp := getg()
+ mysg := acquireSudog()
+ mysg.releasetime = 0
+ if t0 != 0 {
+ mysg.releasetime = -1
+ }
+ mysg.elem = ep
+ mysg.waitlink = nil
+ gp.waiting = mysg
+ mysg.g = gp
+ mysg.selectdone = nil
+ gp.param = nil
+ c.recvq.enqueue(mysg)
+ goparkunlock(&c.lock, "chan receive")
+
+ // someone woke us up
+ gp.waiting = nil
+ if mysg.releasetime > 0 {
+ blockevent(mysg.releasetime-t0, 2)
+ }
+ releaseSudog(mysg)
+
+ if gp.param != nil {
+ // a sender sent us some data. It already wrote to ep.
+ selected = true
+ received = true
+ return
+ }
+
+ lock(&c.lock)
+ if c.closed == 0 {
+ gothrow("chanrecv: spurious wakeup")
+ }
+ return recvclosed(c, ep)
+ }
+
+ // asynchronous channel
+ // wait for some data to appear
+ var t1 int64
+ for c.qcount <= 0 {
+ if c.closed != 0 {
+ selected, received = recvclosed(c, ep)
+ if t1 > 0 {
+ blockevent(t1-t0, 2)
+ }
+ return
+ }
+
+ if !block {
+ unlock(&c.lock)
+ return
+ }
+
+ // wait for someone to send an element
+ gp := getg()
+ mysg := acquireSudog()
+ mysg.releasetime = 0
+ if t0 != 0 {
+ mysg.releasetime = -1
+ }
+ mysg.elem = nil
+ mysg.g = gp
+ mysg.selectdone = nil
+
+ c.recvq.enqueue(mysg)
+ goparkunlock(&c.lock, "chan receive")
+
+ // someone woke us up - try again
+ if mysg.releasetime > 0 {
+ t1 = mysg.releasetime
+ }
+ releaseSudog(mysg)
+ lock(&c.lock)
+ }
+
+ if raceenabled {
+ raceacquire(chanbuf(c, c.recvx))
+ racerelease(chanbuf(c, c.recvx))
+ }
+ if ep != nil {
+ memmove(ep, chanbuf(c, c.recvx), uintptr(c.elemsize))
+ }
+ memclr(chanbuf(c, c.recvx), uintptr(c.elemsize))
+
+ c.recvx++
+ if c.recvx == c.dataqsiz {
+ c.recvx = 0
+ }
+ c.qcount--
+
+ // ping a sender now that there is space
+ sg := c.sendq.dequeue()
+ if sg != nil {
+ gp := sg.g
+ unlock(&c.lock)
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+ } else {
+ unlock(&c.lock)
+ }
+
+ if t1 > 0 {
+ blockevent(t1-t0, 2)
+ }
+ selected = true
+ received = true
+ return
+}
+
+// recvclosed is a helper function for chanrecv. It handles cleanup
+// when the receiver encounters a closed channel.
+// The caller must hold c.lock; recvclosed will release the lock.
+func recvclosed(c *hchan, ep unsafe.Pointer) (selected, received bool) {
+ if raceenabled {
+ raceacquire(unsafe.Pointer(c))
+ }
+ unlock(&c.lock)
+ if ep != nil {
+ memclr(ep, uintptr(c.elemsize))
+ }
+ return true, false
+}
+
+// compiler implements
+//
+// select {
+// case c <- v:
+// ... foo
+// default:
+// ... bar
+// }
+//
+// as
+//
+// if selectnbsend(c, v) {
+// ... foo
+// } else {
+// ... bar
+// }
+//
+func selectnbsend(t *chantype, c *hchan, elem unsafe.Pointer) (selected bool) {
+ return chansend(t, c, elem, false, getcallerpc(unsafe.Pointer(&t)))
+}
+
+// compiler implements
+//
+// select {
+// case v = <-c:
+// ... foo
+// default:
+// ... bar
+// }
+//
+// as
+//
+// if selectnbrecv(&v, c) {
+// ... foo
+// } else {
+// ... bar
+// }
+//
+func selectnbrecv(t *chantype, elem unsafe.Pointer, c *hchan) (selected bool) {
+ selected, _ = chanrecv(t, c, elem, false)
+ return
+}
+
+// compiler implements
+//
+// select {
+// case v, ok = <-c:
+// ... foo
+// default:
+// ... bar
+// }
+//
+// as
+//
+// if c != nil && selectnbrecv2(&v, &ok, c) {
+// ... foo
+// } else {
+// ... bar
+// }
+//
+func selectnbrecv2(t *chantype, elem unsafe.Pointer, received *bool, c *hchan) (selected bool) {
+ // TODO(khr): just return 2 values from this function, now that it is in Go.
+ selected, *received = chanrecv(t, c, elem, false)
+ return
+}
+
+func reflect_chansend(t *chantype, c *hchan, elem unsafe.Pointer, nb bool) (selected bool) {
+ return chansend(t, c, elem, !nb, getcallerpc(unsafe.Pointer(&t)))
+}
+
+func reflect_chanrecv(t *chantype, c *hchan, nb bool, elem unsafe.Pointer) (selected bool, received bool) {
+ return chanrecv(t, c, elem, !nb)
+}
+
+func reflect_chanlen(c *hchan) int {
+ if c == nil {
+ return 0
+ }
+ return int(c.qcount)
+}
+
+func reflect_chancap(c *hchan) int {
+ if c == nil {
+ return 0
+ }
+ return int(c.dataqsiz)
+}
+
+func (q *waitq) enqueue(sgp *sudog) {
+ sgp.next = nil
+ if q.first == nil {
+ q.first = sgp
+ q.last = sgp
+ return
+ }
+ q.last.next = sgp
+ q.last = sgp
+}
+
+func (q *waitq) dequeue() *sudog {
+ for {
+ sgp := q.first
+ if sgp == nil {
+ return nil
+ }
+ q.first = sgp.next
+ if q.last == sgp {
+ q.last = nil
+ }
+
+ // if sgp participates in a select and is already signaled, ignore it
+ if sgp.selectdone != nil {
+ // claim the right to signal
+ if *sgp.selectdone != 0 || !cas(sgp.selectdone, 0, 1) {
+ continue
+ }
+ }
+
+ return sgp
+ }
+}
+
+func racesync(c *hchan, sg *sudog) {
+ racerelease(chanbuf(c, 0))
+ raceacquireg(sg.g, chanbuf(c, 0))
+ racereleaseg(sg.g, chanbuf(c, 0))
+ raceacquire(chanbuf(c, 0))
+}
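Editor's note: as an aside (not part of this commit), the short program below shows the source-level constructs that the compiler lowers to the entry points defined above: make -> makechan, c <- v -> chansend1, v, ok := <-c -> chanrecv2, close -> closechan, and a select with a default clause -> the non-blocking selectnbsend/selectnbrecv helpers.

package main

import "fmt"

func main() {
	c := make(chan int, 2) // runtime.makechan

	c <- 1 // runtime.chansend1 (buffered, so no blocking here)
	c <- 2
	close(c) // runtime.closechan

	for {
		v, ok := <-c // runtime.chanrecv2
		if !ok {
			break // drained and closed: zero value, ok == false
		}
		fmt.Println(v)
	}

	d := make(chan int)
	select { // lowered to the non-blocking helpers described above
	case v := <-d:
		fmt.Println("got", v)
	default:
		fmt.Println("no value ready") // taken: d is empty and unbuffered
	}
}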
diff --git a/src/runtime/chan.h b/src/runtime/chan.h
new file mode 100644
index 000000000..c34ff1533
--- /dev/null
+++ b/src/runtime/chan.h
@@ -0,0 +1,68 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define MAXALIGN 8
+
+typedef struct WaitQ WaitQ;
+typedef struct Select Select;
+typedef struct Scase Scase;
+
+struct WaitQ
+{
+ SudoG* first;
+ SudoG* last;
+};
+
+struct Hchan
+{
+ uintgo qcount; // total data in the q
+ uintgo dataqsiz; // size of the circular q
+ byte* buf;
+ uint16 elemsize;
+ uint32 closed;
+ Type* elemtype; // element type
+ uintgo sendx; // send index
+ uintgo recvx; // receive index
+ WaitQ recvq; // list of recv waiters
+ WaitQ sendq; // list of send waiters
+ Mutex lock;
+};
+
+// Buffer follows Hchan immediately in memory.
+// chanbuf(c, i) is pointer to the i'th slot in the buffer.
+#define chanbuf(c, i) ((byte*)((c)->buf)+(uintptr)(c)->elemsize*(i))
+
+enum
+{
+ debug = 0,
+
+ // Scase.kind
+ CaseRecv,
+ CaseSend,
+ CaseDefault,
+};
+
+// Known to compiler.
+// Changes here must also be made in src/cmd/gc/select.c's selecttype.
+struct Scase
+{
+ void* elem; // data element
+ Hchan* chan; // chan
+ uintptr pc; // return pc
+ uint16 kind;
+ uint16 so; // vararg of selected bool
+ bool* receivedp; // pointer to received bool (recv2)
+ int64 releasetime;
+};
+
+// Known to compiler.
+// Changes here must also be made in src/cmd/gc/select.c's selecttype.
+struct Select
+{
+ uint16 tcase; // total count of scase[]
+ uint16 ncase; // currently filled scase[]
+ uint16* pollorder; // case poll order
+ Hchan** lockorder; // channel lock order
+ Scase scase[1]; // one per case (in order of appearance)
+};
diff --git a/src/runtime/chan_test.go b/src/runtime/chan_test.go
new file mode 100644
index 000000000..01632892e
--- /dev/null
+++ b/src/runtime/chan_test.go
@@ -0,0 +1,791 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "runtime"
+ "sync"
+ "sync/atomic"
+ "testing"
+ "time"
+)
+
+func TestChan(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
+ N := 200
+ if testing.Short() {
+ N = 20
+ }
+ for chanCap := 0; chanCap < N; chanCap++ {
+ {
+ // Ensure that receive from empty chan blocks.
+ c := make(chan int, chanCap)
+ recv1 := false
+ go func() {
+ _ = <-c
+ recv1 = true
+ }()
+ recv2 := false
+ go func() {
+ _, _ = <-c
+ recv2 = true
+ }()
+ time.Sleep(time.Millisecond)
+ if recv1 || recv2 {
+ t.Fatalf("chan[%d]: receive from empty chan", chanCap)
+ }
+ // Ensure that non-blocking receive does not block.
+ select {
+ case _ = <-c:
+ t.Fatalf("chan[%d]: receive from empty chan", chanCap)
+ default:
+ }
+ select {
+ case _, _ = <-c:
+ t.Fatalf("chan[%d]: receive from empty chan", chanCap)
+ default:
+ }
+ c <- 0
+ c <- 0
+ }
+
+ {
+ // Ensure that send to full chan blocks.
+ c := make(chan int, chanCap)
+ for i := 0; i < chanCap; i++ {
+ c <- i
+ }
+ sent := uint32(0)
+ go func() {
+ c <- 0
+ atomic.StoreUint32(&sent, 1)
+ }()
+ time.Sleep(time.Millisecond)
+ if atomic.LoadUint32(&sent) != 0 {
+ t.Fatalf("chan[%d]: send to full chan", chanCap)
+ }
+ // Ensure that non-blocking send does not block.
+ select {
+ case c <- 0:
+ t.Fatalf("chan[%d]: send to full chan", chanCap)
+ default:
+ }
+ <-c
+ }
+
+ {
+ // Ensure that we receive 0 from closed chan.
+ c := make(chan int, chanCap)
+ for i := 0; i < chanCap; i++ {
+ c <- i
+ }
+ close(c)
+ for i := 0; i < chanCap; i++ {
+ v := <-c
+ if v != i {
+ t.Fatalf("chan[%d]: received %v, expected %v", chanCap, v, i)
+ }
+ }
+ if v := <-c; v != 0 {
+ t.Fatalf("chan[%d]: received %v, expected %v", chanCap, v, 0)
+ }
+ if v, ok := <-c; v != 0 || ok {
+ t.Fatalf("chan[%d]: received %v/%v, expected %v/%v", chanCap, v, ok, 0, false)
+ }
+ }
+
+ {
+ // Ensure that close unblocks receive.
+ c := make(chan int, chanCap)
+ done := make(chan bool)
+ go func() {
+ v, ok := <-c
+ done <- v == 0 && ok == false
+ }()
+ time.Sleep(time.Millisecond)
+ close(c)
+ if !<-done {
+ t.Fatalf("chan[%d]: received non zero from closed chan", chanCap)
+ }
+ }
+
+ {
+ // Send 100 integers,
+ // ensure that we receive them non-corrupted in FIFO order.
+ c := make(chan int, chanCap)
+ go func() {
+ for i := 0; i < 100; i++ {
+ c <- i
+ }
+ }()
+ for i := 0; i < 100; i++ {
+ v := <-c
+ if v != i {
+ t.Fatalf("chan[%d]: received %v, expected %v", chanCap, v, i)
+ }
+ }
+
+ // Same, but using recv2.
+ go func() {
+ for i := 0; i < 100; i++ {
+ c <- i
+ }
+ }()
+ for i := 0; i < 100; i++ {
+ v, ok := <-c
+ if !ok {
+ t.Fatalf("chan[%d]: receive failed, expected %v", chanCap, i)
+ }
+ if v != i {
+ t.Fatalf("chan[%d]: received %v, expected %v", chanCap, v, i)
+ }
+ }
+
+ // Send 1000 integers in 4 goroutines,
+ // ensure that we receive what we send.
+ const P = 4
+ const L = 1000
+ for p := 0; p < P; p++ {
+ go func() {
+ for i := 0; i < L; i++ {
+ c <- i
+ }
+ }()
+ }
+ done := make(chan map[int]int)
+ for p := 0; p < P; p++ {
+ go func() {
+ recv := make(map[int]int)
+ for i := 0; i < L; i++ {
+ v := <-c
+ recv[v] = recv[v] + 1
+ }
+ done <- recv
+ }()
+ }
+ recv := make(map[int]int)
+ for p := 0; p < P; p++ {
+ for k, v := range <-done {
+ recv[k] = recv[k] + v
+ }
+ }
+ if len(recv) != L {
+ t.Fatalf("chan[%d]: received %v values, expected %v", chanCap, len(recv), L)
+ }
+ for _, v := range recv {
+ if v != P {
+ t.Fatalf("chan[%d]: received %v values, expected %v", chanCap, v, P)
+ }
+ }
+ }
+
+ {
+ // Test len/cap.
+ c := make(chan int, chanCap)
+ if len(c) != 0 || cap(c) != chanCap {
+ t.Fatalf("chan[%d]: bad len/cap, expect %v/%v, got %v/%v", chanCap, 0, chanCap, len(c), cap(c))
+ }
+ for i := 0; i < chanCap; i++ {
+ c <- i
+ }
+ if len(c) != chanCap || cap(c) != chanCap {
+ t.Fatalf("chan[%d]: bad len/cap, expect %v/%v, got %v/%v", chanCap, chanCap, chanCap, len(c), cap(c))
+ }
+ }
+
+ }
+}
+
+func TestNonblockRecvRace(t *testing.T) {
+ n := 10000
+ if testing.Short() {
+ n = 100
+ }
+ for i := 0; i < n; i++ {
+ c := make(chan int, 1)
+ c <- 1
+ go func() {
+ select {
+ case <-c:
+ default:
+ t.Fatal("chan is not ready")
+ }
+ }()
+ close(c)
+ <-c
+ }
+}
+
+func TestSelfSelect(t *testing.T) {
+ // Ensure that send/recv on the same chan in select
+ // does not crash nor deadlock.
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
+ for _, chanCap := range []int{0, 10} {
+ var wg sync.WaitGroup
+ wg.Add(2)
+ c := make(chan int, chanCap)
+ for p := 0; p < 2; p++ {
+ p := p
+ go func() {
+ defer wg.Done()
+ for i := 0; i < 1000; i++ {
+ if p == 0 || i%2 == 0 {
+ select {
+ case c <- p:
+ case v := <-c:
+ if chanCap == 0 && v == p {
+ t.Fatalf("self receive")
+ }
+ }
+ } else {
+ select {
+ case v := <-c:
+ if chanCap == 0 && v == p {
+ t.Fatalf("self receive")
+ }
+ case c <- p:
+ }
+ }
+ }
+ }()
+ }
+ wg.Wait()
+ }
+}
+
+func TestSelectStress(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(10))
+ var c [4]chan int
+ c[0] = make(chan int)
+ c[1] = make(chan int)
+ c[2] = make(chan int, 2)
+ c[3] = make(chan int, 3)
+ N := int(1e5)
+ if testing.Short() {
+ N /= 10
+ }
+ // There are 4 goroutines that send N values on each of the chans,
+ // + 4 goroutines that receive N values on each of the chans,
+ // + 1 goroutine that sends N values on each of the chans in a single select,
+ // + 1 goroutine that receives N values on each of the chans in a single select.
+ // All these sends, receives and selects interact chaotically at runtime,
+ // but we are careful that this whole construct does not deadlock.
+ var wg sync.WaitGroup
+ wg.Add(10)
+ for k := 0; k < 4; k++ {
+ k := k
+ go func() {
+ for i := 0; i < N; i++ {
+ c[k] <- 0
+ }
+ wg.Done()
+ }()
+ go func() {
+ for i := 0; i < N; i++ {
+ <-c[k]
+ }
+ wg.Done()
+ }()
+ }
+ go func() {
+ var n [4]int
+ c1 := c
+ for i := 0; i < 4*N; i++ {
+ select {
+ case c1[3] <- 0:
+ n[3]++
+ if n[3] == N {
+ c1[3] = nil
+ }
+ case c1[2] <- 0:
+ n[2]++
+ if n[2] == N {
+ c1[2] = nil
+ }
+ case c1[0] <- 0:
+ n[0]++
+ if n[0] == N {
+ c1[0] = nil
+ }
+ case c1[1] <- 0:
+ n[1]++
+ if n[1] == N {
+ c1[1] = nil
+ }
+ }
+ }
+ wg.Done()
+ }()
+ go func() {
+ var n [4]int
+ c1 := c
+ for i := 0; i < 4*N; i++ {
+ select {
+ case <-c1[0]:
+ n[0]++
+ if n[0] == N {
+ c1[0] = nil
+ }
+ case <-c1[1]:
+ n[1]++
+ if n[1] == N {
+ c1[1] = nil
+ }
+ case <-c1[2]:
+ n[2]++
+ if n[2] == N {
+ c1[2] = nil
+ }
+ case <-c1[3]:
+ n[3]++
+ if n[3] == N {
+ c1[3] = nil
+ }
+ }
+ }
+ wg.Done()
+ }()
+ wg.Wait()
+}
+
+func TestChanSendInterface(t *testing.T) {
+ type mt struct{}
+ m := &mt{}
+ c := make(chan interface{}, 1)
+ c <- m
+ select {
+ case c <- m:
+ default:
+ }
+ select {
+ case c <- m:
+ case c <- &mt{}:
+ default:
+ }
+}
+
+func TestPseudoRandomSend(t *testing.T) {
+ n := 100
+ for _, chanCap := range []int{0, n} {
+ c := make(chan int, chanCap)
+ l := make([]int, n)
+ var m sync.Mutex
+ m.Lock()
+ go func() {
+ for i := 0; i < n; i++ {
+ runtime.Gosched()
+ l[i] = <-c
+ }
+ m.Unlock()
+ }()
+ for i := 0; i < n; i++ {
+ select {
+ case c <- 1:
+ case c <- 0:
+ }
+ }
+ m.Lock() // wait
+ n0 := 0
+ n1 := 0
+ for _, i := range l {
+ n0 += (i + 1) % 2
+ n1 += i
+ }
+ if n0 <= n/10 || n1 <= n/10 {
+ t.Errorf("Want pseudorandom, got %d zeros and %d ones (chan cap %d)", n0, n1, chanCap)
+ }
+ }
+}
+
+func TestMultiConsumer(t *testing.T) {
+ const nwork = 23
+ const niter = 271828
+
+ pn := []int{2, 3, 7, 11, 13, 17, 19, 23, 27, 31}
+
+ q := make(chan int, nwork*3)
+ r := make(chan int, nwork*3)
+
+ // workers
+ var wg sync.WaitGroup
+ for i := 0; i < nwork; i++ {
+ wg.Add(1)
+ go func(w int) {
+ for v := range q {
+ // mess with the fifo-ish nature of range
+ if pn[w%len(pn)] == v {
+ runtime.Gosched()
+ }
+ r <- v
+ }
+ wg.Done()
+ }(i)
+ }
+
+ // feeder & closer
+ expect := 0
+ go func() {
+ for i := 0; i < niter; i++ {
+ v := pn[i%len(pn)]
+ expect += v
+ q <- v
+ }
+ close(q) // no more work
+ wg.Wait() // workers done
+ close(r) // ... so there can be no more results
+ }()
+
+ // consume & check
+ n := 0
+ s := 0
+ for v := range r {
+ n++
+ s += v
+ }
+ if n != niter || s != expect {
+ t.Errorf("Expected sum %d (got %d) from %d iter (saw %d)",
+ expect, s, niter, n)
+ }
+}
+
+func TestShrinkStackDuringBlockedSend(t *testing.T) {
+ // make sure that channel operations still work when we are
+ // blocked on a channel send and we shrink the stack.
+ // NOTE: this test probably won't fail unless stack.c:StackDebug
+ // is set to >= 1.
+ const n = 10
+ c := make(chan int)
+ done := make(chan struct{})
+
+ go func() {
+ for i := 0; i < n; i++ {
+ c <- i
+ // use lots of stack, briefly.
+ stackGrowthRecursive(20)
+ }
+ done <- struct{}{}
+ }()
+
+ for i := 0; i < n; i++ {
+ x := <-c
+ if x != i {
+ t.Errorf("bad channel read: want %d, got %d", i, x)
+ }
+ // Waste some time so sender can finish using lots of stack
+ // and block in channel send.
+ time.Sleep(1 * time.Millisecond)
+ // trigger GC which will shrink the stack of the sender.
+ runtime.GC()
+ }
+ <-done
+}
+
+func BenchmarkChanNonblocking(b *testing.B) {
+ myc := make(chan int)
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ select {
+ case <-myc:
+ default:
+ }
+ }
+ })
+}
+
+func BenchmarkSelectUncontended(b *testing.B) {
+ b.RunParallel(func(pb *testing.PB) {
+ myc1 := make(chan int, 1)
+ myc2 := make(chan int, 1)
+ myc1 <- 0
+ for pb.Next() {
+ select {
+ case <-myc1:
+ myc2 <- 0
+ case <-myc2:
+ myc1 <- 0
+ }
+ }
+ })
+}
+
+func BenchmarkSelectSyncContended(b *testing.B) {
+ myc1 := make(chan int)
+ myc2 := make(chan int)
+ myc3 := make(chan int)
+ done := make(chan int)
+ b.RunParallel(func(pb *testing.PB) {
+ go func() {
+ for {
+ select {
+ case myc1 <- 0:
+ case myc2 <- 0:
+ case myc3 <- 0:
+ case <-done:
+ return
+ }
+ }
+ }()
+ for pb.Next() {
+ select {
+ case <-myc1:
+ case <-myc2:
+ case <-myc3:
+ }
+ }
+ })
+ close(done)
+}
+
+func BenchmarkSelectAsyncContended(b *testing.B) {
+ procs := runtime.GOMAXPROCS(0)
+ myc1 := make(chan int, procs)
+ myc2 := make(chan int, procs)
+ b.RunParallel(func(pb *testing.PB) {
+ myc1 <- 0
+ for pb.Next() {
+ select {
+ case <-myc1:
+ myc2 <- 0
+ case <-myc2:
+ myc1 <- 0
+ }
+ }
+ })
+}
+
+func BenchmarkSelectNonblock(b *testing.B) {
+ myc1 := make(chan int)
+ myc2 := make(chan int)
+ myc3 := make(chan int, 1)
+ myc4 := make(chan int, 1)
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ select {
+ case <-myc1:
+ default:
+ }
+ select {
+ case myc2 <- 0:
+ default:
+ }
+ select {
+ case <-myc3:
+ default:
+ }
+ select {
+ case myc4 <- 0:
+ default:
+ }
+ }
+ })
+}
+
+func BenchmarkChanUncontended(b *testing.B) {
+ const C = 100
+ b.RunParallel(func(pb *testing.PB) {
+ myc := make(chan int, C)
+ for pb.Next() {
+ for i := 0; i < C; i++ {
+ myc <- 0
+ }
+ for i := 0; i < C; i++ {
+ <-myc
+ }
+ }
+ })
+}
+
+func BenchmarkChanContended(b *testing.B) {
+ const C = 100
+ myc := make(chan int, C*runtime.GOMAXPROCS(0))
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ for i := 0; i < C; i++ {
+ myc <- 0
+ }
+ for i := 0; i < C; i++ {
+ <-myc
+ }
+ }
+ })
+}
+
+func BenchmarkChanSync(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := 2
+ N := int32(b.N / CallsPerSched / procs * procs)
+ c := make(chan bool, procs)
+ myc := make(chan int)
+ for p := 0; p < procs; p++ {
+ go func() {
+ for {
+ i := atomic.AddInt32(&N, -1)
+ if i < 0 {
+ break
+ }
+ for g := 0; g < CallsPerSched; g++ {
+ if i%2 == 0 {
+ <-myc
+ myc <- 0
+ } else {
+ myc <- 0
+ <-myc
+ }
+ }
+ }
+ c <- true
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ }
+}
+
+func benchmarkChanProdCons(b *testing.B, chanSize, localWork int) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, 2*procs)
+ myc := make(chan int, chanSize)
+ for p := 0; p < procs; p++ {
+ go func() {
+ foo := 0
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ for i := 0; i < localWork; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ myc <- 1
+ }
+ }
+ myc <- 0
+ c <- foo == 42
+ }()
+ go func() {
+ foo := 0
+ for {
+ v := <-myc
+ if v == 0 {
+ break
+ }
+ for i := 0; i < localWork; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ }
+ c <- foo == 42
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ <-c
+ }
+}
+
+func BenchmarkChanProdCons0(b *testing.B) {
+ benchmarkChanProdCons(b, 0, 0)
+}
+
+func BenchmarkChanProdCons10(b *testing.B) {
+ benchmarkChanProdCons(b, 10, 0)
+}
+
+func BenchmarkChanProdCons100(b *testing.B) {
+ benchmarkChanProdCons(b, 100, 0)
+}
+
+func BenchmarkChanProdConsWork0(b *testing.B) {
+ benchmarkChanProdCons(b, 0, 100)
+}
+
+func BenchmarkChanProdConsWork10(b *testing.B) {
+ benchmarkChanProdCons(b, 10, 100)
+}
+
+func BenchmarkChanProdConsWork100(b *testing.B) {
+ benchmarkChanProdCons(b, 100, 100)
+}
+
+func BenchmarkSelectProdCons(b *testing.B) {
+ const CallsPerSched = 1000
+ procs := runtime.GOMAXPROCS(-1)
+ N := int32(b.N / CallsPerSched)
+ c := make(chan bool, 2*procs)
+ myc := make(chan int, 128)
+ myclose := make(chan bool)
+ for p := 0; p < procs; p++ {
+ go func() {
+ // Producer: sends to myc.
+ foo := 0
+ // Intended to not fire during benchmarking.
+ mytimer := time.After(time.Hour)
+ for atomic.AddInt32(&N, -1) >= 0 {
+ for g := 0; g < CallsPerSched; g++ {
+ // Model some local work.
+ for i := 0; i < 100; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ select {
+ case myc <- 1:
+ case <-mytimer:
+ case <-myclose:
+ }
+ }
+ }
+ myc <- 0
+ c <- foo == 42
+ }()
+ go func() {
+ // Consumer: receives from myc.
+ foo := 0
+ // Intended to not fire during benchmarking.
+ mytimer := time.After(time.Hour)
+ loop:
+ for {
+ select {
+ case v := <-myc:
+ if v == 0 {
+ break loop
+ }
+ case <-mytimer:
+ case <-myclose:
+ }
+ // Model some local work.
+ for i := 0; i < 100; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ }
+ c <- foo == 42
+ }()
+ }
+ for p := 0; p < procs; p++ {
+ <-c
+ <-c
+ }
+}
+
+func BenchmarkChanCreation(b *testing.B) {
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ myc := make(chan int, 1)
+ myc <- 0
+ <-myc
+ }
+ })
+}
+
+func BenchmarkChanSem(b *testing.B) {
+ type Empty struct{}
+ myc := make(chan Empty, runtime.GOMAXPROCS(0))
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ myc <- Empty{}
+ <-myc
+ }
+ })
+}
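
The non-blocking cases in these tests all rely on the same select-with-default idiom. A minimal stand-alone sketch of that idiom (illustrative only; the names below are not from the test file):

	package main

	import "fmt"

	func main() {
		c := make(chan int, 1)

		// Non-blocking send: the default case runs if the buffer is full.
		select {
		case c <- 1:
			fmt.Println("sent")
		default:
			fmt.Println("channel full")
		}

		// Non-blocking receive: the default case runs if nothing is ready.
		select {
		case v := <-c:
			fmt.Println("received", v)
		default:
			fmt.Println("nothing ready")
		}
	}
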
diff --git a/src/runtime/closure_test.go b/src/runtime/closure_test.go
new file mode 100644
index 000000000..ea65fbd5f
--- /dev/null
+++ b/src/runtime/closure_test.go
@@ -0,0 +1,53 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package runtime_test
+
+import "testing"
+
+var s int
+
+func BenchmarkCallClosure(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ s += func(ii int) int { return 2 * ii }(i)
+ }
+}
+
+func BenchmarkCallClosure1(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ j := i
+ s += func(ii int) int { return 2*ii + j }(i)
+ }
+}
+
+var ss *int
+
+func BenchmarkCallClosure2(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ j := i
+ s += func() int {
+ ss = &j
+ return 2
+ }()
+ }
+}
+
+func addr1(x int) *int {
+ return func() *int { return &x }()
+}
+
+func BenchmarkCallClosure3(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ ss = addr1(i)
+ }
+}
+
+func addr2() (x int, p *int) {
+ return 0, func() *int { return &x }()
+}
+
+func BenchmarkCallClosure4(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ _, ss = addr2()
+ }
+}
diff --git a/src/runtime/compiler.go b/src/runtime/compiler.go
new file mode 100644
index 000000000..562a46022
--- /dev/null
+++ b/src/runtime/compiler.go
@@ -0,0 +1,13 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// Compiler is the name of the compiler toolchain that built the
+// running binary. Known toolchains are:
+//
+// gc The 5g/6g/8g compiler suite at code.google.com/p/go.
+// gccgo The gccgo front end, part of the GCC compiler suite.
+//
+const Compiler = "gc"
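
A brief usage sketch: the constant can be inspected at run time to tell which toolchain built the binary.

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		// Prints "gc" for the standard toolchain, "gccgo" for gccgo builds.
		fmt.Println("compiler:", runtime.Compiler)
	}
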
diff --git a/src/runtime/complex.go b/src/runtime/complex.go
new file mode 100644
index 000000000..ec50f8947
--- /dev/null
+++ b/src/runtime/complex.go
@@ -0,0 +1,52 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func complex128div(n complex128, d complex128) complex128 {
+ // Special cases as in C99.
+ ninf := real(n) == posinf || real(n) == neginf ||
+ imag(n) == posinf || imag(n) == neginf
+ dinf := real(d) == posinf || real(d) == neginf ||
+ imag(d) == posinf || imag(d) == neginf
+
+ nnan := !ninf && (real(n) != real(n) || imag(n) != imag(n))
+ dnan := !dinf && (real(d) != real(d) || imag(d) != imag(d))
+
+ switch {
+ case nnan || dnan:
+ return complex(nan, nan)
+ case ninf && !dinf:
+ return complex(posinf, posinf)
+ case !ninf && dinf:
+ return complex(0, 0)
+ case real(d) == 0 && imag(d) == 0:
+ if real(n) == 0 && imag(n) == 0 {
+ return complex(nan, nan)
+ } else {
+ return complex(posinf, posinf)
+ }
+ default:
+ // Standard complex arithmetic, factored to avoid unnecessary overflow.
+ a := real(d)
+ if a < 0 {
+ a = -a
+ }
+ b := imag(d)
+ if b < 0 {
+ b = -b
+ }
+ if a <= b {
+ ratio := real(d) / imag(d)
+ denom := real(d)*ratio + imag(d)
+ return complex((real(n)*ratio+imag(n))/denom,
+ (imag(n)*ratio-real(n))/denom)
+ } else {
+ ratio := imag(d) / real(d)
+ denom := imag(d)*ratio + real(d)
+ return complex((imag(n)*ratio+real(n))/denom,
+ (imag(n)-real(n)*ratio)/denom)
+ }
+ }
+}
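
A small sketch of how the C99 special cases above surface through ordinary complex128 division; the expected results in the comments follow the switch in complex128div.

	package main

	import (
		"fmt"
		"math/cmplx"
	)

	func main() {
		n := complex(1, 1)
		zero := complex(0, 0)

		fmt.Println(n / zero)        // (+Inf+Infi): nonzero numerator, zero denominator
		fmt.Println(zero / zero)     // (NaN+NaNi): zero divided by zero
		fmt.Println(cmplx.Inf() / n) // (+Inf+Infi): infinite numerator, finite denominator
		fmt.Println(n / cmplx.Inf()) // (0+0i): finite numerator, infinite denominator
		fmt.Println(cmplx.NaN() / n) // (NaN+NaNi): NaN numerator
	}
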
diff --git a/src/runtime/complex_test.go b/src/runtime/complex_test.go
new file mode 100644
index 000000000..f41e6a357
--- /dev/null
+++ b/src/runtime/complex_test.go
@@ -0,0 +1,67 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "math/cmplx"
+ "testing"
+)
+
+var result complex128
+
+func BenchmarkComplex128DivNormal(b *testing.B) {
+ d := 15 + 2i
+ n := 32 + 3i
+ res := 0i
+ for i := 0; i < b.N; i++ {
+ n += 0.1i
+ res += n / d
+ }
+ result = res
+}
+
+func BenchmarkComplex128DivNisNaN(b *testing.B) {
+ d := cmplx.NaN()
+ n := 32 + 3i
+ res := 0i
+ for i := 0; i < b.N; i++ {
+ n += 0.1i
+ res += n / d
+ }
+ result = res
+}
+
+func BenchmarkComplex128DivDisNaN(b *testing.B) {
+ d := 15 + 2i
+ n := cmplx.NaN()
+ res := 0i
+ for i := 0; i < b.N; i++ {
+ d += 0.1i
+ res += n / d
+ }
+ result = res
+}
+
+func BenchmarkComplex128DivNisInf(b *testing.B) {
+ d := 15 + 2i
+ n := cmplx.Inf()
+ res := 0i
+ for i := 0; i < b.N; i++ {
+ d += 0.1i
+ res += n / d
+ }
+ result = res
+}
+
+func BenchmarkComplex128DivDisInf(b *testing.B) {
+ d := cmplx.Inf()
+ n := 32 + 3i
+ res := 0i
+ for i := 0; i < b.N; i++ {
+ n += 0.1i
+ res += n / d
+ }
+ result = res
+}
diff --git a/src/runtime/cpuprof.go b/src/runtime/cpuprof.go
new file mode 100644
index 000000000..8b1c1c632
--- /dev/null
+++ b/src/runtime/cpuprof.go
@@ -0,0 +1,425 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// CPU profiling.
+// Based on algorithms and data structures used in
+// http://code.google.com/p/google-perftools/.
+//
+// The main difference between this code and the google-perftools
+// code is that this code is written to allow copying the profile data
+// to an arbitrary io.Writer, while the google-perftools code always
+// writes to an operating system file.
+//
+// The signal handler for the profiling clock tick adds a new stack trace
+// to a hash table tracking counts for recent traces. Most clock ticks
+// hit in the cache. In the event of a cache miss, an entry must be
+// evicted from the hash table, copied to a log that will eventually be
+// written as profile data. The google-perftools code flushed the
+// log itself during the signal handler. This code cannot do that, because
+// the io.Writer might block or need system calls or locks that are not
+// safe to use from within the signal handler. Instead, we split the log
+// into two halves and let the signal handler fill one half while a goroutine
+// is writing out the other half. When the signal handler fills its half, it
+// offers to swap with the goroutine. If the writer is not done with its half,
+// we lose the stack trace for this clock tick (and record that loss).
+// The goroutine interacts with the signal handler by calling getprofile() to
+// get the next log piece to write, implicitly handing back the last log
+// piece it obtained.
+//
+// The state of this dance between the signal handler and the goroutine
+// is encoded in the Profile.handoff field. If handoff == 0, then the goroutine
+// is not using either log half and is waiting (or will soon be waiting) for
+// a new piece by calling notesleep(&p->wait). If the signal handler
+// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
+// to wake the goroutine. The value indicates the number of entries in the
+// log half being handed off. The goroutine leaves the non-zero value in
+// place until it has finished processing the log half and then flips the number
+// back to zero. Setting the high bit in handoff means that the profiling is over,
+// and the goroutine is now in charge of flushing the data left in the hash table
+// to the log and returning that data.
+//
+// The handoff field is manipulated using atomic operations.
+// For the most part, the manipulation of handoff is orderly: if handoff == 0
+// then the signal handler owns it and can change it to non-zero.
+// If handoff != 0 then the goroutine owns it and can change it to zero.
+// If that were the end of the story then we would not need to manipulate
+// handoff using atomic operations. The operations are needed, however,
+// in order to let the log closer set the high bit to indicate "EOF" safely
+// in the situation when normally the goroutine "owns" handoff.
+
+package runtime
+
+import "unsafe"
+
+const (
+ numBuckets = 1 << 10
+ logSize = 1 << 17
+ assoc = 4
+ maxCPUProfStack = 64
+)
+
+type cpuprofEntry struct {
+ count uintptr
+ depth uintptr
+ stack [maxCPUProfStack]uintptr
+}
+
+type cpuProfile struct {
+ on bool // profiling is on
+ wait note // goroutine waits here
+ count uintptr // tick count
+ evicts uintptr // eviction count
+ lost uintptr // lost ticks that need to be logged
+
+ // Active recent stack traces.
+ hash [numBuckets]struct {
+ entry [assoc]cpuprofEntry
+ }
+
+ // Log of traces evicted from hash.
+ // Signal handler has filled log[toggle][:nlog].
+ // Goroutine is writing log[1-toggle][:handoff].
+ log [2][logSize / 2]uintptr
+ nlog uintptr
+ toggle int32
+ handoff uint32
+
+ // Writer state.
+ // Writer maintains its own toggle to avoid races
+ // looking at signal handler's toggle.
+ wtoggle uint32
+ wholding bool // holding & need to release a log half
+ flushing bool // flushing hash table - profile is over
+ eodSent bool // special end-of-data record sent; => flushing
+}
+
+var (
+ cpuprofLock mutex
+ cpuprof *cpuProfile
+
+ eod = [3]uintptr{0, 1, 0}
+)
+
+func setcpuprofilerate_m() // proc.c
+
+func setcpuprofilerate(hz int32) {
+ g := getg()
+ g.m.scalararg[0] = uintptr(hz)
+ onM(setcpuprofilerate_m)
+}
+
+// lostProfileData is a no-op function used in profiles
+// to mark the number of profiling stack traces that were
+// discarded due to slow data writers.
+func lostProfileData() {}
+
+// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
+// If hz <= 0, SetCPUProfileRate turns off profiling.
+// If the profiler is on, the rate cannot be changed without first turning it off.
+//
+// Most clients should use the runtime/pprof package or
+// the testing package's -test.cpuprofile flag instead of calling
+// SetCPUProfileRate directly.
+func SetCPUProfileRate(hz int) {
+ // Clamp hz to something reasonable.
+ if hz < 0 {
+ hz = 0
+ }
+ if hz > 1000000 {
+ hz = 1000000
+ }
+
+ lock(&cpuprofLock)
+ if hz > 0 {
+ if cpuprof == nil {
+ cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
+ if cpuprof == nil {
+ print("runtime: cpu profiling cannot allocate memory\n")
+ unlock(&cpuprofLock)
+ return
+ }
+ }
+ if cpuprof.on || cpuprof.handoff != 0 {
+ print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
+ unlock(&cpuprofLock)
+ return
+ }
+
+ cpuprof.on = true
+ // pprof binary header format.
+ // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
+ p := &cpuprof.log[0]
+ p[0] = 0 // count for header
+ p[1] = 3 // depth for header
+ p[2] = 0 // version number
+ p[3] = uintptr(1e6 / hz) // period (microseconds)
+ p[4] = 0
+ cpuprof.nlog = 5
+ cpuprof.toggle = 0
+ cpuprof.wholding = false
+ cpuprof.wtoggle = 0
+ cpuprof.flushing = false
+ cpuprof.eodSent = false
+ noteclear(&cpuprof.wait)
+
+ setcpuprofilerate(int32(hz))
+ } else if cpuprof != nil && cpuprof.on {
+ setcpuprofilerate(0)
+ cpuprof.on = false
+
+ // Now add is not running anymore, and getprofile owns the entire log.
+ // Set the high bit in prof->handoff to tell getprofile.
+ for {
+ n := cpuprof.handoff
+ if n&0x80000000 != 0 {
+ print("runtime: setcpuprofile(off) twice\n")
+ }
+ if cas(&cpuprof.handoff, n, n|0x80000000) {
+ if n == 0 {
+ // we did the transition from 0 -> nonzero so we wake getprofile
+ notewakeup(&cpuprof.wait)
+ }
+ break
+ }
+ }
+ }
+ unlock(&cpuprofLock)
+}
+
+func cpuproftick(pc *uintptr, n int32) {
+ if n > maxCPUProfStack {
+ n = maxCPUProfStack
+ }
+ s := (*[maxCPUProfStack]uintptr)(unsafe.Pointer(pc))[:n]
+ cpuprof.add(s)
+}
+
+// add adds the stack trace to the profile.
+// It is called from signal handlers and other limited environments
+// and cannot allocate memory or acquire locks that might be
+// held at the time of the signal, nor can it use substantial amounts
+// of stack. It is allowed to call evict.
+func (p *cpuProfile) add(pc []uintptr) {
+ // Compute hash.
+ h := uintptr(0)
+ for _, x := range pc {
+ h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
+ h += x*31 + x*7 + x*3
+ }
+ p.count++
+
+ // Add to entry count if already present in table.
+ b := &p.hash[h%numBuckets]
+Assoc:
+ for i := range b.entry {
+ e := &b.entry[i]
+ if e.depth != uintptr(len(pc)) {
+ continue
+ }
+ for j := range pc {
+ if e.stack[j] != pc[j] {
+ continue Assoc
+ }
+ }
+ e.count++
+ return
+ }
+
+ // Evict entry with smallest count.
+ var e *cpuprofEntry
+ for i := range b.entry {
+ if e == nil || b.entry[i].count < e.count {
+ e = &b.entry[i]
+ }
+ }
+ if e.count > 0 {
+ if !p.evict(e) {
+ // Could not evict entry. Record lost stack.
+ p.lost++
+ return
+ }
+ p.evicts++
+ }
+
+ // Reuse the newly evicted entry.
+ e.depth = uintptr(len(pc))
+ e.count = 1
+ copy(e.stack[:], pc)
+}
+
+// evict copies the given entry's data into the log, so that
+// the entry can be reused. evict is called from add, which
+// is called from the profiling signal handler, so it must not
+// allocate memory or block. It is safe to call flushlog.
+// evict returns true if the entry was copied to the log,
+// false if there was no room available.
+func (p *cpuProfile) evict(e *cpuprofEntry) bool {
+ d := e.depth
+ nslot := d + 2
+ log := &p.log[p.toggle]
+ if p.nlog+nslot > uintptr(len(p.log[0])) {
+ if !p.flushlog() {
+ return false
+ }
+ log = &p.log[p.toggle]
+ }
+
+ q := p.nlog
+ log[q] = e.count
+ q++
+ log[q] = d
+ q++
+ copy(log[q:], e.stack[:d])
+ q += d
+ p.nlog = q
+ e.count = 0
+ return true
+}
+
+// flushlog tries to flush the current log and switch to the other one.
+// flushlog is called from evict, called from add, called from the signal handler,
+// so it cannot allocate memory or block. It can try to swap logs with
+// the writing goroutine, as explained in the comment at the top of this file.
+func (p *cpuProfile) flushlog() bool {
+ if !cas(&p.handoff, 0, uint32(p.nlog)) {
+ return false
+ }
+ notewakeup(&p.wait)
+
+ p.toggle = 1 - p.toggle
+ log := &p.log[p.toggle]
+ q := uintptr(0)
+ if p.lost > 0 {
+ lostPC := funcPC(lostProfileData)
+ log[0] = p.lost
+ log[1] = 1
+ log[2] = lostPC
+ q = 3
+ p.lost = 0
+ }
+ p.nlog = q
+ return true
+}
+
+// getprofile blocks until the next block of profiling data is available
+// and returns it as a []byte. It is called from the writing goroutine.
+func (p *cpuProfile) getprofile() []byte {
+ if p == nil {
+ return nil
+ }
+
+ if p.wholding {
+ // Release previous log to signal handling side.
+ // Loop because we are racing against SetCPUProfileRate(0).
+ for {
+ n := p.handoff
+ if n == 0 {
+ print("runtime: phase error during cpu profile handoff\n")
+ return nil
+ }
+ if n&0x80000000 != 0 {
+ p.wtoggle = 1 - p.wtoggle
+ p.wholding = false
+ p.flushing = true
+ goto Flush
+ }
+ if cas(&p.handoff, n, 0) {
+ break
+ }
+ }
+ p.wtoggle = 1 - p.wtoggle
+ p.wholding = false
+ }
+
+ if p.flushing {
+ goto Flush
+ }
+
+ if !p.on && p.handoff == 0 {
+ return nil
+ }
+
+ // Wait for new log.
+ notetsleepg(&p.wait, -1)
+ noteclear(&p.wait)
+
+ switch n := p.handoff; {
+ case n == 0:
+ print("runtime: phase error during cpu profile wait\n")
+ return nil
+ case n == 0x80000000:
+ p.flushing = true
+ goto Flush
+ default:
+ n &^= 0x80000000
+
+ // Return new log to caller.
+ p.wholding = true
+
+ return uintptrBytes(p.log[p.wtoggle][:n])
+ }
+
+ // In flush mode.
+ // Add is no longer being called. We own the log.
+ // Also, p->handoff is non-zero, so flushlog will return false.
+ // Evict the hash table into the log and return it.
+Flush:
+ for i := range p.hash {
+ b := &p.hash[i]
+ for j := range b.entry {
+ e := &b.entry[j]
+ if e.count > 0 && !p.evict(e) {
+ // Filled the log. Stop the loop and return what we've got.
+ break Flush
+ }
+ }
+ }
+
+ // Return pending log data.
+ if p.nlog > 0 {
+ // Note that we're using toggle now, not wtoggle,
+ // because we're working on the log directly.
+ n := p.nlog
+ p.nlog = 0
+ return uintptrBytes(p.log[p.toggle][:n])
+ }
+
+ // Made it through the table without finding anything to log.
+ if !p.eodSent {
+ // We may not have space to append this to the partial log buf,
+ // so we always return a new slice for the end-of-data marker.
+ p.eodSent = true
+ return uintptrBytes(eod[:])
+ }
+
+ // Finally done. Clean up and return nil.
+ p.flushing = false
+ if !cas(&p.handoff, p.handoff, 0) {
+ print("runtime: profile flush racing with something\n")
+ }
+ return nil
+}
+
+func uintptrBytes(p []uintptr) (ret []byte) {
+ pp := (*sliceStruct)(unsafe.Pointer(&p))
+ rp := (*sliceStruct)(unsafe.Pointer(&ret))
+
+ rp.array = pp.array
+ rp.len = pp.len * int(unsafe.Sizeof(p[0]))
+ rp.cap = rp.len
+
+ return
+}
+
+// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
+// blocking until data is available. If profiling is turned off and all the profile
+// data accumulated while it was on has been returned, CPUProfile returns nil.
+// The caller must save the returned data before calling CPUProfile again.
+//
+// Most clients should use the runtime/pprof package or
+// the testing package's -test.cpuprofile flag instead of calling
+// CPUProfile directly.
+func CPUProfile() []byte {
+ return cpuprof.getprofile()
+}
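
As the comments on SetCPUProfileRate and CPUProfile say, most programs should go through runtime/pprof rather than calling these directly. A minimal sketch of that recommended path (the output file name is arbitrary):

	package main

	import (
		"log"
		"os"
		"runtime/pprof"
	)

	func main() {
		f, err := os.Create("cpu.prof")
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()

		// StartCPUProfile sets the profiling rate and starts a goroutine
		// that drains CPUProfile into f; StopCPUProfile turns it off.
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()

		busy()
	}

	// busy burns some CPU so the profile has something to record.
	func busy() {
		x := 0
		for i := 0; i < 1e8; i++ {
			x += i
		}
		_ = x
	}
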
diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go
new file mode 100644
index 000000000..4ff0084c2
--- /dev/null
+++ b/src/runtime/crash_cgo_test.go
@@ -0,0 +1,119 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build cgo
+
+package runtime_test
+
+import (
+ "runtime"
+ "testing"
+)
+
+func TestCgoCrashHandler(t *testing.T) {
+ testCrashHandler(t, true)
+}
+
+func TestCgoSignalDeadlock(t *testing.T) {
+ if testing.Short() && runtime.GOOS == "windows" {
+ t.Skip("Skipping in short mode") // takes up to 64 seconds
+ }
+ got := executeTest(t, cgoSignalDeadlockSource, nil)
+ want := "OK\n"
+ if got != want {
+ t.Fatalf("expected %q, but got %q", want, got)
+ }
+}
+
+func TestCgoTraceback(t *testing.T) {
+ got := executeTest(t, cgoTracebackSource, nil)
+ want := "OK\n"
+ if got != want {
+ t.Fatalf("expected %q, but got %q", want, got)
+ }
+}
+
+const cgoSignalDeadlockSource = `
+package main
+
+import "C"
+
+import (
+ "fmt"
+ "runtime"
+ "time"
+)
+
+func main() {
+ runtime.GOMAXPROCS(100)
+ ping := make(chan bool)
+ go func() {
+ for i := 0; ; i++ {
+ runtime.Gosched()
+ select {
+ case done := <-ping:
+ if done {
+ ping <- true
+ return
+ }
+ ping <- true
+ default:
+ }
+ func() {
+ defer func() {
+ recover()
+ }()
+ var s *string
+ *s = ""
+ }()
+ }
+ }()
+ time.Sleep(time.Millisecond)
+ for i := 0; i < 64; i++ {
+ go func() {
+ runtime.LockOSThread()
+ select {}
+ }()
+ go func() {
+ runtime.LockOSThread()
+ select {}
+ }()
+ time.Sleep(time.Millisecond)
+ ping <- false
+ select {
+ case <-ping:
+ case <-time.After(time.Second):
+ fmt.Printf("HANG\n")
+ return
+ }
+ }
+ ping <- true
+ select {
+ case <-ping:
+ case <-time.After(time.Second):
+ fmt.Printf("HANG\n")
+ return
+ }
+ fmt.Printf("OK\n")
+}
+`
+
+const cgoTracebackSource = `
+package main
+
+/* void foo(void) {} */
+import "C"
+
+import (
+ "fmt"
+ "runtime"
+)
+
+func main() {
+ C.foo()
+ buf := make([]byte, 1)
+ runtime.Stack(buf, true)
+ fmt.Printf("OK\n")
+}
+`
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
new file mode 100644
index 000000000..c61fa162f
--- /dev/null
+++ b/src/runtime/crash_test.go
@@ -0,0 +1,382 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "testing"
+ "text/template"
+)
+
+// testEnv excludes GODEBUG from the environment
+// to prevent its output from breaking tests that
+// are trying to parse other command output.
+func testEnv(cmd *exec.Cmd) *exec.Cmd {
+ if cmd.Env != nil {
+ panic("environment already set")
+ }
+ for _, env := range os.Environ() {
+ if strings.HasPrefix(env, "GODEBUG=") {
+ continue
+ }
+ cmd.Env = append(cmd.Env, env)
+ }
+ return cmd
+}
+
+func executeTest(t *testing.T, templ string, data interface{}) string {
+ switch runtime.GOOS {
+ case "android", "nacl":
+ t.Skipf("skipping on %s", runtime.GOOS)
+ }
+
+ checkStaleRuntime(t)
+
+ st := template.Must(template.New("crashSource").Parse(templ))
+
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ src := filepath.Join(dir, "main.go")
+ f, err := os.Create(src)
+ if err != nil {
+ t.Fatalf("failed to create file: %v", err)
+ }
+ err = st.Execute(f, data)
+ if err != nil {
+ f.Close()
+ t.Fatalf("failed to execute template: %v", err)
+ }
+ if err := f.Close(); err != nil {
+ t.Fatalf("failed to close file: %v", err)
+ }
+
+ got, _ := testEnv(exec.Command("go", "run", src)).CombinedOutput()
+ return string(got)
+}
+
+func checkStaleRuntime(t *testing.T) {
+ // 'go run' uses the installed copy of runtime.a, which may be out of date.
+ out, err := testEnv(exec.Command("go", "list", "-f", "{{.Stale}}", "runtime")).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to execute 'go list': %v\n%v", err, string(out))
+ }
+ if string(out) != "false\n" {
+ t.Fatalf("Stale runtime.a. Run 'go install runtime'.")
+ }
+}
+
+func testCrashHandler(t *testing.T, cgo bool) {
+ type crashTest struct {
+ Cgo bool
+ }
+ output := executeTest(t, crashSource, &crashTest{Cgo: cgo})
+ want := "main: recovered done\nnew-thread: recovered done\nsecond-new-thread: recovered done\nmain-again: recovered done\n"
+ if output != want {
+ t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+ }
+}
+
+func TestCrashHandler(t *testing.T) {
+ testCrashHandler(t, false)
+}
+
+func testDeadlock(t *testing.T, source string) {
+ output := executeTest(t, source, nil)
+ want := "fatal error: all goroutines are asleep - deadlock!\n"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
+func TestSimpleDeadlock(t *testing.T) {
+ testDeadlock(t, simpleDeadlockSource)
+}
+
+func TestInitDeadlock(t *testing.T) {
+ testDeadlock(t, initDeadlockSource)
+}
+
+func TestLockedDeadlock(t *testing.T) {
+ testDeadlock(t, lockedDeadlockSource)
+}
+
+func TestLockedDeadlock2(t *testing.T) {
+ testDeadlock(t, lockedDeadlockSource2)
+}
+
+func TestGoexitDeadlock(t *testing.T) {
+ output := executeTest(t, goexitDeadlockSource, nil)
+ want := "no goroutines (main called runtime.Goexit) - deadlock!"
+ if !strings.Contains(output, want) {
+ t.Fatalf("output:\n%s\n\nwant output containing: %s", output, want)
+ }
+}
+
+func TestStackOverflow(t *testing.T) {
+ output := executeTest(t, stackOverflowSource, nil)
+ want := "runtime: goroutine stack exceeds 4194304-byte limit\nfatal error: stack overflow"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
+func TestThreadExhaustion(t *testing.T) {
+ output := executeTest(t, threadExhaustionSource, nil)
+ want := "runtime: program exceeds 10-thread limit\nfatal error: thread exhaustion"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
+func TestRecursivePanic(t *testing.T) {
+ output := executeTest(t, recursivePanicSource, nil)
+ want := `wrap: bad
+panic: again
+
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+
+}
+
+func TestGoexitCrash(t *testing.T) {
+ output := executeTest(t, goexitExitSource, nil)
+ want := "no goroutines (main called runtime.Goexit) - deadlock!"
+ if !strings.Contains(output, want) {
+ t.Fatalf("output:\n%s\n\nwant output containing: %s", output, want)
+ }
+}
+
+func TestGoNil(t *testing.T) {
+ output := executeTest(t, goNilSource, nil)
+ want := "go of nil func value"
+ if !strings.Contains(output, want) {
+ t.Fatalf("output:\n%s\n\nwant output containing: %s", output, want)
+ }
+}
+
+func TestMainGoroutineId(t *testing.T) {
+ output := executeTest(t, mainGoroutineIdSource, nil)
+ want := "panic: test\n\ngoroutine 1 [running]:\n"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
+const crashSource = `
+package main
+
+import (
+ "fmt"
+ "runtime"
+)
+
+{{if .Cgo}}
+import "C"
+{{end}}
+
+func test(name string) {
+ defer func() {
+ if x := recover(); x != nil {
+ fmt.Printf(" recovered")
+ }
+ fmt.Printf(" done\n")
+ }()
+ fmt.Printf("%s:", name)
+ var s *string
+ _ = *s
+ fmt.Print("SHOULD NOT BE HERE")
+}
+
+func testInNewThread(name string) {
+ c := make(chan bool)
+ go func() {
+ runtime.LockOSThread()
+ test(name)
+ c <- true
+ }()
+ <-c
+}
+
+func main() {
+ runtime.LockOSThread()
+ test("main")
+ testInNewThread("new-thread")
+ testInNewThread("second-new-thread")
+ test("main-again")
+}
+`
+
+const simpleDeadlockSource = `
+package main
+func main() {
+ select {}
+}
+`
+
+const initDeadlockSource = `
+package main
+func init() {
+ select {}
+}
+func main() {
+}
+`
+
+const lockedDeadlockSource = `
+package main
+import "runtime"
+func main() {
+ runtime.LockOSThread()
+ select {}
+}
+`
+
+const lockedDeadlockSource2 = `
+package main
+import (
+ "runtime"
+ "time"
+)
+func main() {
+ go func() {
+ runtime.LockOSThread()
+ select {}
+ }()
+ time.Sleep(time.Millisecond)
+ select {}
+}
+`
+
+const goexitDeadlockSource = `
+package main
+import (
+ "runtime"
+)
+
+func F() {
+ for i := 0; i < 10; i++ {
+ }
+}
+
+func main() {
+ go F()
+ go F()
+ runtime.Goexit()
+}
+`
+
+const stackOverflowSource = `
+package main
+
+import "runtime/debug"
+
+func main() {
+ debug.SetMaxStack(4<<20)
+ f(make([]byte, 10))
+}
+
+func f(x []byte) byte {
+ var buf [64<<10]byte
+ return x[0] + f(buf[:])
+}
+`
+
+const threadExhaustionSource = `
+package main
+
+import (
+ "runtime"
+ "runtime/debug"
+)
+
+func main() {
+ debug.SetMaxThreads(10)
+ c := make(chan int)
+ for i := 0; i < 100; i++ {
+ go func() {
+ runtime.LockOSThread()
+ c <- 0
+ select{}
+ }()
+ <-c
+ }
+}
+`
+
+const recursivePanicSource = `
+package main
+
+import (
+ "fmt"
+)
+
+func main() {
+ func() {
+ defer func() {
+ fmt.Println(recover())
+ }()
+ var x [8192]byte
+ func(x [8192]byte) {
+ defer func() {
+ if err := recover(); err != nil {
+ panic("wrap: " + err.(string))
+ }
+ }()
+ panic("bad")
+ }(x)
+ }()
+ panic("again")
+}
+`
+
+const goexitExitSource = `
+package main
+
+import (
+ "runtime"
+ "time"
+)
+
+func main() {
+ go func() {
+ time.Sleep(time.Millisecond)
+ }()
+ i := 0
+ runtime.SetFinalizer(&i, func(p *int) {})
+ runtime.GC()
+ runtime.Goexit()
+}
+`
+
+const goNilSource = `
+package main
+
+func main() {
+ defer func() {
+ recover()
+ }()
+ var f func()
+ go f()
+ select{}
+}
+`
+
+const mainGoroutineIdSource = `
+package main
+func main() {
+ panic("test")
+}
+`
diff --git a/src/runtime/debug.go b/src/runtime/debug.go
new file mode 100644
index 000000000..bb4bd60ed
--- /dev/null
+++ b/src/runtime/debug.go
@@ -0,0 +1,56 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Breakpoint executes a breakpoint trap.
+func Breakpoint()
+
+// LockOSThread wires the calling goroutine to its current operating system thread.
+// Until the calling goroutine exits or calls UnlockOSThread, it will always
+// execute in that thread, and no other goroutine can.
+func LockOSThread()
+
+// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
+// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
+func UnlockOSThread()
+
+// GOMAXPROCS sets the maximum number of CPUs that can be executing
+// simultaneously and returns the previous setting. If n < 1, it does not
+// change the current setting.
+// The number of logical CPUs on the local machine can be queried with NumCPU.
+// This call will go away when the scheduler improves.
+func GOMAXPROCS(n int) int {
+ g := getg()
+ g.m.scalararg[0] = uintptr(n)
+ onM(gomaxprocs_m)
+ n = int(g.m.scalararg[0])
+ g.m.scalararg[0] = 0
+ return n
+}
+
+func gomaxprocs_m() // proc.c
+
+// NumCPU returns the number of logical CPUs on the local machine.
+func NumCPU() int {
+ return int(ncpu)
+}
+
+// NumCgoCall returns the number of cgo calls made by the current process.
+func NumCgoCall() int64 {
+ var n int64
+ for mp := (*m)(atomicloadp(unsafe.Pointer(&allm))); mp != nil; mp = mp.alllink {
+ n += int64(mp.ncgocall)
+ }
+ return n
+}
+
+// NumGoroutine returns the number of goroutines that currently exist.
+func NumGoroutine() int {
+ return int(gcount())
+}
+
+func gcount() int32
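
A short usage sketch for the exported functions above:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		// GOMAXPROCS(0) queries the current setting without changing it.
		fmt.Println("GOMAXPROCS:", runtime.GOMAXPROCS(0))
		fmt.Println("CPUs:      ", runtime.NumCPU())
		fmt.Println("goroutines:", runtime.NumGoroutine())
		fmt.Println("cgo calls: ", runtime.NumCgoCall())
	}
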
diff --git a/src/runtime/debug/debug.c b/src/runtime/debug/debug.c
new file mode 100644
index 000000000..a7292c477
--- /dev/null
+++ b/src/runtime/debug/debug.c
@@ -0,0 +1,9 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Nothing to see here.
+// This file exists so that the go command knows that parts of the
+// package are implemented in C, so that it does not instruct the
+// Go compiler to complain about extern declarations.
+// The actual implementations are in package runtime.
diff --git a/src/runtime/debug/garbage.go b/src/runtime/debug/garbage.go
new file mode 100644
index 000000000..30994f219
--- /dev/null
+++ b/src/runtime/debug/garbage.go
@@ -0,0 +1,147 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package debug
+
+import (
+ "runtime"
+ "sort"
+ "time"
+)
+
+// GCStats collects information about recent garbage collections.
+type GCStats struct {
+ LastGC time.Time // time of last collection
+ NumGC int64 // number of garbage collections
+ PauseTotal time.Duration // total pause for all collections
+ Pause []time.Duration // pause history, most recent first
+ PauseQuantiles []time.Duration
+}
+
+// ReadGCStats reads statistics about garbage collection into stats.
+// The number of entries in the pause history is system-dependent;
+// stats.Pause slice will be reused if large enough, reallocated otherwise.
+// ReadGCStats may use the full capacity of the stats.Pause slice.
+// If stats.PauseQuantiles is non-empty, ReadGCStats fills it with quantiles
+// summarizing the distribution of pause time. For example, if
+// len(stats.PauseQuantiles) is 5, it will be filled with the minimum,
+// 25%, 50%, 75%, and maximum pause times.
+func ReadGCStats(stats *GCStats) {
+ // Create a buffer with space for at least two copies of the
+ // pause history tracked by the runtime. One will be returned
+ // to the caller and the other will be used as a temporary buffer
+ // for computing quantiles.
+ const maxPause = len(((*runtime.MemStats)(nil)).PauseNs)
+ if cap(stats.Pause) < 2*maxPause {
+ stats.Pause = make([]time.Duration, 2*maxPause)
+ }
+
+ // readGCStats fills in the pause history (up to maxPause entries)
+ // and then three more: Unix ns time of last GC, number of GC,
+ // and total pause time in nanoseconds. Here we depend on the
+ // fact that time.Duration's native unit is nanoseconds, so the
+ // pauses and the total pause time do not need any conversion.
+ readGCStats(&stats.Pause)
+ n := len(stats.Pause) - 3
+ stats.LastGC = time.Unix(0, int64(stats.Pause[n]))
+ stats.NumGC = int64(stats.Pause[n+1])
+ stats.PauseTotal = stats.Pause[n+2]
+ stats.Pause = stats.Pause[:n]
+
+ if len(stats.PauseQuantiles) > 0 {
+ if n == 0 {
+ for i := range stats.PauseQuantiles {
+ stats.PauseQuantiles[i] = 0
+ }
+ } else {
+ // There's room for a second copy of the data in stats.Pause.
+ // See the allocation at the top of the function.
+ sorted := stats.Pause[n : n+n]
+ copy(sorted, stats.Pause)
+ sort.Sort(byDuration(sorted))
+ nq := len(stats.PauseQuantiles) - 1
+ for i := 0; i < nq; i++ {
+ stats.PauseQuantiles[i] = sorted[len(sorted)*i/nq]
+ }
+ stats.PauseQuantiles[nq] = sorted[len(sorted)-1]
+ }
+ }
+}
+
+type byDuration []time.Duration
+
+func (x byDuration) Len() int { return len(x) }
+func (x byDuration) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x byDuration) Less(i, j int) bool { return x[i] < x[j] }
+
+// SetGCPercent sets the garbage collection target percentage:
+// a collection is triggered when the ratio of freshly allocated data
+// to live data remaining after the previous collection reaches this percentage.
+// SetGCPercent returns the previous setting.
+// The initial setting is the value of the GOGC environment variable
+// at startup, or 100 if the variable is not set.
+// A negative percentage disables garbage collection.
+func SetGCPercent(percent int) int {
+ old := setGCPercent(int32(percent))
+ runtime.GC()
+ return int(old)
+}
+
+// FreeOSMemory forces a garbage collection followed by an
+// attempt to return as much memory to the operating system
+// as possible. (Even if this is not called, the runtime gradually
+// returns memory to the operating system in a background task.)
+func FreeOSMemory() {
+ freeOSMemory()
+}
+
+// SetMaxStack sets the maximum amount of memory that
+// can be used by a single goroutine stack.
+// If any goroutine exceeds this limit while growing its stack,
+// the program crashes.
+// SetMaxStack returns the previous setting.
+// The initial setting is 1 GB on 64-bit systems, 250 MB on 32-bit systems.
+//
+// SetMaxStack is useful mainly for limiting the damage done by
+// goroutines that enter an infinite recursion. It only limits future
+// stack growth.
+func SetMaxStack(bytes int) int {
+ return setMaxStack(bytes)
+}
+
+// SetMaxThreads sets the maximum number of operating system
+// threads that the Go program can use. If it attempts to use more than
+// this many, the program crashes.
+// SetMaxThreads returns the previous setting.
+// The initial setting is 10,000 threads.
+//
+// The limit controls the number of operating system threads, not the number
+// of goroutines. A Go program creates a new thread only when a goroutine
+// is ready to run but all the existing threads are blocked in system calls, cgo calls,
+// or are locked to other goroutines due to use of runtime.LockOSThread.
+//
+// SetMaxThreads is useful mainly for limiting the damage done by
+// programs that create an unbounded number of threads. The idea is
+// to take down the program before it takes down the operating system.
+func SetMaxThreads(threads int) int {
+ return setMaxThreads(threads)
+}
+
+// SetPanicOnFault controls the runtime's behavior when a program faults
+// at an unexpected (non-nil) address. Such faults are typically caused by
+// bugs such as runtime memory corruption, so the default response is to crash
+// the program. Programs working with memory-mapped files or unsafe
+// manipulation of memory may cause faults at non-nil addresses in less
+// dramatic situations; SetPanicOnFault allows such programs to request
+// that the runtime trigger only a panic, not a crash.
+// SetPanicOnFault applies only to the current goroutine.
+// It returns the previous setting.
+func SetPanicOnFault(enabled bool) bool {
+ return setPanicOnFault(enabled)
+}
+
+// WriteHeapDump writes a description of the heap and the objects in
+// it to the given file descriptor.
+// The heap dump format is defined at http://golang.org/s/go13heapdump.
+func WriteHeapDump(fd uintptr)
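
A usage sketch for ReadGCStats with pause quantiles, following the five-quantile example in its comment:

	package main

	import (
		"fmt"
		"runtime"
		"runtime/debug"
		"time"
	)

	func main() {
		// Generate a little garbage and force a few collections.
		for i := 0; i < 3; i++ {
			_ = make([]byte, 1<<20)
			runtime.GC()
		}

		var stats debug.GCStats
		stats.PauseQuantiles = make([]time.Duration, 5) // min, 25%, 50%, 75%, max
		debug.ReadGCStats(&stats)

		fmt.Println("collections:", stats.NumGC)
		fmt.Println("total pause:", stats.PauseTotal)
		fmt.Println("quantiles:  ", stats.PauseQuantiles)
	}
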
diff --git a/src/runtime/debug/garbage_test.go b/src/runtime/debug/garbage_test.go
new file mode 100644
index 000000000..149bafc6f
--- /dev/null
+++ b/src/runtime/debug/garbage_test.go
@@ -0,0 +1,102 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package debug
+
+import (
+ "runtime"
+ "testing"
+ "time"
+)
+
+func TestReadGCStats(t *testing.T) {
+ defer SetGCPercent(SetGCPercent(-1))
+
+ var stats GCStats
+ var mstats runtime.MemStats
+ var min, max time.Duration
+
+ // First ReadGCStats will allocate, second should not,
+ // especially if we follow up with an explicit garbage collection.
+ stats.PauseQuantiles = make([]time.Duration, 10)
+ ReadGCStats(&stats)
+ runtime.GC()
+
+ // Assume these will return same data: no GC during ReadGCStats.
+ ReadGCStats(&stats)
+ runtime.ReadMemStats(&mstats)
+
+ if stats.NumGC != int64(mstats.NumGC) {
+ t.Errorf("stats.NumGC = %d, but mstats.NumGC = %d", stats.NumGC, mstats.NumGC)
+ }
+ if stats.PauseTotal != time.Duration(mstats.PauseTotalNs) {
+ t.Errorf("stats.PauseTotal = %d, but mstats.PauseTotalNs = %d", stats.PauseTotal, mstats.PauseTotalNs)
+ }
+ if stats.LastGC.UnixNano() != int64(mstats.LastGC) {
+ t.Errorf("stats.LastGC.UnixNano = %d, but mstats.LastGC = %d", stats.LastGC.UnixNano(), mstats.LastGC)
+ }
+ n := int(mstats.NumGC)
+ if n > len(mstats.PauseNs) {
+ n = len(mstats.PauseNs)
+ }
+ if len(stats.Pause) != n {
+ t.Errorf("len(stats.Pause) = %d, want %d", len(stats.Pause), n)
+ } else {
+ off := (int(mstats.NumGC) + len(mstats.PauseNs) - 1) % len(mstats.PauseNs)
+ for i := 0; i < n; i++ {
+ dt := stats.Pause[i]
+ if dt != time.Duration(mstats.PauseNs[off]) {
+ t.Errorf("stats.Pause[%d] = %d, want %d", i, dt, mstats.PauseNs[off])
+ }
+ if max < dt {
+ max = dt
+ }
+ if min > dt || i == 0 {
+ min = dt
+ }
+ off = (off + len(mstats.PauseNs) - 1) % len(mstats.PauseNs)
+ }
+ }
+
+ q := stats.PauseQuantiles
+ nq := len(q)
+ if q[0] != min || q[nq-1] != max {
+ t.Errorf("stats.PauseQuantiles = [%d, ..., %d], want [%d, ..., %d]", q[0], q[nq-1], min, max)
+ }
+
+ for i := 0; i < nq-1; i++ {
+ if q[i] > q[i+1] {
+ t.Errorf("stats.PauseQuantiles[%d]=%d > stats.PauseQuantiles[%d]=%d", i, q[i], i+1, q[i+1])
+ }
+ }
+}
+
+var big = make([]byte, 1<<20)
+
+func TestFreeOSMemory(t *testing.T) {
+ var ms1, ms2 runtime.MemStats
+
+ if big == nil {
+ t.Skip("test is not reliable when run multiple times")
+ }
+ big = nil
+ runtime.GC()
+ runtime.ReadMemStats(&ms1)
+ FreeOSMemory()
+ runtime.ReadMemStats(&ms2)
+ if ms1.HeapReleased >= ms2.HeapReleased {
+ t.Errorf("released before=%d; released after=%d; did not go up", ms1.HeapReleased, ms2.HeapReleased)
+ }
+}
+
+func TestSetGCPercent(t *testing.T) {
+ // Test that the variable is being set and returned correctly.
+ // Assume the percentage itself is implemented fine during GC,
+ // which is harder to test.
+ old := SetGCPercent(123)
+ new := SetGCPercent(old)
+ if new != 123 {
+ t.Errorf("SetGCPercent(123); SetGCPercent(x) = %d, want 123", new)
+ }
+}
diff --git a/src/runtime/debug/heapdump_test.go b/src/runtime/debug/heapdump_test.go
new file mode 100644
index 000000000..920190115
--- /dev/null
+++ b/src/runtime/debug/heapdump_test.go
@@ -0,0 +1,33 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package debug
+
+import (
+ "io/ioutil"
+ "os"
+ "runtime"
+ "testing"
+)
+
+func TestWriteHeapDumpNonempty(t *testing.T) {
+ if runtime.GOOS == "nacl" {
+ t.Skip("WriteHeapDump is not available on NaCl.")
+ }
+ f, err := ioutil.TempFile("", "heapdumptest")
+ if err != nil {
+ t.Fatalf("TempFile failed: %v", err)
+ }
+ defer os.Remove(f.Name())
+ defer f.Close()
+ WriteHeapDump(f.Fd())
+ fi, err := f.Stat()
+ if err != nil {
+ t.Fatalf("Stat failed: %v", err)
+ }
+ const minSize = 1
+ if size := fi.Size(); size < minSize {
+ t.Fatalf("Heap dump size %d bytes, expected at least %d bytes", size, minSize)
+ }
+}
diff --git a/src/runtime/debug/stack.go b/src/runtime/debug/stack.go
new file mode 100644
index 000000000..c29b0a226
--- /dev/null
+++ b/src/runtime/debug/stack.go
@@ -0,0 +1,98 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package debug contains facilities for programs to debug themselves while
+// they are running.
+package debug
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "runtime"
+)
+
+var (
+ dunno = []byte("???")
+ centerDot = []byte("·")
+ dot = []byte(".")
+ slash = []byte("/")
+)
+
+// PrintStack prints to standard error the stack trace returned by Stack.
+func PrintStack() {
+ os.Stderr.Write(stack())
+}
+
+// Stack returns a formatted stack trace of the goroutine that calls it.
+// For each routine, it includes the source line information and PC value,
+// then attempts to discover, for Go functions, the calling function or
+// method and the text of the line containing the invocation.
+//
+// This function is deprecated. Use package runtime's Stack instead.
+func Stack() []byte {
+ return stack()
+}
+
+// stack implements Stack, skipping 2 frames
+func stack() []byte {
+ buf := new(bytes.Buffer) // the returned data
+ // As we loop, we open files and read them. These variables record the currently
+ // loaded file.
+ var lines [][]byte
+ var lastFile string
+ for i := 2; ; i++ { // Caller we care about is the user, 2 frames up
+ pc, file, line, ok := runtime.Caller(i)
+ if !ok {
+ break
+ }
+ // Print this much at least. If we can't find the source, it won't show.
+ fmt.Fprintf(buf, "%s:%d (0x%x)\n", file, line, pc)
+ if file != lastFile {
+ data, err := ioutil.ReadFile(file)
+ if err != nil {
+ continue
+ }
+ lines = bytes.Split(data, []byte{'\n'})
+ lastFile = file
+ }
+ line-- // in stack trace, lines are 1-indexed but our array is 0-indexed
+ fmt.Fprintf(buf, "\t%s: %s\n", function(pc), source(lines, line))
+ }
+ return buf.Bytes()
+}
+
+// source returns a space-trimmed slice of the n'th line.
+func source(lines [][]byte, n int) []byte {
+ if n < 0 || n >= len(lines) {
+ return dunno
+ }
+ return bytes.Trim(lines[n], " \t")
+}
+
+// function returns, if possible, the name of the function containing the PC.
+func function(pc uintptr) []byte {
+ fn := runtime.FuncForPC(pc)
+ if fn == nil {
+ return dunno
+ }
+ name := []byte(fn.Name())
+ // The name includes the path name to the package, which is unnecessary
+ // since the file name is already included. Plus, it has center dots.
+ // That is, we see
+ // runtime/debug.*T·ptrmethod
+ // and want
+ // *T.ptrmethod
+// Since the package path might contain dots (e.g. code.google.com/...),
+ // we first remove the path prefix if there is one.
+ if lastslash := bytes.LastIndex(name, slash); lastslash >= 0 {
+ name = name[lastslash+1:]
+ }
+ if period := bytes.Index(name, dot); period >= 0 {
+ name = name[period+1:]
+ }
+ name = bytes.Replace(name, centerDot, dot, -1)
+ return name
+}
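
A usage sketch for the helpers above, in the place they most often appear: a deferred recover.

	package main

	import (
		"fmt"
		"runtime/debug"
	)

	func main() {
		defer func() {
			if r := recover(); r != nil {
				fmt.Println("recovered:", r)
				// Write the formatted trace of this goroutine to standard error.
				debug.PrintStack()
			}
		}()
		panic("boom")
	}
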
diff --git a/src/runtime/debug/stack_test.go b/src/runtime/debug/stack_test.go
new file mode 100644
index 000000000..28691ee4d
--- /dev/null
+++ b/src/runtime/debug/stack_test.go
@@ -0,0 +1,62 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package debug
+
+import (
+ "strings"
+ "testing"
+)
+
+type T int
+
+func (t *T) ptrmethod() []byte {
+ return Stack()
+}
+func (t T) method() []byte {
+ return t.ptrmethod()
+}
+
+/*
+ The traceback should look something like this, modulo line numbers and hex constants.
+ Don't worry much about the base levels, but check the ones in our own package.
+
+ /Users/r/go/src/runtime/debug/stack_test.go:15 (0x13878)
+ (*T).ptrmethod: return Stack()
+ /Users/r/go/src/runtime/debug/stack_test.go:18 (0x138dd)
+ T.method: return t.ptrmethod()
+ /Users/r/go/src/runtime/debug/stack_test.go:23 (0x13920)
+ TestStack: b := T(0).method()
+ /Users/r/go/src/testing/testing.go:132 (0x14a7a)
+ tRunner: test.F(t)
+ /Users/r/go/src/runtime/proc.c:145 (0xc970)
+ ???: runtime·unlock(&runtime·sched);
+*/
+func TestStack(t *testing.T) {
+ b := T(0).method()
+ lines := strings.Split(string(b), "\n")
+ if len(lines) < 6 {
+ t.Fatal("too few lines")
+ }
+ n := 0
+ frame := func(line, code string) {
+ check(t, lines[n], line)
+ n++
+ // The source might not be available while running the test.
+ if strings.HasPrefix(lines[n], "\t") {
+ check(t, lines[n], code)
+ n++
+ }
+ }
+ frame("src/runtime/debug/stack_test.go", "\t(*T).ptrmethod: return Stack()")
+ frame("src/runtime/debug/stack_test.go", "\tT.method: return t.ptrmethod()")
+ frame("src/runtime/debug/stack_test.go", "\tTestStack: b := T(0).method()")
+ frame("src/testing/testing.go", "")
+}
+
+func check(t *testing.T, line, has string) {
+ if strings.Index(line, has) < 0 {
+ t.Errorf("expected %q in %q", has, line)
+ }
+}
diff --git a/src/runtime/debug/stubs.go b/src/runtime/debug/stubs.go
new file mode 100644
index 000000000..8fba6cf34
--- /dev/null
+++ b/src/runtime/debug/stubs.go
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package debug
+
+import (
+ "time"
+)
+
+// Uses assembly to call corresponding runtime-internal functions.
+func setMaxStack(int) int
+func setGCPercent(int32) int32
+func setPanicOnFault(bool) bool
+func setMaxThreads(int) int
+
+// Implemented in package runtime.
+func readGCStats(*[]time.Duration)
+func enableGC(bool) bool
+func freeOSMemory()
diff --git a/src/runtime/debug/stubs.s b/src/runtime/debug/stubs.s
new file mode 100644
index 000000000..d56274f2d
--- /dev/null
+++ b/src/runtime/debug/stubs.s
@@ -0,0 +1,21 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#ifdef GOARCH_arm
+#define JMP B
+#endif
+
+TEXT ·setMaxStack(SB),NOSPLIT,$0-0
+ JMP runtime·setMaxStack(SB)
+
+TEXT ·setGCPercent(SB),NOSPLIT,$0-0
+ JMP runtime·setGCPercent(SB)
+
+TEXT ·setPanicOnFault(SB),NOSPLIT,$0-0
+ JMP runtime·setPanicOnFault(SB)
+
+TEXT ·setMaxThreads(SB),NOSPLIT,$0-0
+ JMP runtime·setMaxThreads(SB)
diff --git a/src/runtime/defs.c b/src/runtime/defs.c
new file mode 100644
index 000000000..b0a9b20d7
--- /dev/null
+++ b/src/runtime/defs.c
@@ -0,0 +1,15 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file is compiled by cmd/dist to obtain debug information
+// about the given header files.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "type.h"
+#include "race.h"
+#include "chan.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
diff --git a/src/runtime/defs1_linux.go b/src/runtime/defs1_linux.go
new file mode 100644
index 000000000..392cc4ab5
--- /dev/null
+++ b/src/runtime/defs1_linux.go
@@ -0,0 +1,37 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo -cdefs
+
+GOARCH=amd64 cgo -cdefs defs.go defs1.go >amd64/defs.h
+*/
+
+package runtime
+
+/*
+#include <ucontext.h>
+#include <fcntl.h>
+*/
+import "C"
+
+const (
+ O_RDONLY = C.O_RDONLY
+ O_CLOEXEC = C.O_CLOEXEC
+)
+
+type Usigset C.__sigset_t
+type Fpxreg C.struct__libc_fpxreg
+type Xmmreg C.struct__libc_xmmreg
+type Fpstate C.struct__libc_fpstate
+type Fpxreg1 C.struct__fpxreg
+type Xmmreg1 C.struct__xmmreg
+type Fpstate1 C.struct__fpstate
+type Fpreg1 C.struct__fpreg
+type SigaltstackT C.struct_sigaltstack
+type Mcontext C.mcontext_t
+type Ucontext C.ucontext_t
+type Sigcontext C.struct_sigcontext
diff --git a/src/runtime/defs2_linux.go b/src/runtime/defs2_linux.go
new file mode 100644
index 000000000..980df9ec3
--- /dev/null
+++ b/src/runtime/defs2_linux.go
@@ -0,0 +1,146 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+ * Input to cgo -cdefs
+
+GOARCH=386 go tool cgo -cdefs defs2_linux.go >defs_linux_386.h
+
+The asm header tricks we have to use for Linux on amd64
+(see defs.c and defs1.c) don't work here, so this is yet another
+file. Sigh.
+*/
+
+package runtime
+
+/*
+#cgo CFLAGS: -I/tmp/linux/arch/x86/include -I/tmp/linux/include -D_LOOSE_KERNEL_NAMES -D__ARCH_SI_UID_T=__kernel_uid32_t
+
+#define size_t __kernel_size_t
+#define pid_t int
+#include <asm/signal.h>
+#include <asm/mman.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/siginfo.h>
+#include <asm-generic/errno.h>
+#include <asm-generic/fcntl.h>
+#include <asm-generic/poll.h>
+#include <linux/eventpoll.h>
+
+// This is the sigaction structure from the Linux 2.1.68 kernel which
+// is used with the rt_sigaction system call. For 386 this is not
+// defined in any public header file.
+
+struct kernel_sigaction {
+ __sighandler_t k_sa_handler;
+ unsigned long sa_flags;
+ void (*sa_restorer) (void);
+ unsigned long long sa_mask;
+};
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EAGAIN = C.EAGAIN
+ ENOMEM = C.ENOMEM
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANONYMOUS
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_DONTNEED = C.MADV_DONTNEED
+
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+ SA_RESTORER = C.SA_RESTORER
+ SA_SIGINFO = C.SA_SIGINFO
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGBUS = C.SIGBUS
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGUSR1 = C.SIGUSR1
+ SIGSEGV = C.SIGSEGV
+ SIGUSR2 = C.SIGUSR2
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGSTKFLT = C.SIGSTKFLT
+ SIGCHLD = C.SIGCHLD
+ SIGCONT = C.SIGCONT
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGURG = C.SIGURG
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGIO = C.SIGIO
+ SIGPWR = C.SIGPWR
+ SIGSYS = C.SIGSYS
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ O_RDONLY = C.O_RDONLY
+ O_CLOEXEC = C.O_CLOEXEC
+
+ EPOLLIN = C.POLLIN
+ EPOLLOUT = C.POLLOUT
+ EPOLLERR = C.POLLERR
+ EPOLLHUP = C.POLLHUP
+ EPOLLRDHUP = C.POLLRDHUP
+ EPOLLET = C.EPOLLET
+ EPOLL_CLOEXEC = C.EPOLL_CLOEXEC
+ EPOLL_CTL_ADD = C.EPOLL_CTL_ADD
+ EPOLL_CTL_DEL = C.EPOLL_CTL_DEL
+ EPOLL_CTL_MOD = C.EPOLL_CTL_MOD
+)
+
+type Fpreg C.struct__fpreg
+type Fpxreg C.struct__fpxreg
+type Xmmreg C.struct__xmmreg
+type Fpstate C.struct__fpstate
+type Timespec C.struct_timespec
+type Timeval C.struct_timeval
+type Sigaction C.struct_kernel_sigaction
+type Siginfo C.siginfo_t
+type SigaltstackT C.struct_sigaltstack
+type Sigcontext C.struct_sigcontext
+type Ucontext C.struct_ucontext
+type Itimerval C.struct_itimerval
+type EpollEvent C.struct_epoll_event
diff --git a/src/runtime/defs_android_arm.h b/src/runtime/defs_android_arm.h
new file mode 100644
index 000000000..3611b3a10
--- /dev/null
+++ b/src/runtime/defs_android_arm.h
@@ -0,0 +1,3 @@
+// TODO: Generate using cgo like defs_linux_{386,amd64}.h
+
+#include "defs_linux_arm.h"
diff --git a/src/runtime/defs_arm_linux.go b/src/runtime/defs_arm_linux.go
new file mode 100644
index 000000000..afd6897e3
--- /dev/null
+++ b/src/runtime/defs_arm_linux.go
@@ -0,0 +1,124 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+On a Debian Lenny arm linux distribution:
+
+cgo -cdefs defs_arm.c >arm/defs.h
+*/
+
+package runtime
+
+/*
+#cgo CFLAGS: -I/usr/src/linux-headers-2.6.26-2-versatile/include
+
+#define __ARCH_SI_UID_T int
+#include <asm/signal.h>
+#include <asm/mman.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/siginfo.h>
+#include <linux/time.h>
+
+struct xsiginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+ char _sifields[4];
+};
+
+#undef sa_handler
+#undef sa_flags
+#undef sa_restorer
+#undef sa_mask
+
+struct xsigaction {
+ void (*sa_handler)(void);
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ unsigned int sa_mask; // mask last for extensibility
+};
+*/
+import "C"
+
+const (
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANONYMOUS
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_DONTNEED = C.MADV_DONTNEED
+
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+ SA_RESTORER = C.SA_RESTORER
+ SA_SIGINFO = C.SA_SIGINFO
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGBUS = C.SIGBUS
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGUSR1 = C.SIGUSR1
+ SIGSEGV = C.SIGSEGV
+ SIGUSR2 = C.SIGUSR2
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGSTKFLT = C.SIGSTKFLT
+ SIGCHLD = C.SIGCHLD
+ SIGCONT = C.SIGCONT
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGURG = C.SIGURG
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGIO = C.SIGIO
+ SIGPWR = C.SIGPWR
+ SIGSYS = C.SIGSYS
+
+ FPE_INTDIV = C.FPE_INTDIV & 0xFFFF
+ FPE_INTOVF = C.FPE_INTOVF & 0xFFFF
+ FPE_FLTDIV = C.FPE_FLTDIV & 0xFFFF
+ FPE_FLTOVF = C.FPE_FLTOVF & 0xFFFF
+ FPE_FLTUND = C.FPE_FLTUND & 0xFFFF
+ FPE_FLTRES = C.FPE_FLTRES & 0xFFFF
+ FPE_FLTINV = C.FPE_FLTINV & 0xFFFF
+ FPE_FLTSUB = C.FPE_FLTSUB & 0xFFFF
+
+ BUS_ADRALN = C.BUS_ADRALN & 0xFFFF
+ BUS_ADRERR = C.BUS_ADRERR & 0xFFFF
+ BUS_OBJERR = C.BUS_OBJERR & 0xFFFF
+
+ SEGV_MAPERR = C.SEGV_MAPERR & 0xFFFF
+ SEGV_ACCERR = C.SEGV_ACCERR & 0xFFFF
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_PROF = C.ITIMER_PROF
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+)
+
+type Timespec C.struct_timespec
+type SigaltstackT C.struct_sigaltstack
+type Sigcontext C.struct_sigcontext
+type Ucontext C.struct_ucontext
+type Timeval C.struct_timeval
+type Itimerval C.struct_itimerval
+type Siginfo C.struct_xsiginfo
+type Sigaction C.struct_xsigaction
diff --git a/src/runtime/defs_darwin.go b/src/runtime/defs_darwin.go
new file mode 100644
index 000000000..722013ba9
--- /dev/null
+++ b/src/runtime/defs_darwin.go
@@ -0,0 +1,179 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_darwin.go >defs_darwin_amd64.h
+GOARCH=386 go tool cgo -cdefs defs_darwin.go >defs_darwin_386.h
+*/
+
+package runtime
+
+/*
+#define __DARWIN_UNIX03 0
+#include <mach/mach.h>
+#include <mach/message.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/event.h>
+#include <sys/mman.h>
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EFAULT = C.EFAULT
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANON
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_DONTNEED = C.MADV_DONTNEED
+ MADV_FREE = C.MADV_FREE
+
+ MACH_MSG_TYPE_MOVE_RECEIVE = C.MACH_MSG_TYPE_MOVE_RECEIVE
+ MACH_MSG_TYPE_MOVE_SEND = C.MACH_MSG_TYPE_MOVE_SEND
+ MACH_MSG_TYPE_MOVE_SEND_ONCE = C.MACH_MSG_TYPE_MOVE_SEND_ONCE
+ MACH_MSG_TYPE_COPY_SEND = C.MACH_MSG_TYPE_COPY_SEND
+ MACH_MSG_TYPE_MAKE_SEND = C.MACH_MSG_TYPE_MAKE_SEND
+ MACH_MSG_TYPE_MAKE_SEND_ONCE = C.MACH_MSG_TYPE_MAKE_SEND_ONCE
+ MACH_MSG_TYPE_COPY_RECEIVE = C.MACH_MSG_TYPE_COPY_RECEIVE
+
+ MACH_MSG_PORT_DESCRIPTOR = C.MACH_MSG_PORT_DESCRIPTOR
+ MACH_MSG_OOL_DESCRIPTOR = C.MACH_MSG_OOL_DESCRIPTOR
+ MACH_MSG_OOL_PORTS_DESCRIPTOR = C.MACH_MSG_OOL_PORTS_DESCRIPTOR
+ MACH_MSG_OOL_VOLATILE_DESCRIPTOR = C.MACH_MSG_OOL_VOLATILE_DESCRIPTOR
+
+ MACH_MSGH_BITS_COMPLEX = C.MACH_MSGH_BITS_COMPLEX
+
+ MACH_SEND_MSG = C.MACH_SEND_MSG
+ MACH_RCV_MSG = C.MACH_RCV_MSG
+ MACH_RCV_LARGE = C.MACH_RCV_LARGE
+
+ MACH_SEND_TIMEOUT = C.MACH_SEND_TIMEOUT
+ MACH_SEND_INTERRUPT = C.MACH_SEND_INTERRUPT
+ MACH_SEND_ALWAYS = C.MACH_SEND_ALWAYS
+ MACH_SEND_TRAILER = C.MACH_SEND_TRAILER
+ MACH_RCV_TIMEOUT = C.MACH_RCV_TIMEOUT
+ MACH_RCV_NOTIFY = C.MACH_RCV_NOTIFY
+ MACH_RCV_INTERRUPT = C.MACH_RCV_INTERRUPT
+ MACH_RCV_OVERWRITE = C.MACH_RCV_OVERWRITE
+
+ NDR_PROTOCOL_2_0 = C.NDR_PROTOCOL_2_0
+ NDR_INT_BIG_ENDIAN = C.NDR_INT_BIG_ENDIAN
+ NDR_INT_LITTLE_ENDIAN = C.NDR_INT_LITTLE_ENDIAN
+ NDR_FLOAT_IEEE = C.NDR_FLOAT_IEEE
+ NDR_CHAR_ASCII = C.NDR_CHAR_ASCII
+
+ SA_SIGINFO = C.SA_SIGINFO
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+ SA_USERTRAMP = C.SA_USERTRAMP
+ SA_64REGSET = C.SA_64REGSET
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGEMT = C.SIGEMT
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGBUS = C.SIGBUS
+ SIGSEGV = C.SIGSEGV
+ SIGSYS = C.SIGSYS
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGTERM = C.SIGTERM
+ SIGURG = C.SIGURG
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGCONT = C.SIGCONT
+ SIGCHLD = C.SIGCHLD
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGIO = C.SIGIO
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGINFO = C.SIGINFO
+ SIGUSR1 = C.SIGUSR1
+ SIGUSR2 = C.SIGUSR2
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ EV_ADD = C.EV_ADD
+ EV_DELETE = C.EV_DELETE
+ EV_CLEAR = C.EV_CLEAR
+ EV_RECEIPT = C.EV_RECEIPT
+ EV_ERROR = C.EV_ERROR
+ EVFILT_READ = C.EVFILT_READ
+ EVFILT_WRITE = C.EVFILT_WRITE
+)
+
+type MachBody C.mach_msg_body_t
+type MachHeader C.mach_msg_header_t
+type MachNDR C.NDR_record_t
+type MachPort C.mach_msg_port_descriptor_t
+
+type StackT C.struct_sigaltstack
+type Sighandler C.union___sigaction_u
+
+type Sigaction C.struct___sigaction // used in syscalls
+// type Sigaction C.struct_sigaction // used by the C library
+type Sigval C.union_sigval
+type Siginfo C.siginfo_t
+type Timeval C.struct_timeval
+type Itimerval C.struct_itimerval
+type Timespec C.struct_timespec
+
+type FPControl C.struct_fp_control
+type FPStatus C.struct_fp_status
+type RegMMST C.struct_mmst_reg
+type RegXMM C.struct_xmm_reg
+
+type Regs64 C.struct_x86_thread_state64
+type FloatState64 C.struct_x86_float_state64
+type ExceptionState64 C.struct_x86_exception_state64
+type Mcontext64 C.struct_mcontext64
+
+type Regs32 C.struct_i386_thread_state
+type FloatState32 C.struct_i386_float_state
+type ExceptionState32 C.struct_i386_exception_state
+type Mcontext32 C.struct_mcontext32
+
+type Ucontext C.struct_ucontext
+
+type Kevent C.struct_kevent
diff --git a/src/runtime/defs_darwin_386.h b/src/runtime/defs_darwin_386.h
new file mode 100644
index 000000000..0e0b4fbf7
--- /dev/null
+++ b/src/runtime/defs_darwin_386.h
@@ -0,0 +1,392 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_darwin.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_DONTNEED = 0x4,
+ MADV_FREE = 0x5,
+
+ MACH_MSG_TYPE_MOVE_RECEIVE = 0x10,
+ MACH_MSG_TYPE_MOVE_SEND = 0x11,
+ MACH_MSG_TYPE_MOVE_SEND_ONCE = 0x12,
+ MACH_MSG_TYPE_COPY_SEND = 0x13,
+ MACH_MSG_TYPE_MAKE_SEND = 0x14,
+ MACH_MSG_TYPE_MAKE_SEND_ONCE = 0x15,
+ MACH_MSG_TYPE_COPY_RECEIVE = 0x16,
+
+ MACH_MSG_PORT_DESCRIPTOR = 0x0,
+ MACH_MSG_OOL_DESCRIPTOR = 0x1,
+ MACH_MSG_OOL_PORTS_DESCRIPTOR = 0x2,
+ MACH_MSG_OOL_VOLATILE_DESCRIPTOR = 0x3,
+
+ MACH_MSGH_BITS_COMPLEX = 0x80000000,
+
+ MACH_SEND_MSG = 0x1,
+ MACH_RCV_MSG = 0x2,
+ MACH_RCV_LARGE = 0x4,
+
+ MACH_SEND_TIMEOUT = 0x10,
+ MACH_SEND_INTERRUPT = 0x40,
+ MACH_SEND_ALWAYS = 0x10000,
+ MACH_SEND_TRAILER = 0x20000,
+ MACH_RCV_TIMEOUT = 0x100,
+ MACH_RCV_NOTIFY = 0x200,
+ MACH_RCV_INTERRUPT = 0x400,
+ MACH_RCV_OVERWRITE = 0x1000,
+
+ NDR_PROTOCOL_2_0 = 0x0,
+ NDR_INT_BIG_ENDIAN = 0x0,
+ NDR_INT_LITTLE_ENDIAN = 0x1,
+ NDR_FLOAT_IEEE = 0x0,
+ NDR_CHAR_ASCII = 0x0,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+ SA_USERTRAMP = 0x100,
+ SA_64REGSET = 0x200,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x7,
+ FPE_INTOVF = 0x8,
+ FPE_FLTDIV = 0x1,
+ FPE_FLTOVF = 0x2,
+ FPE_FLTUND = 0x3,
+ FPE_FLTRES = 0x4,
+ FPE_FLTINV = 0x5,
+ FPE_FLTSUB = 0x6,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0x40,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct MachBody MachBody;
+typedef struct MachHeader MachHeader;
+typedef struct MachNDR MachNDR;
+typedef struct MachPort MachPort;
+typedef struct StackT StackT;
+typedef struct SigactionT SigactionT;
+typedef struct Siginfo Siginfo;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct Timespec Timespec;
+typedef struct FPControl FPControl;
+typedef struct FPStatus FPStatus;
+typedef struct RegMMST RegMMST;
+typedef struct RegXMM RegXMM;
+typedef struct Regs64 Regs64;
+typedef struct FloatState64 FloatState64;
+typedef struct ExceptionState64 ExceptionState64;
+typedef struct Mcontext64 Mcontext64;
+typedef struct Regs32 Regs32;
+typedef struct FloatState32 FloatState32;
+typedef struct ExceptionState32 ExceptionState32;
+typedef struct Mcontext32 Mcontext32;
+typedef struct Ucontext Ucontext;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct MachBody {
+ uint32 msgh_descriptor_count;
+};
+struct MachHeader {
+ uint32 msgh_bits;
+ uint32 msgh_size;
+ uint32 msgh_remote_port;
+ uint32 msgh_local_port;
+ uint32 msgh_reserved;
+ int32 msgh_id;
+};
+struct MachNDR {
+ uint8 mig_vers;
+ uint8 if_vers;
+ uint8 reserved1;
+ uint8 mig_encoding;
+ uint8 int_rep;
+ uint8 char_rep;
+ uint8 float_rep;
+ uint8 reserved2;
+};
+struct MachPort {
+ uint32 name;
+ uint32 pad1;
+ uint16 pad2;
+ uint8 disposition;
+ uint8 type;
+};
+
+struct StackT {
+ byte *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+typedef byte Sighandler[4];
+
+struct SigactionT {
+ byte __sigaction_u[4];
+ void *sa_tramp;
+ uint32 sa_mask;
+ int32 sa_flags;
+};
+
+typedef byte Sigval[4];
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ int32 si_pid;
+ uint32 si_uid;
+ int32 si_status;
+ byte *si_addr;
+ byte si_value[4];
+ int32 si_band;
+ uint32 __pad[7];
+};
+struct Timeval {
+ int32 tv_sec;
+ int32 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+struct Timespec {
+ int32 tv_sec;
+ int32 tv_nsec;
+};
+
+struct FPControl {
+ byte Pad_cgo_0[2];
+};
+struct FPStatus {
+ byte Pad_cgo_0[2];
+};
+struct RegMMST {
+ int8 mmst_reg[10];
+ int8 mmst_rsrv[6];
+};
+struct RegXMM {
+ int8 xmm_reg[16];
+};
+
+struct Regs64 {
+ uint64 rax;
+ uint64 rbx;
+ uint64 rcx;
+ uint64 rdx;
+ uint64 rdi;
+ uint64 rsi;
+ uint64 rbp;
+ uint64 rsp;
+ uint64 r8;
+ uint64 r9;
+ uint64 r10;
+ uint64 r11;
+ uint64 r12;
+ uint64 r13;
+ uint64 r14;
+ uint64 r15;
+ uint64 rip;
+ uint64 rflags;
+ uint64 cs;
+ uint64 fs;
+ uint64 gs;
+};
+struct FloatState64 {
+ int32 fpu_reserved[2];
+ FPControl fpu_fcw;
+ FPStatus fpu_fsw;
+ uint8 fpu_ftw;
+ uint8 fpu_rsrv1;
+ uint16 fpu_fop;
+ uint32 fpu_ip;
+ uint16 fpu_cs;
+ uint16 fpu_rsrv2;
+ uint32 fpu_dp;
+ uint16 fpu_ds;
+ uint16 fpu_rsrv3;
+ uint32 fpu_mxcsr;
+ uint32 fpu_mxcsrmask;
+ RegMMST fpu_stmm0;
+ RegMMST fpu_stmm1;
+ RegMMST fpu_stmm2;
+ RegMMST fpu_stmm3;
+ RegMMST fpu_stmm4;
+ RegMMST fpu_stmm5;
+ RegMMST fpu_stmm6;
+ RegMMST fpu_stmm7;
+ RegXMM fpu_xmm0;
+ RegXMM fpu_xmm1;
+ RegXMM fpu_xmm2;
+ RegXMM fpu_xmm3;
+ RegXMM fpu_xmm4;
+ RegXMM fpu_xmm5;
+ RegXMM fpu_xmm6;
+ RegXMM fpu_xmm7;
+ RegXMM fpu_xmm8;
+ RegXMM fpu_xmm9;
+ RegXMM fpu_xmm10;
+ RegXMM fpu_xmm11;
+ RegXMM fpu_xmm12;
+ RegXMM fpu_xmm13;
+ RegXMM fpu_xmm14;
+ RegXMM fpu_xmm15;
+ int8 fpu_rsrv4[96];
+ int32 fpu_reserved1;
+};
+struct ExceptionState64 {
+ uint16 trapno;
+ uint16 cpu;
+ uint32 err;
+ uint64 faultvaddr;
+};
+struct Mcontext64 {
+ ExceptionState64 es;
+ Regs64 ss;
+ FloatState64 fs;
+};
+
+struct Regs32 {
+ uint32 eax;
+ uint32 ebx;
+ uint32 ecx;
+ uint32 edx;
+ uint32 edi;
+ uint32 esi;
+ uint32 ebp;
+ uint32 esp;
+ uint32 ss;
+ uint32 eflags;
+ uint32 eip;
+ uint32 cs;
+ uint32 ds;
+ uint32 es;
+ uint32 fs;
+ uint32 gs;
+};
+struct FloatState32 {
+ int32 fpu_reserved[2];
+ FPControl fpu_fcw;
+ FPStatus fpu_fsw;
+ uint8 fpu_ftw;
+ uint8 fpu_rsrv1;
+ uint16 fpu_fop;
+ uint32 fpu_ip;
+ uint16 fpu_cs;
+ uint16 fpu_rsrv2;
+ uint32 fpu_dp;
+ uint16 fpu_ds;
+ uint16 fpu_rsrv3;
+ uint32 fpu_mxcsr;
+ uint32 fpu_mxcsrmask;
+ RegMMST fpu_stmm0;
+ RegMMST fpu_stmm1;
+ RegMMST fpu_stmm2;
+ RegMMST fpu_stmm3;
+ RegMMST fpu_stmm4;
+ RegMMST fpu_stmm5;
+ RegMMST fpu_stmm6;
+ RegMMST fpu_stmm7;
+ RegXMM fpu_xmm0;
+ RegXMM fpu_xmm1;
+ RegXMM fpu_xmm2;
+ RegXMM fpu_xmm3;
+ RegXMM fpu_xmm4;
+ RegXMM fpu_xmm5;
+ RegXMM fpu_xmm6;
+ RegXMM fpu_xmm7;
+ int8 fpu_rsrv4[224];
+ int32 fpu_reserved1;
+};
+struct ExceptionState32 {
+ uint16 trapno;
+ uint16 cpu;
+ uint32 err;
+ uint32 faultvaddr;
+};
+struct Mcontext32 {
+ ExceptionState32 es;
+ Regs32 ss;
+ FloatState32 fs;
+};
+
+struct Ucontext {
+ int32 uc_onstack;
+ uint32 uc_sigmask;
+ StackT uc_stack;
+ Ucontext *uc_link;
+ uint32 uc_mcsize;
+ Mcontext32 *uc_mcontext;
+};
+
+struct KeventT {
+ uint32 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int32 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_darwin_amd64.h b/src/runtime/defs_darwin_amd64.h
new file mode 100644
index 000000000..4bf83c1cb
--- /dev/null
+++ b/src/runtime/defs_darwin_amd64.h
@@ -0,0 +1,395 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_darwin.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_DONTNEED = 0x4,
+ MADV_FREE = 0x5,
+
+ MACH_MSG_TYPE_MOVE_RECEIVE = 0x10,
+ MACH_MSG_TYPE_MOVE_SEND = 0x11,
+ MACH_MSG_TYPE_MOVE_SEND_ONCE = 0x12,
+ MACH_MSG_TYPE_COPY_SEND = 0x13,
+ MACH_MSG_TYPE_MAKE_SEND = 0x14,
+ MACH_MSG_TYPE_MAKE_SEND_ONCE = 0x15,
+ MACH_MSG_TYPE_COPY_RECEIVE = 0x16,
+
+ MACH_MSG_PORT_DESCRIPTOR = 0x0,
+ MACH_MSG_OOL_DESCRIPTOR = 0x1,
+ MACH_MSG_OOL_PORTS_DESCRIPTOR = 0x2,
+ MACH_MSG_OOL_VOLATILE_DESCRIPTOR = 0x3,
+
+ MACH_MSGH_BITS_COMPLEX = 0x80000000,
+
+ MACH_SEND_MSG = 0x1,
+ MACH_RCV_MSG = 0x2,
+ MACH_RCV_LARGE = 0x4,
+
+ MACH_SEND_TIMEOUT = 0x10,
+ MACH_SEND_INTERRUPT = 0x40,
+ MACH_SEND_ALWAYS = 0x10000,
+ MACH_SEND_TRAILER = 0x20000,
+ MACH_RCV_TIMEOUT = 0x100,
+ MACH_RCV_NOTIFY = 0x200,
+ MACH_RCV_INTERRUPT = 0x400,
+ MACH_RCV_OVERWRITE = 0x1000,
+
+ NDR_PROTOCOL_2_0 = 0x0,
+ NDR_INT_BIG_ENDIAN = 0x0,
+ NDR_INT_LITTLE_ENDIAN = 0x1,
+ NDR_FLOAT_IEEE = 0x0,
+ NDR_CHAR_ASCII = 0x0,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+ SA_USERTRAMP = 0x100,
+ SA_64REGSET = 0x200,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x7,
+ FPE_INTOVF = 0x8,
+ FPE_FLTDIV = 0x1,
+ FPE_FLTOVF = 0x2,
+ FPE_FLTUND = 0x3,
+ FPE_FLTRES = 0x4,
+ FPE_FLTINV = 0x5,
+ FPE_FLTSUB = 0x6,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0x40,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct MachBody MachBody;
+typedef struct MachHeader MachHeader;
+typedef struct MachNDR MachNDR;
+typedef struct MachPort MachPort;
+typedef struct StackT StackT;
+typedef struct SigactionT SigactionT;
+typedef struct Siginfo Siginfo;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct Timespec Timespec;
+typedef struct FPControl FPControl;
+typedef struct FPStatus FPStatus;
+typedef struct RegMMST RegMMST;
+typedef struct RegXMM RegXMM;
+typedef struct Regs64 Regs64;
+typedef struct FloatState64 FloatState64;
+typedef struct ExceptionState64 ExceptionState64;
+typedef struct Mcontext64 Mcontext64;
+typedef struct Regs32 Regs32;
+typedef struct FloatState32 FloatState32;
+typedef struct ExceptionState32 ExceptionState32;
+typedef struct Mcontext32 Mcontext32;
+typedef struct Ucontext Ucontext;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct MachBody {
+ uint32 msgh_descriptor_count;
+};
+struct MachHeader {
+ uint32 msgh_bits;
+ uint32 msgh_size;
+ uint32 msgh_remote_port;
+ uint32 msgh_local_port;
+ uint32 msgh_reserved;
+ int32 msgh_id;
+};
+struct MachNDR {
+ uint8 mig_vers;
+ uint8 if_vers;
+ uint8 reserved1;
+ uint8 mig_encoding;
+ uint8 int_rep;
+ uint8 char_rep;
+ uint8 float_rep;
+ uint8 reserved2;
+};
+struct MachPort {
+ uint32 name;
+ uint32 pad1;
+ uint16 pad2;
+ uint8 disposition;
+ uint8 type;
+};
+
+struct StackT {
+ byte *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+typedef byte Sighandler[8];
+
+struct SigactionT {
+ byte __sigaction_u[8];
+ void *sa_tramp;
+ uint32 sa_mask;
+ int32 sa_flags;
+};
+
+typedef byte Sigval[8];
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ int32 si_pid;
+ uint32 si_uid;
+ int32 si_status;
+ byte *si_addr;
+ byte si_value[8];
+ int64 si_band;
+ uint64 __pad[7];
+};
+struct Timeval {
+ int64 tv_sec;
+ int32 tv_usec;
+ byte Pad_cgo_0[4];
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+
+struct FPControl {
+ byte Pad_cgo_0[2];
+};
+struct FPStatus {
+ byte Pad_cgo_0[2];
+};
+struct RegMMST {
+ int8 mmst_reg[10];
+ int8 mmst_rsrv[6];
+};
+struct RegXMM {
+ int8 xmm_reg[16];
+};
+
+struct Regs64 {
+ uint64 rax;
+ uint64 rbx;
+ uint64 rcx;
+ uint64 rdx;
+ uint64 rdi;
+ uint64 rsi;
+ uint64 rbp;
+ uint64 rsp;
+ uint64 r8;
+ uint64 r9;
+ uint64 r10;
+ uint64 r11;
+ uint64 r12;
+ uint64 r13;
+ uint64 r14;
+ uint64 r15;
+ uint64 rip;
+ uint64 rflags;
+ uint64 cs;
+ uint64 fs;
+ uint64 gs;
+};
+struct FloatState64 {
+ int32 fpu_reserved[2];
+ FPControl fpu_fcw;
+ FPStatus fpu_fsw;
+ uint8 fpu_ftw;
+ uint8 fpu_rsrv1;
+ uint16 fpu_fop;
+ uint32 fpu_ip;
+ uint16 fpu_cs;
+ uint16 fpu_rsrv2;
+ uint32 fpu_dp;
+ uint16 fpu_ds;
+ uint16 fpu_rsrv3;
+ uint32 fpu_mxcsr;
+ uint32 fpu_mxcsrmask;
+ RegMMST fpu_stmm0;
+ RegMMST fpu_stmm1;
+ RegMMST fpu_stmm2;
+ RegMMST fpu_stmm3;
+ RegMMST fpu_stmm4;
+ RegMMST fpu_stmm5;
+ RegMMST fpu_stmm6;
+ RegMMST fpu_stmm7;
+ RegXMM fpu_xmm0;
+ RegXMM fpu_xmm1;
+ RegXMM fpu_xmm2;
+ RegXMM fpu_xmm3;
+ RegXMM fpu_xmm4;
+ RegXMM fpu_xmm5;
+ RegXMM fpu_xmm6;
+ RegXMM fpu_xmm7;
+ RegXMM fpu_xmm8;
+ RegXMM fpu_xmm9;
+ RegXMM fpu_xmm10;
+ RegXMM fpu_xmm11;
+ RegXMM fpu_xmm12;
+ RegXMM fpu_xmm13;
+ RegXMM fpu_xmm14;
+ RegXMM fpu_xmm15;
+ int8 fpu_rsrv4[96];
+ int32 fpu_reserved1;
+};
+struct ExceptionState64 {
+ uint16 trapno;
+ uint16 cpu;
+ uint32 err;
+ uint64 faultvaddr;
+};
+struct Mcontext64 {
+ ExceptionState64 es;
+ Regs64 ss;
+ FloatState64 fs;
+ byte Pad_cgo_0[4];
+};
+
+struct Regs32 {
+ uint32 eax;
+ uint32 ebx;
+ uint32 ecx;
+ uint32 edx;
+ uint32 edi;
+ uint32 esi;
+ uint32 ebp;
+ uint32 esp;
+ uint32 ss;
+ uint32 eflags;
+ uint32 eip;
+ uint32 cs;
+ uint32 ds;
+ uint32 es;
+ uint32 fs;
+ uint32 gs;
+};
+struct FloatState32 {
+ int32 fpu_reserved[2];
+ FPControl fpu_fcw;
+ FPStatus fpu_fsw;
+ uint8 fpu_ftw;
+ uint8 fpu_rsrv1;
+ uint16 fpu_fop;
+ uint32 fpu_ip;
+ uint16 fpu_cs;
+ uint16 fpu_rsrv2;
+ uint32 fpu_dp;
+ uint16 fpu_ds;
+ uint16 fpu_rsrv3;
+ uint32 fpu_mxcsr;
+ uint32 fpu_mxcsrmask;
+ RegMMST fpu_stmm0;
+ RegMMST fpu_stmm1;
+ RegMMST fpu_stmm2;
+ RegMMST fpu_stmm3;
+ RegMMST fpu_stmm4;
+ RegMMST fpu_stmm5;
+ RegMMST fpu_stmm6;
+ RegMMST fpu_stmm7;
+ RegXMM fpu_xmm0;
+ RegXMM fpu_xmm1;
+ RegXMM fpu_xmm2;
+ RegXMM fpu_xmm3;
+ RegXMM fpu_xmm4;
+ RegXMM fpu_xmm5;
+ RegXMM fpu_xmm6;
+ RegXMM fpu_xmm7;
+ int8 fpu_rsrv4[224];
+ int32 fpu_reserved1;
+};
+struct ExceptionState32 {
+ uint16 trapno;
+ uint16 cpu;
+ uint32 err;
+ uint32 faultvaddr;
+};
+struct Mcontext32 {
+ ExceptionState32 es;
+ Regs32 ss;
+ FloatState32 fs;
+};
+
+struct Ucontext {
+ int32 uc_onstack;
+ uint32 uc_sigmask;
+ StackT uc_stack;
+ Ucontext *uc_link;
+ uint64 uc_mcsize;
+ Mcontext64 *uc_mcontext;
+};
+
+struct KeventT {
+ uint64 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_dragonfly.go b/src/runtime/defs_dragonfly.go
new file mode 100644
index 000000000..555b8f595
--- /dev/null
+++ b/src/runtime/defs_dragonfly.go
@@ -0,0 +1,126 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_dragonfly.go >defs_dragonfly_amd64.h
+GOARCH=386 go tool cgo -cdefs defs_dragonfly.go >defs_dragonfly_386.h
+*/
+
+package runtime
+
+/*
+#include <sys/user.h>
+#include <sys/time.h>
+#include <sys/event.h>
+#include <sys/mman.h>
+#include <sys/ucontext.h>
+#include <sys/rtprio.h>
+#include <sys/signal.h>
+#include <sys/unistd.h>
+#include <errno.h>
+#include <signal.h>
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EFAULT = C.EFAULT
+ EBUSY = C.EBUSY
+ EAGAIN = C.EAGAIN
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANON
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_FREE = C.MADV_FREE
+
+ SA_SIGINFO = C.SA_SIGINFO
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGEMT = C.SIGEMT
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGBUS = C.SIGBUS
+ SIGSEGV = C.SIGSEGV
+ SIGSYS = C.SIGSYS
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGTERM = C.SIGTERM
+ SIGURG = C.SIGURG
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGCONT = C.SIGCONT
+ SIGCHLD = C.SIGCHLD
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGIO = C.SIGIO
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGINFO = C.SIGINFO
+ SIGUSR1 = C.SIGUSR1
+ SIGUSR2 = C.SIGUSR2
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ EV_ADD = C.EV_ADD
+ EV_DELETE = C.EV_DELETE
+ EV_CLEAR = C.EV_CLEAR
+ EV_ERROR = C.EV_ERROR
+ EVFILT_READ = C.EVFILT_READ
+ EVFILT_WRITE = C.EVFILT_WRITE
+)
+
+type Rtprio C.struct_rtprio
+type Lwpparams C.struct_lwp_params
+type SigaltstackT C.struct_sigaltstack
+type Sigset C.struct___sigset
+type StackT C.stack_t
+
+type Siginfo C.siginfo_t
+
+type Mcontext C.mcontext_t
+type Ucontext C.ucontext_t
+
+type Timespec C.struct_timespec
+type Timeval C.struct_timeval
+type Itimerval C.struct_itimerval
+
+type Kevent C.struct_kevent
diff --git a/src/runtime/defs_dragonfly_386.h b/src/runtime/defs_dragonfly_386.h
new file mode 100644
index 000000000..f86b9c6b9
--- /dev/null
+++ b/src/runtime/defs_dragonfly_386.h
@@ -0,0 +1,198 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_dragonfly.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+ EBUSY = 0x10,
+ EAGAIN = 0x23,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x5,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x2,
+ FPE_INTOVF = 0x1,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct Rtprio Rtprio;
+typedef struct Lwpparams Lwpparams;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct StackT StackT;
+typedef struct Siginfo Siginfo;
+typedef struct Mcontext Mcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct Rtprio {
+ uint16 type;
+ uint16 prio;
+};
+struct Lwpparams {
+ void *func;
+ byte *arg;
+ byte *stack;
+ int32 *tid1;
+ int32 *tid2;
+};
+struct SigaltstackT {
+ int8 *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct StackT {
+ int8 *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ int32 si_pid;
+ uint32 si_uid;
+ int32 si_status;
+ byte *si_addr;
+ byte si_value[4];
+ int32 si_band;
+ int32 __spare__[7];
+};
+
+struct Mcontext {
+ int32 mc_onstack;
+ int32 mc_gs;
+ int32 mc_fs;
+ int32 mc_es;
+ int32 mc_ds;
+ int32 mc_edi;
+ int32 mc_esi;
+ int32 mc_ebp;
+ int32 mc_isp;
+ int32 mc_ebx;
+ int32 mc_edx;
+ int32 mc_ecx;
+ int32 mc_eax;
+ int32 mc_xflags;
+ int32 mc_trapno;
+ int32 mc_err;
+ int32 mc_eip;
+ int32 mc_cs;
+ int32 mc_eflags;
+ int32 mc_esp;
+ int32 mc_ss;
+ int32 mc_len;
+ int32 mc_fpformat;
+ int32 mc_ownedfp;
+ int32 mc_fpregs[128];
+ int32 __spare__[16];
+};
+struct Ucontext {
+ Sigset uc_sigmask;
+ Mcontext uc_mcontext;
+ Ucontext *uc_link;
+ StackT uc_stack;
+ int32 __spare__[8];
+};
+
+struct Timespec {
+ int32 tv_sec;
+ int32 tv_nsec;
+};
+struct Timeval {
+ int32 tv_sec;
+ int32 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct KeventT {
+ uint32 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int32 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_dragonfly_amd64.h b/src/runtime/defs_dragonfly_amd64.h
new file mode 100644
index 000000000..671555241
--- /dev/null
+++ b/src/runtime/defs_dragonfly_amd64.h
@@ -0,0 +1,208 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_dragonfly.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+ EBUSY = 0x10,
+ EAGAIN = 0x23,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x5,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x2,
+ FPE_INTOVF = 0x1,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct Rtprio Rtprio;
+typedef struct Lwpparams Lwpparams;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct StackT StackT;
+typedef struct Siginfo Siginfo;
+typedef struct Mcontext Mcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct Rtprio {
+ uint16 type;
+ uint16 prio;
+};
+struct Lwpparams {
+ void *func;
+ byte *arg;
+ byte *stack;
+ int32 *tid1;
+ int32 *tid2;
+};
+struct SigaltstackT {
+ int8 *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct StackT {
+ int8 *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ int32 si_pid;
+ uint32 si_uid;
+ int32 si_status;
+ byte *si_addr;
+ byte si_value[8];
+ int64 si_band;
+ int32 __spare__[7];
+ byte Pad_cgo_0[4];
+};
+
+struct Mcontext {
+ int64 mc_onstack;
+ int64 mc_rdi;
+ int64 mc_rsi;
+ int64 mc_rdx;
+ int64 mc_rcx;
+ int64 mc_r8;
+ int64 mc_r9;
+ int64 mc_rax;
+ int64 mc_rbx;
+ int64 mc_rbp;
+ int64 mc_r10;
+ int64 mc_r11;
+ int64 mc_r12;
+ int64 mc_r13;
+ int64 mc_r14;
+ int64 mc_r15;
+ int64 mc_xflags;
+ int64 mc_trapno;
+ int64 mc_addr;
+ int64 mc_flags;
+ int64 mc_err;
+ int64 mc_rip;
+ int64 mc_cs;
+ int64 mc_rflags;
+ int64 mc_rsp;
+ int64 mc_ss;
+ uint32 mc_len;
+ uint32 mc_fpformat;
+ uint32 mc_ownedfp;
+ uint32 mc_reserved;
+ uint32 mc_unused[8];
+ int32 mc_fpregs[256];
+};
+struct Ucontext {
+ Sigset uc_sigmask;
+ byte Pad_cgo_0[48];
+ Mcontext uc_mcontext;
+ Ucontext *uc_link;
+ StackT uc_stack;
+ int32 __spare__[8];
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int64 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct KeventT {
+ uint64 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_freebsd.go b/src/runtime/defs_freebsd.go
new file mode 100644
index 000000000..0253685aa
--- /dev/null
+++ b/src/runtime/defs_freebsd.go
@@ -0,0 +1,133 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_freebsd.go >defs_freebsd_amd64.h
+GOARCH=386 go tool cgo -cdefs defs_freebsd.go >defs_freebsd_386.h
+GOARCH=arm go tool cgo -cdefs defs_freebsd.go >defs_freebsd_arm.h
+*/
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/event.h>
+#include <sys/mman.h>
+#include <sys/ucontext.h>
+#include <sys/umtx.h>
+#include <sys/rtprio.h>
+#include <sys/thr.h>
+#include <sys/_sigset.h>
+#include <sys/unistd.h>
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EFAULT = C.EFAULT
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANON
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_FREE = C.MADV_FREE
+
+ SA_SIGINFO = C.SA_SIGINFO
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+
+ UMTX_OP_WAIT_UINT = C.UMTX_OP_WAIT_UINT
+ UMTX_OP_WAIT_UINT_PRIVATE = C.UMTX_OP_WAIT_UINT_PRIVATE
+ UMTX_OP_WAKE = C.UMTX_OP_WAKE
+ UMTX_OP_WAKE_PRIVATE = C.UMTX_OP_WAKE_PRIVATE
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGEMT = C.SIGEMT
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGBUS = C.SIGBUS
+ SIGSEGV = C.SIGSEGV
+ SIGSYS = C.SIGSYS
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGTERM = C.SIGTERM
+ SIGURG = C.SIGURG
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGCONT = C.SIGCONT
+ SIGCHLD = C.SIGCHLD
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGIO = C.SIGIO
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGINFO = C.SIGINFO
+ SIGUSR1 = C.SIGUSR1
+ SIGUSR2 = C.SIGUSR2
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ EV_ADD = C.EV_ADD
+ EV_DELETE = C.EV_DELETE
+ EV_CLEAR = C.EV_CLEAR
+ EV_RECEIPT = C.EV_RECEIPT
+ EV_ERROR = C.EV_ERROR
+ EVFILT_READ = C.EVFILT_READ
+ EVFILT_WRITE = C.EVFILT_WRITE
+)
+
+type Rtprio C.struct_rtprio
+type ThrParam C.struct_thr_param
+type SigaltstackT C.struct_sigaltstack
+type Sigset C.struct___sigset
+type StackT C.stack_t
+
+type Siginfo C.siginfo_t
+
+type Mcontext C.mcontext_t
+type Ucontext C.ucontext_t
+
+type Timespec C.struct_timespec
+type Timeval C.struct_timeval
+type Itimerval C.struct_itimerval
+
+type Kevent C.struct_kevent
diff --git a/src/runtime/defs_freebsd_386.h b/src/runtime/defs_freebsd_386.h
new file mode 100644
index 000000000..156dccba4
--- /dev/null
+++ b/src/runtime/defs_freebsd_386.h
@@ -0,0 +1,213 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_freebsd.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x5,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ UMTX_OP_WAIT_UINT = 0xb,
+ UMTX_OP_WAIT_UINT_PRIVATE = 0xf,
+ UMTX_OP_WAKE = 0x3,
+ UMTX_OP_WAKE_PRIVATE = 0x10,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x2,
+ FPE_INTOVF = 0x1,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0x40,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct Rtprio Rtprio;
+typedef struct ThrParam ThrParam;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct StackT StackT;
+typedef struct Siginfo Siginfo;
+typedef struct Mcontext Mcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct Rtprio {
+ uint16 type;
+ uint16 prio;
+};
+struct ThrParam {
+ void *start_func;
+ byte *arg;
+ int8 *stack_base;
+ uint32 stack_size;
+ int8 *tls_base;
+ uint32 tls_size;
+ int32 *child_tid;
+ int32 *parent_tid;
+ int32 flags;
+ Rtprio *rtp;
+ void *spare[3];
+};
+struct SigaltstackT {
+ int8 *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct StackT {
+ int8 *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ int32 si_pid;
+ uint32 si_uid;
+ int32 si_status;
+ byte *si_addr;
+ byte si_value[4];
+ byte _reason[32];
+};
+
+struct Mcontext {
+ int32 mc_onstack;
+ int32 mc_gs;
+ int32 mc_fs;
+ int32 mc_es;
+ int32 mc_ds;
+ int32 mc_edi;
+ int32 mc_esi;
+ int32 mc_ebp;
+ int32 mc_isp;
+ int32 mc_ebx;
+ int32 mc_edx;
+ int32 mc_ecx;
+ int32 mc_eax;
+ int32 mc_trapno;
+ int32 mc_err;
+ int32 mc_eip;
+ int32 mc_cs;
+ int32 mc_eflags;
+ int32 mc_esp;
+ int32 mc_ss;
+ int32 mc_len;
+ int32 mc_fpformat;
+ int32 mc_ownedfp;
+ int32 mc_flags;
+ int32 mc_fpstate[128];
+ int32 mc_fsbase;
+ int32 mc_gsbase;
+ int32 mc_xfpustate;
+ int32 mc_xfpustate_len;
+ int32 mc_spare2[4];
+};
+struct Ucontext {
+ Sigset uc_sigmask;
+ Mcontext uc_mcontext;
+ Ucontext *uc_link;
+ StackT uc_stack;
+ int32 uc_flags;
+ int32 __spare__[4];
+ byte Pad_cgo_0[12];
+};
+
+struct Timespec {
+ int32 tv_sec;
+ int32 tv_nsec;
+};
+struct Timeval {
+ int32 tv_sec;
+ int32 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct KeventT {
+ uint32 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int32 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_freebsd_amd64.h b/src/runtime/defs_freebsd_amd64.h
new file mode 100644
index 000000000..4ba8956a2
--- /dev/null
+++ b/src/runtime/defs_freebsd_amd64.h
@@ -0,0 +1,224 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_freebsd.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x5,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ UMTX_OP_WAIT_UINT = 0xb,
+ UMTX_OP_WAIT_UINT_PRIVATE = 0xf,
+ UMTX_OP_WAKE = 0x3,
+ UMTX_OP_WAKE_PRIVATE = 0x10,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x2,
+ FPE_INTOVF = 0x1,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0x40,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct Rtprio Rtprio;
+typedef struct ThrParam ThrParam;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct StackT StackT;
+typedef struct Siginfo Siginfo;
+typedef struct Mcontext Mcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct Rtprio {
+ uint16 type;
+ uint16 prio;
+};
+struct ThrParam {
+ void *start_func;
+ byte *arg;
+ int8 *stack_base;
+ uint64 stack_size;
+ int8 *tls_base;
+ uint64 tls_size;
+ int64 *child_tid;
+ int64 *parent_tid;
+ int32 flags;
+ byte Pad_cgo_0[4];
+ Rtprio *rtp;
+ void *spare[3];
+};
+struct SigaltstackT {
+ int8 *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct StackT {
+ int8 *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ int32 si_pid;
+ uint32 si_uid;
+ int32 si_status;
+ byte *si_addr;
+ byte si_value[8];
+ byte _reason[40];
+};
+
+struct Mcontext {
+ int64 mc_onstack;
+ int64 mc_rdi;
+ int64 mc_rsi;
+ int64 mc_rdx;
+ int64 mc_rcx;
+ int64 mc_r8;
+ int64 mc_r9;
+ int64 mc_rax;
+ int64 mc_rbx;
+ int64 mc_rbp;
+ int64 mc_r10;
+ int64 mc_r11;
+ int64 mc_r12;
+ int64 mc_r13;
+ int64 mc_r14;
+ int64 mc_r15;
+ uint32 mc_trapno;
+ uint16 mc_fs;
+ uint16 mc_gs;
+ int64 mc_addr;
+ uint32 mc_flags;
+ uint16 mc_es;
+ uint16 mc_ds;
+ int64 mc_err;
+ int64 mc_rip;
+ int64 mc_cs;
+ int64 mc_rflags;
+ int64 mc_rsp;
+ int64 mc_ss;
+ int64 mc_len;
+ int64 mc_fpformat;
+ int64 mc_ownedfp;
+ int64 mc_fpstate[64];
+ int64 mc_fsbase;
+ int64 mc_gsbase;
+ int64 mc_xfpustate;
+ int64 mc_xfpustate_len;
+ int64 mc_spare[4];
+};
+struct Ucontext {
+ Sigset uc_sigmask;
+ Mcontext uc_mcontext;
+ Ucontext *uc_link;
+ StackT uc_stack;
+ int32 uc_flags;
+ int32 __spare__[4];
+ byte Pad_cgo_0[12];
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int64 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct KeventT {
+ uint64 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_freebsd_arm.h b/src/runtime/defs_freebsd_arm.h
new file mode 100644
index 000000000..17deba68d
--- /dev/null
+++ b/src/runtime/defs_freebsd_arm.h
@@ -0,0 +1,186 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_freebsd.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x5,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ UMTX_OP_WAIT_UINT = 0xb,
+ UMTX_OP_WAIT_UINT_PRIVATE = 0xf,
+ UMTX_OP_WAKE = 0x3,
+ UMTX_OP_WAKE_PRIVATE = 0x10,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x2,
+ FPE_INTOVF = 0x1,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0x40,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct Rtprio Rtprio;
+typedef struct ThrParam ThrParam;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct StackT StackT;
+typedef struct Siginfo Siginfo;
+typedef struct Mcontext Mcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct Rtprio {
+ uint16 type;
+ uint16 prio;
+};
+struct ThrParam {
+ void *start_func;
+ byte *arg;
+ uint8 *stack_base;
+ uint32 stack_size;
+ uint8 *tls_base;
+ uint32 tls_size;
+ int32 *child_tid;
+ int32 *parent_tid;
+ int32 flags;
+ Rtprio *rtp;
+ void *spare[3];
+};
+struct SigaltstackT {
+ uint8 *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct StackT {
+ uint8 *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ int32 si_pid;
+ uint32 si_uid;
+ int32 si_status;
+ byte *si_addr;
+ byte si_value[4];
+ byte _reason[32];
+};
+
+struct Mcontext {
+ uint32 __gregs[17];
+ byte __fpu[140];
+};
+struct Ucontext {
+ Sigset uc_sigmask;
+ Mcontext uc_mcontext;
+ Ucontext *uc_link;
+ StackT uc_stack;
+ int32 uc_flags;
+ int32 __spare__[4];
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int32 tv_nsec;
+ byte Pad_cgo_0[4];
+};
+struct Timeval {
+ int64 tv_sec;
+ int32 tv_usec;
+ byte Pad_cgo_0[4];
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct KeventT {
+ uint32 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int32 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_linux.go b/src/runtime/defs_linux.go
new file mode 100644
index 000000000..8657dbb0e
--- /dev/null
+++ b/src/runtime/defs_linux.go
@@ -0,0 +1,124 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo -cdefs
+
+GOARCH=amd64 go tool cgo -cdefs defs_linux.go defs1_linux.go >defs_linux_amd64.h
+*/
+
+package runtime
+
+/*
+// Linux glibc and Linux kernel define different and conflicting
+// definitions for struct sigaction, struct timespec, etc.
+// We want the kernel ones, which are in the asm/* headers.
+// But then we'd get conflicts when we include the system
+// headers for things like ucontext_t, so that happens in
+// a separate file, defs1.go.
+
+#include <asm/posix_types.h>
+#define size_t __kernel_size_t
+#include <asm/signal.h>
+#include <asm/siginfo.h>
+#include <asm/mman.h>
+#include <asm-generic/errno.h>
+#include <asm-generic/poll.h>
+#include <linux/eventpoll.h>
+#undef size_t
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EAGAIN = C.EAGAIN
+ ENOMEM = C.ENOMEM
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANONYMOUS
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_DONTNEED = C.MADV_DONTNEED
+
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+ SA_RESTORER = C.SA_RESTORER
+ SA_SIGINFO = C.SA_SIGINFO
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGBUS = C.SIGBUS
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGUSR1 = C.SIGUSR1
+ SIGSEGV = C.SIGSEGV
+ SIGUSR2 = C.SIGUSR2
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGSTKFLT = C.SIGSTKFLT
+ SIGCHLD = C.SIGCHLD
+ SIGCONT = C.SIGCONT
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGURG = C.SIGURG
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGIO = C.SIGIO
+ SIGPWR = C.SIGPWR
+ SIGSYS = C.SIGSYS
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ EPOLLIN = C.POLLIN
+ EPOLLOUT = C.POLLOUT
+ EPOLLERR = C.POLLERR
+ EPOLLHUP = C.POLLHUP
+ EPOLLRDHUP = C.POLLRDHUP
+ EPOLLET = C.EPOLLET
+ EPOLL_CLOEXEC = C.EPOLL_CLOEXEC
+ EPOLL_CTL_ADD = C.EPOLL_CTL_ADD
+ EPOLL_CTL_DEL = C.EPOLL_CTL_DEL
+ EPOLL_CTL_MOD = C.EPOLL_CTL_MOD
+)
+
+type Timespec C.struct_timespec
+type Timeval C.struct_timeval
+type Sigaction C.struct_sigaction
+type Siginfo C.siginfo_t
+type Itimerval C.struct_itimerval
+type EpollEvent C.struct_epoll_event
diff --git a/src/runtime/defs_linux_386.h b/src/runtime/defs_linux_386.h
new file mode 100644
index 000000000..24a05d862
--- /dev/null
+++ b/src/runtime/defs_linux_386.h
@@ -0,0 +1,211 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs2_linux.go
+
+
+enum {
+ EINTR = 0x4,
+ EAGAIN = 0xb,
+ ENOMEM = 0xc,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x20,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_DONTNEED = 0x4,
+
+ SA_RESTART = 0x10000000,
+ SA_ONSTACK = 0x8000000,
+ SA_RESTORER = 0x4000000,
+ SA_SIGINFO = 0x4,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGBUS = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGUSR1 = 0xa,
+ SIGSEGV = 0xb,
+ SIGUSR2 = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGSTKFLT = 0x10,
+ SIGCHLD = 0x11,
+ SIGCONT = 0x12,
+ SIGSTOP = 0x13,
+ SIGTSTP = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGURG = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGIO = 0x1d,
+ SIGPWR = 0x1e,
+ SIGSYS = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ O_RDONLY = 0x0,
+ O_CLOEXEC = 0x80000,
+
+ EPOLLIN = 0x1,
+ EPOLLOUT = 0x4,
+ EPOLLERR = 0x8,
+ EPOLLHUP = 0x10,
+ EPOLLRDHUP = 0x2000,
+ EPOLLET = -0x80000000,
+ EPOLL_CLOEXEC = 0x80000,
+ EPOLL_CTL_ADD = 0x1,
+ EPOLL_CTL_DEL = 0x2,
+ EPOLL_CTL_MOD = 0x3,
+};
+
+typedef struct Fpreg Fpreg;
+typedef struct Fpxreg Fpxreg;
+typedef struct Xmmreg Xmmreg;
+typedef struct Fpstate Fpstate;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct SigactionT SigactionT;
+typedef struct Siginfo Siginfo;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigcontext Sigcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Itimerval Itimerval;
+typedef struct EpollEvent EpollEvent;
+
+#pragma pack on
+
+struct Fpreg {
+ uint16 significand[4];
+ uint16 exponent;
+};
+struct Fpxreg {
+ uint16 significand[4];
+ uint16 exponent;
+ uint16 padding[3];
+};
+struct Xmmreg {
+ uint32 element[4];
+};
+struct Fpstate {
+ uint32 cw;
+ uint32 sw;
+ uint32 tag;
+ uint32 ipoff;
+ uint32 cssel;
+ uint32 dataoff;
+ uint32 datasel;
+ Fpreg _st[8];
+ uint16 status;
+ uint16 magic;
+ uint32 _fxsr_env[6];
+ uint32 mxcsr;
+ uint32 reserved;
+ Fpxreg _fxsr_st[8];
+ Xmmreg _xmm[8];
+ uint32 padding1[44];
+ byte anon0[48];
+};
+struct Timespec {
+ int32 tv_sec;
+ int32 tv_nsec;
+};
+struct Timeval {
+ int32 tv_sec;
+ int32 tv_usec;
+};
+struct SigactionT {
+ void *k_sa_handler;
+ uint32 sa_flags;
+ void *sa_restorer;
+ uint64 sa_mask;
+};
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ byte _sifields[116];
+};
+struct SigaltstackT {
+ byte *ss_sp;
+ int32 ss_flags;
+ uint32 ss_size;
+};
+struct Sigcontext {
+ uint16 gs;
+ uint16 __gsh;
+ uint16 fs;
+ uint16 __fsh;
+ uint16 es;
+ uint16 __esh;
+ uint16 ds;
+ uint16 __dsh;
+ uint32 edi;
+ uint32 esi;
+ uint32 ebp;
+ uint32 esp;
+ uint32 ebx;
+ uint32 edx;
+ uint32 ecx;
+ uint32 eax;
+ uint32 trapno;
+ uint32 err;
+ uint32 eip;
+ uint16 cs;
+ uint16 __csh;
+ uint32 eflags;
+ uint32 esp_at_signal;
+ uint16 ss;
+ uint16 __ssh;
+ Fpstate *fpstate;
+ uint32 oldmask;
+ uint32 cr2;
+};
+struct Ucontext {
+ uint32 uc_flags;
+ Ucontext *uc_link;
+ SigaltstackT uc_stack;
+ Sigcontext uc_mcontext;
+ uint32 uc_sigmask;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+struct EpollEvent {
+ uint32 events;
+ byte data[8]; // to match amd64
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_linux_amd64.h b/src/runtime/defs_linux_amd64.h
new file mode 100644
index 000000000..14616dffe
--- /dev/null
+++ b/src/runtime/defs_linux_amd64.h
@@ -0,0 +1,254 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_linux.go defs1_linux.go
+
+
+enum {
+ EINTR = 0x4,
+ EAGAIN = 0xb,
+ ENOMEM = 0xc,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x20,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_DONTNEED = 0x4,
+
+ SA_RESTART = 0x10000000,
+ SA_ONSTACK = 0x8000000,
+ SA_RESTORER = 0x4000000,
+ SA_SIGINFO = 0x4,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGBUS = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGUSR1 = 0xa,
+ SIGSEGV = 0xb,
+ SIGUSR2 = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGSTKFLT = 0x10,
+ SIGCHLD = 0x11,
+ SIGCONT = 0x12,
+ SIGSTOP = 0x13,
+ SIGTSTP = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGURG = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGIO = 0x1d,
+ SIGPWR = 0x1e,
+ SIGSYS = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EPOLLIN = 0x1,
+ EPOLLOUT = 0x4,
+ EPOLLERR = 0x8,
+ EPOLLHUP = 0x10,
+ EPOLLRDHUP = 0x2000,
+ EPOLLET = -0x80000000,
+ EPOLL_CLOEXEC = 0x80000,
+ EPOLL_CTL_ADD = 0x1,
+ EPOLL_CTL_DEL = 0x2,
+ EPOLL_CTL_MOD = 0x3,
+};
+
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct SigactionT SigactionT;
+typedef struct Siginfo Siginfo;
+typedef struct Itimerval Itimerval;
+typedef struct EpollEvent EpollEvent;
+
+#pragma pack on
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int64 tv_usec;
+};
+struct SigactionT {
+ void *sa_handler;
+ uint64 sa_flags;
+ void *sa_restorer;
+ uint64 sa_mask;
+};
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ byte Pad_cgo_0[4];
+ byte _sifields[112];
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+struct EpollEvent {
+ uint32 events;
+ byte data[8]; // unaligned uintptr
+};
+
+
+#pragma pack off
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_linux.go defs1_linux.go
+
+
+enum {
+ O_RDONLY = 0x0,
+ O_CLOEXEC = 0x80000,
+};
+
+typedef struct Usigset Usigset;
+typedef struct Fpxreg Fpxreg;
+typedef struct Xmmreg Xmmreg;
+typedef struct Fpstate Fpstate;
+typedef struct Fpxreg1 Fpxreg1;
+typedef struct Xmmreg1 Xmmreg1;
+typedef struct Fpstate1 Fpstate1;
+typedef struct Fpreg1 Fpreg1;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Mcontext Mcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Sigcontext Sigcontext;
+
+#pragma pack on
+
+struct Usigset {
+ uint64 __val[16];
+};
+struct Fpxreg {
+ uint16 significand[4];
+ uint16 exponent;
+ uint16 padding[3];
+};
+struct Xmmreg {
+ uint32 element[4];
+};
+struct Fpstate {
+ uint16 cwd;
+ uint16 swd;
+ uint16 ftw;
+ uint16 fop;
+ uint64 rip;
+ uint64 rdp;
+ uint32 mxcsr;
+ uint32 mxcr_mask;
+ Fpxreg _st[8];
+ Xmmreg _xmm[16];
+ uint32 padding[24];
+};
+struct Fpxreg1 {
+ uint16 significand[4];
+ uint16 exponent;
+ uint16 padding[3];
+};
+struct Xmmreg1 {
+ uint32 element[4];
+};
+struct Fpstate1 {
+ uint16 cwd;
+ uint16 swd;
+ uint16 ftw;
+ uint16 fop;
+ uint64 rip;
+ uint64 rdp;
+ uint32 mxcsr;
+ uint32 mxcr_mask;
+ Fpxreg1 _st[8];
+ Xmmreg1 _xmm[16];
+ uint32 padding[24];
+};
+struct Fpreg1 {
+ uint16 significand[4];
+ uint16 exponent;
+};
+struct SigaltstackT {
+ byte *ss_sp;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+ uint64 ss_size;
+};
+struct Mcontext {
+ int64 gregs[23];
+ Fpstate *fpregs;
+ uint64 __reserved1[8];
+};
+struct Ucontext {
+ uint64 uc_flags;
+ Ucontext *uc_link;
+ SigaltstackT uc_stack;
+ Mcontext uc_mcontext;
+ Usigset uc_sigmask;
+ Fpstate __fpregs_mem;
+};
+struct Sigcontext {
+ uint64 r8;
+ uint64 r9;
+ uint64 r10;
+ uint64 r11;
+ uint64 r12;
+ uint64 r13;
+ uint64 r14;
+ uint64 r15;
+ uint64 rdi;
+ uint64 rsi;
+ uint64 rbp;
+ uint64 rbx;
+ uint64 rdx;
+ uint64 rax;
+ uint64 rcx;
+ uint64 rsp;
+ uint64 rip;
+ uint64 eflags;
+ uint16 cs;
+ uint16 gs;
+ uint16 fs;
+ uint16 __pad0;
+ uint64 err;
+ uint64 trapno;
+ uint64 oldmask;
+ uint64 cr2;
+ Fpstate1 *fpstate;
+ uint64 __reserved1[8];
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_linux_arm.h b/src/runtime/defs_linux_arm.h
new file mode 100644
index 000000000..50b3c919e
--- /dev/null
+++ b/src/runtime/defs_linux_arm.h
@@ -0,0 +1,168 @@
+// TODO: Generate using cgo like defs_linux_{386,amd64}.h
+
+// Constants
+enum {
+ EINTR = 0x4,
+ ENOMEM = 0xc,
+ EAGAIN = 0xb,
+
+ PROT_NONE = 0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+ MAP_ANON = 0x20,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+ MADV_DONTNEED = 0x4,
+ SA_RESTART = 0x10000000,
+ SA_ONSTACK = 0x8000000,
+ SA_RESTORER = 0, // unused on ARM
+ SA_SIGINFO = 0x4,
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGBUS = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGUSR1 = 0xa,
+ SIGSEGV = 0xb,
+ SIGUSR2 = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGSTKFLT = 0x10,
+ SIGCHLD = 0x11,
+ SIGCONT = 0x12,
+ SIGSTOP = 0x13,
+ SIGTSTP = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGURG = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGIO = 0x1d,
+ SIGPWR = 0x1e,
+ SIGSYS = 0x1f,
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+ ITIMER_REAL = 0,
+ ITIMER_PROF = 0x2,
+ ITIMER_VIRTUAL = 0x1,
+ O_RDONLY = 0,
+ O_CLOEXEC = 02000000,
+
+ EPOLLIN = 0x1,
+ EPOLLOUT = 0x4,
+ EPOLLERR = 0x8,
+ EPOLLHUP = 0x10,
+ EPOLLRDHUP = 0x2000,
+ EPOLLET = -0x80000000,
+ EPOLL_CLOEXEC = 0x80000,
+ EPOLL_CTL_ADD = 0x1,
+ EPOLL_CTL_DEL = 0x2,
+ EPOLL_CTL_MOD = 0x3,
+};
+
+// Types
+#pragma pack on
+
+typedef struct Timespec Timespec;
+struct Timespec {
+ int32 tv_sec;
+ int32 tv_nsec;
+};
+
+typedef struct SigaltstackT SigaltstackT;
+struct SigaltstackT {
+ void *ss_sp;
+ int32 ss_flags;
+ uint32 ss_size;
+};
+
+typedef struct Sigcontext Sigcontext;
+struct Sigcontext {
+ uint32 trap_no;
+ uint32 error_code;
+ uint32 oldmask;
+ uint32 arm_r0;
+ uint32 arm_r1;
+ uint32 arm_r2;
+ uint32 arm_r3;
+ uint32 arm_r4;
+ uint32 arm_r5;
+ uint32 arm_r6;
+ uint32 arm_r7;
+ uint32 arm_r8;
+ uint32 arm_r9;
+ uint32 arm_r10;
+ uint32 arm_fp;
+ uint32 arm_ip;
+ uint32 arm_sp;
+ uint32 arm_lr;
+ uint32 arm_pc;
+ uint32 arm_cpsr;
+ uint32 fault_address;
+};
+
+typedef struct Ucontext Ucontext;
+struct Ucontext {
+ uint32 uc_flags;
+ Ucontext *uc_link;
+ SigaltstackT uc_stack;
+ Sigcontext uc_mcontext;
+ uint32 uc_sigmask;
+ int32 __unused[31];
+ uint32 uc_regspace[128];
+};
+
+typedef struct Timeval Timeval;
+struct Timeval {
+ int32 tv_sec;
+ int32 tv_usec;
+};
+
+typedef struct Itimerval Itimerval;
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+typedef struct Siginfo Siginfo;
+struct Siginfo {
+ int32 si_signo;
+ int32 si_errno;
+ int32 si_code;
+ uint8 _sifields[4];
+};
+
+typedef struct SigactionT SigactionT;
+struct SigactionT {
+ void *sa_handler;
+ uint32 sa_flags;
+ void *sa_restorer;
+ uint64 sa_mask;
+};
+
+typedef struct EpollEvent EpollEvent;
+struct EpollEvent {
+ uint32 events;
+ uint32 _pad;
+ byte data[8]; // to match amd64
+};
+#pragma pack off
diff --git a/src/runtime/defs_nacl_386.h b/src/runtime/defs_nacl_386.h
new file mode 100644
index 000000000..e8fbb38e1
--- /dev/null
+++ b/src/runtime/defs_nacl_386.h
@@ -0,0 +1,63 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Created by hand, not machine generated.
+
+enum
+{
+ // These values are referred to in the source code
+ // but really don't matter. Even so, use the standard numbers.
+ SIGSEGV = 11,
+ SIGPROF = 27,
+};
+
+typedef struct Siginfo Siginfo;
+
+// native_client/src/trusted/service_runtime/include/machine/_types.h
+typedef struct Timespec Timespec;
+
+struct Timespec
+{
+ int64 tv_sec;
+ int32 tv_nsec;
+};
+
+// native_client/src/trusted/service_runtime/nacl_exception.h
+// native_client/src/include/nacl/nacl_exception.h
+
+typedef struct ExcContext ExcContext;
+typedef struct ExcPortable ExcPortable;
+typedef struct ExcRegs386 ExcRegs386;
+
+struct ExcRegs386
+{
+ uint32 eax;
+ uint32 ecx;
+ uint32 edx;
+ uint32 ebx;
+ uint32 esp;
+ uint32 ebp;
+ uint32 esi;
+ uint32 edi;
+ uint32 eip;
+ uint32 eflags;
+};
+
+struct ExcContext
+{
+ uint32 size;
+ uint32 portable_context_offset;
+ uint32 portable_context_size;
+ uint32 arch;
+ uint32 regs_size;
+ uint32 reserved[11];
+ ExcRegs386 regs;
+};
+
+struct ExcPortableContext
+{
+ uint32 pc;
+ uint32 sp;
+ uint32 fp;
+};
diff --git a/src/runtime/defs_nacl_amd64p32.h b/src/runtime/defs_nacl_amd64p32.h
new file mode 100644
index 000000000..45663d40a
--- /dev/null
+++ b/src/runtime/defs_nacl_amd64p32.h
@@ -0,0 +1,90 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Created by hand, not machine generated.
+
+enum
+{
+ // These values are referred to in the source code
+ // but really don't matter. Even so, use the standard numbers.
+ SIGSEGV = 11,
+ SIGPROF = 27,
+};
+
+typedef struct Siginfo Siginfo;
+
+
+// native_client/src/trusted/service_runtime/include/machine/_types.h
+typedef struct Timespec Timespec;
+
+struct Timespec
+{
+ int64 tv_sec;
+ int32 tv_nsec;
+};
+
+// native_client/src/trusted/service_runtime/nacl_exception.h
+// native_client/src/include/nacl/nacl_exception.h
+
+typedef struct ExcContext ExcContext;
+typedef struct ExcPortable ExcPortable;
+typedef struct ExcRegs386 ExcRegs386;
+typedef struct ExcRegsAmd64 ExcRegsAmd64;
+
+struct ExcRegs386
+{
+ uint32 eax;
+ uint32 ecx;
+ uint32 edx;
+ uint32 ebx;
+ uint32 esp;
+ uint32 ebp;
+ uint32 esi;
+ uint32 edi;
+ uint32 eip;
+ uint32 eflags;
+};
+
+struct ExcRegsAmd64
+{
+ uint64 rax;
+ uint64 rcx;
+ uint64 rdx;
+ uint64 rbx;
+ uint64 rsp;
+ uint64 rbp;
+ uint64 rsi;
+ uint64 rdi;
+ uint64 r8;
+ uint64 r9;
+ uint64 r10;
+ uint64 r11;
+ uint64 r12;
+ uint64 r13;
+ uint64 r14;
+ uint64 r15;
+ uint64 rip;
+ uint32 rflags;
+};
+
+struct ExcContext
+{
+ uint32 size;
+ uint32 portable_context_offset;
+ uint32 portable_context_size;
+ uint32 arch;
+ uint32 regs_size;
+ uint32 reserved[11];
+ union {
+ ExcRegs386 regs;
+ ExcRegsAmd64 regs64;
+ } regs;
+};
+
+struct ExcPortableContext
+{
+ uint32 pc;
+ uint32 sp;
+ uint32 fp;
+};
diff --git a/src/runtime/defs_nacl_arm.h b/src/runtime/defs_nacl_arm.h
new file mode 100644
index 000000000..9ce07ccb2
--- /dev/null
+++ b/src/runtime/defs_nacl_arm.h
@@ -0,0 +1,70 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Created by hand, not machine generated.
+
+enum
+{
+ // These values are referred to in the source code
+ // but really don't matter. Even so, use the standard numbers.
+ SIGSEGV = 11,
+ SIGPROF = 27,
+};
+
+typedef struct Siginfo Siginfo;
+
+// native_client/src/trusted/service_runtime/include/machine/_types.h
+typedef struct Timespec Timespec;
+
+struct Timespec
+{
+ int64 tv_sec;
+ int32 tv_nsec;
+};
+
+// native_client/src/trusted/service_runtime/nacl_exception.h
+// native_client/src/include/nacl/nacl_exception.h
+
+typedef struct ExcContext ExcContext;
+typedef struct ExcPortable ExcPortable;
+typedef struct ExcRegsARM ExcRegsARM;
+
+struct ExcRegsARM
+{
+ uint32 r0;
+ uint32 r1;
+ uint32 r2;
+ uint32 r3;
+ uint32 r4;
+ uint32 r5;
+ uint32 r6;
+ uint32 r7;
+ uint32 r8;
+ uint32 r9; // the value reported here is undefined.
+ uint32 r10;
+ uint32 r11;
+ uint32 r12;
+ uint32 sp; /* r13 */
+ uint32 lr; /* r14 */
+ uint32 pc; /* r15 */
+ uint32 cpsr;
+};
+
+struct ExcContext
+{
+ uint32 size;
+ uint32 portable_context_offset;
+ uint32 portable_context_size;
+ uint32 arch;
+ uint32 regs_size;
+ uint32 reserved[11];
+ ExcRegsARM regs;
+};
+
+struct ExcPortableContext
+{
+ uint32 pc;
+ uint32 sp;
+ uint32 fp;
+};
diff --git a/src/runtime/defs_netbsd.go b/src/runtime/defs_netbsd.go
new file mode 100644
index 000000000..b27949e42
--- /dev/null
+++ b/src/runtime/defs_netbsd.go
@@ -0,0 +1,125 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go >defs_netbsd_amd64.h
+GOARCH=386 go tool cgo -cdefs defs_netbsd.go defs_netbsd_386.go >defs_netbsd_386.h
+GOARCH=arm go tool cgo -cdefs defs_netbsd.go defs_netbsd_arm.go >defs_netbsd_arm.h
+*/
+
+// +godefs map __fpregset_t [644]byte
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/event.h>
+#include <sys/time.h>
+#include <sys/ucontext.h>
+#include <sys/unistd.h>
+#include <errno.h>
+#include <signal.h>
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EFAULT = C.EFAULT
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANON
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_FREE = C.MADV_FREE
+
+ SA_SIGINFO = C.SA_SIGINFO
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGEMT = C.SIGEMT
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGBUS = C.SIGBUS
+ SIGSEGV = C.SIGSEGV
+ SIGSYS = C.SIGSYS
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGTERM = C.SIGTERM
+ SIGURG = C.SIGURG
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGCONT = C.SIGCONT
+ SIGCHLD = C.SIGCHLD
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGIO = C.SIGIO
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGINFO = C.SIGINFO
+ SIGUSR1 = C.SIGUSR1
+ SIGUSR2 = C.SIGUSR2
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ EV_ADD = C.EV_ADD
+ EV_DELETE = C.EV_DELETE
+ EV_CLEAR = C.EV_CLEAR
+ EV_RECEIPT = 0
+ EV_ERROR = C.EV_ERROR
+ EVFILT_READ = C.EVFILT_READ
+ EVFILT_WRITE = C.EVFILT_WRITE
+)
+
+type SigaltstackT C.struct_sigaltstack
+type Sigset C.sigset_t
+type Siginfo C.struct__ksiginfo
+
+type StackT C.stack_t
+
+type Timespec C.struct_timespec
+type Timeval C.struct_timeval
+type Itimerval C.struct_itimerval
+
+type McontextT C.mcontext_t
+type UcontextT C.ucontext_t
+
+type Kevent C.struct_kevent
diff --git a/src/runtime/defs_netbsd_386.go b/src/runtime/defs_netbsd_386.go
new file mode 100644
index 000000000..c26f24607
--- /dev/null
+++ b/src/runtime/defs_netbsd_386.go
@@ -0,0 +1,41 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=386 go tool cgo -cdefs defs_netbsd.go defs_netbsd_386.go >defs_netbsd_386.h
+*/
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <machine/mcontext.h>
+*/
+import "C"
+
+const (
+ REG_GS = C._REG_GS
+ REG_FS = C._REG_FS
+ REG_ES = C._REG_ES
+ REG_DS = C._REG_DS
+ REG_EDI = C._REG_EDI
+ REG_ESI = C._REG_ESI
+ REG_EBP = C._REG_EBP
+ REG_ESP = C._REG_ESP
+ REG_EBX = C._REG_EBX
+ REG_EDX = C._REG_EDX
+ REG_ECX = C._REG_ECX
+ REG_EAX = C._REG_EAX
+ REG_TRAPNO = C._REG_TRAPNO
+ REG_ERR = C._REG_ERR
+ REG_EIP = C._REG_EIP
+ REG_CS = C._REG_CS
+ REG_EFL = C._REG_EFL
+ REG_UESP = C._REG_UESP
+ REG_SS = C._REG_SS
+)
diff --git a/src/runtime/defs_netbsd_386.h b/src/runtime/defs_netbsd_386.h
new file mode 100644
index 000000000..fd87804f9
--- /dev/null
+++ b/src/runtime/defs_netbsd_386.h
@@ -0,0 +1,182 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_netbsd.go defs_netbsd_386.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x6,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = 0x0,
+ EVFILT_WRITE = 0x1,
+};
+
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct Siginfo Siginfo;
+typedef struct StackT StackT;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct McontextT McontextT;
+typedef struct UcontextT UcontextT;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct SigaltstackT {
+ byte *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct Siginfo {
+ int32 _signo;
+ int32 _code;
+ int32 _errno;
+ byte _reason[20];
+};
+
+struct StackT {
+ byte *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int32 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int32 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct McontextT {
+ int32 __gregs[19];
+ byte __fpregs[644];
+ int32 _mc_tlsbase;
+};
+struct UcontextT {
+ uint32 uc_flags;
+ UcontextT *uc_link;
+ Sigset uc_sigmask;
+ StackT uc_stack;
+ McontextT uc_mcontext;
+ int32 __uc_pad[4];
+};
+
+struct KeventT {
+ uint32 ident;
+ uint32 filter;
+ uint32 flags;
+ uint32 fflags;
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_netbsd.go defs_netbsd_386.go
+
+
+enum {
+ REG_GS = 0x0,
+ REG_FS = 0x1,
+ REG_ES = 0x2,
+ REG_DS = 0x3,
+ REG_EDI = 0x4,
+ REG_ESI = 0x5,
+ REG_EBP = 0x6,
+ REG_ESP = 0x7,
+ REG_EBX = 0x8,
+ REG_EDX = 0x9,
+ REG_ECX = 0xa,
+ REG_EAX = 0xb,
+ REG_TRAPNO = 0xc,
+ REG_ERR = 0xd,
+ REG_EIP = 0xe,
+ REG_CS = 0xf,
+ REG_EFL = 0x10,
+ REG_UESP = 0x11,
+ REG_SS = 0x12,
+};
+
diff --git a/src/runtime/defs_netbsd_amd64.go b/src/runtime/defs_netbsd_amd64.go
new file mode 100644
index 000000000..f18a7b1fe
--- /dev/null
+++ b/src/runtime/defs_netbsd_amd64.go
@@ -0,0 +1,48 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go >defs_netbsd_amd64.h
+*/
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <machine/mcontext.h>
+*/
+import "C"
+
+const (
+ REG_RDI = C._REG_RDI
+ REG_RSI = C._REG_RSI
+ REG_RDX = C._REG_RDX
+ REG_RCX = C._REG_RCX
+ REG_R8 = C._REG_R8
+ REG_R9 = C._REG_R9
+ REG_R10 = C._REG_R10
+ REG_R11 = C._REG_R11
+ REG_R12 = C._REG_R12
+ REG_R13 = C._REG_R13
+ REG_R14 = C._REG_R14
+ REG_R15 = C._REG_R15
+ REG_RBP = C._REG_RBP
+ REG_RBX = C._REG_RBX
+ REG_RAX = C._REG_RAX
+ REG_GS = C._REG_GS
+ REG_FS = C._REG_FS
+ REG_ES = C._REG_ES
+ REG_DS = C._REG_DS
+ REG_TRAPNO = C._REG_TRAPNO
+ REG_ERR = C._REG_ERR
+ REG_RIP = C._REG_RIP
+ REG_CS = C._REG_CS
+ REG_RFLAGS = C._REG_RFLAGS
+ REG_RSP = C._REG_RSP
+ REG_SS = C._REG_SS
+)
diff --git a/src/runtime/defs_netbsd_amd64.h b/src/runtime/defs_netbsd_amd64.h
new file mode 100644
index 000000000..dac94b113
--- /dev/null
+++ b/src/runtime/defs_netbsd_amd64.h
@@ -0,0 +1,194 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x6,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = 0x0,
+ EVFILT_WRITE = 0x1,
+};
+
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct Siginfo Siginfo;
+typedef struct StackT StackT;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct McontextT McontextT;
+typedef struct UcontextT UcontextT;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct SigaltstackT {
+ byte *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct Siginfo {
+ int32 _signo;
+ int32 _code;
+ int32 _errno;
+ int32 _pad;
+ byte _reason[24];
+};
+
+struct StackT {
+ byte *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int32 tv_usec;
+ byte Pad_cgo_0[4];
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct McontextT {
+ uint64 __gregs[26];
+ uint64 _mc_tlsbase;
+ int8 __fpregs[512];
+};
+struct UcontextT {
+ uint32 uc_flags;
+ byte Pad_cgo_0[4];
+ UcontextT *uc_link;
+ Sigset uc_sigmask;
+ StackT uc_stack;
+ McontextT uc_mcontext;
+};
+
+struct KeventT {
+ uint64 ident;
+ uint32 filter;
+ uint32 flags;
+ uint32 fflags;
+ byte Pad_cgo_0[4];
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go
+
+
+enum {
+ REG_RDI = 0x0,
+ REG_RSI = 0x1,
+ REG_RDX = 0x2,
+ REG_RCX = 0x3,
+ REG_R8 = 0x4,
+ REG_R9 = 0x5,
+ REG_R10 = 0x6,
+ REG_R11 = 0x7,
+ REG_R12 = 0x8,
+ REG_R13 = 0x9,
+ REG_R14 = 0xa,
+ REG_R15 = 0xb,
+ REG_RBP = 0xc,
+ REG_RBX = 0xd,
+ REG_RAX = 0xe,
+ REG_GS = 0xf,
+ REG_FS = 0x10,
+ REG_ES = 0x11,
+ REG_DS = 0x12,
+ REG_TRAPNO = 0x13,
+ REG_ERR = 0x14,
+ REG_RIP = 0x15,
+ REG_CS = 0x16,
+ REG_RFLAGS = 0x17,
+ REG_RSP = 0x18,
+ REG_SS = 0x19,
+};
+
diff --git a/src/runtime/defs_netbsd_arm.go b/src/runtime/defs_netbsd_arm.go
new file mode 100644
index 000000000..cb0dce66b
--- /dev/null
+++ b/src/runtime/defs_netbsd_arm.go
@@ -0,0 +1,39 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=arm go tool cgo -cdefs defs_netbsd.go defs_netbsd_arm.go >defs_netbsd_arm.h
+*/
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <machine/mcontext.h>
+*/
+import "C"
+
+const (
+ REG_R0 = C._REG_R0
+ REG_R1 = C._REG_R1
+ REG_R2 = C._REG_R2
+ REG_R3 = C._REG_R3
+ REG_R4 = C._REG_R4
+ REG_R5 = C._REG_R5
+ REG_R6 = C._REG_R6
+ REG_R7 = C._REG_R7
+ REG_R8 = C._REG_R8
+ REG_R9 = C._REG_R9
+ REG_R10 = C._REG_R10
+ REG_R11 = C._REG_R11
+ REG_R12 = C._REG_R12
+ REG_R13 = C._REG_R13
+ REG_R14 = C._REG_R14
+ REG_R15 = C._REG_R15
+ REG_CPSR = C._REG_CPSR
+)
diff --git a/src/runtime/defs_netbsd_arm.h b/src/runtime/defs_netbsd_arm.h
new file mode 100644
index 000000000..70f34af47
--- /dev/null
+++ b/src/runtime/defs_netbsd_arm.h
@@ -0,0 +1,184 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_netbsd.go defs_netbsd_arm.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x6,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_RECEIPT = 0,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = 0x0,
+ EVFILT_WRITE = 0x1,
+};
+
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct Siginfo Siginfo;
+typedef struct StackT StackT;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct McontextT McontextT;
+typedef struct UcontextT UcontextT;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct SigaltstackT {
+ byte *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+struct Sigset {
+ uint32 __bits[4];
+};
+struct Siginfo {
+ int32 _signo;
+ int32 _code;
+ int32 _errno;
+ byte _reason[20];
+};
+
+struct StackT {
+ byte *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int32 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int32 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct McontextT {
+ uint32 __gregs[17];
+#ifdef __ARM_EABI__
+ byte __fpu[4+8*32+4];
+#else
+ byte __fpu[4+4*33+4];
+#endif
+ uint32 _mc_tlsbase;
+};
+struct UcontextT {
+ uint32 uc_flags;
+ UcontextT *uc_link;
+ Sigset uc_sigmask;
+ StackT uc_stack;
+ McontextT uc_mcontext;
+ int32 __uc_pad[2];
+};
+
+struct KeventT {
+ uint32 ident;
+ uint32 filter;
+ uint32 flags;
+ uint32 fflags;
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_netbsd.go defs_netbsd_arm.go
+
+
+enum {
+ REG_R0 = 0x0,
+ REG_R1 = 0x1,
+ REG_R2 = 0x2,
+ REG_R3 = 0x3,
+ REG_R4 = 0x4,
+ REG_R5 = 0x5,
+ REG_R6 = 0x6,
+ REG_R7 = 0x7,
+ REG_R8 = 0x8,
+ REG_R9 = 0x9,
+ REG_R10 = 0xa,
+ REG_R11 = 0xb,
+ REG_R12 = 0xc,
+ REG_R13 = 0xd,
+ REG_R14 = 0xe,
+ REG_R15 = 0xf,
+ REG_CPSR = 0x10,
+};
+
diff --git a/src/runtime/defs_openbsd.go b/src/runtime/defs_openbsd.go
new file mode 100644
index 000000000..39224c988
--- /dev/null
+++ b/src/runtime/defs_openbsd.go
@@ -0,0 +1,121 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_openbsd.go >defs_openbsd_amd64.h
+GOARCH=386 go tool cgo -cdefs defs_openbsd.go >defs_openbsd_386.h
+*/
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/unistd.h>
+#include <sys/signal.h>
+#include <errno.h>
+#include <signal.h>
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EFAULT = C.EFAULT
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANON
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_FREE = C.MADV_FREE
+
+ SA_SIGINFO = C.SA_SIGINFO
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGEMT = C.SIGEMT
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGBUS = C.SIGBUS
+ SIGSEGV = C.SIGSEGV
+ SIGSYS = C.SIGSYS
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGTERM = C.SIGTERM
+ SIGURG = C.SIGURG
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGCONT = C.SIGCONT
+ SIGCHLD = C.SIGCHLD
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGIO = C.SIGIO
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGINFO = C.SIGINFO
+ SIGUSR1 = C.SIGUSR1
+ SIGUSR2 = C.SIGUSR2
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ EV_ADD = C.EV_ADD
+ EV_DELETE = C.EV_DELETE
+ EV_CLEAR = C.EV_CLEAR
+ EV_ERROR = C.EV_ERROR
+ EVFILT_READ = C.EVFILT_READ
+ EVFILT_WRITE = C.EVFILT_WRITE
+)
+
+type TforkT C.struct___tfork
+
+type SigaltstackT C.struct_sigaltstack
+type Sigcontext C.struct_sigcontext
+type Siginfo C.siginfo_t
+type Sigset C.sigset_t
+type Sigval C.union_sigval
+
+type StackT C.stack_t
+
+type Timespec C.struct_timespec
+type Timeval C.struct_timeval
+type Itimerval C.struct_itimerval
+
+type KeventT C.struct_kevent
diff --git a/src/runtime/defs_openbsd_386.h b/src/runtime/defs_openbsd_386.h
new file mode 100644
index 000000000..6b77e0084
--- /dev/null
+++ b/src/runtime/defs_openbsd_386.h
@@ -0,0 +1,168 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_openbsd.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x6,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct TforkT TforkT;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigcontext Sigcontext;
+typedef struct Siginfo Siginfo;
+typedef struct StackT StackT;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct TforkT {
+ byte *tf_tcb;
+ int32 *tf_tid;
+ byte *tf_stack;
+};
+
+struct SigaltstackT {
+ byte *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+struct Sigcontext {
+ int32 sc_gs;
+ int32 sc_fs;
+ int32 sc_es;
+ int32 sc_ds;
+ int32 sc_edi;
+ int32 sc_esi;
+ int32 sc_ebp;
+ int32 sc_ebx;
+ int32 sc_edx;
+ int32 sc_ecx;
+ int32 sc_eax;
+ int32 sc_eip;
+ int32 sc_cs;
+ int32 sc_eflags;
+ int32 sc_esp;
+ int32 sc_ss;
+ int32 __sc_unused;
+ int32 sc_mask;
+ int32 sc_trapno;
+ int32 sc_err;
+ void *sc_fpstate;
+};
+struct Siginfo {
+ int32 si_signo;
+ int32 si_code;
+ int32 si_errno;
+ byte _data[116];
+};
+typedef uint32 Sigset;
+typedef byte Sigval[4];
+
+struct StackT {
+ byte *ss_sp;
+ uint32 ss_size;
+ int32 ss_flags;
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int32 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int32 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct KeventT {
+ uint32 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
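
The OpenBSD header above also carries the kqueue constants (EV_ADD, EV_CLEAR, EVFILT_READ, EVFILT_WRITE) and the KeventT layout used by the BSD network poller. The sketch below is illustrative only, not the runtime's netpoll code: it fills in kevent change records the way a kqueue-based poller typically would, using the constants and the 32-bit KeventT layout shown above; the file descriptor is a made-up placeholder.

package main

import "fmt"

// Constants copied from defs_openbsd_386.h above.
const (
	_EV_ADD       = 0x1
	_EV_CLEAR     = 0x20
	_EVFILT_READ  = -0x1
	_EVFILT_WRITE = -0x2
)

// keventT mirrors the KeventT struct above (32-bit layout).
type keventT struct {
	ident  uint32
	filter int16
	flags  uint16
	fflags uint32
	data   int64
	udata  *byte
}

func main() {
	fd := uint32(7) // hypothetical descriptor
	changes := []keventT{
		{ident: fd, filter: _EVFILT_READ, flags: _EV_ADD | _EV_CLEAR},
		{ident: fd, filter: _EVFILT_WRITE, flags: _EV_ADD | _EV_CLEAR},
	}
	fmt.Printf("%+v\n", changes)
}
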
diff --git a/src/runtime/defs_openbsd_amd64.h b/src/runtime/defs_openbsd_amd64.h
new file mode 100644
index 000000000..761e8e47d
--- /dev/null
+++ b/src/runtime/defs_openbsd_amd64.h
@@ -0,0 +1,179 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_openbsd.go
+
+
+enum {
+ EINTR = 0x4,
+ EFAULT = 0xe,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x1000,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x6,
+
+ SA_SIGINFO = 0x40,
+ SA_RESTART = 0x2,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x10,
+ SIGSTOP = 0x11,
+ SIGTSTP = 0x12,
+ SIGCONT = 0x13,
+ SIGCHLD = 0x14,
+ SIGTTIN = 0x15,
+ SIGTTOU = 0x16,
+ SIGIO = 0x17,
+ SIGXCPU = 0x18,
+ SIGXFSZ = 0x19,
+ SIGVTALRM = 0x1a,
+ SIGPROF = 0x1b,
+ SIGWINCH = 0x1c,
+ SIGINFO = 0x1d,
+ SIGUSR1 = 0x1e,
+ SIGUSR2 = 0x1f,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ EV_ADD = 0x1,
+ EV_DELETE = 0x2,
+ EV_CLEAR = 0x20,
+ EV_ERROR = 0x4000,
+ EVFILT_READ = -0x1,
+ EVFILT_WRITE = -0x2,
+};
+
+typedef struct TforkT TforkT;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigcontext Sigcontext;
+typedef struct Siginfo Siginfo;
+typedef struct StackT StackT;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct KeventT KeventT;
+
+#pragma pack on
+
+struct TforkT {
+ byte *tf_tcb;
+ int32 *tf_tid;
+ byte *tf_stack;
+};
+
+struct SigaltstackT {
+ byte *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+struct Sigcontext {
+ int64 sc_rdi;
+ int64 sc_rsi;
+ int64 sc_rdx;
+ int64 sc_rcx;
+ int64 sc_r8;
+ int64 sc_r9;
+ int64 sc_r10;
+ int64 sc_r11;
+ int64 sc_r12;
+ int64 sc_r13;
+ int64 sc_r14;
+ int64 sc_r15;
+ int64 sc_rbp;
+ int64 sc_rbx;
+ int64 sc_rax;
+ int64 sc_gs;
+ int64 sc_fs;
+ int64 sc_es;
+ int64 sc_ds;
+ int64 sc_trapno;
+ int64 sc_err;
+ int64 sc_rip;
+ int64 sc_cs;
+ int64 sc_rflags;
+ int64 sc_rsp;
+ int64 sc_ss;
+ void *sc_fpstate;
+ int32 __sc_unused;
+ int32 sc_mask;
+};
+struct Siginfo {
+ int32 si_signo;
+ int32 si_code;
+ int32 si_errno;
+ byte Pad_cgo_0[4];
+ byte _data[120];
+};
+typedef uint32 Sigset;
+typedef byte Sigval[8];
+
+struct StackT {
+ byte *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int64 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct KeventT {
+ uint64 ident;
+ int16 filter;
+ uint16 flags;
+ uint32 fflags;
+ int64 data;
+ byte *udata;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_plan9_386.h b/src/runtime/defs_plan9_386.h
new file mode 100644
index 000000000..a762b8589
--- /dev/null
+++ b/src/runtime/defs_plan9_386.h
@@ -0,0 +1,26 @@
+#define PAGESIZE 0x1000
+
+typedef struct Ureg Ureg;
+
+struct Ureg
+{
+ uint32 di; /* general registers */
+ uint32 si; /* ... */
+ uint32 bp; /* ... */
+ uint32 nsp;
+ uint32 bx; /* ... */
+ uint32 dx; /* ... */
+ uint32 cx; /* ... */
+ uint32 ax; /* ... */
+ uint32 gs; /* data segments */
+ uint32 fs; /* ... */
+ uint32 es; /* ... */
+ uint32 ds; /* ... */
+ uint32 trap; /* trap type */
+ uint32 ecode; /* error code (or zero) */
+ uint32 pc; /* pc */
+ uint32 cs; /* old context */
+ uint32 flags; /* old flags */
+ uint32 sp;
+ uint32 ss; /* old stack segment */
+};
diff --git a/src/runtime/defs_plan9_amd64.h b/src/runtime/defs_plan9_amd64.h
new file mode 100644
index 000000000..20bca479c
--- /dev/null
+++ b/src/runtime/defs_plan9_amd64.h
@@ -0,0 +1,34 @@
+#define PAGESIZE 0x1000
+
+typedef struct Ureg Ureg;
+
+struct Ureg {
+ uint64 ax;
+ uint64 bx;
+ uint64 cx;
+ uint64 dx;
+ uint64 si;
+ uint64 di;
+ uint64 bp;
+ uint64 r8;
+ uint64 r9;
+ uint64 r10;
+ uint64 r11;
+ uint64 r12;
+ uint64 r13;
+ uint64 r14;
+ uint64 r15;
+
+ uint16 ds;
+ uint16 es;
+ uint16 fs;
+ uint16 gs;
+
+ uint64 type;
+ uint64 error; /* error code (or zero) */
+ uint64 ip; /* pc */
+ uint64 cs; /* old context */
+ uint64 flags; /* old flags */
+ uint64 sp; /* sp */
+ uint64 ss; /* old stack segment */
+};
diff --git a/src/runtime/defs_solaris.go b/src/runtime/defs_solaris.go
new file mode 100644
index 000000000..ba44e5fd4
--- /dev/null
+++ b/src/runtime/defs_solaris.go
@@ -0,0 +1,156 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_solaris.go >defs_solaris_amd64.h
+*/
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/select.h>
+#include <sys/siginfo.h>
+#include <sys/signal.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/ucontext.h>
+#include <sys/regset.h>
+#include <sys/unistd.h>
+#include <sys/fork.h>
+#include <sys/port.h>
+#include <semaphore.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <netdb.h>
+*/
+import "C"
+
+const (
+ EINTR = C.EINTR
+ EBADF = C.EBADF
+ EFAULT = C.EFAULT
+ EAGAIN = C.EAGAIN
+ ETIMEDOUT = C.ETIMEDOUT
+ EWOULDBLOCK = C.EWOULDBLOCK
+ EINPROGRESS = C.EINPROGRESS
+
+ PROT_NONE = C.PROT_NONE
+ PROT_READ = C.PROT_READ
+ PROT_WRITE = C.PROT_WRITE
+ PROT_EXEC = C.PROT_EXEC
+
+ MAP_ANON = C.MAP_ANON
+ MAP_PRIVATE = C.MAP_PRIVATE
+ MAP_FIXED = C.MAP_FIXED
+
+ MADV_FREE = C.MADV_FREE
+
+ SA_SIGINFO = C.SA_SIGINFO
+ SA_RESTART = C.SA_RESTART
+ SA_ONSTACK = C.SA_ONSTACK
+
+ SIGHUP = C.SIGHUP
+ SIGINT = C.SIGINT
+ SIGQUIT = C.SIGQUIT
+ SIGILL = C.SIGILL
+ SIGTRAP = C.SIGTRAP
+ SIGABRT = C.SIGABRT
+ SIGEMT = C.SIGEMT
+ SIGFPE = C.SIGFPE
+ SIGKILL = C.SIGKILL
+ SIGBUS = C.SIGBUS
+ SIGSEGV = C.SIGSEGV
+ SIGSYS = C.SIGSYS
+ SIGPIPE = C.SIGPIPE
+ SIGALRM = C.SIGALRM
+ SIGTERM = C.SIGTERM
+ SIGURG = C.SIGURG
+ SIGSTOP = C.SIGSTOP
+ SIGTSTP = C.SIGTSTP
+ SIGCONT = C.SIGCONT
+ SIGCHLD = C.SIGCHLD
+ SIGTTIN = C.SIGTTIN
+ SIGTTOU = C.SIGTTOU
+ SIGIO = C.SIGIO
+ SIGXCPU = C.SIGXCPU
+ SIGXFSZ = C.SIGXFSZ
+ SIGVTALRM = C.SIGVTALRM
+ SIGPROF = C.SIGPROF
+ SIGWINCH = C.SIGWINCH
+ SIGUSR1 = C.SIGUSR1
+ SIGUSR2 = C.SIGUSR2
+
+ FPE_INTDIV = C.FPE_INTDIV
+ FPE_INTOVF = C.FPE_INTOVF
+ FPE_FLTDIV = C.FPE_FLTDIV
+ FPE_FLTOVF = C.FPE_FLTOVF
+ FPE_FLTUND = C.FPE_FLTUND
+ FPE_FLTRES = C.FPE_FLTRES
+ FPE_FLTINV = C.FPE_FLTINV
+ FPE_FLTSUB = C.FPE_FLTSUB
+
+ BUS_ADRALN = C.BUS_ADRALN
+ BUS_ADRERR = C.BUS_ADRERR
+ BUS_OBJERR = C.BUS_OBJERR
+
+ SEGV_MAPERR = C.SEGV_MAPERR
+ SEGV_ACCERR = C.SEGV_ACCERR
+
+ ITIMER_REAL = C.ITIMER_REAL
+ ITIMER_VIRTUAL = C.ITIMER_VIRTUAL
+ ITIMER_PROF = C.ITIMER_PROF
+
+ _SC_NPROCESSORS_ONLN = C._SC_NPROCESSORS_ONLN
+
+ PTHREAD_CREATE_DETACHED = C.PTHREAD_CREATE_DETACHED
+
+ FORK_NOSIGCHLD = C.FORK_NOSIGCHLD
+ FORK_WAITPID = C.FORK_WAITPID
+
+ MAXHOSTNAMELEN = C.MAXHOSTNAMELEN
+
+ O_NONBLOCK = C.O_NONBLOCK
+ FD_CLOEXEC = C.FD_CLOEXEC
+ F_GETFL = C.F_GETFL
+ F_SETFL = C.F_SETFL
+ F_SETFD = C.F_SETFD
+
+ POLLIN = C.POLLIN
+ POLLOUT = C.POLLOUT
+ POLLHUP = C.POLLHUP
+ POLLERR = C.POLLERR
+
+ PORT_SOURCE_FD = C.PORT_SOURCE_FD
+)
+
+type SemT C.sem_t
+
+type SigaltstackT C.struct_sigaltstack
+type Sigset C.sigset_t
+type StackT C.stack_t
+
+type Siginfo C.siginfo_t
+type Sigaction C.struct_sigaction
+
+type Fpregset C.fpregset_t
+type Mcontext C.mcontext_t
+type Ucontext C.ucontext_t
+
+type Timespec C.struct_timespec
+type Timeval C.struct_timeval
+type Itimerval C.struct_itimerval
+
+type PortEvent C.port_event_t
+type Pthread C.pthread_t
+type PthreadAttr C.pthread_attr_t
+
+// depends on Timespec, must appear below
+type Stat C.struct_stat
diff --git a/src/runtime/defs_solaris_amd64.go b/src/runtime/defs_solaris_amd64.go
new file mode 100644
index 000000000..049317888
--- /dev/null
+++ b/src/runtime/defs_solaris_amd64.go
@@ -0,0 +1,48 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_solaris.go defs_solaris_amd64.go >defs_solaris_amd64.h
+*/
+
+package runtime
+
+/*
+#include <sys/types.h>
+#include <sys/regset.h>
+*/
+import "C"
+
+const (
+ REG_RDI = C.REG_RDI
+ REG_RSI = C.REG_RSI
+ REG_RDX = C.REG_RDX
+ REG_RCX = C.REG_RCX
+ REG_R8 = C.REG_R8
+ REG_R9 = C.REG_R9
+ REG_R10 = C.REG_R10
+ REG_R11 = C.REG_R11
+ REG_R12 = C.REG_R12
+ REG_R13 = C.REG_R13
+ REG_R14 = C.REG_R14
+ REG_R15 = C.REG_R15
+ REG_RBP = C.REG_RBP
+ REG_RBX = C.REG_RBX
+ REG_RAX = C.REG_RAX
+ REG_GS = C.REG_GS
+ REG_FS = C.REG_FS
+ REG_ES = C.REG_ES
+ REG_DS = C.REG_DS
+ REG_TRAPNO = C.REG_TRAPNO
+ REG_ERR = C.REG_ERR
+ REG_RIP = C.REG_RIP
+ REG_CS = C.REG_CS
+ REG_RFLAGS = C.REG_RFL
+ REG_RSP = C.REG_RSP
+ REG_SS = C.REG_SS
+)
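
Two things worth noting about the Solaris defs above: REG_RFLAGS is aliased to the native REG_RFL macro because that is the name <sys/regset.h> uses, and the REG_* values are indices into the mcontext gregs array (gregs[28] in the generated header that follows). As a rough illustration, and not the runtime's actual signal-handling code, the sketch below indexes a mock gregs array with the generated REG_RIP/REG_RSP values to recover a "pc" and "sp"; the register contents are made-up placeholders.

package main

import "fmt"

// Indices copied from defs_solaris_amd64.h below.
const (
	_REG_RIP = 0x11
	_REG_RSP = 0x14
)

func main() {
	var gregs [28]int64 // layout of Mcontext.gregs in the header below
	gregs[_REG_RIP] = 0x400abc   // hypothetical interrupted pc
	gregs[_REG_RSP] = 0x7ffff000 // hypothetical stack pointer
	fmt.Printf("pc=%#x sp=%#x\n", gregs[_REG_RIP], gregs[_REG_RSP])
}
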
diff --git a/src/runtime/defs_solaris_amd64.h b/src/runtime/defs_solaris_amd64.h
new file mode 100644
index 000000000..cb1cfeadc
--- /dev/null
+++ b/src/runtime/defs_solaris_amd64.h
@@ -0,0 +1,254 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_solaris.go defs_solaris_amd64.go
+
+
+enum {
+ EINTR = 0x4,
+ EBADF = 0x9,
+ EFAULT = 0xe,
+ EAGAIN = 0xb,
+ ETIMEDOUT = 0x91,
+ EWOULDBLOCK = 0xb,
+ EINPROGRESS = 0x96,
+
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_ANON = 0x100,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+
+ MADV_FREE = 0x5,
+
+ SA_SIGINFO = 0x8,
+ SA_RESTART = 0x4,
+ SA_ONSTACK = 0x1,
+
+ SIGHUP = 0x1,
+ SIGINT = 0x2,
+ SIGQUIT = 0x3,
+ SIGILL = 0x4,
+ SIGTRAP = 0x5,
+ SIGABRT = 0x6,
+ SIGEMT = 0x7,
+ SIGFPE = 0x8,
+ SIGKILL = 0x9,
+ SIGBUS = 0xa,
+ SIGSEGV = 0xb,
+ SIGSYS = 0xc,
+ SIGPIPE = 0xd,
+ SIGALRM = 0xe,
+ SIGTERM = 0xf,
+ SIGURG = 0x15,
+ SIGSTOP = 0x17,
+ SIGTSTP = 0x18,
+ SIGCONT = 0x19,
+ SIGCHLD = 0x12,
+ SIGTTIN = 0x1a,
+ SIGTTOU = 0x1b,
+ SIGIO = 0x16,
+ SIGXCPU = 0x1e,
+ SIGXFSZ = 0x1f,
+ SIGVTALRM = 0x1c,
+ SIGPROF = 0x1d,
+ SIGWINCH = 0x14,
+ SIGUSR1 = 0x10,
+ SIGUSR2 = 0x11,
+
+ FPE_INTDIV = 0x1,
+ FPE_INTOVF = 0x2,
+ FPE_FLTDIV = 0x3,
+ FPE_FLTOVF = 0x4,
+ FPE_FLTUND = 0x5,
+ FPE_FLTRES = 0x6,
+ FPE_FLTINV = 0x7,
+ FPE_FLTSUB = 0x8,
+
+ BUS_ADRALN = 0x1,
+ BUS_ADRERR = 0x2,
+ BUS_OBJERR = 0x3,
+
+ SEGV_MAPERR = 0x1,
+ SEGV_ACCERR = 0x2,
+
+ ITIMER_REAL = 0x0,
+ ITIMER_VIRTUAL = 0x1,
+ ITIMER_PROF = 0x2,
+
+ _SC_NPROCESSORS_ONLN = 0xf,
+
+ PTHREAD_CREATE_DETACHED = 0x40,
+
+ FORK_NOSIGCHLD = 0x1,
+ FORK_WAITPID = 0x2,
+
+ MAXHOSTNAMELEN = 0x100,
+
+ O_NONBLOCK = 0x80,
+ FD_CLOEXEC = 0x1,
+ F_GETFL = 0x3,
+ F_SETFL = 0x4,
+ F_SETFD = 0x2,
+
+ POLLIN = 0x1,
+ POLLOUT = 0x4,
+ POLLHUP = 0x10,
+ POLLERR = 0x8,
+
+ PORT_SOURCE_FD = 0x4,
+};
+
+typedef struct SemT SemT;
+typedef struct SigaltstackT SigaltstackT;
+typedef struct Sigset Sigset;
+typedef struct StackT StackT;
+typedef struct Siginfo Siginfo;
+typedef struct SigactionT SigactionT;
+typedef struct Fpregset Fpregset;
+typedef struct Mcontext Mcontext;
+typedef struct Ucontext Ucontext;
+typedef struct Timespec Timespec;
+typedef struct Timeval Timeval;
+typedef struct Itimerval Itimerval;
+typedef struct PortEvent PortEvent;
+typedef struct PthreadAttr PthreadAttr;
+typedef struct Stat Stat;
+
+#pragma pack on
+
+struct SemT {
+ uint32 sem_count;
+ uint16 sem_type;
+ uint16 sem_magic;
+ uint64 sem_pad1[3];
+ uint64 sem_pad2[2];
+};
+
+struct SigaltstackT {
+ byte *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+struct Sigset {
+ uint32 __sigbits[4];
+};
+struct StackT {
+ byte *ss_sp;
+ uint64 ss_size;
+ int32 ss_flags;
+ byte Pad_cgo_0[4];
+};
+
+struct Siginfo {
+ int32 si_signo;
+ int32 si_code;
+ int32 si_errno;
+ int32 si_pad;
+ byte __data[240];
+};
+struct SigactionT {
+ int32 sa_flags;
+ byte Pad_cgo_0[4];
+ byte _funcptr[8];
+ Sigset sa_mask;
+};
+
+struct Fpregset {
+ byte fp_reg_set[528];
+};
+struct Mcontext {
+ int64 gregs[28];
+ Fpregset fpregs;
+};
+struct Ucontext {
+ uint64 uc_flags;
+ Ucontext *uc_link;
+ Sigset uc_sigmask;
+ StackT uc_stack;
+ byte Pad_cgo_0[8];
+ Mcontext uc_mcontext;
+ int64 uc_filler[5];
+ byte Pad_cgo_1[8];
+};
+
+struct Timespec {
+ int64 tv_sec;
+ int64 tv_nsec;
+};
+struct Timeval {
+ int64 tv_sec;
+ int64 tv_usec;
+};
+struct Itimerval {
+ Timeval it_interval;
+ Timeval it_value;
+};
+
+struct PortEvent {
+ int32 portev_events;
+ uint16 portev_source;
+ uint16 portev_pad;
+ uint64 portev_object;
+ byte *portev_user;
+};
+typedef uint32 Pthread;
+struct PthreadAttr {
+ byte *__pthread_attrp;
+};
+
+struct Stat {
+ uint64 st_dev;
+ uint64 st_ino;
+ uint32 st_mode;
+ uint32 st_nlink;
+ uint32 st_uid;
+ uint32 st_gid;
+ uint64 st_rdev;
+ int64 st_size;
+ Timespec st_atim;
+ Timespec st_mtim;
+ Timespec st_ctim;
+ int32 st_blksize;
+ byte Pad_cgo_0[4];
+ int64 st_blocks;
+ int8 st_fstype[16];
+};
+
+
+#pragma pack off
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_solaris.go defs_solaris_amd64.go
+
+
+enum {
+ REG_RDI = 0x8,
+ REG_RSI = 0x9,
+ REG_RDX = 0xc,
+ REG_RCX = 0xd,
+ REG_R8 = 0x7,
+ REG_R9 = 0x6,
+ REG_R10 = 0x5,
+ REG_R11 = 0x4,
+ REG_R12 = 0x3,
+ REG_R13 = 0x2,
+ REG_R14 = 0x1,
+ REG_R15 = 0x0,
+ REG_RBP = 0xa,
+ REG_RBX = 0xb,
+ REG_RAX = 0xe,
+ REG_GS = 0x17,
+ REG_FS = 0x16,
+ REG_ES = 0x18,
+ REG_DS = 0x19,
+ REG_TRAPNO = 0xf,
+ REG_ERR = 0x10,
+ REG_RIP = 0x11,
+ REG_CS = 0x12,
+ REG_RFLAGS = 0x13,
+ REG_RSP = 0x14,
+ REG_SS = 0x15,
+};
+
diff --git a/src/runtime/defs_windows.go b/src/runtime/defs_windows.go
new file mode 100644
index 000000000..01aea92de
--- /dev/null
+++ b/src/runtime/defs_windows.go
@@ -0,0 +1,70 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+/*
+Input to cgo.
+
+GOARCH=amd64 go tool cgo -cdefs defs_windows.go > defs_windows_amd64.h
+GOARCH=386 go tool cgo -cdefs defs_windows.go > defs_windows_386.h
+*/
+
+package runtime
+
+/*
+#include <signal.h>
+#include <stdarg.h>
+#include <windef.h>
+#include <winbase.h>
+#include <wincon.h>
+
+#ifndef _X86_
+typedef struct {} FLOATING_SAVE_AREA;
+#endif
+#ifndef _AMD64_
+typedef struct {} M128A;
+#endif
+*/
+import "C"
+
+const (
+ PROT_NONE = 0
+ PROT_READ = 1
+ PROT_WRITE = 2
+ PROT_EXEC = 4
+
+ MAP_ANON = 1
+ MAP_PRIVATE = 2
+
+ DUPLICATE_SAME_ACCESS = C.DUPLICATE_SAME_ACCESS
+ THREAD_PRIORITY_HIGHEST = C.THREAD_PRIORITY_HIGHEST
+
+ SIGINT = C.SIGINT
+ CTRL_C_EVENT = C.CTRL_C_EVENT
+ CTRL_BREAK_EVENT = C.CTRL_BREAK_EVENT
+
+ CONTEXT_CONTROL = C.CONTEXT_CONTROL
+ CONTEXT_FULL = C.CONTEXT_FULL
+
+ EXCEPTION_ACCESS_VIOLATION = C.STATUS_ACCESS_VIOLATION
+ EXCEPTION_BREAKPOINT = C.STATUS_BREAKPOINT
+ EXCEPTION_FLT_DENORMAL_OPERAND = C.STATUS_FLOAT_DENORMAL_OPERAND
+ EXCEPTION_FLT_DIVIDE_BY_ZERO = C.STATUS_FLOAT_DIVIDE_BY_ZERO
+ EXCEPTION_FLT_INEXACT_RESULT = C.STATUS_FLOAT_INEXACT_RESULT
+ EXCEPTION_FLT_OVERFLOW = C.STATUS_FLOAT_OVERFLOW
+ EXCEPTION_FLT_UNDERFLOW = C.STATUS_FLOAT_UNDERFLOW
+ EXCEPTION_INT_DIVIDE_BY_ZERO = C.STATUS_INTEGER_DIVIDE_BY_ZERO
+ EXCEPTION_INT_OVERFLOW = C.STATUS_INTEGER_OVERFLOW
+
+ INFINITE = C.INFINITE
+ WAIT_TIMEOUT = C.WAIT_TIMEOUT
+)
+
+type SystemInfo C.SYSTEM_INFO
+type ExceptionRecord C.EXCEPTION_RECORD
+type FloatingSaveArea C.FLOATING_SAVE_AREA
+type M128a C.M128A
+type Context C.CONTEXT
+type Overlapped C.OVERLAPPED
diff --git a/src/runtime/defs_windows_386.h b/src/runtime/defs_windows_386.h
new file mode 100644
index 000000000..db3629a1d
--- /dev/null
+++ b/src/runtime/defs_windows_386.h
@@ -0,0 +1,113 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_windows.go
+
+
+enum {
+ PROT_NONE = 0,
+ PROT_READ = 1,
+ PROT_WRITE = 2,
+ PROT_EXEC = 4,
+
+ MAP_ANON = 1,
+ MAP_PRIVATE = 2,
+
+ DUPLICATE_SAME_ACCESS = 0x2,
+ THREAD_PRIORITY_HIGHEST = 0x2,
+
+ SIGINT = 0x2,
+ CTRL_C_EVENT = 0x0,
+ CTRL_BREAK_EVENT = 0x1,
+
+ CONTEXT_CONTROL = 0x10001,
+ CONTEXT_FULL = 0x10007,
+
+ EXCEPTION_ACCESS_VIOLATION = 0xc0000005,
+ EXCEPTION_BREAKPOINT = 0x80000003,
+ EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d,
+ EXCEPTION_FLT_DIVIDE_BY_ZERO = 0xc000008e,
+ EXCEPTION_FLT_INEXACT_RESULT = 0xc000008f,
+ EXCEPTION_FLT_OVERFLOW = 0xc0000091,
+ EXCEPTION_FLT_UNDERFLOW = 0xc0000093,
+ EXCEPTION_INT_DIVIDE_BY_ZERO = 0xc0000094,
+ EXCEPTION_INT_OVERFLOW = 0xc0000095,
+
+ INFINITE = 0xffffffff,
+ WAIT_TIMEOUT = 0x102,
+};
+
+typedef struct SystemInfo SystemInfo;
+typedef struct ExceptionRecord ExceptionRecord;
+typedef struct FloatingSaveArea FloatingSaveArea;
+typedef struct M128a M128a;
+typedef struct Context Context;
+typedef struct Overlapped Overlapped;
+
+#pragma pack on
+
+struct SystemInfo {
+ byte anon0[4];
+ uint32 dwPageSize;
+ byte *lpMinimumApplicationAddress;
+ byte *lpMaximumApplicationAddress;
+ uint32 dwActiveProcessorMask;
+ uint32 dwNumberOfProcessors;
+ uint32 dwProcessorType;
+ uint32 dwAllocationGranularity;
+ uint16 wProcessorLevel;
+ uint16 wProcessorRevision;
+};
+struct ExceptionRecord {
+ uint32 ExceptionCode;
+ uint32 ExceptionFlags;
+ ExceptionRecord *ExceptionRecord;
+ byte *ExceptionAddress;
+ uint32 NumberParameters;
+ uint32 ExceptionInformation[15];
+};
+struct FloatingSaveArea {
+ uint32 ControlWord;
+ uint32 StatusWord;
+ uint32 TagWord;
+ uint32 ErrorOffset;
+ uint32 ErrorSelector;
+ uint32 DataOffset;
+ uint32 DataSelector;
+ uint8 RegisterArea[80];
+ uint32 Cr0NpxState;
+};
+struct Context {
+ uint32 ContextFlags;
+ uint32 Dr0;
+ uint32 Dr1;
+ uint32 Dr2;
+ uint32 Dr3;
+ uint32 Dr6;
+ uint32 Dr7;
+ FloatingSaveArea FloatSave;
+ uint32 SegGs;
+ uint32 SegFs;
+ uint32 SegEs;
+ uint32 SegDs;
+ uint32 Edi;
+ uint32 Esi;
+ uint32 Ebx;
+ uint32 Edx;
+ uint32 Ecx;
+ uint32 Eax;
+ uint32 Ebp;
+ uint32 Eip;
+ uint32 SegCs;
+ uint32 EFlags;
+ uint32 Esp;
+ uint32 SegSs;
+ uint8 ExtendedRegisters[512];
+};
+struct Overlapped {
+ uint32 Internal;
+ uint32 InternalHigh;
+ byte anon0[8];
+ byte *hEvent;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/defs_windows_amd64.h b/src/runtime/defs_windows_amd64.h
new file mode 100644
index 000000000..fe26f5a84
--- /dev/null
+++ b/src/runtime/defs_windows_amd64.h
@@ -0,0 +1,128 @@
+// Created by cgo -cdefs - DO NOT EDIT
+// cgo -cdefs defs_windows.go
+
+
+enum {
+ PROT_NONE = 0,
+ PROT_READ = 1,
+ PROT_WRITE = 2,
+ PROT_EXEC = 4,
+
+ MAP_ANON = 1,
+ MAP_PRIVATE = 2,
+
+ DUPLICATE_SAME_ACCESS = 0x2,
+ THREAD_PRIORITY_HIGHEST = 0x2,
+
+ SIGINT = 0x2,
+ CTRL_C_EVENT = 0x0,
+ CTRL_BREAK_EVENT = 0x1,
+
+ CONTEXT_CONTROL = 0x100001,
+ CONTEXT_FULL = 0x10000b,
+
+ EXCEPTION_ACCESS_VIOLATION = 0xc0000005,
+ EXCEPTION_BREAKPOINT = 0x80000003,
+ EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d,
+ EXCEPTION_FLT_DIVIDE_BY_ZERO = 0xc000008e,
+ EXCEPTION_FLT_INEXACT_RESULT = 0xc000008f,
+ EXCEPTION_FLT_OVERFLOW = 0xc0000091,
+ EXCEPTION_FLT_UNDERFLOW = 0xc0000093,
+ EXCEPTION_INT_DIVIDE_BY_ZERO = 0xc0000094,
+ EXCEPTION_INT_OVERFLOW = 0xc0000095,
+
+ INFINITE = 0xffffffff,
+ WAIT_TIMEOUT = 0x102,
+};
+
+typedef struct SystemInfo SystemInfo;
+typedef struct ExceptionRecord ExceptionRecord;
+typedef struct FloatingSaveArea FloatingSaveArea;
+typedef struct M128a M128a;
+typedef struct Context Context;
+typedef struct Overlapped Overlapped;
+
+#pragma pack on
+
+struct SystemInfo {
+ byte anon0[4];
+ uint32 dwPageSize;
+ byte *lpMinimumApplicationAddress;
+ byte *lpMaximumApplicationAddress;
+ uint64 dwActiveProcessorMask;
+ uint32 dwNumberOfProcessors;
+ uint32 dwProcessorType;
+ uint32 dwAllocationGranularity;
+ uint16 wProcessorLevel;
+ uint16 wProcessorRevision;
+};
+struct ExceptionRecord {
+ uint32 ExceptionCode;
+ uint32 ExceptionFlags;
+ ExceptionRecord *ExceptionRecord;
+ byte *ExceptionAddress;
+ uint32 NumberParameters;
+ byte Pad_cgo_0[4];
+ uint64 ExceptionInformation[15];
+};
+struct M128a {
+ uint64 Low;
+ int64 High;
+};
+struct Context {
+ uint64 P1Home;
+ uint64 P2Home;
+ uint64 P3Home;
+ uint64 P4Home;
+ uint64 P5Home;
+ uint64 P6Home;
+ uint32 ContextFlags;
+ uint32 MxCsr;
+ uint16 SegCs;
+ uint16 SegDs;
+ uint16 SegEs;
+ uint16 SegFs;
+ uint16 SegGs;
+ uint16 SegSs;
+ uint32 EFlags;
+ uint64 Dr0;
+ uint64 Dr1;
+ uint64 Dr2;
+ uint64 Dr3;
+ uint64 Dr6;
+ uint64 Dr7;
+ uint64 Rax;
+ uint64 Rcx;
+ uint64 Rdx;
+ uint64 Rbx;
+ uint64 Rsp;
+ uint64 Rbp;
+ uint64 Rsi;
+ uint64 Rdi;
+ uint64 R8;
+ uint64 R9;
+ uint64 R10;
+ uint64 R11;
+ uint64 R12;
+ uint64 R13;
+ uint64 R14;
+ uint64 R15;
+ uint64 Rip;
+ byte anon0[512];
+ M128a VectorRegister[26];
+ uint64 VectorControl;
+ uint64 DebugControl;
+ uint64 LastBranchToRip;
+ uint64 LastBranchFromRip;
+ uint64 LastExceptionToRip;
+ uint64 LastExceptionFromRip;
+};
+struct Overlapped {
+ uint64 Internal;
+ uint64 InternalHigh;
+ byte anon0[8];
+ byte *hEvent;
+};
+
+
+#pragma pack off
diff --git a/src/runtime/env_plan9.go b/src/runtime/env_plan9.go
new file mode 100644
index 000000000..76e9867e0
--- /dev/null
+++ b/src/runtime/env_plan9.go
@@ -0,0 +1,52 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func getenv(s *byte) *byte {
+ val := gogetenv(gostringnocopy(s))
+ if val == "" {
+ return nil
+ }
+ // Strings found in environment are NUL-terminated.
+ return &bytes(val)[0]
+}
+
+var tracebackbuf [128]byte
+
+func gogetenv(key string) string {
+ var file [128]byte
+ if len(key) > len(file)-6 {
+ return ""
+ }
+
+ copy(file[:], "/env/")
+ copy(file[5:], key)
+
+ fd := open(&file[0], _OREAD, 0)
+ if fd < 0 {
+ return ""
+ }
+ n := seek(fd, 0, 2) - 1
+ if n <= 0 {
+ close(fd)
+ return ""
+ }
+
+ p := make([]byte, n)
+
+ r := pread(fd, unsafe.Pointer(&p[0]), int32(n), 0)
+ close(fd)
+ if r < 0 {
+ return ""
+ }
+
+ var s string
+ sp := (*_string)(unsafe.Pointer(&s))
+ sp.str = &p[0]
+ sp.len = int(r)
+ return s
+}
diff --git a/src/runtime/env_posix.go b/src/runtime/env_posix.go
new file mode 100644
index 000000000..6c04f6cc7
--- /dev/null
+++ b/src/runtime/env_posix.go
@@ -0,0 +1,52 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows
+
+package runtime
+
+import "unsafe"
+
+func environ() []string
+
+func getenv(s *byte) *byte {
+ val := gogetenv(gostringnocopy(s))
+ if val == "" {
+ return nil
+ }
+ // Strings found in environment are NUL-terminated.
+ return &bytes(val)[0]
+}
+
+func gogetenv(key string) string {
+ env := environ()
+ if env == nil {
+ gothrow("getenv before env init")
+ }
+ for _, s := range environ() {
+ if len(s) > len(key) && s[len(key)] == '=' && s[:len(key)] == key {
+ return s[len(key)+1:]
+ }
+ }
+ return ""
+}
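The loop above is a plain linear scan over "KEY=VALUE" strings. A minimal standalone sketch of the same matching rule, using a hypothetical lookup helper that is not part of this file:

	package main

	import "fmt"

	// lookup mimics gogetenv's prefix match: an entry matches when it is longer
	// than the key, has '=' immediately after the key, and starts with the key.
	func lookup(env []string, key string) string {
		for _, s := range env {
			if len(s) > len(key) && s[len(key)] == '=' && s[:len(key)] == key {
				return s[len(key)+1:]
			}
		}
		return ""
	}

	func main() {
		env := []string{"GOGC=off", "GOPATH=/home/me/go"}
		fmt.Println(lookup(env, "GOGC")) // prints "off"
	}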
+
+var _cgo_setenv uintptr // pointer to C function
+
+// Update the C environment if cgo is loaded.
+// Called from syscall.Setenv.
+func syscall_setenv_c(k string, v string) {
+ if _cgo_setenv == 0 {
+ return
+ }
+ arg := [2]unsafe.Pointer{cstring(k), cstring(v)}
+ asmcgocall(unsafe.Pointer(_cgo_setenv), unsafe.Pointer(&arg))
+}
+
+func cstring(s string) unsafe.Pointer {
+ p := make([]byte, len(s)+1)
+ sp := (*_string)(unsafe.Pointer(&s))
+ memmove(unsafe.Pointer(&p[0]), unsafe.Pointer(sp.str), uintptr(len(s)))
+ return unsafe.Pointer(&p[0])
+}
diff --git a/src/runtime/error.go b/src/runtime/error.go
new file mode 100644
index 000000000..3ea93680c
--- /dev/null
+++ b/src/runtime/error.go
@@ -0,0 +1,129 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// The Error interface identifies a run time error.
+type Error interface {
+ error
+
+ // RuntimeError is a no-op function but
+ // serves to distinguish types that are runtime
+ // errors from ordinary errors: a type is a
+ // runtime error if it has a RuntimeError method.
+ RuntimeError()
+}
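A short usage sketch (ordinary user code, not part of the runtime): a recovered panic value can be tested against this interface to tell run-time failures apart from explicit panics.

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		defer func() {
			if r := recover(); r != nil {
				if _, ok := r.(runtime.Error); ok {
					fmt.Println("run-time error:", r)
				} else {
					fmt.Println("ordinary panic:", r)
				}
			}
		}()
		a := []int{1, 2, 3}
		i := 5
		fmt.Println(a[i]) // out-of-range index panics with a value implementing runtime.Error
	}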
+
+// A TypeAssertionError explains a failed type assertion.
+type TypeAssertionError struct {
+ interfaceString string
+ concreteString string
+ assertedString string
+ missingMethod string // one method needed by Interface, missing from Concrete
+}
+
+func (*TypeAssertionError) RuntimeError() {}
+
+func (e *TypeAssertionError) Error() string {
+ inter := e.interfaceString
+ if inter == "" {
+ inter = "interface"
+ }
+ if e.concreteString == "" {
+ return "interface conversion: " + inter + " is nil, not " + e.assertedString
+ }
+ if e.missingMethod == "" {
+ return "interface conversion: " + inter + " is " + e.concreteString +
+ ", not " + e.assertedString
+ }
+ return "interface conversion: " + e.concreteString + " is not " + e.assertedString +
+ ": missing method " + e.missingMethod
+}
+
+// For calling from C.
+func newTypeAssertionError(ps1, ps2, ps3 *string, pmeth *string, ret *interface{}) {
+ var s1, s2, s3, meth string
+
+ if ps1 != nil {
+ s1 = *ps1
+ }
+ if ps2 != nil {
+ s2 = *ps2
+ }
+ if ps3 != nil {
+ s3 = *ps3
+ }
+ if pmeth != nil {
+ meth = *pmeth
+ }
+ *ret = &TypeAssertionError{s1, s2, s3, meth}
+}
+
+// An errorString represents a runtime error described by a single string.
+type errorString string
+
+func (e errorString) RuntimeError() {}
+
+func (e errorString) Error() string {
+ return "runtime error: " + string(e)
+}
+
+// For calling from C.
+func newErrorString(s string, ret *interface{}) {
+ *ret = errorString(s)
+}
+
+// An errorCString represents a runtime error described by a single C string.
+// Not "type errorCString unsafe.Pointer" because of http://golang.org/issue/7084.
+// Not uintptr because we want to avoid an allocation if interfaces can't hold
+// uintptrs directly (and cstr _is_ a pointer).
+type errorCString struct{ cstr unsafe.Pointer }
+
+func (e errorCString) RuntimeError() {}
+
+func (e errorCString) Error() string {
+ return "runtime error: " + gostringnocopy((*byte)(e.cstr))
+}
+
+// For calling from C.
+func newErrorCString(s unsafe.Pointer, ret *interface{}) {
+ *ret = errorCString{s}
+}
+
+type stringer interface {
+ String() string
+}
+
+func typestring(x interface{}) string {
+ e := (*eface)(unsafe.Pointer(&x))
+ return *e._type._string
+}
+
+// For calling from C.
+// Prints an argument passed to panic.
+// There's room for arbitrary complexity here, but we keep it
+// simple and handle just a few important cases: int, string, and Stringer.
+func printany(i interface{}) {
+ switch v := i.(type) {
+ case nil:
+ print("nil")
+ case stringer:
+ print(v.String())
+ case error:
+ print(v.Error())
+ case int:
+ print(v)
+ case string:
+ print(v)
+ default:
+ print("(", typestring(i), ") ", i)
+ }
+}
+
+// called from generated code
+func panicwrap(pkg, typ, meth string) {
+ panic("value method " + pkg + "." + typ + "." + meth + " called using nil *" + typ + " pointer")
+}
diff --git a/src/runtime/export_futex_test.go b/src/runtime/export_futex_test.go
new file mode 100644
index 000000000..96281f650
--- /dev/null
+++ b/src/runtime/export_futex_test.go
@@ -0,0 +1,10 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build dragonfly freebsd linux
+
+package runtime
+
+var Futexsleep = futexsleep
+var Futexwakeup = futexwakeup
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
new file mode 100644
index 000000000..07ef26f25
--- /dev/null
+++ b/src/runtime/export_test.go
@@ -0,0 +1,163 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing.
+
+package runtime
+
+import "unsafe"
+
+var Fadd64 = fadd64
+var Fsub64 = fsub64
+var Fmul64 = fmul64
+var Fdiv64 = fdiv64
+var F64to32 = f64to32
+var F32to64 = f32to64
+var Fcmp64 = fcmp64
+var Fintto64 = fintto64
+var F64toint = f64toint
+
+// in asm_*.s
+func stackguard() (sp, limit uintptr)
+
+var Entersyscall = entersyscall
+var Exitsyscall = exitsyscall
+var LockedOSThread = lockedOSThread
+var Stackguard = stackguard
+
+type LFNode struct {
+ Next *LFNode
+ Pushcnt uintptr
+}
+
+func lfstackpush_m()
+func lfstackpop_m()
+
+func LFStackPush(head *uint64, node *LFNode) {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(head)
+ mp.ptrarg[1] = unsafe.Pointer(node)
+ onM(lfstackpush_m)
+ releasem(mp)
+}
+
+func LFStackPop(head *uint64) *LFNode {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(head)
+ onM(lfstackpop_m)
+ node := (*LFNode)(unsafe.Pointer(mp.ptrarg[0]))
+ mp.ptrarg[0] = nil
+ releasem(mp)
+ return node
+}
+
+type ParFor struct {
+ body *byte
+ done uint32
+ Nthr uint32
+ nthrmax uint32
+ thrseq uint32
+ Cnt uint32
+ Ctx *byte
+ wait bool
+}
+
+func newparfor_m()
+func parforsetup_m()
+func parfordo_m()
+func parforiters_m()
+
+func NewParFor(nthrmax uint32) *ParFor {
+ mp := acquirem()
+ mp.scalararg[0] = uintptr(nthrmax)
+ onM(newparfor_m)
+ desc := (*ParFor)(mp.ptrarg[0])
+ mp.ptrarg[0] = nil
+ releasem(mp)
+ return desc
+}
+
+func ParForSetup(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32)) {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(desc)
+ mp.ptrarg[1] = unsafe.Pointer(ctx)
+ mp.ptrarg[2] = unsafe.Pointer(funcPC(body)) // TODO(rsc): Should be a scalar.
+ mp.scalararg[0] = uintptr(nthr)
+ mp.scalararg[1] = uintptr(n)
+ mp.scalararg[2] = 0
+ if wait {
+ mp.scalararg[2] = 1
+ }
+ onM(parforsetup_m)
+ releasem(mp)
+}
+
+func ParForDo(desc *ParFor) {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(desc)
+ onM(parfordo_m)
+ releasem(mp)
+}
+
+func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(desc)
+ mp.scalararg[0] = uintptr(tid)
+ onM(parforiters_m)
+ begin := uint32(mp.scalararg[0])
+ end := uint32(mp.scalararg[1])
+ releasem(mp)
+ return begin, end
+}
+
+// in mgc0.c
+//go:noescape
+func getgcmask(data unsafe.Pointer, typ *_type, array **byte, len *uint)
+
+func GCMask(x interface{}) (ret []byte) {
+ e := (*eface)(unsafe.Pointer(&x))
+ s := (*slice)(unsafe.Pointer(&ret))
+ onM(func() {
+ getgcmask(e.data, e._type, &s.array, &s.len)
+ s.cap = s.len
+ })
+ return
+}
+
+func testSchedLocalQueue()
+func testSchedLocalQueueSteal()
+func RunSchedLocalQueueTest() {
+ onM(testSchedLocalQueue)
+}
+func RunSchedLocalQueueStealTest() {
+ onM(testSchedLocalQueueSteal)
+}
+
+var HaveGoodHash = haveGoodHash
+var StringHash = stringHash
+var BytesHash = bytesHash
+var Int32Hash = int32Hash
+var Int64Hash = int64Hash
+var EfaceHash = efaceHash
+var IfaceHash = ifaceHash
+var MemclrBytes = memclrBytes
+
+var HashLoad = &hashLoad
+
+// For testing.
+func GogoBytes() int32 {
+ return _RuntimeGogoBytes
+}
+
+// in string.c
+//go:noescape
+func gostringw(w *uint16) string
+
+// entry point for testing
+func GostringW(w []uint16) (s string) {
+ onM(func() {
+ s = gostringw(&w[0])
+ })
+ return
+}
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
new file mode 100644
index 000000000..3d06a23fc
--- /dev/null
+++ b/src/runtime/extern.go
@@ -0,0 +1,155 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package runtime contains operations that interact with Go's runtime system,
+such as functions to control goroutines. It also includes the low-level type information
+used by the reflect package; see reflect's documentation for the programmable
+interface to the run-time type system.
+
+Environment Variables
+
+The following environment variables ($name or %name%, depending on the host
+operating system) control the run-time behavior of Go programs. The meanings
+and use may change from release to release.
+
+The GOGC variable sets the initial garbage collection target percentage.
+A collection is triggered when the ratio of freshly allocated data to live data
+remaining after the previous collection reaches this percentage. The default
+is GOGC=100. Setting GOGC=off disables the garbage collector entirely.
+The runtime/debug package's SetGCPercent function allows changing this
+percentage at run time. See http://golang.org/pkg/runtime/debug/#SetGCPercent.
+
+The GODEBUG variable controls debug output from the runtime. GODEBUG value is
+a comma-separated list of name=val pairs. Supported names are:
+
+ allocfreetrace: setting allocfreetrace=1 causes every allocation to be
+ profiled and a stack trace printed on each object's allocation and free.
+
+ efence: setting efence=1 causes the allocator to run in a mode
+ where each object is allocated on a unique page and addresses are
+ never recycled.
+
+ gctrace: setting gctrace=1 causes the garbage collector to emit a single line to standard
+ error at each collection, summarizing the amount of memory collected and the
+ length of the pause. Setting gctrace=2 emits the same summary but also
+ repeats each collection.
+
+ gcdead: setting gcdead=1 causes the garbage collector to clobber all stack slots
+ that it thinks are dead.
+
+ scheddetail: setting schedtrace=X and scheddetail=1 causes the scheduler to emit
+ detailed multiline info every X milliseconds, describing state of the scheduler,
+ processors, threads and goroutines.
+
+ schedtrace: setting schedtrace=X causes the scheduler to emit a single line to standard
+ error every X milliseconds, summarizing the scheduler state.
+
+	scavenge: setting scavenge=1 enables the debugging mode of the heap scavenger.
+
+The GOMAXPROCS variable limits the number of operating system threads that
+can execute user-level Go code simultaneously. There is no limit to the number of threads
+that can be blocked in system calls on behalf of Go code; those do not count against
+the GOMAXPROCS limit. This package's GOMAXPROCS function queries and changes
+the limit.
+
+The GOTRACEBACK variable controls the amount of output generated when a Go
+program fails due to an unrecovered panic or an unexpected runtime condition.
+By default, a failure prints a stack trace for every extant goroutine, eliding functions
+internal to the run-time system, and then exits with exit code 2.
+If GOTRACEBACK=0, the per-goroutine stack traces are omitted entirely.
+If GOTRACEBACK=1, the default behavior is used.
+If GOTRACEBACK=2, the per-goroutine stack traces include run-time functions.
+If GOTRACEBACK=crash, the per-goroutine stack traces include run-time functions,
+and if possible the program crashes in an operating system-specific manner instead of
+exiting. For example, on Unix systems, the program raises SIGABRT to trigger a
+core dump.
+
+The GOARCH, GOOS, GOPATH, and GOROOT environment variables complete
+the set of Go environment variables. They influence the building of Go programs
+(see http://golang.org/cmd/go and http://golang.org/pkg/go/build).
+GOARCH, GOOS, and GOROOT are recorded at compile time and made available by
+constants or functions in this package, but they do not influence the execution
+of the run-time system.
+*/
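A minimal sketch of the programmatic counterparts mentioned above (SetGCPercent and GOMAXPROCS); GODEBUG and GOTRACEBACK have no setters here and can only be observed from the environment:

	package main

	import (
		"fmt"
		"os"
		"runtime"
		"runtime/debug"
	)

	func main() {
		// Roughly equivalent to starting the program with GOGC=50.
		oldGC := debug.SetGCPercent(50)
		fmt.Println("previous GC percentage:", oldGC)

		// Roughly equivalent to starting the program with GOMAXPROCS=2.
		oldProcs := runtime.GOMAXPROCS(2)
		fmt.Println("previous GOMAXPROCS:", oldProcs)

		// The runtime reads GODEBUG itself; a program can only inspect it.
		fmt.Println("GODEBUG =", os.Getenv("GODEBUG"))
	}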
+package runtime
+
+// Caller reports file and line number information about function invocations on
+// the calling goroutine's stack. The argument skip is the number of stack frames
+// to ascend, with 0 identifying the caller of Caller. (For historical reasons the
+// meaning of skip differs between Caller and Callers.) The return values report the
+// program counter, file name, and line number within the file of the corresponding
+// call. The boolean ok is false if it was not possible to recover the information.
+func Caller(skip int) (pc uintptr, file string, line int, ok bool) {
+ // Ask for two PCs: the one we were asked for
+ // and what it called, so that we can see if it
+ // "called" sigpanic.
+ var rpc [2]uintptr
+ if callers(1+skip-1, &rpc[0], 2) < 2 {
+ return
+ }
+ f := findfunc(rpc[1])
+ if f == nil {
+ // TODO(rsc): Probably a bug?
+ // The C version said "have retpc at least"
+ // but actually returned pc=0.
+ ok = true
+ return
+ }
+ pc = rpc[1]
+ xpc := pc
+ g := findfunc(rpc[0])
+ // All architectures turn faults into apparent calls to sigpanic.
+ // If we see a call to sigpanic, we do not back up the PC to find
+ // the line number of the call instruction, because there is no call.
+ if xpc > f.entry && (g == nil || g.entry != funcPC(sigpanic)) {
+ xpc--
+ }
+ line = int(funcline(f, xpc, &file))
+ ok = true
+ return
+}
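For example (a hypothetical whereAmI helper, not part of this package), Caller with skip=1 reports the call site of the function that invoked it:

	package main

	import (
		"fmt"
		"runtime"
	)

	func whereAmI() string {
		// skip=1: report the caller of whereAmI, not whereAmI itself.
		_, file, line, ok := runtime.Caller(1)
		if !ok {
			return "unknown"
		}
		return fmt.Sprintf("%s:%d", file, line)
	}

	func main() {
		fmt.Println(whereAmI()) // prints the file and line of this call
	}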
+
+// Callers fills the slice pc with the program counters of function invocations
+// on the calling goroutine's stack. The argument skip is the number of stack frames
+// to skip before recording in pc, with 0 identifying the frame for Callers itself and
+// 1 identifying the caller of Callers.
+// It returns the number of entries written to pc.
+func Callers(skip int, pc []uintptr) int {
+ // runtime.callers uses pc.array==nil as a signal
+ // to print a stack trace. Pick off 0-length pc here
+ // so that we don't let a nil pc slice get to it.
+ if len(pc) == 0 {
+ return 0
+ }
+ return callers(skip, &pc[0], len(pc))
+}
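And a companion sketch pairing Callers with FuncForPC to print a crude stack trace (illustrative only; the dump helper is hypothetical):

	package main

	import (
		"fmt"
		"runtime"
	)

	func dump() {
		pc := make([]uintptr, 16)
		n := runtime.Callers(2, pc) // skip Callers itself and dump
		for _, p := range pc[:n] {
			if f := runtime.FuncForPC(p); f != nil {
				file, line := f.FileLine(p)
				fmt.Printf("%s\n\t%s:%d\n", f.Name(), file, line)
			}
		}
	}

	func main() { dump() }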
+
+func getgoroot() string
+
+// GOROOT returns the root of the Go tree.
+// It uses the GOROOT environment variable, if set,
+// or else the root used during the Go build.
+func GOROOT() string {
+ s := gogetenv("GOROOT")
+ if s != "" {
+ return s
+ }
+ return defaultGoroot
+}
+
+// Version returns the Go tree's version string.
+// It is either the commit hash and date at the time of the build or,
+// when possible, a release tag like "go1.3".
+func Version() string {
+ return theVersion
+}
+
+// GOOS is the running program's operating system target:
+// one of darwin, freebsd, linux, and so on.
+const GOOS string = theGoos
+
+// GOARCH is the running program's architecture target:
+// 386, amd64, or arm.
+const GOARCH string = theGoarch
diff --git a/src/runtime/float.c b/src/runtime/float.c
new file mode 100644
index 000000000..42082e434
--- /dev/null
+++ b/src/runtime/float.c
@@ -0,0 +1,10 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// used as float64 via runtime· names
+uint64 ·nan = 0x7FF8000000000001ULL;
+uint64 ·posinf = 0x7FF0000000000000ULL;
+uint64 ·neginf = 0xFFF0000000000000ULL;
diff --git a/src/runtime/funcdata.h b/src/runtime/funcdata.h
new file mode 100644
index 000000000..dc9c41363
--- /dev/null
+++ b/src/runtime/funcdata.h
@@ -0,0 +1,37 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file defines the IDs for PCDATA and FUNCDATA instructions
+// in Go binaries. It is included by both C and assembly, so it must
+// be written using #defines. It is included by the runtime package
+// as well as the compilers.
+//
+// symtab.go also contains a copy of these constants.
+
+#define PCDATA_ArgSize 0 /* argument size at CALL instruction */
+#define PCDATA_StackMapIndex 1
+
+#define FUNCDATA_ArgsPointerMaps 0 /* garbage collector blocks */
+#define FUNCDATA_LocalsPointerMaps 1
+#define FUNCDATA_DeadValueMaps 2
+
+// To be used in assembly.
+#define ARGSIZE(n) PCDATA $PCDATA_ArgSize, $n
+
+// ArgsSizeUnknown is set in Func.argsize to mark all functions
+// whose argument size is unknown (C vararg functions, and
+// assembly code without an explicit specification).
+// This value is generated by the compiler, assembler, or linker.
+#define ArgsSizeUnknown 0x80000000
+
+/*c2go
+enum {
+ PCDATA_ArgSize = 0,
+ PCDATA_StackMapIndex = 1,
+ FUNCDATA_ArgsPointerMaps = 0,
+ FUNCDATA_LocalsPointerMaps = 1,
+ FUNCDATA_DeadValueMaps = 2,
+ ArgsSizeUnknown = 0x80000000,
+};
+*/
diff --git a/src/runtime/futex_test.go b/src/runtime/futex_test.go
new file mode 100644
index 000000000..f57fc52b8
--- /dev/null
+++ b/src/runtime/futex_test.go
@@ -0,0 +1,77 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Futex is only available on DragonFly BSD, FreeBSD and Linux.
+// The race detector emits calls to split stack functions so it breaks
+// the test.
+
+// +build dragonfly freebsd linux
+// +build !race
+
+package runtime_test
+
+import (
+ "runtime"
+ "testing"
+ "time"
+)
+
+type futexsleepTest struct {
+ mtx uint32
+ ns int64
+ msg string
+ ch chan futexsleepTest
+}
+
+var futexsleepTests = []futexsleepTest{
+ beforeY2038: {mtx: 0, ns: 86400 * 1e9, msg: "before the year 2038", ch: make(chan futexsleepTest, 1)},
+ afterY2038: {mtx: 0, ns: (1<<31 + 100) * 1e9, msg: "after the year 2038", ch: make(chan futexsleepTest, 1)},
+}
+
+const (
+ beforeY2038 = iota
+ afterY2038
+)
+
+func TestFutexsleep(t *testing.T) {
+ if runtime.GOMAXPROCS(0) > 1 {
+ // futexsleep doesn't handle EINTR or other signals,
+ // so spurious wakeups may happen.
+ t.Skip("skipping; GOMAXPROCS>1")
+ }
+
+ start := time.Now()
+ for _, tt := range futexsleepTests {
+ go func(tt futexsleepTest) {
+ runtime.Entersyscall()
+ runtime.Futexsleep(&tt.mtx, tt.mtx, tt.ns)
+ runtime.Exitsyscall()
+ tt.ch <- tt
+ }(tt)
+ }
+loop:
+ for {
+ select {
+ case tt := <-futexsleepTests[beforeY2038].ch:
+ t.Errorf("futexsleep test %q finished early after %s", tt.msg, time.Since(start))
+ break loop
+ case tt := <-futexsleepTests[afterY2038].ch:
+ // Looks like FreeBSD 10 kernel has changed
+ // the semantics of timedwait on userspace
+ // mutex to make broken stuff look broken.
+ switch {
+ case runtime.GOOS == "freebsd" && runtime.GOARCH == "386":
+ t.Log("freebsd/386 may not work correctly after the year 2038, see golang.org/issue/7194")
+ default:
+ t.Errorf("futexsleep test %q finished early after %s", tt.msg, time.Since(start))
+ break loop
+ }
+ case <-time.After(time.Second):
+ break loop
+ }
+ }
+ for _, tt := range futexsleepTests {
+ runtime.Futexwakeup(&tt.mtx, 1)
+ }
+}
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
new file mode 100644
index 000000000..6abec4cca
--- /dev/null
+++ b/src/runtime/gc_test.go
@@ -0,0 +1,292 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "os"
+ "runtime"
+ "runtime/debug"
+ "testing"
+ "time"
+ "unsafe"
+)
+
+func TestGcSys(t *testing.T) {
+ if os.Getenv("GOGC") == "off" {
+ t.Skip("skipping test; GOGC=off in environment")
+ }
+ data := struct{ Short bool }{testing.Short()}
+ got := executeTest(t, testGCSysSource, &data)
+ want := "OK\n"
+ if got != want {
+ t.Fatalf("expected %q, but got %q", want, got)
+ }
+}
+
+const testGCSysSource = `
+package main
+
+import (
+ "fmt"
+ "runtime"
+)
+
+func main() {
+ runtime.GOMAXPROCS(1)
+ memstats := new(runtime.MemStats)
+ runtime.GC()
+ runtime.ReadMemStats(memstats)
+ sys := memstats.Sys
+
+ runtime.MemProfileRate = 0 // disable profiler
+
+ itercount := 1000000
+{{if .Short}}
+ itercount = 100000
+{{end}}
+ for i := 0; i < itercount; i++ {
+ workthegc()
+ }
+
+ // Should only be using a few MB.
+ // We allocated 100 MB or (if not short) 1 GB.
+ runtime.ReadMemStats(memstats)
+ if sys > memstats.Sys {
+ sys = 0
+ } else {
+ sys = memstats.Sys - sys
+ }
+ if sys > 16<<20 {
+ fmt.Printf("using too much memory: %d bytes\n", sys)
+ return
+ }
+ fmt.Printf("OK\n")
+}
+
+func workthegc() []byte {
+ return make([]byte, 1029)
+}
+`
+
+func TestGcDeepNesting(t *testing.T) {
+ type T [2][2][2][2][2][2][2][2][2][2]*int
+ a := new(T)
+
+ // Prevent the compiler from applying escape analysis.
+	// This makes sure new(T) is allocated on the heap, not on the stack.
+ t.Logf("%p", a)
+
+ a[0][0][0][0][0][0][0][0][0][0] = new(int)
+ *a[0][0][0][0][0][0][0][0][0][0] = 13
+ runtime.GC()
+ if *a[0][0][0][0][0][0][0][0][0][0] != 13 {
+ t.Fail()
+ }
+}
+
+func TestGcHashmapIndirection(t *testing.T) {
+ defer debug.SetGCPercent(debug.SetGCPercent(1))
+ runtime.GC()
+ type T struct {
+ a [256]int
+ }
+ m := make(map[T]T)
+ for i := 0; i < 2000; i++ {
+ var a T
+ a.a[0] = i
+ m[a] = T{}
+ }
+}
+
+func TestGcArraySlice(t *testing.T) {
+ type X struct {
+ buf [1]byte
+ nextbuf []byte
+ next *X
+ }
+ var head *X
+ for i := 0; i < 10; i++ {
+ p := &X{}
+ p.buf[0] = 42
+ p.next = head
+ if head != nil {
+ p.nextbuf = head.buf[:]
+ }
+ head = p
+ runtime.GC()
+ }
+ for p := head; p != nil; p = p.next {
+ if p.buf[0] != 42 {
+ t.Fatal("corrupted heap")
+ }
+ }
+}
+
+func TestGcRescan(t *testing.T) {
+ type X struct {
+ c chan error
+ nextx *X
+ }
+ type Y struct {
+ X
+ nexty *Y
+ p *int
+ }
+ var head *Y
+ for i := 0; i < 10; i++ {
+ p := &Y{}
+ p.c = make(chan error)
+ if head != nil {
+ p.nextx = &head.X
+ }
+ p.nexty = head
+ p.p = new(int)
+ *p.p = 42
+ head = p
+ runtime.GC()
+ }
+ for p := head; p != nil; p = p.nexty {
+ if *p.p != 42 {
+ t.Fatal("corrupted heap")
+ }
+ }
+}
+
+func TestGcLastTime(t *testing.T) {
+ ms := new(runtime.MemStats)
+ t0 := time.Now().UnixNano()
+ runtime.GC()
+ t1 := time.Now().UnixNano()
+ runtime.ReadMemStats(ms)
+ last := int64(ms.LastGC)
+ if t0 > last || last > t1 {
+ t.Fatalf("bad last GC time: got %v, want [%v, %v]", last, t0, t1)
+ }
+ pause := ms.PauseNs[(ms.NumGC+255)%256]
+ // Due to timer granularity, pause can actually be 0 on windows
+ // or on virtualized environments.
+ if pause == 0 {
+ t.Logf("last GC pause was 0")
+ } else if pause > 10e9 {
+ t.Logf("bad last GC pause: got %v, want [0, 10e9]", pause)
+ }
+}
+
+var hugeSink interface{}
+
+func TestHugeGCInfo(t *testing.T) {
+	// The test ensures that the compiler can chew these huge types even on the weakest machines.
+ // The types are not allocated at runtime.
+ if hugeSink != nil {
+		// 400MB on 32 bits, 4TB on 64 bits.
+ const n = (400 << 20) + (unsafe.Sizeof(uintptr(0))-4)<<40
+ hugeSink = new([n]*byte)
+ hugeSink = new([n]uintptr)
+ hugeSink = new(struct {
+ x float64
+ y [n]*byte
+ z []string
+ })
+ hugeSink = new(struct {
+ x float64
+ y [n]uintptr
+ z []string
+ })
+ }
+}
+
+func BenchmarkSetTypeNoPtr1(b *testing.B) {
+ type NoPtr1 struct {
+ p uintptr
+ }
+ var p *NoPtr1
+ for i := 0; i < b.N; i++ {
+ p = &NoPtr1{}
+ }
+ _ = p
+}
+func BenchmarkSetTypeNoPtr2(b *testing.B) {
+ type NoPtr2 struct {
+ p, q uintptr
+ }
+ var p *NoPtr2
+ for i := 0; i < b.N; i++ {
+ p = &NoPtr2{}
+ }
+ _ = p
+}
+func BenchmarkSetTypePtr1(b *testing.B) {
+ type Ptr1 struct {
+ p *byte
+ }
+ var p *Ptr1
+ for i := 0; i < b.N; i++ {
+ p = &Ptr1{}
+ }
+ _ = p
+}
+func BenchmarkSetTypePtr2(b *testing.B) {
+ type Ptr2 struct {
+ p, q *byte
+ }
+ var p *Ptr2
+ for i := 0; i < b.N; i++ {
+ p = &Ptr2{}
+ }
+ _ = p
+}
+
+func BenchmarkAllocation(b *testing.B) {
+ type T struct {
+ x, y *byte
+ }
+ ngo := runtime.GOMAXPROCS(0)
+ work := make(chan bool, b.N+ngo)
+ result := make(chan *T)
+ for i := 0; i < b.N; i++ {
+ work <- true
+ }
+ for i := 0; i < ngo; i++ {
+ work <- false
+ }
+ for i := 0; i < ngo; i++ {
+ go func() {
+ var x *T
+ for <-work {
+ for i := 0; i < 1000; i++ {
+ x = &T{}
+ }
+ }
+ result <- x
+ }()
+ }
+ for i := 0; i < ngo; i++ {
+ <-result
+ }
+}
+
+func TestPrintGC(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
+ done := make(chan bool)
+ go func() {
+ for {
+ select {
+ case <-done:
+ return
+ default:
+ runtime.GC()
+ }
+ }
+ }()
+ for i := 0; i < 1e4; i++ {
+ func() {
+ defer print("")
+ }()
+ }
+ close(done)
+}
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
new file mode 100644
index 000000000..88f6703f9
--- /dev/null
+++ b/src/runtime/gcinfo_test.go
@@ -0,0 +1,193 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "bytes"
+ "runtime"
+ "testing"
+)
+
+// TestGCInfo tests that various objects in heap, data and bss receive correct GC pointer type info.
+func TestGCInfo(t *testing.T) {
+ verifyGCInfo(t, "bss ScalarPtr", &bssScalarPtr, nonStackInfo(infoScalarPtr))
+ verifyGCInfo(t, "bss PtrScalar", &bssPtrScalar, nonStackInfo(infoPtrScalar))
+ verifyGCInfo(t, "bss BigStruct", &bssBigStruct, nonStackInfo(infoBigStruct()))
+ verifyGCInfo(t, "bss string", &bssString, nonStackInfo(infoString))
+ verifyGCInfo(t, "bss slice", &bssSlice, nonStackInfo(infoSlice))
+ verifyGCInfo(t, "bss eface", &bssEface, nonStackInfo(infoEface))
+ verifyGCInfo(t, "bss iface", &bssIface, nonStackInfo(infoIface))
+
+ verifyGCInfo(t, "data ScalarPtr", &dataScalarPtr, nonStackInfo(infoScalarPtr))
+ verifyGCInfo(t, "data PtrScalar", &dataPtrScalar, nonStackInfo(infoPtrScalar))
+ verifyGCInfo(t, "data BigStruct", &dataBigStruct, nonStackInfo(infoBigStruct()))
+ verifyGCInfo(t, "data string", &dataString, nonStackInfo(infoString))
+ verifyGCInfo(t, "data slice", &dataSlice, nonStackInfo(infoSlice))
+ verifyGCInfo(t, "data eface", &dataEface, nonStackInfo(infoEface))
+ verifyGCInfo(t, "data iface", &dataIface, nonStackInfo(infoIface))
+
+ verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr)
+ verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar)
+ verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct())
+ verifyGCInfo(t, "stack string", new(string), infoString)
+ verifyGCInfo(t, "stack slice", new([]string), infoSlice)
+ verifyGCInfo(t, "stack eface", new(interface{}), infoEface)
+ verifyGCInfo(t, "stack iface", new(Iface), infoIface)
+
+ for i := 0; i < 10; i++ {
+ verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), nonStackInfo(infoScalarPtr))
+ verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), nonStackInfo(infoPtrScalar))
+ verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), nonStackInfo(infoBigStruct()))
+ verifyGCInfo(t, "heap string", escape(new(string)), nonStackInfo(infoString))
+ verifyGCInfo(t, "heap eface", escape(new(interface{})), nonStackInfo(infoEface))
+ verifyGCInfo(t, "heap iface", escape(new(Iface)), nonStackInfo(infoIface))
+ }
+
+}
+
+func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) {
+ mask := runtime.GCMask(p)
+ if len(mask) > len(mask0) {
+ mask0 = append(mask0, BitsDead)
+ mask = mask[:len(mask0)]
+ }
+ if bytes.Compare(mask, mask0) != 0 {
+ t.Errorf("bad GC program for %v:\nwant %+v\ngot %+v", name, mask0, mask)
+ return
+ }
+}
+
+func nonStackInfo(mask []byte) []byte {
+ // BitsDead is replaced with BitsScalar everywhere except stacks.
+ mask1 := make([]byte, len(mask))
+ mw := false
+ for i, v := range mask {
+ if !mw && v == BitsDead {
+ v = BitsScalar
+ }
+ mw = !mw && v == BitsMultiWord
+ mask1[i] = v
+ }
+ return mask1
+}
+
+var gcinfoSink interface{}
+
+func escape(p interface{}) interface{} {
+ gcinfoSink = p
+ return p
+}
+
+const (
+ BitsDead = iota
+ BitsScalar
+ BitsPointer
+ BitsMultiWord
+)
+
+const (
+ BitsString = iota // unused
+ BitsSlice // unused
+ BitsIface
+ BitsEface
+)
+
+type ScalarPtr struct {
+ q int
+ w *int
+ e int
+ r *int
+ t int
+ y *int
+}
+
+var infoScalarPtr = []byte{BitsScalar, BitsPointer, BitsScalar, BitsPointer, BitsScalar, BitsPointer}
+
+type PtrScalar struct {
+ q *int
+ w int
+ e *int
+ r int
+ t *int
+ y int
+}
+
+var infoPtrScalar = []byte{BitsPointer, BitsScalar, BitsPointer, BitsScalar, BitsPointer, BitsScalar}
+
+type BigStruct struct {
+ q *int
+ w byte
+ e [17]byte
+ r []byte
+ t int
+ y uint16
+ u uint64
+ i string
+}
+
+func infoBigStruct() []byte {
+ switch runtime.GOARCH {
+ case "386", "arm":
+ return []byte{
+ BitsPointer, // q *int
+ BitsScalar, BitsScalar, BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte
+ BitsPointer, BitsDead, BitsDead, // r []byte
+ BitsScalar, BitsScalar, BitsScalar, BitsScalar, // t int; y uint16; u uint64
+ BitsPointer, BitsDead, // i string
+ }
+ case "amd64":
+ return []byte{
+ BitsPointer, // q *int
+ BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte
+ BitsPointer, BitsDead, BitsDead, // r []byte
+ BitsScalar, BitsScalar, BitsScalar, // t int; y uint16; u uint64
+ BitsPointer, BitsDead, // i string
+ }
+ case "amd64p32":
+ return []byte{
+ BitsPointer, // q *int
+ BitsScalar, BitsScalar, BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte
+ BitsPointer, BitsDead, BitsDead, // r []byte
+ BitsScalar, BitsScalar, BitsDead, BitsScalar, BitsScalar, // t int; y uint16; u uint64
+ BitsPointer, BitsDead, // i string
+ }
+ default:
+ panic("unknown arch")
+ }
+}
+
+type Iface interface {
+ f()
+}
+
+type IfaceImpl int
+
+func (IfaceImpl) f() {
+}
+
+var (
+ // BSS
+ bssScalarPtr ScalarPtr
+ bssPtrScalar PtrScalar
+ bssBigStruct BigStruct
+ bssString string
+ bssSlice []string
+ bssEface interface{}
+ bssIface Iface
+
+ // DATA
+ dataScalarPtr = ScalarPtr{q: 1}
+ dataPtrScalar = PtrScalar{w: 1}
+ dataBigStruct = BigStruct{w: 1}
+ dataString = "foo"
+ dataSlice = []string{"foo"}
+ dataEface interface{} = 42
+ dataIface Iface = IfaceImpl(42)
+
+ infoString = []byte{BitsPointer, BitsDead}
+ infoSlice = []byte{BitsPointer, BitsDead, BitsDead}
+ infoEface = []byte{BitsMultiWord, BitsEface}
+ infoIface = []byte{BitsMultiWord, BitsIface}
+)
diff --git a/src/runtime/hash_test.go b/src/runtime/hash_test.go
new file mode 100644
index 000000000..41fff98eb
--- /dev/null
+++ b/src/runtime/hash_test.go
@@ -0,0 +1,572 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "math"
+ "math/rand"
+ . "runtime"
+ "strings"
+ "testing"
+)
+
+// Smhasher is a torture test for hash functions.
+// https://code.google.com/p/smhasher/
+// This code is a port of some of the Smhasher tests to Go.
+//
+// The current AES hash function passes Smhasher. Our fallback
+// hash functions don't, so we only enable the difficult tests when
+// we know the AES implementation is available.
+
+// Sanity checks.
+// hash should not depend on values outside key.
+// hash should not depend on alignment.
+func TestSmhasherSanity(t *testing.T) {
+ r := rand.New(rand.NewSource(1234))
+ const REP = 10
+ const KEYMAX = 128
+ const PAD = 16
+ const OFFMAX = 16
+ for k := 0; k < REP; k++ {
+ for n := 0; n < KEYMAX; n++ {
+ for i := 0; i < OFFMAX; i++ {
+ var b [KEYMAX + OFFMAX + 2*PAD]byte
+ var c [KEYMAX + OFFMAX + 2*PAD]byte
+ randBytes(r, b[:])
+ randBytes(r, c[:])
+ copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
+ if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
+ t.Errorf("hash depends on bytes outside key")
+ }
+ }
+ }
+ }
+}
+
+type HashSet struct {
+ m map[uintptr]struct{} // set of hashes added
+ n int // number of hashes added
+}
+
+func newHashSet() *HashSet {
+ return &HashSet{make(map[uintptr]struct{}), 0}
+}
+func (s *HashSet) add(h uintptr) {
+ s.m[h] = struct{}{}
+ s.n++
+}
+func (s *HashSet) addS(x string) {
+ s.add(StringHash(x, 0))
+}
+func (s *HashSet) addB(x []byte) {
+ s.add(BytesHash(x, 0))
+}
+func (s *HashSet) addS_seed(x string, seed uintptr) {
+ s.add(StringHash(x, seed))
+}
+func (s *HashSet) check(t *testing.T) {
+ const SLOP = 10.0
+ collisions := s.n - len(s.m)
+ //fmt.Printf("%d/%d\n", len(s.m), s.n)
+ pairs := int64(s.n) * int64(s.n-1) / 2
+ expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
+ stddev := math.Sqrt(expected)
+ if float64(collisions) > expected+SLOP*3*stddev {
+ t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
+ }
+}
+
+// a string plus adding zeros must make distinct hashes
+func TestSmhasherAppendedZeros(t *testing.T) {
+ s := "hello" + strings.Repeat("\x00", 256)
+ h := newHashSet()
+ for i := 0; i <= len(s); i++ {
+ h.addS(s[:i])
+ }
+ h.check(t)
+}
+
+// All 0-3 byte strings have distinct hashes.
+func TestSmhasherSmallKeys(t *testing.T) {
+ h := newHashSet()
+ var b [3]byte
+ for i := 0; i < 256; i++ {
+ b[0] = byte(i)
+ h.addB(b[:1])
+ for j := 0; j < 256; j++ {
+ b[1] = byte(j)
+ h.addB(b[:2])
+ if !testing.Short() {
+ for k := 0; k < 256; k++ {
+ b[2] = byte(k)
+ h.addB(b[:3])
+ }
+ }
+ }
+ }
+ h.check(t)
+}
+
+// Different length strings of all zeros have distinct hashes.
+func TestSmhasherZeros(t *testing.T) {
+ N := 256 * 1024
+ if testing.Short() {
+ N = 1024
+ }
+ h := newHashSet()
+ b := make([]byte, N)
+ for i := 0; i <= N; i++ {
+ h.addB(b[:i])
+ }
+ h.check(t)
+}
+
+// Strings with up to two nonzero bytes all have distinct hashes.
+func TestSmhasherTwoNonzero(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ h := newHashSet()
+ for n := 2; n <= 16; n++ {
+ twoNonZero(h, n)
+ }
+ h.check(t)
+}
+func twoNonZero(h *HashSet, n int) {
+ b := make([]byte, n)
+
+ // all zero
+ h.addB(b[:])
+
+ // one non-zero byte
+ for i := 0; i < n; i++ {
+ for x := 1; x < 256; x++ {
+ b[i] = byte(x)
+ h.addB(b[:])
+ b[i] = 0
+ }
+ }
+
+ // two non-zero bytes
+ for i := 0; i < n; i++ {
+ for x := 1; x < 256; x++ {
+ b[i] = byte(x)
+ for j := i + 1; j < n; j++ {
+ for y := 1; y < 256; y++ {
+ b[j] = byte(y)
+ h.addB(b[:])
+ b[j] = 0
+ }
+ }
+ b[i] = 0
+ }
+ }
+}
+
+// Test strings with repeats, like "abcdabcdabcdabcd..."
+func TestSmhasherCyclic(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ if !HaveGoodHash() {
+ t.Skip("fallback hash not good enough for this test")
+ }
+ r := rand.New(rand.NewSource(1234))
+ const REPEAT = 8
+ const N = 1000000
+ for n := 4; n <= 12; n++ {
+ h := newHashSet()
+ b := make([]byte, REPEAT*n)
+ for i := 0; i < N; i++ {
+ b[0] = byte(i * 79 % 97)
+ b[1] = byte(i * 43 % 137)
+ b[2] = byte(i * 151 % 197)
+ b[3] = byte(i * 199 % 251)
+ randBytes(r, b[4:n])
+ for j := n; j < n*REPEAT; j++ {
+ b[j] = b[j-n]
+ }
+ h.addB(b)
+ }
+ h.check(t)
+ }
+}
+
+// Test strings with only a few bits set
+func TestSmhasherSparse(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ sparse(t, 32, 6)
+ sparse(t, 40, 6)
+ sparse(t, 48, 5)
+ sparse(t, 56, 5)
+ sparse(t, 64, 5)
+ sparse(t, 96, 4)
+ sparse(t, 256, 3)
+ sparse(t, 2048, 2)
+}
+func sparse(t *testing.T, n int, k int) {
+ b := make([]byte, n/8)
+ h := newHashSet()
+ setbits(h, b, 0, k)
+ h.check(t)
+}
+
+// set up to k bits at index i and greater
+func setbits(h *HashSet, b []byte, i int, k int) {
+ h.addB(b)
+ if k == 0 {
+ return
+ }
+ for j := i; j < len(b)*8; j++ {
+ b[j/8] |= byte(1 << uint(j&7))
+ setbits(h, b, j+1, k-1)
+ b[j/8] &= byte(^(1 << uint(j&7)))
+ }
+}
+
+// Test all possible combinations of n blocks from the set s.
+// "permutation" is a bad name here, but it is what Smhasher uses.
+func TestSmhasherPermutation(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ if !HaveGoodHash() {
+ t.Skip("fallback hash not good enough for this test")
+ }
+ permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
+ permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
+ permutation(t, []uint32{0, 1}, 20)
+ permutation(t, []uint32{0, 1 << 31}, 20)
+ permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
+}
+func permutation(t *testing.T, s []uint32, n int) {
+ b := make([]byte, n*4)
+ h := newHashSet()
+ genPerm(h, b, s, 0)
+ h.check(t)
+}
+func genPerm(h *HashSet, b []byte, s []uint32, n int) {
+ h.addB(b[:n])
+ if n == len(b) {
+ return
+ }
+ for _, v := range s {
+ b[n] = byte(v)
+ b[n+1] = byte(v >> 8)
+ b[n+2] = byte(v >> 16)
+ b[n+3] = byte(v >> 24)
+ genPerm(h, b, s, n+4)
+ }
+}
+
+type Key interface {
+ clear() // set bits all to 0
+ random(r *rand.Rand) // set key to something random
+ bits() int // how many bits key has
+ flipBit(i int) // flip bit i of the key
+ hash() uintptr // hash the key
+ name() string // for error reporting
+}
+
+type BytesKey struct {
+ b []byte
+}
+
+func (k *BytesKey) clear() {
+ for i := range k.b {
+ k.b[i] = 0
+ }
+}
+func (k *BytesKey) random(r *rand.Rand) {
+ randBytes(r, k.b)
+}
+func (k *BytesKey) bits() int {
+ return len(k.b) * 8
+}
+func (k *BytesKey) flipBit(i int) {
+ k.b[i>>3] ^= byte(1 << uint(i&7))
+}
+func (k *BytesKey) hash() uintptr {
+ return BytesHash(k.b, 0)
+}
+func (k *BytesKey) name() string {
+ return fmt.Sprintf("bytes%d", len(k.b))
+}
+
+type Int32Key struct {
+ i uint32
+}
+
+func (k *Int32Key) clear() {
+ k.i = 0
+}
+func (k *Int32Key) random(r *rand.Rand) {
+ k.i = r.Uint32()
+}
+func (k *Int32Key) bits() int {
+ return 32
+}
+func (k *Int32Key) flipBit(i int) {
+ k.i ^= 1 << uint(i)
+}
+func (k *Int32Key) hash() uintptr {
+ return Int32Hash(k.i, 0)
+}
+func (k *Int32Key) name() string {
+ return "int32"
+}
+
+type Int64Key struct {
+ i uint64
+}
+
+func (k *Int64Key) clear() {
+ k.i = 0
+}
+func (k *Int64Key) random(r *rand.Rand) {
+ k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
+}
+func (k *Int64Key) bits() int {
+ return 64
+}
+func (k *Int64Key) flipBit(i int) {
+ k.i ^= 1 << uint(i)
+}
+func (k *Int64Key) hash() uintptr {
+ return Int64Hash(k.i, 0)
+}
+func (k *Int64Key) name() string {
+ return "int64"
+}
+
+type EfaceKey struct {
+ i interface{}
+}
+
+func (k *EfaceKey) clear() {
+ k.i = nil
+}
+func (k *EfaceKey) random(r *rand.Rand) {
+ k.i = uint64(r.Int63())
+}
+func (k *EfaceKey) bits() int {
+ // use 64 bits. This tests inlined interfaces
+ // on 64-bit targets and indirect interfaces on
+ // 32-bit targets.
+ return 64
+}
+func (k *EfaceKey) flipBit(i int) {
+ k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
+}
+func (k *EfaceKey) hash() uintptr {
+ return EfaceHash(k.i, 0)
+}
+func (k *EfaceKey) name() string {
+ return "Eface"
+}
+
+type IfaceKey struct {
+ i interface {
+ F()
+ }
+}
+type fInter uint64
+
+func (x fInter) F() {
+}
+
+func (k *IfaceKey) clear() {
+ k.i = nil
+}
+func (k *IfaceKey) random(r *rand.Rand) {
+ k.i = fInter(r.Int63())
+}
+func (k *IfaceKey) bits() int {
+ // use 64 bits. This tests inlined interfaces
+ // on 64-bit targets and indirect interfaces on
+ // 32-bit targets.
+ return 64
+}
+func (k *IfaceKey) flipBit(i int) {
+ k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
+}
+func (k *IfaceKey) hash() uintptr {
+ return IfaceHash(k.i, 0)
+}
+func (k *IfaceKey) name() string {
+ return "Iface"
+}
+
+// Flipping a single bit of a key should flip each output bit with 50% probability.
+func TestSmhasherAvalanche(t *testing.T) {
+ if !HaveGoodHash() {
+ t.Skip("fallback hash not good enough for this test")
+ }
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ avalancheTest1(t, &BytesKey{make([]byte, 2)})
+ avalancheTest1(t, &BytesKey{make([]byte, 4)})
+ avalancheTest1(t, &BytesKey{make([]byte, 8)})
+ avalancheTest1(t, &BytesKey{make([]byte, 16)})
+ avalancheTest1(t, &BytesKey{make([]byte, 32)})
+ avalancheTest1(t, &BytesKey{make([]byte, 200)})
+ avalancheTest1(t, &Int32Key{})
+ avalancheTest1(t, &Int64Key{})
+ avalancheTest1(t, &EfaceKey{})
+ avalancheTest1(t, &IfaceKey{})
+}
+func avalancheTest1(t *testing.T, k Key) {
+ const REP = 100000
+ r := rand.New(rand.NewSource(1234))
+ n := k.bits()
+
+ // grid[i][j] is a count of whether flipping
+ // input bit i affects output bit j.
+ grid := make([][hashSize]int, n)
+
+ for z := 0; z < REP; z++ {
+ // pick a random key, hash it
+ k.random(r)
+ h := k.hash()
+
+ // flip each bit, hash & compare the results
+ for i := 0; i < n; i++ {
+ k.flipBit(i)
+ d := h ^ k.hash()
+ k.flipBit(i)
+
+ // record the effects of that bit flip
+ g := &grid[i]
+ for j := 0; j < hashSize; j++ {
+ g[j] += int(d & 1)
+ d >>= 1
+ }
+ }
+ }
+
+ // Each entry in the grid should be about REP/2.
+ // More precisely, we did N = k.bits() * hashSize experiments where
+ // each is the sum of REP coin flips. We want to find bounds on the
+ // sum of coin flips such that a truly random experiment would have
+ // all sums inside those bounds with 99% probability.
+ N := n * hashSize
+ var c float64
+ // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
+ for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
+ }
+ c *= 4.0 // allowed slack - we don't need to be perfectly random
+ mean := .5 * REP
+ stddev := .5 * math.Sqrt(REP)
+ low := int(mean - c*stddev)
+ high := int(mean + c*stddev)
+ for i := 0; i < n; i++ {
+ for j := 0; j < hashSize; j++ {
+ x := grid[i][j]
+ if x < low || x > high {
+ t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
+ }
+ }
+ }
+}
+
+// All bit rotations of a set of distinct keys
+func TestSmhasherWindowed(t *testing.T) {
+ windowed(t, &Int32Key{})
+ windowed(t, &Int64Key{})
+ windowed(t, &BytesKey{make([]byte, 128)})
+}
+func windowed(t *testing.T, k Key) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ const BITS = 16
+
+ for r := 0; r < k.bits(); r++ {
+ h := newHashSet()
+ for i := 0; i < 1<<BITS; i++ {
+ k.clear()
+ for j := 0; j < BITS; j++ {
+ if i>>uint(j)&1 != 0 {
+ k.flipBit((j + r) % k.bits())
+ }
+ }
+ h.add(k.hash())
+ }
+ h.check(t)
+ }
+}
+
+// All keys of the form prefix + [A-Za-z0-9]*N + suffix.
+func TestSmhasherText(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping in short mode")
+ }
+ text(t, "Foo", "Bar")
+ text(t, "FooBar", "")
+ text(t, "", "FooBar")
+}
+func text(t *testing.T, prefix, suffix string) {
+ const N = 4
+ const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
+ const L = len(S)
+ b := make([]byte, len(prefix)+N+len(suffix))
+ copy(b, prefix)
+ copy(b[len(prefix)+N:], suffix)
+ h := newHashSet()
+ c := b[len(prefix):]
+ for i := 0; i < L; i++ {
+ c[0] = S[i]
+ for j := 0; j < L; j++ {
+ c[1] = S[j]
+ for k := 0; k < L; k++ {
+ c[2] = S[k]
+ for x := 0; x < L; x++ {
+ c[3] = S[x]
+ h.addB(b)
+ }
+ }
+ }
+ }
+ h.check(t)
+}
+
+// Make sure different seed values generate different hashes.
+func TestSmhasherSeed(t *testing.T) {
+ h := newHashSet()
+ const N = 100000
+ s := "hello"
+ for i := 0; i < N; i++ {
+ h.addS_seed(s, uintptr(i))
+ }
+ h.check(t)
+}
+
+// size of the hash output (32 or 64 bits)
+const hashSize = 32 + int(^uintptr(0)>>63<<5)
+
+func randBytes(r *rand.Rand, b []byte) {
+ for i := range b {
+ b[i] = byte(r.Uint32())
+ }
+}
+
+func benchmarkHash(b *testing.B, n int) {
+ s := strings.Repeat("A", n)
+
+ for i := 0; i < b.N; i++ {
+ StringHash(s, 0)
+ }
+ b.SetBytes(int64(n))
+}
+
+func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) }
+func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) }
+func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) }
+func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) }
+func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
diff --git a/src/runtime/hashmap.go b/src/runtime/hashmap.go
new file mode 100644
index 000000000..55287f6ff
--- /dev/null
+++ b/src/runtime/hashmap.go
@@ -0,0 +1,944 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// This file contains the implementation of Go's map type.
+//
+// A map is just a hash table. The data is arranged
+// into an array of buckets. Each bucket contains up to
+// 8 key/value pairs. The low-order bits of the hash are
+// used to select a bucket. Each bucket contains a few
+// high-order bits of each hash to distinguish the entries
+// within a single bucket.
+//
+// If more than 8 keys hash to a bucket, we chain on
+// extra buckets.
+//
+// When the hashtable grows, we allocate a new array
+// of buckets twice as big. Buckets are incrementally
+// copied from the old bucket array to the new bucket array.
+//
+// Map iterators walk through the array of buckets and
+// return the keys in walk order (bucket #, then overflow
+// chain order, then bucket index). To maintain iteration
+// semantics, we never move keys within their bucket (if
+// we did, keys might be returned 0 or 2 times). When
+// growing the table, iterators remain iterating through the
+// old table and must check the new table if the bucket
+// they are iterating through has been moved ("evacuated")
+// to the new table.
+
+// Picking loadFactor: too large and we have lots of overflow
+// buckets, too small and we waste a lot of space. I wrote
+// a simple program to check some stats for different loads:
+// (64-bit, 8 byte keys and values)
+// loadFactor    %overflow  bytes/entry  hitprobe  missprobe
+//       4.00         2.13        20.77      3.00       4.00
+//       4.50         4.05        17.30      3.25       4.50
+//       5.00         6.85        14.77      3.50       5.00
+//       5.50        10.55        12.94      3.75       5.50
+//       6.00        15.27        11.67      4.00       6.00
+//       6.50        20.90        10.79      4.25       6.50
+//       7.00        27.14        10.15      4.50       7.00
+//       7.50        34.03         9.73      4.75       7.50
+//       8.00        41.10         9.40      5.00       8.00
+//
+// %overflow = percentage of buckets which have an overflow bucket
+// bytes/entry = overhead bytes used per key/value pair
+// hitprobe = # of entries to check when looking up a present key
+// missprobe = # of entries to check when looking up an absent key
+//
+// Keep in mind this data is for maximally loaded tables, i.e. just
+// before the table grows. Typical tables will be somewhat less loaded.
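As a rough standalone sketch of the bucket selection described above (not the runtime's own code): on a 64-bit platform the low-order B bits of the hash pick the bucket, and the top 8 bits become the stored tophash, bumped past the reserved marker values.

	package main

	import "fmt"

	const minTopHash = 4 // 0-3 are reserved for empty/evacuated markers

	// bucketAndTop splits a 64-bit hash the way the map code does:
	// the low-order B bits choose the bucket, and the high byte is the
	// per-entry tophash used to short-circuit full key comparisons.
	func bucketAndTop(hash uint64, B uint8) (bucket uint64, top uint8) {
		bucket = hash & (1<<B - 1)
		top = uint8(hash >> 56)
		if top < minTopHash {
			top += minTopHash
		}
		return
	}

	func main() {
		bucket, top := bucketAndTop(0xdeadbeefcafef00d, 5)
		fmt.Printf("bucket %d, tophash %#x\n", bucket, top)
	}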
+
+import (
+ "unsafe"
+)
+
+const (
+ // Maximum number of key/value pairs a bucket can hold.
+ bucketCnt = 8
+
+ // Maximum average load of a bucket that triggers growth.
+ loadFactor = 6.5
+
+ // Maximum key or value size to keep inline (instead of mallocing per element).
+ // Must fit in a uint8.
+ // Fast versions cannot handle big values - the cutoff size for
+ // fast versions in ../../cmd/gc/walk.c must be at most this value.
+ maxKeySize = 128
+ maxValueSize = 128
+
+ // data offset should be the size of the bmap struct, but needs to be
+ // aligned correctly. For amd64p32 this means 64-bit alignment
+ // even though pointers are 32 bit.
+ dataOffset = unsafe.Offsetof(struct {
+ b bmap
+ v int64
+ }{}.v)
+
+ // Possible tophash values. We reserve a few possibilities for special marks.
+ // Each bucket (including its overflow buckets, if any) will have either all or none of its
+ // entries in the evacuated* states (except during the evacuate() method, which only happens
+ // during map writes and thus no one else can observe the map during that time).
+ empty = 0 // cell is empty
+ evacuatedEmpty = 1 // cell is empty, bucket is evacuated.
+ evacuatedX = 2 // key/value is valid. Entry has been evacuated to first half of larger table.
+ evacuatedY = 3 // same as above, but evacuated to second half of larger table.
+ minTopHash = 4 // minimum tophash for a normal filled cell.
+
+ // flags
+ iterator = 1 // there may be an iterator using buckets
+ oldIterator = 2 // there may be an iterator using oldbuckets
+
+ // sentinel bucket ID for iterator checks
+ noCheck = 1<<(8*ptrSize) - 1
+
+ // trigger a garbage collection at every alloc called from this code
+ checkgc = false
+)
+
+// A header for a Go map.
+type hmap struct {
+ // Note: the format of the Hmap is encoded in ../../cmd/gc/reflect.c and
+ // ../reflect/type.go. Don't change this structure without also changing that code!
+ count int // # live cells == size of map. Must be first (used by len() builtin)
+ flags uint32
+ hash0 uint32 // hash seed
+ B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
+
+ buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
+ oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
+ nevacuate uintptr // progress counter for evacuation (buckets less than this have been evacuated)
+}
+
+// A bucket for a Go map.
+type bmap struct {
+ tophash [bucketCnt]uint8
+ overflow *bmap
+ // Followed by bucketCnt keys and then bucketCnt values.
+ // NOTE: packing all the keys together and then all the values together makes the
+ // code a bit more complicated than alternating key/value/key/value/... but it allows
+ // us to eliminate padding which would be needed for, e.g., map[int64]int8.
+}
+
+// A hash iteration structure.
+// If you modify hiter, also change cmd/gc/reflect.c to indicate
+// the layout of this structure.
+type hiter struct {
+ key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/gc/range.c).
+ value unsafe.Pointer // Must be in second position (see cmd/gc/range.c).
+ t *maptype
+ h *hmap
+ buckets unsafe.Pointer // bucket ptr at hash_iter initialization time
+ bptr *bmap // current bucket
+ offset uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1)
+ done bool
+ B uint8
+ bucket uintptr
+ i uintptr
+ checkBucket uintptr
+}
+
+func evacuated(b *bmap) bool {
+ h := b.tophash[0]
+ return h > empty && h < minTopHash
+}
+
+func makemap(t *maptype, hint int64) *hmap {
+ if sz := unsafe.Sizeof(hmap{}); sz > 48 || sz != uintptr(t.hmap.size) {
+ gothrow("bad hmap size")
+ }
+
+ if hint < 0 || int64(int32(hint)) != hint {
+ panic("makemap: size out of range")
+ // TODO: make hint an int, then none of this nonsense
+ }
+
+ if !ismapkey(t.key) {
+ gothrow("runtime.makemap: unsupported map key type")
+ }
+
+ // check compiler's and reflect's math
+ if t.key.size > maxKeySize && (!t.indirectkey || t.keysize != uint8(ptrSize)) ||
+ t.key.size <= maxKeySize && (t.indirectkey || t.keysize != uint8(t.key.size)) {
+ gothrow("key size wrong")
+ }
+ if t.elem.size > maxValueSize && (!t.indirectvalue || t.valuesize != uint8(ptrSize)) ||
+ t.elem.size <= maxValueSize && (t.indirectvalue || t.valuesize != uint8(t.elem.size)) {
+ gothrow("value size wrong")
+ }
+
+ // invariants we depend on. We should probably check these at compile time
+ // somewhere, but for now we'll do it here.
+ if t.key.align > bucketCnt {
+ gothrow("key align too big")
+ }
+ if t.elem.align > bucketCnt {
+ gothrow("value align too big")
+ }
+ if uintptr(t.key.size)%uintptr(t.key.align) != 0 {
+ gothrow("key size not a multiple of key align")
+ }
+ if uintptr(t.elem.size)%uintptr(t.elem.align) != 0 {
+ gothrow("value size not a multiple of value align")
+ }
+ if bucketCnt < 8 {
+ gothrow("bucketsize too small for proper alignment")
+ }
+ if dataOffset%uintptr(t.key.align) != 0 {
+ gothrow("need padding in bucket (key)")
+ }
+ if dataOffset%uintptr(t.elem.align) != 0 {
+ gothrow("need padding in bucket (value)")
+ }
+
+ // find size parameter which will hold the requested # of elements
+ B := uint8(0)
+ for ; hint > bucketCnt && float32(hint) > loadFactor*float32(uintptr(1)<<B); B++ {
+ }
+
+ // allocate initial hash table
+ // if B == 0, the buckets field is allocated lazily later (in mapassign)
+ // If hint is large zeroing this memory could take a while.
+ var buckets unsafe.Pointer
+ if B != 0 {
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ buckets = newarray(t.bucket, uintptr(1)<<B)
+ }
+
+ // initialize Hmap
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ h := (*hmap)(newobject(t.hmap))
+ h.count = 0
+ h.B = B
+ h.flags = 0
+ h.hash0 = fastrand1()
+ h.buckets = buckets
+ h.oldbuckets = nil
+ h.nevacuate = 0
+
+ return h
+}
+
+// mapaccess1 returns a pointer to h[key]. Never returns nil, instead
+// it will return a reference to the zero object for the value type if
+// the key is not in the map.
+// NOTE: The returned pointer may keep the whole map live, so don't
+// hold onto it for very long.
+func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ pc := funcPC(mapaccess1)
+ racereadpc(unsafe.Pointer(h), callerpc, pc)
+ raceReadObjectPC(t.key, key, callerpc, pc)
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ alg := goalg(t.key.alg)
+ hash := alg.hash(key, uintptr(t.key.size), uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ top := uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] != top {
+ continue
+ }
+ k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
+ if t.indirectkey {
+ k = *((*unsafe.Pointer)(k))
+ }
+ if alg.equal(key, k, uintptr(t.key.size)) {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ if t.indirectvalue {
+ v = *((*unsafe.Pointer)(v))
+ }
+ return v
+ }
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ }
+}
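At the language level these entry points back the two forms of a map read; a tiny sketch of ordinary user code, shown only to connect them to their callers:

	package main

	import "fmt"

	func main() {
		m := map[string]int{"a": 1}

		v := m["missing"]     // lowered to a mapaccess1-style call: zero value, no miss signal
		w, ok := m["missing"] // lowered to a mapaccess2-style call: zero value plus a found flag

		fmt.Println(v, w, ok) // 0 0 false
	}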
+
+func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ pc := funcPC(mapaccess2)
+ racereadpc(unsafe.Pointer(h), callerpc, pc)
+ raceReadObjectPC(t.key, key, callerpc, pc)
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ alg := goalg(t.key.alg)
+ hash := alg.hash(key, uintptr(t.key.size), uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ top := uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] != top {
+ continue
+ }
+ k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
+ if t.indirectkey {
+ k = *((*unsafe.Pointer)(k))
+ }
+ if alg.equal(key, k, uintptr(t.key.size)) {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ if t.indirectvalue {
+ v = *((*unsafe.Pointer)(v))
+ }
+ return v, true
+ }
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ }
+}
+
+// mapaccessK returns both the key and the value. It is used by the map iterator.
+func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe.Pointer) {
+ if h == nil || h.count == 0 {
+ return nil, nil
+ }
+ alg := goalg(t.key.alg)
+ hash := alg.hash(key, uintptr(t.key.size), uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ top := uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] != top {
+ continue
+ }
+ k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
+ if t.indirectkey {
+ k = *((*unsafe.Pointer)(k))
+ }
+ if alg.equal(key, k, uintptr(t.key.size)) {
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ if t.indirectvalue {
+ v = *((*unsafe.Pointer)(v))
+ }
+ return k, v
+ }
+ }
+ b = b.overflow
+ if b == nil {
+ return nil, nil
+ }
+ }
+}
+
+func mapassign1(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) {
+ if h == nil {
+ panic("assignment to entry in nil map")
+ }
+ if raceenabled {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ pc := funcPC(mapassign1)
+ racewritepc(unsafe.Pointer(h), callerpc, pc)
+ raceReadObjectPC(t.key, key, callerpc, pc)
+ raceReadObjectPC(t.elem, val, callerpc, pc)
+ }
+
+ alg := goalg(t.key.alg)
+ hash := alg.hash(key, uintptr(t.key.size), uintptr(h.hash0))
+
+ if h.buckets == nil {
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ h.buckets = newarray(t.bucket, 1)
+ }
+
+again:
+ bucket := hash & (uintptr(1)<<h.B - 1)
+ if h.oldbuckets != nil {
+ growWork(t, h, bucket)
+ }
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
+ top := uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+
+ var inserti *uint8
+ var insertk unsafe.Pointer
+ var insertv unsafe.Pointer
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] != top {
+ if b.tophash[i] == empty && inserti == nil {
+ inserti = &b.tophash[i]
+ insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
+ insertv = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ }
+ continue
+ }
+ k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
+ k2 := k
+ if t.indirectkey {
+ k2 = *((*unsafe.Pointer)(k2))
+ }
+ if !alg.equal(key, k2, uintptr(t.key.size)) {
+ continue
+ }
+ // already have a mapping for key. Update it.
+ memmove(k2, key, uintptr(t.key.size))
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
+ v2 := v
+ if t.indirectvalue {
+ v2 = *((*unsafe.Pointer)(v2))
+ }
+ memmove(v2, val, uintptr(t.elem.size))
+ return
+ }
+ if b.overflow == nil {
+ break
+ }
+ b = b.overflow
+ }
+
+ // did not find mapping for key. Allocate new cell & add entry.
+ if float32(h.count) >= loadFactor*float32((uintptr(1)<<h.B)) && h.count >= bucketCnt {
+ hashGrow(t, h)
+ goto again // Growing the table invalidates everything, so try again
+ }
+
+ if inserti == nil {
+ // all current buckets are full, allocate a new one.
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ newb := (*bmap)(newobject(t.bucket))
+ b.overflow = newb
+ inserti = &newb.tophash[0]
+ insertk = add(unsafe.Pointer(newb), dataOffset)
+ insertv = add(insertk, bucketCnt*uintptr(t.keysize))
+ }
+
+ // store new key/value at insert position
+ if t.indirectkey {
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ kmem := newobject(t.key)
+ *(*unsafe.Pointer)(insertk) = kmem
+ insertk = kmem
+ }
+ if t.indirectvalue {
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ vmem := newobject(t.elem)
+ *(*unsafe.Pointer)(insertv) = vmem
+ insertv = vmem
+ }
+ memmove(insertk, key, uintptr(t.key.size))
+ memmove(insertv, val, uintptr(t.elem.size))
+ *inserti = top
+ h.count++
+}
+
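The nil check at the top of mapassign1 is what turns a write to an uninitialized map into a run-time panic, while reads simply fall through to the zero value. A small user-level illustration:

package main

import "fmt"

func main() {
	var m map[string]int // nil map: no buckets allocated

	fmt.Println(m["missing"]) // reads are fine: 0

	defer func() {
		fmt.Println("recovered:", recover())
	}()
	m["x"] = 1 // write hits the nil check in mapassign1 and panics
}
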
+func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ pc := funcPC(mapdelete)
+ racewritepc(unsafe.Pointer(h), callerpc, pc)
+ raceReadObjectPC(t.key, key, callerpc, pc)
+ }
+ if h == nil || h.count == 0 {
+ return
+ }
+ alg := goalg(t.key.alg)
+ hash := alg.hash(key, uintptr(t.key.size), uintptr(h.hash0))
+ bucket := hash & (uintptr(1)<<h.B - 1)
+ if h.oldbuckets != nil {
+ growWork(t, h, bucket)
+ }
+ b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
+ top := uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ if b.tophash[i] != top {
+ continue
+ }
+ k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
+ k2 := k
+ if t.indirectkey {
+ k2 = *((*unsafe.Pointer)(k2))
+ }
+ if !alg.equal(key, k2, uintptr(t.key.size)) {
+ continue
+ }
+ memclr(k, uintptr(t.keysize))
+ v := unsafe.Pointer(uintptr(unsafe.Pointer(b)) + dataOffset + bucketCnt*uintptr(t.keysize) + i*uintptr(t.valuesize))
+ memclr(v, uintptr(t.valuesize))
+ b.tophash[i] = empty
+ h.count--
+ return
+ }
+ b = b.overflow
+ if b == nil {
+ return
+ }
+ }
+}
+
+func mapiterinit(t *maptype, h *hmap, it *hiter) {
+ // Clear pointer fields so garbage collector does not complain.
+ it.key = nil
+ it.value = nil
+ it.t = nil
+ it.h = nil
+ it.buckets = nil
+ it.bptr = nil
+
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiterinit))
+ }
+
+ if h == nil || h.count == 0 {
+ it.key = nil
+ it.value = nil
+ return
+ }
+
+ if unsafe.Sizeof(hiter{})/ptrSize != 10 {
+ gothrow("hash_iter size incorrect") // see ../../cmd/gc/reflect.c
+ }
+ it.t = t
+ it.h = h
+
+ // grab snapshot of bucket state
+ it.B = h.B
+ it.buckets = h.buckets
+
+ // iterator state
+ it.bucket = 0
+ it.offset = uint8(fastrand1() & (bucketCnt - 1))
+ it.done = false
+ it.bptr = nil
+
+ // Remember we have an iterator.
+ // Can run concurrently with another mapiterinit().
+ for {
+ old := h.flags
+ if old == old|iterator|oldIterator {
+ break
+ }
+ if cas(&h.flags, old, old|iterator|oldIterator) {
+ break
+ }
+ }
+
+ mapiternext(it)
+}
+
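The random offset picked here is one reason map iteration order is unspecified: successive range loops over the same map may visit entries in different orders. A user-level illustration (the printed orders are not guaranteed to differ, but usually do):

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8}
	for run := 0; run < 3; run++ {
		for k := range m {
			fmt.Print(k, " ")
		}
		fmt.Println()
	}
}
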
+func mapiternext(it *hiter) {
+ h := it.h
+ if raceenabled {
+ callerpc := getcallerpc(unsafe.Pointer(&it))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext))
+ }
+ t := it.t
+ bucket := it.bucket
+ b := it.bptr
+ i := it.i
+ checkBucket := it.checkBucket
+ alg := goalg(t.key.alg)
+
+next:
+ if b == nil {
+ if it.done {
+ // end of iteration
+ it.key = nil
+ it.value = nil
+ return
+ }
+ if h.oldbuckets != nil && it.B == h.B {
+ // Iterator was started in the middle of a grow, and the grow isn't done yet.
+ // If the bucket we're looking at hasn't been filled in yet (i.e. the old
+ // bucket hasn't been evacuated) then we need to iterate through the old
+ // bucket and only return the ones that will be migrated to this bucket.
+ oldbucket := bucket & (uintptr(1)<<(it.B-1) - 1)
+ b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ if !evacuated(b) {
+ checkBucket = bucket
+ } else {
+ b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
+ checkBucket = noCheck
+ }
+ } else {
+ b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
+ checkBucket = noCheck
+ }
+ bucket++
+ if bucket == uintptr(1)<<it.B {
+ bucket = 0
+ it.done = true
+ }
+ i = 0
+ }
+ for ; i < bucketCnt; i++ {
+ offi := (i + uintptr(it.offset)) & (bucketCnt - 1)
+ k := add(unsafe.Pointer(b), dataOffset+offi*uintptr(t.keysize))
+ v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+offi*uintptr(t.valuesize))
+ if b.tophash[offi] != empty && b.tophash[offi] != evacuatedEmpty {
+ if checkBucket != noCheck {
+ // Special case: iterator was started during a grow and the
+ // grow is not done yet. We're working on a bucket whose
+ // oldbucket has not been evacuated yet. Or at least, it wasn't
+ // evacuated when we started the bucket. So we're iterating
+ // through the oldbucket, skipping any keys that will go
+ // to the other new bucket (each oldbucket expands to two
+ // buckets during a grow).
+ k2 := k
+ if t.indirectkey {
+ k2 = *((*unsafe.Pointer)(k2))
+ }
+ if alg.equal(k2, k2, uintptr(t.key.size)) {
+ // If the item in the oldbucket is not destined for
+ // the current new bucket in the iteration, skip it.
+ hash := alg.hash(k2, uintptr(t.key.size), uintptr(h.hash0))
+ if hash&(uintptr(1)<<it.B-1) != checkBucket {
+ continue
+ }
+ } else {
+ // Hash isn't repeatable if k != k (NaNs). We need a
+ // repeatable and randomish choice of which direction
+ // to send NaNs during evacuation. We'll use the low
+ // bit of tophash to decide which way NaNs go.
+ // NOTE: this case is why we need two evacuate tophash
+ // values, evacuatedX and evacuatedY, that differ in
+ // their low bit.
+ if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
+ continue
+ }
+ }
+ }
+ if b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY {
+ // This is the golden data; we can return it.
+ if t.indirectkey {
+ k = *((*unsafe.Pointer)(k))
+ }
+ it.key = k
+ if t.indirectvalue {
+ v = *((*unsafe.Pointer)(v))
+ }
+ it.value = v
+ } else {
+ // The hash table has grown since the iterator was started.
+ // The golden data for this key is now somewhere else.
+ k2 := k
+ if t.indirectkey {
+ k2 = *((*unsafe.Pointer)(k2))
+ }
+ if alg.equal(k2, k2, uintptr(t.key.size)) {
+ // Check the current hash table for the data.
+ // This code handles the case where the key
+ // has been deleted, updated, or deleted and reinserted.
+ // NOTE: we need to regrab the key as it has potentially been
+ // updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
+ rk, rv := mapaccessK(t, h, k2)
+ if rk == nil {
+ continue // key has been deleted
+ }
+ it.key = rk
+ it.value = rv
+ } else {
+ // if key!=key then the entry can't be deleted or
+ // updated, so we can just return it. That's lucky for
+ // us because when key!=key we can't look it up
+ // successfully in the current table.
+ it.key = k2
+ if t.indirectvalue {
+ v = *((*unsafe.Pointer)(v))
+ }
+ it.value = v
+ }
+ }
+ it.bucket = bucket
+ it.bptr = b
+ it.i = i + 1
+ it.checkBucket = checkBucket
+ return
+ }
+ }
+ b = b.overflow
+ i = 0
+ goto next
+}
+
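The k != k branches above exist because NaN keys never compare equal, even to themselves: they can be inserted but never looked up again, and iteration is the only way to reach them. A user-level illustration:

package main

import (
	"fmt"
	"math"
)

func main() {
	m := map[float64]int{}
	nan := math.NaN()

	m[nan] = 1
	m[nan] = 2 // NaN != NaN, so this inserts a second, distinct entry

	fmt.Println(len(m)) // 2
	_, ok := m[nan]
	fmt.Println(ok) // false: a NaN key can never be found again
	for k, v := range m {
		fmt.Println(k, v) // iteration still reaches both entries
	}
}
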
+func hashGrow(t *maptype, h *hmap) {
+ if h.oldbuckets != nil {
+ gothrow("evacuation not done in time")
+ }
+ oldbuckets := h.buckets
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ newbuckets := newarray(t.bucket, uintptr(1)<<(h.B+1))
+ flags := h.flags &^ (iterator | oldIterator)
+ if h.flags&iterator != 0 {
+ flags |= oldIterator
+ }
+ // commit the grow (atomic wrt gc)
+ h.B++
+ h.flags = flags
+ h.oldbuckets = oldbuckets
+ h.buckets = newbuckets
+ h.nevacuate = 0
+
+ // the actual copying of the hash table data is done incrementally
+ // by growWork() and evacuate().
+}
+
+func growWork(t *maptype, h *hmap, bucket uintptr) {
+ noldbuckets := uintptr(1) << (h.B - 1)
+
+ // make sure we evacuate the oldbucket corresponding
+ // to the bucket we're about to use
+ evacuate(t, h, bucket&(noldbuckets-1))
+
+ // evacuate one more oldbucket to make progress on growing
+ if h.oldbuckets != nil {
+ evacuate(t, h, h.nevacuate)
+ }
+}
+
+func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
+ b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ newbit := uintptr(1) << (h.B - 1)
+ alg := goalg(t.key.alg)
+ if !evacuated(b) {
+ // TODO: reuse overflow buckets instead of using new ones, if there
+ // is no iterator using the old buckets. (If !oldIterator.)
+
+ x := (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
+ y := (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
+ xi := 0
+ yi := 0
+ xk := add(unsafe.Pointer(x), dataOffset)
+ yk := add(unsafe.Pointer(y), dataOffset)
+ xv := add(xk, bucketCnt*uintptr(t.keysize))
+ yv := add(yk, bucketCnt*uintptr(t.keysize))
+ for ; b != nil; b = b.overflow {
+ k := add(unsafe.Pointer(b), dataOffset)
+ v := add(k, bucketCnt*uintptr(t.keysize))
+ for i := 0; i < bucketCnt; i, k, v = i+1, add(k, uintptr(t.keysize)), add(v, uintptr(t.valuesize)) {
+ top := b.tophash[i]
+ if top == empty {
+ b.tophash[i] = evacuatedEmpty
+ continue
+ }
+ if top < minTopHash {
+ gothrow("bad map state")
+ }
+ k2 := k
+ if t.indirectkey {
+ k2 = *((*unsafe.Pointer)(k2))
+ }
+ // Compute hash to make our evacuation decision (whether we need
+ // to send this key/value to bucket x or bucket y).
+ hash := alg.hash(k2, uintptr(t.key.size), uintptr(h.hash0))
+ if h.flags&iterator != 0 {
+ if !alg.equal(k2, k2, uintptr(t.key.size)) {
+ // If key != key (NaNs), then the hash could be (and probably
+ // will be) entirely different from the old hash. Moreover,
+ // it isn't reproducible. Reproducibility is required in the
+ // presence of iterators, as our evacuation decision must
+ // match whatever decision the iterator made.
+ // Fortunately, we have the freedom to send these keys either
+ // way. Also, tophash is meaningless for these kinds of keys.
+ // We let the low bit of tophash drive the evacuation decision.
+ // We recompute a new random tophash for the next level so
+ // these keys will get evenly distributed across all buckets
+ // after multiple grows.
+ if (top & 1) != 0 {
+ hash |= newbit
+ } else {
+ hash &^= newbit
+ }
+ top = uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ }
+ }
+ if (hash & newbit) == 0 {
+ b.tophash[i] = evacuatedX
+ if xi == bucketCnt {
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ newx := (*bmap)(newobject(t.bucket))
+ x.overflow = newx
+ x = newx
+ xi = 0
+ xk = add(unsafe.Pointer(x), dataOffset)
+ xv = add(xk, bucketCnt*uintptr(t.keysize))
+ }
+ x.tophash[xi] = top
+ if t.indirectkey {
+ *(*unsafe.Pointer)(xk) = k2 // copy pointer
+ } else {
+ memmove(xk, k, uintptr(t.key.size)) // copy value
+ }
+ if t.indirectvalue {
+ *(*unsafe.Pointer)(xv) = *(*unsafe.Pointer)(v)
+ } else {
+ memmove(xv, v, uintptr(t.elem.size))
+ }
+ xi++
+ xk = add(xk, uintptr(t.keysize))
+ xv = add(xv, uintptr(t.valuesize))
+ } else {
+ b.tophash[i] = evacuatedY
+ if yi == bucketCnt {
+ if checkgc {
+ memstats.next_gc = memstats.heap_alloc
+ }
+ newy := (*bmap)(newobject(t.bucket))
+ y.overflow = newy
+ y = newy
+ yi = 0
+ yk = add(unsafe.Pointer(y), dataOffset)
+ yv = add(yk, bucketCnt*uintptr(t.keysize))
+ }
+ y.tophash[yi] = top
+ if t.indirectkey {
+ *(*unsafe.Pointer)(yk) = k2
+ } else {
+ memmove(yk, k, uintptr(t.key.size))
+ }
+ if t.indirectvalue {
+ *(*unsafe.Pointer)(yv) = *(*unsafe.Pointer)(v)
+ } else {
+ memmove(yv, v, uintptr(t.elem.size))
+ }
+ yi++
+ yk = add(yk, uintptr(t.keysize))
+ yv = add(yv, uintptr(t.valuesize))
+ }
+ }
+ }
+ // Unlink the overflow buckets & clear key/value to help GC.
+ if h.flags&oldIterator == 0 {
+ b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
+ b.overflow = nil
+ memclr(add(unsafe.Pointer(b), dataOffset), uintptr(t.bucketsize)-dataOffset)
+ }
+ }
+
+ // Advance evacuation mark
+ if oldbucket == h.nevacuate {
+ h.nevacuate = oldbucket + 1
+ if oldbucket+1 == newbit { // newbit == # of oldbuckets
+ // Growing is all done. Free old main bucket array.
+ h.oldbuckets = nil
+ }
+ }
+}
+
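During a grow each old bucket i is split between new buckets i and i+newbit, and the choice is made by the single extra hash bit that the doubled mask exposes (the x/y targets above). A small sketch of that decision using hypothetical hash values:

package main

import "fmt"

func main() {
	oldB := uint(3)              // 8 old buckets (hypothetical)
	newbit := uintptr(1) << oldB // == number of old buckets

	for _, hash := range []uintptr{0x05, 0x0d, 0x25, 0x2d} {
		oldbucket := hash & (newbit - 1)   // bucket index before the grow
		newbucket := hash & (2*newbit - 1) // bucket index after the grow
		dest := "x (same index)"
		if hash&newbit != 0 {
			dest = "y (index + newbit)"
		}
		fmt.Printf("hash=%#04x  old=%d  new=%d  -> %s\n", hash, oldbucket, newbucket, dest)
	}
}
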
+func ismapkey(t *_type) bool {
+ return goalg(t.alg).hash != nil
+}
+
+// Reflect stubs. Called from ../reflect/asm_*.s
+
+func reflect_makemap(t *maptype) *hmap {
+ return makemap(t, 0)
+}
+
+func reflect_mapaccess(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
+ val, ok := mapaccess2(t, h, key)
+ if !ok {
+ // reflect wants nil for a missing element
+ val = nil
+ }
+ return val
+}
+
+func reflect_mapassign(t *maptype, h *hmap, key unsafe.Pointer, val unsafe.Pointer) {
+ mapassign1(t, h, key, val)
+}
+
+func reflect_mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
+ mapdelete(t, h, key)
+}
+
+func reflect_mapiterinit(t *maptype, h *hmap) *hiter {
+ it := new(hiter)
+ mapiterinit(t, h, it)
+ return it
+}
+
+func reflect_mapiternext(it *hiter) {
+ mapiternext(it)
+}
+
+func reflect_mapiterkey(it *hiter) unsafe.Pointer {
+ return it.key
+}
+
+func reflect_maplen(h *hmap) int {
+ if h == nil {
+ return 0
+ }
+ if raceenabled {
+ callerpc := getcallerpc(unsafe.Pointer(&h))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(reflect_maplen))
+ }
+ return h.count
+}
+
+func reflect_ismapkey(t *_type) bool {
+ return ismapkey(t)
+}
diff --git a/src/runtime/hashmap_fast.go b/src/runtime/hashmap_fast.go
new file mode 100644
index 000000000..8e21e02d6
--- /dev/null
+++ b/src/runtime/hashmap_fast.go
@@ -0,0 +1,379 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32))
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ var b *bmap
+ if h.B == 0 {
+ // One-bucket table. No need to hash.
+ b = (*bmap)(h.buckets)
+ } else {
+ hash := goalg(t.key.alg).hash(noescape(unsafe.Pointer(&key)), 4, uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
+ if k != key {
+ continue
+ }
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize))
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ }
+}
+
+func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32))
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ var b *bmap
+ if h.B == 0 {
+ // One-bucket table. No need to hash.
+ b = (*bmap)(h.buckets)
+ } else {
+ hash := goalg(t.key.alg).hash(noescape(unsafe.Pointer(&key)), 4, uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4)))
+ if k != key {
+ continue
+ }
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ }
+}
+
+func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64))
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ var b *bmap
+ if h.B == 0 {
+ // One-bucket table. No need to hash.
+ b = (*bmap)(h.buckets)
+ } else {
+ hash := goalg(t.key.alg).hash(noescape(unsafe.Pointer(&key)), 8, uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)))
+ if k != key {
+ continue
+ }
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize))
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ }
+}
+
+func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64))
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ var b *bmap
+ if h.B == 0 {
+ // One-bucket table. No need to hash.
+ b = (*bmap)(h.buckets)
+ } else {
+ hash := goalg(t.key.alg).hash(noescape(unsafe.Pointer(&key)), 8, uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8)))
+ if k != key {
+ continue
+ }
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ }
+}
+
+func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr))
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ key := (*stringStruct)(unsafe.Pointer(&ky))
+ if h.B == 0 {
+ // One-bucket table.
+ b := (*bmap)(h.buckets)
+ if key.len < 32 {
+ // short key, doing lots of comparisons is ok
+ for i := uintptr(0); i < bucketCnt; i++ {
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*ptrSize))
+ if k.len != key.len {
+ continue
+ }
+ if k.str == key.str || memeq(k.str, key.str, uintptr(key.len)) {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+i*uintptr(t.valuesize))
+ }
+ }
+ return unsafe.Pointer(t.elem.zero)
+ }
+ // long key, try not to do more comparisons than necessary
+ keymaybe := uintptr(bucketCnt)
+ for i := uintptr(0); i < bucketCnt; i++ {
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*ptrSize))
+ if k.len != key.len {
+ continue
+ }
+ if k.str == key.str {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+i*uintptr(t.valuesize))
+ }
+ // check first 4 bytes
+ // TODO: on amd64/386 at least, make this compile to one 4-byte comparison instead of
+ // four 1-byte comparisons.
+ if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
+ continue
+ }
+ // check last 4 bytes
+ if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) {
+ continue
+ }
+ if keymaybe != bucketCnt {
+ // Two keys are potential matches. Use hash to distinguish them.
+ goto dohash
+ }
+ keymaybe = i
+ }
+ if keymaybe != bucketCnt {
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*ptrSize))
+ if memeq(k.str, key.str, uintptr(key.len)) {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+keymaybe*uintptr(t.valuesize))
+ }
+ }
+ return unsafe.Pointer(t.elem.zero)
+ }
+dohash:
+ hash := goalg(t.key.alg).hash(noescape(unsafe.Pointer(&ky)), 2*ptrSize, uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ top := uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x != top {
+ continue
+ }
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*ptrSize))
+ if k.len != key.len {
+ continue
+ }
+ if k.str == key.str || memeq(k.str, key.str, uintptr(key.len)) {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+i*uintptr(t.valuesize))
+ }
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero)
+ }
+ }
+}
+
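For long string keys the code above filters candidates by length, the first four bytes, and the last four bytes before paying for a full comparison (or falling back to hashing when two candidates survive). A hedged user-level sketch of that filtering idea:

package main

import "fmt"

// cheapReject reports whether a and b can be ruled out as equal using only
// length, prefix, and suffix checks, mirroring the pre-filter above.
func cheapReject(a, b string) bool {
	if len(a) != len(b) {
		return true
	}
	if len(a) < 4 {
		return a != b // short keys: just compare
	}
	return a[:4] != b[:4] || a[len(a)-4:] != b[len(b)-4:]
}

func main() {
	fmt.Println(cheapReject("hello, world", "hello, globe")) // true: suffix differs
	fmt.Println(cheapReject("hello, world", "hellO, world")) // false: still needs a full compare
}
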
+func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) {
+ if raceenabled && h != nil {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr))
+ }
+ if h == nil || h.count == 0 {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ key := (*stringStruct)(unsafe.Pointer(&ky))
+ if h.B == 0 {
+ // One-bucket table.
+ b := (*bmap)(h.buckets)
+ if key.len < 32 {
+ // short key, doing lots of comparisons is ok
+ for i := uintptr(0); i < bucketCnt; i++ {
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*ptrSize))
+ if k.len != key.len {
+ continue
+ }
+ if k.str == key.str || memeq(k.str, key.str, uintptr(key.len)) {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+i*uintptr(t.valuesize)), true
+ }
+ }
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ // long key, try not to do more comparisons than necessary
+ keymaybe := uintptr(bucketCnt)
+ for i := uintptr(0); i < bucketCnt; i++ {
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x == empty {
+ continue
+ }
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*ptrSize))
+ if k.len != key.len {
+ continue
+ }
+ if k.str == key.str {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+i*uintptr(t.valuesize)), true
+ }
+ // check first 4 bytes
+ if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) {
+ continue
+ }
+ // check last 4 bytes
+ if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) {
+ continue
+ }
+ if keymaybe != bucketCnt {
+ // Two keys are potential matches. Use hash to distinguish them.
+ goto dohash
+ }
+ keymaybe = i
+ }
+ if keymaybe != bucketCnt {
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*ptrSize))
+ if memeq(k.str, key.str, uintptr(key.len)) {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+keymaybe*uintptr(t.valuesize)), true
+ }
+ }
+ return unsafe.Pointer(t.elem.zero), false
+ }
+dohash:
+ hash := goalg(t.key.alg).hash(noescape(unsafe.Pointer(&ky)), 2*ptrSize, uintptr(h.hash0))
+ m := uintptr(1)<<h.B - 1
+ b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
+ if c := h.oldbuckets; c != nil {
+ oldb := (*bmap)(add(c, (hash&(m>>1))*uintptr(t.bucketsize)))
+ if !evacuated(oldb) {
+ b = oldb
+ }
+ }
+ top := uint8(hash >> (ptrSize*8 - 8))
+ if top < minTopHash {
+ top += minTopHash
+ }
+ for {
+ for i := uintptr(0); i < bucketCnt; i++ {
+ x := *((*uint8)(add(unsafe.Pointer(b), i))) // b.tophash[i] without the bounds check
+ if x != top {
+ continue
+ }
+ k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*ptrSize))
+ if k.len != key.len {
+ continue
+ }
+ if k.str == key.str || memeq(k.str, key.str, uintptr(key.len)) {
+ return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*ptrSize+i*uintptr(t.valuesize)), true
+ }
+ }
+ b = b.overflow
+ if b == nil {
+ return unsafe.Pointer(t.elem.zero), false
+ }
+ }
+}
diff --git a/src/runtime/heapdump.c b/src/runtime/heapdump.c
new file mode 100644
index 000000000..d07fdb29b
--- /dev/null
+++ b/src/runtime/heapdump.c
@@ -0,0 +1,854 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implementation of runtime/debug.WriteHeapDump. Writes all
+// objects in the heap plus additional info (roots, threads,
+// finalizers, etc.) to a file.
+
+// The format of the dumped file is described at
+// http://code.google.com/p/go-wiki/wiki/heapdump13
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "mgc0.h"
+#include "type.h"
+#include "typekind.h"
+#include "funcdata.h"
+#include "zaexperiment.h"
+#include "textflag.h"
+
+extern byte runtime·data[];
+extern byte runtime·edata[];
+extern byte runtime·bss[];
+extern byte runtime·ebss[];
+
+enum {
+ FieldKindEol = 0,
+ FieldKindPtr = 1,
+ FieldKindString = 2,
+ FieldKindSlice = 3,
+ FieldKindIface = 4,
+ FieldKindEface = 5,
+
+ TagEOF = 0,
+ TagObject = 1,
+ TagOtherRoot = 2,
+ TagType = 3,
+ TagGoRoutine = 4,
+ TagStackFrame = 5,
+ TagParams = 6,
+ TagFinalizer = 7,
+ TagItab = 8,
+ TagOSThread = 9,
+ TagMemStats = 10,
+ TagQueuedFinalizer = 11,
+ TagData = 12,
+ TagBss = 13,
+ TagDefer = 14,
+ TagPanic = 15,
+ TagMemProf = 16,
+ TagAllocSample = 17,
+};
+
+static uintptr* playgcprog(uintptr offset, uintptr *prog, void (*callback)(void*,uintptr,uintptr), void *arg);
+static void dumpfields(BitVector bv);
+static void dumpbvtypes(BitVector *bv, byte *base);
+static BitVector makeheapobjbv(byte *p, uintptr size);
+
+// fd to write the dump to.
+static uintptr dumpfd;
+static byte *tmpbuf;
+static uintptr tmpbufsize;
+
+// buffer of pending write data
+enum {
+ BufSize = 4096,
+};
+#pragma dataflag NOPTR
+static byte buf[BufSize];
+static uintptr nbuf;
+
+static void
+write(byte *data, uintptr len)
+{
+ if(len + nbuf <= BufSize) {
+ runtime·memmove(buf + nbuf, data, len);
+ nbuf += len;
+ return;
+ }
+ runtime·write(dumpfd, buf, nbuf);
+ if(len >= BufSize) {
+ runtime·write(dumpfd, data, len);
+ nbuf = 0;
+ } else {
+ runtime·memmove(buf, data, len);
+ nbuf = len;
+ }
+}
+
+static void
+flush(void)
+{
+ runtime·write(dumpfd, buf, nbuf);
+ nbuf = 0;
+}
+
+// Cache of types that have been serialized already.
+// We use a type's hash field to pick a bucket.
+// Inside a bucket, we keep a list of types that
+// have been serialized so far, most recently used first.
+// Note: when a bucket overflows we may end up
+// serializing a type more than once. That's ok.
+enum {
+ TypeCacheBuckets = 256, // must be a power of 2
+ TypeCacheAssoc = 4,
+};
+typedef struct TypeCacheBucket TypeCacheBucket;
+struct TypeCacheBucket {
+ Type *t[TypeCacheAssoc];
+};
+static TypeCacheBucket typecache[TypeCacheBuckets];
+
+// dump a uint64 in a varint format parseable by encoding/binary
+static void
+dumpint(uint64 v)
+{
+ byte buf[10];
+ int32 n;
+ n = 0;
+ while(v >= 0x80) {
+ buf[n++] = v | 0x80;
+ v >>= 7;
+ }
+ buf[n++] = v;
+ write(buf, n);
+}
+
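As the comment notes, the varint produced by dumpint is the same format encoding/binary uses for unsigned varints, so a Go dump reader can decode it directly. A minimal round-trip sketch:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	var buf bytes.Buffer
	var tmp [binary.MaxVarintLen64]byte
	for _, v := range []uint64{1, 127, 128, 300, 1 << 40} {
		n := binary.PutUvarint(tmp[:], v)
		buf.Write(tmp[:n])
	}
	r := bytes.NewReader(buf.Bytes())
	for {
		v, err := binary.ReadUvarint(r)
		if err != nil {
			break
		}
		fmt.Println(v)
	}
}
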
+static void
+dumpbool(bool b)
+{
+ dumpint(b ? 1 : 0);
+}
+
+// dump varint uint64 length followed by memory contents
+static void
+dumpmemrange(byte *data, uintptr len)
+{
+ dumpint(len);
+ write(data, len);
+}
+
+static void
+dumpstr(String s)
+{
+ dumpmemrange(s.str, s.len);
+}
+
+static void
+dumpcstr(int8 *c)
+{
+ dumpmemrange((byte*)c, runtime·findnull((byte*)c));
+}
+
+// dump information for a type
+static void
+dumptype(Type *t)
+{
+ TypeCacheBucket *b;
+ int32 i, j;
+
+ if(t == nil) {
+ return;
+ }
+
+ // If we've definitely serialized the type before,
+ // no need to do it again.
+ b = &typecache[t->hash & (TypeCacheBuckets-1)];
+ if(t == b->t[0]) return;
+ for(i = 1; i < TypeCacheAssoc; i++) {
+ if(t == b->t[i]) {
+ // Move-to-front
+ for(j = i; j > 0; j--) {
+ b->t[j] = b->t[j-1];
+ }
+ b->t[0] = t;
+ return;
+ }
+ }
+ // Might not have been dumped yet. Dump it and
+ // remember we did so.
+ for(j = TypeCacheAssoc-1; j > 0; j--) {
+ b->t[j] = b->t[j-1];
+ }
+ b->t[0] = t;
+
+ // dump the type
+ dumpint(TagType);
+ dumpint((uintptr)t);
+ dumpint(t->size);
+ if(t->x == nil || t->x->pkgPath == nil || t->x->name == nil) {
+ dumpstr(*t->string);
+ } else {
+ dumpint(t->x->pkgPath->len + 1 + t->x->name->len);
+ write(t->x->pkgPath->str, t->x->pkgPath->len);
+ write((byte*)".", 1);
+ write(t->x->name->str, t->x->name->len);
+ }
+ dumpbool((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0);
+ dumpfields((BitVector){0, nil});
+}
+
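dumptype relies on the small set-associative cache declared above, with move-to-front inside each bucket so repeated types stay cheap and an overflowing bucket merely forgets its least recently used entry (re-serializing a type is harmless). A hedged Go sketch of the same caching idea; names and sizes here are illustrative, not the runtime's:

package main

import "fmt"

const (
	cacheBuckets = 256 // must be a power of 2
	cacheAssoc   = 4
)

type cache struct {
	b [cacheBuckets][cacheAssoc]string
}

// seen reports whether key was already cached, inserting it either way.
func (c *cache) seen(hash uint32, key string) bool {
	b := &c.b[hash&(cacheBuckets-1)]
	for i, k := range b {
		if k == key {
			copy(b[1:i+1], b[:i]) // move-to-front
			b[0] = key
			return true
		}
	}
	copy(b[1:], b[:cacheAssoc-1]) // evict the last slot
	b[0] = key
	return false
}

func main() {
	var c cache
	fmt.Println(c.seen(7, "T1")) // false: first time
	fmt.Println(c.seen(7, "T2")) // false
	fmt.Println(c.seen(7, "T1")) // true: cached, moved to front
}
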
+// dump an object
+static void
+dumpobj(byte *obj, uintptr size, BitVector bv)
+{
+ dumpbvtypes(&bv, obj);
+ dumpint(TagObject);
+ dumpint((uintptr)obj);
+ dumpint(0); // Type*
+ dumpint(0); // kind
+ dumpmemrange(obj, size);
+}
+
+static void
+dumpotherroot(int8 *description, byte *to)
+{
+ dumpint(TagOtherRoot);
+ dumpcstr(description);
+ dumpint((uintptr)to);
+}
+
+static void
+dumpfinalizer(byte *obj, FuncVal *fn, Type* fint, PtrType *ot)
+{
+ dumpint(TagFinalizer);
+ dumpint((uintptr)obj);
+ dumpint((uintptr)fn);
+ dumpint((uintptr)fn->fn);
+ dumpint((uintptr)fint);
+ dumpint((uintptr)ot);
+}
+
+typedef struct ChildInfo ChildInfo;
+struct ChildInfo {
+ // Information passed up from the callee frame about
+ // the layout of the outargs region.
+ uintptr argoff; // where the arguments start in the frame
+ uintptr arglen; // size of args region
+ BitVector args; // if args.n >= 0, pointer map of args region
+
+ byte *sp; // callee sp
+ uintptr depth; // depth in call stack (0 == most recent)
+};
+
+// dump kinds & offsets of interesting fields in bv
+static void
+dumpbv(BitVector *bv, uintptr offset)
+{
+ uintptr i;
+
+ for(i = 0; i < bv->n; i += BitsPerPointer) {
+ switch(bv->data[i/32] >> i%32 & 3) {
+ case BitsDead:
+ case BitsScalar:
+ break;
+ case BitsPointer:
+ dumpint(FieldKindPtr);
+ dumpint(offset + i / BitsPerPointer * PtrSize);
+ break;
+ case BitsMultiWord:
+ switch(bv->data[(i+BitsPerPointer)/32] >> (i+BitsPerPointer)%32 & 3) {
+ default:
+ runtime·throw("unexpected garbage collection bits");
+ case BitsIface:
+ dumpint(FieldKindIface);
+ dumpint(offset + i / BitsPerPointer * PtrSize);
+ i += BitsPerPointer;
+ break;
+ case BitsEface:
+ dumpint(FieldKindEface);
+ dumpint(offset + i / BitsPerPointer * PtrSize);
+ i += BitsPerPointer;
+ break;
+ }
+ }
+ }
+}
+
+static bool
+dumpframe(Stkframe *s, void *arg)
+{
+ Func *f;
+ ChildInfo *child;
+ uintptr pc, off, size;
+ int32 pcdata;
+ StackMap *stackmap;
+ int8 *name;
+ BitVector bv;
+
+ child = (ChildInfo*)arg;
+ f = s->fn;
+
+ // Figure out what we can about our stack map
+ pc = s->pc;
+ if(pc != f->entry)
+ pc--;
+ pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, pc);
+ if(pcdata == -1) {
+ // We do not have a valid pcdata value but there might be a
+ // stackmap for this function. It is likely that we are looking
+ // at the function prologue; assume so and hope for the best.
+ pcdata = 0;
+ }
+ stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
+
+ // Dump any types we will need to resolve Efaces.
+ if(child->args.n >= 0)
+ dumpbvtypes(&child->args, (byte*)s->sp + child->argoff);
+ if(stackmap != nil && stackmap->n > 0) {
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ dumpbvtypes(&bv, (byte*)(s->varp - bv.n / BitsPerPointer * PtrSize));
+ } else {
+ bv.n = -1;
+ }
+
+ // Dump main body of stack frame.
+ dumpint(TagStackFrame);
+ dumpint(s->sp); // lowest address in frame
+ dumpint(child->depth); // # of frames deep on the stack
+ dumpint((uintptr)child->sp); // sp of child, or 0 if bottom of stack
+ dumpmemrange((byte*)s->sp, s->fp - s->sp); // frame contents
+ dumpint(f->entry);
+ dumpint(s->pc);
+ dumpint(s->continpc);
+ name = runtime·funcname(f);
+ if(name == nil)
+ name = "unknown function";
+ dumpcstr(name);
+
+ // Dump fields in the outargs section
+ if(child->args.n >= 0) {
+ dumpbv(&child->args, child->argoff);
+ } else {
+ // conservative - everything might be a pointer
+ for(off = child->argoff; off < child->argoff + child->arglen; off += PtrSize) {
+ dumpint(FieldKindPtr);
+ dumpint(off);
+ }
+ }
+
+ // Dump fields in the local vars section
+ if(stackmap == nil) {
+ // No locals information, dump everything.
+ for(off = child->arglen; off < s->varp - s->sp; off += PtrSize) {
+ dumpint(FieldKindPtr);
+ dumpint(off);
+ }
+ } else if(stackmap->n < 0) {
+ // Locals size information, dump just the locals.
+ size = -stackmap->n;
+ for(off = s->varp - size - s->sp; off < s->varp - s->sp; off += PtrSize) {
+ dumpint(FieldKindPtr);
+ dumpint(off);
+ }
+ } else if(stackmap->n > 0) {
+ // Locals bitmap information, scan just the pointers in
+ // locals.
+ dumpbv(&bv, s->varp - bv.n / BitsPerPointer * PtrSize - s->sp);
+ }
+ dumpint(FieldKindEol);
+
+ // Record arg info for parent.
+ child->argoff = s->argp - s->fp;
+ child->arglen = s->arglen;
+ child->sp = (byte*)s->sp;
+ child->depth++;
+ stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
+ if(stackmap != nil)
+ child->args = runtime·stackmapdata(stackmap, pcdata);
+ else
+ child->args.n = -1;
+ return true;
+}
+
+static void
+dumpgoroutine(G *gp)
+{
+ uintptr sp, pc, lr;
+ ChildInfo child;
+ Defer *d;
+ Panic *p;
+ bool (*fn)(Stkframe*, void*);
+
+ if(gp->syscallstack != (uintptr)nil) {
+ sp = gp->syscallsp;
+ pc = gp->syscallpc;
+ lr = 0;
+ } else {
+ sp = gp->sched.sp;
+ pc = gp->sched.pc;
+ lr = gp->sched.lr;
+ }
+
+ dumpint(TagGoRoutine);
+ dumpint((uintptr)gp);
+ dumpint((uintptr)sp);
+ dumpint(gp->goid);
+ dumpint(gp->gopc);
+ dumpint(runtime·readgstatus(gp));
+ dumpbool(gp->issystem);
+ dumpbool(false); // isbackground
+ dumpint(gp->waitsince);
+ dumpstr(gp->waitreason);
+ dumpint((uintptr)gp->sched.ctxt);
+ dumpint((uintptr)gp->m);
+ dumpint((uintptr)gp->defer);
+ dumpint((uintptr)gp->panic);
+
+ // dump stack
+ child.args.n = -1;
+ child.arglen = 0;
+ child.sp = nil;
+ child.depth = 0;
+ if(!ScanStackByFrames)
+ runtime·throw("need frame info to dump stacks");
+ fn = dumpframe;
+ runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, &fn, &child, false);
+
+ // dump defer & panic records
+ for(d = gp->defer; d != nil; d = d->link) {
+ dumpint(TagDefer);
+ dumpint((uintptr)d);
+ dumpint((uintptr)gp);
+ dumpint((uintptr)d->argp);
+ dumpint((uintptr)d->pc);
+ dumpint((uintptr)d->fn);
+ dumpint((uintptr)d->fn->fn);
+ dumpint((uintptr)d->link);
+ }
+ for (p = gp->panic; p != nil; p = p->link) {
+ dumpint(TagPanic);
+ dumpint((uintptr)p);
+ dumpint((uintptr)gp);
+ dumpint((uintptr)p->arg.type);
+ dumpint((uintptr)p->arg.data);
+ dumpint((uintptr)p->defer);
+ dumpint((uintptr)p->link);
+ }
+}
+
+static void
+dumpgs(void)
+{
+ G *gp;
+ uint32 i;
+ uint32 status;
+
+ // goroutines & stacks
+ for(i = 0; i < runtime·allglen; i++) {
+ gp = runtime·allg[i];
+ status = runtime·readgstatus(gp); // The world is stopped so gp will not be in a scan state.
+ switch(status){
+ default:
+ runtime·printf("runtime: unexpected G.status %d\n", status);
+ runtime·throw("dumpgs in STW - bad status");
+ case Gdead:
+ break;
+ case Grunnable:
+ case Gsyscall:
+ case Gwaiting:
+ dumpgoroutine(gp);
+ break;
+ }
+ }
+}
+
+static void
+finq_callback(FuncVal *fn, byte *obj, uintptr nret, Type *fint, PtrType *ot)
+{
+ dumpint(TagQueuedFinalizer);
+ dumpint((uintptr)obj);
+ dumpint((uintptr)fn);
+ dumpint((uintptr)fn->fn);
+ dumpint((uintptr)fint);
+ dumpint((uintptr)ot);
+ USED(&nret);
+}
+
+
+static void
+dumproots(void)
+{
+ MSpan *s, **allspans;
+ uint32 spanidx;
+ Special *sp;
+ SpecialFinalizer *spf;
+ byte *p;
+
+ // data segment
+ dumpint(TagData);
+ dumpint((uintptr)runtime·data);
+ dumpmemrange(runtime·data, runtime·edata - runtime·data);
+ dumpfields(runtime·gcdatamask);
+
+ // bss segment
+ dumpint(TagBss);
+ dumpint((uintptr)runtime·bss);
+ dumpmemrange(runtime·bss, runtime·ebss - runtime·bss);
+ dumpfields(runtime·gcdatamask);
+
+ // MSpan.types
+ allspans = runtime·mheap.allspans;
+ for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
+ s = allspans[spanidx];
+ if(s->state == MSpanInUse) {
+ // Finalizers
+ for(sp = s->specials; sp != nil; sp = sp->next) {
+ if(sp->kind != KindSpecialFinalizer)
+ continue;
+ spf = (SpecialFinalizer*)sp;
+ p = (byte*)((s->start << PageShift) + spf->special.offset);
+ dumpfinalizer(p, spf->fn, spf->fint, spf->ot);
+ }
+ }
+ }
+
+ // Finalizer queue
+ runtime·iterate_finq(finq_callback);
+}
+
+// Bit vector of free marks.
+// Needs to be as big as the largest number of objects per span.
+#pragma dataflag NOPTR
+static byte free[PageSize/8];
+
+static void
+dumpobjs(void)
+{
+ uintptr i, j, size, n;
+ MSpan *s;
+ MLink *l;
+ byte *p;
+
+ for(i = 0; i < runtime·mheap.nspan; i++) {
+ s = runtime·mheap.allspans[i];
+ if(s->state != MSpanInUse)
+ continue;
+ p = (byte*)(s->start << PageShift);
+ size = s->elemsize;
+ n = (s->npages << PageShift) / size;
+ if(n > nelem(free))
+ runtime·throw("free array doesn't have enough entries");
+ for(l = s->freelist; l != nil; l = l->next)
+ free[((byte*)l - p) / size] = true;
+ for(j = 0; j < n; j++, p += size) {
+ if(free[j]) {
+ free[j] = false;
+ continue;
+ }
+ dumpobj(p, size, makeheapobjbv(p, size));
+ }
+ }
+}
+
+static void
+dumpparams(void)
+{
+ byte *x;
+
+ dumpint(TagParams);
+ x = (byte*)1;
+ if(*(byte*)&x == 1)
+ dumpbool(false); // little-endian ptrs
+ else
+ dumpbool(true); // big-endian ptrs
+ dumpint(PtrSize);
+ dumpint((uintptr)runtime·mheap.arena_start);
+ dumpint((uintptr)runtime·mheap.arena_used);
+ dumpint(thechar);
+ dumpcstr(GOEXPERIMENT);
+ dumpint(runtime·ncpu);
+}
+
+static void
+itab_callback(Itab *tab)
+{
+ Type *t;
+
+ dumpint(TagItab);
+ dumpint((uintptr)tab);
+ t = tab->type;
+ dumpbool((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0);
+}
+
+static void
+dumpitabs(void)
+{
+ void (*fn)(Itab*);
+
+ fn = itab_callback;
+ runtime·iterate_itabs(&fn);
+}
+
+static void
+dumpms(void)
+{
+ M *mp;
+
+ for(mp = runtime·allm; mp != nil; mp = mp->alllink) {
+ dumpint(TagOSThread);
+ dumpint((uintptr)mp);
+ dumpint(mp->id);
+ dumpint(mp->procid);
+ }
+}
+
+static void
+dumpmemstats(void)
+{
+ int32 i;
+
+ dumpint(TagMemStats);
+ dumpint(mstats.alloc);
+ dumpint(mstats.total_alloc);
+ dumpint(mstats.sys);
+ dumpint(mstats.nlookup);
+ dumpint(mstats.nmalloc);
+ dumpint(mstats.nfree);
+ dumpint(mstats.heap_alloc);
+ dumpint(mstats.heap_sys);
+ dumpint(mstats.heap_idle);
+ dumpint(mstats.heap_inuse);
+ dumpint(mstats.heap_released);
+ dumpint(mstats.heap_objects);
+ dumpint(mstats.stacks_inuse);
+ dumpint(mstats.stacks_sys);
+ dumpint(mstats.mspan_inuse);
+ dumpint(mstats.mspan_sys);
+ dumpint(mstats.mcache_inuse);
+ dumpint(mstats.mcache_sys);
+ dumpint(mstats.buckhash_sys);
+ dumpint(mstats.gc_sys);
+ dumpint(mstats.other_sys);
+ dumpint(mstats.next_gc);
+ dumpint(mstats.last_gc);
+ dumpint(mstats.pause_total_ns);
+ for(i = 0; i < 256; i++)
+ dumpint(mstats.pause_ns[i]);
+ dumpint(mstats.numgc);
+}
+
+static void
+dumpmemprof_callback(Bucket *b, uintptr nstk, uintptr *stk, uintptr size, uintptr allocs, uintptr frees)
+{
+ uintptr i, pc;
+ Func *f;
+ byte buf[20];
+ String file;
+ int32 line;
+
+ dumpint(TagMemProf);
+ dumpint((uintptr)b);
+ dumpint(size);
+ dumpint(nstk);
+ for(i = 0; i < nstk; i++) {
+ pc = stk[i];
+ f = runtime·findfunc(pc);
+ if(f == nil) {
+ runtime·snprintf(buf, sizeof(buf), "%X", (uint64)pc);
+ dumpcstr((int8*)buf);
+ dumpcstr("?");
+ dumpint(0);
+ } else {
+ dumpcstr(runtime·funcname(f));
+ // TODO: Why do we need to back up to a call instruction here?
+ // Maybe profiler should do this.
+ if(i > 0 && pc > f->entry) {
+ if(thechar == '6' || thechar == '8')
+ pc--;
+ else
+ pc -= 4; // arm, etc
+ }
+ line = runtime·funcline(f, pc, &file);
+ dumpstr(file);
+ dumpint(line);
+ }
+ }
+ dumpint(allocs);
+ dumpint(frees);
+}
+
+static void
+dumpmemprof(void)
+{
+ MSpan *s, **allspans;
+ uint32 spanidx;
+ Special *sp;
+ SpecialProfile *spp;
+ byte *p;
+ void (*fn)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr);
+
+ fn = dumpmemprof_callback;
+ runtime·iterate_memprof(&fn);
+
+ allspans = runtime·mheap.allspans;
+ for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
+ s = allspans[spanidx];
+ if(s->state != MSpanInUse)
+ continue;
+ for(sp = s->specials; sp != nil; sp = sp->next) {
+ if(sp->kind != KindSpecialProfile)
+ continue;
+ spp = (SpecialProfile*)sp;
+ p = (byte*)((s->start << PageShift) + spp->special.offset);
+ dumpint(TagAllocSample);
+ dumpint((uintptr)p);
+ dumpint((uintptr)spp->b);
+ }
+ }
+}
+
+static void
+mdump(G *gp)
+{
+ byte *hdr;
+ uintptr i;
+ MSpan *s;
+
+ // make sure we're done sweeping
+ for(i = 0; i < runtime·mheap.nspan; i++) {
+ s = runtime·mheap.allspans[i];
+ if(s->state == MSpanInUse)
+ runtime·MSpan_EnsureSwept(s);
+ }
+
+ runtime·memclr((byte*)&typecache[0], sizeof(typecache));
+ hdr = (byte*)"go1.3 heap dump\n";
+ write(hdr, runtime·findnull(hdr));
+ dumpparams();
+ dumpitabs();
+ dumpobjs();
+ dumpgs();
+ dumpms();
+ dumproots();
+ dumpmemstats();
+ dumpmemprof();
+ dumpint(TagEOF);
+ flush();
+
+ gp->param = nil;
+ runtime·casgstatus(gp, Gwaiting, Grunning);
+ runtime·gogo(&gp->sched);
+}
+
+void
+runtime∕debug·WriteHeapDump(uintptr fd)
+{
+ void (*fn)(G*);
+
+ // Stop the world.
+ runtime·semacquire(&runtime·worldsema, false);
+ g->m->gcing = 1;
+ runtime·stoptheworld();
+
+ // Update stats so we can dump them.
+ // As a side effect, flushes all the MCaches so the MSpan.freelist
+ // lists contain all the free objects.
+ runtime·updatememstats(nil);
+
+ // Set dump file.
+ dumpfd = fd;
+
+ // Call dump routine on M stack.
+ runtime·casgstatus(g, Grunning, Gwaiting);
+ g->waitreason = runtime·gostringnocopy((byte*)"dumping heap");
+ fn = mdump;
+ runtime·mcall(&fn);
+
+ // Reset dump file.
+ dumpfd = 0;
+ if(tmpbuf != nil) {
+ runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
+ tmpbuf = nil;
+ tmpbufsize = 0;
+ }
+
+ // Start up the world again.
+ g->m->gcing = 0;
+ g->m->locks++;
+ runtime·semrelease(&runtime·worldsema);
+ runtime·starttheworld();
+ g->m->locks--;
+}
+
+// dumpint() the kind & offset of each field in an object.
+static void
+dumpfields(BitVector bv)
+{
+ dumpbv(&bv, 0);
+ dumpint(FieldKindEol);
+}
+
+// The heap dump reader needs to be able to disambiguate
+// Eface entries. So it needs to know every type that might
+// appear in such an entry. The following routine accomplishes that.
+
+// Dump all the types that appear in the type field of
+// any Eface described by this bit vector.
+static void
+dumpbvtypes(BitVector *bv, byte *base)
+{
+ uintptr i;
+
+ for(i = 0; i < bv->n; i += BitsPerPointer) {
+ if((bv->data[i/32] >> i%32 & 3) != BitsMultiWord)
+ continue;
+ switch(bv->data[(i+BitsPerPointer)/32] >> (i+BitsPerPointer)%32 & 3) {
+ default:
+ runtime·throw("unexpected garbage collection bits");
+ case BitsIface:
+ i += BitsPerPointer;
+ break;
+ case BitsEface:
+ dumptype(*(Type**)(base + i / BitsPerPointer * PtrSize));
+ i += BitsPerPointer;
+ break;
+ }
+ }
+}
+
+static BitVector
+makeheapobjbv(byte *p, uintptr size)
+{
+ uintptr off, nptr, i;
+ byte shift, *bitp, bits;
+ bool mw;
+
+ // Extend the temp buffer if necessary.
+ nptr = size/PtrSize;
+ if(tmpbufsize < nptr*BitsPerPointer/8+1) {
+ if(tmpbuf != nil)
+ runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
+ tmpbufsize = nptr*BitsPerPointer/8+1;
+ tmpbuf = runtime·sysAlloc(tmpbufsize, &mstats.other_sys);
+ if(tmpbuf == nil)
+ runtime·throw("heapdump: out of memory");
+ }
+
+ // Copy and compact the bitmap.
+ mw = false;
+ for(i = 0; i < nptr; i++) {
+ off = (uintptr*)(p + i*PtrSize) - (uintptr*)runtime·mheap.arena_start;
+ bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ bits = (*bitp >> (shift + 2)) & BitsMask;
+ if(!mw && bits == BitsDead)
+ break; // end of heap object
+ mw = !mw && bits == BitsMultiWord;
+ tmpbuf[i*BitsPerPointer/8] &= ~(BitsMask<<((i*BitsPerPointer)%8));
+ tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8);
+ }
+ return (BitVector){i*BitsPerPointer, (uint32*)tmpbuf};
+}
diff --git a/src/runtime/iface.go b/src/runtime/iface.go
new file mode 100644
index 000000000..f60b6a79c
--- /dev/null
+++ b/src/runtime/iface.go
@@ -0,0 +1,439 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+const (
+ hashSize = 1009
+)
+
+var (
+ ifaceLock mutex // lock for accessing hash
+ hash [hashSize]*itab
+)
+
+// fInterface is our standard non-empty interface. We use it instead
+// of interface{f()} in function prototypes because gofmt insists on
+// putting lots of newlines in the otherwise concise interface{f()}.
+type fInterface interface {
+ f()
+}
+
+func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
+ if len(inter.mhdr) == 0 {
+ gothrow("internal error - misuse of itab")
+ }
+
+ // easy case
+ x := typ.x
+ if x == nil {
+ if canfail {
+ return nil
+ }
+ i := (*imethod)(add(unsafe.Pointer(inter), unsafe.Sizeof(interfacetype{})))
+ panic(&TypeAssertionError{"", *typ._string, *inter.typ._string, *i.name})
+ }
+
+ // compiler has provided some good hash codes for us.
+ h := inter.typ.hash
+ h += 17 * typ.hash
+ // TODO(rsc): h += 23 * x.mhash ?
+ h %= hashSize
+
+ // look twice - once without lock, once with.
+ // common case will be no lock contention.
+ var m *itab
+ var locked int
+ for locked = 0; locked < 2; locked++ {
+ if locked != 0 {
+ lock(&ifaceLock)
+ }
+ for m = (*itab)(atomicloadp(unsafe.Pointer(&hash[h]))); m != nil; m = m.link {
+ if m.inter == inter && m._type == typ {
+ if m.bad != 0 {
+ m = nil
+ if !canfail {
+ // this can only happen if the conversion
+ // was already done once using the , ok form
+ // and we have a cached negative result.
+ // the cached result doesn't record which
+ // interface function was missing, so jump
+ // down to the interface check, which will
+ // do more work but give a better error.
+ goto search
+ }
+ }
+ if locked != 0 {
+ unlock(&ifaceLock)
+ }
+ return m
+ }
+ }
+ }
+
+ m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(inter.mhdr))*ptrSize, 0, &memstats.other_sys))
+ m.inter = inter
+ m._type = typ
+
+search:
+ // both inter and typ have their methods sorted by name,
+ // and interface names are unique,
+ // so we can iterate over both in lock step;
+ // the loop is O(ni+nt), not O(ni*nt).
+ ni := len(inter.mhdr)
+ nt := len(x.mhdr)
+ j := 0
+ for k := 0; k < ni; k++ {
+ i := (*imethod)(add(unsafe.Pointer(inter), unsafe.Sizeof(interfacetype{})+uintptr(k)*unsafe.Sizeof(imethod{})))
+ iname := i.name
+ ipkgpath := i.pkgpath
+ itype := i._type
+ for ; j < nt; j++ {
+ t := (*method)(add(unsafe.Pointer(x), unsafe.Sizeof(uncommontype{})+uintptr(j)*unsafe.Sizeof(method{})))
+ if t.mtyp == itype && t.name == iname && t.pkgpath == ipkgpath {
+ if m != nil {
+ *(*unsafe.Pointer)(add(unsafe.Pointer(m), unsafe.Sizeof(itab{})+uintptr(k)*ptrSize)) = t.ifn
+ }
+ goto nextimethod
+ }
+ }
+ // didn't find method
+ if !canfail {
+ if locked != 0 {
+ unlock(&ifaceLock)
+ }
+ panic(&TypeAssertionError{"", *typ._string, *inter.typ._string, *iname})
+ }
+ m.bad = 1
+ break
+ nextimethod:
+ }
+ if locked == 0 {
+ gothrow("invalid itab locking")
+ }
+ m.link = hash[h]
+ atomicstorep(unsafe.Pointer(&hash[h]), unsafe.Pointer(m))
+ unlock(&ifaceLock)
+ if m.bad != 0 {
+ return nil
+ }
+ return m
+}
+
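The itab-filling loop works because both method lists are sorted by name, so a single lock-step pass over each suffices (O(ni+nt) rather than O(ni*nt)). A hedged sketch of that merge using plain string slices and hypothetical method names:

package main

import "fmt"

// satisfies reports whether every interface method appears in the
// name-sorted type method list, scanning each list once.
func satisfies(ifaceMethods, typeMethods []string) bool {
	j := 0
	for _, im := range ifaceMethods {
		for j < len(typeMethods) && typeMethods[j] < im {
			j++
		}
		if j == len(typeMethods) || typeMethods[j] != im {
			return false // missing method
		}
		j++
	}
	return true
}

func main() {
	fmt.Println(satisfies([]string{"Read", "Write"}, []string{"Close", "Read", "Seek", "Write"})) // true
	fmt.Println(satisfies([]string{"Read", "Write"}, []string{"Close", "Read"}))                  // false
}
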
+func typ2Itab(t *_type, inter *interfacetype, cache **itab) *itab {
+ tab := getitab(inter, t, false)
+ atomicstorep(unsafe.Pointer(cache), unsafe.Pointer(tab))
+ return tab
+}
+
+func convT2E(t *_type, elem unsafe.Pointer) (e interface{}) {
+ size := uintptr(t.size)
+ ep := (*eface)(unsafe.Pointer(&e))
+ if isDirectIface(t) {
+ ep._type = t
+ memmove(unsafe.Pointer(&ep.data), elem, size)
+ } else {
+ x := newobject(t)
+ // TODO: We allocate a zeroed object only to overwrite it with
+ // actual data. Figure out how to avoid zeroing. Also below in convT2I.
+ memmove(x, elem, size)
+ ep._type = t
+ ep.data = x
+ }
+ return
+}
+
+func convT2I(t *_type, inter *interfacetype, cache **itab, elem unsafe.Pointer) (i fInterface) {
+ tab := (*itab)(atomicloadp(unsafe.Pointer(cache)))
+ if tab == nil {
+ tab = getitab(inter, t, false)
+ atomicstorep(unsafe.Pointer(cache), unsafe.Pointer(tab))
+ }
+ size := uintptr(t.size)
+ pi := (*iface)(unsafe.Pointer(&i))
+ if isDirectIface(t) {
+ pi.tab = tab
+ memmove(unsafe.Pointer(&pi.data), elem, size)
+ } else {
+ x := newobject(t)
+ memmove(x, elem, size)
+ pi.tab = tab
+ pi.data = x
+ }
+ return
+}
+
+// TODO: give these routines a pointer to the result area instead of writing
+// extra data in the outargs section. Then we can get rid of go:nosplit.
+//go:nosplit
+func assertI2T(t *_type, i fInterface) (r struct{}) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ panic(&TypeAssertionError{"", "", *t._string, ""})
+ }
+ if tab._type != t {
+ panic(&TypeAssertionError{*tab.inter.typ._string, *tab._type._string, *t._string, ""})
+ }
+ size := uintptr(t.size)
+ if isDirectIface(t) {
+ memmove(unsafe.Pointer(&r), unsafe.Pointer(&ip.data), size)
+ } else {
+ memmove(unsafe.Pointer(&r), ip.data, size)
+ }
+ return
+}
+
+//go:nosplit
+func assertI2T2(t *_type, i fInterface) (r byte) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ size := uintptr(t.size)
+ ok := (*bool)(add(unsafe.Pointer(&r), size))
+ tab := ip.tab
+ if tab == nil || tab._type != t {
+ *ok = false
+ memclr(unsafe.Pointer(&r), size)
+ return
+ }
+ *ok = true
+ if isDirectIface(t) {
+ memmove(unsafe.Pointer(&r), unsafe.Pointer(&ip.data), size)
+ } else {
+ memmove(unsafe.Pointer(&r), ip.data, size)
+ }
+ return
+}
+
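assertI2T and assertI2T2 are the two halves of the familiar type-assertion forms: the single-result form panics with a *TypeAssertionError, while the comma-ok form zeroes the result and reports false. A user-level illustration:

package main

import "fmt"

func main() {
	var e interface{} = "hello"

	n, ok := e.(int)   // comma-ok form: no panic
	fmt.Println(n, ok) // 0 false

	defer func() { fmt.Println("recovered:", recover()) }()
	_ = e.(int) // single-result form panics
}
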
+func assertI2TOK(t *_type, i fInterface) bool {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ return tab != nil && tab._type == t
+}
+
+//go:nosplit
+func assertE2T(t *_type, e interface{}) (r struct{}) {
+ ep := (*eface)(unsafe.Pointer(&e))
+ if ep._type == nil {
+ panic(&TypeAssertionError{"", "", *t._string, ""})
+ }
+ if ep._type != t {
+ panic(&TypeAssertionError{"", *ep._type._string, *t._string, ""})
+ }
+ size := uintptr(t.size)
+ if isDirectIface(t) {
+ memmove(unsafe.Pointer(&r), unsafe.Pointer(&ep.data), size)
+ } else {
+ memmove(unsafe.Pointer(&r), ep.data, size)
+ }
+ return
+}
+
+//go:nosplit
+func assertE2T2(t *_type, e interface{}) (r byte) {
+ ep := (*eface)(unsafe.Pointer(&e))
+ size := uintptr(t.size)
+ ok := (*bool)(add(unsafe.Pointer(&r), size))
+ if ep._type != t {
+ *ok = false
+ memclr(unsafe.Pointer(&r), size)
+ return
+ }
+ *ok = true
+ if isDirectIface(t) {
+ memmove(unsafe.Pointer(&r), unsafe.Pointer(&ep.data), size)
+ } else {
+ memmove(unsafe.Pointer(&r), ep.data, size)
+ }
+ return
+}
+
+func assertE2TOK(t *_type, e interface{}) bool {
+ ep := (*eface)(unsafe.Pointer(&e))
+ return t == ep._type
+}
+
+func convI2E(i fInterface) (r interface{}) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ return
+ }
+ rp := (*eface)(unsafe.Pointer(&r))
+ rp._type = tab._type
+ rp.data = ip.data
+ return
+}
+
+func assertI2E(inter *interfacetype, i fInterface) (r interface{}) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ // explicit conversions require non-nil interface value.
+ panic(&TypeAssertionError{"", "", *inter.typ._string, ""})
+ }
+ rp := (*eface)(unsafe.Pointer(&r))
+ rp._type = tab._type
+ rp.data = ip.data
+ return
+}
+
+func assertI2E2(inter *interfacetype, i fInterface) (r interface{}, ok bool) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ return
+ }
+ rp := (*eface)(unsafe.Pointer(&r))
+ rp._type = tab._type
+ rp.data = ip.data
+ ok = true
+ return
+}
+
+func convI2I(inter *interfacetype, i fInterface) (r fInterface) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ return
+ }
+ rp := (*iface)(unsafe.Pointer(&r))
+ if tab.inter == inter {
+ rp.tab = tab
+ rp.data = ip.data
+ return
+ }
+ rp.tab = getitab(inter, tab._type, false)
+ rp.data = ip.data
+ return
+}
+
+func assertI2I(inter *interfacetype, i fInterface) (r fInterface) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ // explicit conversions require non-nil interface value.
+ panic(&TypeAssertionError{"", "", *inter.typ._string, ""})
+ }
+ rp := (*iface)(unsafe.Pointer(&r))
+ if tab.inter == inter {
+ rp.tab = tab
+ rp.data = ip.data
+ return
+ }
+ rp.tab = getitab(inter, tab._type, false)
+ rp.data = ip.data
+ return
+}
+
+func assertI2I2(inter *interfacetype, i fInterface) (r fInterface, ok bool) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ return
+ }
+ rp := (*iface)(unsafe.Pointer(&r))
+ if tab.inter == inter {
+ rp.tab = tab
+ rp.data = ip.data
+ ok = true
+ return
+ }
+ tab = getitab(inter, tab._type, true)
+ if tab == nil {
+ rp.data = nil
+ rp.tab = nil
+ ok = false
+ return
+ }
+ rp.tab = tab
+ rp.data = ip.data
+ ok = true
+ return
+}
+
+func assertE2I(inter *interfacetype, e interface{}) (r fInterface) {
+ ep := (*eface)(unsafe.Pointer(&e))
+ t := ep._type
+ if t == nil {
+ // explicit conversions require non-nil interface value.
+ panic(&TypeAssertionError{"", "", *inter.typ._string, ""})
+ }
+ rp := (*iface)(unsafe.Pointer(&r))
+ rp.tab = getitab(inter, t, false)
+ rp.data = ep.data
+ return
+}
+
+func assertE2I2(inter *interfacetype, e interface{}) (r fInterface, ok bool) {
+ ep := (*eface)(unsafe.Pointer(&e))
+ t := ep._type
+ if t == nil {
+ return
+ }
+ tab := getitab(inter, t, true)
+ if tab == nil {
+ return
+ }
+ rp := (*iface)(unsafe.Pointer(&r))
+ rp.tab = tab
+ rp.data = ep.data
+ ok = true
+ return
+}
+
+func reflect_ifaceE2I(inter *interfacetype, e interface{}, dst *fInterface) {
+ *dst = assertE2I(inter, e)
+}
+
+func assertE2E(inter *interfacetype, e interface{}) interface{} {
+ ep := (*eface)(unsafe.Pointer(&e))
+ if ep._type == nil {
+ // explicit conversions require non-nil interface value.
+ panic(&TypeAssertionError{"", "", *inter.typ._string, ""})
+ }
+ return e
+}
+
+func assertE2E2(inter *interfacetype, e interface{}) (interface{}, bool) {
+ ep := (*eface)(unsafe.Pointer(&e))
+ if ep._type == nil {
+ return nil, false
+ }
+ return e, true
+}
+
+func ifacethash(i fInterface) uint32 {
+ ip := (*iface)(unsafe.Pointer(&i))
+ tab := ip.tab
+ if tab == nil {
+ return 0
+ }
+ return tab._type.hash
+}
+
+func efacethash(e interface{}) uint32 {
+ ep := (*eface)(unsafe.Pointer(&e))
+ t := ep._type
+ if t == nil {
+ return 0
+ }
+ return t.hash
+}
+
+func iterate_itabs(fn func(*itab)) {
+ for _, h := range &hash {
+ for ; h != nil; h = h.link {
+ fn(h)
+ }
+ }
+}
+
+func ifaceE2I2(inter *interfacetype, e interface{}, r *fInterface) (ok bool) {
+ *r, ok = assertE2I2(inter, e)
+ return
+}
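
For readers mapping these helpers back to source-level Go: a minimal sketch of which conversion and assertion forms correspond to the one-result, comma-ok, and OK-only variants above (the exact lowering is the compiler's business; this only illustrates the language forms involved).

package main

import "fmt"

type Stringer interface {
    String() string
}

type ID uint64

func (id ID) String() string { return fmt.Sprintf("id-%d", id) }

func main() {
    var e interface{} = ID(7) // concrete value -> empty interface (convT2E-style)
    var s Stringer = ID(8)    // concrete value -> interface with methods (convT2I-style)

    t := e.(ID)             // panics with *TypeAssertionError on mismatch (assertE2T-style)
    t2, ok := e.(ID)        // comma-ok form, never panics (assertE2T2-style)
    s2, ok2 := e.(Stringer) // empty interface -> interface with methods (assertE2I2-style)
    e2 := s.(interface{})   // interface -> empty interface (assertI2E-style)

    fmt.Println(t, t2, ok, s2, ok2, e2)
}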
diff --git a/src/runtime/iface_test.go b/src/runtime/iface_test.go
new file mode 100644
index 000000000..bca0ea0ee
--- /dev/null
+++ b/src/runtime/iface_test.go
@@ -0,0 +1,138 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "testing"
+)
+
+type I1 interface {
+ Method1()
+}
+
+type I2 interface {
+ Method1()
+ Method2()
+}
+
+type TS uint16
+type TM uintptr
+type TL [2]uintptr
+
+func (TS) Method1() {}
+func (TS) Method2() {}
+func (TM) Method1() {}
+func (TM) Method2() {}
+func (TL) Method1() {}
+func (TL) Method2() {}
+
+var (
+ e interface{}
+ e_ interface{}
+ i1 I1
+ i2 I2
+ ts TS
+ tm TM
+ tl TL
+)
+
+func BenchmarkConvT2ESmall(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = ts
+ }
+}
+
+func BenchmarkConvT2EUintptr(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = tm
+ }
+}
+
+func BenchmarkConvT2ELarge(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ e = tl
+ }
+}
+
+func BenchmarkConvT2ISmall(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ i1 = ts
+ }
+}
+
+func BenchmarkConvT2IUintptr(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ i1 = tm
+ }
+}
+
+func BenchmarkConvT2ILarge(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ i1 = tl
+ }
+}
+
+func BenchmarkConvI2E(b *testing.B) {
+ i2 = tm
+ for i := 0; i < b.N; i++ {
+ e = i2
+ }
+}
+
+func BenchmarkConvI2I(b *testing.B) {
+ i2 = tm
+ for i := 0; i < b.N; i++ {
+ i1 = i2
+ }
+}
+
+func BenchmarkAssertE2T(b *testing.B) {
+ e = tm
+ for i := 0; i < b.N; i++ {
+ tm = e.(TM)
+ }
+}
+
+func BenchmarkAssertE2TLarge(b *testing.B) {
+ e = tl
+ for i := 0; i < b.N; i++ {
+ tl = e.(TL)
+ }
+}
+
+func BenchmarkAssertE2I(b *testing.B) {
+ e = tm
+ for i := 0; i < b.N; i++ {
+ i1 = e.(I1)
+ }
+}
+
+func BenchmarkAssertI2T(b *testing.B) {
+ i1 = tm
+ for i := 0; i < b.N; i++ {
+ tm = i1.(TM)
+ }
+}
+
+func BenchmarkAssertI2I(b *testing.B) {
+ i1 = tm
+ for i := 0; i < b.N; i++ {
+ i2 = i1.(I2)
+ }
+}
+
+func BenchmarkAssertI2E(b *testing.B) {
+ i1 = tm
+ for i := 0; i < b.N; i++ {
+ e = i1.(interface{})
+ }
+}
+
+func BenchmarkAssertE2E(b *testing.B) {
+ e = tm
+ for i := 0; i < b.N; i++ {
+ e_ = e
+ }
+}
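
The package-level sinks above (e, e_, i1, i2, ...) are what keep the compiler from optimizing the conversions away. The same pattern works for measuring interface conversions of one's own types; a minimal sketch (file, package, and type names are made up), runnable with `go test -bench=.`:

// conv_bench_test.go (hypothetical)
package conv_test

import "testing"

type point struct{ x, y int }

// Package-level sink: assigning to it forces the conversion to happen on
// every iteration instead of being optimized away.
var sink interface{}

func BenchmarkPointToEmptyInterface(b *testing.B) {
    p := point{1, 2}
    for i := 0; i < b.N; i++ {
        sink = p // boxes the struct into an empty interface each time
    }
}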
diff --git a/src/runtime/lfstack.c b/src/runtime/lfstack.c
new file mode 100644
index 000000000..57e0af282
--- /dev/null
+++ b/src/runtime/lfstack.c
@@ -0,0 +1,87 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Lock-free stack.
+// The following code runs only on g0 stack.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+
+#ifdef _64BIT
+// AMD64 uses 48-bit virtual addresses; the 47th bit is used as the kernel/user flag.
+// So we use the 17 most significant bits of pointers as the ABA counter.
+# define PTR_BITS 47
+#else
+# define PTR_BITS 32
+#endif
+#define PTR_MASK ((1ull<<PTR_BITS)-1)
+#define CNT_MASK (0ull-1)
+
+#ifdef _64BIT
+#ifdef GOOS_solaris
+// SPARC64, and Solaris on AMD64, use all 64 bits of virtual addresses.
+// Use the low-order three bits as the ABA counter.
+// http://docs.oracle.com/cd/E19120-01/open.solaris/816-5138/6mba6ua5p/index.html
+#undef PTR_BITS
+#undef CNT_MASK
+#undef PTR_MASK
+#define PTR_BITS 0
+#define CNT_MASK 7
+#define PTR_MASK ((0ull-1)<<3)
+#endif
+#endif
+
+void
+runtime·lfstackpush(uint64 *head, LFNode *node)
+{
+ uint64 old, new;
+
+ if((uintptr)node != ((uintptr)node&PTR_MASK)) {
+ runtime·printf("p=%p\n", node);
+ runtime·throw("runtime·lfstackpush: invalid pointer");
+ }
+
+ node->pushcnt++;
+ new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS);
+ for(;;) {
+ old = runtime·atomicload64(head);
+ node->next = (LFNode*)(uintptr)(old&PTR_MASK);
+ if(runtime·cas64(head, old, new))
+ break;
+ }
+}
+
+LFNode*
+runtime·lfstackpop(uint64 *head)
+{
+ LFNode *node, *node2;
+ uint64 old, new;
+
+ for(;;) {
+ old = runtime·atomicload64(head);
+ if(old == 0)
+ return nil;
+ node = (LFNode*)(uintptr)(old&PTR_MASK);
+ node2 = runtime·atomicloadp(&node->next);
+ new = 0;
+ if(node2 != nil)
+ new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt&CNT_MASK)<<PTR_BITS);
+ if(runtime·cas64(head, old, new))
+ return node;
+ }
+}
+
+void
+runtime·lfstackpush_m(void)
+{
+ runtime·lfstackpush(g->m->ptrarg[0], g->m->ptrarg[1]);
+ g->m->ptrarg[0] = nil;
+ g->m->ptrarg[1] = nil;
+}
+
+void
+runtime·lfstackpop_m(void)
+{
+ g->m->ptrarg[0] = runtime·lfstackpop(g->m->ptrarg[0]);
+}
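
The pointer/counter packing above is what defeats the ABA problem: even if the same node address is pushed again, the push counter stored in the top bits changes the packed head word, so a stale CAS fails. A standalone Go sketch of the 64-bit, non-Solaris layout (47 pointer bits, counter in the remaining 17 bits):

package main

import "fmt"

const (
    ptrBits = 47
    ptrMask = uint64(1)<<ptrBits - 1
)

// pack combines a node address and its push count into the single word that
// lfstackpush CASes into the head. The counter occupies the top 17 bits and
// wraps naturally.
func pack(addr, pushcnt uint64) uint64 {
    if addr&^ptrMask != 0 {
        panic("invalid pointer") // mirrors the runtime·throw above
    }
    return addr | pushcnt<<ptrBits
}

func unpack(v uint64) (addr, cnt uint64) {
    return v & ptrMask, v >> ptrBits
}

func main() {
    v := pack(0x00c000001000, 3)
    addr, cnt := unpack(v)
    fmt.Printf("addr=%#x cnt=%d\n", addr, cnt)
}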
diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go
new file mode 100644
index 000000000..e51877704
--- /dev/null
+++ b/src/runtime/lfstack_test.go
@@ -0,0 +1,136 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "math/rand"
+ . "runtime"
+ "testing"
+ "unsafe"
+)
+
+type MyNode struct {
+ LFNode
+ data int
+}
+
+func fromMyNode(node *MyNode) *LFNode {
+ return (*LFNode)(unsafe.Pointer(node))
+}
+
+func toMyNode(node *LFNode) *MyNode {
+ return (*MyNode)(unsafe.Pointer(node))
+}
+
+func TestLFStack(t *testing.T) {
+ stack := new(uint64)
+ // Need to keep additional references to nodes; the stack is not all that type-safe.
+ var nodes []*MyNode
+
+ // Check the stack is initially empty.
+ if LFStackPop(stack) != nil {
+ t.Fatalf("stack is not empty")
+ }
+
+ // Push one element.
+ node := &MyNode{data: 42}
+ nodes = append(nodes, node)
+ LFStackPush(stack, fromMyNode(node))
+
+ // Push another.
+ node = &MyNode{data: 43}
+ nodes = append(nodes, node)
+ LFStackPush(stack, fromMyNode(node))
+
+ // Pop one element.
+ node = toMyNode(LFStackPop(stack))
+ if node == nil {
+ t.Fatalf("stack is empty")
+ }
+ if node.data != 43 {
+ t.Fatalf("no lifo")
+ }
+
+ // Pop another.
+ node = toMyNode(LFStackPop(stack))
+ if node == nil {
+ t.Fatalf("stack is empty")
+ }
+ if node.data != 42 {
+ t.Fatalf("no lifo")
+ }
+
+ // Check the stack is empty again.
+ if LFStackPop(stack) != nil {
+ t.Fatalf("stack is not empty")
+ }
+ if *stack != 0 {
+ t.Fatalf("stack is not empty")
+ }
+}
+
+var stress []*MyNode
+
+func TestLFStackStress(t *testing.T) {
+ const K = 100
+ P := 4 * GOMAXPROCS(-1)
+ N := 100000
+ if testing.Short() {
+ N /= 10
+ }
+ // Create 2 stacks.
+ stacks := [2]*uint64{new(uint64), new(uint64)}
+ // Need to keep additional references to nodes,
+ // the lock-free stack is not type-safe.
+ stress = nil
+ // Push K elements randomly onto the stacks.
+ sum := 0
+ for i := 0; i < K; i++ {
+ sum += i
+ node := &MyNode{data: i}
+ stress = append(stress, node)
+ LFStackPush(stacks[i%2], fromMyNode(node))
+ }
+ c := make(chan bool, P)
+ for p := 0; p < P; p++ {
+ go func() {
+ r := rand.New(rand.NewSource(rand.Int63()))
+ // Pop a node from a random stack, then push it onto a random stack.
+ for i := 0; i < N; i++ {
+ node := toMyNode(LFStackPop(stacks[r.Intn(2)]))
+ if node != nil {
+ LFStackPush(stacks[r.Intn(2)], fromMyNode(node))
+ }
+ }
+ c <- true
+ }()
+ }
+ for i := 0; i < P; i++ {
+ <-c
+ }
+ // Pop all elements from both stacks, and verify that nothing is lost.
+ sum2 := 0
+ cnt := 0
+ for i := 0; i < 2; i++ {
+ for {
+ node := toMyNode(LFStackPop(stacks[i]))
+ if node == nil {
+ break
+ }
+ cnt++
+ sum2 += node.data
+ node.Next = nil
+ }
+ }
+ if cnt != K {
+ t.Fatalf("Wrong number of nodes %d/%d", cnt, K)
+ }
+ if sum2 != sum {
+ t.Fatalf("Wrong sum %d/%d", sum2, sum)
+ }
+
+ // Let nodes be collected now.
+ stress = nil
+}
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
new file mode 100644
index 000000000..725962341
--- /dev/null
+++ b/src/runtime/lock_futex.go
@@ -0,0 +1,205 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build dragonfly freebsd linux
+
+package runtime
+
+import "unsafe"
+
+// This implementation depends on OS-specific implementations of
+//
+// runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
+// Atomically,
+// if(*addr == val) sleep
+// Might be woken up spuriously; that's allowed.
+// Don't sleep longer than ns; ns < 0 means forever.
+//
+// runtime·futexwakeup(uint32 *addr, uint32 cnt)
+// If any procs are sleeping on addr, wake up at most cnt.
+
+const (
+ mutex_unlocked = 0
+ mutex_locked = 1
+ mutex_sleeping = 2
+
+ active_spin = 4
+ active_spin_cnt = 30
+ passive_spin = 1
+)
+
+// Possible lock states are mutex_unlocked, mutex_locked and mutex_sleeping.
+// mutex_sleeping means that there is presumably at least one sleeping thread.
+// Note that there can be spinning threads in all states; they do not
+// affect the mutex's state.
+
+func futexsleep(addr *uint32, val uint32, ns int64)
+func futexwakeup(addr *uint32, cnt uint32)
+
+// We use the uintptr mutex.key and note.key as a uint32.
+func key32(p *uintptr) *uint32 {
+ return (*uint32)(unsafe.Pointer(p))
+}
+
+func lock(l *mutex) {
+ gp := getg()
+
+ if gp.m.locks < 0 {
+ gothrow("runtime·lock: lock count")
+ }
+ gp.m.locks++
+
+ // Speculative grab for lock.
+ v := xchg(key32(&l.key), mutex_locked)
+ if v == mutex_unlocked {
+ return
+ }
+
+ // wait is either MUTEX_LOCKED or MUTEX_SLEEPING
+ // depending on whether there is a thread sleeping
+ // on this mutex. If we ever change l->key from
+ // MUTEX_SLEEPING to some other value, we must be
+ // careful to change it back to MUTEX_SLEEPING before
+ // returning, to ensure that the sleeping thread gets
+ // its wakeup call.
+ wait := v
+
+ // On uniprocessors, no point spinning.
+ // On multiprocessors, spin for ACTIVE_SPIN attempts.
+ spin := 0
+ if ncpu > 1 {
+ spin = active_spin
+ }
+ for {
+ // Try for lock, spinning.
+ for i := 0; i < spin; i++ {
+ for l.key == mutex_unlocked {
+ if cas(key32(&l.key), mutex_unlocked, wait) {
+ return
+ }
+ }
+ procyield(active_spin_cnt)
+ }
+
+ // Try for lock, rescheduling.
+ for i := 0; i < passive_spin; i++ {
+ for l.key == mutex_unlocked {
+ if cas(key32(&l.key), mutex_unlocked, wait) {
+ return
+ }
+ }
+ osyield()
+ }
+
+ // Sleep.
+ v = xchg(key32(&l.key), mutex_sleeping)
+ if v == mutex_unlocked {
+ return
+ }
+ wait = mutex_sleeping
+ futexsleep(key32(&l.key), mutex_sleeping, -1)
+ }
+}
+
+func unlock(l *mutex) {
+ v := xchg(key32(&l.key), mutex_unlocked)
+ if v == mutex_unlocked {
+ gothrow("unlock of unlocked lock")
+ }
+ if v == mutex_sleeping {
+ futexwakeup(key32(&l.key), 1)
+ }
+
+ gp := getg()
+ gp.m.locks--
+ if gp.m.locks < 0 {
+ gothrow("runtime·unlock: lock count")
+ }
+ if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
+ gp.stackguard0 = stackPreempt
+ }
+}
+
+// One-time notifications.
+func noteclear(n *note) {
+ n.key = 0
+}
+
+func notewakeup(n *note) {
+ old := xchg(key32(&n.key), 1)
+ if old != 0 {
+ print("notewakeup - double wakeup (", old, ")\n")
+ gothrow("notewakeup - double wakeup")
+ }
+ futexwakeup(key32(&n.key), 1)
+}
+
+func notesleep(n *note) {
+ gp := getg()
+ if gp != gp.m.g0 {
+ gothrow("notesleep not on g0")
+ }
+ for atomicload(key32(&n.key)) == 0 {
+ gp.m.blocked = true
+ futexsleep(key32(&n.key), 0, -1)
+ gp.m.blocked = false
+ }
+}
+
+//go:nosplit
+func notetsleep_internal(n *note, ns int64) bool {
+ gp := getg()
+
+ if ns < 0 {
+ for atomicload(key32(&n.key)) == 0 {
+ gp.m.blocked = true
+ futexsleep(key32(&n.key), 0, -1)
+ gp.m.blocked = false
+ }
+ return true
+ }
+
+ if atomicload(key32(&n.key)) != 0 {
+ return true
+ }
+
+ deadline := nanotime() + ns
+ for {
+ gp.m.blocked = true
+ futexsleep(key32(&n.key), 0, ns)
+ gp.m.blocked = false
+ if atomicload(key32(&n.key)) != 0 {
+ break
+ }
+ now := nanotime()
+ if now >= deadline {
+ break
+ }
+ ns = deadline - now
+ }
+ return atomicload(key32(&n.key)) != 0
+}
+
+func notetsleep(n *note, ns int64) bool {
+ gp := getg()
+ if gp != gp.m.g0 && gp.m.gcing == 0 {
+ gothrow("notetsleep not on g0")
+ }
+
+ return notetsleep_internal(n, ns)
+}
+
+// same as runtime·notetsleep, but called on user g (not g0)
+// calls only nosplit functions between entersyscallblock/exitsyscall
+func notetsleepg(n *note, ns int64) bool {
+ gp := getg()
+ if gp == gp.m.g0 {
+ gothrow("notetsleepg on g0")
+ }
+
+ entersyscallblock()
+ ok := notetsleep_internal(n, ns)
+ exitsyscall()
+ return ok
+}
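
A user-space sketch of the three-state mutex above, keeping the same state machine (speculative grab, spin, then announce a sleeper) but with a one-slot channel standing in for the futex wait queue. The real futexsleep atomically re-checks the key in the kernel before sleeping, which a channel cannot do, so this is an illustration of the protocol, not a replacement for it.

package main

import (
    "fmt"
    "runtime"
    "sync/atomic"
)

const (
    mutexUnlocked = 0
    mutexLocked   = 1
    mutexSleeping = 2
)

type futexMutex struct {
    key  uint32
    wake chan struct{} // stand-in for futexsleep/futexwakeup on &key
}

func (m *futexMutex) lock() {
    // Speculative grab, as in the fast path above.
    v := atomic.SwapUint32(&m.key, mutexLocked)
    if v == mutexUnlocked {
        return
    }
    // If the previous state was mutexSleeping, we must leave the key as
    // mutexSleeping when we acquire, so parked threads are not forgotten.
    wait := v
    for {
        // Short spin before going to sleep.
        for i := 0; i < 4; i++ {
            if atomic.LoadUint32(&m.key) == mutexUnlocked &&
                atomic.CompareAndSwapUint32(&m.key, mutexUnlocked, wait) {
                return
            }
            runtime.Gosched()
        }
        // Announce a sleeper, then block until an unlock wakes us.
        if atomic.SwapUint32(&m.key, mutexSleeping) == mutexUnlocked {
            return
        }
        wait = mutexSleeping
        <-m.wake
    }
}

func (m *futexMutex) unlock() {
    if atomic.SwapUint32(&m.key, mutexUnlocked) == mutexSleeping {
        select {
        case m.wake <- struct{}{}: // wake at most one sleeper
        default: // a wakeup token is already pending
        }
    }
}

func main() {
    m := &futexMutex{wake: make(chan struct{}, 1)}
    done := make(chan struct{})
    n := 0
    for i := 0; i < 4; i++ {
        go func() {
            for j := 0; j < 1000; j++ {
                m.lock()
                n++
                m.unlock()
            }
            done <- struct{}{}
        }()
    }
    for i := 0; i < 4; i++ {
        <-done
    }
    fmt.Println(n) // 4000
}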
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
new file mode 100644
index 000000000..d136b8280
--- /dev/null
+++ b/src/runtime/lock_sema.go
@@ -0,0 +1,270 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin nacl netbsd openbsd plan9 solaris windows
+
+package runtime
+
+import "unsafe"
+
+// This implementation depends on OS-specific implementations of
+//
+// uintptr runtime·semacreate(void)
+// Create a semaphore, which will be assigned to m->waitsema.
+// The zero value is treated as absence of any semaphore,
+// so be sure to return a non-zero value.
+//
+// int32 runtime·semasleep(int64 ns)
+// If ns < 0, acquire m->waitsema and return 0.
+// If ns >= 0, try to acquire m->waitsema for at most ns nanoseconds.
+// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
+//
+// int32 runtime·semawakeup(M *mp)
+// Wake up mp, which is or will soon be sleeping on mp->waitsema.
+//
+const (
+ locked uintptr = 1
+
+ active_spin = 4
+ active_spin_cnt = 30
+ passive_spin = 1
+)
+
+func semacreate() uintptr
+func semasleep(int64) int32
+func semawakeup(mp *m)
+
+func lock(l *mutex) {
+ gp := getg()
+ if gp.m.locks < 0 {
+ gothrow("runtime·lock: lock count")
+ }
+ gp.m.locks++
+
+ // Speculative grab for lock.
+ if casuintptr(&l.key, 0, locked) {
+ return
+ }
+ if gp.m.waitsema == 0 {
+ gp.m.waitsema = semacreate()
+ }
+
+ // On uniprocessors, no point spinning.
+ // On multiprocessors, spin for ACTIVE_SPIN attempts.
+ spin := 0
+ if ncpu > 1 {
+ spin = active_spin
+ }
+Loop:
+ for i := 0; ; i++ {
+ v := atomicloaduintptr(&l.key)
+ if v&locked == 0 {
+ // Unlocked. Try to lock.
+ if casuintptr(&l.key, v, v|locked) {
+ return
+ }
+ i = 0
+ }
+ if i < spin {
+ procyield(active_spin_cnt)
+ } else if i < spin+passive_spin {
+ osyield()
+ } else {
+ // Someone else has it.
+ // l->waitm points to a linked list of M's waiting
+ // for this lock, chained through m->nextwaitm.
+ // Queue this M.
+ for {
+ gp.m.nextwaitm = (*m)((unsafe.Pointer)(v &^ locked))
+ if casuintptr(&l.key, v, uintptr(unsafe.Pointer(gp.m))|locked) {
+ break
+ }
+ v = atomicloaduintptr(&l.key)
+ if v&locked == 0 {
+ continue Loop
+ }
+ }
+ if v&locked != 0 {
+ // Queued. Wait.
+ semasleep(-1)
+ i = 0
+ }
+ }
+ }
+}
+
+func unlock(l *mutex) {
+ gp := getg()
+ var mp *m
+ for {
+ v := atomicloaduintptr(&l.key)
+ if v == locked {
+ if casuintptr(&l.key, locked, 0) {
+ break
+ }
+ } else {
+ // Other M's are waiting for the lock.
+ // Dequeue an M.
+ mp = (*m)((unsafe.Pointer)(v &^ locked))
+ if casuintptr(&l.key, v, uintptr(unsafe.Pointer(mp.nextwaitm))) {
+ // Dequeued an M. Wake it.
+ semawakeup(mp)
+ break
+ }
+ }
+ }
+ gp.m.locks--
+ if gp.m.locks < 0 {
+ gothrow("runtime·unlock: lock count")
+ }
+ if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
+ gp.stackguard0 = stackPreempt
+ }
+}
+
+// One-time notifications.
+func noteclear(n *note) {
+ n.key = 0
+}
+
+func notewakeup(n *note) {
+ var v uintptr
+ for {
+ v = atomicloaduintptr(&n.key)
+ if casuintptr(&n.key, v, locked) {
+ break
+ }
+ }
+
+ // Successfully set waitm to locked.
+ // What was it before?
+ switch {
+ case v == 0:
+ // Nothing was waiting. Done.
+ case v == locked:
+ // Two notewakeups! Not allowed.
+ gothrow("notewakeup - double wakeup")
+ default:
+ // Must be the waiting m. Wake it up.
+ semawakeup((*m)(unsafe.Pointer(v)))
+ }
+}
+
+func notesleep(n *note) {
+ gp := getg()
+ if gp != gp.m.g0 {
+ gothrow("notesleep not on g0")
+ }
+ if gp.m.waitsema == 0 {
+ gp.m.waitsema = semacreate()
+ }
+ if !casuintptr(&n.key, 0, uintptr(unsafe.Pointer(gp.m))) {
+ // Must be locked (got wakeup).
+ if n.key != locked {
+ gothrow("notesleep - waitm out of sync")
+ }
+ return
+ }
+ // Queued. Sleep.
+ gp.m.blocked = true
+ semasleep(-1)
+ gp.m.blocked = false
+}
+
+//go:nosplit
+func notetsleep_internal(n *note, ns int64, gp *g, deadline int64) bool {
+ // gp and deadline are logically local variables, but they are written
+ // as parameters so that the stack space they require is charged
+ // to the caller.
+ // This reduces the nosplit footprint of notetsleep_internal.
+ gp = getg()
+
+ // Register for wakeup on n->waitm.
+ if !casuintptr(&n.key, 0, uintptr(unsafe.Pointer(gp.m))) {
+ // Must be locked (got wakeup).
+ if n.key != locked {
+ gothrow("notetsleep - waitm out of sync")
+ }
+ return true
+ }
+ if ns < 0 {
+ // Queued. Sleep.
+ gp.m.blocked = true
+ semasleep(-1)
+ gp.m.blocked = false
+ return true
+ }
+
+ deadline = nanotime() + ns
+ for {
+ // Registered. Sleep.
+ gp.m.blocked = true
+ if semasleep(ns) >= 0 {
+ gp.m.blocked = false
+ // Acquired semaphore, semawakeup unregistered us.
+ // Done.
+ return true
+ }
+ gp.m.blocked = false
+ // Interrupted or timed out. Still registered. Semaphore not acquired.
+ ns = deadline - nanotime()
+ if ns <= 0 {
+ break
+ }
+ // Deadline hasn't arrived. Keep sleeping.
+ }
+
+ // Deadline arrived. Still registered. Semaphore not acquired.
+ // Want to give up and return, but have to unregister first,
+ // so that any notewakeup racing with the return does not
+ // try to grant us the semaphore when we don't expect it.
+ for {
+ v := atomicloaduintptr(&n.key)
+ switch v {
+ case uintptr(unsafe.Pointer(gp.m)):
+ // No wakeup yet; unregister if possible.
+ if casuintptr(&n.key, v, 0) {
+ return false
+ }
+ case locked:
+ // Wakeup happened so semaphore is available.
+ // Grab it to avoid getting out of sync.
+ gp.m.blocked = true
+ if semasleep(-1) < 0 {
+ gothrow("runtime: unable to acquire - semaphore out of sync")
+ }
+ gp.m.blocked = false
+ return true
+ default:
+ gothrow("runtime: unexpected waitm - semaphore out of sync")
+ }
+ }
+}
+
+func notetsleep(n *note, ns int64) bool {
+ gp := getg()
+ if gp != gp.m.g0 && gp.m.gcing == 0 {
+ gothrow("notetsleep not on g0")
+ }
+ if gp.m.waitsema == 0 {
+ gp.m.waitsema = semacreate()
+ }
+ return notetsleep_internal(n, ns, nil, 0)
+}
+
+// same as runtime·notetsleep, but called on user g (not g0)
+// calls only nosplit functions between entersyscallblock/exitsyscall
+func notetsleepg(n *note, ns int64) bool {
+ gp := getg()
+ if gp == gp.m.g0 {
+ gothrow("notetsleepg on g0")
+ }
+ if gp.m.waitsema == 0 {
+ gp.m.waitsema = semacreate()
+ }
+ entersyscallblock()
+ ok := notetsleep_internal(n, ns, nil, 0)
+ exitsyscall()
+ return ok
+}
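
The note protocol above hinges on a single atomic word: a sleeper CASes its identity into the key and then parks on its semaphore, while notewakeup swaps in `locked` and, if it displaced a waiter, hands that waiter the semaphore. A simplified sketch of the same handshake, assuming a channel as the semaphore and a plain "waiting" marker instead of storing the M pointer in the key:

package main

import (
    "fmt"
    "sync/atomic"
)

const (
    cleared   = 0 // noteclear
    waiting   = 1 // a sleeper has registered and parked on its semaphore
    signalled = 2 // notewakeup has run
)

type note struct {
    key  uint32
    sema chan struct{} // stand-in for the waiter's m->waitsema
}

func (n *note) wakeup() {
    switch atomic.SwapUint32(&n.key, signalled) {
    case cleared:
        // Nothing was waiting; a future sleeper will observe `signalled`.
    case signalled:
        panic("double wakeup")
    case waiting:
        n.sema <- struct{}{} // hand the parked sleeper its semaphore
    }
}

func (n *note) sleep() {
    if !atomic.CompareAndSwapUint32(&n.key, cleared, waiting) {
        return // wakeup already happened
    }
    <-n.sema // registered; park until wakeup hands over the semaphore
}

func main() {
    n := &note{sema: make(chan struct{}, 1)}
    done := make(chan struct{})
    go func() { n.sleep(); close(done) }()
    n.wakeup()
    <-done
    fmt.Println("woken")
}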
diff --git a/src/runtime/malloc.c b/src/runtime/malloc.c
new file mode 100644
index 000000000..752ff60f3
--- /dev/null
+++ b/src/runtime/malloc.c
@@ -0,0 +1,439 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// See malloc.h for overview.
+//
+// TODO(rsc): double-check stats.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "type.h"
+#include "typekind.h"
+#include "race.h"
+#include "stack.h"
+#include "textflag.h"
+
+// Mark mheap as 'no pointers'; it does not contain interesting pointers but occupies ~45K.
+#pragma dataflag NOPTR
+MHeap runtime·mheap;
+#pragma dataflag NOPTR
+MStats runtime·memstats;
+
+Type* runtime·conservative;
+
+void runtime·cmallocgc(uintptr size, Type *typ, uint32 flag, void **ret);
+void runtime·gc_notype_ptr(Eface*);
+
+void*
+runtime·mallocgc(uintptr size, Type *typ, uint32 flag)
+{
+ void *ret;
+
+ // Call into the Go version of mallocgc.
+ // TODO: maybe someday we can get rid of this. It is
+ // probably the only location where we run Go code on the M stack.
+ if((flag&FlagNoScan) == 0 && typ == nil)
+ typ = runtime·conservative;
+ runtime·cmallocgc(size, typ, flag, &ret);
+ return ret;
+}
+
+int32
+runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
+{
+ uintptr n, i;
+ byte *p;
+ MSpan *s;
+
+ g->m->mcache->local_nlookup++;
+ if (sizeof(void*) == 4 && g->m->mcache->local_nlookup >= (1<<30)) {
+ // purge cache stats to prevent overflow
+ runtime·lock(&runtime·mheap.lock);
+ runtime·purgecachedstats(g->m->mcache);
+ runtime·unlock(&runtime·mheap.lock);
+ }
+
+ s = runtime·MHeap_LookupMaybe(&runtime·mheap, v);
+ if(sp)
+ *sp = s;
+ if(s == nil) {
+ if(base)
+ *base = nil;
+ if(size)
+ *size = 0;
+ return 0;
+ }
+
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ // Large object.
+ if(base)
+ *base = p;
+ if(size)
+ *size = s->npages<<PageShift;
+ return 1;
+ }
+
+ n = s->elemsize;
+ if(base) {
+ i = ((byte*)v - p)/n;
+ *base = p + i*n;
+ }
+ if(size)
+ *size = n;
+
+ return 1;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·purgecachedstats(MCache *c)
+{
+ MHeap *h;
+ int32 i;
+
+ // Protected by either heap or GC lock.
+ h = &runtime·mheap;
+ mstats.heap_alloc += c->local_cachealloc;
+ c->local_cachealloc = 0;
+ mstats.nlookup += c->local_nlookup;
+ c->local_nlookup = 0;
+ h->largefree += c->local_largefree;
+ c->local_largefree = 0;
+ h->nlargefree += c->local_nlargefree;
+ c->local_nlargefree = 0;
+ for(i=0; i<nelem(c->local_nsmallfree); i++) {
+ h->nsmallfree[i] += c->local_nsmallfree[i];
+ c->local_nsmallfree[i] = 0;
+ }
+}
+
+// The size of the trailing by_size array differs between Go and C:
+// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
+// sizeof_C_MStats is what C thinks about size of Go struct.
+uintptr runtime·sizeof_C_MStats = sizeof(MStats) - (NumSizeClasses - 61) * sizeof(mstats.by_size[0]);
+
+#define MaxArena32 (2U<<30)
+
+// For use by Go. It can't be a constant in Go, unfortunately,
+// because it depends on the OS.
+uintptr runtime·maxMem = MaxMem;
+
+void
+runtime·mallocinit(void)
+{
+ byte *p, *p1;
+ uintptr arena_size, bitmap_size, spans_size, p_size;
+ extern byte runtime·end[];
+ uintptr limit;
+ uint64 i;
+ bool reserved;
+ Eface notype_eface;
+
+ p = nil;
+ p_size = 0;
+ arena_size = 0;
+ bitmap_size = 0;
+ spans_size = 0;
+ reserved = false;
+
+ // for 64-bit build
+ USED(p);
+ USED(p_size);
+ USED(arena_size);
+ USED(bitmap_size);
+ USED(spans_size);
+
+ runtime·InitSizes();
+
+ if(runtime·class_to_size[TinySizeClass] != TinySize)
+ runtime·throw("bad TinySizeClass");
+
+ // limit = runtime·memlimit();
+ // See https://code.google.com/p/go/issues/detail?id=5049
+ // TODO(rsc): Fix after 1.1.
+ limit = 0;
+
+ // Set up the allocation arena, a contiguous area of memory where
+ // allocated data will be found. The arena begins with a bitmap large
+ // enough to hold 4 bits per allocated word.
+ if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
+ // On a 64-bit machine, allocate from a single contiguous reservation.
+ // 128 GB (MaxMem) should be big enough for now.
+ //
+ // The code will work with the reservation at any address, but ask
+ // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
+ // Allocating a 128 GB region takes away 37 bits, and the amd64
+ // doesn't let us choose the top 17 bits, so that leaves the 11 bits
+ // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
+ // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
+ // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
+ // UTF-8 sequences, and they are otherwise as far away from
+ // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
+ // addresses. An earlier attempt to use 0x11f8 caused out of memory errors
+ // on OS X during thread allocations. 0x00c0 causes conflicts with
+ // AddressSanitizer which reserves all memory up to 0x0100.
+ // These choices are both for debuggability and to reduce the
+ // odds of the conservative garbage collector not collecting memory
+ // because some non-pointer block of memory had a bit pattern
+ // that matched a memory address.
+ //
+ // Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
+ // but it hardly matters: e0 00 is not valid UTF-8 either.
+ //
+ // If this fails we fall back to the 32 bit memory mechanism
+ arena_size = MaxMem;
+ bitmap_size = arena_size / (sizeof(void*)*8/4);
+ spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
+ spans_size = ROUND(spans_size, PageSize);
+ for(i = 0; i <= 0x7f; i++) {
+ p = (void*)(i<<40 | 0x00c0ULL<<32);
+ p_size = bitmap_size + spans_size + arena_size + PageSize;
+ p = runtime·SysReserve(p, p_size, &reserved);
+ if(p != nil)
+ break;
+ }
+ }
+ if (p == nil) {
+ // On a 32-bit machine, we can't typically get away
+ // with a giant virtual address space reservation.
+ // Instead we map the memory information bitmap
+ // immediately after the data segment, large enough
+ // to handle another 2GB of mappings (256 MB),
+ // along with a reservation for another 512 MB of memory.
+ // When that gets used up, we'll start asking the kernel
+ // for any memory anywhere and hope it's in the 2GB
+ // following the bitmap (presumably the executable begins
+ // near the bottom of memory, so we'll have to use up
+ // most of memory before the kernel resorts to giving out
+ // memory before the beginning of the text segment).
+ //
+ // Alternatively we could reserve 512 MB bitmap, enough
+ // for 4GB of mappings, and then accept any memory the
+ // kernel threw at us, but normally that's a waste of 512 MB
+ // of address space, which is probably too much in a 32-bit world.
+ bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
+ arena_size = 512<<20;
+ spans_size = MaxArena32 / PageSize * sizeof(runtime·mheap.spans[0]);
+ if(limit > 0 && arena_size+bitmap_size+spans_size > limit) {
+ bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
+ arena_size = bitmap_size * 8;
+ spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
+ }
+ spans_size = ROUND(spans_size, PageSize);
+
+ // SysReserve treats the address we ask for, end, as a hint,
+ // not as an absolute requirement. If we ask for the end
+ // of the data segment but the operating system requires
+ // a little more space before we can start allocating, it will
+ // give out a slightly higher pointer. Except QEMU, which
+ // is buggy, as usual: it won't adjust the pointer upward.
+ // So adjust it upward a little bit ourselves: 1/4 MB to get
+ // away from the running binary image and then round up
+ // to a MB boundary.
+ p = (byte*)ROUND((uintptr)runtime·end + (1<<18), 1<<20);
+ p_size = bitmap_size + spans_size + arena_size + PageSize;
+ p = runtime·SysReserve(p, p_size, &reserved);
+ if(p == nil)
+ runtime·throw("runtime: cannot reserve arena virtual address space");
+ }
+
+ // PageSize can be larger than OS definition of page size,
+ // so SysReserve can give us a PageSize-unaligned pointer.
+ // To overcome this we ask for PageSize more and round up the pointer.
+ p1 = (byte*)ROUND((uintptr)p, PageSize);
+
+ runtime·mheap.spans = (MSpan**)p1;
+ runtime·mheap.bitmap = p1 + spans_size;
+ runtime·mheap.arena_start = p1 + spans_size + bitmap_size;
+ runtime·mheap.arena_used = runtime·mheap.arena_start;
+ runtime·mheap.arena_end = p + p_size;
+ runtime·mheap.arena_reserved = reserved;
+
+ if(((uintptr)runtime·mheap.arena_start & (PageSize-1)) != 0)
+ runtime·throw("misrounded allocation in mallocinit");
+
+ // Initialize the rest of the allocator.
+ runtime·MHeap_Init(&runtime·mheap);
+ g->m->mcache = runtime·allocmcache();
+
+ runtime·gc_notype_ptr(&notype_eface);
+ runtime·conservative = notype_eface.type;
+}
+
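A quick check of the reservation sizes quoted in the comment above (128 GB arena, 4 bitmap bits per 8-byte word, one span pointer per page), assuming the 8 KB page size defined by pageShift in malloc.go below; this is where the "136 GB" and "8 GB bitmap" figures come from.

package main

import "fmt"

func main() {
    const (
        gb       uint64 = 1 << 30
        arenaSize       = 128 * gb // MaxMem
        ptrSize  uint64 = 8
        pageSize uint64 = 8 << 10
    )
    bitmapSize := arenaSize / (ptrSize * 8 / 4) // 4 bitmap bits per word
    spansSize := arenaSize / pageSize * ptrSize // one MSpan pointer per page
    fmt.Printf("bitmap = %d GB, spans = %d MB, total reservation = %.3f GB\n",
        bitmapSize/gb, spansSize/(1<<20), float64(arenaSize+bitmapSize+spansSize)/float64(gb))
}
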
+void*
+runtime·MHeap_SysAlloc(MHeap *h, uintptr n)
+{
+ byte *p, *p_end;
+ uintptr p_size;
+ bool reserved;
+
+ if(n > h->arena_end - h->arena_used) {
+ // We are in 32-bit mode, maybe we didn't use all possible address space yet.
+ // Reserve some more space.
+ byte *new_end;
+
+ p_size = ROUND(n + PageSize, 256<<20);
+ new_end = h->arena_end + p_size;
+ if(new_end <= h->arena_start + MaxArena32) {
+ // TODO: It would be bad if part of the arena
+ // is reserved and part is not.
+ p = runtime·SysReserve(h->arena_end, p_size, &reserved);
+ if(p == h->arena_end) {
+ h->arena_end = new_end;
+ h->arena_reserved = reserved;
+ }
+ else if(p+p_size <= h->arena_start + MaxArena32) {
+ // Keep everything page-aligned.
+ // Our pages are bigger than hardware pages.
+ h->arena_end = p+p_size;
+ h->arena_used = p + (-(uintptr)p&(PageSize-1));
+ h->arena_reserved = reserved;
+ } else {
+ uint64 stat;
+ stat = 0;
+ runtime·SysFree(p, p_size, &stat);
+ }
+ }
+ }
+ if(n <= h->arena_end - h->arena_used) {
+ // Keep taking from our reservation.
+ p = h->arena_used;
+ runtime·SysMap(p, n, h->arena_reserved, &mstats.heap_sys);
+ h->arena_used += n;
+ runtime·MHeap_MapBits(h);
+ runtime·MHeap_MapSpans(h);
+ if(raceenabled)
+ runtime·racemapshadow(p, n);
+
+ if(((uintptr)p & (PageSize-1)) != 0)
+ runtime·throw("misrounded allocation in MHeap_SysAlloc");
+ return p;
+ }
+
+ // If using 64-bit, our reservation is all we have.
+ if(h->arena_end - h->arena_start >= MaxArena32)
+ return nil;
+
+ // On 32-bit, once the reservation is gone we can
+ // try to get memory at a location chosen by the OS
+ // and hope that it is in the range we allocated bitmap for.
+ p_size = ROUND(n, PageSize) + PageSize;
+ p = runtime·sysAlloc(p_size, &mstats.heap_sys);
+ if(p == nil)
+ return nil;
+
+ if(p < h->arena_start || p+p_size - h->arena_start >= MaxArena32) {
+ runtime·printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
+ p, h->arena_start, h->arena_start+MaxArena32);
+ runtime·SysFree(p, p_size, &mstats.heap_sys);
+ return nil;
+ }
+
+ p_end = p + p_size;
+ p += -(uintptr)p & (PageSize-1);
+ if(p+n > h->arena_used) {
+ h->arena_used = p+n;
+ if(p_end > h->arena_end)
+ h->arena_end = p_end;
+ runtime·MHeap_MapBits(h);
+ runtime·MHeap_MapSpans(h);
+ if(raceenabled)
+ runtime·racemapshadow(p, n);
+ }
+
+ if(((uintptr)p & (PageSize-1)) != 0)
+ runtime·throw("misrounded allocation in MHeap_SysAlloc");
+ return p;
+}
+
+// Runtime stubs.
+
+static void*
+cnew(Type *typ, intgo n)
+{
+ if(n < 0 || (typ->size > 0 && n > MaxMem/typ->size))
+ runtime·panicstring("runtime: allocation size out of range");
+ return runtime·mallocgc(typ->size*n, typ, typ->kind&KindNoPointers ? FlagNoScan : 0);
+}
+
+// same as runtime·new, but callable from C
+void*
+runtime·cnew(Type *typ)
+{
+ return cnew(typ, 1);
+}
+
+void*
+runtime·cnewarray(Type *typ, intgo n)
+{
+ return cnew(typ, n);
+}
+
+void
+runtime·setFinalizer_m(void)
+{
+ FuncVal *fn;
+ void *arg;
+ uintptr nret;
+ Type *fint;
+ PtrType *ot;
+
+ fn = g->m->ptrarg[0];
+ arg = g->m->ptrarg[1];
+ nret = g->m->scalararg[0];
+ fint = g->m->ptrarg[2];
+ ot = g->m->ptrarg[3];
+ g->m->ptrarg[0] = nil;
+ g->m->ptrarg[1] = nil;
+ g->m->ptrarg[2] = nil;
+ g->m->ptrarg[3] = nil;
+
+ g->m->scalararg[0] = runtime·addfinalizer(arg, fn, nret, fint, ot);
+}
+
+void
+runtime·removeFinalizer_m(void)
+{
+ void *p;
+
+ p = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ runtime·removefinalizer(p);
+}
+
+// mcallable cache refill
+void
+runtime·mcacheRefill_m(void)
+{
+ runtime·MCache_Refill(g->m->mcache, (int32)g->m->scalararg[0]);
+}
+
+void
+runtime·largeAlloc_m(void)
+{
+ uintptr npages, size;
+ MSpan *s;
+ void *v;
+ int32 flag;
+
+ //runtime·printf("largeAlloc size=%D\n", g->m->scalararg[0]);
+ // Allocate directly from heap.
+ size = g->m->scalararg[0];
+ flag = (int32)g->m->scalararg[1];
+ if(size + PageSize < size)
+ runtime·throw("out of memory");
+ npages = size >> PageShift;
+ if((size & PageMask) != 0)
+ npages++;
+ s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero));
+ if(s == nil)
+ runtime·throw("out of memory");
+ s->limit = (byte*)(s->start<<PageShift) + size;
+ v = (void*)(s->start << PageShift);
+ // setup for mark sweep
+ runtime·markspan(v, 0, 0, true);
+ g->m->ptrarg[0] = s;
+}
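
largeAlloc_m above rounds the request up to whole heap pages before calling MHeap_Alloc. The same arithmetic as a standalone Go sketch, assuming the 8 KB page size that pageShift in malloc.go defines:

package main

import "fmt"

const (
    pageShift = 13
    pageSize  = 1 << pageShift
    pageMask  = pageSize - 1
)

// pagesFor returns how many heap pages a large allocation of size bytes
// needs, including the overflow check performed by largeAlloc_m.
func pagesFor(size uintptr) uintptr {
    if size+pageSize < size {
        panic("out of memory") // size so large that rounding up overflows
    }
    n := size >> pageShift
    if size&pageMask != 0 {
        n++
    }
    return n
}

func main() {
    for _, s := range []uintptr{1, 8192, 8193, 40 << 10} {
        fmt.Printf("%6d bytes -> %d pages\n", s, pagesFor(s))
    }
}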
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
new file mode 100644
index 000000000..883ca0cef
--- /dev/null
+++ b/src/runtime/malloc.go
@@ -0,0 +1,814 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+const (
+ debugMalloc = false
+
+ flagNoScan = 1 << 0 // GC doesn't have to scan object
+ flagNoZero = 1 << 1 // don't zero memory
+
+ maxTinySize = 16
+ tinySizeClass = 2
+ maxSmallSize = 32 << 10
+
+ pageShift = 13
+ pageSize = 1 << pageShift
+ pageMask = pageSize - 1
+
+ bitsPerPointer = 2
+ bitsMask = 1<<bitsPerPointer - 1
+ pointersPerByte = 8 / bitsPerPointer
+ bitPtrMask = bitsMask << 2
+ maxGCMask = 64
+ bitsDead = 0
+ bitsPointer = 2
+
+ bitBoundary = 1
+ bitMarked = 2
+ bitMask = bitBoundary | bitMarked
+
+ mSpanInUse = 0
+)
+
+// Page number (address>>pageShift)
+type pageID uintptr
+
+// All zero-sized allocations return a pointer to this byte.
+var zeroObject byte
+
+// Maximum possible heap size.
+var maxMem uintptr
+
+// Allocate an object of size bytes.
+// Small objects are allocated from the per-P cache's free lists.
+// Large objects (> 32 kB) are allocated straight from the heap.
+func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
+ if size == 0 {
+ return unsafe.Pointer(&zeroObject)
+ }
+ size0 := size
+
+ // This function must be atomic wrt GC, but for performance reasons
+ // we don't acquirem/releasem on the fast path. The code below does not have
+ // split stack checks, so it can't be preempted by GC.
+ // Functions like roundup/add are inlined, and onM/racemalloc are nosplit.
+ // If debugMalloc = true, these assumptions are checked below.
+ if debugMalloc {
+ mp := acquirem()
+ if mp.mallocing != 0 {
+ gothrow("malloc deadlock")
+ }
+ mp.mallocing = 1
+ if mp.curg != nil {
+ mp.curg.stackguard0 = ^uintptr(0xfff) | 0xbad
+ }
+ }
+
+ c := gomcache()
+ var s *mspan
+ var x unsafe.Pointer
+ if size <= maxSmallSize {
+ if flags&flagNoScan != 0 && size < maxTinySize {
+ // Tiny allocator.
+ //
+ // Tiny allocator combines several tiny allocation requests
+ // into a single memory block. The resulting memory block
+ // is freed when all subobjects are unreachable. The subobjects
+ // must be FlagNoScan (have no pointers); this ensures that
+ // the amount of potentially wasted memory is bounded.
+ //
+ // Size of the memory block used for combining (maxTinySize) is tunable.
+ // Current setting is 16 bytes, which relates to 2x worst case memory
+ // wastage (when all but one subobjects are unreachable).
+ // 8 bytes would result in no wastage at all, but provides less
+ // opportunities for combining.
+ // 32 bytes provides more opportunities for combining,
+ // but can lead to 4x worst case wastage.
+ // The best-case saving is 8x regardless of block size.
+ //
+ // Objects obtained from the tiny allocator must not be freed explicitly.
+ // So when an object will be freed explicitly, we ensure that
+ // its size is >= maxTinySize.
+ //
+ // SetFinalizer has a special case for objects potentially coming
+ // from the tiny allocator; in that case it allows setting finalizers
+ // for an inner byte of a memory block.
+ //
+ // The main targets of the tiny allocator are small strings and
+ // standalone escaping variables. On a json benchmark
+ // the allocator reduces the number of allocations by ~12% and
+ // reduces heap size by ~20%.
+
+ tinysize := uintptr(c.tinysize)
+ if size <= tinysize {
+ tiny := unsafe.Pointer(c.tiny)
+ // Align tiny pointer for required (conservative) alignment.
+ if size&7 == 0 {
+ tiny = roundup(tiny, 8)
+ } else if size&3 == 0 {
+ tiny = roundup(tiny, 4)
+ } else if size&1 == 0 {
+ tiny = roundup(tiny, 2)
+ }
+ size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny)))
+ if size1 <= tinysize {
+ // The object fits into existing tiny block.
+ x = tiny
+ c.tiny = (*byte)(add(x, size))
+ c.tinysize -= uintptr(size1)
+ if debugMalloc {
+ mp := acquirem()
+ if mp.mallocing == 0 {
+ gothrow("bad malloc")
+ }
+ mp.mallocing = 0
+ if mp.curg != nil {
+ mp.curg.stackguard0 = mp.curg.stackguard
+ }
+ releasem(mp)
+ releasem(mp)
+ }
+ return x
+ }
+ }
+ // Allocate a new maxTinySize block.
+ s = c.alloc[tinySizeClass]
+ v := s.freelist
+ if v == nil {
+ mp := acquirem()
+ mp.scalararg[0] = tinySizeClass
+ onM(mcacheRefill_m)
+ releasem(mp)
+ s = c.alloc[tinySizeClass]
+ v = s.freelist
+ }
+ s.freelist = v.next
+ s.ref++
+ //TODO: prefetch v.next
+ x = unsafe.Pointer(v)
+ (*[2]uint64)(x)[0] = 0
+ (*[2]uint64)(x)[1] = 0
+ // See if we need to replace the existing tiny block with the new one
+ // based on amount of remaining free space.
+ if maxTinySize-size > tinysize {
+ c.tiny = (*byte)(add(x, size))
+ c.tinysize = uintptr(maxTinySize - size)
+ }
+ size = maxTinySize
+ } else {
+ var sizeclass int8
+ if size <= 1024-8 {
+ sizeclass = size_to_class8[(size+7)>>3]
+ } else {
+ sizeclass = size_to_class128[(size-1024+127)>>7]
+ }
+ size = uintptr(class_to_size[sizeclass])
+ s = c.alloc[sizeclass]
+ v := s.freelist
+ if v == nil {
+ mp := acquirem()
+ mp.scalararg[0] = uintptr(sizeclass)
+ onM(mcacheRefill_m)
+ releasem(mp)
+ s = c.alloc[sizeclass]
+ v = s.freelist
+ }
+ s.freelist = v.next
+ s.ref++
+ //TODO: prefetch
+ x = unsafe.Pointer(v)
+ if flags&flagNoZero == 0 {
+ v.next = nil
+ if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
+ memclr(unsafe.Pointer(v), size)
+ }
+ }
+ }
+ c.local_cachealloc += intptr(size)
+ } else {
+ mp := acquirem()
+ mp.scalararg[0] = uintptr(size)
+ mp.scalararg[1] = uintptr(flags)
+ onM(largeAlloc_m)
+ s = (*mspan)(mp.ptrarg[0])
+ mp.ptrarg[0] = nil
+ releasem(mp)
+ x = unsafe.Pointer(uintptr(s.start << pageShift))
+ size = uintptr(s.elemsize)
+ }
+
+ if flags&flagNoScan != 0 {
+ // All objects are pre-marked as noscan.
+ goto marked
+ }
+
+ // From here till marked label marking the object as allocated
+ // and storing type info in the GC bitmap.
+ {
+ arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
+ off := (uintptr(x) - arena_start) / ptrSize
+ xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+ shift := (off % wordsPerBitmapByte) * gcBits
+ if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
+ println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
+ gothrow("bad bits in markallocated")
+ }
+
+ var ti, te uintptr
+ var ptrmask *uint8
+ if size == ptrSize {
+ // It's one word and it has pointers, it must be a pointer.
+ *xbits |= (bitsPointer << 2) << shift
+ goto marked
+ }
+ if typ.kind&kindGCProg != 0 {
+ nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
+ masksize := nptr
+ if masksize%2 != 0 {
+ masksize *= 2 // repeated
+ }
+ masksize = masksize * pointersPerByte / 8 // 4 bits per word
+ masksize++ // unroll flag in the beginning
+ if masksize > maxGCMask && typ.gc[1] != 0 {
+ // If the mask is too large, unroll the program directly
+ // into the GC bitmap. It's 7 times slower than copying
+ // from the pre-unrolled mask, but saves 1/16 of type size
+ // memory for the mask.
+ mp := acquirem()
+ mp.ptrarg[0] = x
+ mp.ptrarg[1] = unsafe.Pointer(typ)
+ mp.scalararg[0] = uintptr(size)
+ mp.scalararg[1] = uintptr(size0)
+ onM(unrollgcproginplace_m)
+ releasem(mp)
+ goto marked
+ }
+ ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
+ // Check whether the program is already unrolled.
+ if uintptr(atomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(typ)
+ onM(unrollgcprog_m)
+ releasem(mp)
+ }
+ ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
+ } else {
+ ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
+ }
+ if size == 2*ptrSize {
+ *xbits = *ptrmask | bitBoundary
+ goto marked
+ }
+ te = uintptr(typ.size) / ptrSize
+ // If the type occupies odd number of words, its mask is repeated.
+ if te%2 == 0 {
+ te /= 2
+ }
+ // Copy pointer bitmask into the bitmap.
+ for i := uintptr(0); i < size0; i += 2 * ptrSize {
+ v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
+ ti++
+ if ti == te {
+ ti = 0
+ }
+ if i == 0 {
+ v |= bitBoundary
+ }
+ if i+ptrSize == size0 {
+ v &^= uint8(bitPtrMask << 4)
+ }
+
+ *xbits = v
+ xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
+ }
+ if size0%(2*ptrSize) == 0 && size0 < size {
+ // Mark the word after last object's word as bitsDead.
+ *xbits = bitsDead << 2
+ }
+ }
+marked:
+ if raceenabled {
+ racemalloc(x, size)
+ }
+
+ if debugMalloc {
+ mp := acquirem()
+ if mp.mallocing == 0 {
+ gothrow("bad malloc")
+ }
+ mp.mallocing = 0
+ if mp.curg != nil {
+ mp.curg.stackguard0 = mp.curg.stackguard
+ }
+ releasem(mp)
+ releasem(mp)
+ }
+
+ if debug.allocfreetrace != 0 {
+ tracealloc(x, size, typ)
+ }
+
+ if rate := MemProfileRate; rate > 0 {
+ if size < uintptr(rate) && int32(size) < c.next_sample {
+ c.next_sample -= int32(size)
+ } else {
+ mp := acquirem()
+ profilealloc(mp, x, size)
+ releasem(mp)
+ }
+ }
+
+ if memstats.heap_alloc >= memstats.next_gc {
+ gogc(0)
+ }
+
+ return x
+}
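
To make the tiny-allocator bookkeeping above concrete, a simplified model of just the packing decision: align the request inside the current 16-byte block using the same 8/4/2-byte rule, and report when a fresh block would be needed. The real code tracks the remaining bytes through c.tiny/c.tinysize rather than an offset, so this is a sketch of the policy, not of the implementation.

package main

import "fmt"

const maxTinySize = 16

// tinyBlock models the per-P tiny block as an offset into a 16-byte chunk.
type tinyBlock struct {
    off uintptr // bytes already handed out of the current block
}

// align rounds an offset up the way gomallocgc aligns c.tiny:
// 8-, 4-, or 2-byte alignment depending on the request size.
func align(off, size uintptr) uintptr {
    switch {
    case size&7 == 0:
        return (off + 7) &^ 7
    case size&3 == 0:
        return (off + 3) &^ 3
    case size&1 == 0:
        return (off + 1) &^ 1
    }
    return off
}

// alloc returns the offset of a new object inside the shared block, or
// ok=false when a fresh maxTinySize block would have to be allocated.
func (b *tinyBlock) alloc(size uintptr) (off uintptr, ok bool) {
    start := align(b.off, size)
    if start+size > maxTinySize {
        return 0, false
    }
    b.off = start + size
    return start, true
}

func main() {
    var b tinyBlock
    for _, sz := range []uintptr{3, 4, 8, 8} {
        off, ok := b.alloc(sz)
        fmt.Printf("alloc %d -> offset %d, fits=%v\n", sz, off, ok)
    }
}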
+
+// cmallocgc is a trampoline used to call the Go malloc from C.
+func cmallocgc(size uintptr, typ *_type, flags int, ret *unsafe.Pointer) {
+ *ret = gomallocgc(size, typ, flags)
+}
+
+// implementation of new builtin
+func newobject(typ *_type) unsafe.Pointer {
+ flags := 0
+ if typ.kind&kindNoPointers != 0 {
+ flags |= flagNoScan
+ }
+ return gomallocgc(uintptr(typ.size), typ, flags)
+}
+
+// implementation of make builtin for slices
+func newarray(typ *_type, n uintptr) unsafe.Pointer {
+ flags := 0
+ if typ.kind&kindNoPointers != 0 {
+ flags |= flagNoScan
+ }
+ if int(n) < 0 || (typ.size > 0 && n > maxMem/uintptr(typ.size)) {
+ panic("runtime: allocation size out of range")
+ }
+ return gomallocgc(uintptr(typ.size)*n, typ, flags)
+}
+
+// rawmem returns a chunk of pointerless memory. It is
+// not zeroed.
+func rawmem(size uintptr) unsafe.Pointer {
+ return gomallocgc(size, nil, flagNoScan|flagNoZero)
+}
+
+// round size up to next size class
+func goroundupsize(size uintptr) uintptr {
+ if size < maxSmallSize {
+ if size <= 1024-8 {
+ return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
+ }
+ return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
+ }
+ if size+pageSize < size {
+ return size
+ }
+ return (size + pageSize - 1) &^ pageMask
+}
+
+func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
+ c := mp.mcache
+ rate := MemProfileRate
+ if size < uintptr(rate) {
+ // pick next profile time
+ // If you change this, also change allocmcache.
+ if rate > 0x3fffffff { // make 2*rate not overflow
+ rate = 0x3fffffff
+ }
+ next := int32(fastrand1()) % (2 * int32(rate))
+ // Subtract the "remainder" of the current allocation.
+ // Otherwise objects that are close in size to the sampling rate
+ // will be under-sampled, because we consistently discard this remainder.
+ next -= (int32(size) - c.next_sample)
+ if next < 0 {
+ next = 0
+ }
+ c.next_sample = next
+ }
+
+ mProf_Malloc(x, size)
+}
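
profilealloc draws the next sample point uniformly from [0, 2*rate), so the expected distance between samples is rate bytes, and the remainder subtraction keeps allocations close to the rate from being systematically under-sampled. A simplified sketch of the expected-value property (math/rand stands in for fastrand1):

package main

import (
    "fmt"
    "math/rand"
)

// nextSample picks the next profiling trigger the way profilealloc does:
// uniform in [0, 2*rate), which has mean rate.
func nextSample(rate int32) int32 {
    if rate > 0x3fffffff { // keep 2*rate from overflowing int32
        rate = 0x3fffffff
    }
    return rand.Int31n(2 * rate)
}

func main() {
    const rate = 512 * 1024 // runtime.MemProfileRate's default
    const trials = 100000
    var sum int64
    for i := 0; i < trials; i++ {
        sum += int64(nextSample(rate))
    }
    fmt.Printf("mean gap ~ %d bytes (rate = %d)\n", sum/trials, rate)
}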
+
+// force = 1 - do GC regardless of current heap usage
+// force = 2 - do GC and eager sweep
+func gogc(force int32) {
+ // The gc is turned off (via enablegc) until the bootstrap has completed.
+ // Also, malloc gets called in the guts of a number of libraries that might be
+ // holding locks. To avoid deadlocks during stoptheworld, don't bother
+ // trying to run gc while holding a lock. The next mallocgc without a lock
+ // will do the gc instead.
+ mp := acquirem()
+ if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
+ releasem(mp)
+ return
+ }
+ releasem(mp)
+ mp = nil
+
+ semacquire(&worldsema, false)
+
+ if force == 0 && memstats.heap_alloc < memstats.next_gc {
+ // typically threads which lost the race to grab
+ // worldsema exit here when gc is done.
+ semrelease(&worldsema)
+ return
+ }
+
+ // Ok, we're doing it! Stop everybody else
+ startTime := nanotime()
+ mp = acquirem()
+ mp.gcing = 1
+ releasem(mp)
+ onM(stoptheworld)
+ if mp != acquirem() {
+ gothrow("gogc: rescheduled")
+ }
+
+ clearpools()
+
+ // Run gc on the g0 stack. We do this so that the g stack
+ // we're currently running on will no longer change. Cuts
+ // the root set down a bit (g0 stacks are not scanned, and
+ // we don't need to scan gc's internal state). We also
+ // need to switch to g0 so we can shrink the stack.
+ n := 1
+ if debug.gctrace > 1 {
+ n = 2
+ }
+ for i := 0; i < n; i++ {
+ if i > 0 {
+ startTime = nanotime()
+ }
+ // switch to g0, call gc, then switch back
+ mp.scalararg[0] = uintptr(uint32(startTime)) // low 32 bits
+ mp.scalararg[1] = uintptr(startTime >> 32) // high 32 bits
+ if force >= 2 {
+ mp.scalararg[2] = 1 // eagersweep
+ } else {
+ mp.scalararg[2] = 0
+ }
+ onM(gc_m)
+ }
+
+ // all done
+ mp.gcing = 0
+ semrelease(&worldsema)
+ onM(starttheworld)
+ releasem(mp)
+ mp = nil
+
+ // now that gc is done, kick off finalizer thread if needed
+ if !concurrentSweep {
+ // give the queued finalizers, if any, a chance to run
+ gosched()
+ }
+}
+
+// GC runs a garbage collection.
+func GC() {
+ gogc(2)
+}
+
+// SetFinalizer sets the finalizer associated with x to f.
+// When the garbage collector finds an unreachable block
+// with an associated finalizer, it clears the association and runs
+// f(x) in a separate goroutine. This makes x reachable again, but
+// now without an associated finalizer. Assuming that SetFinalizer
+// is not called again, the next time the garbage collector sees
+// that x is unreachable, it will free x.
+//
+// SetFinalizer(x, nil) clears any finalizer associated with x.
+//
+// The argument x must be a pointer to an object allocated by
+// calling new or by taking the address of a composite literal.
+// The argument f must be a function that takes a single argument
+// to which x's type can be assigned, and can have arbitrary ignored return
+// values. If either of these is not true, SetFinalizer aborts the
+// program.
+//
+// Finalizers are run in dependency order: if A points at B, both have
+// finalizers, and they are otherwise unreachable, only the finalizer
+// for A runs; once A is freed, the finalizer for B can run.
+// If a cyclic structure includes a block with a finalizer, that
+// cycle is not guaranteed to be garbage collected and the finalizer
+// is not guaranteed to run, because there is no ordering that
+// respects the dependencies.
+//
+// The finalizer for x is scheduled to run at some arbitrary time after
+// x becomes unreachable.
+// There is no guarantee that finalizers will run before a program exits,
+// so typically they are useful only for releasing non-memory resources
+// associated with an object during a long-running program.
+// For example, an os.File object could use a finalizer to close the
+// associated operating system file descriptor when a program discards
+// an os.File without calling Close, but it would be a mistake
+// to depend on a finalizer to flush an in-memory I/O buffer such as a
+// bufio.Writer, because the buffer would not be flushed at program exit.
+//
+// It is not guaranteed that a finalizer will run if the size of *x is
+// zero bytes.
+//
+// A single goroutine runs all finalizers for a program, sequentially.
+// If a finalizer must run for a long time, it should do so by starting
+// a new goroutine.
+func SetFinalizer(obj interface{}, finalizer interface{}) {
+ e := (*eface)(unsafe.Pointer(&obj))
+ etyp := e._type
+ if etyp == nil {
+ gothrow("runtime.SetFinalizer: first argument is nil")
+ }
+ if etyp.kind&kindMask != kindPtr {
+ gothrow("runtime.SetFinalizer: first argument is " + *etyp._string + ", not pointer")
+ }
+ ot := (*ptrtype)(unsafe.Pointer(etyp))
+ if ot.elem == nil {
+ gothrow("nil elem type!")
+ }
+
+ // As an implementation detail we do not run finalizers for zero-sized objects,
+ // because we use &runtime·zerobase for all such allocations.
+ if ot.elem.size == 0 {
+ return
+ }
+
+ // find the containing object
+ _, base, _ := findObject(e.data)
+
+ // The following check is required for cases when a user passes a pointer to a composite
+ // literal, but the compiler makes it a pointer to a global. For example:
+ // var Foo = &Object{}
+ // func main() {
+ // runtime.SetFinalizer(Foo, nil)
+ // }
+ // See issue 7656.
+ if base == nil {
+ return
+ }
+
+ if e.data != base {
+ // As an implementation detail we allow setting finalizers for an inner byte
+ // of an object if it could come from tiny alloc (see mallocgc for details).
+ if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize {
+ gothrow("runtime.SetFinalizer: pointer not at beginning of allocated block")
+ }
+ }
+
+ f := (*eface)(unsafe.Pointer(&finalizer))
+ ftyp := f._type
+ if ftyp == nil {
+ // switch to M stack and remove finalizer
+ mp := acquirem()
+ mp.ptrarg[0] = e.data
+ onM(removeFinalizer_m)
+ releasem(mp)
+ return
+ }
+
+ if ftyp.kind&kindMask != kindFunc {
+ gothrow("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function")
+ }
+ ft := (*functype)(unsafe.Pointer(ftyp))
+ ins := *(*[]*_type)(unsafe.Pointer(&ft.in))
+ if ft.dotdotdot || len(ins) != 1 {
+ gothrow("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
+ }
+ fint := ins[0]
+ switch {
+ case fint == etyp:
+ // ok - same type
+ goto okarg
+ case fint.kind&kindMask == kindPtr:
+ if (fint.x == nil || fint.x.name == nil || etyp.x == nil || etyp.x.name == nil) && (*ptrtype)(unsafe.Pointer(fint)).elem == ot.elem {
+ // ok - not same type, but both pointers,
+ // one or the other is unnamed, and same element type, so assignable.
+ goto okarg
+ }
+ case fint.kind&kindMask == kindInterface:
+ ityp := (*interfacetype)(unsafe.Pointer(fint))
+ if len(ityp.mhdr) == 0 {
+ // ok - satisfies empty interface
+ goto okarg
+ }
+ if _, ok := assertE2I2(ityp, obj); ok {
+ goto okarg
+ }
+ }
+ gothrow("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
+okarg:
+ // compute size needed for return parameters
+ nret := uintptr(0)
+ for _, t := range *(*[]*_type)(unsafe.Pointer(&ft.out)) {
+ nret = round(nret, uintptr(t.align)) + uintptr(t.size)
+ }
+ nret = round(nret, ptrSize)
+
+ // make sure we have a finalizer goroutine
+ createfing()
+
+ // switch to M stack to add finalizer record
+ mp := acquirem()
+ mp.ptrarg[0] = f.data
+ mp.ptrarg[1] = e.data
+ mp.scalararg[0] = nret
+ mp.ptrarg[2] = unsafe.Pointer(fint)
+ mp.ptrarg[3] = unsafe.Pointer(ot)
+ onM(setFinalizer_m)
+ if mp.scalararg[0] != 1 {
+ gothrow("runtime.SetFinalizer: finalizer already set")
+ }
+ releasem(mp)
+}
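
The contract documented above in practice: a finalizer is a backstop for a non-memory resource, and explicit cleanup clears it. A minimal usage example (the resource type and fd value are made up):

package main

import (
    "fmt"
    "runtime"
)

// resource stands in for something holding a non-memory asset (an fd, a handle).
type resource struct{ fd int }

func open() *resource {
    r := &resource{fd: 3}
    // Backstop: if the caller forgets Close, release the fd when r becomes
    // unreachable. There is no guarantee this runs before the program exits.
    runtime.SetFinalizer(r, func(r *resource) {
        fmt.Println("finalizer closing fd", r.fd)
    })
    return r
}

func (r *resource) Close() {
    runtime.SetFinalizer(r, nil) // explicit cleanup; clear the finalizer
    fmt.Println("closed fd", r.fd)
}

func main() {
    r := open()
    r.Close()
}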
+
+// round n up to a multiple of a. a must be a power of 2.
+func round(n, a uintptr) uintptr {
+ return (n + a - 1) &^ (a - 1)
+}
+
+// Look up pointer v in heap. Return the span containing the object,
+// the start of the object, and the size of the object. If the object
+// does not exist, return nil, nil, 0.
+func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
+ c := gomcache()
+ c.local_nlookup++
+ if ptrSize == 4 && c.local_nlookup >= 1<<30 {
+ // purge cache stats to prevent overflow
+ lock(&mheap_.lock)
+ purgecachedstats(c)
+ unlock(&mheap_.lock)
+ }
+
+ // find span
+ arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
+ arena_used := uintptr(unsafe.Pointer(mheap_.arena_used))
+ if uintptr(v) < arena_start || uintptr(v) >= arena_used {
+ return
+ }
+ p := uintptr(v) >> pageShift
+ q := p - arena_start>>pageShift
+ s = *(**mspan)(add(unsafe.Pointer(mheap_.spans), q*ptrSize))
+ if s == nil {
+ return
+ }
+ x = unsafe.Pointer(uintptr(s.start) << pageShift)
+
+ if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse {
+ s = nil
+ x = nil
+ return
+ }
+
+ n = uintptr(s.elemsize)
+ if s.sizeclass != 0 {
+ x = add(x, (uintptr(v)-uintptr(x))/n*n)
+ }
+ return
+}
+
+var fingCreate uint32
+
+func createfing() {
+ // start the finalizer goroutine exactly once
+ if fingCreate == 0 && cas(&fingCreate, 0, 1) {
+ go runfinq()
+ }
+}
+
+// This is the goroutine that runs all of the finalizers
+func runfinq() {
+ var (
+ frame unsafe.Pointer
+ framecap uintptr
+ )
+
+ for {
+ lock(&finlock)
+ fb := finq
+ finq = nil
+ if fb == nil {
+ gp := getg()
+ fing = gp
+ fingwait = true
+ gp.issystem = true
+ goparkunlock(&finlock, "finalizer wait")
+ gp.issystem = false
+ continue
+ }
+ unlock(&finlock)
+ if raceenabled {
+ racefingo()
+ }
+ for fb != nil {
+ for i := int32(0); i < fb.cnt; i++ {
+ f := (*finalizer)(add(unsafe.Pointer(&fb.fin), uintptr(i)*unsafe.Sizeof(finalizer{})))
+
+ framesz := unsafe.Sizeof((interface{})(nil)) + uintptr(f.nret)
+ if framecap < framesz {
+ // The frame does not contain pointers interesting for GC;
+ // all not-yet-finalized objects are stored in finq.
+ // If we do not mark it as FlagNoScan,
+ // the last finalized object is not collected.
+ frame = gomallocgc(framesz, nil, flagNoScan)
+ framecap = framesz
+ }
+
+ if f.fint == nil {
+ gothrow("missing type in runfinq")
+ }
+ switch f.fint.kind & kindMask {
+ case kindPtr:
+ // direct use of pointer
+ *(*unsafe.Pointer)(frame) = f.arg
+ case kindInterface:
+ ityp := (*interfacetype)(unsafe.Pointer(f.fint))
+ // set up with empty interface
+ (*eface)(frame)._type = &f.ot.typ
+ (*eface)(frame).data = f.arg
+ if len(ityp.mhdr) != 0 {
+ // convert to interface with methods
+ // this conversion is guaranteed to succeed - we checked in SetFinalizer
+ *(*fInterface)(frame) = assertE2I(ityp, *(*interface{})(frame))
+ }
+ default:
+ gothrow("bad kind in runfinq")
+ }
+ reflectcall(unsafe.Pointer(f.fn), frame, uint32(framesz), uint32(framesz))
+
+ // drop finalizer queue references to finalized object
+ f.fn = nil
+ f.arg = nil
+ f.ot = nil
+ }
+ fb.cnt = 0
+ next := fb.next
+ lock(&finlock)
+ fb.next = finc
+ finc = fb
+ unlock(&finlock)
+ fb = next
+ }
+ }
+}
+
+var persistent struct {
+ lock mutex
+ pos unsafe.Pointer
+ end unsafe.Pointer
+}
+
+// Wrapper around sysAlloc that can allocate small chunks.
+// There is no associated free operation.
+// Intended for things like function/type/debug-related persistent data.
+// If align is 0, uses default align (currently 8).
+func persistentalloc(size, align uintptr, stat *uint64) unsafe.Pointer {
+ const (
+ chunk = 256 << 10
+ maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
+ )
+
+ if align != 0 {
+ if align&(align-1) != 0 {
+ gothrow("persistentalloc: align is not a power of 2")
+ }
+ if align > _PageSize {
+ gothrow("persistentalloc: align is too large")
+ }
+ } else {
+ align = 8
+ }
+
+ if size >= maxBlock {
+ return sysAlloc(size, stat)
+ }
+
+ lock(&persistent.lock)
+ persistent.pos = roundup(persistent.pos, align)
+ if uintptr(persistent.pos)+size > uintptr(persistent.end) {
+ persistent.pos = sysAlloc(chunk, &memstats.other_sys)
+ if persistent.pos == nil {
+ unlock(&persistent.lock)
+ gothrow("runtime: cannot allocate memory")
+ }
+ persistent.end = add(persistent.pos, chunk)
+ }
+ p := persistent.pos
+ persistent.pos = add(persistent.pos, size)
+ unlock(&persistent.lock)
+
+ if stat != &memstats.other_sys {
+ xadd64(stat, int64(size))
+ xadd64(&memstats.other_sys, -int64(size))
+ }
+ return p
+}
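
Editor's note: persistentalloc above is a bump allocator. It rounds the current position up to the requested alignment, refills from sysAlloc in 256 KB chunks when the current chunk runs out, and never frees. Below is a minimal sketch of the same pattern in plain Go; bumpArena and its method are invented names that stand in for the persistent struct and sysAlloc, and sizes larger than a chunk are not handled (the real code falls back to sysAlloc for those).

package main

import "fmt"

// bumpArena is a toy version of the persistent allocator pattern:
// allocations are carved from the current chunk and never freed.
type bumpArena struct {
	chunk []byte // current chunk; persistent.pos..end in the runtime
	off   int    // next free offset within chunk
}

const chunkSize = 256 << 10 // mirrors the 256 KB chunk in persistentalloc

// alloc returns size bytes aligned to align (a power of two).
func (a *bumpArena) alloc(size, align int) []byte {
	// Round the offset up to the requested alignment.
	a.off = (a.off + align - 1) &^ (align - 1)
	if a.chunk == nil || a.off+size > len(a.chunk) {
		// Out of space: grab a fresh chunk and start over.
		a.chunk = make([]byte, chunkSize)
		a.off = 0
	}
	p := a.chunk[a.off : a.off+size : a.off+size]
	a.off += size
	return p
}

func main() {
	var a bumpArena
	b1 := a.alloc(24, 8)
	b2 := a.alloc(100, 16)
	fmt.Println(len(b1), len(b2)) // 24 100
}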
diff --git a/src/runtime/malloc.h b/src/runtime/malloc.h
new file mode 100644
index 000000000..544169194
--- /dev/null
+++ b/src/runtime/malloc.h
@@ -0,0 +1,618 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Memory allocator, based on tcmalloc.
+// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
+
+// The main allocator works in runs of pages.
+// Small allocation sizes (up to and including 32 kB) are
+// rounded to one of about 100 size classes, each of which
+// has its own free list of objects of exactly that size.
+// Any free page of memory can be split into a set of objects
+// of one size class, which are then managed using free list
+// allocators.
+//
+// The allocator's data structures are:
+//
+// FixAlloc: a free-list allocator for fixed-size objects,
+// used to manage storage used by the allocator.
+// MHeap: the malloc heap, managed at page (8192-byte) granularity.
+// MSpan: a run of pages managed by the MHeap.
+// MCentral: a shared free list for a given size class.
+// MCache: a per-thread (in Go, per-P) cache for small objects.
+// MStats: allocation statistics.
+//
+// Allocating a small object proceeds up a hierarchy of caches:
+//
+// 1. Round the size up to one of the small size classes
+// and look in the corresponding MCache free list.
+// If the list is not empty, allocate an object from it.
+// This can all be done without acquiring a lock.
+//
+// 2. If the MCache free list is empty, replenish it by
+// taking a bunch of objects from the MCentral free list.
+// Moving a bunch amortizes the cost of acquiring the MCentral lock.
+//
+// 3. If the MCentral free list is empty, replenish it by
+// allocating a run of pages from the MHeap and then
+// chopping that memory into objects of the given size.
+// Allocating many objects amortizes the cost of locking
+// the heap.
+//
+// 4. If the MHeap is empty or has no page runs large enough,
+// allocate a new group of pages (at least 1MB) from the
+// operating system. Allocating a large run of pages
+// amortizes the cost of talking to the operating system.
+//
+// Freeing a small object proceeds up the same hierarchy:
+//
+// 1. Look up the size class for the object and add it to
+// the MCache free list.
+//
+// 2. If the MCache free list is too long or the MCache has
+// too much memory, return some to the MCentral free lists.
+//
+// 3. If all the objects in a given span have returned to
+// the MCentral list, return that span to the page heap.
+//
+// 4. If the heap has too much memory, return some to the
+// operating system.
+//
+// TODO(rsc): Step 4 is not implemented.
+//
+// Allocating and freeing a large object uses the page heap
+// directly, bypassing the MCache and MCentral free lists.
+//
+// The small objects on the MCache and MCentral free lists
+// may or may not be zeroed. They are zeroed if and only if
+// the second word of the object is zero. A span in the
+// page heap is zeroed unless s->needzero is set. When a span
+// is allocated to break into small objects, it is zeroed if needed
+// and s->needzero is set. There are two main benefits to delaying the
+// zeroing this way:
+//
+// 1. stack frames allocated from the small object lists
+// or the page heap can avoid zeroing altogether.
+// 2. the cost of zeroing when reusing a small object is
+// charged to the mutator, not the garbage collector.
+//
+// This C code was written with an eye toward translating to Go
+// in the future. Methods have the form Type_Method(Type *t, ...).
+
+typedef struct MCentral MCentral;
+typedef struct MHeap MHeap;
+typedef struct MSpan MSpan;
+typedef struct MStats MStats;
+typedef struct MLink MLink;
+typedef struct GCStats GCStats;
+
+enum
+{
+ PageShift = 13,
+ PageSize = 1<<PageShift,
+ PageMask = PageSize - 1,
+};
+typedef uintptr pageID; // address >> PageShift
+
+enum
+{
+ // Computed constant. The definition of MaxSmallSize and the
+ // algorithm in msize.c produce some number of different allocation
+ // size classes. NumSizeClasses is that number. It's needed here
+ // because there are static arrays of this length; when msize runs its
+ // size choosing algorithm it double-checks that NumSizeClasses agrees.
+ NumSizeClasses = 67,
+
+ // Tunable constants.
+ MaxSmallSize = 32<<10,
+
+ // Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc.
+ TinySize = 16,
+ TinySizeClass = 2,
+
+ FixAllocChunk = 16<<10, // Chunk size for FixAlloc
+ MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap.
+ HeapAllocChunk = 1<<20, // Chunk size for heap growth
+
+ // Per-P, per order stack segment cache size.
+ StackCacheSize = 32*1024,
+ // Number of orders that get caching. Order 0 is FixedStack
+ // and each successive order is twice as large.
+ NumStackOrders = 3,
+
+ // Number of bits in page to span calculations (8k pages).
+ // On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
+ // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
+ // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
+#ifdef _64BIT
+#ifdef GOOS_windows
+ // Windows counts memory used by page table into committed memory
+ // of the process, so we can't reserve too much memory.
+ // See http://golang.org/issue/5402 and http://golang.org/issue/5236.
+ MHeapMap_Bits = 35 - PageShift,
+#else
+ MHeapMap_Bits = 37 - PageShift,
+#endif
+#else
+ MHeapMap_Bits = 32 - PageShift,
+#endif
+
+ // Max number of threads to run garbage collection.
+ // 2, 3, and 4 are all plausible maximums depending
+ // on the hardware details of the machine. The garbage
+ // collector scales well to 32 cpus.
+ MaxGcproc = 32,
+};
+
+// Maximum memory allocation size, a hint for callers.
+// This must be a #define instead of an enum because it
+// is so large.
+#ifdef _64BIT
+#define MaxMem (1ULL<<(MHeapMap_Bits+PageShift)) /* 128 GB or 32 GB */
+#else
+#define MaxMem ((uintptr)-1)
+#endif
+
+// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
+struct MLink
+{
+ MLink *next;
+};
+
+// sysAlloc obtains a large chunk of zeroed memory from the
+// operating system, typically on the order of a hundred kilobytes
+// or a megabyte.
+// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysUnused notifies the operating system that the contents
+// of the memory region are no longer needed and can be reused
+// for other purposes.
+// SysUsed notifies the operating system that the contents
+// of the memory region are needed again.
+//
+// SysFree returns the memory to the operating system unconditionally;
+// this is only used if an out-of-memory error has been detected
+// midway through an allocation. It is okay if SysFree is a no-op.
+//
+// SysReserve reserves address space without allocating memory.
+// If the pointer passed to it is non-nil, the caller wants the
+// reservation there, but SysReserve can still choose another
+// location if that one is unavailable. On some systems and in some
+// cases SysReserve will simply check that the address space is
+// available and not actually reserve it. If SysReserve returns
+// non-nil, it sets *reserved to true if the address space is
+// reserved, false if it has merely been checked.
+// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysMap maps previously reserved address space for use.
+// The reserved argument is true if the address space was really
+// reserved, not merely checked.
+//
+// SysFault marks an (already sysAlloc'd) region to fault
+// if accessed. Used only for debugging the runtime.
+
+void* runtime·sysAlloc(uintptr nbytes, uint64 *stat);
+void runtime·SysFree(void *v, uintptr nbytes, uint64 *stat);
+void runtime·SysUnused(void *v, uintptr nbytes);
+void runtime·SysUsed(void *v, uintptr nbytes);
+void runtime·SysMap(void *v, uintptr nbytes, bool reserved, uint64 *stat);
+void* runtime·SysReserve(void *v, uintptr nbytes, bool *reserved);
+void runtime·SysFault(void *v, uintptr nbytes);
+
+// FixAlloc is a simple free-list allocator for fixed size objects.
+// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
+// MCache and MSpan objects.
+//
+// Memory returned by FixAlloc_Alloc is not zeroed.
+// The caller is responsible for locking around FixAlloc calls.
+// Callers can keep state in the object but the first word is
+// smashed by freeing and reallocating.
+struct FixAlloc
+{
+ uintptr size;
+ void (*first)(void *arg, byte *p); // called first time p is returned
+ void* arg;
+ MLink* list;
+ byte* chunk;
+ uint32 nchunk;
+ uintptr inuse; // in-use bytes now
+ uint64* stat;
+};
+
+void runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat);
+void* runtime·FixAlloc_Alloc(FixAlloc *f);
+void runtime·FixAlloc_Free(FixAlloc *f, void *p);
+
+
+// Statistics.
+// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
+struct MStats
+{
+ // General statistics.
+ uint64 alloc; // bytes allocated and still in use
+ uint64 total_alloc; // bytes allocated (even if freed)
+ uint64 sys; // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
+ uint64 nlookup; // number of pointer lookups
+ uint64 nmalloc; // number of mallocs
+ uint64 nfree; // number of frees
+
+ // Statistics about malloc heap.
+ // protected by mheap.lock
+ uint64 heap_alloc; // bytes allocated and still in use
+ uint64 heap_sys; // bytes obtained from system
+ uint64 heap_idle; // bytes in idle spans
+ uint64 heap_inuse; // bytes in non-idle spans
+ uint64 heap_released; // bytes released to the OS
+ uint64 heap_objects; // total number of allocated objects
+
+ // Statistics about allocation of low-level fixed-size structures.
+ // Protected by FixAlloc locks.
+ uint64 stacks_inuse; // this number is included in heap_inuse above
+ uint64 stacks_sys; // always 0 in mstats
+ uint64 mspan_inuse; // MSpan structures
+ uint64 mspan_sys;
+ uint64 mcache_inuse; // MCache structures
+ uint64 mcache_sys;
+ uint64 buckhash_sys; // profiling bucket hash table
+ uint64 gc_sys;
+ uint64 other_sys;
+
+ // Statistics about garbage collector.
+ // Protected by mheap or stopping the world during GC.
+ uint64 next_gc; // next GC (in heap_alloc time)
+ uint64 last_gc; // last GC (in absolute time)
+ uint64 pause_total_ns;
+ uint64 pause_ns[256];
+ uint32 numgc;
+ bool enablegc;
+ bool debuggc;
+
+ // Statistics about allocation size classes.
+ struct {
+ uint32 size;
+ uint64 nmalloc;
+ uint64 nfree;
+ } by_size[NumSizeClasses];
+};
+
+#define mstats runtime·memstats
+extern MStats mstats;
+void runtime·updatememstats(GCStats *stats);
+void runtime·ReadMemStats(MStats *stats);
+
+// Size classes. Computed and initialized by InitSizes.
+//
+// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
+// 1 <= sizeclass < NumSizeClasses, for n.
+// Size class 0 is reserved to mean "not small".
+//
+// class_to_size[i] = largest size in class i
+// class_to_allocnpages[i] = number of pages to allocate when
+// making new objects in class i
+
+int32 runtime·SizeToClass(int32);
+uintptr runtime·roundupsize(uintptr);
+extern int32 runtime·class_to_size[NumSizeClasses];
+extern int32 runtime·class_to_allocnpages[NumSizeClasses];
+extern int8 runtime·size_to_class8[1024/8 + 1];
+extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
+extern void runtime·InitSizes(void);
+
+typedef struct MCacheList MCacheList;
+struct MCacheList
+{
+ MLink *list;
+ uint32 nlist;
+};
+
+typedef struct StackFreeList StackFreeList;
+struct StackFreeList
+{
+ MLink *list; // linked list of free stacks
+ uintptr size; // total size of stacks in list
+};
+
+typedef struct SudoG SudoG;
+
+// Per-thread (in Go, per-P) cache for small objects.
+// No locking needed because it is per-thread (per-P).
+struct MCache
+{
+ // The following members are accessed on every malloc,
+ // so they are grouped here for better caching.
+ int32 next_sample; // trigger heap sample after allocating this many bytes
+ intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
+ // Allocator cache for tiny objects w/o pointers.
+ // See "Tiny allocator" comment in malloc.goc.
+ byte* tiny;
+ uintptr tinysize;
+ // The rest is not accessed on every malloc.
+ MSpan* alloc[NumSizeClasses]; // spans to allocate from
+
+ StackFreeList stackcache[NumStackOrders];
+
+ SudoG* sudogcache;
+
+ void* gcworkbuf;
+
+ // Local allocator stats, flushed during GC.
+ uintptr local_nlookup; // number of pointer lookups
+ uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
+ uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize)
+ uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
+};
+
+MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass);
+void runtime·MCache_ReleaseAll(MCache *c);
+void runtime·stackcache_clear(MCache *c);
+void runtime·gcworkbuffree(void *b);
+
+enum
+{
+ KindSpecialFinalizer = 1,
+ KindSpecialProfile = 2,
+ // Note: The finalizer special must be first because if we're freeing
+ // an object, a finalizer special will cause the freeing operation
+ // to abort, and we want to keep the other special records around
+ // if that happens.
+};
+
+typedef struct Special Special;
+struct Special
+{
+ Special* next; // linked list in span
+ uint16 offset; // span offset of object
+ byte kind; // kind of Special
+};
+
+// The described object has a finalizer set for it.
+typedef struct SpecialFinalizer SpecialFinalizer;
+struct SpecialFinalizer
+{
+ Special special;
+ FuncVal* fn;
+ uintptr nret;
+ Type* fint;
+ PtrType* ot;
+};
+
+// The described object is being heap profiled.
+typedef struct Bucket Bucket; // from mprof.h
+typedef struct SpecialProfile SpecialProfile;
+struct SpecialProfile
+{
+ Special special;
+ Bucket* b;
+};
+
+// An MSpan is a run of pages.
+enum
+{
+ MSpanInUse = 0, // allocated for garbage collected heap
+ MSpanStack, // allocated for use by stack allocator
+ MSpanFree,
+ MSpanListHead,
+ MSpanDead,
+};
+struct MSpan
+{
+ MSpan *next; // in a span linked list
+ MSpan *prev; // in a span linked list
+ pageID start; // starting page number
+ uintptr npages; // number of pages in span
+ MLink *freelist; // list of free objects
+ // sweep generation:
+ // if sweepgen == h->sweepgen - 2, the span needs sweeping
+ // if sweepgen == h->sweepgen - 1, the span is currently being swept
+ // if sweepgen == h->sweepgen, the span is swept and ready to use
+ // h->sweepgen is incremented by 2 after every GC
+ uint32 sweepgen;
+ uint16 ref; // capacity - number of objects in freelist
+ uint8 sizeclass; // size class
+ bool incache; // being used by an MCache
+ uint8 state; // MSpanInUse etc
+ uint8 needzero; // needs to be zeroed before allocation
+ uintptr elemsize; // computed from sizeclass or from npages
+ int64 unusedsince; // First time spotted by GC in MSpanFree state
+ uintptr npreleased; // number of pages released to the OS
+ byte *limit; // end of data in span
+ Mutex specialLock; // guards specials list
+ Special *specials; // linked list of special records sorted by offset.
+};
+
+void runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages);
+void runtime·MSpan_EnsureSwept(MSpan *span);
+bool runtime·MSpan_Sweep(MSpan *span, bool preserve);
+
+// Every MSpan is in one doubly-linked list,
+// either one of the MHeap's free lists or one of the
+// MCentral's span lists. We use empty MSpan structures as list heads.
+void runtime·MSpanList_Init(MSpan *list);
+bool runtime·MSpanList_IsEmpty(MSpan *list);
+void runtime·MSpanList_Insert(MSpan *list, MSpan *span);
+void runtime·MSpanList_InsertBack(MSpan *list, MSpan *span);
+void runtime·MSpanList_Remove(MSpan *span); // from whatever list it is in
+
+
+// Central list of free objects of a given size.
+struct MCentral
+{
+ Mutex lock;
+ int32 sizeclass;
+ MSpan nonempty; // list of spans with a free object
+ MSpan empty; // list of spans with no free objects (or cached in an MCache)
+};
+
+void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
+MSpan* runtime·MCentral_CacheSpan(MCentral *c);
+void runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
+bool runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve);
+
+// Main malloc heap.
+// The heap itself is the "free[]" and "large" arrays,
+// but all the other global data is here too.
+struct MHeap
+{
+ Mutex lock;
+ MSpan free[MaxMHeapList]; // free lists of given length
+ MSpan freelarge; // free lists length >= MaxMHeapList
+ MSpan busy[MaxMHeapList]; // busy lists of large objects of given length
+ MSpan busylarge; // busy lists of large objects length >= MaxMHeapList
+ MSpan **allspans; // all spans out there
+ MSpan **gcspans; // copy of allspans referenced by GC marker or sweeper
+ uint32 nspan;
+ uint32 nspancap;
+ uint32 sweepgen; // sweep generation, see comment in MSpan
+ uint32 sweepdone; // all spans are swept
+
+ // span lookup
+ MSpan** spans;
+ uintptr spans_mapped;
+
+ // range of addresses we might see in the heap
+ byte *bitmap;
+ uintptr bitmap_mapped;
+ byte *arena_start;
+ byte *arena_used;
+ byte *arena_end;
+ bool arena_reserved;
+
+ // central free lists for small size classes.
+ // the padding makes sure that the MCentrals are
+ // spaced CacheLineSize bytes apart, so that each MCentral.lock
+ // gets its own cache line.
+ struct {
+ MCentral mcentral;
+ byte pad[CacheLineSize];
+ } central[NumSizeClasses];
+
+ FixAlloc spanalloc; // allocator for Span*
+ FixAlloc cachealloc; // allocator for MCache*
+ FixAlloc specialfinalizeralloc; // allocator for SpecialFinalizer*
+ FixAlloc specialprofilealloc; // allocator for SpecialProfile*
+ Mutex speciallock; // lock for special record allocators.
+
+ // Malloc stats.
+ uint64 largefree; // bytes freed for large objects (>MaxSmallSize)
+ uint64 nlargefree; // number of frees for large objects (>MaxSmallSize)
+ uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
+};
+#define runtime·mheap runtime·mheap_
+extern MHeap runtime·mheap;
+
+void runtime·MHeap_Init(MHeap *h);
+MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
+MSpan* runtime·MHeap_AllocStack(MHeap *h, uintptr npage);
+void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
+void runtime·MHeap_FreeStack(MHeap *h, MSpan *s);
+MSpan* runtime·MHeap_Lookup(MHeap *h, void *v);
+MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v);
+void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
+void runtime·MHeap_MapBits(MHeap *h);
+void runtime·MHeap_MapSpans(MHeap *h);
+void runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit);
+
+void* runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
+int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
+uintptr runtime·sweepone(void);
+void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
+void runtime·unmarkspan(void *v, uintptr size);
+void runtime·purgecachedstats(MCache*);
+void* runtime·cnew(Type*);
+void* runtime·cnewarray(Type*, intgo);
+void runtime·tracealloc(void*, uintptr, Type*);
+void runtime·tracefree(void*, uintptr);
+void runtime·tracegc(void);
+extern Type* runtime·conservative;
+
+int32 runtime·gcpercent;
+int32 runtime·readgogc(void);
+void runtime·clearpools(void);
+
+enum
+{
+ // flags to malloc
+ FlagNoScan = 1<<0, // GC doesn't have to scan object
+ FlagNoZero = 1<<1, // don't zero memory
+};
+
+void runtime·mProf_Malloc(void*, uintptr);
+void runtime·mProf_Free(Bucket*, uintptr, bool);
+void runtime·mProf_GC(void);
+void runtime·iterate_memprof(void (**callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr));
+int32 runtime·gcprocs(void);
+void runtime·helpgc(int32 nproc);
+void runtime·gchelper(void);
+void runtime·createfing(void);
+G* runtime·wakefing(void);
+void runtime·getgcmask(byte*, Type*, byte**, uintptr*);
+
+typedef struct Finalizer Finalizer;
+struct Finalizer
+{
+ FuncVal *fn; // function to call
+ void *arg; // ptr to object
+ uintptr nret; // bytes of return values from fn
+ Type *fint; // type of first argument of fn
+ PtrType *ot; // type of ptr to object
+};
+
+typedef struct FinBlock FinBlock;
+struct FinBlock
+{
+ FinBlock *alllink;
+ FinBlock *next;
+ int32 cnt;
+ int32 cap;
+ Finalizer fin[1];
+};
+extern Mutex runtime·finlock; // protects the following variables
+extern G* runtime·fing;
+extern bool runtime·fingwait;
+extern bool runtime·fingwake;
+extern FinBlock *runtime·finq; // list of finalizers that are to be executed
+extern FinBlock *runtime·finc; // cache of free blocks
+
+void runtime·setprofilebucket_m(void);
+
+bool runtime·addfinalizer(void*, FuncVal *fn, uintptr, Type*, PtrType*);
+void runtime·removefinalizer(void*);
+void runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot);
+bool runtime·freespecial(Special *s, void *p, uintptr size, bool freed);
+
+// Information from the compiler about the layout of stack frames.
+typedef struct BitVector BitVector;
+struct BitVector
+{
+ int32 n; // # of bits
+ uint32 *data;
+};
+typedef struct StackMap StackMap;
+struct StackMap
+{
+ int32 n; // number of bitmaps
+ int32 nbit; // number of bits in each bitmap
+ uint32 data[];
+};
+// Returns pointer map data for the given stackmap index
+// (the index is encoded in PCDATA_StackMapIndex).
+BitVector runtime·stackmapdata(StackMap *stackmap, int32 n);
+
+extern BitVector runtime·gcdatamask;
+extern BitVector runtime·gcbssmask;
+
+// defined in mgc0.go
+void runtime·gc_m_ptr(Eface*);
+void runtime·gc_g_ptr(Eface*);
+void runtime·gc_itab_ptr(Eface*);
+
+void runtime·setgcpercent_m(void);
+
+// Value we use to mark dead pointers when GODEBUG=gcdead=1.
+#define PoisonGC ((uintptr)0xf969696969696969ULL)
+#define PoisonStack ((uintptr)0x6868686868686868ULL)
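
Editor's note: the overview comment at the top of malloc.h describes how a small allocation climbs the MCache -> MCentral -> MHeap hierarchy, taking a lock only when a local free list must be refilled in bulk. The toy Go model below is only a sketch of that shape: block, central, and cache are invented names, and locking, sweeping, spans, and the size-class tables are all elided.

package main

import "fmt"

// block is a free object strung onto an intrusive free list,
// like MLink in malloc.h.
type block struct{ next *block }

// central hands out batches of free blocks for one size class,
// standing in for MCentral (no locking in this toy version).
type central struct{ free *block }

func (c *central) grow(n int) {
	// In the runtime this carves a fresh span out of MHeap;
	// here we just fabricate n blocks (step 3 of the hierarchy).
	for i := 0; i < n; i++ {
		c.free = &block{next: c.free}
	}
}

func (c *central) takeBatch(n int) *block {
	if c.free == nil {
		c.grow(64)
	}
	var head *block
	for i := 0; i < n && c.free != nil; i++ {
		b := c.free
		c.free = b.next
		b.next = head
		head = b
	}
	return head
}

// cache is the per-P front end, standing in for MCache.
type cache struct {
	free    *block
	central *central
}

func (m *cache) alloc() *block {
	if m.free == nil {
		// Step 2: refill the local list with a batch,
		// amortizing the cost of touching the central list.
		m.free = m.central.takeBatch(16)
	}
	// Step 1: pop from the local free list, no lock needed.
	b := m.free
	m.free = b.next
	b.next = nil
	return b
}

func main() {
	c := &central{}
	m := &cache{central: c}
	for i := 0; i < 3; i++ {
		fmt.Printf("allocated %p\n", m.alloc())
	}
}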
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
new file mode 100644
index 000000000..b7795aa1d
--- /dev/null
+++ b/src/runtime/malloc_test.go
@@ -0,0 +1,189 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "flag"
+ . "runtime"
+ "testing"
+ "time"
+ "unsafe"
+)
+
+func TestMemStats(t *testing.T) {
+ // Test that MemStats has sane values.
+ st := new(MemStats)
+ ReadMemStats(st)
+
+ // Everything except HeapReleased and HeapIdle, because they indeed can be 0.
+ if st.Alloc == 0 || st.TotalAlloc == 0 || st.Sys == 0 || st.Lookups == 0 ||
+ st.Mallocs == 0 || st.Frees == 0 || st.HeapAlloc == 0 || st.HeapSys == 0 ||
+ st.HeapInuse == 0 || st.HeapObjects == 0 || st.StackInuse == 0 ||
+ st.StackSys == 0 || st.MSpanInuse == 0 || st.MSpanSys == 0 || st.MCacheInuse == 0 ||
+ st.MCacheSys == 0 || st.BuckHashSys == 0 || st.GCSys == 0 || st.OtherSys == 0 ||
+ st.NextGC == 0 || st.NumGC == 0 {
+ t.Fatalf("Zero value: %+v", *st)
+ }
+
+ if st.Alloc > 1e10 || st.TotalAlloc > 1e11 || st.Sys > 1e10 || st.Lookups > 1e10 ||
+ st.Mallocs > 1e10 || st.Frees > 1e10 || st.HeapAlloc > 1e10 || st.HeapSys > 1e10 ||
+ st.HeapIdle > 1e10 || st.HeapInuse > 1e10 || st.HeapObjects > 1e10 || st.StackInuse > 1e10 ||
+ st.StackSys > 1e10 || st.MSpanInuse > 1e10 || st.MSpanSys > 1e10 || st.MCacheInuse > 1e10 ||
+ st.MCacheSys > 1e10 || st.BuckHashSys > 1e10 || st.GCSys > 1e10 || st.OtherSys > 1e10 ||
+ st.NextGC > 1e10 || st.NumGC > 1e9 {
+ t.Fatalf("Insanely high value (overflow?): %+v", *st)
+ }
+
+ if st.Sys != st.HeapSys+st.StackSys+st.MSpanSys+st.MCacheSys+
+ st.BuckHashSys+st.GCSys+st.OtherSys {
+ t.Fatalf("Bad sys value: %+v", *st)
+ }
+
+ if st.HeapIdle+st.HeapInuse != st.HeapSys {
+ t.Fatalf("HeapIdle(%d) + HeapInuse(%d) should be equal to HeapSys(%d), but isn't.", st.HeapIdle, st.HeapInuse, st.HeapSys)
+ }
+}
+
+var mallocSink uintptr
+
+func BenchmarkMalloc8(b *testing.B) {
+ var x uintptr
+ for i := 0; i < b.N; i++ {
+ p := new(int64)
+ x ^= uintptr(unsafe.Pointer(p))
+ }
+ mallocSink = x
+}
+
+func BenchmarkMalloc16(b *testing.B) {
+ var x uintptr
+ for i := 0; i < b.N; i++ {
+ p := new([2]int64)
+ x ^= uintptr(unsafe.Pointer(p))
+ }
+ mallocSink = x
+}
+
+func BenchmarkMallocTypeInfo8(b *testing.B) {
+ var x uintptr
+ for i := 0; i < b.N; i++ {
+ p := new(struct {
+ p [8 / unsafe.Sizeof(uintptr(0))]*int
+ })
+ x ^= uintptr(unsafe.Pointer(p))
+ }
+ mallocSink = x
+}
+
+func BenchmarkMallocTypeInfo16(b *testing.B) {
+ var x uintptr
+ for i := 0; i < b.N; i++ {
+ p := new(struct {
+ p [16 / unsafe.Sizeof(uintptr(0))]*int
+ })
+ x ^= uintptr(unsafe.Pointer(p))
+ }
+ mallocSink = x
+}
+
+type LargeStruct struct {
+ x [16][]byte
+}
+
+func BenchmarkMallocLargeStruct(b *testing.B) {
+ var x uintptr
+ for i := 0; i < b.N; i++ {
+ p := make([]LargeStruct, 2)
+ x ^= uintptr(unsafe.Pointer(&p[0]))
+ }
+ mallocSink = x
+}
+
+var n = flag.Int("n", 1000, "number of goroutines")
+
+func BenchmarkGoroutineSelect(b *testing.B) {
+ quit := make(chan struct{})
+ read := func(ch chan struct{}) {
+ for {
+ select {
+ case _, ok := <-ch:
+ if !ok {
+ return
+ }
+ case <-quit:
+ return
+ }
+ }
+ }
+ benchHelper(b, *n, read)
+}
+
+func BenchmarkGoroutineBlocking(b *testing.B) {
+ read := func(ch chan struct{}) {
+ for {
+ if _, ok := <-ch; !ok {
+ return
+ }
+ }
+ }
+ benchHelper(b, *n, read)
+}
+
+func BenchmarkGoroutineForRange(b *testing.B) {
+ read := func(ch chan struct{}) {
+ for range ch {
+ }
+ }
+ benchHelper(b, *n, read)
+}
+
+func benchHelper(b *testing.B, n int, read func(chan struct{})) {
+ m := make([]chan struct{}, n)
+ for i := range m {
+ m[i] = make(chan struct{}, 1)
+ go read(m[i])
+ }
+ b.StopTimer()
+ b.ResetTimer()
+ GC()
+
+ for i := 0; i < b.N; i++ {
+ for _, ch := range m {
+ if ch != nil {
+ ch <- struct{}{}
+ }
+ }
+ time.Sleep(10 * time.Millisecond)
+ b.StartTimer()
+ GC()
+ b.StopTimer()
+ }
+
+ for _, ch := range m {
+ close(ch)
+ }
+ time.Sleep(10 * time.Millisecond)
+}
+
+func BenchmarkGoroutineIdle(b *testing.B) {
+ quit := make(chan struct{})
+ fn := func() {
+ <-quit
+ }
+ for i := 0; i < *n; i++ {
+ go fn()
+ }
+
+ GC()
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ GC()
+ }
+
+ b.StopTimer()
+ close(quit)
+ time.Sleep(10 * time.Millisecond)
+}
diff --git a/src/runtime/map_test.go b/src/runtime/map_test.go
new file mode 100644
index 000000000..8bedc0568
--- /dev/null
+++ b/src/runtime/map_test.go
@@ -0,0 +1,477 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "math"
+ "reflect"
+ "runtime"
+ "sort"
+ "strings"
+ "sync"
+ "testing"
+)
+
+// negative zero is a good test because:
+// 1) 0 and -0 are equal, yet have distinct representations.
+// 2) 0 is represented as all zeros, -0 isn't.
+// I'm not sure the language spec actually requires this behavior,
+// but it's what the current map implementation does.
+func TestNegativeZero(t *testing.T) {
+ m := make(map[float64]bool, 0)
+
+ m[+0.0] = true
+ m[math.Copysign(0.0, -1.0)] = true // should overwrite +0 entry
+
+ if len(m) != 1 {
+ t.Error("length wrong")
+ }
+
+ for k := range m {
+ if math.Copysign(1.0, k) > 0 {
+ t.Error("wrong sign")
+ }
+ }
+
+ m = make(map[float64]bool, 0)
+ m[math.Copysign(0.0, -1.0)] = true
+ m[+0.0] = true // should overwrite -0.0 entry
+
+ if len(m) != 1 {
+ t.Error("length wrong")
+ }
+
+ for k := range m {
+ if math.Copysign(1.0, k) < 0 {
+ t.Error("wrong sign")
+ }
+ }
+}
+
+// nan is a good test because nan != nan, and nan has
+// a randomized hash value.
+func TestNan(t *testing.T) {
+ m := make(map[float64]int, 0)
+ nan := math.NaN()
+ m[nan] = 1
+ m[nan] = 2
+ m[nan] = 4
+ if len(m) != 3 {
+ t.Error("length wrong")
+ }
+ s := 0
+ for k, v := range m {
+ if k == k {
+ t.Error("nan disappeared")
+ }
+ if (v & (v - 1)) != 0 {
+ t.Error("value wrong")
+ }
+ s |= v
+ }
+ if s != 7 {
+ t.Error("values wrong")
+ }
+}
+
+// Maps aren't actually copied on assignment.
+func TestAlias(t *testing.T) {
+ m := make(map[int]int, 0)
+ m[0] = 5
+ n := m
+ n[0] = 6
+ if m[0] != 6 {
+ t.Error("alias didn't work")
+ }
+}
+
+func TestGrowWithNaN(t *testing.T) {
+ m := make(map[float64]int, 4)
+ nan := math.NaN()
+ m[nan] = 1
+ m[nan] = 2
+ m[nan] = 4
+ cnt := 0
+ s := 0
+ growflag := true
+ for k, v := range m {
+ if growflag {
+ // force a hashtable resize
+ for i := 0; i < 100; i++ {
+ m[float64(i)] = i
+ }
+ growflag = false
+ }
+ if k != k {
+ cnt++
+ s |= v
+ }
+ }
+ if cnt != 3 {
+ t.Error("NaN keys lost during grow")
+ }
+ if s != 7 {
+ t.Error("NaN values lost during grow")
+ }
+}
+
+type FloatInt struct {
+ x float64
+ y int
+}
+
+func TestGrowWithNegativeZero(t *testing.T) {
+ negzero := math.Copysign(0.0, -1.0)
+ m := make(map[FloatInt]int, 4)
+ m[FloatInt{0.0, 0}] = 1
+ m[FloatInt{0.0, 1}] = 2
+ m[FloatInt{0.0, 2}] = 4
+ m[FloatInt{0.0, 3}] = 8
+ growflag := true
+ s := 0
+ cnt := 0
+ negcnt := 0
+ // The first iteration should return the +0 key.
+ // The subsequent iterations should return the -0 key.
+ // I'm not really sure this is required by the spec,
+ // but it makes sense.
+ // TODO: are we allowed to get the first entry returned again???
+ for k, v := range m {
+ if v == 0 {
+ continue
+ } // ignore entries added to grow table
+ cnt++
+ if math.Copysign(1.0, k.x) < 0 {
+ if v&16 == 0 {
+ t.Error("key/value not updated together 1")
+ }
+ negcnt++
+ s |= v & 15
+ } else {
+ if v&16 == 16 {
+ t.Error("key/value not updated together 2", k, v)
+ }
+ s |= v
+ }
+ if growflag {
+ // force a hashtable resize
+ for i := 0; i < 100; i++ {
+ m[FloatInt{3.0, i}] = 0
+ }
+ // then change all the entries
+ // to negative zero
+ m[FloatInt{negzero, 0}] = 1 | 16
+ m[FloatInt{negzero, 1}] = 2 | 16
+ m[FloatInt{negzero, 2}] = 4 | 16
+ m[FloatInt{negzero, 3}] = 8 | 16
+ growflag = false
+ }
+ }
+ if s != 15 {
+ t.Error("entry missing", s)
+ }
+ if cnt != 4 {
+ t.Error("wrong number of entries returned by iterator", cnt)
+ }
+ if negcnt != 3 {
+ t.Error("update to negzero missed by iteration", negcnt)
+ }
+}
+
+func TestIterGrowAndDelete(t *testing.T) {
+ m := make(map[int]int, 4)
+ for i := 0; i < 100; i++ {
+ m[i] = i
+ }
+ growflag := true
+ for k := range m {
+ if growflag {
+ // grow the table
+ for i := 100; i < 1000; i++ {
+ m[i] = i
+ }
+ // delete all odd keys
+ for i := 1; i < 1000; i += 2 {
+ delete(m, i)
+ }
+ growflag = false
+ } else {
+ if k&1 == 1 {
+ t.Error("odd value returned")
+ }
+ }
+ }
+}
+
+// make sure old bucket arrays don't get GCd while
+// an iterator is still using them.
+func TestIterGrowWithGC(t *testing.T) {
+ m := make(map[int]int, 4)
+ for i := 0; i < 16; i++ {
+ m[i] = i
+ }
+ growflag := true
+ bitmask := 0
+ for k := range m {
+ if k < 16 {
+ bitmask |= 1 << uint(k)
+ }
+ if growflag {
+ // grow the table
+ for i := 100; i < 1000; i++ {
+ m[i] = i
+ }
+ // trigger a gc
+ runtime.GC()
+ growflag = false
+ }
+ }
+ if bitmask != 1<<16-1 {
+ t.Error("missing key", bitmask)
+ }
+}
+
+func testConcurrentReadsAfterGrowth(t *testing.T, useReflect bool) {
+ if runtime.GOMAXPROCS(-1) == 1 {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(16))
+ }
+ numLoop := 10
+ numGrowStep := 250
+ numReader := 16
+ if testing.Short() {
+ numLoop, numGrowStep = 2, 500
+ }
+ for i := 0; i < numLoop; i++ {
+ m := make(map[int]int, 0)
+ for gs := 0; gs < numGrowStep; gs++ {
+ m[gs] = gs
+ var wg sync.WaitGroup
+ wg.Add(numReader * 2)
+ for nr := 0; nr < numReader; nr++ {
+ go func() {
+ defer wg.Done()
+ for range m {
+ }
+ }()
+ go func() {
+ defer wg.Done()
+ for key := 0; key < gs; key++ {
+ _ = m[key]
+ }
+ }()
+ if useReflect {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ mv := reflect.ValueOf(m)
+ keys := mv.MapKeys()
+ for _, k := range keys {
+ mv.MapIndex(k)
+ }
+ }()
+ }
+ }
+ wg.Wait()
+ }
+ }
+}
+
+func TestConcurrentReadsAfterGrowth(t *testing.T) {
+ testConcurrentReadsAfterGrowth(t, false)
+}
+
+func TestConcurrentReadsAfterGrowthReflect(t *testing.T) {
+ testConcurrentReadsAfterGrowth(t, true)
+}
+
+func TestBigItems(t *testing.T) {
+ var key [256]string
+ for i := 0; i < 256; i++ {
+ key[i] = "foo"
+ }
+ m := make(map[[256]string][256]string, 4)
+ for i := 0; i < 100; i++ {
+ key[37] = fmt.Sprintf("string%02d", i)
+ m[key] = key
+ }
+ var keys [100]string
+ var values [100]string
+ i := 0
+ for k, v := range m {
+ keys[i] = k[37]
+ values[i] = v[37]
+ i++
+ }
+ sort.Strings(keys[:])
+ sort.Strings(values[:])
+ for i := 0; i < 100; i++ {
+ if keys[i] != fmt.Sprintf("string%02d", i) {
+ t.Errorf("#%d: missing key: %v", i, keys[i])
+ }
+ if values[i] != fmt.Sprintf("string%02d", i) {
+ t.Errorf("#%d: missing value: %v", i, values[i])
+ }
+ }
+}
+
+type empty struct {
+}
+
+func TestEmptyKeyAndValue(t *testing.T) {
+ a := make(map[int]empty, 4)
+ b := make(map[empty]int, 4)
+ c := make(map[empty]empty, 4)
+ a[0] = empty{}
+ b[empty{}] = 0
+ b[empty{}] = 1
+ c[empty{}] = empty{}
+
+ if len(a) != 1 {
+ t.Errorf("empty value insert problem")
+ }
+ if b[empty{}] != 1 {
+ t.Errorf("empty key returned wrong value")
+ }
+}
+
+// Tests a map with a single bucket, with same-lengthed short keys
+// ("quick keys") as well as long keys.
+func TestSingleBucketMapStringKeys_DupLen(t *testing.T) {
+ testMapLookups(t, map[string]string{
+ "x": "x1val",
+ "xx": "x2val",
+ "foo": "fooval",
+ "bar": "barval", // same key length as "foo"
+ "xxxx": "x4val",
+ strings.Repeat("x", 128): "longval1",
+ strings.Repeat("y", 128): "longval2",
+ })
+}
+
+// Tests a map with a single bucket, with all keys having different lengths.
+func TestSingleBucketMapStringKeys_NoDupLen(t *testing.T) {
+ testMapLookups(t, map[string]string{
+ "x": "x1val",
+ "xx": "x2val",
+ "foo": "fooval",
+ "xxxx": "x4val",
+ "xxxxx": "x5val",
+ "xxxxxx": "x6val",
+ strings.Repeat("x", 128): "longval",
+ })
+}
+
+func testMapLookups(t *testing.T, m map[string]string) {
+ for k, v := range m {
+ if m[k] != v {
+ t.Fatalf("m[%q] = %q; want %q", k, m[k], v)
+ }
+ }
+}
+
+// Tests whether the iterator returns the right elements when
+// started in the middle of a grow, when the keys are NaNs.
+func TestMapNanGrowIterator(t *testing.T) {
+ m := make(map[float64]int)
+ nan := math.NaN()
+ const nBuckets = 16
+ // To fill nBuckets buckets takes LOAD * nBuckets keys.
+ nKeys := int(nBuckets * *runtime.HashLoad)
+
+ // Get map to full point with nan keys.
+ for i := 0; i < nKeys; i++ {
+ m[nan] = i
+ }
+ // Trigger grow
+ m[1.0] = 1
+ delete(m, 1.0)
+
+ // Run iterator
+ found := make(map[int]struct{})
+ for _, v := range m {
+ if v != -1 {
+ if _, repeat := found[v]; repeat {
+ t.Fatalf("repeat of value %d", v)
+ }
+ found[v] = struct{}{}
+ }
+ if len(found) == nKeys/2 {
+ // Halfway through iteration, finish grow.
+ for i := 0; i < nBuckets; i++ {
+ delete(m, 1.0)
+ }
+ }
+ }
+ if len(found) != nKeys {
+ t.Fatalf("missing value")
+ }
+}
+
+func TestMapIterOrder(t *testing.T) {
+ for _, n := range [...]int{3, 7, 9, 15} {
+ // Make m be {0: true, 1: true, ..., n-1: true}.
+ m := make(map[int]bool)
+ for i := 0; i < n; i++ {
+ m[i] = true
+ }
+ // Check that iterating over the map produces at least two different orderings.
+ ord := func() []int {
+ var s []int
+ for key := range m {
+ s = append(s, key)
+ }
+ return s
+ }
+ first := ord()
+ ok := false
+ for try := 0; try < 100; try++ {
+ if !reflect.DeepEqual(first, ord()) {
+ ok = true
+ break
+ }
+ }
+ if !ok {
+ t.Errorf("Map with n=%d elements had consistent iteration order: %v", n, first)
+ }
+ }
+}
+
+func TestMapStringBytesLookup(t *testing.T) {
+ // Use large string keys to avoid small-allocation coalescing,
+ // which can cause AllocsPerRun to report lower counts than it should.
+ m := map[string]int{
+ "1000000000000000000000000000000000000000000000000": 1,
+ "2000000000000000000000000000000000000000000000000": 2,
+ }
+ buf := []byte("1000000000000000000000000000000000000000000000000")
+ if x := m[string(buf)]; x != 1 {
+ t.Errorf(`m[string([]byte("1"))] = %d, want 1`, x)
+ }
+ buf[0] = '2'
+ if x := m[string(buf)]; x != 2 {
+ t.Errorf(`m[string([]byte("2"))] = %d, want 2`, x)
+ }
+
+ var x int
+ n := testing.AllocsPerRun(100, func() {
+ x += m[string(buf)]
+ })
+ if n != 0 {
+ t.Errorf("AllocsPerRun for m[string(buf)] = %v, want 0", n)
+ }
+
+ x = 0
+ n = testing.AllocsPerRun(100, func() {
+ y, ok := m[string(buf)]
+ if !ok {
+ panic("!ok")
+ }
+ x += y
+ })
+ if n != 0 {
+ t.Errorf("AllocsPerRun for x,ok = m[string(buf)] = %v, want 0", n)
+ }
+}
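
Editor's note: TestMapStringBytesLookup depends on the gc compiler recognizing the m[string(buf)] pattern and performing the lookup without allocating a temporary string. The stand-alone program below shows the same measurement outside a test; the allocation counts are compiler- and version-dependent, so the expected values in the comments are typical rather than guaranteed.

package main

import (
	"fmt"
	"testing"
)

func main() {
	m := map[string]int{"hello": 1}
	buf := []byte("hello")

	// The conversion in m[string(buf)] is recognized by the gc compiler
	// and the lookup is done without building a temporary string.
	allocs := testing.AllocsPerRun(100, func() {
		_ = m[string(buf)]
	})
	fmt.Println("allocations per direct lookup:", allocs) // typically 0

	// Materializing the string first defeats the optimization.
	allocs = testing.AllocsPerRun(100, func() {
		key := string(buf)
		_ = m[key]
	})
	fmt.Println("allocations per lookup via variable:", allocs) // typically 1
}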
diff --git a/src/runtime/mapspeed_test.go b/src/runtime/mapspeed_test.go
new file mode 100644
index 000000000..119eb3f39
--- /dev/null
+++ b/src/runtime/mapspeed_test.go
@@ -0,0 +1,300 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package runtime_test
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+)
+
+const size = 10
+
+func BenchmarkHashStringSpeed(b *testing.B) {
+ strings := make([]string, size)
+ for i := 0; i < size; i++ {
+ strings[i] = fmt.Sprintf("string#%d", i)
+ }
+ sum := 0
+ m := make(map[string]int, size)
+ for i := 0; i < size; i++ {
+ m[strings[i]] = 0
+ }
+ idx := 0
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ sum += m[strings[idx]]
+ idx++
+ if idx == size {
+ idx = 0
+ }
+ }
+}
+
+type chunk [17]byte
+
+func BenchmarkHashBytesSpeed(b *testing.B) {
+ // a bunch of chunks, each with a different alignment mod 16
+ var chunks [size]chunk
+ // initialize each to a different value
+ for i := 0; i < size; i++ {
+ chunks[i][0] = byte(i)
+ }
+ // put into a map
+ m := make(map[chunk]int, size)
+ for i, c := range chunks {
+ m[c] = i
+ }
+ idx := 0
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ if m[chunks[idx]] != idx {
+ b.Error("bad map entry for chunk")
+ }
+ idx++
+ if idx == size {
+ idx = 0
+ }
+ }
+}
+
+func BenchmarkHashInt32Speed(b *testing.B) {
+ ints := make([]int32, size)
+ for i := 0; i < size; i++ {
+ ints[i] = int32(i)
+ }
+ sum := 0
+ m := make(map[int32]int, size)
+ for i := 0; i < size; i++ {
+ m[ints[i]] = 0
+ }
+ idx := 0
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ sum += m[ints[idx]]
+ idx++
+ if idx == size {
+ idx = 0
+ }
+ }
+}
+
+func BenchmarkHashInt64Speed(b *testing.B) {
+ ints := make([]int64, size)
+ for i := 0; i < size; i++ {
+ ints[i] = int64(i)
+ }
+ sum := 0
+ m := make(map[int64]int, size)
+ for i := 0; i < size; i++ {
+ m[ints[i]] = 0
+ }
+ idx := 0
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ sum += m[ints[idx]]
+ idx++
+ if idx == size {
+ idx = 0
+ }
+ }
+}
+func BenchmarkHashStringArraySpeed(b *testing.B) {
+ stringpairs := make([][2]string, size)
+ for i := 0; i < size; i++ {
+ for j := 0; j < 2; j++ {
+ stringpairs[i][j] = fmt.Sprintf("string#%d/%d", i, j)
+ }
+ }
+ sum := 0
+ m := make(map[[2]string]int, size)
+ for i := 0; i < size; i++ {
+ m[stringpairs[i]] = 0
+ }
+ idx := 0
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ sum += m[stringpairs[idx]]
+ idx++
+ if idx == size {
+ idx = 0
+ }
+ }
+}
+
+func BenchmarkMegMap(b *testing.B) {
+ m := make(map[string]bool)
+ for suffix := 'A'; suffix <= 'G'; suffix++ {
+ m[strings.Repeat("X", 1<<20-1)+fmt.Sprint(suffix)] = true
+ }
+ key := strings.Repeat("X", 1<<20-1) + "k"
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = m[key]
+ }
+}
+
+func BenchmarkMegOneMap(b *testing.B) {
+ m := make(map[string]bool)
+ m[strings.Repeat("X", 1<<20)] = true
+ key := strings.Repeat("Y", 1<<20)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = m[key]
+ }
+}
+
+func BenchmarkMegEqMap(b *testing.B) {
+ m := make(map[string]bool)
+ key1 := strings.Repeat("X", 1<<20)
+ key2 := strings.Repeat("X", 1<<20) // equal but different instance
+ m[key1] = true
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = m[key2]
+ }
+}
+
+func BenchmarkMegEmptyMap(b *testing.B) {
+ m := make(map[string]bool)
+ key := strings.Repeat("X", 1<<20)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = m[key]
+ }
+}
+
+func BenchmarkSmallStrMap(b *testing.B) {
+ m := make(map[string]bool)
+ for suffix := 'A'; suffix <= 'G'; suffix++ {
+ m[fmt.Sprint(suffix)] = true
+ }
+ key := "k"
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = m[key]
+ }
+}
+
+func BenchmarkMapStringKeysEight_16(b *testing.B) { benchmarkMapStringKeysEight(b, 16) }
+func BenchmarkMapStringKeysEight_32(b *testing.B) { benchmarkMapStringKeysEight(b, 32) }
+func BenchmarkMapStringKeysEight_64(b *testing.B) { benchmarkMapStringKeysEight(b, 64) }
+func BenchmarkMapStringKeysEight_1M(b *testing.B) { benchmarkMapStringKeysEight(b, 1<<20) }
+
+func benchmarkMapStringKeysEight(b *testing.B, keySize int) {
+ m := make(map[string]bool)
+ for i := 0; i < 8; i++ {
+ m[strings.Repeat("K", i+1)] = true
+ }
+ key := strings.Repeat("K", keySize)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _ = m[key]
+ }
+}
+
+func BenchmarkIntMap(b *testing.B) {
+ m := make(map[int]bool)
+ for i := 0; i < 8; i++ {
+ m[i] = true
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, _ = m[7]
+ }
+}
+
+// Accessing the same keys in a row.
+func benchmarkRepeatedLookup(b *testing.B, lookupKeySize int) {
+ m := make(map[string]bool)
+ // At least bigger than a single bucket:
+ for i := 0; i < 64; i++ {
+ m[fmt.Sprintf("some key %d", i)] = true
+ }
+ base := strings.Repeat("x", lookupKeySize-1)
+ key1 := base + "1"
+ key2 := base + "2"
+ b.ResetTimer()
+ for i := 0; i < b.N/4; i++ {
+ _ = m[key1]
+ _ = m[key1]
+ _ = m[key2]
+ _ = m[key2]
+ }
+}
+
+func BenchmarkRepeatedLookupStrMapKey32(b *testing.B) { benchmarkRepeatedLookup(b, 32) }
+func BenchmarkRepeatedLookupStrMapKey1M(b *testing.B) { benchmarkRepeatedLookup(b, 1<<20) }
+
+func BenchmarkNewEmptyMap(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ _ = make(map[int]int)
+ }
+}
+
+func BenchmarkMapIter(b *testing.B) {
+ m := make(map[int]bool)
+ for i := 0; i < 8; i++ {
+ m[i] = true
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ for range m {
+ }
+ }
+}
+
+func BenchmarkMapIterEmpty(b *testing.B) {
+ m := make(map[int]bool)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ for range m {
+ }
+ }
+}
+
+func BenchmarkSameLengthMap(b *testing.B) {
+ // long strings, same length, differ in first few
+ // and last few bytes.
+ m := make(map[string]bool)
+ s1 := "foo" + strings.Repeat("-", 100) + "bar"
+ s2 := "goo" + strings.Repeat("-", 100) + "ber"
+ m[s1] = true
+ m[s2] = true
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _ = m[s1]
+ }
+}
+
+type BigKey [3]int64
+
+func BenchmarkBigKeyMap(b *testing.B) {
+ m := make(map[BigKey]bool)
+ k := BigKey{3, 4, 5}
+ m[k] = true
+ for i := 0; i < b.N; i++ {
+ _ = m[k]
+ }
+}
+
+type BigVal [3]int64
+
+func BenchmarkBigValMap(b *testing.B) {
+ m := make(map[BigKey]BigVal)
+ k := BigKey{3, 4, 5}
+ m[k] = BigVal{6, 7, 8}
+ for i := 0; i < b.N; i++ {
+ _ = m[k]
+ }
+}
+
+func BenchmarkSmallKeyMap(b *testing.B) {
+ m := make(map[int16]bool)
+ m[5] = true
+ for i := 0; i < b.N; i++ {
+ _ = m[5]
+ }
+}
diff --git a/src/runtime/mcache.c b/src/runtime/mcache.c
new file mode 100644
index 000000000..bb1fc5403
--- /dev/null
+++ b/src/runtime/mcache.c
@@ -0,0 +1,115 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Per-P malloc cache for small objects.
+//
+// See malloc.h for an overview.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+
+extern volatile intgo runtime·MemProfileRate;
+
+// dummy MSpan that contains no free objects.
+static MSpan emptymspan;
+
+MCache*
+runtime·allocmcache(void)
+{
+ intgo rate;
+ MCache *c;
+ int32 i;
+
+ runtime·lock(&runtime·mheap.lock);
+ c = runtime·FixAlloc_Alloc(&runtime·mheap.cachealloc);
+ runtime·unlock(&runtime·mheap.lock);
+ runtime·memclr((byte*)c, sizeof(*c));
+ for(i = 0; i < NumSizeClasses; i++)
+ c->alloc[i] = &emptymspan;
+
+ // Set first allocation sample size.
+ rate = runtime·MemProfileRate;
+ if(rate > 0x3fffffff) // make 2*rate not overflow
+ rate = 0x3fffffff;
+ if(rate != 0)
+ c->next_sample = runtime·fastrand1() % (2*rate);
+
+ return c;
+}
+
+static void
+freemcache(MCache *c)
+{
+ runtime·MCache_ReleaseAll(c);
+ runtime·stackcache_clear(c);
+ runtime·gcworkbuffree(c->gcworkbuf);
+ runtime·lock(&runtime·mheap.lock);
+ runtime·purgecachedstats(c);
+ runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
+ runtime·unlock(&runtime·mheap.lock);
+}
+
+static void
+freemcache_m(void)
+{
+ MCache *c;
+
+ c = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ freemcache(c);
+}
+
+void
+runtime·freemcache(MCache *c)
+{
+ void (*fn)(void);
+
+ g->m->ptrarg[0] = c;
+ fn = freemcache_m;
+ runtime·onM(&fn);
+}
+
+// Gets a span that has a free object in it and assigns it
+// to be the cached span for the given sizeclass. Returns this span.
+MSpan*
+runtime·MCache_Refill(MCache *c, int32 sizeclass)
+{
+ MSpan *s;
+
+ g->m->locks++;
+ // Return the current cached span to the central lists.
+ s = c->alloc[sizeclass];
+ if(s->freelist != nil)
+ runtime·throw("refill on a nonempty span");
+ if(s != &emptymspan)
+ s->incache = false;
+
+ // Get a new cached span from the central lists.
+ s = runtime·MCentral_CacheSpan(&runtime·mheap.central[sizeclass].mcentral);
+ if(s == nil)
+ runtime·throw("out of memory");
+ if(s->freelist == nil) {
+ runtime·printf("%d %d\n", s->ref, (int32)((s->npages << PageShift) / s->elemsize));
+ runtime·throw("empty span");
+ }
+ c->alloc[sizeclass] = s;
+ g->m->locks--;
+ return s;
+}
+
+void
+runtime·MCache_ReleaseAll(MCache *c)
+{
+ int32 i;
+ MSpan *s;
+
+ for(i=0; i<NumSizeClasses; i++) {
+ s = c->alloc[i];
+ if(s != &emptymspan) {
+ runtime·MCentral_UncacheSpan(&runtime·mheap.central[i].mcentral, s);
+ c->alloc[i] = &emptymspan;
+ }
+ }
+}
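
Editor's note: allocmcache seeds next_sample with a uniform draw in [0, 2*rate), so heap profile samples land on average once every rate bytes of allocation. The short simulation below, with math/rand standing in for the runtime's fastrand1, is only a sanity check of that arithmetic.

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	const rate = 512 * 1024 // a typical MemProfileRate
	const trials = 1000000

	var total float64
	for i := 0; i < trials; i++ {
		// Same draw as allocmcache: uniform in [0, 2*rate).
		total += float64(rand.Intn(2 * rate))
	}
	// The mean of a uniform draw over [0, 2*rate) is rate.
	fmt.Printf("mean sample period ~ %.0f bytes (target %d)\n", total/trials, rate)
}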
diff --git a/src/runtime/mcentral.c b/src/runtime/mcentral.c
new file mode 100644
index 000000000..fe6bcfeb1
--- /dev/null
+++ b/src/runtime/mcentral.c
@@ -0,0 +1,214 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Central free lists.
+//
+// See malloc.h for an overview.
+//
+// The MCentral doesn't actually contain the list of free objects; the MSpan does.
+// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
+// and those that are completely allocated (c->empty).
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+
+static MSpan* MCentral_Grow(MCentral *c);
+
+// Initialize a single central free list.
+void
+runtime·MCentral_Init(MCentral *c, int32 sizeclass)
+{
+ c->sizeclass = sizeclass;
+ runtime·MSpanList_Init(&c->nonempty);
+ runtime·MSpanList_Init(&c->empty);
+}
+
+// Allocate a span to use in an MCache.
+MSpan*
+runtime·MCentral_CacheSpan(MCentral *c)
+{
+ MSpan *s;
+ int32 cap, n;
+ uint32 sg;
+
+ runtime·lock(&c->lock);
+ sg = runtime·mheap.sweepgen;
+retry:
+ for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
+ if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
+ runtime·MSpanList_Remove(s);
+ runtime·MSpanList_InsertBack(&c->empty, s);
+ runtime·unlock(&c->lock);
+ runtime·MSpan_Sweep(s, true);
+ goto havespan;
+ }
+ if(s->sweepgen == sg-1) {
+ // the span is being swept by background sweeper, skip
+ continue;
+ }
+ // we have a nonempty span that does not require sweeping, allocate from it
+ runtime·MSpanList_Remove(s);
+ runtime·MSpanList_InsertBack(&c->empty, s);
+ runtime·unlock(&c->lock);
+ goto havespan;
+ }
+
+ for(s = c->empty.next; s != &c->empty; s = s->next) {
+ if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
+ // we have an empty span that requires sweeping,
+ // sweep it and see if we can free some space in it
+ runtime·MSpanList_Remove(s);
+ // swept spans are at the end of the list
+ runtime·MSpanList_InsertBack(&c->empty, s);
+ runtime·unlock(&c->lock);
+ runtime·MSpan_Sweep(s, true);
+ if(s->freelist != nil)
+ goto havespan;
+ runtime·lock(&c->lock);
+ // the span is still empty after sweep
+ // it is already in the empty list, so just retry
+ goto retry;
+ }
+ if(s->sweepgen == sg-1) {
+ // the span is being swept by background sweeper, skip
+ continue;
+ }
+ // already swept empty span,
+ // all subsequent ones must also be either swept or in process of sweeping
+ break;
+ }
+ runtime·unlock(&c->lock);
+
+ // Replenish central list if empty.
+ s = MCentral_Grow(c);
+ if(s == nil)
+ return nil;
+ runtime·lock(&c->lock);
+ runtime·MSpanList_InsertBack(&c->empty, s);
+ runtime·unlock(&c->lock);
+
+havespan:
+ // At this point s is a non-empty span, queued at the end of the empty list,
+ // c is unlocked.
+ cap = (s->npages << PageShift) / s->elemsize;
+ n = cap - s->ref;
+ if(n == 0)
+ runtime·throw("empty span");
+ if(s->freelist == nil)
+ runtime·throw("freelist empty");
+ s->incache = true;
+ return s;
+}
+
+// Return span from an MCache.
+void
+runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
+{
+ int32 cap, n;
+
+ runtime·lock(&c->lock);
+
+ s->incache = false;
+
+ if(s->ref == 0)
+ runtime·throw("uncaching full span");
+
+ cap = (s->npages << PageShift) / s->elemsize;
+ n = cap - s->ref;
+ if(n > 0) {
+ runtime·MSpanList_Remove(s);
+ runtime·MSpanList_Insert(&c->nonempty, s);
+ }
+ runtime·unlock(&c->lock);
+}
+
+// Free n objects from a span s back into the central free list c.
+// Called during sweep.
+// Returns true if the span was returned to heap. Sets sweepgen to
+// the latest generation.
+// If preserve=true, don't return the span to heap nor relink in MCentral lists;
+// caller takes care of it.
+bool
+runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
+{
+ bool wasempty;
+
+ if(s->incache)
+ runtime·throw("freespan into cached span");
+
+ // Add the objects back to s's free list.
+ wasempty = s->freelist == nil;
+ end->next = s->freelist;
+ s->freelist = start;
+ s->ref -= n;
+
+ if(preserve) {
+ // preserve is set only when called from MCentral_CacheSpan above,
+ // the span must be in the empty list.
+ if(s->next == nil)
+ runtime·throw("can't preserve unlinked span");
+ runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
+ return false;
+ }
+
+ runtime·lock(&c->lock);
+
+ // Move to nonempty if necessary.
+ if(wasempty) {
+ runtime·MSpanList_Remove(s);
+ runtime·MSpanList_Insert(&c->nonempty, s);
+ }
+
+ // delay updating sweepgen until here. This is the signal that
+ // the span may be used in an MCache, so it must come after the
+ // linked list operations above (actually, just after the
+ // lock of c above.)
+ runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
+
+ if(s->ref != 0) {
+ runtime·unlock(&c->lock);
+ return false;
+ }
+
+ // s is completely freed, return it to the heap.
+ runtime·MSpanList_Remove(s);
+ s->needzero = 1;
+ s->freelist = nil;
+ runtime·unlock(&c->lock);
+ runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ runtime·MHeap_Free(&runtime·mheap, s, 0);
+ return true;
+}
+
+// Fetch a new span from the heap and carve into objects for the free list.
+static MSpan*
+MCentral_Grow(MCentral *c)
+{
+ uintptr size, npages, i, n;
+ MLink **tailp, *v;
+ byte *p;
+ MSpan *s;
+
+ npages = runtime·class_to_allocnpages[c->sizeclass];
+ size = runtime·class_to_size[c->sizeclass];
+ n = (npages << PageShift) / size;
+ s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
+ if(s == nil)
+ return nil;
+
+ // Carve span into sequence of blocks.
+ tailp = &s->freelist;
+ p = (byte*)(s->start << PageShift);
+ s->limit = p + size*n;
+ for(i=0; i<n; i++) {
+ v = (MLink*)p;
+ *tailp = v;
+ tailp = &v->next;
+ p += size;
+ }
+ *tailp = nil;
+ runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
+ return s;
+}
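
Editor's note: MCentral_Grow threads an intrusive free list through the span's own memory, advancing a pointer-to-pointer (tailp) so that each block's first word points at the next block. The Go sketch below reproduces that carving idiom with a byte slice in place of span memory; the link type and carve function are illustrative only and ignore the GC and alignment subtleties the runtime has to respect.

package main

import (
	"fmt"
	"unsafe"
)

// link mirrors MLink: the first word of each free block points
// at the next free block.
type link struct{ next *link }

// carve threads a free list through mem, one block every size bytes,
// and returns the head, like the tailp loop in MCentral_Grow.
func carve(mem []byte, size uintptr) *link {
	n := uintptr(len(mem)) / size
	var head *link
	tailp := &head
	for i := uintptr(0); i < n; i++ {
		b := (*link)(unsafe.Pointer(&mem[i*size]))
		*tailp = b
		tailp = &b.next
	}
	*tailp = nil
	return head
}

func main() {
	mem := make([]byte, 1024)
	head := carve(mem, 64)
	count := 0
	for b := head; b != nil; b = b.next {
		count++
	}
	fmt.Println("free blocks:", count) // 16
}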
diff --git a/src/runtime/mem.go b/src/runtime/mem.go
new file mode 100644
index 000000000..34391b2eb
--- /dev/null
+++ b/src/runtime/mem.go
@@ -0,0 +1,72 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Note: the MemStats struct should be kept in sync with
+// struct MStats in malloc.h
+
+// A MemStats records statistics about the memory allocator.
+type MemStats struct {
+ // General statistics.
+ Alloc uint64 // bytes allocated and still in use
+ TotalAlloc uint64 // bytes allocated (even if freed)
+ Sys uint64 // bytes obtained from system (sum of XxxSys below)
+ Lookups uint64 // number of pointer lookups
+ Mallocs uint64 // number of mallocs
+ Frees uint64 // number of frees
+
+ // Main allocation heap statistics.
+ HeapAlloc uint64 // bytes allocated and still in use
+ HeapSys uint64 // bytes obtained from system
+ HeapIdle uint64 // bytes in idle spans
+ HeapInuse uint64 // bytes in non-idle span
+ HeapReleased uint64 // bytes released to the OS
+ HeapObjects uint64 // total number of allocated objects
+
+ // Low-level fixed-size structure allocator statistics.
+ // Inuse is bytes used now.
+ // Sys is bytes obtained from system.
+ StackInuse uint64 // bytes used by stack allocator
+ StackSys uint64
+ MSpanInuse uint64 // mspan structures
+ MSpanSys uint64
+ MCacheInuse uint64 // mcache structures
+ MCacheSys uint64
+ BuckHashSys uint64 // profiling bucket hash table
+ GCSys uint64 // GC metadata
+ OtherSys uint64 // other system allocations
+
+ // Garbage collector statistics.
+ NextGC uint64 // next run in HeapAlloc time (bytes)
+ LastGC uint64 // last run in absolute time (ns)
+ PauseTotalNs uint64
+ PauseNs [256]uint64 // circular buffer of recent GC pause times, most recent at [(NumGC+255)%256]
+ NumGC uint32
+ EnableGC bool
+ DebugGC bool
+
+ // Per-size allocation statistics.
+ // 61 is NumSizeClasses in the C code.
+ BySize [61]struct {
+ Size uint32
+ Mallocs uint64
+ Frees uint64
+ }
+}
+
+var sizeof_C_MStats uintptr // filled in by malloc.goc
+
+func init() {
+ var memStats MemStats
+ if sizeof_C_MStats != unsafe.Sizeof(memStats) {
+ println(sizeof_C_MStats, unsafe.Sizeof(memStats))
+ panic("MStats vs MemStatsType size mismatch")
+ }
+}
+
+// ReadMemStats populates m with memory allocator statistics.
+func ReadMemStats(m *MemStats)
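
Editor's note: MemStats and ReadMemStats are the exported counterparts of the MStats counters declared in malloc.h. A short example of the public API as a caller would use it; only the choice of fields printed here is arbitrary.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// Allocate something so the counters are non-trivial.
	data := make([][]byte, 100)
	for i := range data {
		data[i] = make([]byte, 1<<10)
	}

	var m runtime.MemStats
	runtime.ReadMemStats(&m)

	fmt.Printf("Alloc      = %d bytes live\n", m.Alloc)
	fmt.Printf("TotalAlloc = %d bytes allocated over the program's life\n", m.TotalAlloc)
	fmt.Printf("Sys        = %d bytes obtained from the OS\n", m.Sys)
	fmt.Printf("NumGC      = %d collections\n", m.NumGC)

	_ = data // keep the slices live past ReadMemStats
}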
diff --git a/src/runtime/mem_darwin.c b/src/runtime/mem_darwin.c
new file mode 100644
index 000000000..bf3ede577
--- /dev/null
+++ b/src/runtime/mem_darwin.c
@@ -0,0 +1,82 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *v;
+
+ v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(v < (void*)4096)
+ return nil;
+ runtime·xadd64(stat, n);
+ return v;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ // Linux's MADV_DONTNEED is like BSD's MADV_FREE.
+ runtime·madvise(v, n, MADV_FREE);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ *reserved = true;
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096)
+ return nil;
+ return p;
+}
+
+enum
+{
+ ENOMEM = 12,
+};
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ USED(reserved);
+
+ runtime·xadd64(stat, n);
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
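
Editor's note: mem_darwin.c and the other Unix mem_*.c files follow the same shape: anonymous private mmap for sysAlloc, PROT_NONE mappings for SysReserve, madvise for SysUnused, and munmap for SysFree. The user-level Go sketch below only illustrates the mmap flags involved, via the syscall package; it is not how the runtime itself calls the OS, and it assumes the MAP_ANON spelling is available on the listed platforms.

//go:build darwin || freebsd || dragonfly || netbsd || openbsd || linux

package main

import (
	"fmt"
	"syscall"
)

func main() {
	const n = 1 << 20 // 1 MB

	// Roughly what sysAlloc does: ask the OS for zeroed, private,
	// anonymous memory that is immediately usable.
	mem, err := syscall.Mmap(-1, 0, n,
		syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		panic(err)
	}
	mem[0] = 42 // the mapping is readable and writable
	fmt.Println("mapped", len(mem), "bytes, first byte =", mem[0])

	// Roughly what SysFree does: return the pages to the OS by
	// dropping the mapping entirely.
	if err := syscall.Munmap(mem); err != nil {
		panic(err)
	}
}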
diff --git a/src/runtime/mem_dragonfly.c b/src/runtime/mem_dragonfly.c
new file mode 100644
index 000000000..11457b2c0
--- /dev/null
+++ b/src/runtime/mem_dragonfly.c
@@ -0,0 +1,105 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum
+{
+ ENOMEM = 12,
+};
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *v;
+
+ v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(v < (void*)4096)
+ return nil;
+ runtime·xadd64(stat, n);
+ return v;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ runtime·madvise(v, n, MADV_FREE);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // and check the assumption in SysMap.
+ if(sizeof(void*) == 8 && n > 1LL<<32) {
+ *reserved = false;
+ return v;
+ }
+
+ *reserved = true;
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096)
+ return nil;
+ return p;
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ runtime·xadd64(stat, n);
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(!reserved) {
+ // TODO(jsing): For some reason DragonFly seems to return
+ // memory at a different address than we requested, even when
+ // there should be no reason for it to do so. This can be
+ // avoided by using MAP_FIXED, but I'm not sure we should need
+ // to do this - we do not on other platforms.
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v) {
+ runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime·throw("runtime: address space conflict");
+ }
+ return;
+ }
+
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
diff --git a/src/runtime/mem_freebsd.c b/src/runtime/mem_freebsd.c
new file mode 100644
index 000000000..18a9a2f5b
--- /dev/null
+++ b/src/runtime/mem_freebsd.c
@@ -0,0 +1,100 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum
+{
+ ENOMEM = 12,
+};
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *v;
+
+ v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(v < (void*)4096)
+ return nil;
+ runtime·xadd64(stat, n);
+ return v;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ runtime·madvise(v, n, MADV_FREE);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // and check the assumption in SysMap.
+ if(sizeof(void*) == 8 && n > 1LL<<32) {
+ *reserved = false;
+ return v;
+ }
+
+ *reserved = true;
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096)
+ return nil;
+ return p;
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ runtime·xadd64(stat, n);
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(!reserved) {
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v) {
+ runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime·throw("runtime: address space conflict");
+ }
+ return;
+ }
+
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
diff --git a/src/runtime/mem_linux.c b/src/runtime/mem_linux.c
new file mode 100644
index 000000000..bfb405607
--- /dev/null
+++ b/src/runtime/mem_linux.c
@@ -0,0 +1,162 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum
+{
+ _PAGE_SIZE = 4096,
+ EACCES = 13,
+};
+
+static int32
+addrspace_free(void *v, uintptr n)
+{
+ int32 errval;
+ uintptr chunk;
+ uintptr off;
+
+ // NOTE: vec must be just 1 byte long here.
+ // Mincore returns ENOMEM if any of the pages are unmapped,
+ // but we want to know that all of the pages are unmapped.
+ // To make these the same, we can only ask about one page
+ // at a time. See golang.org/issue/7476.
+ static byte vec[1];
+
+ for(off = 0; off < n; off += chunk) {
+ chunk = _PAGE_SIZE * sizeof vec;
+ if(chunk > (n - off))
+ chunk = n - off;
+ errval = runtime·mincore((int8*)v + off, chunk, vec);
+ // ENOMEM means unmapped, which is what we want.
+ // Anything else we assume means the pages are mapped.
+ if (errval != -ENOMEM)
+ return 0;
+ }
+ return 1;
+}
+
+static void *
+mmap_fixed(byte *v, uintptr n, int32 prot, int32 flags, int32 fd, uint32 offset)
+{
+ void *p;
+
+ p = runtime·mmap(v, n, prot, flags, fd, offset);
+ if(p != v && addrspace_free(v, n)) {
+ // On some systems, mmap ignores v without
+ // MAP_FIXED, so retry if the address space is free.
+ if(p > (void*)4096)
+ runtime·munmap(p, n);
+ p = runtime·mmap(v, n, prot, flags|MAP_FIXED, fd, offset);
+ }
+ return p;
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *p;
+
+ p = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096) {
+ if(p == (void*)EACCES) {
+ runtime·printf("runtime: mmap: access denied\n");
+ runtime·printf("if you're running SELinux, enable execmem for this process.\n");
+ runtime·exit(2);
+ }
+ if(p == (void*)EAGAIN) {
+ runtime·printf("runtime: mmap: too much locked memory (check 'ulimit -l').\n");
+ runtime·exit(2);
+ }
+ return nil;
+ }
+ runtime·xadd64(stat, n);
+ return p;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ runtime·madvise(v, n, MADV_DONTNEED);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // if we can reserve at least 64K and check the assumption in SysMap.
+ // Only user-mode Linux (UML) rejects these requests.
+ if(sizeof(void*) == 8 && n > 1LL<<32) {
+ p = mmap_fixed(v, 64<<10, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if (p != v) {
+ if(p >= (void*)4096)
+ runtime·munmap(p, 64<<10);
+ return nil;
+ }
+ runtime·munmap(p, 64<<10);
+ *reserved = false;
+ return v;
+ }
+
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if((uintptr)p < 4096)
+ return nil;
+ *reserved = true;
+ return p;
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ runtime·xadd64(stat, n);
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(!reserved) {
+ p = mmap_fixed(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v) {
+ runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime·throw("runtime: address space conflict");
+ }
+ return;
+ }
+
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
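
The addrspace_free probe above asks mincore about a single page at a time because mincore reports ENOMEM if any page in the queried range is unmapped, and the runtime wants to know that all pages are unmapped. A hedged, Linux-only Go sketch of the same probe (the helper name and the 4096-byte page size are assumptions):

	import (
		"syscall"
		"unsafe"
	)

	// addrspaceFree reports whether [v, v+n) looks unmapped. It probes one
	// page at a time: a larger mincore query cannot distinguish "all pages
	// unmapped" from "some pages unmapped", since both return ENOMEM.
	func addrspaceFree(v, n uintptr) bool {
		const pageSize = 4096
		var vec [1]byte
		for off := uintptr(0); off < n; off += pageSize {
			_, _, errno := syscall.Syscall(syscall.SYS_MINCORE,
				v+off, pageSize, uintptr(unsafe.Pointer(&vec[0])))
			if errno != syscall.ENOMEM {
				return false // page is mapped (or the probe failed some other way)
			}
		}
		return true
	}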
diff --git a/src/runtime/mem_nacl.c b/src/runtime/mem_nacl.c
new file mode 100644
index 000000000..6c836f18a
--- /dev/null
+++ b/src/runtime/mem_nacl.c
@@ -0,0 +1,120 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum
+{
+ Debug = 0,
+};
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *v;
+
+ v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(v < (void*)4096) {
+ if(Debug)
+ runtime·printf("sysAlloc(%p): %p\n", n, v);
+ return nil;
+ }
+ runtime·xadd64(stat, n);
+ if(Debug)
+ runtime·printf("sysAlloc(%p) = %p\n", n, v);
+ return v;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ if(Debug)
+ runtime·printf("SysUnused(%p, %p)\n", v, n);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ if(Debug)
+ runtime·printf("SysFree(%p, %p)\n", v, n);
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // and check the assumption in SysMap.
+ if(NaCl || sizeof(void*) == 8) {
+ *reserved = false;
+ return v;
+ }
+
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096)
+ return nil;
+ *reserved = true;
+ return p;
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ runtime·xadd64(stat, n);
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(!reserved) {
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM) {
+ runtime·printf("SysMap(%p, %p): %p\n", v, n, p);
+ runtime·throw("runtime: out of memory");
+ }
+ if(p != v) {
+ runtime·printf("SysMap(%p, %p): %p\n", v, n, p);
+ runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime·throw("runtime: address space conflict");
+ }
+ if(Debug)
+ runtime·printf("SysMap(%p, %p) = %p\n", v, n, p);
+ return;
+ }
+
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM) {
+ runtime·printf("SysMap(%p, %p): %p\n", v, n, p);
+ runtime·throw("runtime: out of memory");
+ }
+ if(p != v) {
+ runtime·printf("SysMap(%p, %p): %p\n", v, n, p);
+ runtime·printf("mmap MAP_FIXED %p returned %p\n", v, p);
+ runtime·throw("runtime: cannot map pages in arena address space");
+ }
+ if(Debug)
+ runtime·printf("SysMap(%p, %p) = %p\n", v, n, p);
+}
diff --git a/src/runtime/mem_netbsd.c b/src/runtime/mem_netbsd.c
new file mode 100644
index 000000000..31820e517
--- /dev/null
+++ b/src/runtime/mem_netbsd.c
@@ -0,0 +1,100 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum
+{
+ ENOMEM = 12,
+};
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *v;
+
+ v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(v < (void*)4096)
+ return nil;
+ runtime·xadd64(stat, n);
+ return v;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ runtime·madvise(v, n, MADV_FREE);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // and check the assumption in SysMap.
+ if(sizeof(void*) == 8 && n > 1LL<<32) {
+ *reserved = false;
+ return v;
+ }
+
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096)
+ return nil;
+ *reserved = true;
+ return p;
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ runtime·xadd64(stat, n);
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(!reserved) {
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v) {
+ runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime·throw("runtime: address space conflict");
+ }
+ return;
+ }
+
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
diff --git a/src/runtime/mem_openbsd.c b/src/runtime/mem_openbsd.c
new file mode 100644
index 000000000..31820e517
--- /dev/null
+++ b/src/runtime/mem_openbsd.c
@@ -0,0 +1,100 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum
+{
+ ENOMEM = 12,
+};
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *v;
+
+ v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(v < (void*)4096)
+ return nil;
+ runtime·xadd64(stat, n);
+ return v;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ runtime·madvise(v, n, MADV_FREE);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // and check the assumption in SysMap.
+ if(sizeof(void*) == 8 && n > 1LL<<32) {
+ *reserved = false;
+ return v;
+ }
+
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096)
+ return nil;
+ *reserved = true;
+ return p;
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ runtime·xadd64(stat, n);
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(!reserved) {
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v) {
+ runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime·throw("runtime: address space conflict");
+ }
+ return;
+ }
+
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
diff --git a/src/runtime/mem_plan9.c b/src/runtime/mem_plan9.c
new file mode 100644
index 000000000..402869f39
--- /dev/null
+++ b/src/runtime/mem_plan9.c
@@ -0,0 +1,120 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "os_GOOS.h"
+#include "textflag.h"
+
+extern byte runtime·end[];
+static byte *bloc = { runtime·end };
+static Mutex memlock;
+
+enum
+{
+ Round = PAGESIZE-1
+};
+
+static void*
+brk(uintptr nbytes)
+{
+ uintptr bl;
+
+ runtime·lock(&memlock);
+ // Plan 9 sbrk from /sys/src/libc/9sys/sbrk.c
+ bl = ((uintptr)bloc + Round) & ~Round;
+ if(runtime·brk_((void*)(bl + nbytes)) < 0) {
+ runtime·unlock(&memlock);
+ return nil;
+ }
+ bloc = (byte*)bl + nbytes;
+ runtime·unlock(&memlock);
+ return (void*)bl;
+}
+
+static void
+sysalloc(void)
+{
+ uintptr nbytes;
+ uint64 *stat;
+ void *p;
+
+ nbytes = g->m->scalararg[0];
+ stat = g->m->ptrarg[0];
+ g->m->scalararg[0] = 0;
+ g->m->ptrarg[0] = nil;
+
+ p = brk(nbytes);
+ if(p != nil)
+ runtime·xadd64(stat, nbytes);
+
+ g->m->ptrarg[0] = p;
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr nbytes, uint64 *stat)
+{
+ void (*fn)(void);
+ void *p;
+
+ g->m->scalararg[0] = nbytes;
+ g->m->ptrarg[0] = stat;
+ fn = sysalloc;
+ runtime·onM(&fn);
+ p = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ return p;
+}
+
+void
+runtime·SysFree(void *v, uintptr nbytes, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)nbytes);
+ runtime·lock(&memlock);
+ // from tiny/mem.c
+ // Push pointer back if this is a free
+ // of the most recent sysAlloc.
+ nbytes += (nbytes + Round) & ~Round;
+ if(bloc == (byte*)v+nbytes)
+ bloc -= nbytes;
+ runtime·unlock(&memlock);
+}
+
+void
+runtime·SysUnused(void *v, uintptr nbytes)
+{
+ USED(v, nbytes);
+}
+
+void
+runtime·SysUsed(void *v, uintptr nbytes)
+{
+ USED(v, nbytes);
+}
+
+void
+runtime·SysMap(void *v, uintptr nbytes, bool reserved, uint64 *stat)
+{
+ // SysReserve has already allocated all heap memory,
+ // but has not adjusted stats.
+ USED(v, reserved);
+ runtime·xadd64(stat, nbytes);
+}
+
+void
+runtime·SysFault(void *v, uintptr nbytes)
+{
+ USED(v, nbytes);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr nbytes, bool *reserved)
+{
+ USED(v);
+ *reserved = true;
+ return brk(nbytes);
+}
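
Plan 9 has no mmap, so the file above grows a single break pointer, rounding it up to a page boundary, and gives memory back only when the freed block is the most recent allocation. A simplified Go sketch of that bump allocator (the type and the grow callback are illustrative, and the rounding in SysFree is mirrored only approximately):

	// Illustrative bump allocator mirroring the brk-based logic above.
	type bumpAlloc struct {
		bloc uintptr              // current break
		grow func(to uintptr) bool // hypothetical stand-in for runtime·brk_
	}

	const pageRound = 4096 - 1

	func (a *bumpAlloc) alloc(n uintptr) (uintptr, bool) {
		bl := (a.bloc + pageRound) &^ pageRound // round up to a page boundary
		if !a.grow(bl + n) {
			return 0, false
		}
		a.bloc = bl + n
		return bl, true
	}

	func (a *bumpAlloc) free(v, n uintptr) {
		// Only the most recent allocation can be pushed back to the system.
		if a.bloc == v+n {
			a.bloc = v
		}
	}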
diff --git a/src/runtime/mem_solaris.c b/src/runtime/mem_solaris.c
new file mode 100644
index 000000000..8e90ba1d9
--- /dev/null
+++ b/src/runtime/mem_solaris.c
@@ -0,0 +1,101 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum
+{
+ ENOMEM = 12,
+};
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ void *v;
+
+ v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(v < (void*)4096)
+ return nil;
+ runtime·xadd64(stat, n);
+ return v;
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, -(uint64)n);
+ runtime·munmap(v, n);
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ void *p;
+
+ // On 64-bit, people with ulimit -v set complain if we reserve too
+ // much address space. Instead, assume that the reservation is okay
+ // and check the assumption in SysMap.
+ if(sizeof(void*) == 8 && n > 1LL<<32) {
+ *reserved = false;
+ return v;
+ }
+
+ p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p < (void*)4096)
+ return nil;
+ *reserved = true;
+ return p;
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ runtime·xadd64(stat, n);
+
+ // On 64-bit, we don't actually have v reserved, so tread carefully.
+ if(!reserved) {
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v) {
+ runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
+ runtime·throw("runtime: address space conflict");
+ }
+ return;
+ }
+
+ p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
+ if(p == (void*)ENOMEM)
+ runtime·throw("runtime: out of memory");
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
diff --git a/src/runtime/mem_windows.c b/src/runtime/mem_windows.c
new file mode 100644
index 000000000..7bc028bf3
--- /dev/null
+++ b/src/runtime/mem_windows.c
@@ -0,0 +1,120 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "os_GOOS.h"
+#include "defs_GOOS_GOARCH.h"
+#include "malloc.h"
+#include "textflag.h"
+
+enum {
+ MEM_COMMIT = 0x1000,
+ MEM_RESERVE = 0x2000,
+ MEM_DECOMMIT = 0x4000,
+ MEM_RELEASE = 0x8000,
+
+ PAGE_READWRITE = 0x0004,
+ PAGE_NOACCESS = 0x0001,
+};
+
+#pragma dynimport runtime·VirtualAlloc VirtualAlloc "kernel32.dll"
+#pragma dynimport runtime·VirtualFree VirtualFree "kernel32.dll"
+#pragma dynimport runtime·VirtualProtect VirtualProtect "kernel32.dll"
+extern void *runtime·VirtualAlloc;
+extern void *runtime·VirtualFree;
+extern void *runtime·VirtualProtect;
+
+#pragma textflag NOSPLIT
+void*
+runtime·sysAlloc(uintptr n, uint64 *stat)
+{
+ runtime·xadd64(stat, n);
+ return runtime·stdcall4(runtime·VirtualAlloc, 0, n, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
+}
+
+void
+runtime·SysUnused(void *v, uintptr n)
+{
+ void *r;
+ uintptr small;
+
+ r = runtime·stdcall3(runtime·VirtualFree, (uintptr)v, n, MEM_DECOMMIT);
+ if(r != nil)
+ return;
+
+ // Decommit failed. Usual reason is that we've merged memory from two different
+ // VirtualAlloc calls, and Windows will only let each VirtualFree handle pages from
+ // a single VirtualAlloc. It is okay to specify a subset of the pages from a single alloc,
+ // just not pages from multiple allocs. This is a rare case, arising only when we're
+ // trying to give memory back to the operating system, which happens on a time
+ // scale of minutes. It doesn't have to be terribly fast. Instead of extra bookkeeping
+ // on all our VirtualAlloc calls, try freeing successively smaller pieces until
+ // we manage to free something, and then repeat. This ends up being O(n log n)
+ // in the worst case, but that's fast enough.
+ while(n > 0) {
+ small = n;
+ while(small >= 4096 && runtime·stdcall3(runtime·VirtualFree, (uintptr)v, small, MEM_DECOMMIT) == nil)
+ small = (small / 2) & ~(4096-1);
+ if(small < 4096)
+ runtime·throw("runtime: failed to decommit pages");
+ v = (byte*)v + small;
+ n -= small;
+ }
+}
+
+void
+runtime·SysUsed(void *v, uintptr n)
+{
+ void *r;
+
+ r = runtime·stdcall4(runtime·VirtualAlloc, (uintptr)v, n, MEM_COMMIT, PAGE_READWRITE);
+ if(r != v)
+ runtime·throw("runtime: failed to commit pages");
+}
+
+void
+runtime·SysFree(void *v, uintptr n, uint64 *stat)
+{
+ uintptr r;
+
+ runtime·xadd64(stat, -(uint64)n);
+ r = (uintptr)runtime·stdcall3(runtime·VirtualFree, (uintptr)v, 0, MEM_RELEASE);
+ if(r == 0)
+ runtime·throw("runtime: failed to release pages");
+}
+
+void
+runtime·SysFault(void *v, uintptr n)
+{
+ // SysUnused makes the memory inaccessible and prevents its reuse
+ runtime·SysUnused(v, n);
+}
+
+void*
+runtime·SysReserve(void *v, uintptr n, bool *reserved)
+{
+ *reserved = true;
+ // v is just a hint.
+ // First try at v.
+ v = runtime·stdcall4(runtime·VirtualAlloc, (uintptr)v, n, MEM_RESERVE, PAGE_READWRITE);
+ if(v != nil)
+ return v;
+
+ // Next let the kernel choose the address.
+ return runtime·stdcall4(runtime·VirtualAlloc, 0, n, MEM_RESERVE, PAGE_READWRITE);
+}
+
+void
+runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
+{
+ void *p;
+
+ USED(reserved);
+
+ runtime·xadd64(stat, n);
+ p = runtime·stdcall4(runtime·VirtualAlloc, (uintptr)v, n, MEM_COMMIT, PAGE_READWRITE);
+ if(p != v)
+ runtime·throw("runtime: cannot map pages in arena address space");
+}
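
The comment in SysUnused above explains why a whole-range decommit can fail (the range may span pages from more than one VirtualAlloc reservation) and how the code falls back to freeing successively smaller, page-aligned pieces. A Go sketch of that fallback, with a hypothetical decommit callback standing in for the VirtualFree(MEM_DECOMMIT) call:

	// decommitRange releases [v, v+n) in pieces when a whole-range decommit
	// fails: halve the attempted size (keeping it page-aligned) until a call
	// succeeds, then repeat on the remainder. O(n log n) in the worst case,
	// which is acceptable because this path runs on a timescale of minutes.
	func decommitRange(v, n uintptr, decommit func(v, n uintptr) bool) {
		const pageSize = 4096
		if decommit(v, n) {
			return
		}
		for n > 0 {
			small := n
			for small >= pageSize && !decommit(v, small) {
				small = (small / 2) &^ (pageSize - 1)
			}
			if small < pageSize {
				panic("failed to decommit pages")
			}
			v += small
			n -= small
		}
	}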
diff --git a/src/runtime/memclr_386.s b/src/runtime/memclr_386.s
new file mode 100644
index 000000000..8b163923e
--- /dev/null
+++ b/src/runtime/memclr_386.s
@@ -0,0 +1,128 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9
+
+#include "textflag.h"
+
+// void runtime·memclr(void*, uintptr)
+TEXT runtime·memclr(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), DI
+ MOVL n+4(FP), BX
+ XORL AX, AX
+
+ // MOVOU seems always faster than REP STOSL.
+clr_tail:
+ TESTL BX, BX
+ JEQ clr_0
+ CMPL BX, $2
+ JBE clr_1or2
+ CMPL BX, $4
+ JBE clr_3or4
+ CMPL BX, $8
+ JBE clr_5through8
+ CMPL BX, $16
+ JBE clr_9through16
+ TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
+ JEQ nosse2
+ PXOR X0, X0
+ CMPL BX, $32
+ JBE clr_17through32
+ CMPL BX, $64
+ JBE clr_33through64
+ CMPL BX, $128
+ JBE clr_65through128
+ CMPL BX, $256
+ JBE clr_129through256
+ // TODO: use branch table and BSR to make this just a single dispatch
+
+clr_loop:
+ MOVOU X0, 0(DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, 32(DI)
+ MOVOU X0, 48(DI)
+ MOVOU X0, 64(DI)
+ MOVOU X0, 80(DI)
+ MOVOU X0, 96(DI)
+ MOVOU X0, 112(DI)
+ MOVOU X0, 128(DI)
+ MOVOU X0, 144(DI)
+ MOVOU X0, 160(DI)
+ MOVOU X0, 176(DI)
+ MOVOU X0, 192(DI)
+ MOVOU X0, 208(DI)
+ MOVOU X0, 224(DI)
+ MOVOU X0, 240(DI)
+ SUBL $256, BX
+ ADDL $256, DI
+ CMPL BX, $256
+ JAE clr_loop
+ JMP clr_tail
+
+clr_1or2:
+ MOVB AX, (DI)
+ MOVB AX, -1(DI)(BX*1)
+ RET
+clr_0:
+ RET
+clr_3or4:
+ MOVW AX, (DI)
+ MOVW AX, -2(DI)(BX*1)
+ RET
+clr_5through8:
+ MOVL AX, (DI)
+ MOVL AX, -4(DI)(BX*1)
+ RET
+clr_9through16:
+ MOVL AX, (DI)
+ MOVL AX, 4(DI)
+ MOVL AX, -8(DI)(BX*1)
+ MOVL AX, -4(DI)(BX*1)
+ RET
+clr_17through32:
+ MOVOU X0, (DI)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
+clr_33through64:
+ MOVOU X0, (DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, -32(DI)(BX*1)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
+clr_65through128:
+ MOVOU X0, (DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, 32(DI)
+ MOVOU X0, 48(DI)
+ MOVOU X0, -64(DI)(BX*1)
+ MOVOU X0, -48(DI)(BX*1)
+ MOVOU X0, -32(DI)(BX*1)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
+clr_129through256:
+ MOVOU X0, (DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, 32(DI)
+ MOVOU X0, 48(DI)
+ MOVOU X0, 64(DI)
+ MOVOU X0, 80(DI)
+ MOVOU X0, 96(DI)
+ MOVOU X0, 112(DI)
+ MOVOU X0, -128(DI)(BX*1)
+ MOVOU X0, -112(DI)(BX*1)
+ MOVOU X0, -96(DI)(BX*1)
+ MOVOU X0, -80(DI)(BX*1)
+ MOVOU X0, -64(DI)(BX*1)
+ MOVOU X0, -48(DI)(BX*1)
+ MOVOU X0, -32(DI)(BX*1)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
+nosse2:
+ MOVL BX, CX
+ SHRL $2, CX
+ REP
+ STOSL
+ ANDL $3, BX
+ JNE clr_tail
+ RET
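
The size dispatch above is purely a speed optimization; the semantics are simply "zero n bytes". A portable Go statement of what runtime·memclr implements (reference only):

	// Reference semantics of runtime·memclr: zero every byte of the range.
	// The assembly versions pick wider stores (SSE MOVOU or REP STOSL/STOSQ)
	// based on the size and, on 386, on whether SSE2 is available.
	func memclrRef(b []byte) {
		for i := range b {
			b[i] = 0
		}
	}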
diff --git a/src/runtime/memclr_amd64.s b/src/runtime/memclr_amd64.s
new file mode 100644
index 000000000..35b3b4bee
--- /dev/null
+++ b/src/runtime/memclr_amd64.s
@@ -0,0 +1,117 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !plan9
+
+#include "textflag.h"
+
+// void runtime·memclr(void*, uintptr)
+TEXT runtime·memclr(SB), NOSPLIT, $0-16
+ MOVQ ptr+0(FP), DI
+ MOVQ n+8(FP), BX
+ XORQ AX, AX
+
+ // MOVOU seems always faster than REP STOSQ.
+clr_tail:
+ TESTQ BX, BX
+ JEQ clr_0
+ CMPQ BX, $2
+ JBE clr_1or2
+ CMPQ BX, $4
+ JBE clr_3or4
+ CMPQ BX, $8
+ JBE clr_5through8
+ CMPQ BX, $16
+ JBE clr_9through16
+ PXOR X0, X0
+ CMPQ BX, $32
+ JBE clr_17through32
+ CMPQ BX, $64
+ JBE clr_33through64
+ CMPQ BX, $128
+ JBE clr_65through128
+ CMPQ BX, $256
+ JBE clr_129through256
+ // TODO: use branch table and BSR to make this just a single dispatch
+ // TODO: for really big clears, use MOVNTDQ.
+
+clr_loop:
+ MOVOU X0, 0(DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, 32(DI)
+ MOVOU X0, 48(DI)
+ MOVOU X0, 64(DI)
+ MOVOU X0, 80(DI)
+ MOVOU X0, 96(DI)
+ MOVOU X0, 112(DI)
+ MOVOU X0, 128(DI)
+ MOVOU X0, 144(DI)
+ MOVOU X0, 160(DI)
+ MOVOU X0, 176(DI)
+ MOVOU X0, 192(DI)
+ MOVOU X0, 208(DI)
+ MOVOU X0, 224(DI)
+ MOVOU X0, 240(DI)
+ SUBQ $256, BX
+ ADDQ $256, DI
+ CMPQ BX, $256
+ JAE clr_loop
+ JMP clr_tail
+
+clr_1or2:
+ MOVB AX, (DI)
+ MOVB AX, -1(DI)(BX*1)
+ RET
+clr_0:
+ RET
+clr_3or4:
+ MOVW AX, (DI)
+ MOVW AX, -2(DI)(BX*1)
+ RET
+clr_5through8:
+ MOVL AX, (DI)
+ MOVL AX, -4(DI)(BX*1)
+ RET
+clr_9through16:
+ MOVQ AX, (DI)
+ MOVQ AX, -8(DI)(BX*1)
+ RET
+clr_17through32:
+ MOVOU X0, (DI)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
+clr_33through64:
+ MOVOU X0, (DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, -32(DI)(BX*1)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
+clr_65through128:
+ MOVOU X0, (DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, 32(DI)
+ MOVOU X0, 48(DI)
+ MOVOU X0, -64(DI)(BX*1)
+ MOVOU X0, -48(DI)(BX*1)
+ MOVOU X0, -32(DI)(BX*1)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
+clr_129through256:
+ MOVOU X0, (DI)
+ MOVOU X0, 16(DI)
+ MOVOU X0, 32(DI)
+ MOVOU X0, 48(DI)
+ MOVOU X0, 64(DI)
+ MOVOU X0, 80(DI)
+ MOVOU X0, 96(DI)
+ MOVOU X0, 112(DI)
+ MOVOU X0, -128(DI)(BX*1)
+ MOVOU X0, -112(DI)(BX*1)
+ MOVOU X0, -96(DI)(BX*1)
+ MOVOU X0, -80(DI)(BX*1)
+ MOVOU X0, -64(DI)(BX*1)
+ MOVOU X0, -48(DI)(BX*1)
+ MOVOU X0, -32(DI)(BX*1)
+ MOVOU X0, -16(DI)(BX*1)
+ RET
diff --git a/src/runtime/memclr_arm.s b/src/runtime/memclr_arm.s
new file mode 100644
index 000000000..1824d33b1
--- /dev/null
+++ b/src/runtime/memclr_arm.s
@@ -0,0 +1,87 @@
+// Inferno's libkern/memset-arm.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/memset-arm.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "textflag.h"
+
+TO = 8
+TOE = 11
+N = 12
+TMP = 12 /* N and TMP don't overlap */
+
+TEXT runtime·memclr(SB),NOSPLIT,$0-8
+ MOVW ptr+0(FP), R(TO)
+ MOVW n+4(FP), R(N)
+ MOVW $0, R(0)
+
+ ADD R(N), R(TO), R(TOE) /* to end pointer */
+
+ CMP $4, R(N) /* need at least 4 bytes to copy */
+ BLT _1tail
+
+_4align: /* align on 4 */
+ AND.S $3, R(TO), R(TMP)
+ BEQ _4aligned
+
+ MOVBU.P R(0), 1(R(TO)) /* implicit write back */
+ B _4align
+
+_4aligned:
+ SUB $31, R(TOE), R(TMP) /* do 32-byte chunks if possible */
+ CMP R(TMP), R(TO)
+ BHS _4tail
+
+ MOVW R0, R1 /* replicate */
+ MOVW R0, R2
+ MOVW R0, R3
+ MOVW R0, R4
+ MOVW R0, R5
+ MOVW R0, R6
+ MOVW R0, R7
+
+_f32loop:
+ CMP R(TMP), R(TO)
+ BHS _4tail
+
+ MOVM.IA.W [R0-R7], (R(TO))
+ B _f32loop
+
+_4tail:
+ SUB $3, R(TOE), R(TMP) /* do remaining words if possible */
+_4loop:
+ CMP R(TMP), R(TO)
+ BHS _1tail
+
+ MOVW.P R(0), 4(R(TO)) /* implicit write back */
+ B _4loop
+
+_1tail:
+ CMP R(TO), R(TOE)
+ BEQ _return
+
+ MOVBU.P R(0), 1(R(TO)) /* implicit write back */
+ B _1tail
+
+_return:
+ RET
diff --git a/src/runtime/memclr_plan9_386.s b/src/runtime/memclr_plan9_386.s
new file mode 100644
index 000000000..b4b671f77
--- /dev/null
+++ b/src/runtime/memclr_plan9_386.s
@@ -0,0 +1,51 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// void runtime·memclr(void*, uintptr)
+TEXT runtime·memclr(SB), NOSPLIT, $0-8
+ MOVL ptr+0(FP), DI
+ MOVL n+4(FP), BX
+ XORL AX, AX
+
+clr_tail:
+ TESTL BX, BX
+ JEQ clr_0
+ CMPL BX, $2
+ JBE clr_1or2
+ CMPL BX, $4
+ JBE clr_3or4
+ CMPL BX, $8
+ JBE clr_5through8
+ CMPL BX, $16
+ JBE clr_9through16
+ MOVL BX, CX
+ SHRL $2, CX
+ REP
+ STOSL
+ ANDL $3, BX
+ JNE clr_tail
+ RET
+
+clr_1or2:
+ MOVB AX, (DI)
+ MOVB AX, -1(DI)(BX*1)
+ RET
+clr_0:
+ RET
+clr_3or4:
+ MOVW AX, (DI)
+ MOVW AX, -2(DI)(BX*1)
+ RET
+clr_5through8:
+ MOVL AX, (DI)
+ MOVL AX, -4(DI)(BX*1)
+ RET
+clr_9through16:
+ MOVL AX, (DI)
+ MOVL AX, 4(DI)
+ MOVL AX, -8(DI)(BX*1)
+ MOVL AX, -4(DI)(BX*1)
+ RET
diff --git a/src/runtime/memclr_plan9_amd64.s b/src/runtime/memclr_plan9_amd64.s
new file mode 100644
index 000000000..37e61dfbc
--- /dev/null
+++ b/src/runtime/memclr_plan9_amd64.s
@@ -0,0 +1,21 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// void runtime·memclr(void*, uintptr)
+TEXT runtime·memclr(SB),NOSPLIT,$0-16
+ MOVQ ptr+0(FP), DI
+ MOVQ n+8(FP), CX
+ MOVQ CX, BX
+ ANDQ $7, BX
+ SHRQ $3, CX
+ MOVQ $0, AX
+ CLD
+ REP
+ STOSQ
+ MOVQ BX, CX
+ REP
+ STOSB
+ RET
diff --git a/src/runtime/memmove_386.s b/src/runtime/memmove_386.s
new file mode 100644
index 000000000..4c0c74c1a
--- /dev/null
+++ b/src/runtime/memmove_386.s
@@ -0,0 +1,176 @@
+// Inferno's libkern/memmove-386.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// +build !plan9
+
+#include "textflag.h"
+
+TEXT runtime·memmove(SB), NOSPLIT, $0-12
+ MOVL to+0(FP), DI
+ MOVL from+4(FP), SI
+ MOVL n+8(FP), BX
+
+ // REP instructions have a high startup cost, so we handle small sizes
+ // with some straightline code. The REP MOVSL instruction is really fast
+ // for large sizes. The cutover is approximately 1K. We implement up to
+ // 128 because that is the maximum SSE register load (loading all data
+ // into registers lets us ignore copy direction).
+tail:
+ TESTL BX, BX
+ JEQ move_0
+ CMPL BX, $2
+ JBE move_1or2
+ CMPL BX, $4
+ JBE move_3or4
+ CMPL BX, $8
+ JBE move_5through8
+ CMPL BX, $16
+ JBE move_9through16
+ TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
+ JEQ nosse2
+ CMPL BX, $32
+ JBE move_17through32
+ CMPL BX, $64
+ JBE move_33through64
+ CMPL BX, $128
+ JBE move_65through128
+ // TODO: use branch table and BSR to make this just a single dispatch
+
+nosse2:
+/*
+ * check and set for backwards
+ */
+ CMPL SI, DI
+ JLS back
+
+/*
+ * forward copy loop
+ */
+forward:
+ MOVL BX, CX
+ SHRL $2, CX
+ ANDL $3, BX
+
+ REP; MOVSL
+ JMP tail
+/*
+ * check overlap
+ */
+back:
+ MOVL SI, CX
+ ADDL BX, CX
+ CMPL CX, DI
+ JLS forward
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+
+ ADDL BX, DI
+ ADDL BX, SI
+ STD
+
+/*
+ * copy
+ */
+ MOVL BX, CX
+ SHRL $2, CX
+ ANDL $3, BX
+
+ SUBL $4, DI
+ SUBL $4, SI
+ REP; MOVSL
+
+ CLD
+ ADDL $4, DI
+ ADDL $4, SI
+ SUBL BX, DI
+ SUBL BX, SI
+ JMP tail
+
+move_1or2:
+ MOVB (SI), AX
+ MOVB -1(SI)(BX*1), CX
+ MOVB AX, (DI)
+ MOVB CX, -1(DI)(BX*1)
+ RET
+move_0:
+ RET
+move_3or4:
+ MOVW (SI), AX
+ MOVW -2(SI)(BX*1), CX
+ MOVW AX, (DI)
+ MOVW CX, -2(DI)(BX*1)
+ RET
+move_5through8:
+ MOVL (SI), AX
+ MOVL -4(SI)(BX*1), CX
+ MOVL AX, (DI)
+ MOVL CX, -4(DI)(BX*1)
+ RET
+move_9through16:
+ MOVL (SI), AX
+ MOVL 4(SI), CX
+ MOVL -8(SI)(BX*1), DX
+ MOVL -4(SI)(BX*1), BP
+ MOVL AX, (DI)
+ MOVL CX, 4(DI)
+ MOVL DX, -8(DI)(BX*1)
+ MOVL BP, -4(DI)(BX*1)
+ RET
+move_17through32:
+ MOVOU (SI), X0
+ MOVOU -16(SI)(BX*1), X1
+ MOVOU X0, (DI)
+ MOVOU X1, -16(DI)(BX*1)
+ RET
+move_33through64:
+ MOVOU (SI), X0
+ MOVOU 16(SI), X1
+ MOVOU -32(SI)(BX*1), X2
+ MOVOU -16(SI)(BX*1), X3
+ MOVOU X0, (DI)
+ MOVOU X1, 16(DI)
+ MOVOU X2, -32(DI)(BX*1)
+ MOVOU X3, -16(DI)(BX*1)
+ RET
+move_65through128:
+ MOVOU (SI), X0
+ MOVOU 16(SI), X1
+ MOVOU 32(SI), X2
+ MOVOU 48(SI), X3
+ MOVOU -64(SI)(BX*1), X4
+ MOVOU -48(SI)(BX*1), X5
+ MOVOU -32(SI)(BX*1), X6
+ MOVOU -16(SI)(BX*1), X7
+ MOVOU X0, (DI)
+ MOVOU X1, 16(DI)
+ MOVOU X2, 32(DI)
+ MOVOU X3, 48(DI)
+ MOVOU X4, -64(DI)(BX*1)
+ MOVOU X5, -48(DI)(BX*1)
+ MOVOU X6, -32(DI)(BX*1)
+ MOVOU X7, -16(DI)(BX*1)
+ RET
diff --git a/src/runtime/memmove_amd64.s b/src/runtime/memmove_amd64.s
new file mode 100644
index 000000000..f96843534
--- /dev/null
+++ b/src/runtime/memmove_amd64.s
@@ -0,0 +1,252 @@
+// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
+// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// +build !plan9
+
+#include "textflag.h"
+
+// void runtime·memmove(void*, void*, uintptr)
+TEXT runtime·memmove(SB), NOSPLIT, $0-24
+
+ MOVQ to+0(FP), DI
+ MOVQ from+8(FP), SI
+ MOVQ n+16(FP), BX
+
+ // REP instructions have a high startup cost, so we handle small sizes
+ // with some straightline code. The REP MOVSQ instruction is really fast
+ // for large sizes. The cutover is approximately 2K.
+tail:
+ // move_129through256 or smaller work whether or not the source and the
+ // destination memory regions overlap because they load all data into
+ // registers before writing it back. move_256through2048 on the other
+ // hand can be used only when the memory regions don't overlap or the copy
+ // direction is forward.
+ TESTQ BX, BX
+ JEQ move_0
+ CMPQ BX, $2
+ JBE move_1or2
+ CMPQ BX, $4
+ JBE move_3or4
+ CMPQ BX, $8
+ JBE move_5through8
+ CMPQ BX, $16
+ JBE move_9through16
+ CMPQ BX, $32
+ JBE move_17through32
+ CMPQ BX, $64
+ JBE move_33through64
+ CMPQ BX, $128
+ JBE move_65through128
+ CMPQ BX, $256
+ JBE move_129through256
+ // TODO: use branch table and BSR to make this just a single dispatch
+
+/*
+ * check and set for backwards
+ */
+ CMPQ SI, DI
+ JLS back
+
+/*
+ * forward copy loop
+ */
+forward:
+ CMPQ BX, $2048
+ JLS move_256through2048
+
+ MOVQ BX, CX
+ SHRQ $3, CX
+ ANDQ $7, BX
+ REP; MOVSQ
+ JMP tail
+
+back:
+/*
+ * check overlap
+ */
+ MOVQ SI, CX
+ ADDQ BX, CX
+ CMPQ CX, DI
+ JLS forward
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+ ADDQ BX, DI
+ ADDQ BX, SI
+ STD
+
+/*
+ * copy
+ */
+ MOVQ BX, CX
+ SHRQ $3, CX
+ ANDQ $7, BX
+
+ SUBQ $8, DI
+ SUBQ $8, SI
+ REP; MOVSQ
+
+ CLD
+ ADDQ $8, DI
+ ADDQ $8, SI
+ SUBQ BX, DI
+ SUBQ BX, SI
+ JMP tail
+
+move_1or2:
+ MOVB (SI), AX
+ MOVB -1(SI)(BX*1), CX
+ MOVB AX, (DI)
+ MOVB CX, -1(DI)(BX*1)
+ RET
+move_0:
+ RET
+move_3or4:
+ MOVW (SI), AX
+ MOVW -2(SI)(BX*1), CX
+ MOVW AX, (DI)
+ MOVW CX, -2(DI)(BX*1)
+ RET
+move_5through8:
+ MOVL (SI), AX
+ MOVL -4(SI)(BX*1), CX
+ MOVL AX, (DI)
+ MOVL CX, -4(DI)(BX*1)
+ RET
+move_9through16:
+ MOVQ (SI), AX
+ MOVQ -8(SI)(BX*1), CX
+ MOVQ AX, (DI)
+ MOVQ CX, -8(DI)(BX*1)
+ RET
+move_17through32:
+ MOVOU (SI), X0
+ MOVOU -16(SI)(BX*1), X1
+ MOVOU X0, (DI)
+ MOVOU X1, -16(DI)(BX*1)
+ RET
+move_33through64:
+ MOVOU (SI), X0
+ MOVOU 16(SI), X1
+ MOVOU -32(SI)(BX*1), X2
+ MOVOU -16(SI)(BX*1), X3
+ MOVOU X0, (DI)
+ MOVOU X1, 16(DI)
+ MOVOU X2, -32(DI)(BX*1)
+ MOVOU X3, -16(DI)(BX*1)
+ RET
+move_65through128:
+ MOVOU (SI), X0
+ MOVOU 16(SI), X1
+ MOVOU 32(SI), X2
+ MOVOU 48(SI), X3
+ MOVOU -64(SI)(BX*1), X4
+ MOVOU -48(SI)(BX*1), X5
+ MOVOU -32(SI)(BX*1), X6
+ MOVOU -16(SI)(BX*1), X7
+ MOVOU X0, (DI)
+ MOVOU X1, 16(DI)
+ MOVOU X2, 32(DI)
+ MOVOU X3, 48(DI)
+ MOVOU X4, -64(DI)(BX*1)
+ MOVOU X5, -48(DI)(BX*1)
+ MOVOU X6, -32(DI)(BX*1)
+ MOVOU X7, -16(DI)(BX*1)
+ RET
+move_129through256:
+ MOVOU (SI), X0
+ MOVOU 16(SI), X1
+ MOVOU 32(SI), X2
+ MOVOU 48(SI), X3
+ MOVOU 64(SI), X4
+ MOVOU 80(SI), X5
+ MOVOU 96(SI), X6
+ MOVOU 112(SI), X7
+ MOVOU -128(SI)(BX*1), X8
+ MOVOU -112(SI)(BX*1), X9
+ MOVOU -96(SI)(BX*1), X10
+ MOVOU -80(SI)(BX*1), X11
+ MOVOU -64(SI)(BX*1), X12
+ MOVOU -48(SI)(BX*1), X13
+ MOVOU -32(SI)(BX*1), X14
+ MOVOU -16(SI)(BX*1), X15
+ MOVOU X0, (DI)
+ MOVOU X1, 16(DI)
+ MOVOU X2, 32(DI)
+ MOVOU X3, 48(DI)
+ MOVOU X4, 64(DI)
+ MOVOU X5, 80(DI)
+ MOVOU X6, 96(DI)
+ MOVOU X7, 112(DI)
+ MOVOU X8, -128(DI)(BX*1)
+ MOVOU X9, -112(DI)(BX*1)
+ MOVOU X10, -96(DI)(BX*1)
+ MOVOU X11, -80(DI)(BX*1)
+ MOVOU X12, -64(DI)(BX*1)
+ MOVOU X13, -48(DI)(BX*1)
+ MOVOU X14, -32(DI)(BX*1)
+ MOVOU X15, -16(DI)(BX*1)
+ RET
+move_256through2048:
+ SUBQ $256, BX
+ MOVOU (SI), X0
+ MOVOU 16(SI), X1
+ MOVOU 32(SI), X2
+ MOVOU 48(SI), X3
+ MOVOU 64(SI), X4
+ MOVOU 80(SI), X5
+ MOVOU 96(SI), X6
+ MOVOU 112(SI), X7
+ MOVOU 128(SI), X8
+ MOVOU 144(SI), X9
+ MOVOU 160(SI), X10
+ MOVOU 176(SI), X11
+ MOVOU 192(SI), X12
+ MOVOU 208(SI), X13
+ MOVOU 224(SI), X14
+ MOVOU 240(SI), X15
+ MOVOU X0, (DI)
+ MOVOU X1, 16(DI)
+ MOVOU X2, 32(DI)
+ MOVOU X3, 48(DI)
+ MOVOU X4, 64(DI)
+ MOVOU X5, 80(DI)
+ MOVOU X6, 96(DI)
+ MOVOU X7, 112(DI)
+ MOVOU X8, 128(DI)
+ MOVOU X9, 144(DI)
+ MOVOU X10, 160(DI)
+ MOVOU X11, 176(DI)
+ MOVOU X12, 192(DI)
+ MOVOU X13, 208(DI)
+ MOVOU X14, 224(DI)
+ MOVOU X15, 240(DI)
+ CMPQ BX, $256
+ LEAQ 256(SI), SI
+ LEAQ 256(DI), DI
+ JGE move_256through2048
+ JMP tail
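
The forward/backward split above exists because overlapping ranges must be copied in the direction that never clobbers a source byte before it is read; the small-size paths load everything into registers first, so they can ignore direction. A Go sketch of the direction decision (illustrative only; it assumes dst is at least as long as src and uses "unsafe" only to compare addresses):

	// memmoveRef copies len(src) bytes into dst, which may overlap src.
	// It copies backward only when dst starts inside the source range, so
	// no source byte is overwritten before it has been read.
	func memmoveRef(dst, src []byte) {
		n := len(src)
		if n == 0 {
			return
		}
		d := uintptr(unsafe.Pointer(&dst[0]))
		s := uintptr(unsafe.Pointer(&src[0]))
		if d <= s || d >= s+uintptr(n) {
			for i := 0; i < n; i++ { // forward copy
				dst[i] = src[i]
			}
			return
		}
		for i := n - 1; i >= 0; i-- { // backward copy: dst starts inside src
			dst[i] = src[i]
		}
	}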
diff --git a/src/runtime/memmove_arm.s b/src/runtime/memmove_arm.s
new file mode 100644
index 000000000..f187d4267
--- /dev/null
+++ b/src/runtime/memmove_arm.s
@@ -0,0 +1,261 @@
+// Inferno's libkern/memmove-arm.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "textflag.h"
+
+// TE or TS are spilled to the stack during bulk register moves.
+TS = 0
+TE = 8
+
+// Warning: the linker will use R11 to synthesize certain instructions. Please
+// take care and double check with objdump.
+FROM = 11
+N = 12
+TMP = 12 /* N and TMP don't overlap */
+TMP1 = 5
+
+RSHIFT = 5
+LSHIFT = 6
+OFFSET = 7
+
+BR0 = 0 /* shared with TS */
+BW0 = 1
+BR1 = 1
+BW1 = 2
+BR2 = 2
+BW2 = 3
+BR3 = 3
+BW3 = 4
+
+FW0 = 1
+FR0 = 2
+FW1 = 2
+FR1 = 3
+FW2 = 3
+FR2 = 4
+FW3 = 4
+FR3 = 8 /* shared with TE */
+
+TEXT runtime·memmove(SB), NOSPLIT, $4-12
+_memmove:
+ MOVW to+0(FP), R(TS)
+ MOVW from+4(FP), R(FROM)
+ MOVW n+8(FP), R(N)
+
+ ADD R(N), R(TS), R(TE) /* to end pointer */
+
+ CMP R(FROM), R(TS)
+ BLS _forward
+
+_back:
+ ADD R(N), R(FROM) /* from end pointer */
+ CMP $4, R(N) /* need at least 4 bytes to copy */
+ BLT _b1tail
+
+_b4align: /* align destination on 4 */
+ AND.S $3, R(TE), R(TMP)
+ BEQ _b4aligned
+
+ MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */
+ MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */
+ B _b4align
+
+_b4aligned: /* is source now aligned? */
+ AND.S $3, R(FROM), R(TMP)
+ BNE _bunaligned
+
+ ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */
+ MOVW R(TS), savedts-4(SP)
+_b32loop:
+ CMP R(TMP), R(TE)
+ BLS _b4tail
+
+ MOVM.DB.W (R(FROM)), [R0-R7]
+ MOVM.DB.W [R0-R7], (R(TE))
+ B _b32loop
+
+_b4tail: /* do remaining words if possible */
+ MOVW savedts-4(SP), R(TS)
+ ADD $3, R(TS), R(TMP)
+_b4loop:
+ CMP R(TMP), R(TE)
+ BLS _b1tail
+
+ MOVW.W -4(R(FROM)), R(TMP1) /* pre-indexed */
+ MOVW.W R(TMP1), -4(R(TE)) /* pre-indexed */
+ B _b4loop
+
+_b1tail: /* remaining bytes */
+ CMP R(TE), R(TS)
+ BEQ _return
+
+ MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */
+ MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */
+ B _b1tail
+
+_forward:
+ CMP $4, R(N) /* need at least 4 bytes to copy */
+ BLT _f1tail
+
+_f4align: /* align destination on 4 */
+ AND.S $3, R(TS), R(TMP)
+ BEQ _f4aligned
+
+ MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */
+ MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */
+ B _f4align
+
+_f4aligned: /* is source now aligned? */
+ AND.S $3, R(FROM), R(TMP)
+ BNE _funaligned
+
+ SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */
+ MOVW R(TE), savedte-4(SP)
+_f32loop:
+ CMP R(TMP), R(TS)
+ BHS _f4tail
+
+ MOVM.IA.W (R(FROM)), [R1-R8]
+ MOVM.IA.W [R1-R8], (R(TS))
+ B _f32loop
+
+_f4tail:
+ MOVW savedte-4(SP), R(TE)
+ SUB $3, R(TE), R(TMP) /* do remaining words if possible */
+_f4loop:
+ CMP R(TMP), R(TS)
+ BHS _f1tail
+
+ MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */
+ MOVW.P R(TMP1), 4(R(TS)) /* implicit write back */
+ B _f4loop
+
+_f1tail:
+ CMP R(TS), R(TE)
+ BEQ _return
+
+ MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */
+ MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */
+ B _f1tail
+
+_return:
+ MOVW to+0(FP), R0
+ RET
+
+_bunaligned:
+ CMP $2, R(TMP) /* is R(TMP) < 2 ? */
+
+ MOVW.LT $8, R(RSHIFT) /* (R(n)<<24)|(R(n-1)>>8) */
+ MOVW.LT $24, R(LSHIFT)
+ MOVW.LT $1, R(OFFSET)
+
+ MOVW.EQ $16, R(RSHIFT) /* (R(n)<<16)|(R(n-1)>>16) */
+ MOVW.EQ $16, R(LSHIFT)
+ MOVW.EQ $2, R(OFFSET)
+
+ MOVW.GT $24, R(RSHIFT) /* (R(n)<<8)|(R(n-1)>>24) */
+ MOVW.GT $8, R(LSHIFT)
+ MOVW.GT $3, R(OFFSET)
+
+ ADD $16, R(TS), R(TMP) /* do 16-byte chunks if possible */
+ CMP R(TMP), R(TE)
+ BLS _b1tail
+
+ BIC $3, R(FROM) /* align source */
+ MOVW R(TS), savedts-4(SP)
+ MOVW (R(FROM)), R(BR0) /* prime first block register */
+
+_bu16loop:
+ CMP R(TMP), R(TE)
+ BLS _bu1tail
+
+ MOVW R(BR0)<<R(LSHIFT), R(BW3)
+ MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)]
+ ORR R(BR3)>>R(RSHIFT), R(BW3)
+
+ MOVW R(BR3)<<R(LSHIFT), R(BW2)
+ ORR R(BR2)>>R(RSHIFT), R(BW2)
+
+ MOVW R(BR2)<<R(LSHIFT), R(BW1)
+ ORR R(BR1)>>R(RSHIFT), R(BW1)
+
+ MOVW R(BR1)<<R(LSHIFT), R(BW0)
+ ORR R(BR0)>>R(RSHIFT), R(BW0)
+
+ MOVM.DB.W [R(BW0)-R(BW3)], (R(TE))
+ B _bu16loop
+
+_bu1tail:
+ MOVW savedts-4(SP), R(TS)
+ ADD R(OFFSET), R(FROM)
+ B _b1tail
+
+_funaligned:
+ CMP $2, R(TMP)
+
+ MOVW.LT $8, R(RSHIFT) /* (R(n+1)<<24)|(R(n)>>8) */
+ MOVW.LT $24, R(LSHIFT)
+ MOVW.LT $3, R(OFFSET)
+
+ MOVW.EQ $16, R(RSHIFT) /* (R(n+1)<<16)|(R(n)>>16) */
+ MOVW.EQ $16, R(LSHIFT)
+ MOVW.EQ $2, R(OFFSET)
+
+ MOVW.GT $24, R(RSHIFT) /* (R(n+1)<<8)|(R(n)>>24) */
+ MOVW.GT $8, R(LSHIFT)
+ MOVW.GT $1, R(OFFSET)
+
+ SUB $16, R(TE), R(TMP) /* do 16-byte chunks if possible */
+ CMP R(TMP), R(TS)
+ BHS _f1tail
+
+ BIC $3, R(FROM) /* align source */
+ MOVW R(TE), savedte-4(SP)
+ MOVW.P 4(R(FROM)), R(FR3) /* prime last block register, implicit write back */
+
+_fu16loop:
+ CMP R(TMP), R(TS)
+ BHS _fu1tail
+
+ MOVW R(FR3)>>R(RSHIFT), R(FW0)
+ MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)]
+ ORR R(FR0)<<R(LSHIFT), R(FW0)
+
+ MOVW R(FR0)>>R(RSHIFT), R(FW1)
+ ORR R(FR1)<<R(LSHIFT), R(FW1)
+
+ MOVW R(FR1)>>R(RSHIFT), R(FW2)
+ ORR R(FR2)<<R(LSHIFT), R(FW2)
+
+ MOVW R(FR2)>>R(RSHIFT), R(FW3)
+ ORR R(FR3)<<R(LSHIFT), R(FW3)
+
+ MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS))
+ B _fu16loop
+
+_fu1tail:
+ MOVW savedte-4(SP), R(TE)
+ SUB R(OFFSET), R(FROM)
+ B _f1tail
diff --git a/src/runtime/memmove_linux_amd64_test.go b/src/runtime/memmove_linux_amd64_test.go
new file mode 100644
index 000000000..f7221f4f5
--- /dev/null
+++ b/src/runtime/memmove_linux_amd64_test.go
@@ -0,0 +1,61 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "io/ioutil"
+ "os"
+ "reflect"
+ "syscall"
+ "testing"
+ "unsafe"
+)
+
+// TestMemmoveOverflow maps 3GB of memory and calls memmove on
+// the corresponding slice.
+func TestMemmoveOverflow(t *testing.T) {
+ // Create a temporary file.
+ tmp, err := ioutil.TempFile("", "go-memmovetest")
+ if err != nil {
+ t.Fatal(err)
+ }
+ _, err = tmp.Write(make([]byte, 65536))
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.Remove(tmp.Name())
+ defer tmp.Close()
+
+ // Set up mappings.
+ base, _, errno := syscall.Syscall6(syscall.SYS_MMAP,
+ 0xa0<<32, 3<<30, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_PRIVATE|syscall.MAP_ANONYMOUS, ^uintptr(0), 0)
+ if errno != 0 {
+ t.Skipf("could not create memory mapping: %s", errno)
+ }
+ syscall.Syscall(syscall.SYS_MUNMAP, base, 3<<30, 0)
+
+ for off := uintptr(0); off < 3<<30; off += 65536 {
+ _, _, errno := syscall.Syscall6(syscall.SYS_MMAP,
+ base+off, 65536, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED|syscall.MAP_FIXED, tmp.Fd(), 0)
+ if errno != 0 {
+ t.Fatalf("could not map a page at requested 0x%x: %s", base+off, errno)
+ }
+ defer syscall.Syscall(syscall.SYS_MUNMAP, base+off, 65536, 0)
+ }
+
+ var s []byte
+ sp := (*reflect.SliceHeader)(unsafe.Pointer(&s))
+ sp.Data = base
+ sp.Len, sp.Cap = 3<<30, 3<<30
+
+ n := copy(s[1:], s)
+ if n != 3<<30-1 {
+ t.Fatalf("copied %d bytes, expected %d", n, 3<<30-1)
+ }
+ n = copy(s, s[1:])
+ if n != 3<<30-1 {
+ t.Fatalf("copied %d bytes, expected %d", n, 3<<30-1)
+ }
+}
diff --git a/src/runtime/memmove_nacl_amd64p32.s b/src/runtime/memmove_nacl_amd64p32.s
new file mode 100644
index 000000000..373607afe
--- /dev/null
+++ b/src/runtime/memmove_nacl_amd64p32.s
@@ -0,0 +1,46 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT runtime·memmove(SB), NOSPLIT, $0-12
+ MOVL to+0(FP), DI
+ MOVL from+4(FP), SI
+ MOVL n+8(FP), BX
+
+ CMPL SI, DI
+ JLS back
+
+forward:
+ MOVL BX, CX
+ SHRL $3, CX
+ ANDL $7, BX
+ REP; MOVSQ
+ MOVL BX, CX
+ REP; MOVSB
+ RET
+
+back:
+ MOVL SI, CX
+ ADDL BX, CX
+ CMPL CX, DI
+ JLS forward
+
+ ADDL BX, DI
+ ADDL BX, SI
+ STD
+
+ MOVL BX, CX
+ SHRL $3, CX
+ ANDL $7, BX
+ SUBL $8, DI
+ SUBL $8, SI
+ REP; MOVSQ
+ ADDL $7, DI
+ ADDL $7, SI
+ MOVL BX, CX
+ REP; MOVSB
+ CLD
+
+ RET
diff --git a/src/runtime/memmove_plan9_386.s b/src/runtime/memmove_plan9_386.s
new file mode 100644
index 000000000..025d4ce1b
--- /dev/null
+++ b/src/runtime/memmove_plan9_386.s
@@ -0,0 +1,128 @@
+// Inferno's libkern/memmove-386.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "textflag.h"
+
+TEXT runtime·memmove(SB), NOSPLIT, $0-12
+ MOVL to+0(FP), DI
+ MOVL from+4(FP), SI
+ MOVL n+8(FP), BX
+
+ // REP instructions have a high startup cost, so we handle small sizes
+ // with some straightline code. The REP MOVSL instruction is really fast
+ // for large sizes. The cutover is approximately 1K.
+tail:
+ TESTL BX, BX
+ JEQ move_0
+ CMPL BX, $2
+ JBE move_1or2
+ CMPL BX, $4
+ JBE move_3or4
+ CMPL BX, $8
+ JBE move_5through8
+ CMPL BX, $16
+ JBE move_9through16
+
+/*
+ * check and set for backwards
+ */
+ CMPL SI, DI
+ JLS back
+
+/*
+ * forward copy loop
+ */
+forward:
+ MOVL BX, CX
+ SHRL $2, CX
+ ANDL $3, BX
+
+ REP; MOVSL
+ JMP tail
+/*
+ * check overlap
+ */
+back:
+ MOVL SI, CX
+ ADDL BX, CX
+ CMPL CX, DI
+ JLS forward
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+
+ ADDL BX, DI
+ ADDL BX, SI
+ STD
+
+/*
+ * copy
+ */
+ MOVL BX, CX
+ SHRL $2, CX
+ ANDL $3, BX
+
+ SUBL $4, DI
+ SUBL $4, SI
+ REP; MOVSL
+
+ CLD
+ ADDL $4, DI
+ ADDL $4, SI
+ SUBL BX, DI
+ SUBL BX, SI
+ JMP tail
+
+move_1or2:
+ MOVB (SI), AX
+ MOVB -1(SI)(BX*1), CX
+ MOVB AX, (DI)
+ MOVB CX, -1(DI)(BX*1)
+ RET
+move_0:
+ RET
+move_3or4:
+ MOVW (SI), AX
+ MOVW -2(SI)(BX*1), CX
+ MOVW AX, (DI)
+ MOVW CX, -2(DI)(BX*1)
+ RET
+move_5through8:
+ MOVL (SI), AX
+ MOVL -4(SI)(BX*1), CX
+ MOVL AX, (DI)
+ MOVL CX, -4(DI)(BX*1)
+ RET
+move_9through16:
+ MOVL (SI), AX
+ MOVL 4(SI), CX
+ MOVL -8(SI)(BX*1), DX
+ MOVL -4(SI)(BX*1), BP
+ MOVL AX, (DI)
+ MOVL CX, 4(DI)
+ MOVL DX, -8(DI)(BX*1)
+ MOVL BP, -4(DI)(BX*1)
+ RET
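Editor's aside, not part of the commit: the "check and set for backwards" block above decides the copy direction. A forward copy is unsafe only when the destination begins inside the source range; the assembly tests exactly that before choosing between the forward and backward loops. A small Go sketch of the same decision (illustrative only):

	package main

	import "fmt"

	// moveDir mirrors the direction check in the memmove routines above:
	// copy forward unless dst starts inside [src, src+n).
	func moveDir(dst, src, n uintptr) string {
		if src > dst {
			return "forward" // source is above destination; a forward copy never clobbers unread bytes
		}
		if src+n <= dst {
			return "forward" // source is below destination but the ranges do not overlap
		}
		return "backward" // dst lands inside [src, src+n): copy from the top down
	}

	func main() {
		fmt.Println(moveDir(100, 200, 64)) // forward
		fmt.Println(moveDir(200, 100, 64)) // forward: no overlap
		fmt.Println(moveDir(120, 100, 64)) // backward: overlapping, dst after src
	}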
diff --git a/src/runtime/memmove_plan9_amd64.s b/src/runtime/memmove_plan9_amd64.s
new file mode 100644
index 000000000..8e96b8717
--- /dev/null
+++ b/src/runtime/memmove_plan9_amd64.s
@@ -0,0 +1,127 @@
+// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
+// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "textflag.h"
+
+// void runtime·memmove(void*, void*, uintptr)
+TEXT runtime·memmove(SB), NOSPLIT, $0-24
+
+ MOVQ to+0(FP), DI
+ MOVQ from+8(FP), SI
+ MOVQ n+16(FP), BX
+
+ // REP instructions have a high startup cost, so we handle small sizes
+ // with some straightline code. The REP MOVSQ instruction is really fast
+ // for large sizes. The cutover is approximately 1K.
+tail:
+ TESTQ BX, BX
+ JEQ move_0
+ CMPQ BX, $2
+ JBE move_1or2
+ CMPQ BX, $4
+ JBE move_3or4
+ CMPQ BX, $8
+ JBE move_5through8
+ CMPQ BX, $16
+ JBE move_9through16
+
+/*
+ * check and set for backwards
+ */
+ CMPQ SI, DI
+ JLS back
+
+/*
+ * forward copy loop
+ */
+forward:
+ MOVQ BX, CX
+ SHRQ $3, CX
+ ANDQ $7, BX
+
+ REP; MOVSQ
+ JMP tail
+
+back:
+/*
+ * check overlap
+ */
+ MOVQ SI, CX
+ ADDQ BX, CX
+ CMPQ CX, DI
+ JLS forward
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+ ADDQ BX, DI
+ ADDQ BX, SI
+ STD
+
+/*
+ * copy
+ */
+ MOVQ BX, CX
+ SHRQ $3, CX
+ ANDQ $7, BX
+
+ SUBQ $8, DI
+ SUBQ $8, SI
+ REP; MOVSQ
+
+ CLD
+ ADDQ $8, DI
+ ADDQ $8, SI
+ SUBQ BX, DI
+ SUBQ BX, SI
+ JMP tail
+
+move_1or2:
+ MOVB (SI), AX
+ MOVB -1(SI)(BX*1), CX
+ MOVB AX, (DI)
+ MOVB CX, -1(DI)(BX*1)
+ RET
+move_0:
+ RET
+move_3or4:
+ MOVW (SI), AX
+ MOVW -2(SI)(BX*1), CX
+ MOVW AX, (DI)
+ MOVW CX, -2(DI)(BX*1)
+ RET
+move_5through8:
+ MOVL (SI), AX
+ MOVL -4(SI)(BX*1), CX
+ MOVL AX, (DI)
+ MOVL CX, -4(DI)(BX*1)
+ RET
+move_9through16:
+ MOVQ (SI), AX
+ MOVQ -8(SI)(BX*1), CX
+ MOVQ AX, (DI)
+ MOVQ CX, -8(DI)(BX*1)
+ RET
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
new file mode 100644
index 000000000..ffda4fe6c
--- /dev/null
+++ b/src/runtime/memmove_test.go
@@ -0,0 +1,295 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "testing"
+)
+
+func TestMemmove(t *testing.T) {
+ size := 256
+ if testing.Short() {
+ size = 128 + 16
+ }
+ src := make([]byte, size)
+ dst := make([]byte, size)
+ for i := 0; i < size; i++ {
+ src[i] = byte(128 + (i & 127))
+ }
+ for i := 0; i < size; i++ {
+ dst[i] = byte(i & 127)
+ }
+ for n := 0; n <= size; n++ {
+ for x := 0; x <= size-n; x++ { // offset in src
+ for y := 0; y <= size-n; y++ { // offset in dst
+ copy(dst[y:y+n], src[x:x+n])
+ for i := 0; i < y; i++ {
+ if dst[i] != byte(i&127) {
+ t.Fatalf("prefix dst[%d] = %d", i, dst[i])
+ }
+ }
+ for i := y; i < y+n; i++ {
+ if dst[i] != byte(128+((i-y+x)&127)) {
+ t.Fatalf("copied dst[%d] = %d", i, dst[i])
+ }
+ dst[i] = byte(i & 127) // reset dst
+ }
+ for i := y + n; i < size; i++ {
+ if dst[i] != byte(i&127) {
+ t.Fatalf("suffix dst[%d] = %d", i, dst[i])
+ }
+ }
+ }
+ }
+ }
+}
+
+func TestMemmoveAlias(t *testing.T) {
+ size := 256
+ if testing.Short() {
+ size = 128 + 16
+ }
+ buf := make([]byte, size)
+ for i := 0; i < size; i++ {
+ buf[i] = byte(i)
+ }
+ for n := 0; n <= size; n++ {
+ for x := 0; x <= size-n; x++ { // src offset
+ for y := 0; y <= size-n; y++ { // dst offset
+ copy(buf[y:y+n], buf[x:x+n])
+ for i := 0; i < y; i++ {
+ if buf[i] != byte(i) {
+ t.Fatalf("prefix buf[%d] = %d", i, buf[i])
+ }
+ }
+ for i := y; i < y+n; i++ {
+ if buf[i] != byte(i-y+x) {
+ t.Fatalf("copied buf[%d] = %d", i, buf[i])
+ }
+ buf[i] = byte(i) // reset buf
+ }
+ for i := y + n; i < size; i++ {
+ if buf[i] != byte(i) {
+ t.Fatalf("suffix buf[%d] = %d", i, buf[i])
+ }
+ }
+ }
+ }
+ }
+}
+
+func bmMemmove(b *testing.B, n int) {
+ x := make([]byte, n)
+ y := make([]byte, n)
+ b.SetBytes(int64(n))
+ for i := 0; i < b.N; i++ {
+ copy(x, y)
+ }
+}
+
+func BenchmarkMemmove0(b *testing.B) { bmMemmove(b, 0) }
+func BenchmarkMemmove1(b *testing.B) { bmMemmove(b, 1) }
+func BenchmarkMemmove2(b *testing.B) { bmMemmove(b, 2) }
+func BenchmarkMemmove3(b *testing.B) { bmMemmove(b, 3) }
+func BenchmarkMemmove4(b *testing.B) { bmMemmove(b, 4) }
+func BenchmarkMemmove5(b *testing.B) { bmMemmove(b, 5) }
+func BenchmarkMemmove6(b *testing.B) { bmMemmove(b, 6) }
+func BenchmarkMemmove7(b *testing.B) { bmMemmove(b, 7) }
+func BenchmarkMemmove8(b *testing.B) { bmMemmove(b, 8) }
+func BenchmarkMemmove9(b *testing.B) { bmMemmove(b, 9) }
+func BenchmarkMemmove10(b *testing.B) { bmMemmove(b, 10) }
+func BenchmarkMemmove11(b *testing.B) { bmMemmove(b, 11) }
+func BenchmarkMemmove12(b *testing.B) { bmMemmove(b, 12) }
+func BenchmarkMemmove13(b *testing.B) { bmMemmove(b, 13) }
+func BenchmarkMemmove14(b *testing.B) { bmMemmove(b, 14) }
+func BenchmarkMemmove15(b *testing.B) { bmMemmove(b, 15) }
+func BenchmarkMemmove16(b *testing.B) { bmMemmove(b, 16) }
+func BenchmarkMemmove32(b *testing.B) { bmMemmove(b, 32) }
+func BenchmarkMemmove64(b *testing.B) { bmMemmove(b, 64) }
+func BenchmarkMemmove128(b *testing.B) { bmMemmove(b, 128) }
+func BenchmarkMemmove256(b *testing.B) { bmMemmove(b, 256) }
+func BenchmarkMemmove512(b *testing.B) { bmMemmove(b, 512) }
+func BenchmarkMemmove1024(b *testing.B) { bmMemmove(b, 1024) }
+func BenchmarkMemmove2048(b *testing.B) { bmMemmove(b, 2048) }
+func BenchmarkMemmove4096(b *testing.B) { bmMemmove(b, 4096) }
+
+func TestMemclr(t *testing.T) {
+ size := 512
+ if testing.Short() {
+ size = 128 + 16
+ }
+ mem := make([]byte, size)
+ for i := 0; i < size; i++ {
+ mem[i] = 0xee
+ }
+ for n := 0; n < size; n++ {
+ for x := 0; x <= size-n; x++ { // offset in mem
+ MemclrBytes(mem[x : x+n])
+ for i := 0; i < x; i++ {
+ if mem[i] != 0xee {
+ t.Fatalf("overwrite prefix mem[%d] = %d", i, mem[i])
+ }
+ }
+ for i := x; i < x+n; i++ {
+ if mem[i] != 0 {
+ t.Fatalf("failed clear mem[%d] = %d", i, mem[i])
+ }
+ mem[i] = 0xee
+ }
+ for i := x + n; i < size; i++ {
+ if mem[i] != 0xee {
+ t.Fatalf("overwrite suffix mem[%d] = %d", i, mem[i])
+ }
+ }
+ }
+ }
+}
+
+func bmMemclr(b *testing.B, n int) {
+ x := make([]byte, n)
+ b.SetBytes(int64(n))
+ for i := 0; i < b.N; i++ {
+ MemclrBytes(x)
+ }
+}
+func BenchmarkMemclr5(b *testing.B) { bmMemclr(b, 5) }
+func BenchmarkMemclr16(b *testing.B) { bmMemclr(b, 16) }
+func BenchmarkMemclr64(b *testing.B) { bmMemclr(b, 64) }
+func BenchmarkMemclr256(b *testing.B) { bmMemclr(b, 256) }
+func BenchmarkMemclr4096(b *testing.B) { bmMemclr(b, 4096) }
+func BenchmarkMemclr65536(b *testing.B) { bmMemclr(b, 65536) }
+
+func BenchmarkClearFat8(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [8 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat12(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [12 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat16(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [16 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat24(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [24 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat32(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [32 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat64(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [64 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat128(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [128 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat256(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [256 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat512(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [512 / 4]uint32
+ _ = x
+ }
+}
+func BenchmarkClearFat1024(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var x [1024 / 4]uint32
+ _ = x
+ }
+}
+
+func BenchmarkCopyFat8(b *testing.B) {
+ var x [8 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat12(b *testing.B) {
+ var x [12 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat16(b *testing.B) {
+ var x [16 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat24(b *testing.B) {
+ var x [24 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat32(b *testing.B) {
+ var x [32 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat64(b *testing.B) {
+ var x [64 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat128(b *testing.B) {
+ var x [128 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat256(b *testing.B) {
+ var x [256 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat512(b *testing.B) {
+ var x [512 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
+func BenchmarkCopyFat1024(b *testing.B) {
+ var x [1024 / 4]uint32
+ for i := 0; i < b.N; i++ {
+ y := x
+ _ = y
+ }
+}
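Editor's usage note, not part of the commit: because bmMemmove and bmMemclr call b.SetBytes(int64(n)), a run such as "go test -run=NONE -bench='Memmove|Memclr' runtime" reports throughput (MB/s) alongside ns/op, which is what makes the per-size comparison meaningful. The ClearFat*/CopyFat* benchmarks appear intended to exercise the compiler-generated code for zeroing and copying fixed-size values of the given widths.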
diff --git a/src/runtime/mfinal_test.go b/src/runtime/mfinal_test.go
new file mode 100644
index 000000000..6b53888ab
--- /dev/null
+++ b/src/runtime/mfinal_test.go
@@ -0,0 +1,239 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "runtime"
+ "testing"
+ "time"
+ "unsafe"
+)
+
+type Tintptr *int // assignable to *int
+type Tint int // *Tint implements Tinter, interface{}
+
+func (t *Tint) m() {}
+
+type Tinter interface {
+ m()
+}
+
+func TestFinalizerType(t *testing.T) {
+ if runtime.GOARCH != "amd64" {
+ t.Skipf("Skipping on non-amd64 machine")
+ }
+
+ ch := make(chan bool, 10)
+ finalize := func(x *int) {
+ if *x != 97531 {
+ t.Errorf("finalizer %d, want %d", *x, 97531)
+ }
+ ch <- true
+ }
+
+ var finalizerTests = []struct {
+ convert func(*int) interface{}
+ finalizer interface{}
+ }{
+ {func(x *int) interface{} { return x }, func(v *int) { finalize(v) }},
+ {func(x *int) interface{} { return Tintptr(x) }, func(v Tintptr) { finalize(v) }},
+ {func(x *int) interface{} { return Tintptr(x) }, func(v *int) { finalize(v) }},
+ {func(x *int) interface{} { return (*Tint)(x) }, func(v *Tint) { finalize((*int)(v)) }},
+ {func(x *int) interface{} { return (*Tint)(x) }, func(v Tinter) { finalize((*int)(v.(*Tint))) }},
+ }
+
+ for _, tt := range finalizerTests {
+ done := make(chan bool, 1)
+ go func() {
+ v := new(int)
+ *v = 97531
+ runtime.SetFinalizer(tt.convert(v), tt.finalizer)
+ v = nil
+ done <- true
+ }()
+ <-done
+ runtime.GC()
+ select {
+ case <-ch:
+ case <-time.After(time.Second * 4):
+ t.Errorf("finalizer for type %T didn't run", tt.finalizer)
+ }
+ }
+}
+
+type bigValue struct {
+ fill uint64
+ it bool
+ up string
+}
+
+func TestFinalizerInterfaceBig(t *testing.T) {
+ if runtime.GOARCH != "amd64" {
+ t.Skipf("Skipping on non-amd64 machine")
+ }
+ ch := make(chan bool)
+ done := make(chan bool, 1)
+ go func() {
+ v := &bigValue{0xDEADBEEFDEADBEEF, true, "It matters not how strait the gate"}
+ old := *v
+ runtime.SetFinalizer(v, func(v interface{}) {
+ i, ok := v.(*bigValue)
+ if !ok {
+ t.Errorf("finalizer called with type %T, want *bigValue", v)
+ }
+ if *i != old {
+ t.Errorf("finalizer called with %+v, want %+v", *i, old)
+ }
+ close(ch)
+ })
+ v = nil
+ done <- true
+ }()
+ <-done
+ runtime.GC()
+ select {
+ case <-ch:
+ case <-time.After(4 * time.Second):
+ t.Errorf("finalizer for type *bigValue didn't run")
+ }
+}
+
+func fin(v *int) {
+}
+
+// Verify that we at least don't crash. golang.org/issue/6857
+func TestFinalizerZeroSizedStruct(t *testing.T) {
+ type Z struct{}
+ z := new(Z)
+ runtime.SetFinalizer(z, func(*Z) {})
+}
+
+func BenchmarkFinalizer(b *testing.B) {
+ const Batch = 1000
+ b.RunParallel(func(pb *testing.PB) {
+ var data [Batch]*int
+ for i := 0; i < Batch; i++ {
+ data[i] = new(int)
+ }
+ for pb.Next() {
+ for i := 0; i < Batch; i++ {
+ runtime.SetFinalizer(data[i], fin)
+ }
+ for i := 0; i < Batch; i++ {
+ runtime.SetFinalizer(data[i], nil)
+ }
+ }
+ })
+}
+
+func BenchmarkFinalizerRun(b *testing.B) {
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ v := new(int)
+ runtime.SetFinalizer(v, fin)
+ }
+ })
+}
+
+// One chunk must be exactly one sizeclass in size.
+// It should be a sizeclass not used much by others, so we
+// have a greater chance of finding adjacent ones.
+// size class 19: 320 byte objects, 25 per page, 1 page alloc at a time
+const objsize = 320
+
+type objtype [objsize]byte
+
+func adjChunks() (*objtype, *objtype) {
+ var s []*objtype
+
+ for {
+ c := new(objtype)
+ for _, d := range s {
+ if uintptr(unsafe.Pointer(c))+unsafe.Sizeof(*c) == uintptr(unsafe.Pointer(d)) {
+ return c, d
+ }
+ if uintptr(unsafe.Pointer(d))+unsafe.Sizeof(*c) == uintptr(unsafe.Pointer(c)) {
+ return d, c
+ }
+ }
+ s = append(s, c)
+ }
+}
+
+// Make sure an empty slice on the stack doesn't pin the next object in memory.
+func TestEmptySlice(t *testing.T) {
+ if true { // disable until bug 7564 is fixed.
+ return
+ }
+ x, y := adjChunks()
+
+ // the pointer inside xs points to y.
+ xs := x[objsize:] // change objsize to objsize-1 and the test passes
+
+ fin := make(chan bool, 1)
+ runtime.SetFinalizer(y, func(z *objtype) { fin <- true })
+ runtime.GC()
+ select {
+ case <-fin:
+ case <-time.After(4 * time.Second):
+ t.Errorf("finalizer of next object in memory didn't run")
+ }
+ xsglobal = xs // keep empty slice alive until here
+}
+
+var xsglobal []byte
+
+func adjStringChunk() (string, *objtype) {
+ b := make([]byte, objsize)
+ for {
+ s := string(b)
+ t := new(objtype)
+ p := *(*uintptr)(unsafe.Pointer(&s))
+ q := uintptr(unsafe.Pointer(t))
+ if p+objsize == q {
+ return s, t
+ }
+ }
+}
+
+// Make sure an empty string on the stack doesn't pin the next object in memory.
+func TestEmptyString(t *testing.T) {
+ x, y := adjStringChunk()
+
+ ss := x[objsize:] // change objsize to objsize-1 and the test passes
+ fin := make(chan bool, 1)
+ // set finalizer on string contents of y
+ runtime.SetFinalizer(y, func(z *objtype) { fin <- true })
+ runtime.GC()
+ select {
+ case <-fin:
+ case <-time.After(4 * time.Second):
+ t.Errorf("finalizer of next string in memory didn't run")
+ }
+ ssglobal = ss // keep 0-length string live until here
+}
+
+var ssglobal string
+
+// Test for issue 7656.
+func TestFinalizerOnGlobal(t *testing.T) {
+ runtime.SetFinalizer(Foo1, func(p *Object1) {})
+ runtime.SetFinalizer(Foo2, func(p *Object2) {})
+ runtime.SetFinalizer(Foo1, nil)
+ runtime.SetFinalizer(Foo2, nil)
+}
+
+type Object1 struct {
+ Something []byte
+}
+
+type Object2 struct {
+ Something byte
+}
+
+var (
+ Foo2 = &Object2{}
+ Foo1 = &Object1{}
+)
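Editor's aside, not part of the commit: the finalizer tests above share one shape, condensed in the sketch below: make an object unreachable, force a collection with runtime.GC, and wait on a channel the finalizer signals, treating a timeout as "finalizer never ran". Names in the sketch are illustrative only, and finalizers are not guaranteed to run promptly.

	package main

	import (
		"fmt"
		"runtime"
		"time"
	)

	func main() {
		ran := make(chan bool, 1)

		// Allocate in a separate function so no live reference to v remains afterwards.
		func() {
			v := new(int)
			*v = 97531
			runtime.SetFinalizer(v, func(p *int) { ran <- true })
			v = nil
		}()

		runtime.GC() // make the object eligible and queue its finalizer
		select {
		case <-ran:
			fmt.Println("finalizer ran")
		case <-time.After(4 * time.Second):
			fmt.Println("finalizer did not run within 4s")
		}
	}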
diff --git a/src/runtime/mfixalloc.c b/src/runtime/mfixalloc.c
new file mode 100644
index 000000000..d670629da
--- /dev/null
+++ b/src/runtime/mfixalloc.c
@@ -0,0 +1,64 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fixed-size object allocator. Returned memory is not zeroed.
+//
+// See malloc.h for overview.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+
+// Initialize f to allocate objects of the given size,
+// using the allocator to obtain chunks of memory.
+void
+runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat)
+{
+ f->size = size;
+ f->first = first;
+ f->arg = arg;
+ f->list = nil;
+ f->chunk = nil;
+ f->nchunk = 0;
+ f->inuse = 0;
+ f->stat = stat;
+}
+
+void*
+runtime·FixAlloc_Alloc(FixAlloc *f)
+{
+ void *v;
+
+ if(f->size == 0) {
+ runtime·printf("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n");
+ runtime·throw("runtime: internal error");
+ }
+
+ if(f->list) {
+ v = f->list;
+ f->list = *(void**)f->list;
+ f->inuse += f->size;
+ return v;
+ }
+ if(f->nchunk < f->size) {
+ f->chunk = runtime·persistentalloc(FixAllocChunk, 0, f->stat);
+ f->nchunk = FixAllocChunk;
+ }
+ v = f->chunk;
+ if(f->first)
+ f->first(f->arg, v);
+ f->chunk += f->size;
+ f->nchunk -= f->size;
+ f->inuse += f->size;
+ return v;
+}
+
+void
+runtime·FixAlloc_Free(FixAlloc *f, void *p)
+{
+ f->inuse -= f->size;
+ *(void**)p = f->list;
+ f->list = p;
+}
+
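Editor's aside, not part of the commit: a minimal Go sketch of the FixAlloc idea above, under the simplifying assumption that objects are plain byte slices and the free list is an explicit slice rather than the intrusive next-pointer trick the C code uses. Like FixAlloc, it reuses freed objects without zeroing them.

	package main

	import "fmt"

	const chunkSize = 16 * 1024 // stand-in for FixAllocChunk

	// fixAlloc hands out fixed-size objects carved from large chunks and
	// recycles freed ones through a free list.
	type fixAlloc struct {
		size  int      // object size in bytes
		chunk []byte   // remainder of the current chunk
		free  [][]byte // objects returned by freeObj, available for reuse
		inuse int      // bytes currently handed out
	}

	func (f *fixAlloc) alloc() []byte {
		if n := len(f.free); n > 0 { // reuse a freed object first
			v := f.free[n-1]
			f.free = f.free[:n-1]
			f.inuse += f.size
			return v
		}
		if len(f.chunk) < f.size { // current chunk exhausted: grab a fresh one
			f.chunk = make([]byte, chunkSize)
		}
		v := f.chunk[:f.size:f.size]
		f.chunk = f.chunk[f.size:]
		f.inuse += f.size
		return v
	}

	func (f *fixAlloc) freeObj(v []byte) {
		f.inuse -= f.size
		f.free = append(f.free, v)
	}

	func main() {
		f := &fixAlloc{size: 320}
		a, b := f.alloc(), f.alloc()
		f.freeObj(a)
		c := f.alloc()                       // reuses a's storage
		fmt.Println(len(b), len(c), f.inuse) // 320 320 640
	}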
diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c
new file mode 100644
index 000000000..d4c414736
--- /dev/null
+++ b/src/runtime/mgc0.c
@@ -0,0 +1,1910 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Garbage collector (GC).
+//
+// GC is:
+// - mark&sweep
+// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
+// - parallel (up to MaxGcproc threads)
+// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
+// - non-moving/non-compacting
+// - full (non-partial)
+//
+// GC rate.
+// Next GC is after we've allocated an extra amount of memory proportional to
+// the amount already in use. The proportion is controlled by GOGC environment variable
+// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
+// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
+// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
+// (and also the amount of extra memory used).
+//
+// Concurrent sweep.
+// The sweep phase proceeds concurrently with normal program execution.
+// The heap is swept span-by-span both lazily (when a goroutine needs another span)
+// and concurrently in a background goroutine (this helps programs that are not CPU bound).
+// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
+// and so next_gc calculation is tricky and happens as follows.
+// At the end of the stop-the-world phase next_gc is conservatively set based on total
+// heap size; all spans are marked as "needs sweeping".
+// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
+// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
+// closer to the target value. However, this is not enough to avoid over-allocating memory.
+// Consider that a goroutine wants to allocate a new span for a large object and
+// there are no free swept spans, but there are small-object unswept spans.
+// If the goroutine naively allocates a new span, it can surpass the yet-unknown
+// target next_gc value. In order to prevent such cases (1) when a goroutine needs
+// to allocate a new small-object span, it sweeps small-object spans for the same
+// object size until it frees at least one object; (2) when a goroutine needs to
+// allocate large-object span from heap, it sweeps spans until it frees at least
+// that many pages into heap. Together these two measures ensure that we don't surpass
+// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
+// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
+// but there can still be other one-page unswept spans which could be combined into a two-page span.
+// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
+// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
+// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
+// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
+// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
+// The finalizer goroutine is kicked off only when all spans are swept.
+// When the next GC starts, it sweeps all not-yet-swept spans (if any).
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "stack.h"
+#include "mgc0.h"
+#include "chan.h"
+#include "race.h"
+#include "type.h"
+#include "typekind.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+enum {
+ Debug = 0,
+ ConcurrentSweep = 1,
+ PreciseScan = 1,
+
+ WorkbufSize = 4*1024,
+ FinBlockSize = 4*1024,
+ RootData = 0,
+ RootBss = 1,
+ RootFinalizers = 2,
+ RootSpans = 3,
+ RootFlushCaches = 4,
+ RootCount = 5,
+};
+
+#define ScanConservatively ((byte*)1)
+
+// Initialized from $GOGC. GOGC=off means no gc.
+extern int32 runtime·gcpercent;
+
+// Holding worldsema grants an M the right to try to stop the world.
+// The procedure is:
+//
+// runtime·semacquire(&runtime·worldsema);
+// m->gcing = 1;
+// runtime·stoptheworld();
+//
+// ... do stuff ...
+//
+// m->gcing = 0;
+// runtime·semrelease(&runtime·worldsema);
+// runtime·starttheworld();
+//
+uint32 runtime·worldsema = 1;
+
+typedef struct Workbuf Workbuf;
+struct Workbuf
+{
+ LFNode node; // must be first
+ uintptr nobj;
+ byte* obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize];
+};
+
+extern byte runtime·data[];
+extern byte runtime·edata[];
+extern byte runtime·bss[];
+extern byte runtime·ebss[];
+
+extern byte runtime·gcdata[];
+extern byte runtime·gcbss[];
+
+Mutex runtime·finlock; // protects the following variables
+G* runtime·fing; // goroutine that runs finalizers
+FinBlock* runtime·finq; // list of finalizers that are to be executed
+FinBlock* runtime·finc; // cache of free blocks
+bool runtime·fingwait;
+bool runtime·fingwake;
+static FinBlock *allfin; // list of all blocks
+
+BitVector runtime·gcdatamask;
+BitVector runtime·gcbssmask;
+
+static Mutex gclock;
+
+static void bgsweep(void);
+static Workbuf* getempty(Workbuf*);
+static Workbuf* getfull(Workbuf*);
+static void putempty(Workbuf*);
+static Workbuf* handoff(Workbuf*);
+static void gchelperstart(void);
+static void flushallmcaches(void);
+static bool scanframe(Stkframe *frame, void *unused);
+static void scanstack(G *gp);
+static BitVector unrollglobgcprog(byte *prog, uintptr size);
+
+static FuncVal bgsweepv = {bgsweep};
+
+static struct {
+ uint64 full; // lock-free list of full blocks
+ uint64 empty; // lock-free list of empty blocks
+ byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
+ uint32 nproc;
+ int64 tstart;
+ volatile uint32 nwait;
+ volatile uint32 ndone;
+ Note alldone;
+ ParFor* markfor;
+
+ // Copy of mheap.allspans for marker or sweeper.
+ MSpan** spans;
+ uint32 nspan;
+} work;
+
+// scanblock scans a block of n bytes starting at pointer b for references
+// to other objects, scanning any it finds recursively until there are no
+// unscanned objects left. Instead of using an explicit recursion, it keeps
+// a work list in the Workbuf* structures and loops in the main function
+// body. Keeping an explicit work list is easier on the stack allocator and
+// more efficient.
+static void
+scanblock(byte *b, uintptr n, byte *ptrmask)
+{
+ byte *obj, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp, bits, xbits, shift, cached;
+ uintptr i, nobj, size, idx, x, off, scanbufpos;
+ intptr ncached;
+ Workbuf *wbuf;
+ Iface *iface;
+ Eface *eface;
+ Type *typ;
+ MSpan *s;
+ pageID k;
+ bool keepworking;
+
+ // Cache memory arena parameters in local vars.
+ arena_start = runtime·mheap.arena_start;
+ arena_used = runtime·mheap.arena_used;
+
+ wbuf = getempty(nil);
+ nobj = wbuf->nobj;
+ wp = &wbuf->obj[nobj];
+ keepworking = b == nil;
+ scanbufpos = 0;
+ for(i = 0; i < nelem(scanbuf); i++)
+ scanbuf[i] = nil;
+
+ ptrbitp = nil;
+ cached = 0;
+ ncached = 0;
+
+ // ptrmask can have 3 possible values:
+ // 1. nil - obtain pointer mask from GC bitmap.
+ // 2. ScanConservatively - don't use any mask, scan conservatively.
+ // 3. pointer to a compact mask (for stacks and data).
+ if(b != nil)
+ goto scanobj;
+ for(;;) {
+ if(nobj == 0) {
+ // Out of work in workbuf.
+			// First, see if there is any work in scanbuf.
+ for(i = 0; i < nelem(scanbuf); i++) {
+ b = scanbuf[scanbufpos];
+ scanbuf[scanbufpos++] = nil;
+ if(scanbufpos == nelem(scanbuf))
+ scanbufpos = 0;
+ if(b != nil) {
+ n = arena_used - b; // scan until bitBoundary or BitsDead
+ ptrmask = nil; // use GC bitmap for pointer info
+ goto scanobj;
+ }
+ }
+ if(!keepworking) {
+ putempty(wbuf);
+ return;
+ }
+ // Refill workbuf from global queue.
+ wbuf = getfull(wbuf);
+ if(wbuf == nil)
+ return;
+ nobj = wbuf->nobj;
+ wp = &wbuf->obj[nobj];
+ }
+
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > 4 && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = &wbuf->obj[nobj];
+ }
+
+ wp--;
+ nobj--;
+ b = *wp;
+ n = arena_used - b; // scan until next bitBoundary or BitsDead
+ ptrmask = nil; // use GC bitmap for pointer info
+
+ scanobj:
+ if(!PreciseScan) {
+ if(ptrmask == nil) {
+ // Heap obj, obtain real size.
+ if(!runtime·mlookup(b, &p, &n, nil))
+ continue; // not an allocated obj
+ if(b != p)
+ runtime·throw("bad heap object");
+ }
+ ptrmask = ScanConservatively;
+ }
+ // Find bits of the beginning of the object.
+ if(ptrmask == nil) {
+ off = (uintptr*)b - (uintptr*)arena_start;
+ ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ cached = *ptrbitp >> shift;
+ cached &= ~bitBoundary;
+ ncached = (8 - shift)/gcBits;
+ }
+ for(i = 0; i < n; i += PtrSize) {
+ obj = nil;
+ // Find bits for this word.
+ if(ptrmask == nil) {
+			// Check if we have reached the end of the span.
+ if((((uintptr)b+i)%PageSize) == 0 &&
+ runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
+ break;
+ // Consult GC bitmap.
+ if(ncached <= 0) {
+ // Refill cache.
+ cached = *--ptrbitp;
+ ncached = 2;
+ }
+ bits = cached;
+ cached >>= gcBits;
+ ncached--;
+ if((bits&bitBoundary) != 0)
+ break; // reached beginning of the next object
+ bits = (bits>>2)&BitsMask;
+ if(bits == BitsDead)
+ break; // reached no-scan part of the object
+ } else if(ptrmask != ScanConservatively) // dense mask (stack or data)
+ bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
+ else
+ bits = BitsPointer;
+
+ if(bits == BitsScalar || bits == BitsDead)
+ continue;
+ if(bits == BitsPointer) {
+ obj = *(byte**)(b+i);
+ goto markobj;
+ }
+
+ // With those three out of the way, must be multi-word.
+ if(bits != BitsMultiWord)
+ runtime·throw("unexpected garbage collection bits");
+ // Find the next pair of bits.
+ if(ptrmask == nil) {
+ if(ncached <= 0) {
+ // Refill cache.
+ cached = *--ptrbitp;
+ ncached = 2;
+ }
+ bits = (cached>>2)&BitsMask;
+ } else
+ bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
+
+ switch(bits) {
+ default:
+ runtime·throw("unexpected garbage collection bits");
+ case BitsIface:
+ iface = (Iface*)(b+i);
+ if(iface->tab != nil) {
+ typ = iface->tab->type;
+ if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
+ obj = iface->data;
+ }
+ break;
+ case BitsEface:
+ eface = (Eface*)(b+i);
+ typ = eface->type;
+ if(typ != nil) {
+ if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
+ obj = eface->data;
+ }
+ break;
+ }
+
+ i += PtrSize;
+ cached >>= gcBits;
+ ncached--;
+
+ markobj:
+ // At this point we have extracted the next potential pointer.
+ // Check if it points into heap.
+ if(obj == nil || obj < arena_start || obj >= arena_used)
+ continue;
+ // Mark the object.
+ off = (uintptr*)obj - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ xbits = *bitp;
+ bits = (xbits >> shift) & bitMask;
+ if((bits&bitBoundary) == 0) {
+ // Not a beginning of a block, consult span table to find the block beginning.
+ k = (uintptr)obj>>PageShift;
+ x = k;
+ x -= (uintptr)arena_start>>PageShift;
+ s = runtime·mheap.spans[x];
+ if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
+ continue;
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass != 0) {
+ size = s->elemsize;
+ idx = ((byte*)obj - p)/size;
+ p = p+idx*size;
+ }
+ if(p == obj) {
+ runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
+ p, s->start*PageSize, s->limit);
+ runtime·throw("failed to find block beginning");
+ }
+ obj = p;
+ goto markobj;
+ }
+
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+		// Only care about objects that are not yet marked.
+ if((bits&bitMarked) != 0)
+ continue;
+ // If obj size is greater than 8, then each byte of GC bitmap
+ // contains info for at most one object. In such case we use
+ // non-atomic byte store to mark the object. This can lead
+ // to double enqueue of the object for scanning, but scanning
+ // is an idempotent operation, so it is OK. This cannot lead
+ // to bitmap corruption because the single marked bit is the
+ // only thing that can change in the byte.
+ // For 8-byte objects we use non-atomic store, if the other
+ // quadruple is already marked. Otherwise we resort to CAS
+ // loop for marking.
+ if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
+ work.nproc == 1)
+ *bitp = xbits | (bitMarked<<shift);
+ else
+ runtime·atomicor8(bitp, bitMarked<<shift);
+
+ if(((xbits>>(shift+2))&BitsMask) == BitsDead)
+ continue; // noscan object
+
+ // Queue the obj for scanning.
+ PREFETCH(obj);
+ obj = (byte*)((uintptr)obj & ~(PtrSize-1));
+ p = scanbuf[scanbufpos];
+ scanbuf[scanbufpos++] = obj;
+ if(scanbufpos == nelem(scanbuf))
+ scanbufpos = 0;
+ if(p == nil)
+ continue;
+
+ // If workbuf is full, obtain an empty one.
+ if(nobj >= nelem(wbuf->obj)) {
+ wbuf->nobj = nobj;
+ wbuf = getempty(wbuf);
+ nobj = wbuf->nobj;
+ wp = &wbuf->obj[nobj];
+ }
+ *wp = p;
+ wp++;
+ nobj++;
+ }
+
+ if(Debug && ptrmask == nil) {
+ // For heap objects ensure that we did not overscan.
+ n = 0;
+ p = nil;
+ if(!runtime·mlookup(b, &p, &n, nil) || b != p || i > n) {
+ runtime·printf("runtime: scanned (%p,%p), heap object (%p,%p)\n", b, i, p, n);
+ runtime·throw("scanblock: scanned invalid object");
+ }
+ }
+ }
+}
+
+static void
+markroot(ParFor *desc, uint32 i)
+{
+ FinBlock *fb;
+ MSpan *s;
+ uint32 spanidx, sg;
+ G *gp;
+ void *p;
+ uint32 status;
+ bool restart;
+
+ USED(&desc);
+ // Note: if you add a case here, please also update heapdump.c:dumproots.
+ switch(i) {
+ case RootData:
+ scanblock(runtime·data, runtime·edata - runtime·data, (byte*)runtime·gcdatamask.data);
+ break;
+
+ case RootBss:
+ scanblock(runtime·bss, runtime·ebss - runtime·bss, (byte*)runtime·gcbssmask.data);
+ break;
+
+ case RootFinalizers:
+ for(fb=allfin; fb; fb=fb->alllink)
+ scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), ScanConservatively);
+ break;
+
+ case RootSpans:
+ // mark MSpan.specials
+ sg = runtime·mheap.sweepgen;
+ for(spanidx=0; spanidx<work.nspan; spanidx++) {
+ Special *sp;
+ SpecialFinalizer *spf;
+
+ s = work.spans[spanidx];
+ if(s->state != MSpanInUse)
+ continue;
+ if(s->sweepgen != sg) {
+ runtime·printf("sweep %d %d\n", s->sweepgen, sg);
+ runtime·throw("gc: unswept span");
+ }
+ for(sp = s->specials; sp != nil; sp = sp->next) {
+ if(sp->kind != KindSpecialFinalizer)
+ continue;
+ // don't mark finalized object, but scan it so we
+ // retain everything it points to.
+ spf = (SpecialFinalizer*)sp;
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize);
+ scanblock(p, s->elemsize, nil);
+ scanblock((void*)&spf->fn, PtrSize, ScanConservatively);
+ }
+ }
+ break;
+
+ case RootFlushCaches:
+ flushallmcaches();
+ break;
+
+ default:
+ // the rest is scanning goroutine stacks
+ if(i - RootCount >= runtime·allglen)
+ runtime·throw("markroot: bad index");
+ gp = runtime·allg[i - RootCount];
+ // remember when we've first observed the G blocked
+ // needed only to output in traceback
+ status = runtime·readgstatus(gp);
+ if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
+ gp->waitsince = work.tstart;
+ // Shrink a stack if not much of it is being used.
+ runtime·shrinkstack(gp);
+ if(runtime·readgstatus(gp) == Gdead)
+ gp->gcworkdone = true;
+ else
+ gp->gcworkdone = false;
+ restart = runtime·stopg(gp);
+ scanstack(gp);
+ if(restart)
+ runtime·restartg(gp);
+ break;
+ }
+}
+
+// Get an empty work buffer off the work.empty list,
+// allocating new buffers as needed.
+static Workbuf*
+getempty(Workbuf *b)
+{
+ MCache *c;
+
+ if(b != nil)
+ runtime·lfstackpush(&work.full, &b->node);
+ b = nil;
+ c = g->m->mcache;
+ if(c->gcworkbuf != nil) {
+ b = c->gcworkbuf;
+ c->gcworkbuf = nil;
+ }
+ if(b == nil)
+ b = (Workbuf*)runtime·lfstackpop(&work.empty);
+ if(b == nil)
+ b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
+ b->nobj = 0;
+ return b;
+}
+
+static void
+putempty(Workbuf *b)
+{
+ MCache *c;
+
+ c = g->m->mcache;
+ if(c->gcworkbuf == nil) {
+ c->gcworkbuf = b;
+ return;
+ }
+ runtime·lfstackpush(&work.empty, &b->node);
+}
+
+void
+runtime·gcworkbuffree(void *b)
+{
+ if(b != nil)
+ putempty(b);
+}
+
+// Get a full work buffer off the work.full list, or return nil.
+static Workbuf*
+getfull(Workbuf *b)
+{
+ int32 i;
+
+ if(b != nil)
+ runtime·lfstackpush(&work.empty, &b->node);
+ b = (Workbuf*)runtime·lfstackpop(&work.full);
+ if(b != nil || work.nproc == 1)
+ return b;
+
+ runtime·xadd(&work.nwait, +1);
+ for(i=0;; i++) {
+ if(work.full != 0) {
+ runtime·xadd(&work.nwait, -1);
+ b = (Workbuf*)runtime·lfstackpop(&work.full);
+ if(b != nil)
+ return b;
+ runtime·xadd(&work.nwait, +1);
+ }
+ if(work.nwait == work.nproc)
+ return nil;
+ if(i < 10) {
+ g->m->gcstats.nprocyield++;
+ runtime·procyield(20);
+ } else if(i < 20) {
+ g->m->gcstats.nosyield++;
+ runtime·osyield();
+ } else {
+ g->m->gcstats.nsleep++;
+ runtime·usleep(100);
+ }
+ }
+}
+
+static Workbuf*
+handoff(Workbuf *b)
+{
+ int32 n;
+ Workbuf *b1;
+
+ // Make new buffer with half of b's pointers.
+ b1 = getempty(nil);
+ n = b->nobj/2;
+ b->nobj -= n;
+ b1->nobj = n;
+ runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
+ g->m->gcstats.nhandoff++;
+ g->m->gcstats.nhandoffcnt += n;
+
+ // Put b on full list - let first half of b get stolen.
+ runtime·lfstackpush(&work.full, &b->node);
+ return b1;
+}
+
+BitVector
+runtime·stackmapdata(StackMap *stackmap, int32 n)
+{
+ if(n < 0 || n >= stackmap->n)
+ runtime·throw("stackmapdata: index out of range");
+ return (BitVector){stackmap->nbit, stackmap->data + n*((stackmap->nbit+31)/32)};
+}
+
+// Scan a stack frame: local variables and function arguments/results.
+static bool
+scanframe(Stkframe *frame, void *unused)
+{
+ Func *f;
+ StackMap *stackmap;
+ BitVector bv;
+ uintptr size;
+ uintptr targetpc;
+ int32 pcdata;
+
+ USED(unused);
+ f = frame->fn;
+ targetpc = frame->continpc;
+ if(targetpc == 0) {
+ // Frame is dead.
+ return true;
+ }
+ if(Debug > 1)
+ runtime·printf("scanframe %s\n", runtime·funcname(f));
+ if(targetpc != f->entry)
+ targetpc--;
+ pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
+ if(pcdata == -1) {
+ // We do not have a valid pcdata value but there might be a
+ // stackmap for this function. It is likely that we are looking
+ // at the function prologue, assume so and hope for the best.
+ pcdata = 0;
+ }
+
+ // Scan local variables if stack frame has been allocated.
+ // Use pointer information if known.
+ stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
+ if(stackmap == nil) {
+ // No locals information, scan everything.
+ size = frame->varp - frame->sp;
+ if(Debug > 2)
+ runtime·printf("frame %s unsized locals %p+%p\n", runtime·funcname(f), (byte*)(frame->varp-size), size);
+ scanblock((byte*)(frame->varp - size), size, ScanConservatively);
+ } else if(stackmap->n < 0) {
+ // Locals size information, scan just the locals.
+ size = -stackmap->n;
+ if(Debug > 2)
+ runtime·printf("frame %s conservative locals %p+%p\n", runtime·funcname(f), (byte*)(frame->varp-size), size);
+ scanblock((byte*)(frame->varp - size), size, ScanConservatively);
+ } else if(stackmap->n > 0) {
+ // Locals bitmap information, scan just the pointers in locals.
+ if(pcdata < 0 || pcdata >= stackmap->n) {
+ // don't know where we are
+ runtime·printf("pcdata is %d and %d stack map entries for %s (targetpc=%p)\n",
+ pcdata, stackmap->n, runtime·funcname(f), targetpc);
+ runtime·throw("scanframe: bad symbol table");
+ }
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ size = (bv.n * PtrSize) / BitsPerPointer;
+ scanblock((byte*)(frame->varp - size), bv.n/BitsPerPointer*PtrSize, (byte*)bv.data);
+ }
+
+ // Scan arguments.
+ // Use pointer information if known.
+ stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
+ if(stackmap != nil) {
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, (byte*)bv.data);
+ } else {
+ if(Debug > 2)
+ runtime·printf("frame %s conservative args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
+ scanblock((byte*)frame->argp, frame->arglen, ScanConservatively);
+ }
+ return true;
+}
+
+static void
+scanstack(G *gp)
+{
+ M *mp;
+ int32 n;
+ Stktop *stk;
+ uintptr sp, guard;
+ bool (*fn)(Stkframe*, void*);
+
+ if(runtime·readgstatus(gp)&Gscan == 0) {
+ runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
+ runtime·throw("mark - bad status");
+ }
+
+ switch(runtime·readgstatus(gp)&~Gscan) {
+ default:
+ runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
+ runtime·throw("mark - bad status");
+ case Gdead:
+ return;
+ case Grunning:
+ runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
+ runtime·throw("mark - world not stopped");
+ case Grunnable:
+ case Gsyscall:
+ case Gwaiting:
+ break;
+ }
+
+ if(gp == g)
+ runtime·throw("can't scan our own stack");
+ if((mp = gp->m) != nil && mp->helpgc)
+ runtime·throw("can't scan gchelper stack");
+
+ if(gp->syscallstack != (uintptr)nil) {
+ // Scanning another goroutine that is about to enter or might
+ // have just exited a system call. It may be executing code such
+ // as schedlock and may have needed to start a new stack segment.
+ // Use the stack segment and stack pointer at the time of
+ // the system call instead, since that won't change underfoot.
+ sp = gp->syscallsp;
+ stk = (Stktop*)gp->syscallstack;
+ guard = gp->syscallguard;
+ } else {
+ // Scanning another goroutine's stack.
+ // The goroutine is usually asleep (the world is stopped).
+ sp = gp->sched.sp;
+ stk = (Stktop*)gp->stackbase;
+ guard = gp->stackguard;
+ }
+ if(ScanStackByFrames) {
+ USED(sp);
+ USED(stk);
+ USED(guard);
+ fn = scanframe;
+ runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, &fn, nil, false);
+ } else {
+ n = 0;
+ while(stk) {
+ if(sp < guard-StackGuard || (uintptr)stk < sp) {
+ runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
+ runtime·throw("scanstack");
+ }
+ if(Debug > 2)
+ runtime·printf("conservative stack %p+%p\n", (byte*)sp, (uintptr)stk-sp);
+ scanblock((byte*)sp, (uintptr)stk - sp, ScanConservatively);
+ sp = stk->gobuf.sp;
+ guard = stk->stackguard;
+ stk = (Stktop*)stk->stackbase;
+ n++;
+ }
+ }
+}
+
+// The gp has been moved to a gc safepoint. If there is gcphase-specific
+// work, it is done here.
+void
+runtime·gcphasework(G *gp)
+{
+ switch(runtime·gcphase) {
+ default:
+ runtime·throw("gcphasework in bad gcphase");
+ case GCoff:
+ case GCquiesce:
+ case GCstw:
+ case GCsweep:
+ // No work for now.
+ break;
+ case GCmark:
+ // Disabled until concurrent GC is implemented
+ // but indicate the scan has been done.
+ // scanstack(gp);
+ break;
+ }
+ gp->gcworkdone = true;
+}
+
+void
+runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot)
+{
+ FinBlock *block;
+ Finalizer *f;
+
+ runtime·lock(&runtime·finlock);
+ if(runtime·finq == nil || runtime·finq->cnt == runtime·finq->cap) {
+ if(runtime·finc == nil) {
+ runtime·finc = runtime·persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
+ runtime·finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
+ runtime·finc->alllink = allfin;
+ allfin = runtime·finc;
+ }
+ block = runtime·finc;
+ runtime·finc = block->next;
+ block->next = runtime·finq;
+ runtime·finq = block;
+ }
+ f = &runtime·finq->fin[runtime·finq->cnt];
+ runtime·finq->cnt++;
+ f->fn = fn;
+ f->nret = nret;
+ f->fint = fint;
+ f->ot = ot;
+ f->arg = p;
+ runtime·fingwake = true;
+ runtime·unlock(&runtime·finlock);
+}
+
+void
+runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*))
+{
+ FinBlock *fb;
+ Finalizer *f;
+ uintptr i;
+
+ for(fb = allfin; fb; fb = fb->alllink) {
+ for(i = 0; i < fb->cnt; i++) {
+ f = &fb->fin[i];
+ callback(f->fn, f->arg, f->nret, f->fint, f->ot);
+ }
+ }
+}
+
+void
+runtime·MSpan_EnsureSwept(MSpan *s)
+{
+ uint32 sg;
+
+ // Caller must disable preemption.
+ // Otherwise when this function returns the span can become unswept again
+ // (if GC is triggered on another goroutine).
+ if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
+ runtime·throw("MSpan_EnsureSwept: m is not locked");
+
+ sg = runtime·mheap.sweepgen;
+ if(runtime·atomicload(&s->sweepgen) == sg)
+ return;
+ if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
+ runtime·MSpan_Sweep(s, false);
+ return;
+ }
+ // unfortunate condition, and we don't have efficient means to wait
+ while(runtime·atomicload(&s->sweepgen) != sg)
+ runtime·osyield();
+}
+
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
+// It clears the mark bits in preparation for the next GC round.
+// Returns true if the span was returned to heap.
+// If preserve=true, don't return it to heap nor relink in MCentral lists;
+// caller takes care of it.
+bool
+runtime·MSpan_Sweep(MSpan *s, bool preserve)
+{
+ int32 cl, n, npages, nfree;
+ uintptr size, off, step;
+ uint32 sweepgen;
+ byte *p, *bitp, shift, xbits, bits;
+ MCache *c;
+ byte *arena_start;
+ MLink head, *end, *link;
+ Special *special, **specialp, *y;
+ bool res, sweepgenset;
+
+ // It's critical that we enter this function with preemption disabled,
+ // GC must not start while we are in the middle of this function.
+ if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
+ runtime·throw("MSpan_Sweep: m is not locked");
+ sweepgen = runtime·mheap.sweepgen;
+ if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
+ runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
+ s->state, s->sweepgen, sweepgen);
+ runtime·throw("MSpan_Sweep: bad span state");
+ }
+ arena_start = runtime·mheap.arena_start;
+ cl = s->sizeclass;
+ size = s->elemsize;
+ if(cl == 0) {
+ n = 1;
+ } else {
+ // Chunk full of small blocks.
+ npages = runtime·class_to_allocnpages[cl];
+ n = (npages << PageShift) / size;
+ }
+ res = false;
+ nfree = 0;
+ end = &head;
+ c = g->m->mcache;
+ sweepgenset = false;
+
+ // Mark any free objects in this span so we don't collect them.
+ for(link = s->freelist; link != nil; link = link->next) {
+ off = (uintptr*)link - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ *bitp |= bitMarked<<shift;
+ }
+
+ // Unlink & free special records for any objects we're about to free.
+ specialp = &s->specials;
+ special = *specialp;
+ while(special != nil) {
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p = (byte*)(s->start << PageShift) + special->offset/size*size;
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ bits = (*bitp>>shift) & bitMask;
+ if((bits&bitMarked) == 0) {
+ // Find the exact byte for which the special was setup
+ // (as opposed to object beginning).
+ p = (byte*)(s->start << PageShift) + special->offset;
+ // about to free object: splice out special record
+ y = special;
+ special = special->next;
+ *specialp = special;
+ if(!runtime·freespecial(y, p, size, false)) {
+ // stop freeing of object if it has a finalizer
+ *bitp |= bitMarked << shift;
+ }
+ } else {
+ // object is still live: keep special record
+ specialp = &special->next;
+ special = *specialp;
+ }
+ }
+
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
+ p = (byte*)(s->start << PageShift);
+ // Find bits for the beginning of the span.
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = 0;
+ step = size/(PtrSize*wordsPerBitmapByte);
+ // Rewind to the previous quadruple as we move to the next
+ // in the beginning of the loop.
+ bitp += step;
+ if(step == 0) {
+ // 8-byte objects.
+ bitp++;
+ shift = gcBits;
+ }
+ for(; n > 0; n--, p += size) {
+ bitp -= step;
+ if(step == 0) {
+ if(shift != 0)
+ bitp--;
+ shift = gcBits - shift;
+ }
+
+ xbits = *bitp;
+ bits = (xbits>>shift) & bitMask;
+
+ // Allocated and marked object, reset bits to allocated.
+ if((bits&bitMarked) != 0) {
+ *bitp &= ~(bitMarked<<shift);
+ continue;
+ }
+		// At this point we know that we are looking at a garbage object
+		// that needs to be collected.
+ if(runtime·debug.allocfreetrace)
+ runtime·tracefree(p, size);
+ // Reset to allocated+noscan.
+ *bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
+ if(cl == 0) {
+ // Free large span.
+ if(preserve)
+ runtime·throw("can't preserve large span");
+ runtime·unmarkspan(p, s->npages<<PageShift);
+ s->needzero = 1;
+ // important to set sweepgen before returning it to heap
+ runtime·atomicstore(&s->sweepgen, sweepgen);
+ sweepgenset = true;
+ // NOTE(rsc,dvyukov): The original implementation of efence
+ // in CL 22060046 used SysFree instead of SysFault, so that
+ // the operating system would eventually give the memory
+ // back to us again, so that an efence program could run
+ // longer without running out of memory. Unfortunately,
+ // calling SysFree here without any kind of adjustment of the
+ // heap data structures means that when the memory does
+ // come back to us, we have the wrong metadata for it, either in
+ // the MSpan structures or in the garbage collection bitmap.
+ // Using SysFault here means that the program will run out of
+ // memory fairly quickly in efence mode, but at least it won't
+ // have mysterious crashes due to confused memory reuse.
+ // It should be possible to switch back to SysFree if we also
+ // implement and then call some kind of MHeap_DeleteSpan.
+ if(runtime·debug.efence) {
+ s->limit = nil; // prevent mlookup from finding this span
+ runtime·SysFault(p, size);
+ } else
+ runtime·MHeap_Free(&runtime·mheap, s, 1);
+ c->local_nlargefree++;
+ c->local_largefree += size;
+ runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
+ res = true;
+ } else {
+ // Free small object.
+ if(size > 2*sizeof(uintptr))
+ ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
+ else if(size > sizeof(uintptr))
+ ((uintptr*)p)[1] = 0;
+
+ end->next = (MLink*)p;
+ end = (MLink*)p;
+ nfree++;
+ }
+ }
+
+ // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
+ // because of the potential for a concurrent free/SetFinalizer.
+ // But we need to set it before we make the span available for allocation
+ // (return it to heap or mcentral), because allocation code assumes that a
+ // span is already swept if available for allocation.
+
+ if(!sweepgenset && nfree == 0) {
+ // The span must be in our exclusive ownership until we update sweepgen,
+ // check for potential races.
+ if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
+ runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
+ s->state, s->sweepgen, sweepgen);
+ runtime·throw("MSpan_Sweep: bad span state after sweep");
+ }
+ runtime·atomicstore(&s->sweepgen, sweepgen);
+ }
+ if(nfree > 0) {
+ c->local_nsmallfree[cl] += nfree;
+ c->local_cachealloc -= nfree * size;
+ runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
+ res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
+ // MCentral_FreeSpan updates sweepgen
+ }
+ return res;
+}
+
+// State of background sweep.
+// Protected by gclock.
+static struct
+{
+ G* g;
+ bool parked;
+
+ uint32 spanidx; // background sweeper position
+
+ uint32 nbgsweep;
+ uint32 npausesweep;
+} sweep;
+
+// background sweeping goroutine
+static void
+bgsweep(void)
+{
+ g->issystem = true;
+ for(;;) {
+ while(runtime·sweepone() != -1) {
+ sweep.nbgsweep++;
+ runtime·gosched();
+ }
+ runtime·lock(&gclock);
+ if(!runtime·mheap.sweepdone) {
+			// It's possible if a GC happened between sweepone
+			// returning -1 and the acquisition of gclock.
+ runtime·unlock(&gclock);
+ continue;
+ }
+ sweep.parked = true;
+ runtime·parkunlock(&gclock, runtime·gostringnocopy((byte*)"GC sweep wait"));
+ }
+}
+
+// sweeps one span
+// returns number of pages returned to heap, or -1 if there is nothing to sweep
+uintptr
+runtime·sweepone(void)
+{
+ MSpan *s;
+ uint32 idx, sg;
+ uintptr npages;
+
+ // increment locks to ensure that the goroutine is not preempted
+	// in the middle of sweep, which would leave the span in an inconsistent state for the next GC
+ g->m->locks++;
+ sg = runtime·mheap.sweepgen;
+ for(;;) {
+ idx = runtime·xadd(&sweep.spanidx, 1) - 1;
+ if(idx >= work.nspan) {
+ runtime·mheap.sweepdone = true;
+ g->m->locks--;
+ return -1;
+ }
+ s = work.spans[idx];
+ if(s->state != MSpanInUse) {
+ s->sweepgen = sg;
+ continue;
+ }
+ if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
+ continue;
+ npages = s->npages;
+ if(!runtime·MSpan_Sweep(s, false))
+ npages = 0;
+ g->m->locks--;
+ return npages;
+ }
+}
+
+void
+runtime·gchelper(void)
+{
+ uint32 nproc;
+
+ g->m->traceback = 2;
+ gchelperstart();
+
+	// parallel mark over the gc roots
+ runtime·parfordo(work.markfor);
+
+ // help other threads scan secondary blocks
+ scanblock(nil, 0, nil);
+
+ nproc = work.nproc; // work.nproc can change right after we increment work.ndone
+ if(runtime·xadd(&work.ndone, +1) == nproc-1)
+ runtime·notewakeup(&work.alldone);
+ g->m->traceback = 0;
+}
+
+static void
+cachestats(void)
+{
+ MCache *c;
+ P *p, **pp;
+
+ for(pp=runtime·allp; p=*pp; pp++) {
+ c = p->mcache;
+ if(c==nil)
+ continue;
+ runtime·purgecachedstats(c);
+ }
+}
+
+static void
+flushallmcaches(void)
+{
+ P *p, **pp;
+ MCache *c;
+
+ // Flush MCache's to MCentral.
+ for(pp=runtime·allp; p=*pp; pp++) {
+ c = p->mcache;
+ if(c==nil)
+ continue;
+ runtime·MCache_ReleaseAll(c);
+ runtime·stackcache_clear(c);
+ }
+}
+
+static void
+flushallmcaches_m(G *gp)
+{
+ flushallmcaches();
+ runtime·gogo(&gp->sched);
+}
+
+void
+runtime·updatememstats(GCStats *stats)
+{
+ M *mp;
+ MSpan *s;
+ int32 i;
+ uint64 smallfree;
+ uint64 *src, *dst;
+ void (*fn)(G*);
+
+ if(stats)
+ runtime·memclr((byte*)stats, sizeof(*stats));
+ for(mp=runtime·allm; mp; mp=mp->alllink) {
+ if(stats) {
+ src = (uint64*)&mp->gcstats;
+ dst = (uint64*)stats;
+ for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
+ dst[i] += src[i];
+ runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
+ }
+ }
+ mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
+ mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
+ mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
+ mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
+
+ // Calculate memory allocator stats.
+ // During program execution we only count number of frees and amount of freed memory.
+// Current number of alive objects in the heap and amount of alive heap memory
+ // are calculated by scanning all spans.
+ // Total number of mallocs is calculated as number of frees plus number of alive objects.
+ // Similarly, total amount of allocated memory is calculated as amount of freed memory
+ // plus amount of alive heap memory.
+ mstats.alloc = 0;
+ mstats.total_alloc = 0;
+ mstats.nmalloc = 0;
+ mstats.nfree = 0;
+ for(i = 0; i < nelem(mstats.by_size); i++) {
+ mstats.by_size[i].nmalloc = 0;
+ mstats.by_size[i].nfree = 0;
+ }
+
+ // Flush MCache's to MCentral.
+ if(g == g->m->g0)
+ flushallmcaches();
+ else {
+ fn = flushallmcaches_m;
+ runtime·mcall(&fn);
+ }
+
+ // Aggregate local stats.
+ cachestats();
+
+ // Scan all spans and count number of alive objects.
+ runtime·lock(&runtime·mheap.lock);
+ for(i = 0; i < runtime·mheap.nspan; i++) {
+ s = runtime·mheap.allspans[i];
+ if(s->state != MSpanInUse)
+ continue;
+ if(s->sizeclass == 0) {
+ mstats.nmalloc++;
+ mstats.alloc += s->elemsize;
+ } else {
+ mstats.nmalloc += s->ref;
+ mstats.by_size[s->sizeclass].nmalloc += s->ref;
+ mstats.alloc += s->ref*s->elemsize;
+ }
+ }
+ runtime·unlock(&runtime·mheap.lock);
+
+ // Aggregate by size class.
+ smallfree = 0;
+ mstats.nfree = runtime·mheap.nlargefree;
+ for(i = 0; i < nelem(mstats.by_size); i++) {
+ mstats.nfree += runtime·mheap.nsmallfree[i];
+ mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
+ mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
+ smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
+ }
+ mstats.nmalloc += mstats.nfree;
+
+ // Calculate derived stats.
+ mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
+ mstats.heap_alloc = mstats.alloc;
+ mstats.heap_objects = mstats.nmalloc - mstats.nfree;
+}
+
+// Structure of arguments passed to function gc().
+// This allows the arguments to be passed via runtime·mcall.
+struct gc_args
+{
+ int64 start_time; // start time of GC in ns (just before stoptheworld)
+ bool eagersweep;
+};
+
+static void gc(struct gc_args *args);
+static void mgc(G *gp);
+
+int32
+runtime·readgogc(void)
+{
+ byte *p;
+
+ p = runtime·getenv("GOGC");
+ if(p == nil || p[0] == '\0')
+ return 100;
+ if(runtime·strcmp(p, (byte*)"off") == 0)
+ return -1;
+ return runtime·atoi(p);
+}
+
+void
+runtime·gcinit(void)
+{
+ if(sizeof(Workbuf) != WorkbufSize)
+ runtime·throw("runtime: size of Workbuf is suboptimal");
+
+ work.markfor = runtime·parforalloc(MaxGcproc);
+ runtime·gcpercent = runtime·readgogc();
+ runtime·gcdatamask = unrollglobgcprog(runtime·gcdata, runtime·edata - runtime·data);
+ runtime·gcbssmask = unrollglobgcprog(runtime·gcbss, runtime·ebss - runtime·bss);
+}
+
+void
+runtime·gc_m(void)
+{
+ struct gc_args a;
+ G *gp;
+
+ gp = g->m->curg;
+ runtime·casgstatus(gp, Grunning, Gwaiting);
+ gp->waitreason = runtime·gostringnocopy((byte*)"garbage collection");
+
+ a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
+ a.eagersweep = g->m->scalararg[2];
+ gc(&a);
+
+ runtime·casgstatus(gp, Gwaiting, Grunning);
+}
+
+static void
+gc(struct gc_args *args)
+{
+ int64 t0, t1, t2, t3, t4;
+ uint64 heap0, heap1, obj;
+ GCStats stats;
+
+ if(runtime·debug.allocfreetrace)
+ runtime·tracegc();
+
+ g->m->traceback = 2;
+ t0 = args->start_time;
+ work.tstart = args->start_time;
+
+ t1 = 0;
+ if(runtime·debug.gctrace)
+ t1 = runtime·nanotime();
+
+	// Sweep whatever is not swept by bgsweep.
+ while(runtime·sweepone() != -1)
+ sweep.npausesweep++;
+
+ // Cache runtime.mheap.allspans in work.spans to avoid conflicts with
+ // resizing/freeing allspans.
+ // New spans can be created while GC progresses, but they are not garbage for
+ // this round:
+ // - new stack spans can be created even while the world is stopped.
+	// - new malloc spans can be created during the concurrent sweep.
+
+	// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from the heap.
+ runtime·lock(&runtime·mheap.lock);
+ // Free the old cached sweep array if necessary.
+ if(work.spans != nil && work.spans != runtime·mheap.allspans)
+ runtime·SysFree(work.spans, work.nspan*sizeof(work.spans[0]), &mstats.other_sys);
+ // Cache the current array for marking.
+ runtime·mheap.gcspans = runtime·mheap.allspans;
+ work.spans = runtime·mheap.allspans;
+ work.nspan = runtime·mheap.nspan;
+ runtime·unlock(&runtime·mheap.lock);
+
+ work.nwait = 0;
+ work.ndone = 0;
+ work.nproc = runtime·gcprocs();
+ runtime·parforsetup(work.markfor, work.nproc, RootCount + runtime·allglen, nil, false, markroot);
+ if(work.nproc > 1) {
+ runtime·noteclear(&work.alldone);
+ runtime·helpgc(work.nproc);
+ }
+
+ t2 = 0;
+ if(runtime·debug.gctrace)
+ t2 = runtime·nanotime();
+
+ gchelperstart();
+ runtime·parfordo(work.markfor);
+ scanblock(nil, 0, nil);
+
+ t3 = 0;
+ if(runtime·debug.gctrace)
+ t3 = runtime·nanotime();
+
+ if(work.nproc > 1)
+ runtime·notesleep(&work.alldone);
+
+ cachestats();
+	// The next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap.
+	// Estimate what the live heap size was after the previous GC (for tracing only).
+ heap0 = mstats.next_gc*100/(runtime·gcpercent+100);
+	// Conservatively set next_gc to a high value assuming that everything is live;
+	// concurrent/lazy sweep will reduce this number while discovering new garbage
+	// (a numeric sketch of this target follows this function).
+ mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*runtime·gcpercent/100;
+
+ t4 = runtime·nanotime();
+ runtime·atomicstore64(&mstats.last_gc, runtime·unixnanotime()); // must be Unix time to make sense to user
+ mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
+ mstats.pause_total_ns += t4 - t0;
+ mstats.numgc++;
+ if(mstats.debuggc)
+ runtime·printf("pause %D\n", t4-t0);
+
+ if(runtime·debug.gctrace) {
+ heap1 = mstats.heap_alloc;
+ runtime·updatememstats(&stats);
+ if(heap1 != mstats.heap_alloc) {
+ runtime·printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
+ runtime·throw("mstats skew");
+ }
+ obj = mstats.nmalloc - mstats.nfree;
+
+ stats.nprocyield += work.markfor->nprocyield;
+ stats.nosyield += work.markfor->nosyield;
+ stats.nsleep += work.markfor->nsleep;
+
+ runtime·printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
+ " %d/%d/%d sweeps,"
+ " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
+ mstats.numgc, work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
+ heap0>>20, heap1>>20, obj,
+ mstats.nmalloc, mstats.nfree,
+ work.nspan, sweep.nbgsweep, sweep.npausesweep,
+ stats.nhandoff, stats.nhandoffcnt,
+ work.markfor->nsteal, work.markfor->nstealcnt,
+ stats.nprocyield, stats.nosyield, stats.nsleep);
+ sweep.nbgsweep = sweep.npausesweep = 0;
+ }
+
+	// See the comment at the beginning of this function as to why we need the following.
+	// Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from the heap.
+ runtime·lock(&runtime·mheap.lock);
+ // Free the old cached mark array if necessary.
+ if(work.spans != nil && work.spans != runtime·mheap.allspans)
+ runtime·SysFree(work.spans, work.nspan*sizeof(work.spans[0]), &mstats.other_sys);
+ // Cache the current array for sweeping.
+ runtime·mheap.gcspans = runtime·mheap.allspans;
+ runtime·mheap.sweepgen += 2;
+ runtime·mheap.sweepdone = false;
+ work.spans = runtime·mheap.allspans;
+ work.nspan = runtime·mheap.nspan;
+ sweep.spanidx = 0;
+ runtime·unlock(&runtime·mheap.lock);
+
+	// Temporarily disable concurrent sweep, because we see failures on builders.
+ if(ConcurrentSweep && !args->eagersweep) {
+ runtime·lock(&gclock);
+ if(sweep.g == nil)
+ sweep.g = runtime·newproc1(&bgsweepv, nil, 0, 0, gc);
+ else if(sweep.parked) {
+ sweep.parked = false;
+ runtime·ready(sweep.g);
+ }
+ runtime·unlock(&gclock);
+ } else {
+ // Sweep all spans eagerly.
+ while(runtime·sweepone() != -1)
+ sweep.npausesweep++;
+ }
+
+ runtime·mProf_GC();
+ g->m->traceback = 0;
+}
+
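A small sketch of the heap-growth target arithmetic used near the end of gc() above, assuming the default GOGC=100 returned by readgogc; the values are illustrative only:

    package main

    import "fmt"

    func main() {
            gcpercent := int64(100)     // GOGC, the default from readgogc
            heapAlloc := int64(4 << 20) // live heap after this collection: 4 MB

            nextGC := heapAlloc + heapAlloc*gcpercent/100 // trigger the next GC once the heap doubles: 8 MB
            heap0 := nextGC * 100 / (gcpercent + 100)     // inverse used for the trace line: back to 4 MB

            fmt.Println(nextGC>>20, heap0>>20) // 8 4
    }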
+extern uintptr runtime·sizeof_C_MStats;
+
+void
+runtime·ReadMemStats(MStats *stats)
+{
+ // Have to acquire worldsema to stop the world,
+ // because stoptheworld can only be used by
+ // one goroutine at a time, and there might be
+ // a pending garbage collection already calling it.
+ runtime·semacquire(&runtime·worldsema, false);
+ g->m->gcing = 1;
+ runtime·stoptheworld();
+ runtime·updatememstats(nil);
+	// The size of the trailing by_size array differs between Go and C:
+	// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
+ runtime·memmove(stats, &mstats, runtime·sizeof_C_MStats);
+
+	// Stack numbers are part of the heap numbers; separate them out for user consumption.
+ stats->stacks_sys = stats->stacks_inuse;
+ stats->heap_inuse -= stats->stacks_inuse;
+ stats->heap_sys -= stats->stacks_inuse;
+
+ g->m->gcing = 0;
+ g->m->locks++;
+ runtime·semrelease(&runtime·worldsema);
+ runtime·starttheworld();
+ g->m->locks--;
+}
+
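runtime.ReadMemStats is the user-facing API for these statistics; a short hedged usage sketch, assuming only the documented runtime.MemStats fields and showing the stack/heap split described in the comments above:

    package main

    import (
            "fmt"
            "runtime"
    )

    func main() {
            var ms runtime.MemStats
            runtime.ReadMemStats(&ms) // briefly stops the world, as described above
            fmt.Printf("heap alloc=%d KB heap sys=%d KB stack sys=%d KB gc=%d\n",
                    ms.HeapAlloc>>10, ms.HeapSys>>10, ms.StackSys>>10, ms.NumGC)
    }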
+void
+runtime∕debug·readGCStats(Slice *pauses)
+{
+ uint64 *p;
+ uint32 i, n;
+
+ // Calling code in runtime/debug should make the slice large enough.
+ if(pauses->cap < nelem(mstats.pause_ns)+3)
+ runtime·throw("runtime: short slice passed to readGCStats");
+
+ // Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
+ p = (uint64*)pauses->array;
+ runtime·lock(&runtime·mheap.lock);
+ n = mstats.numgc;
+ if(n > nelem(mstats.pause_ns))
+ n = nelem(mstats.pause_ns);
+
+	// The pause buffer is circular. The most recent pause is at
+	// pause_ns[(numgc-1)%nelem(pause_ns)]; earlier pauses are at
+	// decreasing indices from there. We deliver the times
+	// most recent first (in p[0]). (A usage sketch follows this function.)
+ for(i=0; i<n; i++)
+ p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
+
+ p[n] = mstats.last_gc;
+ p[n+1] = mstats.numgc;
+ p[n+2] = mstats.pause_total_ns;
+ runtime·unlock(&runtime·mheap.lock);
+ pauses->len = n+3;
+}
+
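From Go code this data is read through runtime/debug.ReadGCStats, whose Pause slice is ordered most recent first, matching the comment above. A short hedged usage sketch:

    package main

    import (
            "fmt"
            "runtime/debug"
    )

    func main() {
            var st debug.GCStats
            debug.ReadGCStats(&st)
            fmt.Println("collections:", st.NumGC, "total pause:", st.PauseTotal)
            if len(st.Pause) > 0 {
                    fmt.Println("most recent pause:", st.Pause[0])
            }
    }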
+void
+runtime·setgcpercent_m(void) {
+ int32 in;
+ int32 out;
+
+ in = (int32)(intptr)g->m->scalararg[0];
+
+ runtime·lock(&runtime·mheap.lock);
+ out = runtime·gcpercent;
+ if(in < 0)
+ in = -1;
+ runtime·gcpercent = in;
+ runtime·unlock(&runtime·mheap.lock);
+
+ g->m->scalararg[0] = (uintptr)(intptr)out;
+}
+
+static void
+gchelperstart(void)
+{
+ if(g->m->helpgc < 0 || g->m->helpgc >= MaxGcproc)
+ runtime·throw("gchelperstart: bad m->helpgc");
+ if(g != g->m->g0)
+ runtime·throw("gchelper not running on g0 stack");
+}
+
+G*
+runtime·wakefing(void)
+{
+ G *res;
+
+ res = nil;
+ runtime·lock(&runtime·finlock);
+ if(runtime·fingwait && runtime·fingwake) {
+ runtime·fingwait = false;
+ runtime·fingwake = false;
+ res = runtime·fing;
+ }
+ runtime·unlock(&runtime·finlock);
+ return res;
+}
+
+// Recursively unrolls the GC program in prog.
+// mask is where to store the result.
+// ppos is a pointer to the position in mask, in bits.
+// sparse says to generate a 4-bit-per-word mask for the heap (otherwise a 2-bit-per-word mask for data/bss).
+static byte*
+unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
+{
+ uintptr pos, siz, i, off;
+ byte *arena_start, *prog1, v, *bitp, shift;
+
+ arena_start = runtime·mheap.arena_start;
+ pos = *ppos;
+ for(;;) {
+ switch(prog[0]) {
+ case insData:
+ prog++;
+ siz = prog[0];
+ prog++;
+ for(i = 0; i < siz; i++) {
+ v = prog[i/PointersPerByte];
+ v >>= (i%PointersPerByte)*BitsPerPointer;
+ v &= BitsMask;
+ if(inplace) {
+ // Store directly into GC bitmap.
+ off = (uintptr*)(mask+pos) - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ if(shift==0)
+ *bitp = 0;
+ *bitp |= v<<(shift+2);
+ pos += PtrSize;
+ } else if(sparse) {
+ // 4-bits per word
+ v <<= (pos%8)+2;
+ mask[pos/8] |= v;
+ pos += gcBits;
+ } else {
+ // 2-bits per word
+ v <<= pos%8;
+ mask[pos/8] |= v;
+ pos += BitsPerPointer;
+ }
+ }
+ prog += ROUND(siz*BitsPerPointer, 8)/8;
+ break;
+ case insArray:
+ prog++;
+ siz = 0;
+ for(i = 0; i < PtrSize; i++)
+ siz = (siz<<8) + prog[PtrSize-i-1];
+ prog += PtrSize;
+ prog1 = nil;
+ for(i = 0; i < siz; i++)
+ prog1 = unrollgcprog1(mask, prog, &pos, inplace, sparse);
+ if(prog1[0] != insArrayEnd)
+ runtime·throw("unrollgcprog: array does not end with insArrayEnd");
+ prog = prog1+1;
+ break;
+ case insArrayEnd:
+ case insEnd:
+ *ppos = pos;
+ return prog;
+ default:
+ runtime·throw("unrollgcprog: unknown instruction");
+ }
+ }
+}
+
+// Unrolls GC program prog for data/bss, returns dense GC mask.
+static BitVector
+unrollglobgcprog(byte *prog, uintptr size)
+{
+ byte *mask;
+ uintptr pos, masksize;
+
+ masksize = ROUND(ROUND(size, PtrSize)/PtrSize*BitsPerPointer, 8)/8;
+ mask = runtime·persistentalloc(masksize+1, 0, &mstats.gc_sys);
+ mask[masksize] = 0xa1;
+ pos = 0;
+ prog = unrollgcprog1(mask, prog, &pos, false, false);
+ if(pos != size/PtrSize*BitsPerPointer) {
+ runtime·printf("unrollglobgcprog: bad program size, got %D, expect %D\n",
+ (uint64)pos, (uint64)size/PtrSize*BitsPerPointer);
+ runtime·throw("unrollglobgcprog: bad program size");
+ }
+ if(prog[0] != insEnd)
+ runtime·throw("unrollglobgcprog: program does not end with insEnd");
+ if(mask[masksize] != 0xa1)
+ runtime·throw("unrollglobgcprog: overflow");
+ return (BitVector){masksize*8, (uint32*)mask};
+}
+
+void
+runtime·unrollgcproginplace_m(void)
+{
+ uintptr size, size0, pos, off;
+ byte *arena_start, *prog, *bitp, shift;
+ Type *typ;
+ void *v;
+
+ v = g->m->ptrarg[0];
+ typ = g->m->ptrarg[1];
+ size = g->m->scalararg[0];
+ size0 = g->m->scalararg[1];
+ g->m->ptrarg[0] = nil;
+ g->m->ptrarg[1] = nil;
+
+ pos = 0;
+ prog = (byte*)typ->gc[1];
+ while(pos != size0)
+ unrollgcprog1(v, prog, &pos, true, true);
+ // Mark first word as bitAllocated.
+ arena_start = runtime·mheap.arena_start;
+ off = (uintptr*)v - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ *bitp |= bitBoundary<<shift;
+ // Mark word after last as BitsDead.
+ if(size0 < size) {
+ off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
+ bitp = arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ *bitp &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
+ }
+}
+
+// Unrolls GC program in typ->gc[1] into typ->gc[0]
+void
+runtime·unrollgcprog_m(void)
+{
+ static Mutex lock;
+ Type *typ;
+ byte *mask, *prog;
+ uintptr pos;
+ uint32 x;
+
+ typ = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+
+ runtime·lock(&lock);
+ mask = (byte*)typ->gc[0];
+ if(mask[0] == 0) {
+ pos = 8; // skip the unroll flag
+ prog = (byte*)typ->gc[1];
+ prog = unrollgcprog1(mask, prog, &pos, false, true);
+ if(prog[0] != insEnd)
+ runtime·throw("unrollgcprog: program does not end with insEnd");
+ if(((typ->size/PtrSize)%2) != 0) {
+ // repeat the program twice
+ prog = (byte*)typ->gc[1];
+ unrollgcprog1(mask, prog, &pos, false, true);
+ }
+ // atomic way to say mask[0] = 1
+ x = ((uint32*)mask)[0];
+ runtime·atomicstore((uint32*)mask, x|1);
+ }
+ runtime·unlock(&lock);
+}
+
+// Mark the span of memory at v as having n blocks of the given size.
+// If leftover is true, there is leftover space at the end of the span.
+void
+runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
+{
+ uintptr i, off, step;
+ byte *b;
+
+ if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+ runtime·throw("markspan: bad pointer");
+
+ // Find bits of the beginning of the span.
+ off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
+ b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+ if((off%wordsPerBitmapByte) != 0)
+ runtime·throw("markspan: unaligned length");
+
+ // Okay to use non-atomic ops here, because we control
+ // the entire span, and each bitmap byte has bits for only
+ // one span, so no other goroutines are changing these bitmap words.
+
+ if(size == PtrSize) {
+		// Possible only on 64-bit systems (the minimal size class is 8 bytes).
+ // Poor man's memset(0x11).
+ if(0x11 != ((bitBoundary+BitsDead)<<gcBits) + (bitBoundary+BitsDead))
+ runtime·throw("markspan: bad bits");
+ if((n%(wordsPerBitmapByte*PtrSize)) != 0)
+ runtime·throw("markspan: unaligned length");
+ b = b - n/wordsPerBitmapByte + 1; // find first byte
+ if(((uintptr)b%PtrSize) != 0)
+ runtime·throw("markspan: unaligned pointer");
+ for(i = 0; i != n; i += wordsPerBitmapByte*PtrSize, b += PtrSize)
+ *(uintptr*)b = (uintptr)0x1111111111111111ULL; // bitBoundary+BitsDead
+ return;
+ }
+
+ if(leftover)
+ n++; // mark a boundary just past end of last block too
+ step = size/(PtrSize*wordsPerBitmapByte);
+ for(i = 0; i != n; i++, b -= step)
+ *b = bitBoundary|(BitsDead<<2);
+}
+
+// Unmark the span of memory at v of length n bytes.
+void
+runtime·unmarkspan(void *v, uintptr n)
+{
+ uintptr off;
+ byte *b;
+
+ if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+		runtime·throw("unmarkspan: bad pointer");
+
+ off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
+ if((off % (PtrSize*wordsPerBitmapByte)) != 0)
+		runtime·throw("unmarkspan: unaligned pointer");
+ b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+ n /= PtrSize;
+ if(n%(PtrSize*wordsPerBitmapByte) != 0)
+ runtime·throw("unmarkspan: unaligned length");
+ // Okay to use non-atomic ops here, because we control
+ // the entire span, and each bitmap word has bits for only
+ // one span, so no other goroutines are changing these
+ // bitmap words.
+ n /= wordsPerBitmapByte;
+ runtime·memclr(b - n + 1, n);
+}
+
+void
+runtime·MHeap_MapBits(MHeap *h)
+{
+ // Caller has added extra mappings to the arena.
+ // Add extra mappings of bitmap words as needed.
+ // We allocate extra bitmap pieces in chunks of bitmapChunk.
+ enum {
+ bitmapChunk = 8192
+ };
+ uintptr n;
+
+ n = (h->arena_used - h->arena_start) / (PtrSize*wordsPerBitmapByte);
+ n = ROUND(n, bitmapChunk);
+ n = ROUND(n, PhysPageSize);
+ if(h->bitmap_mapped >= n)
+ return;
+
+ runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
+ h->bitmap_mapped = n;
+}
+
+static bool
+getgcmaskcb(Stkframe *frame, void *ctxt)
+{
+ Stkframe *frame0;
+
+ frame0 = ctxt;
+ if(frame->sp <= frame0->sp && frame0->sp < frame->varp) {
+ *frame0 = *frame;
+ return false;
+ }
+ return true;
+}
+
+// Returns GC type info for object p for testing.
+void
+runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
+{
+ Stkframe frame;
+ uintptr i, n, off;
+ byte *base, bits, shift, *b;
+ bool (*cb)(Stkframe*, void*);
+
+ *mask = nil;
+ *len = 0;
+
+ // data
+ if(p >= runtime·data && p < runtime·edata) {
+ n = ((PtrType*)t)->elem->size;
+ *len = n/PtrSize;
+ *mask = runtime·mallocgc(*len, nil, 0);
+ for(i = 0; i < n; i += PtrSize) {
+ off = (p+i-runtime·data)/PtrSize;
+ bits = (((byte*)runtime·gcdatamask.data)[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
+ (*mask)[i/PtrSize] = bits;
+ }
+ return;
+ }
+ // bss
+ if(p >= runtime·bss && p < runtime·ebss) {
+ n = ((PtrType*)t)->elem->size;
+ *len = n/PtrSize;
+ *mask = runtime·mallocgc(*len, nil, 0);
+ for(i = 0; i < n; i += PtrSize) {
+ off = (p+i-runtime·bss)/PtrSize;
+ bits = (((byte*)runtime·gcbssmask.data)[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
+ (*mask)[i/PtrSize] = bits;
+ }
+ return;
+ }
+ // heap
+ if(runtime·mlookup(p, &base, &n, nil)) {
+ *len = n/PtrSize;
+ *mask = runtime·mallocgc(*len, nil, 0);
+ for(i = 0; i < n; i += PtrSize) {
+ off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start;
+ b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+ shift = (off % wordsPerBitmapByte) * gcBits;
+ bits = (*b >> (shift+2))&BitsMask;
+ (*mask)[i/PtrSize] = bits;
+ }
+ return;
+ }
+ // stack
+ frame.fn = nil;
+ frame.sp = (uintptr)p;
+ cb = getgcmaskcb;
+ runtime·gentraceback(g->m->curg->sched.pc, g->m->curg->sched.sp, 0, g->m->curg, 0, nil, 1000, &cb, &frame, false);
+ if(frame.fn != nil) {
+ Func *f;
+ StackMap *stackmap;
+ BitVector bv;
+ uintptr size;
+ uintptr targetpc;
+ int32 pcdata;
+
+ f = frame.fn;
+ targetpc = frame.continpc;
+ if(targetpc == 0)
+ return;
+ if(targetpc != f->entry)
+ targetpc--;
+ pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
+ if(pcdata == -1)
+ return;
+ stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
+ if(stackmap == nil || stackmap->n <= 0)
+ return;
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ size = bv.n/BitsPerPointer*PtrSize;
+ n = ((PtrType*)t)->elem->size;
+ *len = n/PtrSize;
+ *mask = runtime·mallocgc(*len, nil, 0);
+ for(i = 0; i < n; i += PtrSize) {
+ off = (p+i-(byte*)frame.varp+size)/PtrSize;
+ bits = (bv.data[off*BitsPerPointer/32] >> ((off*BitsPerPointer)%32))&BitsMask;
+ (*mask)[i/PtrSize] = bits;
+ }
+ }
+}
+
+void runtime·gc_unixnanotime(int64 *now);
+
+int64 runtime·unixnanotime(void)
+{
+ int64 now;
+
+ runtime·gc_unixnanotime(&now);
+ return now;
+}
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go
new file mode 100644
index 000000000..2d9d76a47
--- /dev/null
+++ b/src/runtime/mgc0.go
@@ -0,0 +1,70 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// Called from C. Returns the Go type *m.
+func gc_m_ptr(ret *interface{}) {
+ *ret = (*m)(nil)
+}
+
+// Called from C. Returns the Go type *g.
+func gc_g_ptr(ret *interface{}) {
+ *ret = (*g)(nil)
+}
+
+// Called from C. Returns the Go type *itab.
+func gc_itab_ptr(ret *interface{}) {
+ *ret = (*itab)(nil)
+}
+
+// Type used for "conservative" allocations in C code.
+type notype [8]*byte
+
+// Called from C. Returns the Go type used for C allocations w/o type.
+func gc_notype_ptr(ret *interface{}) {
+ var x notype
+ *ret = x
+}
+
+func timenow() (sec int64, nsec int32)
+
+func gc_unixnanotime(now *int64) {
+ sec, nsec := timenow()
+ *now = sec*1e9 + int64(nsec)
+}
+
+func freeOSMemory() {
+ gogc(2) // force GC and do eager sweep
+ onM(scavenge_m)
+}
+
+var poolcleanup func()
+
+func registerPoolCleanup(f func()) {
+ poolcleanup = f
+}
+
+func clearpools() {
+ // clear sync.Pools
+ if poolcleanup != nil {
+ poolcleanup()
+ }
+
+ for _, p := range &allp {
+ if p == nil {
+ break
+ }
+ // clear tinyalloc pool
+ if c := p.mcache; c != nil {
+ c.tiny = nil
+ c.tinysize = 0
+ c.sudogcache = nil
+ }
+ // clear defer pools
+ for i := range p.deferpool {
+ p.deferpool[i] = nil
+ }
+ }
+}
diff --git a/src/runtime/mgc0.h b/src/runtime/mgc0.h
new file mode 100644
index 000000000..d04b5cab8
--- /dev/null
+++ b/src/runtime/mgc0.h
@@ -0,0 +1,78 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Garbage collector (GC)
+
+enum {
+ ScanStackByFrames = 1,
+
+ // Four bits per word (see #defines below).
+ gcBits = 4,
+ wordsPerBitmapByte = 8/gcBits,
+
+ // GC type info programs.
+	// The programs allow type info required for GC to be stored in a compact form.
+	// Most importantly, arrays take O(1) space instead of O(n).
+ // The program grammar is:
+ //
+ // Program = {Block} "insEnd"
+ // Block = Data | Array
+ // Data = "insData" DataSize DataBlock
+ // DataSize = int // size of the DataBlock in bit pairs, 1 byte
+	//	DataBlock = binary // dense GC mask (2 bits per word) of size ceil(DataSize/4) bytes
+ // Array = "insArray" ArrayLen Block "insArrayEnd"
+ // ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch)
+ //
+ // Each instruction (insData, insArray, etc) is 1 byte.
+ // For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
+	// the program looks like:
+ //
+ // insData 3 (BitsMultiWord BitsSlice BitsScalar)
+ // insArray 20 insData 2 (BitsScalar BitsPointer) insArrayEnd insEnd
+ //
+	// Total size of the program is 17 bytes (13 bytes on 32-bit systems).
+	// The corresponding GC mask would take 43 bytes (it would be repeated
+	// because the type has an odd number of words).
+	// A small worked check of these sizes follows this enum.
+ insData = 1,
+ insArray,
+ insArrayEnd,
+ insEnd,
+
+ // Pointer map
+ BitsPerPointer = 2,
+ BitsMask = (1<<BitsPerPointer)-1,
+ PointersPerByte = 8/BitsPerPointer,
+
+ BitsDead = 0,
+ BitsScalar = 1,
+ BitsPointer = 2,
+ BitsMultiWord = 3,
+ // BitsMultiWord will be set for the first word of a multi-word item.
+ // When it is set, one of the following will be set for the second word.
+ // NOT USED ANYMORE: BitsString = 0,
+ // NOT USED ANYMORE: BitsSlice = 1,
+ BitsIface = 2,
+ BitsEface = 3,
+
+ // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
+ MaxGCMask = 64,
+};
+
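A small worked check of the sizes quoted in the grammar comment above for struct { x []byte; y [20]struct{ z int; w *byte } } on a 64-bit system; this is illustrative arithmetic only, not runtime code:

    package main

    import "fmt"

    func main() {
            const ptrSize = 8

            // insData 3 <1 data byte> insArray <8-byte len> insData 2 <1 data byte> insArrayEnd insEnd
            progBytes := (1 + 1 + 1) + (1 + ptrSize) + (1 + 1 + 1) + 1 + 1
            fmt.Println("program:", progBytes, "bytes") // 17 (13 with a 4-byte array length on 32-bit)

            // Unrolled mask: 3 + 20*2 = 43 words. The word count is odd, so the
            // program is unrolled twice, at 4 bits per word for the heap mask.
            words := 3 + 20*2
            maskBytes := 2 * words * 4 / 8
            fmt.Println("mask:", maskBytes, "bytes") // 43
    }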
+// Bits in per-word bitmap.
+// #defines because we shift the values beyond 32 bits.
+//
+// Each byte in the bitmap describes wordsPerBitmapByte (2) words
+// of heap memory. There are 4 bitmap bits dedicated to each heap word,
+// so there is one bitmap byte per 2 heap words, i.e. one bitmap word
+// per 16 heap words on a 64-bit system.
+//
+// The bitmap starts at mheap.arena_start and extends *backward* from
+// there: the off'th word in the arena is tracked by the off/2+1'th
+// byte before mheap.arena_start. (This holds on 32-bit systems too,
+// since gcBits does not depend on the word size.)
+// A worked example of this addressing follows the #defines below.
+
+#define bitBoundary ((uintptr)1) // boundary of an object
+#define bitMarked ((uintptr)2) // marked object
+
+#define bitMask ((uintptr)bitBoundary|bitMarked)
+#define bitPtrMask ((uintptr)BitsMask<<2)
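A hedged sketch of the backward bitmap addressing used throughout mgc0.c (bitp = arena_start - off/wordsPerBitmapByte - 1), for a 64-bit system; the arena address is illustrative, not real arena state:

    package main

    import "fmt"

    func main() {
            const (
                    ptrSize            = 8
                    gcBits             = 4
                    wordsPerBitmapByte = 8 / gcBits
            )
            arenaStart := uintptr(0xc000000000)
            p := arenaStart + 5*ptrSize // the 5th word of the arena

            off := (p - arenaStart) / ptrSize                   // word offset: 5
            byteAddr := arenaStart - off/wordsPerBitmapByte - 1 // bitmap byte holding p's bits
            shift := (off % wordsPerBitmapByte) * gcBits        // 4: the upper nibble of that byte

            fmt.Printf("bits for word %#x live at %#x, shift %d\n", p, byteAddr, shift)
    }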
diff --git a/src/runtime/mheap.c b/src/runtime/mheap.c
new file mode 100644
index 000000000..902a5c71a
--- /dev/null
+++ b/src/runtime/mheap.c
@@ -0,0 +1,885 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Page heap.
+//
+// See malloc.h for overview.
+//
+// When a MSpan is in the heap free list, state == MSpanFree
+// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
+//
+// When a MSpan is allocated, state == MSpanInUse or MSpanStack
+// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+
+static MSpan *MHeap_AllocSpanLocked(MHeap*, uintptr);
+static void MHeap_FreeSpanLocked(MHeap*, MSpan*, bool, bool);
+static bool MHeap_Grow(MHeap*, uintptr);
+static MSpan *MHeap_AllocLarge(MHeap*, uintptr);
+static MSpan *BestFit(MSpan*, uintptr, MSpan*);
+
+static void
+RecordSpan(void *vh, byte *p)
+{
+ MHeap *h;
+ MSpan *s;
+ MSpan **all;
+ uint32 cap;
+
+ h = vh;
+ s = (MSpan*)p;
+ if(h->nspan >= h->nspancap) {
+ cap = 64*1024/sizeof(all[0]);
+ if(cap < h->nspancap*3/2)
+ cap = h->nspancap*3/2;
+ all = (MSpan**)runtime·sysAlloc(cap*sizeof(all[0]), &mstats.other_sys);
+ if(all == nil)
+ runtime·throw("runtime: cannot allocate memory");
+ if(h->allspans) {
+ runtime·memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
+ // Don't free the old array if it's referenced by sweep.
+ // See the comment in mgc0.c.
+ if(h->allspans != runtime·mheap.gcspans)
+ runtime·SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
+ }
+ h->allspans = all;
+ h->nspancap = cap;
+ }
+ h->allspans[h->nspan++] = s;
+}
+
+// Initialize the heap; fetch memory using alloc.
+void
+runtime·MHeap_Init(MHeap *h)
+{
+ uint32 i;
+
+ runtime·FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys);
+ runtime·FixAlloc_Init(&h->cachealloc, sizeof(MCache), nil, nil, &mstats.mcache_sys);
+ runtime·FixAlloc_Init(&h->specialfinalizeralloc, sizeof(SpecialFinalizer), nil, nil, &mstats.other_sys);
+ runtime·FixAlloc_Init(&h->specialprofilealloc, sizeof(SpecialProfile), nil, nil, &mstats.other_sys);
+ // h->mapcache needs no init
+ for(i=0; i<nelem(h->free); i++) {
+ runtime·MSpanList_Init(&h->free[i]);
+ runtime·MSpanList_Init(&h->busy[i]);
+ }
+ runtime·MSpanList_Init(&h->freelarge);
+ runtime·MSpanList_Init(&h->busylarge);
+ for(i=0; i<nelem(h->central); i++)
+ runtime·MCentral_Init(&h->central[i].mcentral, i);
+}
+
+void
+runtime·MHeap_MapSpans(MHeap *h)
+{
+ uintptr n;
+
+ // Map spans array, PageSize at a time.
+ n = (uintptr)h->arena_used;
+ n -= (uintptr)h->arena_start;
+ n = n / PageSize * sizeof(h->spans[0]);
+ n = ROUND(n, PhysPageSize);
+ if(h->spans_mapped >= n)
+ return;
+ runtime·SysMap((byte*)h->spans + h->spans_mapped, n - h->spans_mapped, h->arena_reserved, &mstats.other_sys);
+ h->spans_mapped = n;
+}
+
+// Sweeps spans in list until it reclaims at least npages into the heap.
+// Returns the actual number of pages reclaimed.
+static uintptr
+MHeap_ReclaimList(MHeap *h, MSpan *list, uintptr npages)
+{
+ MSpan *s;
+ uintptr n;
+ uint32 sg;
+
+ n = 0;
+ sg = runtime·mheap.sweepgen;
+retry:
+ for(s = list->next; s != list; s = s->next) {
+ if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
+ runtime·MSpanList_Remove(s);
+ // swept spans are at the end of the list
+ runtime·MSpanList_InsertBack(list, s);
+ runtime·unlock(&h->lock);
+ n += runtime·MSpan_Sweep(s, false);
+ runtime·lock(&h->lock);
+ if(n >= npages)
+ return n;
+ // the span could have been moved elsewhere
+ goto retry;
+ }
+ if(s->sweepgen == sg-1) {
+			// the span is being swept by the background sweeper; skip it
+ continue;
+ }
+		// already swept empty span;
+		// all subsequent ones must also be either swept or in the process of being swept
+ break;
+ }
+ return n;
+}
+
+// Sweeps and reclaims at least npage pages into heap.
+// Called before allocating npage pages.
+static void
+MHeap_Reclaim(MHeap *h, uintptr npage)
+{
+ uintptr reclaimed, n;
+
+	// First try to sweep busy spans with large objects of size >= npage;
+	// this has a good chance of reclaiming the necessary space.
+ for(n=npage; n < nelem(h->busy); n++) {
+ if(MHeap_ReclaimList(h, &h->busy[n], npage))
+ return; // Bingo!
+ }
+
+ // Then -- even larger objects.
+ if(MHeap_ReclaimList(h, &h->busylarge, npage))
+ return; // Bingo!
+
+ // Now try smaller objects.
+ // One such object is not enough, so we need to reclaim several of them.
+ reclaimed = 0;
+ for(n=0; n < npage && n < nelem(h->busy); n++) {
+ reclaimed += MHeap_ReclaimList(h, &h->busy[n], npage-reclaimed);
+ if(reclaimed >= npage)
+ return;
+ }
+
+ // Now sweep everything that is not yet swept.
+ runtime·unlock(&h->lock);
+ for(;;) {
+ n = runtime·sweepone();
+ if(n == -1) // all spans are swept
+ break;
+ reclaimed += n;
+ if(reclaimed >= npage)
+ break;
+ }
+ runtime·lock(&h->lock);
+}
+
+// Allocate a new span of npage pages from the heap for GC'd memory
+// and record its size class in the HeapMap and HeapMapCache.
+static MSpan*
+mheap_alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large)
+{
+ MSpan *s;
+
+ if(g != g->m->g0)
+ runtime·throw("mheap_alloc not on M stack");
+ runtime·lock(&h->lock);
+
+ // To prevent excessive heap growth, before allocating n pages
+ // we need to sweep and reclaim at least n pages.
+ if(!h->sweepdone)
+ MHeap_Reclaim(h, npage);
+
+ // transfer stats from cache to global
+ mstats.heap_alloc += g->m->mcache->local_cachealloc;
+ g->m->mcache->local_cachealloc = 0;
+
+ s = MHeap_AllocSpanLocked(h, npage);
+ if(s != nil) {
+ // Record span info, because gc needs to be
+ // able to map interior pointer to containing span.
+ runtime·atomicstore(&s->sweepgen, h->sweepgen);
+ s->state = MSpanInUse;
+ s->freelist = nil;
+ s->ref = 0;
+ s->sizeclass = sizeclass;
+ s->elemsize = (sizeclass==0 ? s->npages<<PageShift : runtime·class_to_size[sizeclass]);
+
+ // update stats, sweep lists
+ if(large) {
+ mstats.heap_objects++;
+ mstats.heap_alloc += npage<<PageShift;
+ // Swept spans are at the end of lists.
+ if(s->npages < nelem(h->free))
+ runtime·MSpanList_InsertBack(&h->busy[s->npages], s);
+ else
+ runtime·MSpanList_InsertBack(&h->busylarge, s);
+ }
+ }
+ runtime·unlock(&h->lock);
+ return s;
+}
+
+static void
+mheap_alloc_m(G *gp)
+{
+ MHeap *h;
+ MSpan *s;
+
+ h = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ s = mheap_alloc(h, g->m->scalararg[0], g->m->scalararg[1], g->m->scalararg[2]);
+ g->m->ptrarg[0] = s;
+
+ runtime·gogo(&gp->sched);
+}
+
+MSpan*
+runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero)
+{
+ MSpan *s;
+ void (*fn)(G*);
+
+ // Don't do any operations that lock the heap on the G stack.
+ // It might trigger stack growth, and the stack growth code needs
+ // to be able to allocate heap.
+ if(g == g->m->g0) {
+ s = mheap_alloc(h, npage, sizeclass, large);
+ } else {
+ g->m->ptrarg[0] = h;
+ g->m->scalararg[0] = npage;
+ g->m->scalararg[1] = sizeclass;
+ g->m->scalararg[2] = large;
+ fn = mheap_alloc_m;
+ runtime·mcall(&fn);
+ s = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ }
+ if(s != nil) {
+ if(needzero && s->needzero)
+ runtime·memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ s->needzero = 0;
+ }
+ return s;
+}
+
+MSpan*
+runtime·MHeap_AllocStack(MHeap *h, uintptr npage)
+{
+ MSpan *s;
+
+ if(g != g->m->g0)
+ runtime·throw("mheap_allocstack not on M stack");
+ runtime·lock(&h->lock);
+ s = MHeap_AllocSpanLocked(h, npage);
+ if(s != nil) {
+ s->state = MSpanStack;
+ s->freelist = nil;
+ s->ref = 0;
+ mstats.stacks_inuse += s->npages<<PageShift;
+ }
+ runtime·unlock(&h->lock);
+ return s;
+}
+
+// Allocates a span of the given size. h must be locked.
+// The returned span has been removed from the
+// free list, but its state is still MSpanFree.
+static MSpan*
+MHeap_AllocSpanLocked(MHeap *h, uintptr npage)
+{
+ uintptr n;
+ MSpan *s, *t;
+ pageID p;
+
+ // Try in fixed-size lists up to max.
+ for(n=npage; n < nelem(h->free); n++) {
+ if(!runtime·MSpanList_IsEmpty(&h->free[n])) {
+ s = h->free[n].next;
+ goto HaveSpan;
+ }
+ }
+
+ // Best fit in list of large spans.
+ if((s = MHeap_AllocLarge(h, npage)) == nil) {
+ if(!MHeap_Grow(h, npage))
+ return nil;
+ if((s = MHeap_AllocLarge(h, npage)) == nil)
+ return nil;
+ }
+
+HaveSpan:
+ // Mark span in use.
+ if(s->state != MSpanFree)
+ runtime·throw("MHeap_AllocLocked - MSpan not free");
+ if(s->npages < npage)
+ runtime·throw("MHeap_AllocLocked - bad npages");
+ runtime·MSpanList_Remove(s);
+ if(s->next != nil || s->prev != nil)
+ runtime·throw("still in list");
+ if(s->npreleased > 0) {
+ runtime·SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift);
+ mstats.heap_released -= s->npreleased<<PageShift;
+ s->npreleased = 0;
+ }
+
+ if(s->npages > npage) {
+ // Trim extra and put it back in the heap.
+ t = runtime·FixAlloc_Alloc(&h->spanalloc);
+ runtime·MSpan_Init(t, s->start + npage, s->npages - npage);
+ s->npages = npage;
+ p = t->start;
+ p -= ((uintptr)h->arena_start>>PageShift);
+ if(p > 0)
+ h->spans[p-1] = s;
+ h->spans[p] = t;
+ h->spans[p+t->npages-1] = t;
+ t->needzero = s->needzero;
+ s->state = MSpanStack; // prevent coalescing with s
+ t->state = MSpanStack;
+ MHeap_FreeSpanLocked(h, t, false, false);
+ t->unusedsince = s->unusedsince; // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
+ s->state = MSpanFree;
+ }
+ s->unusedsince = 0;
+
+ p = s->start;
+ p -= ((uintptr)h->arena_start>>PageShift);
+ for(n=0; n<npage; n++)
+ h->spans[p+n] = s;
+
+ mstats.heap_inuse += npage<<PageShift;
+ mstats.heap_idle -= npage<<PageShift;
+
+ //runtime·printf("spanalloc %p\n", s->start << PageShift);
+ if(s->next != nil || s->prev != nil)
+ runtime·throw("still in list");
+ return s;
+}
+
+// Allocate a span of at least npage pages from the list of large spans.
+static MSpan*
+MHeap_AllocLarge(MHeap *h, uintptr npage)
+{
+ return BestFit(&h->freelarge, npage, nil);
+}
+
+// Search list for smallest span with >= npage pages.
+// If there are multiple smallest spans, take the one
+// with the earliest starting address.
+static MSpan*
+BestFit(MSpan *list, uintptr npage, MSpan *best)
+{
+ MSpan *s;
+
+ for(s=list->next; s != list; s=s->next) {
+ if(s->npages < npage)
+ continue;
+ if(best == nil
+ || s->npages < best->npages
+ || (s->npages == best->npages && s->start < best->start))
+ best = s;
+ }
+ return best;
+}
+
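The same best-fit policy, shown over a plain slice instead of the runtime's intrusive doubly-linked list; the span type here is an illustrative stand-in, not the runtime's MSpan:

    package main

    import "fmt"

    type span struct{ start, npages uintptr }

    // bestFit picks the smallest span with at least npage pages,
    // breaking ties by the lowest start address.
    func bestFit(spans []span, npage uintptr) *span {
            var best *span
            for i := range spans {
                    s := &spans[i]
                    if s.npages < npage {
                            continue
                    }
                    if best == nil || s.npages < best.npages ||
                            (s.npages == best.npages && s.start < best.start) {
                            best = s
                    }
            }
            return best
    }

    func main() {
            spans := []span{{100, 64}, {40, 32}, {10, 32}, {200, 16}}
            fmt.Println(*bestFit(spans, 20)) // {10 32}: smallest fit, lowest start
    }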
+// Try to add at least npage pages of memory to the heap,
+// returning whether it worked.
+static bool
+MHeap_Grow(MHeap *h, uintptr npage)
+{
+ uintptr ask;
+ void *v;
+ MSpan *s;
+ pageID p;
+
+ // Ask for a big chunk, to reduce the number of mappings
+ // the operating system needs to track; also amortizes
+ // the overhead of an operating system mapping.
+ // Allocate a multiple of 64kB.
+ npage = ROUND(npage, (64<<10)/PageSize);
+ ask = npage<<PageShift;
+ if(ask < HeapAllocChunk)
+ ask = HeapAllocChunk;
+
+ v = runtime·MHeap_SysAlloc(h, ask);
+ if(v == nil) {
+ if(ask > (npage<<PageShift)) {
+ ask = npage<<PageShift;
+ v = runtime·MHeap_SysAlloc(h, ask);
+ }
+ if(v == nil) {
+ runtime·printf("runtime: out of memory: cannot allocate %D-byte block (%D in use)\n", (uint64)ask, mstats.heap_sys);
+ return false;
+ }
+ }
+
+ // Create a fake "in use" span and free it, so that the
+ // right coalescing happens.
+ s = runtime·FixAlloc_Alloc(&h->spanalloc);
+ runtime·MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift);
+ p = s->start;
+ p -= ((uintptr)h->arena_start>>PageShift);
+ h->spans[p] = s;
+ h->spans[p + s->npages - 1] = s;
+ runtime·atomicstore(&s->sweepgen, h->sweepgen);
+ s->state = MSpanInUse;
+ MHeap_FreeSpanLocked(h, s, false, true);
+ return true;
+}
+
+// Look up the span at the given address.
+// Address is guaranteed to be in the map
+// and is guaranteed to be the start or end of a span.
+MSpan*
+runtime·MHeap_Lookup(MHeap *h, void *v)
+{
+ uintptr p;
+
+ p = (uintptr)v;
+ p -= (uintptr)h->arena_start;
+ return h->spans[p >> PageShift];
+}
+
+// Look up the span at the given address.
+// Address is *not* guaranteed to be in map
+// and may be anywhere in the span.
+// Map entries for the middle of a span are only
+// valid for allocated spans. Free spans may have
+// other garbage in their middles, so we have to
+// check for that.
+MSpan*
+runtime·MHeap_LookupMaybe(MHeap *h, void *v)
+{
+ MSpan *s;
+ pageID p, q;
+
+ if((byte*)v < h->arena_start || (byte*)v >= h->arena_used)
+ return nil;
+ p = (uintptr)v>>PageShift;
+ q = p;
+ q -= (uintptr)h->arena_start >> PageShift;
+ s = h->spans[q];
+ if(s == nil || p < s->start || v >= s->limit || s->state != MSpanInUse)
+ return nil;
+ return s;
+}
+
+// Free the span back into the heap.
+static void
+mheap_free(MHeap *h, MSpan *s, int32 acct)
+{
+ if(g != g->m->g0)
+ runtime·throw("mheap_free not on M stack");
+ runtime·lock(&h->lock);
+ mstats.heap_alloc += g->m->mcache->local_cachealloc;
+ g->m->mcache->local_cachealloc = 0;
+ if(acct) {
+ mstats.heap_alloc -= s->npages<<PageShift;
+ mstats.heap_objects--;
+ }
+ MHeap_FreeSpanLocked(h, s, true, true);
+ runtime·unlock(&h->lock);
+}
+
+static void
+mheap_free_m(G *gp)
+{
+ MHeap *h;
+ MSpan *s;
+
+ h = g->m->ptrarg[0];
+ s = g->m->ptrarg[1];
+ g->m->ptrarg[0] = nil;
+ g->m->ptrarg[1] = nil;
+ mheap_free(h, s, g->m->scalararg[0]);
+ runtime·gogo(&gp->sched);
+}
+
+void
+runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
+{
+ void (*fn)(G*);
+
+ if(g == g->m->g0) {
+ mheap_free(h, s, acct);
+ } else {
+ g->m->ptrarg[0] = h;
+ g->m->ptrarg[1] = s;
+ g->m->scalararg[0] = acct;
+ fn = mheap_free_m;
+ runtime·mcall(&fn);
+ }
+}
+
+void
+runtime·MHeap_FreeStack(MHeap *h, MSpan *s)
+{
+ if(g != g->m->g0)
+ runtime·throw("mheap_freestack not on M stack");
+ s->needzero = 1;
+ runtime·lock(&h->lock);
+ mstats.stacks_inuse -= s->npages<<PageShift;
+ MHeap_FreeSpanLocked(h, s, true, true);
+ runtime·unlock(&h->lock);
+}
+
+static void
+MHeap_FreeSpanLocked(MHeap *h, MSpan *s, bool acctinuse, bool acctidle)
+{
+ MSpan *t;
+ pageID p;
+
+ switch(s->state) {
+ case MSpanStack:
+ if(s->ref != 0)
+ runtime·throw("MHeap_FreeSpanLocked - invalid stack free");
+ break;
+ case MSpanInUse:
+ if(s->ref != 0 || s->sweepgen != h->sweepgen) {
+ runtime·printf("MHeap_FreeSpanLocked - span %p ptr %p ref %d sweepgen %d/%d\n",
+ s, s->start<<PageShift, s->ref, s->sweepgen, h->sweepgen);
+ runtime·throw("MHeap_FreeSpanLocked - invalid free");
+ }
+ break;
+ default:
+ runtime·throw("MHeap_FreeSpanLocked - invalid span state");
+ break;
+ }
+ if(acctinuse)
+ mstats.heap_inuse -= s->npages<<PageShift;
+ if(acctidle)
+ mstats.heap_idle += s->npages<<PageShift;
+ s->state = MSpanFree;
+ runtime·MSpanList_Remove(s);
+ // Stamp newly unused spans. The scavenger will use that
+ // info to potentially give back some pages to the OS.
+ s->unusedsince = runtime·nanotime();
+ s->npreleased = 0;
+
+ // Coalesce with earlier, later spans.
+ p = s->start;
+ p -= (uintptr)h->arena_start >> PageShift;
+ if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
+ s->start = t->start;
+ s->npages += t->npages;
+ s->npreleased = t->npreleased; // absorb released pages
+ s->needzero |= t->needzero;
+ p -= t->npages;
+ h->spans[p] = s;
+ runtime·MSpanList_Remove(t);
+ t->state = MSpanDead;
+ runtime·FixAlloc_Free(&h->spanalloc, t);
+ }
+ if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
+ s->npages += t->npages;
+ s->npreleased += t->npreleased;
+ s->needzero |= t->needzero;
+ h->spans[p + s->npages - 1] = s;
+ runtime·MSpanList_Remove(t);
+ t->state = MSpanDead;
+ runtime·FixAlloc_Free(&h->spanalloc, t);
+ }
+
+ // Insert s into appropriate list.
+ if(s->npages < nelem(h->free))
+ runtime·MSpanList_Insert(&h->free[s->npages], s);
+ else
+ runtime·MSpanList_Insert(&h->freelarge, s);
+}
+
+static uintptr
+scavengelist(MSpan *list, uint64 now, uint64 limit)
+{
+ uintptr released, sumreleased;
+ MSpan *s;
+
+ if(runtime·MSpanList_IsEmpty(list))
+ return 0;
+
+ sumreleased = 0;
+ for(s=list->next; s != list; s=s->next) {
+ if((now - s->unusedsince) > limit && s->npreleased != s->npages) {
+ released = (s->npages - s->npreleased) << PageShift;
+ mstats.heap_released += released;
+ sumreleased += released;
+ s->npreleased = s->npages;
+ runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift);
+ }
+ }
+ return sumreleased;
+}
+
+void
+runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit)
+{
+ uint32 i;
+ uintptr sumreleased;
+ MHeap *h;
+
+ h = &runtime·mheap;
+ runtime·lock(&h->lock);
+ sumreleased = 0;
+ for(i=0; i < nelem(h->free); i++)
+ sumreleased += scavengelist(&h->free[i], now, limit);
+ sumreleased += scavengelist(&h->freelarge, now, limit);
+ runtime·unlock(&h->lock);
+
+ if(runtime·debug.gctrace > 0) {
+ if(sumreleased > 0)
+ runtime·printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20);
+ // TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
+ // But we can't call ReadMemStats on g0 holding locks.
+ runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
+ k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
+ mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
+ }
+}
+
+void
+runtime·scavenge_m(void)
+{
+ runtime·MHeap_Scavenge(-1, ~(uintptr)0, 0);
+}
+
+// Initialize a new span with the given start and npages.
+void
+runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages)
+{
+ span->next = nil;
+ span->prev = nil;
+ span->start = start;
+ span->npages = npages;
+ span->freelist = nil;
+ span->ref = 0;
+ span->sizeclass = 0;
+ span->incache = false;
+ span->elemsize = 0;
+ span->state = MSpanDead;
+ span->unusedsince = 0;
+ span->npreleased = 0;
+ span->specialLock.key = 0;
+ span->specials = nil;
+ span->needzero = 0;
+}
+
+// Initialize an empty doubly-linked list.
+void
+runtime·MSpanList_Init(MSpan *list)
+{
+ list->state = MSpanListHead;
+ list->next = list;
+ list->prev = list;
+}
+
+void
+runtime·MSpanList_Remove(MSpan *span)
+{
+ if(span->prev == nil && span->next == nil)
+ return;
+ span->prev->next = span->next;
+ span->next->prev = span->prev;
+ span->prev = nil;
+ span->next = nil;
+}
+
+bool
+runtime·MSpanList_IsEmpty(MSpan *list)
+{
+ return list->next == list;
+}
+
+void
+runtime·MSpanList_Insert(MSpan *list, MSpan *span)
+{
+ if(span->next != nil || span->prev != nil) {
+ runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
+ runtime·throw("MSpanList_Insert");
+ }
+ span->next = list->next;
+ span->prev = list;
+ span->next->prev = span;
+ span->prev->next = span;
+}
+
+void
+runtime·MSpanList_InsertBack(MSpan *list, MSpan *span)
+{
+ if(span->next != nil || span->prev != nil) {
+ runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
+ runtime·throw("MSpanList_Insert");
+ }
+ span->next = list;
+ span->prev = list->prev;
+ span->next->prev = span;
+ span->prev->next = span;
+}
+
+// Adds the special record s to the list of special records for
+// the object p. All fields of s should be filled in except for
+// offset & next, which this routine will fill in.
+// Returns true if the special was successfully added, false otherwise.
+// (The add will fail only if a record with the same p and s->kind
+// already exists.)
+static bool
+addspecial(void *p, Special *s)
+{
+ MSpan *span;
+ Special **t, *x;
+ uintptr offset;
+ byte kind;
+
+ span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
+ if(span == nil)
+ runtime·throw("addspecial on invalid pointer");
+
+ // Ensure that the span is swept.
+	// The GC accesses the specials list without locks, and it's just much safer.
+ g->m->locks++;
+ runtime·MSpan_EnsureSwept(span);
+
+ offset = (uintptr)p - (span->start << PageShift);
+ kind = s->kind;
+
+ runtime·lock(&span->specialLock);
+
+ // Find splice point, check for existing record.
+ t = &span->specials;
+ while((x = *t) != nil) {
+ if(offset == x->offset && kind == x->kind) {
+ runtime·unlock(&span->specialLock);
+ g->m->locks--;
+ return false; // already exists
+ }
+ if(offset < x->offset || (offset == x->offset && kind < x->kind))
+ break;
+ t = &x->next;
+ }
+ // Splice in record, fill in offset.
+ s->offset = offset;
+ s->next = x;
+ *t = s;
+ runtime·unlock(&span->specialLock);
+ g->m->locks--;
+ return true;
+}
+
+// Removes the Special record of the given kind for the object p.
+// Returns the record if the record existed, nil otherwise.
+// The caller must FixAlloc_Free the result.
+static Special*
+removespecial(void *p, byte kind)
+{
+ MSpan *span;
+ Special *s, **t;
+ uintptr offset;
+
+ span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
+ if(span == nil)
+ runtime·throw("removespecial on invalid pointer");
+
+ // Ensure that the span is swept.
+	// The GC accesses the specials list without locks, and it's just much safer.
+ g->m->locks++;
+ runtime·MSpan_EnsureSwept(span);
+
+ offset = (uintptr)p - (span->start << PageShift);
+
+ runtime·lock(&span->specialLock);
+ t = &span->specials;
+ while((s = *t) != nil) {
+ // This function is used for finalizers only, so we don't check for
+ // "interior" specials (p must be exactly equal to s->offset).
+ if(offset == s->offset && kind == s->kind) {
+ *t = s->next;
+ runtime·unlock(&span->specialLock);
+ g->m->locks--;
+ return s;
+ }
+ t = &s->next;
+ }
+ runtime·unlock(&span->specialLock);
+ g->m->locks--;
+ return nil;
+}
+
+// Adds a finalizer to the object p. Returns true if it succeeded.
+bool
+runtime·addfinalizer(void *p, FuncVal *f, uintptr nret, Type *fint, PtrType *ot)
+{
+ SpecialFinalizer *s;
+
+ runtime·lock(&runtime·mheap.speciallock);
+ s = runtime·FixAlloc_Alloc(&runtime·mheap.specialfinalizeralloc);
+ runtime·unlock(&runtime·mheap.speciallock);
+ s->special.kind = KindSpecialFinalizer;
+ s->fn = f;
+ s->nret = nret;
+ s->fint = fint;
+ s->ot = ot;
+ if(addspecial(p, &s->special))
+ return true;
+
+ // There was an old finalizer
+ runtime·lock(&runtime·mheap.speciallock);
+ runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
+ runtime·unlock(&runtime·mheap.speciallock);
+ return false;
+}
+
+// Removes the finalizer (if any) from the object p.
+void
+runtime·removefinalizer(void *p)
+{
+ SpecialFinalizer *s;
+
+ s = (SpecialFinalizer*)removespecial(p, KindSpecialFinalizer);
+ if(s == nil)
+ return; // there wasn't a finalizer to remove
+ runtime·lock(&runtime·mheap.speciallock);
+ runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
+ runtime·unlock(&runtime·mheap.speciallock);
+}
+
+// Set the heap profile bucket associated with addr to b.
+void
+runtime·setprofilebucket_m(void)
+{
+ void *p;
+ Bucket *b;
+ SpecialProfile *s;
+
+ p = g->m->ptrarg[0];
+ b = g->m->ptrarg[1];
+ g->m->ptrarg[0] = nil;
+ g->m->ptrarg[1] = nil;
+
+ runtime·lock(&runtime·mheap.speciallock);
+ s = runtime·FixAlloc_Alloc(&runtime·mheap.specialprofilealloc);
+ runtime·unlock(&runtime·mheap.speciallock);
+ s->special.kind = KindSpecialProfile;
+ s->b = b;
+ if(!addspecial(p, &s->special))
+ runtime·throw("setprofilebucket: profile already set");
+}
+
+// Do whatever cleanup needs to be done to deallocate s. It has
+// already been unlinked from the MSpan specials list.
+// Returns true if we should keep working on deallocating p.
+bool
+runtime·freespecial(Special *s, void *p, uintptr size, bool freed)
+{
+ SpecialFinalizer *sf;
+ SpecialProfile *sp;
+
+ switch(s->kind) {
+ case KindSpecialFinalizer:
+ sf = (SpecialFinalizer*)s;
+ runtime·queuefinalizer(p, sf->fn, sf->nret, sf->fint, sf->ot);
+ runtime·lock(&runtime·mheap.speciallock);
+ runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, sf);
+ runtime·unlock(&runtime·mheap.speciallock);
+ return false; // don't free p until finalizer is done
+ case KindSpecialProfile:
+ sp = (SpecialProfile*)s;
+ runtime·mProf_Free(sp->b, size, freed);
+ runtime·lock(&runtime·mheap.speciallock);
+ runtime·FixAlloc_Free(&runtime·mheap.specialprofilealloc, sp);
+ runtime·unlock(&runtime·mheap.speciallock);
+ return true;
+ default:
+ runtime·throw("bad special kind");
+ return true;
+ }
+}
diff --git a/src/runtime/mknacl.sh b/src/runtime/mknacl.sh
new file mode 100644
index 000000000..47fb7bd88
--- /dev/null
+++ b/src/runtime/mknacl.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Copyright 2013 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+cat /Users/rsc/pub/native_client/src/trusted/service_runtime/include/bits/nacl_syscalls.h |
+ awk '
+ BEGIN {
+ printf("// generated by mknacl.sh - do not edit\n")
+ }
+ NF==3 && $1=="#define" && $2~/^NACL_sys_/ {
+ name=$2
+ sub(/^NACL_sys_/, "SYS_", name)
+ printf("#define %s %s\n", name, $3)
+ }' >syscall_nacl.h
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
new file mode 100644
index 000000000..7177c8459
--- /dev/null
+++ b/src/runtime/mprof.go
@@ -0,0 +1,660 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Malloc profiling.
+// Patterned after tcmalloc's algorithms; shorter code.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+// NOTE(rsc): Everything here could use cas if contention became an issue.
+var proflock mutex
+
+// All memory allocations are local and do not escape outside of the profiler.
+// The profiler is forbidden from referring to garbage-collected memory.
+
+const (
+ // profile types
+ memProfile bucketType = 1 + iota
+ blockProfile
+
+ // size of bucket hash table
+ buckHashSize = 179999
+
+ // max depth of stack to record in bucket
+ maxStack = 32
+)
+
+type bucketType int
+
+// A bucket holds per-call-stack profiling information.
+// The representation is a bit sleazy, inherited from C.
+// This struct defines the bucket header. It is followed in
+// memory by the stack words and then the actual record
+// data, either a memRecord or a blockRecord.
+//
+// Per-call-stack profiling information.
+// Lookup by hashing call stack into a linked-list hash table.
+type bucket struct {
+ next *bucket
+ allnext *bucket
+ typ bucketType // memBucket or blockBucket
+ hash uintptr
+ size uintptr
+ nstk uintptr
+}
+
+// A memRecord is the bucket data for a bucket of type memProfile,
+// part of the memory profile.
+type memRecord struct {
+ // The following complex 3-stage scheme of stats accumulation
+ // is required to obtain a consistent picture of mallocs and frees
+ // for some point in time.
+ // The problem is that mallocs come in real time, while frees
+	// come only after a GC during concurrent sweeping. So if we counted
+	// them naively, we would get a skew toward mallocs.
+ //
+ // Mallocs are accounted in recent stats.
+ // Explicit frees are accounted in recent stats.
+ // GC frees are accounted in prev stats.
+ // After GC prev stats are added to final stats and
+ // recent stats are moved into prev stats.
+ allocs uintptr
+ frees uintptr
+ alloc_bytes uintptr
+ free_bytes uintptr
+
+ // changes between next-to-last GC and last GC
+ prev_allocs uintptr
+ prev_frees uintptr
+ prev_alloc_bytes uintptr
+ prev_free_bytes uintptr
+
+ // changes since last GC
+ recent_allocs uintptr
+ recent_frees uintptr
+ recent_alloc_bytes uintptr
+ recent_free_bytes uintptr
+}
+
+// A blockRecord is the bucket data for a bucket of type blockProfile,
+// part of the blocking profile.
+type blockRecord struct {
+ count int64
+ cycles int64
+}
+
+var (
+ mbuckets *bucket // memory profile buckets
+ bbuckets *bucket // blocking profile buckets
+ buckhash *[179999]*bucket
+ bucketmem uintptr
+)
+
+// newBucket allocates a bucket with the given type and number of stack entries.
+func newBucket(typ bucketType, nstk int) *bucket {
+ size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
+ switch typ {
+ default:
+ gothrow("invalid profile bucket type")
+ case memProfile:
+ size += unsafe.Sizeof(memRecord{})
+ case blockProfile:
+ size += unsafe.Sizeof(blockRecord{})
+ }
+
+ b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
+ bucketmem += size
+ b.typ = typ
+ b.nstk = uintptr(nstk)
+ return b
+}
+
+// stk returns the slice in b holding the stack.
+func (b *bucket) stk() []uintptr {
+ stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
+ return stk[:b.nstk:b.nstk]
+}
+
+// mp returns the memRecord associated with the memProfile bucket b.
+func (b *bucket) mp() *memRecord {
+ if b.typ != memProfile {
+ gothrow("bad use of bucket.mp")
+ }
+ data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
+ return (*memRecord)(data)
+}
+
+// bp returns the blockRecord associated with the blockProfile bucket b.
+func (b *bucket) bp() *blockRecord {
+ if b.typ != blockProfile {
+ gothrow("bad use of bucket.bp")
+ }
+ data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
+ return (*blockRecord)(data)
+}
+
+// Return the bucket for stk[0:nstk], allocating a new bucket if needed.
+func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
+ if buckhash == nil {
+ buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
+ if buckhash == nil {
+ gothrow("runtime: cannot allocate memory")
+ }
+ }
+
+ // Hash stack.
+ var h uintptr
+ for _, pc := range stk {
+ h += pc
+ h += h << 10
+ h ^= h >> 6
+ }
+ // hash in size
+ h += size
+ h += h << 10
+ h ^= h >> 6
+ // finalize
+ h += h << 3
+ h ^= h >> 11
+
+ i := int(h % buckHashSize)
+ for b := buckhash[i]; b != nil; b = b.next {
+ if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
+ return b
+ }
+ }
+
+ if !alloc {
+ return nil
+ }
+
+ // Create new bucket.
+ b := newBucket(typ, len(stk))
+ copy(b.stk(), stk)
+ b.hash = h
+ b.size = size
+ b.next = buckhash[i]
+ buckhash[i] = b
+ if typ == memProfile {
+ b.allnext = mbuckets
+ mbuckets = b
+ } else {
+ b.allnext = bbuckets
+ bbuckets = b
+ }
+ return b
+}
+
+func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer
+
+func eqslice(x, y []uintptr) bool {
+ if len(x) != len(y) {
+ return false
+ }
+ for i, xi := range x {
+ if xi != y[i] {
+ return false
+ }
+ }
+ return true
+}
+
+func mprof_GC() {
+ for b := mbuckets; b != nil; b = b.allnext {
+ mp := b.mp()
+ mp.allocs += mp.prev_allocs
+ mp.frees += mp.prev_frees
+ mp.alloc_bytes += mp.prev_alloc_bytes
+ mp.free_bytes += mp.prev_free_bytes
+
+ mp.prev_allocs = mp.recent_allocs
+ mp.prev_frees = mp.recent_frees
+ mp.prev_alloc_bytes = mp.recent_alloc_bytes
+ mp.prev_free_bytes = mp.recent_free_bytes
+
+ mp.recent_allocs = 0
+ mp.recent_frees = 0
+ mp.recent_alloc_bytes = 0
+ mp.recent_free_bytes = 0
+ }
+}
+
+// Record that a gc just happened: all the 'recent' statistics are now real.
+func mProf_GC() {
+ lock(&proflock)
+ mprof_GC()
+ unlock(&proflock)
+}
+
+// Called by malloc to record a profiled block.
+func mProf_Malloc(p unsafe.Pointer, size uintptr) {
+ var stk [maxStack]uintptr
+ nstk := callers(1, &stk[0], len(stk))
+ lock(&proflock)
+ b := stkbucket(memProfile, size, stk[:nstk], true)
+ mp := b.mp()
+ mp.recent_allocs++
+ mp.recent_alloc_bytes += size
+ unlock(&proflock)
+
+ // Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
+ // This reduces potential contention and chances of deadlocks.
+ // Since the object must be alive during call to mProf_Malloc,
+ // it's fine to do this non-atomically.
+ setprofilebucket(p, b)
+}
+
+func setprofilebucket_m() // mheap.c
+
+func setprofilebucket(p unsafe.Pointer, b *bucket) {
+ g := getg()
+ g.m.ptrarg[0] = p
+ g.m.ptrarg[1] = unsafe.Pointer(b)
+ onM(setprofilebucket_m)
+}
+
+// Called when freeing a profiled block.
+func mProf_Free(b *bucket, size uintptr, freed bool) {
+ lock(&proflock)
+ mp := b.mp()
+ if freed {
+ mp.recent_frees++
+ mp.recent_free_bytes += size
+ } else {
+ mp.prev_frees++
+ mp.prev_free_bytes += size
+ }
+ unlock(&proflock)
+}
+
+var blockprofilerate uint64 // in CPU ticks
+
+// SetBlockProfileRate controls the fraction of goroutine blocking events
+// that are reported in the blocking profile. The profiler aims to sample
+// an average of one blocking event per rate nanoseconds spent blocked.
+//
+// To include every blocking event in the profile, pass rate = 1.
+// To turn off profiling entirely, pass rate <= 0.
+func SetBlockProfileRate(rate int) {
+ var r int64
+ if rate <= 0 {
+ r = 0 // disable profiling
+ } else {
+ // convert ns to cycles, use float64 to prevent overflow during multiplication
+ r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
+ if r == 0 {
+ r = 1
+ }
+ }
+
+ atomicstore64(&blockprofilerate, uint64(r))
+}
+
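A hedged usage sketch: enable block profiling with the function above and dump the "block" profile through runtime/pprof; the contended mutex is only there to generate one blocking event:

    package main

    import (
            "os"
            "runtime"
            "runtime/pprof"
            "sync"
            "time"
    )

    func main() {
            runtime.SetBlockProfileRate(1) // sample every blocking event

            var mu sync.Mutex
            mu.Lock()
            go func() { time.Sleep(10 * time.Millisecond); mu.Unlock() }()
            mu.Lock() // blocks here; recorded via blockevent
            mu.Unlock()

            pprof.Lookup("block").WriteTo(os.Stdout, 1)
    }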
+func fastrand1() uint32 // assembly
+func readgstatus(*g) uint32 // proc.c
+
+func blockevent(cycles int64, skip int) {
+ if cycles <= 0 {
+ return
+ }
+ rate := int64(atomicload64(&blockprofilerate))
+ if rate <= 0 || (rate > cycles && int64(fastrand1())%rate > cycles) {
+ return
+ }
+ gp := getg()
+ var nstk int
+ var stk [maxStack]uintptr
+ if gp.m.curg == nil || gp.m.curg == gp {
+ nstk = callers(skip, &stk[0], len(stk))
+ } else {
+ nstk = gcallers(gp.m.curg, skip, &stk[0], len(stk))
+ }
+ lock(&proflock)
+ b := stkbucket(blockProfile, 0, stk[:nstk], true)
+ b.bp().count++
+ b.bp().cycles += cycles
+ unlock(&proflock)
+}
+
+// Go interface to profile data.
+
+// A StackRecord describes a single execution stack.
+type StackRecord struct {
+ Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
+}
+
+// Stack returns the stack trace associated with the record,
+// a prefix of r.Stack0.
+func (r *StackRecord) Stack() []uintptr {
+ for i, v := range r.Stack0 {
+ if v == 0 {
+ return r.Stack0[0:i]
+ }
+ }
+ return r.Stack0[0:]
+}
+
+// MemProfileRate controls the fraction of memory allocations
+// that are recorded and reported in the memory profile.
+// The profiler aims to sample an average of
+// one allocation per MemProfileRate bytes allocated.
+//
+// To include every allocated block in the profile, set MemProfileRate to 1.
+// To turn off profiling entirely, set MemProfileRate to 0.
+//
+// The tools that process the memory profiles assume that the
+// profile rate is constant across the lifetime of the program
+// and equal to the current value. Programs that change the
+// memory profiling rate should do so just once, as early as
+// possible in the execution of the program (for example,
+// at the beginning of main).
+var MemProfileRate int = 512 * 1024
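A short sketch of the guidance in the comment above: change MemProfileRate once, at the top of main, before the allocations of interest; the output file name here is only for illustration.

	package main

	import (
		"os"
		"runtime"
		"runtime/pprof"
	)

	func main() {
		runtime.MemProfileRate = 1 // record every allocation; set as early as possible

		// ... run the workload of interest ...

		f, err := os.Create("mem.pprof") // illustrative output path
		if err != nil {
			panic(err)
		}
		defer f.Close()
		pprof.WriteHeapProfile(f)
	}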
+
+// A MemProfileRecord describes the live objects allocated
+// by a particular call sequence (stack trace).
+type MemProfileRecord struct {
+ AllocBytes, FreeBytes int64 // number of bytes allocated, freed
+ AllocObjects, FreeObjects int64 // number of objects allocated, freed
+ Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
+}
+
+// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
+func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }
+
+// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
+func (r *MemProfileRecord) InUseObjects() int64 {
+ return r.AllocObjects - r.FreeObjects
+}
+
+// Stack returns the stack trace associated with the record,
+// a prefix of r.Stack0.
+func (r *MemProfileRecord) Stack() []uintptr {
+ for i, v := range r.Stack0 {
+ if v == 0 {
+ return r.Stack0[0:i]
+ }
+ }
+ return r.Stack0[0:]
+}
+
+// MemProfile returns n, the number of records in the current memory profile.
+// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
+// If len(p) < n, MemProfile does not change p and returns n, false.
+//
+// If inuseZero is true, the profile includes allocation records
+// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
+// These are sites where memory was allocated, but it has all
+// been released back to the runtime.
+//
+// Most clients should use the runtime/pprof package or
+// the testing package's -test.memprofile flag instead
+// of calling MemProfile directly.
+func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
+ lock(&proflock)
+ clear := true
+ for b := mbuckets; b != nil; b = b.allnext {
+ mp := b.mp()
+ if inuseZero || mp.alloc_bytes != mp.free_bytes {
+ n++
+ }
+ if mp.allocs != 0 || mp.frees != 0 {
+ clear = false
+ }
+ }
+ if clear {
+ // Absolutely no data, suggesting that a garbage collection
+ // has not yet happened. In order to allow profiling when
+ // garbage collection is disabled from the beginning of execution,
+ // accumulate stats as if a GC just happened, and recount buckets.
+ mprof_GC()
+ mprof_GC()
+ n = 0
+ for b := mbuckets; b != nil; b = b.allnext {
+ mp := b.mp()
+ if inuseZero || mp.alloc_bytes != mp.free_bytes {
+ n++
+ }
+ }
+ }
+ if n <= len(p) {
+ ok = true
+ idx := 0
+ for b := mbuckets; b != nil; b = b.allnext {
+ mp := b.mp()
+ if inuseZero || mp.alloc_bytes != mp.free_bytes {
+ record(&p[idx], b)
+ idx++
+ }
+ }
+ }
+ unlock(&proflock)
+ return
+}
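The n/ok contract documented above leads to the usual read-then-grow loop when calling MemProfile directly; a sketch (most programs would go through runtime/pprof instead), with the slack of 50 records chosen arbitrarily:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		// First ask for the count, then retry with a larger slice until the
		// profile fits, since new records can appear between calls.
		var records []runtime.MemProfileRecord
		n, ok := runtime.MemProfile(nil, true)
		for !ok {
			records = make([]runtime.MemProfileRecord, n+50)
			n, ok = runtime.MemProfile(records, true)
		}
		for _, r := range records[:n] {
			fmt.Println(r.InUseBytes(), "bytes in use, stack:", r.Stack())
		}
	}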
+
+// Write b's data to r.
+func record(r *MemProfileRecord, b *bucket) {
+ mp := b.mp()
+ r.AllocBytes = int64(mp.alloc_bytes)
+ r.FreeBytes = int64(mp.free_bytes)
+ r.AllocObjects = int64(mp.allocs)
+ r.FreeObjects = int64(mp.frees)
+ copy(r.Stack0[:], b.stk())
+ for i := int(b.nstk); i < len(r.Stack0); i++ {
+ r.Stack0[i] = 0
+ }
+}
+
+func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
+ lock(&proflock)
+ for b := mbuckets; b != nil; b = b.allnext {
+ mp := b.mp()
+ fn(b, uintptr(b.nstk), &b.stk()[0], b.size, mp.allocs, mp.frees)
+ }
+ unlock(&proflock)
+}
+
+// BlockProfileRecord describes blocking events originating
+// at a particular call sequence (stack trace).
+type BlockProfileRecord struct {
+ Count int64
+ Cycles int64
+ StackRecord
+}
+
+// BlockProfile returns n, the number of records in the current blocking profile.
+// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
+// If len(p) < n, BlockProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package or
+// the testing package's -test.blockprofile flag instead
+// of calling BlockProfile directly.
+func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
+ lock(&proflock)
+ for b := bbuckets; b != nil; b = b.allnext {
+ n++
+ }
+ if n <= len(p) {
+ ok = true
+ for b := bbuckets; b != nil; b = b.allnext {
+ bp := b.bp()
+ r := &p[0]
+ r.Count = int64(bp.count)
+ r.Cycles = int64(bp.cycles)
+ i := copy(r.Stack0[:], b.stk())
+ for ; i < len(r.Stack0); i++ {
+ r.Stack0[i] = 0
+ }
+ p = p[1:]
+ }
+ }
+ unlock(&proflock)
+ return
+}
+
+// ThreadCreateProfile returns n, the number of records in the thread creation profile.
+// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
+// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package instead
+// of calling ThreadCreateProfile directly.
+func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
+ first := (*m)(atomicloadp(unsafe.Pointer(&allm)))
+ for mp := first; mp != nil; mp = mp.alllink {
+ n++
+ }
+ if n <= len(p) {
+ ok = true
+ i := 0
+ for mp := first; mp != nil; mp = mp.alllink {
+ for s := range mp.createstack {
+ p[i].Stack0[s] = uintptr(mp.createstack[s])
+ }
+ i++
+ }
+ }
+ return
+}
+
+var allgs []*g // proc.c
+
+// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
+// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
+// If len(p) < n, GoroutineProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package instead
+// of calling GoroutineProfile directly.
+func GoroutineProfile(p []StackRecord) (n int, ok bool) {
+ sp := getcallersp(unsafe.Pointer(&p))
+ pc := getcallerpc(unsafe.Pointer(&p))
+
+ n = NumGoroutine()
+ if n <= len(p) {
+ gp := getg()
+ semacquire(&worldsema, false)
+ gp.m.gcing = 1
+ onM(stoptheworld)
+
+ n = NumGoroutine()
+ if n <= len(p) {
+ ok = true
+ r := p
+ saveg(pc, sp, gp, &r[0])
+ r = r[1:]
+ for _, gp1 := range allgs {
+ if gp1 == gp || readgstatus(gp1) == _Gdead {
+ continue
+ }
+ saveg(^uintptr(0), ^uintptr(0), gp1, &r[0])
+ r = r[1:]
+ }
+ }
+
+ gp.m.gcing = 0
+ semrelease(&worldsema)
+ onM(starttheworld)
+ }
+
+ return n, ok
+}
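A direct call follows the same n/ok pattern, sizing the slice from NumGoroutine plus some slack because goroutines may start before the world is stopped; a sketch, with the slack of 10 chosen arbitrarily:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		p := make([]runtime.StackRecord, runtime.NumGoroutine()+10)
		n, ok := runtime.GoroutineProfile(p)
		if !ok {
			// Lost the race with goroutine creation; a real caller would retry
			// with a larger slice.
			return
		}
		for _, r := range p[:n] {
			fmt.Println(r.Stack())
		}
	}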
+
+func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
+ n := gentraceback(pc, sp, 0, gp, 0, &r.Stack0[0], len(r.Stack0), nil, nil, false)
+ if n < len(r.Stack0) {
+ r.Stack0[n] = 0
+ }
+}
+
+// Stack formats a stack trace of the calling goroutine into buf
+// and returns the number of bytes written to buf.
+// If all is true, Stack formats stack traces of all other goroutines
+// into buf after the trace for the current goroutine.
+func Stack(buf []byte, all bool) int {
+ sp := getcallersp(unsafe.Pointer(&buf))
+ pc := getcallerpc(unsafe.Pointer(&buf))
+ mp := acquirem()
+ gp := mp.curg
+ if all {
+ semacquire(&worldsema, false)
+ mp.gcing = 1
+ releasem(mp)
+ onM(stoptheworld)
+ if mp != acquirem() {
+ gothrow("Stack: rescheduled")
+ }
+ }
+
+ n := 0
+ if len(buf) > 0 {
+ gp.writebuf = buf[0:0:len(buf)]
+ goroutineheader(gp)
+ traceback(pc, sp, 0, gp)
+ if all {
+ tracebackothers(gp)
+ }
+ n = len(gp.writebuf)
+ gp.writebuf = nil
+ }
+
+ if all {
+ mp.gcing = 0
+ semrelease(&worldsema)
+ onM(starttheworld)
+ }
+ releasem(mp)
+ return n
+}
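A common debugging use of the exported entry point above is to dump all goroutine stacks into a fixed-size buffer; a sketch, with the 1 MB buffer size chosen arbitrarily (Stack truncates rather than growing the buffer):

	package main

	import (
		"os"
		"runtime"
	)

	func main() {
		buf := make([]byte, 1<<20)
		n := runtime.Stack(buf, true) // true: include all goroutines
		os.Stderr.Write(buf[:n])
	}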
+
+// Tracing of alloc/free/gc.
+
+var tracelock mutex
+
+func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
+ lock(&tracelock)
+ gp := getg()
+ gp.m.traceback = 2
+ if typ == nil {
+ print("tracealloc(", p, ", ", hex(size), ")\n")
+ } else {
+ print("tracealloc(", p, ", ", hex(size), ", ", *typ._string, ")\n")
+ }
+ if gp.m.curg == nil || gp == gp.m.curg {
+ goroutineheader(gp)
+ traceback(getcallerpc(unsafe.Pointer(&p)), getcallersp(unsafe.Pointer(&p)), 0, gp)
+ } else {
+ goroutineheader(gp.m.curg)
+ traceback(^uintptr(0), ^uintptr(0), 0, gp.m.curg)
+ }
+ print("\n")
+ gp.m.traceback = 0
+ unlock(&tracelock)
+}
+
+func tracefree(p unsafe.Pointer, size uintptr) {
+ lock(&tracelock)
+ gp := getg()
+ gp.m.traceback = 2
+ print("tracefree(", p, ", ", hex(size), ")\n")
+ goroutineheader(gp)
+ traceback(getcallerpc(unsafe.Pointer(&p)), getcallersp(unsafe.Pointer(&p)), 0, gp)
+ print("\n")
+ gp.m.traceback = 0
+ unlock(&tracelock)
+}
+
+func tracegc() {
+ lock(&tracelock)
+ gp := getg()
+ gp.m.traceback = 2
+ print("tracegc()\n")
+ // running on m->g0 stack; show all non-g0 goroutines
+ tracebackothers(gp)
+ print("end tracegc\n")
+ print("\n")
+ gp.m.traceback = 0
+ unlock(&tracelock)
+}
diff --git a/src/runtime/msize.c b/src/runtime/msize.c
new file mode 100644
index 000000000..7cb65dad0
--- /dev/null
+++ b/src/runtime/msize.c
@@ -0,0 +1,184 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Malloc small size classes.
+//
+// See malloc.h for overview.
+//
+// The size classes are chosen so that rounding an allocation
+// request up to the next size class wastes at most 12.5% (1.125x).
+//
+// Each size class has its own page count that gets allocated
+// and chopped up when new objects of the size class are needed.
+// That page count is chosen so that chopping up the run of
+// pages into objects of the given size wastes at most 12.5% (1.125x)
+// of the memory. It is not necessary that the cutoff here be
+// the same as above.
+//
+// The two sources of waste multiply, so the worst possible case
+// for the above constraints would be that allocations of some
+// size might have a 26.6% (1.266x) overhead.
+// In practice, only one of the wastes comes into play for a
+// given size (sizes < 512 waste mainly on the round-up,
+// sizes > 512 waste mainly on the page chopping).
+//
+// TODO(rsc): Compute max waste for any given size.
+
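A quick check of the combined-waste bound quoted in the comment above (a sketch; the two 1.125x factors compound multiplicatively in the worst case):

	package main

	import "fmt"

	func main() {
		fmt.Println(1.125 * 1.125) // 1.265625, i.e. the 26.6% (1.266x) bound above
	}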
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "textflag.h"
+
+#pragma dataflag NOPTR
+int32 runtime·class_to_size[NumSizeClasses];
+#pragma dataflag NOPTR
+int32 runtime·class_to_allocnpages[NumSizeClasses];
+
+// The SizeToClass lookup is implemented using two arrays,
+// one mapping sizes <= 1024 to their class and one mapping
+// sizes >= 1024 and <= MaxSmallSize to their class.
+// All objects are 8-aligned, so the first array is indexed by
+// the size divided by 8 (rounded up). Objects >= 1024 bytes
+// are 128-aligned, so the second array is indexed by the
+// size divided by 128 (rounded up). The arrays are filled in
+// by InitSizes.
+
+#pragma dataflag NOPTR
+int8 runtime·size_to_class8[1024/8 + 1];
+#pragma dataflag NOPTR
+int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
+
+void runtime·testdefersizes(void);
+
+int32
+runtime·SizeToClass(int32 size)
+{
+ if(size > MaxSmallSize)
+ runtime·throw("SizeToClass - invalid size");
+ if(size > 1024-8)
+ return runtime·size_to_class128[(size-1024+127) >> 7];
+ return runtime·size_to_class8[(size+7)>>3];
+}
+
+void
+runtime·InitSizes(void)
+{
+ int32 align, sizeclass, size, nextsize, n;
+ uint32 i;
+ uintptr allocsize, npages;
+
+ // Initialize the runtime·class_to_size table (and choose class sizes in the process).
+ runtime·class_to_size[0] = 0;
+ sizeclass = 1; // 0 means no class
+ align = 8;
+ for(size = align; size <= MaxSmallSize; size += align) {
+ if((size&(size-1)) == 0) { // bump alignment once in a while
+ if(size >= 2048)
+ align = 256;
+ else if(size >= 128)
+ align = size / 8;
+ else if(size >= 16)
+ align = 16; // required for x86 SSE instructions, if we want to use them
+ }
+ if((align&(align-1)) != 0)
+ runtime·throw("InitSizes - bug");
+
+ // Make the allocnpages big enough that
+ // the leftover is less than 1/8 of the total,
+ // so wasted space is at most 12.5%.
+ allocsize = PageSize;
+ while(allocsize%size > allocsize/8)
+ allocsize += PageSize;
+ npages = allocsize >> PageShift;
+
+ // If the previous sizeclass chose the same
+ // allocation size and fit the same number of
+ // objects into the page, we might as well
+ // use just this size instead of having two
+ // different sizes.
+ if(sizeclass > 1 &&
+ npages == runtime·class_to_allocnpages[sizeclass-1] &&
+ allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {
+ runtime·class_to_size[sizeclass-1] = size;
+ continue;
+ }
+
+ runtime·class_to_allocnpages[sizeclass] = npages;
+ runtime·class_to_size[sizeclass] = size;
+ sizeclass++;
+ }
+ if(sizeclass != NumSizeClasses) {
+ runtime·printf("sizeclass=%d NumSizeClasses=%d\n", sizeclass, NumSizeClasses);
+ runtime·throw("InitSizes - bad NumSizeClasses");
+ }
+
+ // Initialize the size_to_class tables.
+ nextsize = 0;
+ for (sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
+ for(; nextsize < 1024 && nextsize <= runtime·class_to_size[sizeclass]; nextsize+=8)
+ runtime·size_to_class8[nextsize/8] = sizeclass;
+ if(nextsize >= 1024)
+ for(; nextsize <= runtime·class_to_size[sizeclass]; nextsize += 128)
+ runtime·size_to_class128[(nextsize-1024)/128] = sizeclass;
+ }
+
+ // Double-check SizeToClass.
+ if(0) {
+ for(n=0; n < MaxSmallSize; n++) {
+ sizeclass = runtime·SizeToClass(n);
+ if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime·class_to_size[sizeclass] < n) {
+ runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
+ runtime·printf("incorrect SizeToClass");
+ goto dump;
+ }
+ if(sizeclass > 1 && runtime·class_to_size[sizeclass-1] >= n) {
+ runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
+ runtime·printf("SizeToClass too big");
+ goto dump;
+ }
+ }
+ }
+
+ runtime·testdefersizes();
+
+ // Copy out for statistics table.
+ for(i=0; i<nelem(runtime·class_to_size); i++)
+ mstats.by_size[i].size = runtime·class_to_size[i];
+ return;
+
+dump:
+ if(1){
+ runtime·printf("NumSizeClasses=%d\n", NumSizeClasses);
+ runtime·printf("runtime·class_to_size:");
+ for(sizeclass=0; sizeclass<NumSizeClasses; sizeclass++)
+ runtime·printf(" %d", runtime·class_to_size[sizeclass]);
+ runtime·printf("\n\n");
+ runtime·printf("size_to_class8:");
+ for(i=0; i<nelem(runtime·size_to_class8); i++)
+ runtime·printf(" %d=>%d(%d)\n", i*8, runtime·size_to_class8[i],
+ runtime·class_to_size[runtime·size_to_class8[i]]);
+ runtime·printf("\n");
+ runtime·printf("size_to_class128:");
+ for(i=0; i<nelem(runtime·size_to_class128); i++)
+ runtime·printf(" %d=>%d(%d)\n", i*128, runtime·size_to_class128[i],
+ runtime·class_to_size[runtime·size_to_class128[i]]);
+ runtime·printf("\n");
+ }
+ runtime·throw("InitSizes failed");
+}
+
+// Returns the size of the memory block that mallocgc will allocate if you ask for size bytes.
+uintptr
+runtime·roundupsize(uintptr size)
+{
+ if(size < MaxSmallSize) {
+ if(size <= 1024-8)
+ return runtime·class_to_size[runtime·size_to_class8[(size+7)>>3]];
+ else
+ return runtime·class_to_size[runtime·size_to_class128[(size-1024+127) >> 7]];
+ }
+ if(size + PageSize < size)
+ return size;
+ return ROUND(size, PageSize);
+}
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
new file mode 100644
index 000000000..3456e0208
--- /dev/null
+++ b/src/runtime/netpoll.go
@@ -0,0 +1,455 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows
+
+package runtime
+
+import "unsafe"
+
+// Integrated network poller (platform-independent part).
+// A particular implementation (epoll/kqueue) must define the following functions:
+// func netpollinit() // to initialize the poller
+// func netpollopen(fd uintptr, pd *pollDesc) int32 // to arm edge-triggered notifications
+// and associate fd with pd.
+// An implementation must call the following function to denote that the pd is ready.
+// func netpollready(gpp **g, pd *pollDesc, mode int32)
+
+// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
+// goroutines respectively. The semaphore can be in the following states:
+// pdReady - io readiness notification is pending;
+// a goroutine consumes the notification by changing the state to nil.
+// pdWait - a goroutine is preparing to park on the semaphore, but is not yet parked;
+// the goroutine commits to park by changing the state to G pointer,
+// or, alternatively, concurrent io notification changes the state to READY,
+// or, alternatively, concurrent timeout/close changes the state to nil.
+// G pointer - the goroutine is blocked on the semaphore;
+// io notification or timeout/close changes the state to READY or nil respectively
+// and unparks the goroutine.
+// nil - nothing of the above.
+const (
+ pdReady uintptr = 1
+ pdWait uintptr = 2
+)
+
+const pollBlockSize = 4 * 1024
+
+// Network poller descriptor.
+type pollDesc struct {
+ link *pollDesc // in pollcache, protected by pollcache.lock
+
+ // The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
+ // This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
+ // pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
+ // proceed w/o taking the lock. So closing, rg, rd, wg and wd are manipulated
+ // in a lock-free way by all operations.
+ // NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
+ // that will blow up when GC starts moving objects.
+ lock mutex // protects the following fields
+ fd uintptr
+ closing bool
+ seq uintptr // protects from stale timers and ready notifications
+ rg uintptr // pdReady, pdWait, G waiting for read or nil
+ rt timer // read deadline timer (set if rt.f != nil)
+ rd int64 // read deadline
+ wg uintptr // pdReady, pdWait, G waiting for write or nil
+ wt timer // write deadline timer
+ wd int64 // write deadline
+ user unsafe.Pointer // user settable cookie
+}
+
+type pollCache struct {
+ lock mutex
+ first *pollDesc
+ // PollDesc objects must be type-stable,
+ // because we can get ready notification from epoll/kqueue
+ // after the descriptor is closed/reused.
+ // Stale notifications are detected using seq variable,
+ // seq is incremented when deadlines are changed or descriptor is reused.
+}
+
+var pollcache pollCache
+
+func netpollServerInit() {
+ onM(netpollinit)
+}
+
+func netpollOpen(fd uintptr) (*pollDesc, int) {
+ pd := pollcache.alloc()
+ lock(&pd.lock)
+ if pd.wg != 0 && pd.wg != pdReady {
+ gothrow("netpollOpen: blocked write on free descriptor")
+ }
+ if pd.rg != 0 && pd.rg != pdReady {
+ gothrow("netpollOpen: blocked read on free descriptor")
+ }
+ pd.fd = fd
+ pd.closing = false
+ pd.seq++
+ pd.rg = 0
+ pd.rd = 0
+ pd.wg = 0
+ pd.wd = 0
+ unlock(&pd.lock)
+
+ var errno int32
+ onM(func() {
+ errno = netpollopen(fd, pd)
+ })
+ return pd, int(errno)
+}
+
+func netpollClose(pd *pollDesc) {
+ if !pd.closing {
+ gothrow("netpollClose: close w/o unblock")
+ }
+ if pd.wg != 0 && pd.wg != pdReady {
+ gothrow("netpollClose: blocked write on closing descriptor")
+ }
+ if pd.rg != 0 && pd.rg != pdReady {
+ gothrow("netpollClose: blocked read on closing descriptor")
+ }
+ onM(func() {
+ netpollclose(uintptr(pd.fd))
+ })
+ pollcache.free(pd)
+}
+
+func (c *pollCache) free(pd *pollDesc) {
+ lock(&c.lock)
+ pd.link = c.first
+ c.first = pd
+ unlock(&c.lock)
+}
+
+func netpollReset(pd *pollDesc, mode int) int {
+ err := netpollcheckerr(pd, int32(mode))
+ if err != 0 {
+ return err
+ }
+ if mode == 'r' {
+ pd.rg = 0
+ } else if mode == 'w' {
+ pd.wg = 0
+ }
+ return 0
+}
+
+func netpollWait(pd *pollDesc, mode int) int {
+ err := netpollcheckerr(pd, int32(mode))
+ if err != 0 {
+ return err
+ }
+ // For now only Solaris uses level-triggered IO.
+ if GOOS == "solaris" {
+ onM(func() {
+ netpollarm(pd, mode)
+ })
+ }
+ for !netpollblock(pd, int32(mode), false) {
+ err = netpollcheckerr(pd, int32(mode))
+ if err != 0 {
+ return err
+ }
+ // This can happen if the timeout fired and unblocked us,
+ // but the timeout was reset before we had a chance to run.
+ // Pretend it did not happen and retry.
+ }
+ return 0
+}
+
+func netpollWaitCanceled(pd *pollDesc, mode int) {
+ // This function is used only on Windows after a failed attempt to cancel
+ // a pending async IO operation. Wait for ioready, ignore closing or timeouts.
+ for !netpollblock(pd, int32(mode), true) {
+ }
+}
+
+func netpollSetDeadline(pd *pollDesc, d int64, mode int) {
+ lock(&pd.lock)
+ if pd.closing {
+ unlock(&pd.lock)
+ return
+ }
+ pd.seq++ // invalidate current timers
+ // Reset current timers.
+ if pd.rt.f != nil {
+ deltimer(&pd.rt)
+ pd.rt.f = nil
+ }
+ if pd.wt.f != nil {
+ deltimer(&pd.wt)
+ pd.wt.f = nil
+ }
+ // Setup new timers.
+ if d != 0 && d <= nanotime() {
+ d = -1
+ }
+ if mode == 'r' || mode == 'r'+'w' {
+ pd.rd = d
+ }
+ if mode == 'w' || mode == 'r'+'w' {
+ pd.wd = d
+ }
+ if pd.rd > 0 && pd.rd == pd.wd {
+ pd.rt.f = netpollDeadline
+ pd.rt.when = pd.rd
+ // Copy current seq into the timer arg.
+ // Timer func will check the seq against current descriptor seq,
+ // if they differ the descriptor was reused or timers were reset.
+ pd.rt.arg = pd
+ pd.rt.seq = pd.seq
+ addtimer(&pd.rt)
+ } else {
+ if pd.rd > 0 {
+ pd.rt.f = netpollReadDeadline
+ pd.rt.when = pd.rd
+ pd.rt.arg = pd
+ pd.rt.seq = pd.seq
+ addtimer(&pd.rt)
+ }
+ if pd.wd > 0 {
+ pd.wt.f = netpollWriteDeadline
+ pd.wt.when = pd.wd
+ pd.wt.arg = pd
+ pd.wt.seq = pd.seq
+ addtimer(&pd.wt)
+ }
+ }
+ // If we set the new deadline in the past, unblock currently pending IO if any.
+ var rg, wg *g
+ atomicstorep(unsafe.Pointer(&wg), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
+ if pd.rd < 0 {
+ rg = netpollunblock(pd, 'r', false)
+ }
+ if pd.wd < 0 {
+ wg = netpollunblock(pd, 'w', false)
+ }
+ unlock(&pd.lock)
+ if rg != nil {
+ goready(rg)
+ }
+ if wg != nil {
+ goready(wg)
+ }
+}
+
+func netpollUnblock(pd *pollDesc) {
+ lock(&pd.lock)
+ if pd.closing {
+ gothrow("netpollUnblock: already closing")
+ }
+ pd.closing = true
+ pd.seq++
+ var rg, wg *g
+ atomicstorep(unsafe.Pointer(&rg), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
+ rg = netpollunblock(pd, 'r', false)
+ wg = netpollunblock(pd, 'w', false)
+ if pd.rt.f != nil {
+ deltimer(&pd.rt)
+ pd.rt.f = nil
+ }
+ if pd.wt.f != nil {
+ deltimer(&pd.wt)
+ pd.wt.f = nil
+ }
+ unlock(&pd.lock)
+ if rg != nil {
+ goready(rg)
+ }
+ if wg != nil {
+ goready(wg)
+ }
+}
+
+func netpollfd(pd *pollDesc) uintptr {
+ return pd.fd
+}
+
+func netpolluser(pd *pollDesc) *unsafe.Pointer {
+ return &pd.user
+}
+
+func netpollclosing(pd *pollDesc) bool {
+ return pd.closing
+}
+
+func netpolllock(pd *pollDesc) {
+ lock(&pd.lock)
+}
+
+func netpollunlock(pd *pollDesc) {
+ unlock(&pd.lock)
+}
+
+// make pd ready; newly runnable goroutines (if any) are prepended to *gpp
+func netpollready(gpp **g, pd *pollDesc, mode int32) {
+ var rg, wg *g
+ if mode == 'r' || mode == 'r'+'w' {
+ rg = netpollunblock(pd, 'r', true)
+ }
+ if mode == 'w' || mode == 'r'+'w' {
+ wg = netpollunblock(pd, 'w', true)
+ }
+ if rg != nil {
+ rg.schedlink = *gpp
+ *gpp = rg
+ }
+ if wg != nil {
+ wg.schedlink = *gpp
+ *gpp = wg
+ }
+}
+
+func netpollcheckerr(pd *pollDesc, mode int32) int {
+ if pd.closing {
+ return 1 // errClosing
+ }
+ if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
+ return 2 // errTimeout
+ }
+ return 0
+}
+
+func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
+ return casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
+}
+
+// returns true if IO is ready, or false if it timed out or was closed
+// waitio - wait only for completed IO, ignore errors
+func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
+ gpp := &pd.rg
+ if mode == 'w' {
+ gpp = &pd.wg
+ }
+
+ // set the gpp semaphore to WAIT
+ for {
+ old := *gpp
+ if old == pdReady {
+ *gpp = 0
+ return true
+ }
+ if old != 0 {
+ gothrow("netpollblock: double wait")
+ }
+ if casuintptr(gpp, 0, pdWait) {
+ break
+ }
+ }
+
+ // need to recheck error states after setting gpp to WAIT
+ // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
+ // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
+ if waitio || netpollcheckerr(pd, mode) == 0 {
+ f := netpollblockcommit
+ gopark(**(**unsafe.Pointer)(unsafe.Pointer(&f)), unsafe.Pointer(gpp), "IO wait")
+ }
+ // be careful to not lose concurrent READY notification
+ old := xchguintptr(gpp, 0)
+ if old > pdWait {
+ gothrow("netpollblock: corrupted state")
+ }
+ return old == pdReady
+}
+
+func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
+ gpp := &pd.rg
+ if mode == 'w' {
+ gpp = &pd.wg
+ }
+
+ for {
+ old := *gpp
+ if old == pdReady {
+ return nil
+ }
+ if old == 0 && !ioready {
+ // Only set READY for ioready. runtime_pollWait
+ // will check for timeout/cancel before waiting.
+ return nil
+ }
+ var new uintptr
+ if ioready {
+ new = pdReady
+ }
+ if casuintptr(gpp, old, new) {
+ if old == pdReady || old == pdWait {
+ old = 0
+ }
+ return (*g)(unsafe.Pointer(old))
+ }
+ }
+}
+
+func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
+ lock(&pd.lock)
+ // Seq arg is seq when the timer was set.
+ // If it's stale, ignore the timer event.
+ if seq != pd.seq {
+ // The descriptor was reused or timers were reset.
+ unlock(&pd.lock)
+ return
+ }
+ var rg *g
+ if read {
+ if pd.rd <= 0 || pd.rt.f == nil {
+ gothrow("netpolldeadlineimpl: inconsistent read deadline")
+ }
+ pd.rd = -1
+ atomicstorep(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
+ rg = netpollunblock(pd, 'r', false)
+ }
+ var wg *g
+ if write {
+ if pd.wd <= 0 || pd.wt.f == nil && !read {
+ gothrow("netpolldeadlineimpl: inconsistent write deadline")
+ }
+ pd.wd = -1
+ atomicstorep(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
+ wg = netpollunblock(pd, 'w', false)
+ }
+ unlock(&pd.lock)
+ if rg != nil {
+ goready(rg)
+ }
+ if wg != nil {
+ goready(wg)
+ }
+}
+
+func netpollDeadline(arg interface{}, seq uintptr) {
+ netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
+}
+
+func netpollReadDeadline(arg interface{}, seq uintptr) {
+ netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
+}
+
+func netpollWriteDeadline(arg interface{}, seq uintptr) {
+ netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
+}
+
+func (c *pollCache) alloc() *pollDesc {
+ lock(&c.lock)
+ if c.first == nil {
+ const pdSize = unsafe.Sizeof(pollDesc{})
+ n := pollBlockSize / pdSize
+ if n == 0 {
+ n = 1
+ }
+ // Must be in non-GC memory because it can be referenced
+ // only from epoll/kqueue internals.
+ mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
+ for i := uintptr(0); i < n; i++ {
+ pd := (*pollDesc)(add(mem, i*pdSize))
+ pd.link = c.first
+ c.first = pd
+ }
+ }
+ pd := c.first
+ c.first = pd.link
+ unlock(&c.lock)
+ return pd
+}
diff --git a/src/runtime/netpoll_epoll.go b/src/runtime/netpoll_epoll.go
new file mode 100644
index 000000000..ecfc9cdde
--- /dev/null
+++ b/src/runtime/netpoll_epoll.go
@@ -0,0 +1,97 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+
+package runtime
+
+import "unsafe"
+
+func epollcreate(size int32) int32
+func epollcreate1(flags int32) int32
+
+//go:noescape
+func epollctl(epfd, op, fd int32, ev *epollevent) int32
+
+//go:noescape
+func epollwait(epfd int32, ev *epollevent, nev, timeout int32) int32
+func closeonexec(fd int32)
+
+var (
+ epfd int32 = -1 // epoll descriptor
+ netpolllasterr int32
+)
+
+func netpollinit() {
+ epfd = epollcreate1(_EPOLL_CLOEXEC)
+ if epfd >= 0 {
+ return
+ }
+ epfd = epollcreate(1024)
+ if epfd >= 0 {
+ closeonexec(epfd)
+ return
+ }
+ println("netpollinit: failed to create epoll descriptor", -epfd)
+ gothrow("netpollinit: failed to create descriptor")
+}
+
+func netpollopen(fd uintptr, pd *pollDesc) int32 {
+ var ev epollevent
+ ev.events = _EPOLLIN | _EPOLLOUT | _EPOLLRDHUP | _EPOLLET
+ *(**pollDesc)(unsafe.Pointer(&ev.data)) = pd
+ return -epollctl(epfd, _EPOLL_CTL_ADD, int32(fd), &ev)
+}
+
+func netpollclose(fd uintptr) int32 {
+ var ev epollevent
+ return -epollctl(epfd, _EPOLL_CTL_DEL, int32(fd), &ev)
+}
+
+func netpollarm(pd *pollDesc, mode int) {
+ gothrow("unused")
+}
+
+// polls for ready network connections
+// returns list of goroutines that become runnable
+func netpoll(block bool) (gp *g) {
+ if epfd == -1 {
+ return
+ }
+ waitms := int32(-1)
+ if !block {
+ waitms = 0
+ }
+ var events [128]epollevent
+retry:
+ n := epollwait(epfd, &events[0], int32(len(events)), waitms)
+ if n < 0 {
+ if n != -_EINTR && n != netpolllasterr {
+ netpolllasterr = n
+ println("runtime: epollwait on fd", epfd, "failed with", -n)
+ }
+ goto retry
+ }
+ for i := int32(0); i < n; i++ {
+ ev := &events[i]
+ if ev.events == 0 {
+ continue
+ }
+ var mode int32
+ if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
+ mode += 'r'
+ }
+ if ev.events&(_EPOLLOUT|_EPOLLHUP|_EPOLLERR) != 0 {
+ mode += 'w'
+ }
+ if mode != 0 {
+ pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
+ netpollready((**g)(noescape(unsafe.Pointer(&gp))), pd, mode)
+ }
+ }
+ if block && gp == nil {
+ goto retry
+ }
+ return gp
+}
diff --git a/src/runtime/netpoll_kqueue.go b/src/runtime/netpoll_kqueue.go
new file mode 100644
index 000000000..d6d55b97b
--- /dev/null
+++ b/src/runtime/netpoll_kqueue.go
@@ -0,0 +1,101 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd netbsd openbsd
+
+package runtime
+
+// Integrated network poller (kqueue-based implementation).
+
+import "unsafe"
+
+func kqueue() int32
+
+//go:noescape
+func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32
+func closeonexec(fd int32)
+
+var (
+ kq int32 = -1
+ netpolllasterr int32
+)
+
+func netpollinit() {
+ kq = kqueue()
+ if kq < 0 {
+ println("netpollinit: kqueue failed with", -kq)
+ gothrow("netpollinit: kqueue failed")
+ }
+ closeonexec(kq)
+}
+
+func netpollopen(fd uintptr, pd *pollDesc) int32 {
+ // Arm both EVFILT_READ and EVFILT_WRITE in edge-triggered mode (EV_CLEAR)
+ // for the whole fd lifetime. The notifications are automatically unregistered
+ // when fd is closed.
+ var ev [2]keventt
+ *(*uintptr)(unsafe.Pointer(&ev[0].ident)) = fd
+ ev[0].filter = _EVFILT_READ
+ ev[0].flags = _EV_ADD | _EV_CLEAR
+ ev[0].fflags = 0
+ ev[0].data = 0
+ ev[0].udata = (*byte)(unsafe.Pointer(pd))
+ ev[1] = ev[0]
+ ev[1].filter = _EVFILT_WRITE
+ n := kevent(kq, &ev[0], 2, nil, 0, nil)
+ if n < 0 {
+ return -n
+ }
+ return 0
+}
+
+func netpollclose(fd uintptr) int32 {
+ // Don't need to unregister because calling close()
+ // on fd will remove any kevents that reference the descriptor.
+ return 0
+}
+
+func netpollarm(pd *pollDesc, mode int) {
+ gothrow("unused")
+}
+
+// Polls for ready network connections.
+// Returns list of goroutines that become runnable.
+func netpoll(block bool) (gp *g) {
+ if kq == -1 {
+ return
+ }
+ var tp *timespec
+ var ts timespec
+ if !block {
+ tp = &ts
+ }
+ var events [64]keventt
+retry:
+ n := kevent(kq, nil, 0, &events[0], int32(len(events)), tp)
+ if n < 0 {
+ if n != -_EINTR && n != netpolllasterr {
+ netpolllasterr = n
+ println("runtime: kevent on fd", kq, "failed with", -n)
+ }
+ goto retry
+ }
+ for i := 0; i < int(n); i++ {
+ ev := &events[i]
+ var mode int32
+ if ev.filter == _EVFILT_READ {
+ mode += 'r'
+ }
+ if ev.filter == _EVFILT_WRITE {
+ mode += 'w'
+ }
+ if mode != 0 {
+ netpollready((**g)(noescape(unsafe.Pointer(&gp))), (*pollDesc)(unsafe.Pointer(ev.udata)), mode)
+ }
+ }
+ if block && gp == nil {
+ goto retry
+ }
+ return gp
+}
diff --git a/src/runtime/netpoll_nacl.go b/src/runtime/netpoll_nacl.go
new file mode 100644
index 000000000..5cbc30032
--- /dev/null
+++ b/src/runtime/netpoll_nacl.go
@@ -0,0 +1,26 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fake network poller for NaCl.
+// Should never be used, because NaCl network connections do not honor "SetNonblock".
+
+package runtime
+
+func netpollinit() {
+}
+
+func netpollopen(fd uintptr, pd *pollDesc) int32 {
+ return 0
+}
+
+func netpollclose(fd uintptr) int32 {
+ return 0
+}
+
+func netpollarm(pd *pollDesc, mode int) {
+}
+
+func netpoll(block bool) *g {
+ return nil
+}
diff --git a/src/runtime/netpoll_solaris.c b/src/runtime/netpoll_solaris.c
new file mode 100644
index 000000000..d422719cf
--- /dev/null
+++ b/src/runtime/netpoll_solaris.c
@@ -0,0 +1,264 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+
+// Solaris runtime-integrated network poller.
+//
+// Solaris uses event ports for scalable network I/O. Event
+// ports are level-triggered, unlike epoll and kqueue which
+// can be configured in both level-triggered and edge-triggered
+// mode. Level triggering means we have to keep track of a few things
+// ourselves. After we receive an event for a file descriptor,
+// it's our responsibility to ask again to be notified for future
+// events for that descriptor. When doing this we must keep track of
+// what kind of events the goroutines are currently interested in,
+// for example a fd may be open both for reading and writing.
+//
+// A description of the high level operation of this code
+// follows. Networking code will get a file descriptor by some means
+// and will register it with the netpolling mechanism by a code path
+// that eventually calls runtime·netpollopen. runtime·netpollopen
+// calls port_associate with an empty event set. That means that we
+// will not receive any events at this point. The association needs
+// to be done at this early point because we need to process the I/O
+// readiness notification at some point in the future. If I/O becomes
+// ready when nobody is listening, when we finally care about it,
+// nobody will tell us anymore.
+//
+// Beside calling runtime·netpollopen, the networking code paths
+// will call runtime·netpollarm each time goroutines are interested
+// in doing network I/O. Because now we know what kind of I/O we
+// are interested in (reading/writing), we can call port_associate
+// passing the correct type of event set (POLLIN/POLLOUT). As we made
+// sure to have already associated the file descriptor with the port,
+// when we now call port_associate, we will unblock the main poller
+// loop (in runtime·netpoll) right away if the socket is actually
+// ready for I/O.
+//
+// The main poller loop runs in its own thread waiting for events
+// using port_getn. When an event happens, it will tell the scheduler
+// about it using runtime·netpollready. Besides doing this, it must
+// also re-associate the events that were not part of this current
+// notification with the file descriptor. Failing to do this would
+// mean each notification will prevent concurrent code using the
+// same file descriptor in parallel.
+//
+// The logic dealing with re-associations is encapsulated in
+// runtime·netpollupdate. This function takes care to associate the
+// descriptor only with the subset of events that were previously
+// part of the association, except the one that just happened. We
+// can't re-associate with that right away, because event ports
+// are level triggered so it would cause a busy loop. Instead, that
+// association is effected only by the runtime·netpollarm code path,
+// when Go code actually asks for I/O.
+//
+// The open and arming mechanisms are serialized using the lock
+// inside PollDesc. This is required because the netpoll loop runs
+// asynchronously with respect to other Go code and by the time we get
+// to call port_associate to update the association in the loop, the
+// file descriptor might have been closed and reopened already. The
+// lock allows runtime·netpollupdate to be called synchronously from
+// the loop thread while preventing other threads from operating on the
+// same PollDesc, so once we unblock in the main loop, until we loop
+// again we know for sure we are always talking about the same file
+// descriptor and can safely access the data we want (the event set).
+
+#pragma dynimport libc·fcntl fcntl "libc.so"
+#pragma dynimport libc·port_create port_create "libc.so"
+#pragma dynimport libc·port_associate port_associate "libc.so"
+#pragma dynimport libc·port_dissociate port_dissociate "libc.so"
+#pragma dynimport libc·port_getn port_getn "libc.so"
+extern uintptr libc·fcntl;
+extern uintptr libc·port_create;
+extern uintptr libc·port_associate;
+extern uintptr libc·port_dissociate;
+extern uintptr libc·port_getn;
+
+#define errno (*g->m->perrno)
+
+int32
+runtime·fcntl(int32 fd, int32 cmd, uintptr arg)
+{
+ return runtime·sysvicall3(libc·fcntl, (uintptr)fd, (uintptr)cmd, (uintptr)arg);
+}
+
+int32
+runtime·port_create(void)
+{
+ return runtime·sysvicall0(libc·port_create);
+}
+
+int32
+runtime·port_associate(int32 port, int32 source, uintptr object, int32 events, uintptr user)
+{
+ return runtime·sysvicall5(libc·port_associate, (uintptr)port, (uintptr)source, object, (uintptr)events, user);
+}
+
+int32
+runtime·port_dissociate(int32 port, int32 source, uintptr object)
+{
+ return runtime·sysvicall3(libc·port_dissociate, (uintptr)port, (uintptr)source, object);
+}
+
+int32
+runtime·port_getn(int32 port, PortEvent *evs, uint32 max, uint32 *nget, Timespec *timeout)
+{
+ return runtime·sysvicall5(libc·port_getn, (uintptr)port, (uintptr)evs, (uintptr)max, (uintptr)nget, (uintptr)timeout);
+}
+
+static int32 portfd = -1;
+
+void
+runtime·netpollinit(void)
+{
+ if((portfd = runtime·port_create()) >= 0) {
+ runtime·fcntl(portfd, F_SETFD, FD_CLOEXEC);
+ return;
+ }
+
+ runtime·printf("netpollinit: failed to create port (%d)\n", errno);
+ runtime·throw("netpollinit: failed to create port");
+}
+
+int32
+runtime·netpollopen(uintptr fd, PollDesc *pd)
+{
+ int32 r;
+
+ runtime·netpolllock(pd);
+ // We don't register for any specific type of events yet, that's
+ // netpollarm's job. We merely ensure we call port_associate before
+ // asynchronous connect/accept completes, so when we actually want
+ // to do any I/O, the call to port_associate (from netpollarm,
+ // with the interested event set) will unblock port_getn right away
+ // because of the I/O readiness notification.
+ *runtime·netpolluser(pd) = 0;
+ r = runtime·port_associate(portfd, PORT_SOURCE_FD, fd, 0, (uintptr)pd);
+ runtime·netpollunlock(pd);
+ return r;
+}
+
+int32
+runtime·netpollclose(uintptr fd)
+{
+ return runtime·port_dissociate(portfd, PORT_SOURCE_FD, fd);
+}
+
+// Updates the association with a new set of interested events. After
+// this call, port_getn will return one and only one event for that
+// particular descriptor, so this function needs to be called again.
+void
+runtime·netpollupdate(PollDesc* pd, uint32 set, uint32 clear)
+{
+ uint32 *ep, old, events;
+ uintptr fd = runtime·netpollfd(pd);
+ ep = (uint32*)runtime·netpolluser(pd);
+
+ if(runtime·netpollclosing(pd))
+ return;
+
+ old = *ep;
+ events = (old & ~clear) | set;
+ if(old == events)
+ return;
+
+ if(events && runtime·port_associate(portfd, PORT_SOURCE_FD, fd, events, (uintptr)pd) != 0) {
+ runtime·printf("netpollupdate: failed to associate (%d)\n", errno);
+ runtime·throw("netpollupdate: failed to associate");
+ }
+ *ep = events;
+}
+
+// subscribe the fd to the port such that port_getn will return one event.
+void
+runtime·netpollarm(PollDesc* pd, int32 mode)
+{
+ runtime·netpolllock(pd);
+ switch(mode) {
+ case 'r':
+ runtime·netpollupdate(pd, POLLIN, 0);
+ break;
+ case 'w':
+ runtime·netpollupdate(pd, POLLOUT, 0);
+ break;
+ default:
+ runtime·throw("netpollarm: bad mode");
+ }
+ runtime·netpollunlock(pd);
+}
+
+// polls for ready network connections
+// returns list of goroutines that become runnable
+G*
+runtime·netpoll(bool block)
+{
+ static int32 lasterr;
+ PortEvent events[128], *ev;
+ PollDesc *pd;
+ int32 i, mode, clear;
+ uint32 n;
+ Timespec *wait = nil, zero;
+ G *gp;
+
+ if(portfd == -1)
+ return (nil);
+
+ if(!block) {
+ zero.tv_sec = 0;
+ zero.tv_nsec = 0;
+ wait = &zero;
+ }
+
+retry:
+ n = 1;
+ if(runtime·port_getn(portfd, events, nelem(events), &n, wait) < 0) {
+ if(errno != EINTR && errno != lasterr) {
+ lasterr = errno;
+ runtime·printf("runtime: port_getn on fd %d failed with %d\n", portfd, errno);
+ }
+ goto retry;
+ }
+
+ gp = nil;
+ for(i = 0; i < n; i++) {
+ ev = &events[i];
+
+ if(ev->portev_events == 0)
+ continue;
+ pd = (PollDesc *)ev->portev_user;
+
+ mode = 0;
+ clear = 0;
+ if(ev->portev_events & (POLLIN|POLLHUP|POLLERR)) {
+ mode += 'r';
+ clear |= POLLIN;
+ }
+ if(ev->portev_events & (POLLOUT|POLLHUP|POLLERR)) {
+ mode += 'w';
+ clear |= POLLOUT;
+ }
+ // To effect edge-triggered events, we need to be sure to
+ // update our association with whatever events were not
+ // set with the event. For example if we are registered
+ // for POLLIN|POLLOUT, and we get POLLIN, besides waking
+ // the goroutine interested in POLLIN we have to not forget
+ // about the one interested in POLLOUT.
+ if(clear != 0) {
+ runtime·netpolllock(pd);
+ runtime·netpollupdate(pd, 0, clear);
+ runtime·netpollunlock(pd);
+ }
+
+ if(mode)
+ runtime·netpollready(&gp, pd, mode);
+ }
+
+ if(block && gp == nil)
+ goto retry;
+ return gp;
+}
diff --git a/src/runtime/netpoll_stub.c b/src/runtime/netpoll_stub.c
new file mode 100644
index 000000000..b7a8f2944
--- /dev/null
+++ b/src/runtime/netpoll_stub.c
@@ -0,0 +1,18 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build plan9
+
+#include "runtime.h"
+
+// Polls for ready network connections.
+// Returns list of goroutines that become runnable.
+G*
+runtime·netpoll(bool block)
+{
+ // Implementation for platforms that do not support
+ // integrated network poller.
+ USED(block);
+ return nil;
+}
diff --git a/src/runtime/netpoll_windows.c b/src/runtime/netpoll_windows.c
new file mode 100644
index 000000000..64da41ad9
--- /dev/null
+++ b/src/runtime/netpoll_windows.c
@@ -0,0 +1,163 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+
+#define DWORD_MAX 0xffffffff
+
+#pragma dynimport runtime·CreateIoCompletionPort CreateIoCompletionPort "kernel32.dll"
+#pragma dynimport runtime·GetQueuedCompletionStatus GetQueuedCompletionStatus "kernel32.dll"
+#pragma dynimport runtime·WSAGetOverlappedResult WSAGetOverlappedResult "ws2_32.dll"
+
+extern void *runtime·CreateIoCompletionPort;
+extern void *runtime·GetQueuedCompletionStatus;
+extern void *runtime·WSAGetOverlappedResult;
+
+#define INVALID_HANDLE_VALUE ((uintptr)-1)
+
+// net_op must be the same as the beginning of net.operation. Keep these in sync.
+typedef struct net_op net_op;
+struct net_op
+{
+ // used by windows
+ Overlapped o;
+ // used by netpoll
+ PollDesc* pd;
+ int32 mode;
+ int32 errno;
+ uint32 qty;
+};
+
+typedef struct OverlappedEntry OverlappedEntry;
+struct OverlappedEntry
+{
+ uintptr key;
+ net_op* op; // In reality it's Overlapped*, but we cast it to net_op* anyway.
+ uintptr internal;
+ uint32 qty;
+};
+
+static void handlecompletion(G **gpp, net_op *o, int32 errno, uint32 qty);
+
+static uintptr iocphandle = INVALID_HANDLE_VALUE; // completion port io handle
+
+void
+runtime·netpollinit(void)
+{
+ iocphandle = (uintptr)runtime·stdcall4(runtime·CreateIoCompletionPort, INVALID_HANDLE_VALUE, 0, 0, DWORD_MAX);
+ if(iocphandle == 0) {
+ runtime·printf("netpoll: failed to create iocp handle (errno=%d)\n", runtime·getlasterror());
+ runtime·throw("netpoll: failed to create iocp handle");
+ }
+ return;
+}
+
+int32
+runtime·netpollopen(uintptr fd, PollDesc *pd)
+{
+ USED(pd);
+ if(runtime·stdcall4(runtime·CreateIoCompletionPort, fd, iocphandle, 0, 0) == 0)
+ return -runtime·getlasterror();
+ return 0;
+}
+
+int32
+runtime·netpollclose(uintptr fd)
+{
+ // nothing to do
+ USED(fd);
+ return 0;
+}
+
+void
+runtime·netpollarm(PollDesc* pd, int32 mode)
+{
+ USED(pd, mode);
+ runtime·throw("unused");
+}
+
+// Polls for completed network IO.
+// Returns list of goroutines that become runnable.
+G*
+runtime·netpoll(bool block)
+{
+ OverlappedEntry entries[64];
+ uint32 wait, qty, key, flags, n, i;
+ int32 errno;
+ net_op *op;
+ G *gp;
+
+ if(iocphandle == INVALID_HANDLE_VALUE)
+ return nil;
+ gp = nil;
+ wait = 0;
+ if(block)
+ wait = INFINITE;
+retry:
+ if(runtime·GetQueuedCompletionStatusEx != nil) {
+ n = nelem(entries) / runtime·gomaxprocs;
+ if(n < 8)
+ n = 8;
+ if(block)
+ g->m->blocked = true;
+ if(runtime·stdcall6(runtime·GetQueuedCompletionStatusEx, iocphandle, (uintptr)entries, n, (uintptr)&n, wait, 0) == 0) {
+ g->m->blocked = false;
+ errno = runtime·getlasterror();
+ if(!block && errno == WAIT_TIMEOUT)
+ return nil;
+ runtime·printf("netpoll: GetQueuedCompletionStatusEx failed (errno=%d)\n", errno);
+ runtime·throw("netpoll: GetQueuedCompletionStatusEx failed");
+ }
+ g->m->blocked = false;
+ for(i = 0; i < n; i++) {
+ op = entries[i].op;
+ errno = 0;
+ qty = 0;
+ if(runtime·stdcall5(runtime·WSAGetOverlappedResult, runtime·netpollfd(op->pd), (uintptr)op, (uintptr)&qty, 0, (uintptr)&flags) == 0)
+ errno = runtime·getlasterror();
+ handlecompletion(&gp, op, errno, qty);
+ }
+ } else {
+ op = nil;
+ errno = 0;
+ qty = 0;
+ if(block)
+ g->m->blocked = true;
+ if(runtime·stdcall5(runtime·GetQueuedCompletionStatus, iocphandle, (uintptr)&qty, (uintptr)&key, (uintptr)&op, wait) == 0) {
+ g->m->blocked = false;
+ errno = runtime·getlasterror();
+ if(!block && errno == WAIT_TIMEOUT)
+ return nil;
+ if(op == nil) {
+ runtime·printf("netpoll: GetQueuedCompletionStatus failed (errno=%d)\n", errno);
+ runtime·throw("netpoll: GetQueuedCompletionStatus failed");
+ }
+ // dequeued failed IO packet, so report that
+ }
+ g->m->blocked = false;
+ handlecompletion(&gp, op, errno, qty);
+ }
+ if(block && gp == nil)
+ goto retry;
+ return gp;
+}
+
+static void
+handlecompletion(G **gpp, net_op *op, int32 errno, uint32 qty)
+{
+ int32 mode;
+
+ if(op == nil)
+ runtime·throw("netpoll: GetQueuedCompletionStatus returned op == nil");
+ mode = op->mode;
+ if(mode != 'r' && mode != 'w') {
+ runtime·printf("netpoll: GetQueuedCompletionStatus returned invalid mode=%d\n", mode);
+ runtime·throw("netpoll: GetQueuedCompletionStatus returned invalid mode");
+ }
+ op->errno = errno;
+ op->qty = qty;
+ runtime·netpollready(gpp, op->pd, mode);
+}
diff --git a/src/runtime/noasm_arm.go b/src/runtime/noasm_arm.go
new file mode 100644
index 000000000..dd3ef8267
--- /dev/null
+++ b/src/runtime/noasm_arm.go
@@ -0,0 +1,54 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Routines that are implemented in assembly in asm_{amd64,386}.s
+// but are implemented in Go for arm.
+
+package runtime
+
+func cmpstring(s1, s2 string) int {
+ l := len(s1)
+ if len(s2) < l {
+ l = len(s2)
+ }
+ for i := 0; i < l; i++ {
+ c1, c2 := s1[i], s2[i]
+ if c1 < c2 {
+ return -1
+ }
+ if c1 > c2 {
+ return +1
+ }
+ }
+ if len(s1) < len(s2) {
+ return -1
+ }
+ if len(s1) > len(s2) {
+ return +1
+ }
+ return 0
+}
+
+func cmpbytes(s1, s2 []byte) int {
+ l := len(s1)
+ if len(s2) < l {
+ l = len(s2)
+ }
+ for i := 0; i < l; i++ {
+ c1, c2 := s1[i], s2[i]
+ if c1 < c2 {
+ return -1
+ }
+ if c1 > c2 {
+ return +1
+ }
+ }
+ if len(s1) < len(s2) {
+ return -1
+ }
+ if len(s1) > len(s2) {
+ return +1
+ }
+ return 0
+}
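These helpers follow the same three-way contract as the exported bytes.Compare; a quick illustration of the expected results:

	package main

	import (
		"bytes"
		"fmt"
	)

	func main() {
		// -1 if a sorts before b, +1 if after, 0 if equal.
		fmt.Println(bytes.Compare([]byte("abc"), []byte("abd"))) // -1
		fmt.Println(bytes.Compare([]byte("abc"), []byte("ab")))  // +1
		fmt.Println(bytes.Compare([]byte("abc"), []byte("abc"))) // 0
	}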
diff --git a/src/runtime/norace_test.go b/src/runtime/norace_test.go
new file mode 100644
index 000000000..3b171877a
--- /dev/null
+++ b/src/runtime/norace_test.go
@@ -0,0 +1,46 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains tests that cannot run under the race detector for some reason.
+// +build !race
+
+package runtime_test
+
+import (
+ "runtime"
+ "testing"
+)
+
+// Syscall tests split stack between Entersyscall and Exitsyscall under race detector.
+func BenchmarkSyscall(b *testing.B) {
+ benchmarkSyscall(b, 0, 1)
+}
+
+func BenchmarkSyscallWork(b *testing.B) {
+ benchmarkSyscall(b, 100, 1)
+}
+
+func BenchmarkSyscallExcess(b *testing.B) {
+ benchmarkSyscall(b, 0, 4)
+}
+
+func BenchmarkSyscallExcessWork(b *testing.B) {
+ benchmarkSyscall(b, 100, 4)
+}
+
+func benchmarkSyscall(b *testing.B, work, excess int) {
+ b.SetParallelism(excess)
+ b.RunParallel(func(pb *testing.PB) {
+ foo := 42
+ for pb.Next() {
+ runtime.Entersyscall()
+ for i := 0; i < work; i++ {
+ foo *= 2
+ foo /= 2
+ }
+ runtime.Exitsyscall()
+ }
+ _ = foo
+ })
+}
diff --git a/src/runtime/os_android.c b/src/runtime/os_android.c
new file mode 100644
index 000000000..58e0dac93
--- /dev/null
+++ b/src/runtime/os_android.c
@@ -0,0 +1,16 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+
+// Export the runtime entry point symbol.
+//
+// Used by the app package to start the Go runtime after loading
+// a shared library via JNI. See code.google.com/p/go.mobile/app.
+
+void _rt0_arm_linux1();
+#pragma cgo_export_static _rt0_arm_linux1
+#pragma cgo_export_dynamic _rt0_arm_linux1
diff --git a/src/runtime/os_android.h b/src/runtime/os_android.h
new file mode 100644
index 000000000..c7c1098e8
--- /dev/null
+++ b/src/runtime/os_android.h
@@ -0,0 +1 @@
+#include "os_linux.h"
diff --git a/src/runtime/os_darwin.c b/src/runtime/os_darwin.c
new file mode 100644
index 000000000..536f688cf
--- /dev/null
+++ b/src/runtime/os_darwin.c
@@ -0,0 +1,595 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+#include "stack.h"
+#include "textflag.h"
+
+extern SigTab runtime·sigtab[];
+
+static Sigset sigset_none;
+static Sigset sigset_all = ~(Sigset)0;
+
+static void
+unimplemented(int8 *name)
+{
+ runtime·prints(name);
+ runtime·prints(" not implemented\n");
+ *(int32*)1231 = 1231;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·semawakeup(M *mp)
+{
+ runtime·mach_semrelease(mp->waitsema);
+}
+
+static void
+semacreate(void)
+{
+ g->m->scalararg[0] = runtime·mach_semcreate();
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·semacreate(void)
+{
+ uintptr x;
+ void (*fn)(void);
+
+ fn = semacreate;
+ runtime·onM(&fn);
+ x = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+ return x;
+}
+
+// BSD interface for threading.
+void
+runtime·osinit(void)
+{
+ // bsdthread_register delayed until end of goenvs so that we
+ // can look at the environment first.
+
+ // Use sysctl to fetch hw.ncpu.
+ uint32 mib[2];
+ uint32 out;
+ int32 ret;
+ uintptr nout;
+
+ mib[0] = 6;
+ mib[1] = 3;
+ nout = sizeof out;
+ out = 0;
+ ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
+ if(ret >= 0)
+ runtime·ncpu = out;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ #pragma dataflag NOPTR
+ static byte urandom_data[HashRandomBytes];
+ int32 fd;
+ fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = urandom_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+
+ // Register our thread-creation callback (see sys_darwin_{amd64,386}.s)
+ // but only if we're not using cgo. If we are using cgo we need
+ // to let the C pthread library install its own thread-creation callback.
+ if(!runtime·iscgo) {
+ if(runtime·bsdthread_register() != 0) {
+ if(runtime·getenv("DYLD_INSERT_LIBRARIES"))
+ runtime·throw("runtime: bsdthread_register error (unset DYLD_INSERT_LIBRARIES)");
+ runtime·throw("runtime: bsdthread_register error");
+ }
+ }
+
+}
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ int32 errno;
+ Sigset oset;
+
+ mp->tls[0] = mp->id; // so 386 asm can find it
+ if(0){
+ runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
+ stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
+ }
+
+ runtime·sigprocmask(SIG_SETMASK, &sigset_all, &oset);
+ errno = runtime·bsdthread_create(stk, mp, mp->g0, runtime·mstart);
+ runtime·sigprocmask(SIG_SETMASK, &oset, nil);
+
+ if(errno < 0) {
+ runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -errno);
+ runtime·throw("runtime.newosproc");
+ }
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ // Initialize signal handling.
+ runtime·signalstack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+
+ runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+ runtime·signalstack(nil, 0);
+}
+
+// Mach IPC, to get at semaphores
+// Definitions are in /usr/include/mach on a Mac.
+
+static void
+macherror(int32 r, int8 *fn)
+{
+ runtime·prints("mach error ");
+ runtime·prints(fn);
+ runtime·prints(": ");
+ runtime·printint(r);
+ runtime·prints("\n");
+ runtime·throw("mach error");
+}
+
+enum
+{
+ DebugMach = 0
+};
+
+static MachNDR zerondr;
+
+#define MACH_MSGH_BITS(a, b) ((a) | ((b)<<8))
+
+static int32
+mach_msg(MachHeader *h,
+ int32 op,
+ uint32 send_size,
+ uint32 rcv_size,
+ uint32 rcv_name,
+ uint32 timeout,
+ uint32 notify)
+{
+ // TODO: Loop on interrupt.
+ return runtime·mach_msg_trap(h, op, send_size, rcv_size, rcv_name, timeout, notify);
+}
+
+// Mach RPC (MIG)
+
+enum
+{
+ MinMachMsg = 48,
+ Reply = 100,
+};
+
+#pragma pack on
+typedef struct CodeMsg CodeMsg;
+struct CodeMsg
+{
+ MachHeader h;
+ MachNDR NDR;
+ int32 code;
+};
+#pragma pack off
+
+static int32
+machcall(MachHeader *h, int32 maxsize, int32 rxsize)
+{
+ uint32 *p;
+ int32 i, ret, id;
+ uint32 port;
+ CodeMsg *c;
+
+ if((port = g->m->machport) == 0){
+ port = runtime·mach_reply_port();
+ g->m->machport = port;
+ }
+
+ h->msgh_bits |= MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND_ONCE);
+ h->msgh_local_port = port;
+ h->msgh_reserved = 0;
+ id = h->msgh_id;
+
+ if(DebugMach){
+ p = (uint32*)h;
+ runtime·prints("send:\t");
+ for(i=0; i<h->msgh_size/sizeof(p[0]); i++){
+ runtime·prints(" ");
+ runtime·printpointer((void*)p[i]);
+ if(i%8 == 7)
+ runtime·prints("\n\t");
+ }
+ if(i%8)
+ runtime·prints("\n");
+ }
+
+ ret = mach_msg(h, MACH_SEND_MSG|MACH_RCV_MSG,
+ h->msgh_size, maxsize, port, 0, 0);
+ if(ret != 0){
+ if(DebugMach){
+ runtime·prints("mach_msg error ");
+ runtime·printint(ret);
+ runtime·prints("\n");
+ }
+ return ret;
+ }
+
+ if(DebugMach){
+ p = (uint32*)h;
+ runtime·prints("recv:\t");
+ for(i=0; i<h->msgh_size/sizeof(p[0]); i++){
+ runtime·prints(" ");
+ runtime·printpointer((void*)p[i]);
+ if(i%8 == 7)
+ runtime·prints("\n\t");
+ }
+ if(i%8)
+ runtime·prints("\n");
+ }
+
+ if(h->msgh_id != id+Reply){
+ if(DebugMach){
+ runtime·prints("mach_msg reply id mismatch ");
+ runtime·printint(h->msgh_id);
+ runtime·prints(" != ");
+ runtime·printint(id+Reply);
+ runtime·prints("\n");
+ }
+ return -303; // MIG_REPLY_MISMATCH
+ }
+
+ // Look for a response giving the return value.
+ // Any call can send this back with an error,
+ // and some calls only have return values so they
+ // send it back on success too. I don't quite see how
+ // you know it's one of these and not the full response
+ // format, so just look if the message is right.
+ c = (CodeMsg*)h;
+ if(h->msgh_size == sizeof(CodeMsg)
+ && !(h->msgh_bits & MACH_MSGH_BITS_COMPLEX)){
+ if(DebugMach){
+ runtime·prints("mig result ");
+ runtime·printint(c->code);
+ runtime·prints("\n");
+ }
+ return c->code;
+ }
+
+ if(h->msgh_size != rxsize){
+ if(DebugMach){
+ runtime·prints("mach_msg reply size mismatch ");
+ runtime·printint(h->msgh_size);
+ runtime·prints(" != ");
+ runtime·printint(rxsize);
+ runtime·prints("\n");
+ }
+ return -307; // MIG_ARRAY_TOO_LARGE
+ }
+
+ return 0;
+}
+
+
+// Semaphores!
+
+enum
+{
+ Tmach_semcreate = 3418,
+ Rmach_semcreate = Tmach_semcreate + Reply,
+
+ Tmach_semdestroy = 3419,
+ Rmach_semdestroy = Tmach_semdestroy + Reply,
+
+ // Mach calls that get interrupted by Unix signals
+ // return this error code. We retry them.
+ KERN_ABORTED = 14,
+ KERN_OPERATION_TIMED_OUT = 49,
+};
+
+typedef struct Tmach_semcreateMsg Tmach_semcreateMsg;
+typedef struct Rmach_semcreateMsg Rmach_semcreateMsg;
+typedef struct Tmach_semdestroyMsg Tmach_semdestroyMsg;
+// Rmach_semdestroyMsg = CodeMsg
+
+#pragma pack on
+struct Tmach_semcreateMsg
+{
+ MachHeader h;
+ MachNDR ndr;
+ int32 policy;
+ int32 value;
+};
+
+struct Rmach_semcreateMsg
+{
+ MachHeader h;
+ MachBody body;
+ MachPort semaphore;
+};
+
+struct Tmach_semdestroyMsg
+{
+ MachHeader h;
+ MachBody body;
+ MachPort semaphore;
+};
+#pragma pack off
+
+uint32
+runtime·mach_semcreate(void)
+{
+ union {
+ Tmach_semcreateMsg tx;
+ Rmach_semcreateMsg rx;
+ uint8 pad[MinMachMsg];
+ } m;
+ int32 r;
+
+ m.tx.h.msgh_bits = 0;
+ m.tx.h.msgh_size = sizeof(m.tx);
+ m.tx.h.msgh_remote_port = runtime·mach_task_self();
+ m.tx.h.msgh_id = Tmach_semcreate;
+ m.tx.ndr = zerondr;
+
+ m.tx.policy = 0; // 0 = SYNC_POLICY_FIFO
+ m.tx.value = 0;
+
+ while((r = machcall(&m.tx.h, sizeof m, sizeof(m.rx))) != 0){
+ if(r == KERN_ABORTED) // interrupted
+ continue;
+ macherror(r, "semaphore_create");
+ }
+ if(m.rx.body.msgh_descriptor_count != 1)
+ unimplemented("mach_semcreate desc count");
+ return m.rx.semaphore.name;
+}
+
+void
+runtime·mach_semdestroy(uint32 sem)
+{
+ union {
+ Tmach_semdestroyMsg tx;
+ uint8 pad[MinMachMsg];
+ } m;
+ int32 r;
+
+ m.tx.h.msgh_bits = MACH_MSGH_BITS_COMPLEX;
+ m.tx.h.msgh_size = sizeof(m.tx);
+ m.tx.h.msgh_remote_port = runtime·mach_task_self();
+ m.tx.h.msgh_id = Tmach_semdestroy;
+ m.tx.body.msgh_descriptor_count = 1;
+ m.tx.semaphore.name = sem;
+ m.tx.semaphore.disposition = MACH_MSG_TYPE_MOVE_SEND;
+ m.tx.semaphore.type = 0;
+
+ while((r = machcall(&m.tx.h, sizeof m, 0)) != 0){
+ if(r == KERN_ABORTED) // interrupted
+ continue;
+ macherror(r, "semaphore_destroy");
+ }
+}
+
+// The other calls have simple system call traps in sys_darwin_{amd64,386}.s
+int32 runtime·mach_semaphore_wait(uint32 sema);
+int32 runtime·mach_semaphore_timedwait(uint32 sema, uint32 sec, uint32 nsec);
+int32 runtime·mach_semaphore_signal(uint32 sema);
+int32 runtime·mach_semaphore_signal_all(uint32 sema);
+
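+// Runs on the m stack, called via onM from runtime·semasleep below.
+// The 64-bit ns argument arrives split across scalararg[0] (low 32 bits)
+// and scalararg[1] (high 32 bits); the result is returned in scalararg[0]:
+// -1 if a timed wait was interrupted or timed out, 0 otherwise.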
+static void
+semasleep(void)
+{
+ int32 r, secs, nsecs;
+ int64 ns;
+
+ ns = (int64)(uint32)g->m->scalararg[0] | (int64)(uint32)g->m->scalararg[1]<<32;
+ g->m->scalararg[0] = 0;
+ g->m->scalararg[1] = 0;
+
+ if(ns >= 0) {
+ secs = runtime·timediv(ns, 1000000000, &nsecs);
+ r = runtime·mach_semaphore_timedwait(g->m->waitsema, secs, nsecs);
+ if(r == KERN_ABORTED || r == KERN_OPERATION_TIMED_OUT) {
+ g->m->scalararg[0] = -1;
+ return;
+ }
+ if(r != 0)
+ macherror(r, "semaphore_wait");
+ g->m->scalararg[0] = 0;
+ return;
+ }
+ while((r = runtime·mach_semaphore_wait(g->m->waitsema)) != 0) {
+ if(r == KERN_ABORTED) // interrupted
+ continue;
+ macherror(r, "semaphore_wait");
+ }
+ g->m->scalararg[0] = 0;
+ return;
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·semasleep(int64 ns)
+{
+ int32 r;
+ void (*fn)(void);
+
+ g->m->scalararg[0] = (uint32)ns;
+ g->m->scalararg[1] = (uint32)(ns>>32);
+ fn = semasleep;
+ runtime·onM(&fn);
+ r = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+ return r;
+}
+
+static int32 mach_semrelease_errno;
+
+static void
+mach_semrelease_fail(void)
+{
+ macherror(mach_semrelease_errno, "semaphore_signal");
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·mach_semrelease(uint32 sem)
+{
+ int32 r;
+ void (*fn)(void);
+
+ while((r = runtime·mach_semaphore_signal(sem)) != 0) {
+ if(r == KERN_ABORTED) // interrupted
+ continue;
+
+ // mach_semrelease must be completely nosplit,
+ // because it is called from Go code.
+ // If we're going to die, start that process on the m stack
+ // to avoid a Go stack split.
+ // Only do that if we're actually running on the g stack.
+ // We might be on the gsignal stack, and if so, onM will abort.
+ // We use the global variable instead of scalararg because
+ // we might be on the gsignal stack, having interrupted a
+ // normal call to onM. It doesn't quite matter, since the
+ // program is about to die, but better to be clean.
+ mach_semrelease_errno = r;
+ fn = mach_semrelease_fail;
+ if(g == g->m->curg)
+ runtime·onM(&fn);
+ else
+ fn();
+ }
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGBUS:
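+		// A fault address below 0x1000 (the first page) is reported as a
+		// nil pointer dereference rather than an unexpected fault.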
+ if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGSEGV:
+ if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGFPE:
+ switch(g->sigcode0) {
+ case FPE_INTDIV:
+ runtime·panicstring("integer divide by zero");
+ case FPE_INTOVF:
+ runtime·panicstring("integer overflow");
+ }
+ runtime·panicstring("floating point error");
+ }
+ runtime·panicstring(runtime·sigtab[g->sig].name);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·osyield(void)
+{
+ runtime·usleep(1);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ // NOTE(rsc): Could use getrlimit here,
+ // like on FreeBSD or Linux, but Darwin doesn't enforce
+ // ulimit -v, so it's unclear why we'd try to stay within
+ // the limit.
+ return 0;
+}
+
+void
+runtime·setsig(int32 i, GoSighandler *fn, bool restart)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
+ if(restart)
+ sa.sa_flags |= SA_RESTART;
+ sa.sa_mask = ~(uintptr)0;
+ sa.sa_tramp = (void*)runtime·sigtramp; // runtime·sigtramp's job is to call into real handler
+ *(uintptr*)sa.__sigaction_u = (uintptr)fn;
+ runtime·sigaction(i, &sa, nil);
+}
+
+GoSighandler*
+runtime·getsig(int32 i)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ runtime·sigaction(i, nil, &sa);
+ return *(void**)sa.__sigaction_u;
+}
+
+void
+runtime·signalstack(byte *p, int32 n)
+{
+ StackT st;
+
+ st.ss_sp = (void*)p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ runtime·sigaltstack(&st, nil);
+}
+
+void
+runtime·unblocksignals(void)
+{
+ runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
+}
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
new file mode 100644
index 000000000..e0f63ddb9
--- /dev/null
+++ b/src/runtime/os_darwin.go
@@ -0,0 +1,26 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func bsdthread_create(stk, mm, gg, fn unsafe.Pointer) int32
+func bsdthread_register() int32
+func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32
+func mach_reply_port() uint32
+func mach_task_self() uint32
+func mach_thread_self() uint32
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func sigprocmask(sig int32, new, old unsafe.Pointer)
+func sigaction(mode uint32, new, old unsafe.Pointer)
+func sigaltstack(new, old unsafe.Pointer)
+func sigtramp()
+func setitimer(mode int32, new, old unsafe.Pointer)
+func mach_semaphore_wait(sema uint32) int32
+func mach_semaphore_timedwait(sema, sec, nsec uint32) int32
+func mach_semaphore_signal(sema uint32) int32
+func mach_semaphore_signal_all(sema uint32) int32
+
+const stackSystem = 0
diff --git a/src/runtime/os_darwin.h b/src/runtime/os_darwin.h
new file mode 100644
index 000000000..e8bb45daf
--- /dev/null
+++ b/src/runtime/os_darwin.h
@@ -0,0 +1,43 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+typedef byte* kevent_udata;
+
+int32 runtime·bsdthread_create(void*, M*, G*, void(*)(void));
+int32 runtime·bsdthread_register(void);
+int32 runtime·mach_msg_trap(MachHeader*, int32, uint32, uint32, uint32, uint32, uint32);
+uint32 runtime·mach_reply_port(void);
+int32 runtime·mach_semacquire(uint32, int64);
+uint32 runtime·mach_semcreate(void);
+void runtime·mach_semdestroy(uint32);
+void runtime·mach_semrelease(uint32);
+void runtime·mach_semreset(uint32);
+uint32	runtime·mach_task_self(void);
+uint32	runtime·mach_thread_self(void);
+int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
+
+typedef uint32 Sigset;
+void runtime·sigprocmask(int32, Sigset*, Sigset*);
+void runtime·unblocksignals(void);
+
+struct SigactionT;
+void runtime·sigaction(uintptr, struct SigactionT*, struct SigactionT*);
+
+struct StackT;
+void runtime·sigaltstack(struct StackT*, struct StackT*);
+void runtime·sigtramp(void);
+void runtime·sigpanic(void);
+void runtime·setitimer(int32, Itimerval*, Itimerval*);
+
+
+enum {
+ NSIG = 32,
+ SI_USER = 0, /* empirically true, but not what headers say */
+ SIG_BLOCK = 1,
+ SIG_UNBLOCK = 2,
+ SIG_SETMASK = 3,
+ SS_DISABLE = 4,
+};
diff --git a/src/runtime/os_dragonfly.c b/src/runtime/os_dragonfly.c
new file mode 100644
index 000000000..d470f7c78
--- /dev/null
+++ b/src/runtime/os_dragonfly.c
@@ -0,0 +1,340 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+#include "stack.h"
+#include "textflag.h"
+
+extern SigTab runtime·sigtab[];
+extern int32 runtime·sys_umtx_sleep(uint32*, int32, int32);
+extern int32 runtime·sys_umtx_wakeup(uint32*, int32);
+
+// From DragonFly's <sys/sysctl.h>
+#define CTL_HW 6
+#define HW_NCPU 3
+
+static Sigset sigset_none;
+static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, };
+
+static int32
+getncpu(void)
+{
+ uint32 mib[2];
+ uint32 out;
+ int32 ret;
+ uintptr nout;
+
+ // Fetch hw.ncpu via sysctl.
+ mib[0] = CTL_HW;
+ mib[1] = HW_NCPU;
+ nout = sizeof out;
+ out = 0;
+ ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
+ if(ret >= 0)
+ return out;
+ else
+ return 1;
+}
+
+static void futexsleep(void);
+
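+// The nosplit runtime·futexsleep below stashes its arguments in
+// m->ptrarg/scalararg and runs the real sleep (the static futexsleep)
+// on the m stack via onM, since that work may need more stack than a
+// nosplit function is allowed.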
+#pragma textflag NOSPLIT
+void
+runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
+{
+ void (*fn)(void);
+
+ g->m->ptrarg[0] = addr;
+ g->m->scalararg[0] = val;
+ g->m->ptrarg[1] = &ns;
+
+ fn = futexsleep;
+ runtime·onM(&fn);
+}
+
+static void
+futexsleep(void)
+{
+ uint32 *addr;
+ uint32 val;
+ int64 ns;
+ int32 timeout = 0;
+ int32 ret;
+
+ addr = g->m->ptrarg[0];
+ val = g->m->scalararg[0];
+ ns = *(int64*)g->m->ptrarg[1];
+ g->m->ptrarg[0] = nil;
+ g->m->scalararg[0] = 0;
+ g->m->ptrarg[1] = nil;
+
+ if(ns >= 0) {
+ // The timeout is specified in microseconds - ensure that we
+ // do not end up dividing to zero, which would put us to sleep
+ // indefinitely...
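+	// (For example, ns = 999 would otherwise truncate to a timeout of 0,
+	// which umtx_sleep takes to mean wait forever.)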
+ timeout = runtime·timediv(ns, 1000, nil);
+ if(timeout == 0)
+ timeout = 1;
+ }
+
+ // sys_umtx_sleep will return EWOULDBLOCK (EAGAIN) when the timeout
+ // expires or EBUSY if the mutex value does not match.
+ ret = runtime·sys_umtx_sleep(addr, val, timeout);
+ if(ret >= 0 || ret == -EINTR || ret == -EAGAIN || ret == -EBUSY)
+ return;
+
+ runtime·prints("umtx_wait addr=");
+ runtime·printpointer(addr);
+ runtime·prints(" val=");
+ runtime·printint(val);
+ runtime·prints(" ret=");
+ runtime·printint(ret);
+ runtime·prints("\n");
+ *(int32*)0x1005 = 0x1005;
+}
+
+static void badfutexwakeup(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·futexwakeup(uint32 *addr, uint32 cnt)
+{
+ int32 ret;
+ void (*fn)(void);
+
+ ret = runtime·sys_umtx_wakeup(addr, cnt);
+ if(ret >= 0)
+ return;
+
+ g->m->ptrarg[0] = addr;
+ g->m->scalararg[0] = ret;
+ fn = badfutexwakeup;
+ if(g == g->m->gsignal)
+ fn();
+ else
+ runtime·onM(&fn);
+ *(int32*)0x1006 = 0x1006;
+}
+
+static void
+badfutexwakeup(void)
+{
+ void *addr;
+ int32 ret;
+
+ addr = g->m->ptrarg[0];
+ ret = g->m->scalararg[0];
+ runtime·printf("umtx_wake addr=%p ret=%d\n", addr, ret);
+}
+
+void runtime·lwp_start(void*);
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ Lwpparams params;
+ Sigset oset;
+
+ if(0){
+ runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
+ stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
+ }
+
+ runtime·sigprocmask(&sigset_all, &oset);
+ runtime·memclr((byte*)&params, sizeof params);
+
+ params.func = runtime·lwp_start;
+ params.arg = (byte*)mp;
+ params.stack = (byte*)stk;
+ params.tid1 = (int32*)&mp->procid;
+ params.tid2 = nil;
+
+ mp->tls[0] = mp->id; // so 386 asm can find it
+
+ runtime·lwp_create(&params);
+ runtime·sigprocmask(&oset, nil);
+}
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = getncpu();
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ #pragma dataflag NOPTR
+ static byte urandom_data[HashRandomBytes];
+ int32 fd;
+ fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = urandom_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024);
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ // Initialize signal handling
+ runtime·signalstack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+ runtime·sigprocmask(&sigset_none, nil);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+ runtime·signalstack(nil, 0);
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGBUS:
+ if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGSEGV:
+ if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGFPE:
+ switch(g->sigcode0) {
+ case FPE_INTDIV:
+ runtime·panicstring("integer divide by zero");
+ case FPE_INTOVF:
+ runtime·panicstring("integer overflow");
+ }
+ runtime·panicstring("floating point error");
+ }
+ runtime·panicstring(runtime·sigtab[g->sig].name);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ Rlimit rl;
+ extern byte runtime·text[], runtime·end[];
+ uintptr used;
+
+ if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+ return 0;
+ if(rl.rlim_cur >= 0x7fffffff)
+ return 0;
+
+ // Estimate our VM footprint excluding the heap.
+ // Not an exact science: use size of binary plus
+ // some room for thread stacks.
+ used = runtime·end - runtime·text + (64<<20);
+ if(used >= rl.rlim_cur)
+ return 0;
+
+ // If there's not at least 16 MB left, we're probably
+ // not going to be able to do much. Treat as no limit.
+ rl.rlim_cur -= used;
+ if(rl.rlim_cur < (16<<20))
+ return 0;
+
+	return rl.rlim_cur;
+}
+
+extern void runtime·sigtramp(void);
+
+typedef struct sigaction {
+ union {
+ void (*__sa_handler)(int32);
+ void (*__sa_sigaction)(int32, Siginfo*, void *);
+ } __sigaction_u; /* signal handler */
+ int32 sa_flags; /* see signal options below */
+ Sigset sa_mask; /* signal mask to apply */
+} SigactionT;
+
+void
+runtime·setsig(int32 i, GoSighandler *fn, bool restart)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
+ if(restart)
+ sa.sa_flags |= SA_RESTART;
+ sa.sa_mask.__bits[0] = ~(uint32)0;
+ sa.sa_mask.__bits[1] = ~(uint32)0;
+ sa.sa_mask.__bits[2] = ~(uint32)0;
+ sa.sa_mask.__bits[3] = ~(uint32)0;
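+	// If the caller asked for the Go signal handler, install the assembly
+	// trampoline runtime·sigtramp instead; runtime·getsig below undoes
+	// this mapping.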
+ if(fn == runtime·sighandler)
+ fn = (void*)runtime·sigtramp;
+ sa.__sigaction_u.__sa_sigaction = (void*)fn;
+ runtime·sigaction(i, &sa, nil);
+}
+
+GoSighandler*
+runtime·getsig(int32 i)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ runtime·sigaction(i, nil, &sa);
+ if((void*)sa.__sigaction_u.__sa_sigaction == runtime·sigtramp)
+ return runtime·sighandler;
+ return (void*)sa.__sigaction_u.__sa_sigaction;
+}
+
+void
+runtime·signalstack(byte *p, int32 n)
+{
+ StackT st;
+
+ st.ss_sp = (void*)p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ runtime·sigaltstack(&st, nil);
+}
+
+void
+runtime·unblocksignals(void)
+{
+ runtime·sigprocmask(&sigset_none, nil);
+}
diff --git a/src/runtime/os_dragonfly.go b/src/runtime/os_dragonfly.go
new file mode 100644
index 000000000..cdaa06986
--- /dev/null
+++ b/src/runtime/os_dragonfly.go
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func lwp_create(param unsafe.Pointer) int32
+func sigaltstack(new, old unsafe.Pointer)
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigprocmask(new, old unsafe.Pointer)
+func setitimer(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func sys_umtx_sleep(addr unsafe.Pointer, val, timeout int32) int32
+func sys_umtx_wakeup(addr unsafe.Pointer, val int32) int32
+
+const stackSystem = 0
diff --git a/src/runtime/os_dragonfly.h b/src/runtime/os_dragonfly.h
new file mode 100644
index 000000000..389736a32
--- /dev/null
+++ b/src/runtime/os_dragonfly.h
@@ -0,0 +1,30 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+typedef byte* kevent_udata;
+
+int32 runtime·lwp_create(Lwpparams*);
+void runtime·sigpanic(void);
+void runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
+struct sigaction;
+void runtime·sigaction(int32, struct sigaction*, struct sigaction*);
+void runtime·sigprocmask(Sigset *, Sigset *);
+void runtime·unblocksignals(void);
+void runtime·setitimer(int32, Itimerval*, Itimerval*);
+int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
+
+enum {
+ NSIG = 33,
+ SI_USER = 0x10001,
+ SS_DISABLE = 4,
+ RLIMIT_AS = 10,
+};
+
+typedef struct Rlimit Rlimit;
+struct Rlimit {
+ int64 rlim_cur;
+ int64 rlim_max;
+};
+int32 runtime·getrlimit(int32, Rlimit*);
diff --git a/src/runtime/os_freebsd.c b/src/runtime/os_freebsd.c
new file mode 100644
index 000000000..aae944ea1
--- /dev/null
+++ b/src/runtime/os_freebsd.c
@@ -0,0 +1,348 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+#include "stack.h"
+#include "textflag.h"
+
+extern SigTab runtime·sigtab[];
+extern int32 runtime·sys_umtx_op(uint32*, int32, uint32, void*, void*);
+
+// From FreeBSD's <sys/sysctl.h>
+#define CTL_HW 6
+#define HW_NCPU 3
+
+static Sigset sigset_none;
+static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, };
+
+static int32
+getncpu(void)
+{
+ uint32 mib[2];
+ uint32 out;
+ int32 ret;
+ uintptr nout;
+
+ // Fetch hw.ncpu via sysctl.
+ mib[0] = CTL_HW;
+ mib[1] = HW_NCPU;
+ nout = sizeof out;
+ out = 0;
+ ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
+ if(ret >= 0)
+ return out;
+ else
+ return 1;
+}
+
+// FreeBSD's umtx_op syscall is effectively the same as Linux's futex, and
+// thus the code is largely similar. See linux/thread.c and lock_futex.c for comments.
+
+static void futexsleep(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
+{
+ void (*fn)(void);
+
+ g->m->ptrarg[0] = addr;
+ g->m->scalararg[0] = val;
+ g->m->ptrarg[1] = &ns;
+
+ fn = futexsleep;
+ runtime·onM(&fn);
+}
+
+static void
+futexsleep(void)
+{
+ uint32 *addr;
+ uint32 val;
+ int64 ns;
+ int32 ret;
+ Timespec ts;
+
+ addr = g->m->ptrarg[0];
+ val = g->m->scalararg[0];
+ ns = *(int64*)g->m->ptrarg[1];
+ g->m->ptrarg[0] = nil;
+ g->m->scalararg[0] = 0;
+ g->m->ptrarg[1] = nil;
+
+ if(ns < 0) {
+ ret = runtime·sys_umtx_op(addr, UMTX_OP_WAIT_UINT_PRIVATE, val, nil, nil);
+ if(ret >= 0 || ret == -EINTR)
+ return;
+ goto fail;
+ }
+ // NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system.
+ ts.tv_nsec = 0;
+ ts.tv_sec = runtime·timediv(ns, 1000000000, (int32*)&ts.tv_nsec);
+ ret = runtime·sys_umtx_op(addr, UMTX_OP_WAIT_UINT_PRIVATE, val, nil, &ts);
+ if(ret >= 0 || ret == -EINTR)
+ return;
+
+fail:
+ runtime·prints("umtx_wait addr=");
+ runtime·printpointer(addr);
+ runtime·prints(" val=");
+ runtime·printint(val);
+ runtime·prints(" ret=");
+ runtime·printint(ret);
+ runtime·prints("\n");
+ *(int32*)0x1005 = 0x1005;
+}
+
+static void badfutexwakeup(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·futexwakeup(uint32 *addr, uint32 cnt)
+{
+ int32 ret;
+ void (*fn)(void);
+
+ ret = runtime·sys_umtx_op(addr, UMTX_OP_WAKE_PRIVATE, cnt, nil, nil);
+ if(ret >= 0)
+ return;
+
+ g->m->ptrarg[0] = addr;
+ g->m->scalararg[0] = ret;
+ fn = badfutexwakeup;
+ if(g == g->m->gsignal)
+ fn();
+ else
+ runtime·onM(&fn);
+ *(int32*)0x1006 = 0x1006;
+}
+
+static void
+badfutexwakeup(void)
+{
+ void *addr;
+ int32 ret;
+
+ addr = g->m->ptrarg[0];
+ ret = g->m->scalararg[0];
+ runtime·printf("umtx_wake addr=%p ret=%d\n", addr, ret);
+}
+
+void runtime·thr_start(void*);
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ ThrParam param;
+ Sigset oset;
+
+ if(0){
+ runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
+ stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
+ }
+
+ runtime·sigprocmask(&sigset_all, &oset);
+ runtime·memclr((byte*)&param, sizeof param);
+
+ param.start_func = runtime·thr_start;
+ param.arg = (byte*)mp;
+
+ // NOTE(rsc): This code is confused. stackbase is the top of the stack
+ // and is equal to stk. However, it's working, so I'm not changing it.
+ param.stack_base = (void*)mp->g0->stackbase;
+ param.stack_size = (byte*)stk - (byte*)mp->g0->stackbase;
+
+ param.child_tid = (void*)&mp->procid;
+ param.parent_tid = nil;
+ param.tls_base = (void*)&mp->tls[0];
+ param.tls_size = sizeof mp->tls;
+
+ mp->tls[0] = mp->id; // so 386 asm can find it
+
+ runtime·thr_new(&param, sizeof param);
+ runtime·sigprocmask(&oset, nil);
+}
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = getncpu();
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ #pragma dataflag NOPTR
+ static byte urandom_data[HashRandomBytes];
+ int32 fd;
+ fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = urandom_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024);
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ // Initialize signal handling
+ runtime·signalstack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+ runtime·sigprocmask(&sigset_none, nil);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+ runtime·signalstack(nil, 0);
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGBUS:
+ if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGSEGV:
+ if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGFPE:
+ switch(g->sigcode0) {
+ case FPE_INTDIV:
+ runtime·panicstring("integer divide by zero");
+ case FPE_INTOVF:
+ runtime·panicstring("integer overflow");
+ }
+ runtime·panicstring("floating point error");
+ }
+ runtime·panicstring(runtime·sigtab[g->sig].name);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ Rlimit rl;
+ extern byte runtime·text[], runtime·end[];
+ uintptr used;
+
+ if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+ return 0;
+ if(rl.rlim_cur >= 0x7fffffff)
+ return 0;
+
+ // Estimate our VM footprint excluding the heap.
+ // Not an exact science: use size of binary plus
+ // some room for thread stacks.
+ used = runtime·end - runtime·text + (64<<20);
+ if(used >= rl.rlim_cur)
+ return 0;
+
+ // If there's not at least 16 MB left, we're probably
+ // not going to be able to do much. Treat as no limit.
+ rl.rlim_cur -= used;
+ if(rl.rlim_cur < (16<<20))
+ return 0;
+
+	return rl.rlim_cur;
+}
+
+extern void runtime·sigtramp(void);
+
+typedef struct sigaction {
+ union {
+ void (*__sa_handler)(int32);
+ void (*__sa_sigaction)(int32, Siginfo*, void *);
+ } __sigaction_u; /* signal handler */
+ int32 sa_flags; /* see signal options below */
+ Sigset sa_mask; /* signal mask to apply */
+} SigactionT;
+
+void
+runtime·setsig(int32 i, GoSighandler *fn, bool restart)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
+ if(restart)
+ sa.sa_flags |= SA_RESTART;
+ sa.sa_mask.__bits[0] = ~(uint32)0;
+ sa.sa_mask.__bits[1] = ~(uint32)0;
+ sa.sa_mask.__bits[2] = ~(uint32)0;
+ sa.sa_mask.__bits[3] = ~(uint32)0;
+ if(fn == runtime·sighandler)
+ fn = (void*)runtime·sigtramp;
+ sa.__sigaction_u.__sa_sigaction = (void*)fn;
+ runtime·sigaction(i, &sa, nil);
+}
+
+GoSighandler*
+runtime·getsig(int32 i)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ runtime·sigaction(i, nil, &sa);
+ if((void*)sa.__sigaction_u.__sa_sigaction == runtime·sigtramp)
+ return runtime·sighandler;
+ return (void*)sa.__sigaction_u.__sa_sigaction;
+}
+
+void
+runtime·signalstack(byte *p, int32 n)
+{
+ StackT st;
+
+ st.ss_sp = (void*)p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ runtime·sigaltstack(&st, nil);
+}
+
+void
+runtime·unblocksignals(void)
+{
+ runtime·sigprocmask(&sigset_none, nil);
+}
diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go
new file mode 100644
index 000000000..96964f1e1
--- /dev/null
+++ b/src/runtime/os_freebsd.go
@@ -0,0 +1,19 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func thr_new(param unsafe.Pointer, size int32)
+func sigaltstack(new, old unsafe.Pointer)
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigprocmask(new, old unsafe.Pointer)
+func setitimer(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func sys_umtx_op(addr unsafe.Pointer, mode int32, val uint32, ptr2, ts unsafe.Pointer) int32
+
+const stackSystem = 0
diff --git a/src/runtime/os_freebsd.h b/src/runtime/os_freebsd.h
new file mode 100644
index 000000000..b86bb393c
--- /dev/null
+++ b/src/runtime/os_freebsd.h
@@ -0,0 +1,29 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+typedef byte* kevent_udata;
+
+int32 runtime·thr_new(ThrParam*, int32);
+void runtime·sigpanic(void);
+void runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
+struct sigaction;
+void runtime·sigaction(int32, struct sigaction*, struct sigaction*);
+void runtime·sigprocmask(Sigset *, Sigset *);
+void runtime·unblocksignals(void);
+void runtime·setitimer(int32, Itimerval*, Itimerval*);
+int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
+
+enum {
+ SS_DISABLE = 4,
+ NSIG = 33,
+ SI_USER = 0x10001,
+ RLIMIT_AS = 10,
+};
+
+typedef struct Rlimit Rlimit;
+struct Rlimit {
+ int64 rlim_cur;
+ int64 rlim_max;
+};
+int32 runtime·getrlimit(int32, Rlimit*);
diff --git a/src/runtime/os_freebsd_arm.c b/src/runtime/os_freebsd_arm.c
new file mode 100644
index 000000000..2f2d7767f
--- /dev/null
+++ b/src/runtime/os_freebsd_arm.c
@@ -0,0 +1,24 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "textflag.h"
+
+void
+runtime·checkgoarm(void)
+{
+ // TODO(minux)
+}
+
+#pragma textflag NOSPLIT
+int64
+runtime·cputicks(void)
+{
+	// Currently cputicks() is used in the blocking profiler and to seed runtime·fastrand1().
+	// runtime·nanotime() is a poor approximation of CPU ticks, but it is good enough for the profiler.
+	// TODO: need more entropy to better seed fastrand1.
+ return runtime·nanotime();
+}
diff --git a/src/runtime/os_linux.c b/src/runtime/os_linux.c
new file mode 100644
index 000000000..66e7bcec0
--- /dev/null
+++ b/src/runtime/os_linux.c
@@ -0,0 +1,370 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+#include "stack.h"
+#include "textflag.h"
+
+extern SigTab runtime·sigtab[];
+
+static Sigset sigset_none;
+static Sigset sigset_all = { ~(uint32)0, ~(uint32)0 };
+
+// Linux futex.
+//
+// futexsleep(uint32 *addr, uint32 val)
+// futexwakeup(uint32 *addr)
+//
+// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
+// Futexwakeup wakes up threads sleeping on addr.
+// Futexsleep is allowed to wake up spuriously.
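+//
+// Roughly how the futex-based mutex elsewhere in the runtime uses them:
+// a contending locker marks the lock word as "sleeping" and calls
+// futexsleep(key, sleeping-value, -1); the unlocker stores "unlocked"
+// and calls futexwakeup(key, 1).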
+
+enum
+{
+ FUTEX_WAIT = 0,
+ FUTEX_WAKE = 1,
+};
+
+// Atomically,
+// if(*addr == val) sleep
+// Might be woken up spuriously; that's allowed.
+// Don't sleep longer than ns; ns < 0 means forever.
+#pragma textflag NOSPLIT
+void
+runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
+{
+ Timespec ts;
+
+ // Some Linux kernels have a bug where futex of
+ // FUTEX_WAIT returns an internal error code
+ // as an errno. Libpthread ignores the return value
+ // here, and so can we: as it says a few lines up,
+ // spurious wakeups are allowed.
+
+ if(ns < 0) {
+ runtime·futex(addr, FUTEX_WAIT, val, nil, nil, 0);
+ return;
+ }
+ // NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system.
+ ts.tv_nsec = 0;
+ ts.tv_sec = runtime·timediv(ns, 1000000000LL, (int32*)&ts.tv_nsec);
+ runtime·futex(addr, FUTEX_WAIT, val, &ts, nil, 0);
+}
+
+static void badfutexwakeup(void);
+
+// If any procs are sleeping on addr, wake up at most cnt.
+#pragma textflag NOSPLIT
+void
+runtime·futexwakeup(uint32 *addr, uint32 cnt)
+{
+ int64 ret;
+ void (*fn)(void);
+
+ ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0);
+ if(ret >= 0)
+ return;
+
+ // I don't know that futex wakeup can return
+ // EAGAIN or EINTR, but if it does, it would be
+ // safe to loop and call futex again.
+ g->m->ptrarg[0] = addr;
+ g->m->scalararg[0] = (int32)ret; // truncated but fine
+ fn = badfutexwakeup;
+ if(g == g->m->gsignal)
+ fn();
+ else
+ runtime·onM(&fn);
+ *(int32*)0x1006 = 0x1006;
+}
+
+static void
+badfutexwakeup(void)
+{
+ void *addr;
+ int64 ret;
+
+ addr = g->m->ptrarg[0];
+ ret = (int32)g->m->scalararg[0];
+ runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret);
+}
+
+extern int32 runtime·sched_getaffinity(uintptr pid, uintptr len, uintptr *buf);
+static int32
+getproccount(void)
+{
+ uintptr buf[16], t;
+ int32 r, cnt, i;
+
+ cnt = 0;
+ r = runtime·sched_getaffinity(0, sizeof(buf), buf);
+ if(r > 0)
+ for(i = 0; i < r/sizeof(buf[0]); i++) {
+ t = buf[i];
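+			// The next three lines are a SWAR popcount: they count the bits
+			// set in t, i.e. the CPUs present in this word of the affinity mask.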
+ t = t - ((t >> 1) & 0x5555555555555555ULL);
+ t = (t & 0x3333333333333333ULL) + ((t >> 2) & 0x3333333333333333ULL);
+ cnt += (int32)((((t + (t >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
+ }
+
+ return cnt ? cnt : 1;
+}
+
+// Clone, the Linux rfork.
+enum
+{
+ CLONE_VM = 0x100,
+ CLONE_FS = 0x200,
+ CLONE_FILES = 0x400,
+ CLONE_SIGHAND = 0x800,
+ CLONE_PTRACE = 0x2000,
+ CLONE_VFORK = 0x4000,
+ CLONE_PARENT = 0x8000,
+ CLONE_THREAD = 0x10000,
+ CLONE_NEWNS = 0x20000,
+ CLONE_SYSVSEM = 0x40000,
+ CLONE_SETTLS = 0x80000,
+ CLONE_PARENT_SETTID = 0x100000,
+ CLONE_CHILD_CLEARTID = 0x200000,
+ CLONE_UNTRACED = 0x800000,
+ CLONE_CHILD_SETTID = 0x1000000,
+ CLONE_STOPPED = 0x2000000,
+ CLONE_NEWUTS = 0x4000000,
+ CLONE_NEWIPC = 0x8000000,
+};
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ int32 ret;
+ int32 flags;
+ Sigset oset;
+
+ /*
+ * note: strace gets confused if we use CLONE_PTRACE here.
+ */
+ flags = CLONE_VM /* share memory */
+ | CLONE_FS /* share cwd, etc */
+ | CLONE_FILES /* share fd table */
+ | CLONE_SIGHAND /* share sig handler table */
+ | CLONE_THREAD /* revisit - okay for now */
+ ;
+
+ mp->tls[0] = mp->id; // so 386 asm can find it
+ if(0){
+ runtime·printf("newosproc stk=%p m=%p g=%p clone=%p id=%d/%d ostk=%p\n",
+ stk, mp, mp->g0, runtime·clone, mp->id, (int32)mp->tls[0], &mp);
+ }
+
+ // Disable signals during clone, so that the new thread starts
+ // with signals disabled. It will enable them in minit.
+ runtime·rtsigprocmask(SIG_SETMASK, &sigset_all, &oset, sizeof oset);
+ ret = runtime·clone(flags, stk, mp, mp->g0, runtime·mstart);
+ runtime·rtsigprocmask(SIG_SETMASK, &oset, nil, sizeof oset);
+
+ if(ret < 0) {
+ runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret);
+ runtime·throw("runtime.newosproc");
+ }
+}
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = getproccount();
+}
+
+// Random bytes initialized at startup. These come
+// from the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.c).
+byte* runtime·startup_random_data;
+uint32 runtime·startup_random_data_len;
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ if(runtime·startup_random_data != nil) {
+ *rnd = runtime·startup_random_data;
+ *rnd_len = runtime·startup_random_data_len;
+ } else {
+ #pragma dataflag NOPTR
+ static byte urandom_data[HashRandomBytes];
+ int32 fd;
+ fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = urandom_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+ }
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ // Initialize signal handling.
+ runtime·signalstack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+ runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof(Sigset));
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+ runtime·signalstack(nil, 0);
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGBUS:
+ if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGSEGV:
+ if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGFPE:
+ switch(g->sigcode0) {
+ case FPE_INTDIV:
+ runtime·panicstring("integer divide by zero");
+ case FPE_INTOVF:
+ runtime·panicstring("integer overflow");
+ }
+ runtime·panicstring("floating point error");
+ }
+ runtime·panicstring(runtime·sigtab[g->sig].name);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ Rlimit rl;
+ extern byte runtime·text[], runtime·end[];
+ uintptr used;
+
+ if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+ return 0;
+ if(rl.rlim_cur >= 0x7fffffff)
+ return 0;
+
+ // Estimate our VM footprint excluding the heap.
+ // Not an exact science: use size of binary plus
+ // some room for thread stacks.
+ used = runtime·end - runtime·text + (64<<20);
+ if(used >= rl.rlim_cur)
+ return 0;
+
+ // If there's not at least 16 MB left, we're probably
+ // not going to be able to do much. Treat as no limit.
+ rl.rlim_cur -= used;
+ if(rl.rlim_cur < (16<<20))
+ return 0;
+
+	return rl.rlim_cur;
+}
+
+#ifdef GOARCH_386
+#define sa_handler k_sa_handler
+#endif
+
+/*
+ * This assembler routine takes the args from registers, puts them on the stack,
+ * and calls sighandler().
+ */
+extern void runtime·sigtramp(void);
+extern void runtime·sigreturn(void); // calls rt_sigreturn, only used with SA_RESTORER
+
+void
+runtime·setsig(int32 i, GoSighandler *fn, bool restart)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER;
+ if(restart)
+ sa.sa_flags |= SA_RESTART;
+ sa.sa_mask = ~0ULL;
+	// Although the Linux manpage says the sa_restorer element is obsolete
+	// and should not be used, the x86_64 kernel requires it. Only use it
+	// on x86.
+#ifdef GOARCH_386
+ sa.sa_restorer = (void*)runtime·sigreturn;
+#endif
+#ifdef GOARCH_amd64
+ sa.sa_restorer = (void*)runtime·sigreturn;
+#endif
+ if(fn == runtime·sighandler)
+ fn = (void*)runtime·sigtramp;
+ sa.sa_handler = fn;
+ if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0)
+ runtime·throw("rt_sigaction failure");
+}
+
+GoSighandler*
+runtime·getsig(int32 i)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0)
+ runtime·throw("rt_sigaction read failure");
+ if((void*)sa.sa_handler == runtime·sigtramp)
+ return runtime·sighandler;
+ return (void*)sa.sa_handler;
+}
+
+void
+runtime·signalstack(byte *p, int32 n)
+{
+ SigaltstackT st;
+
+ st.ss_sp = p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ runtime·sigaltstack(&st, nil);
+}
+
+void
+runtime·unblocksignals(void)
+{
+ runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof sigset_none);
+}
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
new file mode 100644
index 000000000..a6799cd41
--- /dev/null
+++ b/src/runtime/os_linux.go
@@ -0,0 +1,19 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
+func clone(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
+func rt_sigaction(sig uintptr, new, old unsafe.Pointer, size uintptr) int32
+func sigaltstack(new, old unsafe.Pointer)
+func setitimer(mode int32, new, old unsafe.Pointer)
+func rtsigprocmask(sig int32, new, old unsafe.Pointer, size int32)
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
+
+const stackSystem = 0
diff --git a/src/runtime/os_linux.h b/src/runtime/os_linux.h
new file mode 100644
index 000000000..75606d615
--- /dev/null
+++ b/src/runtime/os_linux.h
@@ -0,0 +1,41 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+// Linux-specific system calls
+int32 runtime·futex(uint32*, int32, uint32, Timespec*, uint32*, uint32);
+int32 runtime·clone(int32, void*, M*, G*, void(*)(void));
+
+struct SigactionT;
+int32 runtime·rt_sigaction(uintptr, struct SigactionT*, void*, uintptr);
+
+void runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
+void runtime·sigpanic(void);
+void runtime·setitimer(int32, Itimerval*, Itimerval*);
+
+enum {
+ SS_DISABLE = 2,
+ NSIG = 65,
+ SI_USER = 0,
+ SIG_SETMASK = 2,
+ RLIMIT_AS = 9,
+};
+
+// It's hard to tease out exactly how big a Sigset is, but
+// rt_sigprocmask crashes if we get it wrong, so if binaries
+// are running, this is right.
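+// (The kernel requires the size passed to rt_sigprocmask to be 8 bytes,
+// i.e. 64 signal bits, which is exactly what two uint32 words provide.)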
+typedef struct Sigset Sigset;
+struct Sigset
+{
+ uint32 mask[2];
+};
+void runtime·rtsigprocmask(int32, Sigset*, Sigset*, int32);
+void runtime·unblocksignals(void);
+
+typedef struct Rlimit Rlimit;
+struct Rlimit {
+ uintptr rlim_cur;
+ uintptr rlim_max;
+};
+int32 runtime·getrlimit(int32, Rlimit*);
diff --git a/src/runtime/os_linux_386.c b/src/runtime/os_linux_386.c
new file mode 100644
index 000000000..dc89d04e2
--- /dev/null
+++ b/src/runtime/os_linux_386.c
@@ -0,0 +1,38 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "textflag.h"
+
+#define AT_NULL 0
+#define AT_RANDOM 25
+#define AT_SYSINFO 32
+extern uint32 runtime·_vdso;
+
+#pragma textflag NOSPLIT
+void
+runtime·linux_setup_vdso(int32 argc, byte **argv)
+{
+ byte **envp;
+ uint32 *auxv;
+
+ // skip envp to get to ELF auxiliary vector.
+ for(envp = &argv[argc+1]; *envp != nil; envp++)
+ ;
+ envp++;
+
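+	// The auxiliary vector is a sequence of (type, value) pairs terminated
+	// by an AT_NULL entry.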
+ for(auxv=(uint32*)envp; auxv[0] != AT_NULL; auxv += 2) {
+ if(auxv[0] == AT_SYSINFO) {
+ runtime·_vdso = auxv[1];
+ continue;
+ }
+ if(auxv[0] == AT_RANDOM) {
+ runtime·startup_random_data = (byte*)auxv[1];
+ runtime·startup_random_data_len = 16;
+ continue;
+ }
+ }
+}
diff --git a/src/runtime/os_linux_arm.c b/src/runtime/os_linux_arm.c
new file mode 100644
index 000000000..e3eda7c2d
--- /dev/null
+++ b/src/runtime/os_linux_arm.c
@@ -0,0 +1,80 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "textflag.h"
+
+#define AT_NULL 0
+#define AT_PLATFORM 15 // introduced in at least 2.6.11
+#define AT_HWCAP 16 // introduced in at least 2.6.11
+#define AT_RANDOM 25 // introduced in 2.6.29
+#define HWCAP_VFP (1 << 6) // introduced in at least 2.6.11
+#define HWCAP_VFPv3 (1 << 13) // introduced in 2.6.30
+static uint32 runtime·randomNumber;
+uint8 runtime·armArch = 6; // we default to ARMv6
+uint32 runtime·hwcap; // set by setup_auxv
+extern uint8 runtime·goarm; // set by 5l
+
+void
+runtime·checkgoarm(void)
+{
+ if(runtime·goarm > 5 && !(runtime·hwcap & HWCAP_VFP)) {
+ runtime·printf("runtime: this CPU has no floating point hardware, so it cannot run\n");
+ runtime·printf("this GOARM=%d binary. Recompile using GOARM=5.\n", runtime·goarm);
+ runtime·exit(1);
+ }
+ if(runtime·goarm > 6 && !(runtime·hwcap & HWCAP_VFPv3)) {
+ runtime·printf("runtime: this CPU has no VFPv3 floating point hardware, so it cannot run\n");
+ runtime·printf("this GOARM=%d binary. Recompile using GOARM=6.\n", runtime·goarm);
+ runtime·exit(1);
+ }
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·setup_auxv(int32 argc, byte **argv)
+{
+ byte **envp;
+ byte *rnd;
+ uint32 *auxv;
+ uint32 t;
+
+ // skip envp to get to ELF auxiliary vector.
+ for(envp = &argv[argc+1]; *envp != nil; envp++)
+ ;
+ envp++;
+
+ for(auxv=(uint32*)envp; auxv[0] != AT_NULL; auxv += 2) {
+ switch(auxv[0]) {
+ case AT_RANDOM: // kernel provided 16-byte worth of random data
+ if(auxv[1]) {
+ rnd = (byte*)auxv[1];
+ runtime·randomNumber = rnd[4] | rnd[5]<<8 | rnd[6]<<16 | rnd[7]<<24;
+ }
+ break;
+ case AT_PLATFORM: // v5l, v6l, v7l
+ if(auxv[1]) {
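+			// auxv[1] points at a platform string such as "v7l";
+			// the character after the 'v' is the architecture version.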
+ t = *(uint8*)(auxv[1]+1);
+ if(t >= '5' && t <= '7')
+ runtime·armArch = t - '0';
+ }
+ break;
+ case AT_HWCAP: // CPU capability bit flags
+ runtime·hwcap = auxv[1];
+ break;
+ }
+ }
+}
+
+#pragma textflag NOSPLIT
+int64
+runtime·cputicks(void)
+{
+	// Currently cputicks() is used in the blocking profiler and to seed runtime·fastrand1().
+	// runtime·nanotime() is a poor approximation of CPU ticks, but it is good enough for the profiler.
+	// runtime·randomNumber provides better seeding of fastrand1.
+ return runtime·nanotime() + runtime·randomNumber;
+}
diff --git a/src/runtime/os_nacl.c b/src/runtime/os_nacl.c
new file mode 100644
index 000000000..37a1fcca0
--- /dev/null
+++ b/src/runtime/os_nacl.c
@@ -0,0 +1,325 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "arch_GOARCH.h"
+#include "textflag.h"
+#include "stack.h"
+
+int8 *goos = "nacl";
+extern SigTab runtime·sigtab[];
+
+void runtime·sigtramp(void);
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ int32 ret;
+
+ // Initialize signal handling
+ ret = runtime·nacl_exception_stack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+ if(ret < 0)
+ runtime·printf("runtime: nacl_exception_stack: error %d\n", -ret);
+
+ ret = runtime·nacl_exception_handler(runtime·sigtramp, nil);
+ if(ret < 0)
+ runtime·printf("runtime: nacl_exception_handler: error %d\n", -ret);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+}
+
+int8 runtime·sigtrampf[] = "runtime: signal at PC=%X AX=%X CX=%X DX=%X BX=%X DI=%X R15=%X *SP=%X\n";
+int8 runtime·sigtrampp[] = "runtime: sigtramp";
+
+extern byte runtime·tls0[];
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = 1;
+ g->m->procid = 2;
+//runtime·nacl_exception_handler(runtime·sigtramp, nil);
+}
+
+void
+runtime·crash(void)
+{
+ *(int32*)0 = 0;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ *rnd = nil;
+ *rnd_len = 0;
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+}
+
+void
+runtime·initsig(void)
+{
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·usleep(uint32 us)
+{
+ Timespec ts;
+
+ ts.tv_sec = us/1000000;
+ ts.tv_nsec = (us%1000000)*1000;
+ runtime·nacl_nanosleep(&ts, nil);
+}
+
+void runtime·mstart_nacl(void);
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ int32 ret;
+ void **tls;
+
+ tls = (void**)mp->tls;
+ tls[0] = mp->g0;
+ tls[1] = mp;
+ ret = runtime·nacl_thread_create(runtime·mstart_nacl, stk, tls+2, 0);
+ if(ret < 0) {
+ runtime·printf("nacl_thread_create: error %d\n", -ret);
+ runtime·throw("newosproc");
+ }
+}
+
+static void
+semacreate(void)
+{
+ int32 mu, cond;
+
+ mu = runtime·nacl_mutex_create(0);
+ if(mu < 0) {
+ runtime·printf("nacl_mutex_create: error %d\n", -mu);
+ runtime·throw("semacreate");
+ }
+ cond = runtime·nacl_cond_create(0);
+ if(cond < 0) {
+ runtime·printf("nacl_cond_create: error %d\n", -cond);
+ runtime·throw("semacreate");
+ }
+ g->m->waitsemalock = mu;
+ g->m->scalararg[0] = cond; // assigned to m->waitsema
+}
+
+#pragma textflag NOSPLIT
+uint32
+runtime·semacreate(void)
+{
+ void (*fn)(void);
+ uint32 x;
+
+ fn = semacreate;
+ runtime·onM(&fn);
+ x = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+ return x;
+}
+
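+// NaCl has no semaphore primitive, so one is emulated per m using a NaCl
+// mutex (m->waitsemalock), a condition variable (m->waitsema) and the
+// counter m->waitsemacount. Timed waits pass an absolute deadline to
+// nacl_cond_timed_wait_abs. Runs on the m stack via onM.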
+static void
+semasleep(void)
+{
+ int32 ret;
+ int64 ns;
+
+ ns = (int64)(uint32)g->m->scalararg[0] | (int64)(uint32)g->m->scalararg[1]<<32;
+ g->m->scalararg[0] = 0;
+ g->m->scalararg[1] = 0;
+
+ ret = runtime·nacl_mutex_lock(g->m->waitsemalock);
+ if(ret < 0) {
+ //runtime·printf("nacl_mutex_lock: error %d\n", -ret);
+ runtime·throw("semasleep");
+ }
+ if(g->m->waitsemacount > 0) {
+ g->m->waitsemacount = 0;
+ runtime·nacl_mutex_unlock(g->m->waitsemalock);
+ g->m->scalararg[0] = 0;
+ return;
+ }
+
+ while(g->m->waitsemacount == 0) {
+ if(ns < 0) {
+ ret = runtime·nacl_cond_wait(g->m->waitsema, g->m->waitsemalock);
+ if(ret < 0) {
+ //runtime·printf("nacl_cond_wait: error %d\n", -ret);
+ runtime·throw("semasleep");
+ }
+ } else {
+ Timespec ts;
+
+ ns += runtime·nanotime();
+ ts.tv_sec = runtime·timediv(ns, 1000000000, (int32*)&ts.tv_nsec);
+ ret = runtime·nacl_cond_timed_wait_abs(g->m->waitsema, g->m->waitsemalock, &ts);
+ if(ret == -ETIMEDOUT) {
+ runtime·nacl_mutex_unlock(g->m->waitsemalock);
+ g->m->scalararg[0] = -1;
+ return;
+ }
+ if(ret < 0) {
+ //runtime·printf("nacl_cond_timed_wait_abs: error %d\n", -ret);
+ runtime·throw("semasleep");
+ }
+ }
+ }
+
+ g->m->waitsemacount = 0;
+ runtime·nacl_mutex_unlock(g->m->waitsemalock);
+ g->m->scalararg[0] = 0;
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·semasleep(int64 ns)
+{
+ int32 r;
+ void (*fn)(void);
+
+ g->m->scalararg[0] = (uint32)ns;
+ g->m->scalararg[1] = (uint32)(ns>>32);
+ fn = semasleep;
+ runtime·onM(&fn);
+ r = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+ return r;
+}
+
+static void
+semawakeup(void)
+{
+ int32 ret;
+ M *mp;
+
+ mp = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+
+ ret = runtime·nacl_mutex_lock(mp->waitsemalock);
+ if(ret < 0) {
+ //runtime·printf("nacl_mutex_lock: error %d\n", -ret);
+ runtime·throw("semawakeup");
+ }
+ if(mp->waitsemacount != 0) {
+ //runtime·printf("semawakeup: double wakeup\n");
+ runtime·throw("semawakeup");
+ }
+ mp->waitsemacount = 1;
+ runtime·nacl_cond_signal(mp->waitsema);
+ runtime·nacl_mutex_unlock(mp->waitsemalock);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·semawakeup(M *mp)
+{
+ void (*fn)(void);
+
+ g->m->ptrarg[0] = mp;
+ fn = semawakeup;
+ runtime·onM(&fn);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ runtime·printf("memlimit\n");
+ return 0;
+}
+
+#pragma dataflag NOPTR
+static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n";
+
+// This runs on a foreign stack, without an m or a g. No stack split.
+#pragma textflag NOSPLIT
+void
+runtime·badsignal2(void)
+{
+ runtime·write(2, badsignal, sizeof badsignal - 1);
+ runtime·exit(2);
+}
+
+void runtime·madvise(byte*, uintptr, int32) { }
+void runtime·munmap(byte*, uintptr) {}
+
+void
+runtime·resetcpuprofiler(int32 hz)
+{
+ USED(hz);
+}
+
+void
+runtime·sigdisable(uint32)
+{
+}
+
+void
+runtime·sigenable(uint32)
+{
+}
+
+void
+runtime·closeonexec(int32)
+{
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ // Native Client only invokes the exception handler for memory faults.
+ g->sig = SIGSEGV;
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+}
+
+uint32 runtime·writelock; // test-and-set spin lock for runtime.write
+
+/*
+An attempt at IRT. Doesn't work. See end of sys_nacl_amd64.s.
+
+void (*runtime·nacl_irt_query)(void);
+
+int8 runtime·nacl_irt_basic_v0_1_str[] = "nacl-irt-basic-0.1";
+void *runtime·nacl_irt_basic_v0_1[6]; // exit, gettod, clock, nanosleep, sched_yield, sysconf
+int32 runtime·nacl_irt_basic_v0_1_size = sizeof(runtime·nacl_irt_basic_v0_1);
+
+int8 runtime·nacl_irt_memory_v0_3_str[] = "nacl-irt-memory-0.3";
+void *runtime·nacl_irt_memory_v0_3[3]; // mmap, munmap, mprotect
+int32 runtime·nacl_irt_memory_v0_3_size = sizeof(runtime·nacl_irt_memory_v0_3);
+
+int8 runtime·nacl_irt_thread_v0_1_str[] = "nacl-irt-thread-0.1";
+void *runtime·nacl_irt_thread_v0_1[3]; // thread_create, thread_exit, thread_nice
+int32 runtime·nacl_irt_thread_v0_1_size = sizeof(runtime·nacl_irt_thread_v0_1);
+*/
diff --git a/src/runtime/os_nacl.go b/src/runtime/os_nacl.go
new file mode 100644
index 000000000..12a15aea0
--- /dev/null
+++ b/src/runtime/os_nacl.go
@@ -0,0 +1,30 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func nacl_exception_stack(p unsafe.Pointer, size int32) int32
+func nacl_exception_handler(fn, arg unsafe.Pointer) int32
+func nacl_sem_create(flag int32) int32
+func nacl_sem_wait(sem int32) int32
+func nacl_sem_post(sem int32) int32
+func nacl_mutex_create(flag int32) int32
+func nacl_mutex_lock(mutex int32) int32
+func nacl_mutex_trylock(mutex int32) int32
+func nacl_mutex_unlock(mutex int32) int32
+func nacl_cond_create(flag int32) int32
+func nacl_cond_wait(cond, n int32) int32
+func nacl_cond_signal(cond int32) int32
+func nacl_cond_broadcast(cond int32) int32
+func nacl_cond_timed_wait_abs(cond, lock int32, ts unsafe.Pointer) int32
+func nacl_thread_create(fn, stk, tls, xx unsafe.Pointer) int32
+func nacl_nanosleep(ts, extra unsafe.Pointer) int32
+
+const stackSystem = 0
+
+func os_sigpipe() {
+ gothrow("too many writes on closed pipe")
+}
diff --git a/src/runtime/os_nacl.h b/src/runtime/os_nacl.h
new file mode 100644
index 000000000..7c9d9c242
--- /dev/null
+++ b/src/runtime/os_nacl.h
@@ -0,0 +1,162 @@
+enum {
+ NSIG = 32,
+ SI_USER = 1,
+
+ // native_client/src/trusted/service_runtime/include/sys/errno.h
+ // The errors are mainly copied from Linux.
+ EPERM = 1, /* Operation not permitted */
+ ENOENT = 2, /* No such file or directory */
+ ESRCH = 3, /* No such process */
+ EINTR = 4, /* Interrupted system call */
+ EIO = 5, /* I/O error */
+ ENXIO = 6, /* No such device or address */
+ E2BIG = 7, /* Argument list too long */
+ ENOEXEC = 8, /* Exec format error */
+ EBADF = 9, /* Bad file number */
+ ECHILD = 10, /* No child processes */
+ EAGAIN = 11, /* Try again */
+ ENOMEM = 12, /* Out of memory */
+ EACCES = 13, /* Permission denied */
+ EFAULT = 14, /* Bad address */
+ EBUSY = 16, /* Device or resource busy */
+ EEXIST = 17, /* File exists */
+ EXDEV = 18, /* Cross-device link */
+ ENODEV = 19, /* No such device */
+ ENOTDIR = 20, /* Not a directory */
+ EISDIR = 21, /* Is a directory */
+ EINVAL = 22, /* Invalid argument */
+ ENFILE = 23, /* File table overflow */
+ EMFILE = 24, /* Too many open files */
+ ENOTTY = 25, /* Not a typewriter */
+ EFBIG = 27, /* File too large */
+ ENOSPC = 28, /* No space left on device */
+ ESPIPE = 29, /* Illegal seek */
+ EROFS = 30, /* Read-only file system */
+ EMLINK = 31, /* Too many links */
+ EPIPE = 32, /* Broken pipe */
+ ENAMETOOLONG = 36, /* File name too long */
+ ENOSYS = 38, /* Function not implemented */
+ EDQUOT = 122, /* Quota exceeded */
+ EDOM = 33, /* Math arg out of domain of func */
+ ERANGE = 34, /* Math result not representable */
+ EDEADLK = 35, /* Deadlock condition */
+ ENOLCK = 37, /* No record locks available */
+ ENOTEMPTY = 39, /* Directory not empty */
+ ELOOP = 40, /* Too many symbolic links */
+ ENOMSG = 42, /* No message of desired type */
+ EIDRM = 43, /* Identifier removed */
+ ECHRNG = 44, /* Channel number out of range */
+ EL2NSYNC = 45, /* Level 2 not synchronized */
+ EL3HLT = 46, /* Level 3 halted */
+ EL3RST = 47, /* Level 3 reset */
+ ELNRNG = 48, /* Link number out of range */
+ EUNATCH = 49, /* Protocol driver not attached */
+ ENOCSI = 50, /* No CSI structure available */
+ EL2HLT = 51, /* Level 2 halted */
+ EBADE = 52, /* Invalid exchange */
+ EBADR = 53, /* Invalid request descriptor */
+ EXFULL = 54, /* Exchange full */
+ ENOANO = 55, /* No anode */
+ EBADRQC = 56, /* Invalid request code */
+ EBADSLT = 57, /* Invalid slot */
+ EDEADLOCK = EDEADLK, /* File locking deadlock error */
+ EBFONT = 59, /* Bad font file fmt */
+ ENOSTR = 60, /* Device not a stream */
+ ENODATA = 61, /* No data (for no delay io) */
+ ETIME = 62, /* Timer expired */
+ ENOSR = 63, /* Out of streams resources */
+ ENONET = 64, /* Machine is not on the network */
+ ENOPKG = 65, /* Package not installed */
+ EREMOTE = 66, /* The object is remote */
+ ENOLINK = 67, /* The link has been severed */
+ EADV = 68, /* Advertise error */
+ ESRMNT = 69, /* Srmount error */
+ ECOMM = 70, /* Communication error on send */
+ EPROTO = 71, /* Protocol error */
+ EMULTIHOP = 72, /* Multihop attempted */
+ EDOTDOT = 73, /* Cross mount point (not really error) */
+ EBADMSG = 74, /* Trying to read unreadable message */
+ EOVERFLOW = 75, /* Value too large for defined data type */
+ ENOTUNIQ = 76, /* Given log. name not unique */
+ EBADFD = 77, /* f.d. invalid for this operation */
+ EREMCHG = 78, /* Remote address changed */
+ ELIBACC = 79, /* Can't access a needed shared lib */
+ ELIBBAD = 80, /* Accessing a corrupted shared lib */
+ ELIBSCN = 81, /* .lib section in a.out corrupted */
+ ELIBMAX = 82, /* Attempting to link in too many libs */
+ ELIBEXEC = 83, /* Attempting to exec a shared library */
+ EILSEQ = 84,
+ EUSERS = 87,
+ ENOTSOCK = 88, /* Socket operation on non-socket */
+ EDESTADDRREQ = 89, /* Destination address required */
+ EMSGSIZE = 90, /* Message too long */
+ EPROTOTYPE = 91, /* Protocol wrong type for socket */
+ ENOPROTOOPT = 92, /* Protocol not available */
+ EPROTONOSUPPORT = 93, /* Unknown protocol */
+ ESOCKTNOSUPPORT = 94, /* Socket type not supported */
+ EOPNOTSUPP = 95, /* Operation not supported on transport endpoint */
+ EPFNOSUPPORT = 96, /* Protocol family not supported */
+ EAFNOSUPPORT = 97, /* Address family not supported by protocol family */
+ EADDRINUSE = 98, /* Address already in use */
+ EADDRNOTAVAIL = 99, /* Address not available */
+ ENETDOWN = 100, /* Network interface is not configured */
+ ENETUNREACH = 101, /* Network is unreachable */
+ ENETRESET = 102,
+ ECONNABORTED = 103, /* Connection aborted */
+ ECONNRESET = 104, /* Connection reset by peer */
+ ENOBUFS = 105, /* No buffer space available */
+ EISCONN = 106, /* Socket is already connected */
+ ENOTCONN = 107, /* Socket is not connected */
+ ESHUTDOWN = 108, /* Can't send after socket shutdown */
+ ETOOMANYREFS = 109,
+ ETIMEDOUT = 110, /* Connection timed out */
+ ECONNREFUSED = 111, /* Connection refused */
+ EHOSTDOWN = 112, /* Host is down */
+ EHOSTUNREACH = 113, /* Host is unreachable */
+ EALREADY = 114, /* Socket already connected */
+ EINPROGRESS = 115, /* Connection already in progress */
+ ESTALE = 116,
+ ENOTSUP = EOPNOTSUPP, /* Not supported */
+ ENOMEDIUM = 123, /* No medium (in tape drive) */
+ ECANCELED = 125, /* Operation canceled. */
+ ELBIN = 2048, /* Inode is remote (not really error) */
+ EFTYPE = 2049, /* Inappropriate file type or format */
+ ENMFILE = 2050, /* No more files */
+ EPROCLIM = 2051,
+ ENOSHARE = 2052, /* No such host or network path */
+ ECASECLASH = 2053, /* Filename exists with different case */
+ EWOULDBLOCK = EAGAIN, /* Operation would block */
+
+ // native_client/src/trusted/service_runtime/include/bits/mman.h.
+ // NOTE: DO NOT USE native_client/src/shared/imc/nacl_imc_c.h.
+	// Those MAP_* values are different from these.
+ PROT_NONE = 0x0,
+ PROT_READ = 0x1,
+ PROT_WRITE = 0x2,
+ PROT_EXEC = 0x4,
+
+ MAP_SHARED = 0x1,
+ MAP_PRIVATE = 0x2,
+ MAP_FIXED = 0x10,
+ MAP_ANON = 0x20,
+};
+typedef byte* kevent_udata;
+
+int32 runtime·nacl_exception_stack(byte*, int32);
+int32 runtime·nacl_exception_handler(void*, void*);
+int32 runtime·nacl_sem_create(int32);
+int32 runtime·nacl_sem_wait(int32);
+int32 runtime·nacl_sem_post(int32);
+int32 runtime·nacl_mutex_create(int32);
+int32 runtime·nacl_mutex_lock(int32);
+int32 runtime·nacl_mutex_trylock(int32);
+int32 runtime·nacl_mutex_unlock(int32);
+int32 runtime·nacl_cond_create(int32);
+int32 runtime·nacl_cond_wait(int32, int32);
+int32 runtime·nacl_cond_signal(int32);
+int32 runtime·nacl_cond_broadcast(int32);
+int32 runtime·nacl_cond_timed_wait_abs(int32, int32, Timespec*);
+int32 runtime·nacl_thread_create(void*, void*, void*, void*);
+int32 runtime·nacl_nanosleep(Timespec*, Timespec*);
+
+void runtime·sigpanic(void);
diff --git a/src/runtime/os_nacl_arm.c b/src/runtime/os_nacl_arm.c
new file mode 100644
index 000000000..1248ea644
--- /dev/null
+++ b/src/runtime/os_nacl_arm.c
@@ -0,0 +1,24 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "textflag.h"
+
+void
+runtime·checkgoarm(void)
+{
+ return; // NaCl/ARM only supports ARMv7
+}
+
+#pragma textflag NOSPLIT
+int64
+runtime·cputicks(void)
+{
+ // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1().
+ // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
+ // TODO: need more entropy to better seed fastrand1.
+ return runtime·nanotime();
+}
diff --git a/src/runtime/os_netbsd.c b/src/runtime/os_netbsd.c
new file mode 100644
index 000000000..d6c3bc826
--- /dev/null
+++ b/src/runtime/os_netbsd.c
@@ -0,0 +1,396 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+#include "stack.h"
+#include "textflag.h"
+
+enum
+{
+ ESRCH = 3,
+ ENOTSUP = 91,
+
+ // From NetBSD's <sys/time.h>
+ CLOCK_REALTIME = 0,
+ CLOCK_VIRTUAL = 1,
+ CLOCK_PROF = 2,
+ CLOCK_MONOTONIC = 3
+};
+
+extern SigTab runtime·sigtab[];
+
+static Sigset sigset_none;
+static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, };
+
+extern void runtime·getcontext(UcontextT *context);
+extern int32 runtime·lwp_create(UcontextT *context, uintptr flags, void *lwpid);
+extern void runtime·lwp_mcontext_init(void *mc, void *stack, M *mp, G *gp, void (*fn)(void));
+extern int32 runtime·lwp_park(Timespec *abstime, int32 unpark, void *hint, void *unparkhint);
+extern int32 runtime·lwp_unpark(int32 lwp, void *hint);
+extern int32 runtime·lwp_self(void);
+
+// From NetBSD's <sys/sysctl.h>
+#define CTL_HW 6
+#define HW_NCPU 3
+
+static int32
+getncpu(void)
+{
+ uint32 mib[2];
+ uint32 out;
+ int32 ret;
+ uintptr nout;
+
+ // Fetch hw.ncpu via sysctl.
+ mib[0] = CTL_HW;
+ mib[1] = HW_NCPU;
+ nout = sizeof out;
+ out = 0;
+ ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
+ if(ret >= 0)
+ return out;
+ else
+ return 1;
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·semacreate(void)
+{
+ return 1;
+}
+
+static void
+semasleep(void)
+{
+ int64 ns;
+ Timespec ts;
+
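+	// Reassemble the sleep timeout (ns) from the two 32-bit halves passed in scalararg.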
+ ns = (int64)(uint32)g->m->scalararg[0] | (int64)(uint32)g->m->scalararg[1]<<32;
+ g->m->scalararg[0] = 0;
+ g->m->scalararg[1] = 0;
+
+ // spin-mutex lock
+ while(runtime·xchg(&g->m->waitsemalock, 1))
+ runtime·osyield();
+
+ for(;;) {
+ // lock held
+ if(g->m->waitsemacount == 0) {
+ // sleep until semaphore != 0 or timeout.
+ // thrsleep unlocks m->waitsemalock.
+ if(ns < 0) {
+ // TODO(jsing) - potential deadlock!
+ //
+ // There is a potential deadlock here since we
+ // have to release the waitsemalock mutex
+ // before we call lwp_park() to suspend the
+ // thread. This allows another thread to
+ // release the lock and call lwp_unpark()
+ // before the thread is actually suspended.
+ // If this occurs the current thread will end
+ // up sleeping indefinitely. Unfortunately
+ // the NetBSD kernel does not appear to provide
+ // a mechanism for unlocking the userspace
+ // mutex once the thread is actually parked.
+ runtime·atomicstore(&g->m->waitsemalock, 0);
+ runtime·lwp_park(nil, 0, &g->m->waitsemacount, nil);
+ } else {
+ ns = ns + runtime·nanotime();
+ // NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system.
+ ts.tv_nsec = 0;
+ ts.tv_sec = runtime·timediv(ns, 1000000000, (int32*)&ts.tv_nsec);
+ // TODO(jsing) - potential deadlock!
+ // See above for details.
+ runtime·atomicstore(&g->m->waitsemalock, 0);
+ runtime·lwp_park(&ts, 0, &g->m->waitsemacount, nil);
+ }
+ // reacquire lock
+ while(runtime·xchg(&g->m->waitsemalock, 1))
+ runtime·osyield();
+ }
+
+ // lock held (again)
+ if(g->m->waitsemacount != 0) {
+ // semaphore is available.
+ g->m->waitsemacount--;
+ // spin-mutex unlock
+ runtime·atomicstore(&g->m->waitsemalock, 0);
+ g->m->scalararg[0] = 0; // semaphore acquired
+ return;
+ }
+
+ // semaphore not available.
+ // if there is a timeout, stop now.
+ // otherwise keep trying.
+ if(ns >= 0)
+ break;
+ }
+
+ // lock held but giving up
+ // spin-mutex unlock
+ runtime·atomicstore(&g->m->waitsemalock, 0);
+ g->m->scalararg[0] = -1;
+ return;
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·semasleep(int64 ns)
+{
+ int32 r;
+ void (*fn)(void);
+
+ g->m->scalararg[0] = (uint32)ns;
+ g->m->scalararg[1] = (uint32)(ns>>32);
+ fn = semasleep;
+ runtime·onM(&fn);
+ r = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+ return r;
+}
+
+static void badsemawakeup(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·semawakeup(M *mp)
+{
+ uint32 ret;
+ void (*fn)(void);
+ void *oldptr;
+ uintptr oldscalar;
+
+ // spin-mutex lock
+ while(runtime·xchg(&mp->waitsemalock, 1))
+ runtime·osyield();
+ mp->waitsemacount++;
+ // TODO(jsing) - potential deadlock, see semasleep() for details.
+ // Confirm that LWP is parked before unparking...
+ ret = runtime·lwp_unpark(mp->procid, &mp->waitsemacount);
+ if(ret != 0 && ret != ESRCH) {
+ // semawakeup can be called on signal stack.
+ // Save old ptrarg/scalararg so we can restore them.
+ oldptr = g->m->ptrarg[0];
+ oldscalar = g->m->scalararg[0];
+ g->m->ptrarg[0] = mp;
+ g->m->scalararg[0] = ret;
+ fn = badsemawakeup;
+ if(g == g->m->gsignal)
+ fn();
+ else
+ runtime·onM(&fn);
+ g->m->ptrarg[0] = oldptr;
+ g->m->scalararg[0] = oldscalar;
+ }
+ // spin-mutex unlock
+ runtime·atomicstore(&mp->waitsemalock, 0);
+}
+
+static void
+badsemawakeup(void)
+{
+ M *mp;
+ int32 ret;
+
+ mp = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ ret = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+
+ runtime·printf("thrwakeup addr=%p sem=%d ret=%d\n", &mp->waitsemacount, mp->waitsemacount, ret);
+}
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ UcontextT uc;
+ int32 ret;
+
+ if(0) {
+ runtime·printf(
+ "newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
+ stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
+ }
+
+ mp->tls[0] = mp->id; // so 386 asm can find it
+
+ runtime·getcontext(&uc);
+
+ uc.uc_flags = _UC_SIGMASK | _UC_CPU;
+ uc.uc_link = nil;
+ uc.uc_sigmask = sigset_all;
+
+ runtime·lwp_mcontext_init(&uc.uc_mcontext, stk, mp, mp->g0, runtime·mstart);
+
+ ret = runtime·lwp_create(&uc, 0, &mp->procid);
+
+ if(ret < 0) {
+ runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount() - 1, -ret);
+ runtime·throw("runtime.newosproc");
+ }
+}
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = getncpu();
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ #pragma dataflag NOPTR
+ static byte urandom_data[HashRandomBytes];
+ int32 fd;
+ fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = urandom_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024);
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ g->m->procid = runtime·lwp_self();
+
+ // Initialize signal handling
+ runtime·signalstack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+ runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+ runtime·signalstack(nil, 0);
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGBUS:
+ if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGSEGV:
+ if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGFPE:
+ switch(g->sigcode0) {
+ case FPE_INTDIV:
+ runtime·panicstring("integer divide by zero");
+ case FPE_INTOVF:
+ runtime·panicstring("integer overflow");
+ }
+ runtime·panicstring("floating point error");
+ }
+ runtime·panicstring(runtime·sigtab[g->sig].name);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ return 0;
+}
+
+extern void runtime·sigtramp(void);
+
+typedef struct sigaction {
+ union {
+ void (*_sa_handler)(int32);
+ void (*_sa_sigaction)(int32, Siginfo*, void *);
+ } _sa_u; /* signal handler */
+ uint32 sa_mask[4]; /* signal mask to apply */
+ int32 sa_flags; /* see signal options below */
+} SigactionT;
+
+void
+runtime·setsig(int32 i, GoSighandler *fn, bool restart)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
+ if(restart)
+ sa.sa_flags |= SA_RESTART;
+ sa.sa_mask[0] = ~0U;
+ sa.sa_mask[1] = ~0U;
+ sa.sa_mask[2] = ~0U;
+ sa.sa_mask[3] = ~0U;
+	if(fn == runtime·sighandler)
+ fn = (void*)runtime·sigtramp;
+ sa._sa_u._sa_sigaction = (void*)fn;
+ runtime·sigaction(i, &sa, nil);
+}
+
+GoSighandler*
+runtime·getsig(int32 i)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ runtime·sigaction(i, nil, &sa);
+ if((void*)sa._sa_u._sa_sigaction == runtime·sigtramp)
+ return runtime·sighandler;
+ return (void*)sa._sa_u._sa_sigaction;
+}
+
+void
+runtime·signalstack(byte *p, int32 n)
+{
+ StackT st;
+
+ st.ss_sp = (void*)p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ runtime·sigaltstack(&st, nil);
+}
+
+void
+runtime·unblocksignals(void)
+{
+ runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
+}
diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go
new file mode 100644
index 000000000..8792f497e
--- /dev/null
+++ b/src/runtime/os_netbsd.go
@@ -0,0 +1,22 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func setitimer(mode int32, new, old unsafe.Pointer)
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigaltstack(new, old unsafe.Pointer)
+func sigprocmask(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func lwp_tramp()
+func raise(sig int32)
+func getcontext(ctxt unsafe.Pointer)
+func lwp_create(ctxt unsafe.Pointer, flags uintptr, lwpid unsafe.Pointer) int32
+func lwp_park(abstime unsafe.Pointer, unpark int32, hint, unparkhint unsafe.Pointer) int32
+func lwp_unpark(lwp int32, hint unsafe.Pointer) int32
+func lwp_self() int32
+
+const stackSystem = 0
diff --git a/src/runtime/os_netbsd.h b/src/runtime/os_netbsd.h
new file mode 100644
index 000000000..f95db325f
--- /dev/null
+++ b/src/runtime/os_netbsd.h
@@ -0,0 +1,31 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+typedef uintptr kevent_udata;
+
+struct sigaction;
+
+void runtime·sigpanic(void);
+
+void runtime·setitimer(int32, Itimerval*, Itimerval*);
+void runtime·sigaction(int32, struct sigaction*, struct sigaction*);
+void runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
+void runtime·sigprocmask(int32, Sigset*, Sigset*);
+void runtime·unblocksignals(void);
+int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
+extern void runtime·lwp_tramp(void);
+
+enum {
+ SS_DISABLE = 4,
+ SIG_BLOCK = 1,
+ SIG_UNBLOCK = 2,
+ SIG_SETMASK = 3,
+ NSIG = 33,
+ SI_USER = 0,
+
+ // From NetBSD's <sys/ucontext.h>
+ _UC_SIGMASK = 0x01,
+ _UC_CPU = 0x04,
+};
diff --git a/src/runtime/os_netbsd_386.c b/src/runtime/os_netbsd_386.c
new file mode 100644
index 000000000..23e9db3c1
--- /dev/null
+++ b/src/runtime/os_netbsd_386.c
@@ -0,0 +1,17 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+
+void
+runtime·lwp_mcontext_init(McontextT *mc, void *stack, M *mp, G *gp, void (*fn)(void))
+{
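+	// Machine dependent mcontext initialisation for LWP.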
+ mc->__gregs[REG_EIP] = (uint32)runtime·lwp_tramp;
+ mc->__gregs[REG_UESP] = (uint32)stack;
+ mc->__gregs[REG_EBX] = (uint32)mp;
+ mc->__gregs[REG_EDX] = (uint32)gp;
+ mc->__gregs[REG_ESI] = (uint32)fn;
+}
diff --git a/src/runtime/os_netbsd_amd64.c b/src/runtime/os_netbsd_amd64.c
new file mode 100644
index 000000000..226846cbb
--- /dev/null
+++ b/src/runtime/os_netbsd_amd64.c
@@ -0,0 +1,18 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+
+void
+runtime·lwp_mcontext_init(McontextT *mc, void *stack, M *mp, G *gp, void (*fn)(void))
+{
+ // Machine dependent mcontext initialisation for LWP.
+ mc->__gregs[REG_RIP] = (uint64)runtime·lwp_tramp;
+ mc->__gregs[REG_RSP] = (uint64)stack;
+ mc->__gregs[REG_R8] = (uint64)mp;
+ mc->__gregs[REG_R9] = (uint64)gp;
+ mc->__gregs[REG_R12] = (uint64)fn;
+}
diff --git a/src/runtime/os_netbsd_arm.c b/src/runtime/os_netbsd_arm.c
new file mode 100644
index 000000000..9dd4bcdc9
--- /dev/null
+++ b/src/runtime/os_netbsd_arm.c
@@ -0,0 +1,34 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_GOOS_GOARCH.h"
+#include "textflag.h"
+
+void
+runtime·lwp_mcontext_init(McontextT *mc, void *stack, M *mp, G *gp, void (*fn)(void))
+{
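+	// Machine dependent mcontext initialisation for LWP.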
+ mc->__gregs[REG_R15] = (uint32)runtime·lwp_tramp;
+ mc->__gregs[REG_R13] = (uint32)stack;
+ mc->__gregs[REG_R0] = (uint32)mp;
+ mc->__gregs[REG_R1] = (uint32)gp;
+ mc->__gregs[REG_R2] = (uint32)fn;
+}
+
+void
+runtime·checkgoarm(void)
+{
+ // TODO(minux)
+}
+
+#pragma textflag NOSPLIT
+int64
+runtime·cputicks(void)
+{
+ // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1().
+ // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
+ // TODO: need more entropy to better seed fastrand1.
+ return runtime·nanotime();
+}
diff --git a/src/runtime/os_openbsd.c b/src/runtime/os_openbsd.c
new file mode 100644
index 000000000..59abc97b7
--- /dev/null
+++ b/src/runtime/os_openbsd.c
@@ -0,0 +1,348 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+#include "stack.h"
+#include "textflag.h"
+
+enum
+{
+ ESRCH = 3,
+ ENOTSUP = 91,
+
+ // From OpenBSD's sys/time.h
+ CLOCK_REALTIME = 0,
+ CLOCK_VIRTUAL = 1,
+ CLOCK_PROF = 2,
+ CLOCK_MONOTONIC = 3
+};
+
+extern SigTab runtime·sigtab[];
+
+static Sigset sigset_none;
+static Sigset sigset_all = ~(Sigset)0;
+
+extern int32 runtime·tfork(TforkT *param, uintptr psize, M *mp, G *gp, void (*fn)(void));
+extern int32 runtime·thrsleep(void *ident, int32 clock_id, void *tsp, void *lock, const int32 *abort);
+extern int32 runtime·thrwakeup(void *ident, int32 n);
+
+// From OpenBSD's <sys/sysctl.h>
+#define CTL_HW 6
+#define HW_NCPU 3
+
+static int32
+getncpu(void)
+{
+ uint32 mib[2];
+ uint32 out;
+ int32 ret;
+ uintptr nout;
+
+ // Fetch hw.ncpu via sysctl.
+ mib[0] = CTL_HW;
+ mib[1] = HW_NCPU;
+ nout = sizeof out;
+ out = 0;
+ ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
+ if(ret >= 0)
+ return out;
+ else
+ return 1;
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·semacreate(void)
+{
+ return 1;
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·semasleep(int64 ns)
+{
+ Timespec ts;
+
+ // spin-mutex lock
+ while(runtime·xchg(&g->m->waitsemalock, 1))
+ runtime·osyield();
+
+ for(;;) {
+ // lock held
+ if(g->m->waitsemacount == 0) {
+ // sleep until semaphore != 0 or timeout.
+ // thrsleep unlocks m->waitsemalock.
+ if(ns < 0)
+ runtime·thrsleep(&g->m->waitsemacount, 0, nil, &g->m->waitsemalock, nil);
+ else {
+ ns += runtime·nanotime();
+ // NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system.
+ ts.tv_nsec = 0;
+ ts.tv_sec = runtime·timediv(ns, 1000000000, (int32*)&ts.tv_nsec);
+ runtime·thrsleep(&g->m->waitsemacount, CLOCK_MONOTONIC, &ts, &g->m->waitsemalock, nil);
+ }
+ // reacquire lock
+ while(runtime·xchg(&g->m->waitsemalock, 1))
+ runtime·osyield();
+ }
+
+ // lock held (again)
+ if(g->m->waitsemacount != 0) {
+ // semaphore is available.
+ g->m->waitsemacount--;
+ // spin-mutex unlock
+ runtime·atomicstore(&g->m->waitsemalock, 0);
+ return 0; // semaphore acquired
+ }
+
+ // semaphore not available.
+ // if there is a timeout, stop now.
+ // otherwise keep trying.
+ if(ns >= 0)
+ break;
+ }
+
+ // lock held but giving up
+ // spin-mutex unlock
+ runtime·atomicstore(&g->m->waitsemalock, 0);
+ return -1;
+}
+
+static void badsemawakeup(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·semawakeup(M *mp)
+{
+ uint32 ret;
+ void *oldptr;
+ uint32 oldscalar;
+ void (*fn)(void);
+
+ // spin-mutex lock
+ while(runtime·xchg(&mp->waitsemalock, 1))
+ runtime·osyield();
+ mp->waitsemacount++;
+ ret = runtime·thrwakeup(&mp->waitsemacount, 1);
+ if(ret != 0 && ret != ESRCH) {
+ // semawakeup can be called on signal stack.
+ // Save old ptrarg/scalararg so we can restore them.
+ oldptr = g->m->ptrarg[0];
+ oldscalar = g->m->scalararg[0];
+ g->m->ptrarg[0] = mp;
+ g->m->scalararg[0] = ret;
+ fn = badsemawakeup;
+ if(g == g->m->gsignal)
+ fn();
+ else
+ runtime·onM(&fn);
+ g->m->ptrarg[0] = oldptr;
+ g->m->scalararg[0] = oldscalar;
+ }
+ // spin-mutex unlock
+ runtime·atomicstore(&mp->waitsemalock, 0);
+}
+
+static void
+badsemawakeup(void)
+{
+ M *mp;
+ int32 ret;
+
+ mp = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ ret = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+
+ runtime·printf("thrwakeup addr=%p sem=%d ret=%d\n", &mp->waitsemacount, mp->waitsemacount, ret);
+}
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ TforkT param;
+ Sigset oset;
+ int32 ret;
+
+ if(0) {
+ runtime·printf(
+ "newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
+ stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
+ }
+
+ mp->tls[0] = mp->id; // so 386 asm can find it
+
+ param.tf_tcb = (byte*)&mp->tls[0];
+ param.tf_tid = (int32*)&mp->procid;
+ param.tf_stack = stk;
+
+ oset = runtime·sigprocmask(SIG_SETMASK, sigset_all);
+ ret = runtime·tfork(&param, sizeof(param), mp, mp->g0, runtime·mstart);
+ runtime·sigprocmask(SIG_SETMASK, oset);
+
+ if(ret < 0) {
+ runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount() - 1, -ret);
+		if(ret == -ENOTSUP)
+ runtime·printf("runtime: is kern.rthreads disabled?\n");
+ runtime·throw("runtime.newosproc");
+ }
+}
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = getncpu();
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ #pragma dataflag NOPTR
+ static byte urandom_data[HashRandomBytes];
+ int32 fd;
+ fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = urandom_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024);
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ // Initialize signal handling
+ runtime·signalstack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+ runtime·sigprocmask(SIG_SETMASK, sigset_none);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+ runtime·signalstack(nil, 0);
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGBUS:
+ if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGSEGV:
+ if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGFPE:
+ switch(g->sigcode0) {
+ case FPE_INTDIV:
+ runtime·panicstring("integer divide by zero");
+ case FPE_INTOVF:
+ runtime·panicstring("integer overflow");
+ }
+ runtime·panicstring("floating point error");
+ }
+ runtime·panicstring(runtime·sigtab[g->sig].name);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ return 0;
+}
+
+extern void runtime·sigtramp(void);
+
+typedef struct sigaction {
+ union {
+ void (*__sa_handler)(int32);
+ void (*__sa_sigaction)(int32, Siginfo*, void *);
+ } __sigaction_u; /* signal handler */
+ uint32 sa_mask; /* signal mask to apply */
+ int32 sa_flags; /* see signal options below */
+} SigactionT;
+
+void
+runtime·setsig(int32 i, GoSighandler *fn, bool restart)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
+ if(restart)
+ sa.sa_flags |= SA_RESTART;
+ sa.sa_mask = ~0U;
+ if(fn == runtime·sighandler)
+ fn = (void*)runtime·sigtramp;
+ sa.__sigaction_u.__sa_sigaction = (void*)fn;
+ runtime·sigaction(i, &sa, nil);
+}
+
+GoSighandler*
+runtime·getsig(int32 i)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ runtime·sigaction(i, nil, &sa);
+ if((void*)sa.__sigaction_u.__sa_sigaction == runtime·sigtramp)
+ return runtime·sighandler;
+ return (void*)sa.__sigaction_u.__sa_sigaction;
+}
+
+void
+runtime·signalstack(byte *p, int32 n)
+{
+ StackT st;
+
+ st.ss_sp = (void*)p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ runtime·sigaltstack(&st, nil);
+}
+
+void
+runtime·unblocksignals(void)
+{
+ runtime·sigprocmask(SIG_SETMASK, sigset_none);
+}
diff --git a/src/runtime/os_openbsd.go b/src/runtime/os_openbsd.go
new file mode 100644
index 000000000..19e2b45a1
--- /dev/null
+++ b/src/runtime/os_openbsd.go
@@ -0,0 +1,19 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func setitimer(mode int32, new, old unsafe.Pointer)
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigaltstack(new, old unsafe.Pointer)
+func sigprocmask(mode int32, new uint32) uint32
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func raise(sig int32)
+func tfork(param unsafe.Pointer, psize uintptr, mm, gg, fn unsafe.Pointer) int32
+func thrsleep(ident unsafe.Pointer, clock_id int32, tsp, lock, abort unsafe.Pointer) int32
+func thrwakeup(ident unsafe.Pointer, n int32) int32
+
+const stackSystem = 0
diff --git a/src/runtime/os_openbsd.h b/src/runtime/os_openbsd.h
new file mode 100644
index 000000000..6ad98109e
--- /dev/null
+++ b/src/runtime/os_openbsd.h
@@ -0,0 +1,26 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+typedef byte* kevent_udata;
+
+struct sigaction;
+
+void runtime·sigpanic(void);
+
+void runtime·setitimer(int32, Itimerval*, Itimerval*);
+void runtime·sigaction(int32, struct sigaction*, struct sigaction*);
+void runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
+Sigset runtime·sigprocmask(int32, Sigset);
+void runtime·unblocksignals(void);
+int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
+
+enum {
+ SS_DISABLE = 4,
+ SIG_BLOCK = 1,
+ SIG_UNBLOCK = 2,
+ SIG_SETMASK = 3,
+ NSIG = 33,
+ SI_USER = 0,
+};
diff --git a/src/runtime/os_plan9.c b/src/runtime/os_plan9.c
new file mode 100644
index 000000000..853f3ef7a
--- /dev/null
+++ b/src/runtime/os_plan9.c
@@ -0,0 +1,437 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "os_GOOS.h"
+#include "arch_GOARCH.h"
+#include "textflag.h"
+
+int8 *goos = "plan9";
+extern SigTab runtime·sigtab[];
+
+int32 runtime·postnote(int32, int8*);
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ // Initialize stack and goroutine for note handling.
+ mp->gsignal = runtime·malg(32*1024);
+ mp->gsignal->m = mp;
+ mp->notesig = (int8*)runtime·mallocgc(ERRMAX*sizeof(int8), nil, 0);
+
+ // Initialize stack for handling strings from the
+ // errstr system call, as used in package syscall.
+ mp->errstr = (byte*)runtime·mallocgc(ERRMAX*sizeof(byte), nil, 0);
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ // Mask all SSE floating-point exceptions
+ // when running on the 64-bit kernel.
+ runtime·setfpmasks();
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+}
+
+
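+// getproccount counts the processors listed in /dev/sysstat, one line per CPU.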
+static int32
+getproccount(void)
+{
+ int32 fd, i, n, ncpu;
+ byte buf[2048];
+
+ fd = runtime·open("/dev/sysstat", OREAD, 0);
+ if(fd < 0)
+ return 1;
+ ncpu = 0;
+ for(;;) {
+ n = runtime·read(fd, buf, sizeof buf);
+ if(n <= 0)
+ break;
+ for(i = 0; i < n; i++) {
+ if(buf[i] == '\n')
+ ncpu++;
+ }
+ }
+ runtime·close(fd);
+ return ncpu > 0 ? ncpu : 1;
+}
+
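+// getpid reads and parses this process's pid from #c/pid.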
+static int32
+getpid(void)
+{
+ byte b[20], *c;
+ int32 fd;
+
+ runtime·memclr(b, sizeof(b));
+ fd = runtime·open("#c/pid", 0, 0);
+ if(fd >= 0) {
+ runtime·read(fd, b, sizeof(b));
+ runtime·close(fd);
+ }
+ c = b;
+ while(*c == ' ' || *c == '\t')
+ c++;
+ return runtime·atoi(c);
+}
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = getproccount();
+ g->m->procid = getpid();
+ runtime·notify(runtime·sigtramp);
+}
+
+void
+runtime·crash(void)
+{
+ runtime·notify(nil);
+ *(int32*)0 = 0;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ static byte random_data[HashRandomBytes];
+ int32 fd;
+
+ fd = runtime·open("/dev/random", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, random_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = random_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+}
+
+void
+runtime·goenvs(void)
+{
+}
+
+void
+runtime·initsig(void)
+{
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·osyield(void)
+{
+ runtime·sleep(0);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·usleep(uint32 µs)
+{
+ uint32 ms;
+
+ ms = µs/1000;
+ if(ms == 0)
+ ms = 1;
+ runtime·sleep(ms);
+}
+
+#pragma textflag NOSPLIT
+int64
+runtime·nanotime(void)
+{
+ int64 ns, scratch;
+
+ ns = runtime·nsec(&scratch);
+ // TODO(aram): remove hack after I fix _nsec in the pc64 kernel.
+ if(ns == 0)
+ return scratch;
+ return ns;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·itoa(int32 n, byte *p, uint32 len)
+{
+ byte *q, c;
+ uint32 i;
+
+ if(len <= 1)
+ return;
+
+ runtime·memclr(p, len);
+ q = p;
+
+ if(n==0) {
+ *q++ = '0';
+ USED(q);
+ return;
+ }
+ if(n < 0) {
+ *q++ = '-';
+ p++;
+ n = -n;
+ }
+ for(i=0; n > 0 && i < len; i++) {
+ *q++ = '0' + (n%10);
+ n = n/10;
+ }
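+	// The digits were emitted least significant first; reverse them in place.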
+ for(q--; q >= p; ) {
+ c = *p;
+ *p++ = *q;
+ *q-- = c;
+ }
+}
+
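+// goexitsall posts a "go: exit" note to every other process in this program so they exit too.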
+void
+runtime·goexitsall(int8 *status)
+{
+ int8 buf[ERRMAX];
+ M *mp;
+ int32 pid;
+
+ runtime·snprintf((byte*)buf, sizeof buf, "go: exit %s", status);
+ pid = getpid();
+ for(mp=runtime·atomicloadp(&runtime·allm); mp; mp=mp->alllink)
+ if(mp->procid != pid)
+ runtime·postnote(mp->procid, buf);
+}
+
+int32
+runtime·postnote(int32 pid, int8* msg)
+{
+ int32 fd;
+ intgo len;
+ uint8 buf[128];
+ uint8 tmp[16];
+ uint8 *p, *q;
+
+ runtime·memclr(buf, sizeof buf);
+
+ /* build path string /proc/pid/note */
+ q = tmp;
+ p = buf;
+ runtime·itoa(pid, tmp, sizeof tmp);
+ runtime·memmove((void*)p, (void*)"/proc/", 6);
+ for(p += 6; *p++ = *q++; );
+ p--;
+ runtime·memmove((void*)p, (void*)"/note", 5);
+
+ fd = runtime·open((int8*)buf, OWRITE, 0);
+ if(fd < 0)
+ return -1;
+
+ len = runtime·findnull((byte*)msg);
+ if(runtime·write(fd, msg, len) != len) {
+ runtime·close(fd);
+ return -1;
+ }
+ runtime·close(fd);
+ return 0;
+}
+
+static void exit(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·exit(int32 e)
+{
+ void (*fn)(void);
+
+ g->m->scalararg[0] = e;
+ fn = exit;
+ runtime·onM(&fn);
+}
+
+static void
+exit(void)
+{
+ int32 e;
+ byte tmp[16];
+ int8 *status;
+
+ e = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+
+ if(e == 0)
+ status = "";
+ else {
+ /* build error string */
+ runtime·itoa(e, tmp, sizeof tmp);
+ status = (int8*)tmp;
+ }
+
+ runtime·goexitsall(status);
+ runtime·exits(status);
+}
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ mp->tls[0] = mp->id; // so 386 asm can find it
+ if(0){
+ runtime·printf("newosproc stk=%p m=%p g=%p rfork=%p id=%d/%d ostk=%p\n",
+ stk, mp, mp->g0, runtime·rfork, mp->id, (int32)mp->tls[0], &mp);
+ }
+
+ if(runtime·rfork(RFPROC|RFMEM|RFNOWAIT, stk, mp, mp->g0, runtime·mstart) < 0)
+ runtime·throw("newosproc: rfork failed");
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·semacreate(void)
+{
+ return 1;
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·semasleep(int64 ns)
+{
+ int32 ret;
+ int32 ms;
+
+ if(ns >= 0) {
+ ms = runtime·timediv(ns, 1000000, nil);
+ if(ms == 0)
+ ms = 1;
+ ret = runtime·plan9_tsemacquire(&g->m->waitsemacount, ms);
+ if(ret == 1)
+ return 0; // success
+ return -1; // timeout or interrupted
+ }
+
+ while(runtime·plan9_semacquire(&g->m->waitsemacount, 1) < 0) {
+ /* interrupted; try again (c.f. lock_sema.c) */
+ }
+ return 0; // success
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·semawakeup(M *mp)
+{
+ runtime·plan9_semrelease(&mp->waitsemacount, 1);
+}
+
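+// atolwhex parses a signed integer in decimal, octal (leading 0), or hexadecimal (leading 0x/0X) notation.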
+static int64
+atolwhex(byte *p)
+{
+ int64 n;
+ int32 f;
+
+ n = 0;
+ f = 0;
+ while(*p == ' ' || *p == '\t')
+ p++;
+ if(*p == '-' || *p == '+') {
+ if(*p++ == '-')
+ f = 1;
+ while(*p == ' ' || *p == '\t')
+ p++;
+ }
+ if(p[0] == '0' && p[1]) {
+ if(p[1] == 'x' || p[1] == 'X') {
+ p += 2;
+ for(;;) {
+ if('0' <= *p && *p <= '9')
+ n = n*16 + *p++ - '0';
+ else if('a' <= *p && *p <= 'f')
+ n = n*16 + *p++ - 'a' + 10;
+ else if('A' <= *p && *p <= 'F')
+ n = n*16 + *p++ - 'A' + 10;
+ else
+ break;
+ }
+ } else
+ while('0' <= *p && *p <= '7')
+ n = n*8 + *p++ - '0';
+ } else
+ while('0' <= *p && *p <= '9')
+ n = n*10 + *p++ - '0';
+ if(f)
+ n = -n;
+ return n;
+}
+
+void
+runtime·sigpanic(void)
+{
+ byte *p;
+
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGRFAULT:
+ case SIGWFAULT:
+ p = runtime·strstr((byte*)g->m->notesig, (byte*)"addr=")+5;
+ g->sigcode1 = atolwhex(p);
+ if(g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ break;
+ case SIGTRAP:
+ if(g->paniconfault)
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ runtime·throw(g->m->notesig);
+ break;
+ case SIGINTDIV:
+ runtime·panicstring("integer divide by zero");
+ break;
+ case SIGFLOAT:
+ runtime·panicstring("floating point error");
+ break;
+ default:
+ runtime·panicstring(g->m->notesig);
+ break;
+ }
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·read(int32 fd, void *buf, int32 nbytes)
+{
+ return runtime·pread(fd, buf, nbytes, -1LL);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·write(uintptr fd, void *buf, int32 nbytes)
+{
+ return runtime·pwrite((int32)fd, buf, nbytes, -1LL);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ return 0;
+}
+
+#pragma dataflag NOPTR
+static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n";
+
+// This runs on a foreign stack, without an m or a g. No stack split.
+#pragma textflag NOSPLIT
+void
+runtime·badsignal2(void)
+{
+ runtime·pwrite(2, badsignal, sizeof badsignal - 1, -1LL);
+ runtime·exits(badsignal);
+}
diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go
new file mode 100644
index 000000000..c45d22551
--- /dev/null
+++ b/src/runtime/os_plan9.go
@@ -0,0 +1,34 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func pread(fd int32, buf unsafe.Pointer, nbytes int32, offset int64) int32
+func pwrite(fd int32, buf unsafe.Pointer, nbytes int32, offset int64) int32
+func seek(fd int32, offset int64, whence int32) int64
+func exits(msg *byte)
+func brk_(addr unsafe.Pointer) uintptr
+func sleep(ms int32) int32
+func rfork(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
+func plan9_semacquire(addr *uint32, block int32) int32
+func plan9_tsemacquire(addr *uint32, ms int32) int32
+func plan9_semrelease(addr *uint32, count int32) int32
+func notify(fn unsafe.Pointer) int32
+func noted(mode int32) int32
+func nsec(*int64) int64
+func sigtramp(ureg, msg unsafe.Pointer)
+func setfpmasks()
+func errstr() string
+
+// The size of the note handler frame varies among architectures,
+// but 512 bytes should be enough for every implementation.
+const stackSystem = 512
+
+type _Plink uintptr
+
+func os_sigpipe() {
+ gothrow("too many writes on closed pipe")
+}
diff --git a/src/runtime/os_plan9.h b/src/runtime/os_plan9.h
new file mode 100644
index 000000000..57a2cafa7
--- /dev/null
+++ b/src/runtime/os_plan9.h
@@ -0,0 +1,92 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Plan 9-specific system calls
+int32 runtime·pread(int32 fd, void *buf, int32 nbytes, int64 offset);
+int32 runtime·pwrite(int32 fd, void *buf, int32 nbytes, int64 offset);
+int64 runtime·seek(int32 fd, int64 offset, int32 whence);
+void runtime·exits(int8* msg);
+intptr runtime·brk_(void*);
+int32 runtime·sleep(int32 ms);
+int32 runtime·rfork(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void));
+int32 runtime·plan9_semacquire(uint32 *addr, int32 block);
+int32 runtime·plan9_tsemacquire(uint32 *addr, int32 ms);
+int32 runtime·plan9_semrelease(uint32 *addr, int32 count);
+int32 runtime·notify(void (*fn)(void*, int8*));
+int32 runtime·noted(int32);
+int64 runtime·nsec(int64*);
+void runtime·sigtramp(void*, int8*);
+void runtime·sigpanic(void);
+void runtime·goexitsall(int8*);
+void runtime·setfpmasks(void);
+
+/* open */
+enum
+{
+ OREAD = 0,
+ OWRITE = 1,
+ ORDWR = 2,
+ OEXEC = 3,
+ OTRUNC = 16,
+ OCEXEC = 32,
+ ORCLOSE = 64,
+ OEXCL = 0x1000
+};
+
+/* rfork */
+enum
+{
+ RFNAMEG = (1<<0),
+ RFENVG = (1<<1),
+ RFFDG = (1<<2),
+ RFNOTEG = (1<<3),
+ RFPROC = (1<<4),
+ RFMEM = (1<<5),
+ RFNOWAIT = (1<<6),
+ RFCNAMEG = (1<<10),
+ RFCENVG = (1<<11),
+ RFCFDG = (1<<12),
+ RFREND = (1<<13),
+ RFNOMNT = (1<<14)
+};
+
+/* notify */
+enum
+{
+ NCONT = 0,
+ NDFLT = 1
+};
+
+typedef struct Tos Tos;
+typedef intptr _Plink;
+
+struct Tos {
+ struct /* Per process profiling */
+ {
+ _Plink *pp; /* known to be 0(ptr) */
+ _Plink *next; /* known to be 4(ptr) */
+ _Plink *last;
+ _Plink *first;
+ uint32 pid;
+ uint32 what;
+ } prof;
+ uint64 cyclefreq; /* cycle clock frequency if there is one, 0 otherwise */
+ int64 kcycles; /* cycles spent in kernel */
+ int64 pcycles; /* cycles spent in process (kernel + user) */
+ uint32 pid; /* might as well put the pid here */
+ uint32 clock;
+ /* top of stack is here */
+};
+
+enum {
+ NSIG = 14, /* number of signals in runtime·SigTab array */
+ ERRMAX = 128, /* max length of note string */
+
+ /* Notes in runtime·sigtab that are handled by runtime·sigpanic. */
+ SIGRFAULT = 2,
+ SIGWFAULT = 3,
+ SIGINTDIV = 4,
+ SIGFLOAT = 5,
+ SIGTRAP = 6,
+};
diff --git a/src/runtime/os_plan9_386.c b/src/runtime/os_plan9_386.c
new file mode 100644
index 000000000..349086224
--- /dev/null
+++ b/src/runtime/os_plan9_386.c
@@ -0,0 +1,150 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signals_GOOS.h"
+
+void
+runtime·dumpregs(Ureg *u)
+{
+ runtime·printf("ax %x\n", u->ax);
+ runtime·printf("bx %x\n", u->bx);
+ runtime·printf("cx %x\n", u->cx);
+ runtime·printf("dx %x\n", u->dx);
+ runtime·printf("di %x\n", u->di);
+ runtime·printf("si %x\n", u->si);
+ runtime·printf("bp %x\n", u->bp);
+ runtime·printf("sp %x\n", u->sp);
+ runtime·printf("pc %x\n", u->pc);
+ runtime·printf("flags %x\n", u->flags);
+ runtime·printf("cs %x\n", u->cs);
+ runtime·printf("fs %x\n", u->fs);
+ runtime·printf("gs %x\n", u->gs);
+}
+
+int32
+runtime·sighandler(void *v, int8 *note, G *gp)
+{
+ uintptr *sp;
+ SigTab *t;
+ bool crash;
+ Ureg *ureg;
+ intgo len, n;
+ int32 sig, flags;
+
+ ureg = (Ureg*)v;
+
+ // The kernel will never pass us a nil note or ureg so we probably
+ // made a mistake somewhere in runtime·sigtramp.
+ if(ureg == nil || note == nil) {
+ runtime·printf("sighandler: ureg %p note %p\n", ureg, note);
+ goto Throw;
+ }
+
+ // Check that the note is no more than ERRMAX bytes (including
+ // the trailing NUL). We should never receive a longer note.
+ len = runtime·findnull((byte*)note);
+ if(len > ERRMAX-1) {
+ runtime·printf("sighandler: note is longer than ERRMAX\n");
+ goto Throw;
+ }
+
+ // See if the note matches one of the patterns in runtime·sigtab.
+ // Notes that do not match any pattern can be handled at a higher
+ // level by the program but will otherwise be ignored.
+ flags = SigNotify;
+ for(sig = 0; sig < nelem(runtime·sigtab); sig++) {
+ t = &runtime·sigtab[sig];
+ n = runtime·findnull((byte*)t->name);
+ if(len < n)
+ continue;
+ if(runtime·strncmp((byte*)note, (byte*)t->name, n) == 0) {
+ flags = t->flags;
+ break;
+ }
+ }
+
+ if(flags & SigGoExit)
+ runtime·exits(note+9); // Strip "go: exit " prefix.
+
+ if(flags & SigPanic) {
+ // Copy the error string from sigtramp's stack into m->notesig so
+ // we can reliably access it from the panic routines.
+ runtime·memmove(g->m->notesig, note, len+1);
+
+ gp->sig = sig;
+ gp->sigpc = ureg->pc;
+
+ // Only push runtime·sigpanic if PC != 0.
+ //
+ // If PC == 0, probably panicked because of a call to a nil func.
+ // Not pushing that onto SP will make the trace look like a call
+ // to runtime·sigpanic instead. (Otherwise the trace will end at
+ // runtime·sigpanic and we won't get to see who faulted).
+ if(ureg->pc != 0) {
+ sp = (uintptr*)ureg->sp;
+ *--sp = ureg->pc;
+ ureg->sp = (uint32)sp;
+ }
+ ureg->pc = (uintptr)runtime·sigpanic;
+ return NCONT;
+ }
+
+ if(flags & SigNotify) {
+ // TODO(ality): See if os/signal wants it.
+ //if(runtime·sigsend(...))
+ // return NCONT;
+ }
+ if(flags & SigKill)
+ goto Exit;
+ if(!(flags & SigThrow))
+ return NCONT;
+
+Throw:
+ g->m->throwing = 1;
+ g->m->caughtsig = gp;
+ runtime·startpanic();
+
+ runtime·printf("%s\n", note);
+ runtime·printf("PC=%x\n", ureg->pc);
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)) {
+ runtime·goroutineheader(gp);
+ runtime·traceback(ureg->pc, ureg->sp, 0, gp);
+ runtime·tracebackothers(gp);
+ runtime·printf("\n");
+ runtime·dumpregs(ureg);
+ }
+
+ if(crash)
+ runtime·crash();
+
+Exit:
+ runtime·goexitsall(note);
+ runtime·exits(note);
+ return NDFLT; // not reached
+}
+
+void
+runtime·sigenable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·sigdisable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·resetcpuprofiler(int32 hz)
+{
+ // TODO: Enable profiling interrupts.
+
+ g->m->profilehz = hz;
+}
diff --git a/src/runtime/os_plan9_amd64.c b/src/runtime/os_plan9_amd64.c
new file mode 100644
index 000000000..6b0f8ae3a
--- /dev/null
+++ b/src/runtime/os_plan9_amd64.c
@@ -0,0 +1,158 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signals_GOOS.h"
+
+void
+runtime·dumpregs(Ureg *u)
+{
+ runtime·printf("ax %X\n", u->ax);
+ runtime·printf("bx %X\n", u->bx);
+ runtime·printf("cx %X\n", u->cx);
+ runtime·printf("dx %X\n", u->dx);
+ runtime·printf("di %X\n", u->di);
+ runtime·printf("si %X\n", u->si);
+ runtime·printf("bp %X\n", u->bp);
+ runtime·printf("sp %X\n", u->sp);
+ runtime·printf("r8 %X\n", u->r8);
+ runtime·printf("r9 %X\n", u->r9);
+ runtime·printf("r10 %X\n", u->r10);
+ runtime·printf("r11 %X\n", u->r11);
+ runtime·printf("r12 %X\n", u->r12);
+ runtime·printf("r13 %X\n", u->r13);
+ runtime·printf("r14 %X\n", u->r14);
+ runtime·printf("r15 %X\n", u->r15);
+ runtime·printf("ip %X\n", u->ip);
+ runtime·printf("flags %X\n", u->flags);
+ runtime·printf("cs %X\n", (uint64)u->cs);
+ runtime·printf("fs %X\n", (uint64)u->fs);
+ runtime·printf("gs %X\n", (uint64)u->gs);
+}
+
+int32
+runtime·sighandler(void *v, int8 *note, G *gp)
+{
+ uintptr *sp;
+ SigTab *t;
+ bool crash;
+ Ureg *ureg;
+ intgo len, n;
+ int32 sig, flags;
+
+ ureg = (Ureg*)v;
+
+ // The kernel will never pass us a nil note or ureg so we probably
+ // made a mistake somewhere in runtime·sigtramp.
+ if(ureg == nil || note == nil) {
+ runtime·printf("sighandler: ureg %p note %p\n", ureg, note);
+ goto Throw;
+ }
+
+ // Check that the note is no more than ERRMAX bytes (including
+ // the trailing NUL). We should never receive a longer note.
+ len = runtime·findnull((byte*)note);
+ if(len > ERRMAX-1) {
+ runtime·printf("sighandler: note is longer than ERRMAX\n");
+ goto Throw;
+ }
+
+ // See if the note matches one of the patterns in runtime·sigtab.
+ // Notes that do not match any pattern can be handled at a higher
+ // level by the program but will otherwise be ignored.
+ flags = SigNotify;
+ for(sig = 0; sig < nelem(runtime·sigtab); sig++) {
+ t = &runtime·sigtab[sig];
+ n = runtime·findnull((byte*)t->name);
+ if(len < n)
+ continue;
+ if(runtime·strncmp((byte*)note, (byte*)t->name, n) == 0) {
+ flags = t->flags;
+ break;
+ }
+ }
+
+ if(flags & SigGoExit)
+ runtime·exits(note+9); // Strip "go: exit " prefix.
+
+ if(flags & SigPanic) {
+ // Copy the error string from sigtramp's stack into m->notesig so
+ // we can reliably access it from the panic routines.
+ runtime·memmove(g->m->notesig, note, len+1);
+
+ gp->sig = sig;
+ gp->sigpc = ureg->ip;
+
+ // Only push runtime·sigpanic if PC != 0.
+ //
+ // If PC == 0, probably panicked because of a call to a nil func.
+ // Not pushing that onto SP will make the trace look like a call
+ // to runtime·sigpanic instead. (Otherwise the trace will end at
+ // runtime·sigpanic and we won't get to see who faulted).
+ if(ureg->ip != 0) {
+ sp = (uintptr*)ureg->sp;
+ *--sp = ureg->ip;
+ ureg->sp = (uint64)sp;
+ }
+ ureg->ip = (uintptr)runtime·sigpanic;
+ return NCONT;
+ }
+
+ if(flags & SigNotify) {
+ // TODO(ality): See if os/signal wants it.
+ //if(runtime·sigsend(...))
+ // return NCONT;
+ }
+ if(flags & SigKill)
+ goto Exit;
+ if(!(flags & SigThrow))
+ return NCONT;
+
+Throw:
+ g->m->throwing = 1;
+ g->m->caughtsig = gp;
+ runtime·startpanic();
+
+ runtime·printf("%s\n", note);
+ runtime·printf("PC=%X\n", ureg->ip);
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)) {
+ runtime·goroutineheader(gp);
+ runtime·traceback(ureg->ip, ureg->sp, 0, gp);
+ runtime·tracebackothers(gp);
+ runtime·printf("\n");
+ runtime·dumpregs(ureg);
+ }
+
+ if(crash)
+ runtime·crash();
+
+Exit:
+ runtime·goexitsall(note);
+ runtime·exits(note);
+ return NDFLT; // not reached
+}
+
+void
+runtime·sigenable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·sigdisable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·resetcpuprofiler(int32 hz)
+{
+ // TODO: Enable profiling interrupts.
+
+ g->m->profilehz = hz;
+}
diff --git a/src/runtime/os_solaris.c b/src/runtime/os_solaris.c
new file mode 100644
index 000000000..e35d2b997
--- /dev/null
+++ b/src/runtime/os_solaris.c
@@ -0,0 +1,582 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+#include "stack.h"
+#include "textflag.h"
+
+#pragma dynexport runtime·end _end
+#pragma dynexport runtime·etext _etext
+#pragma dynexport runtime·edata _edata
+
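+// libc entry points used by the runtime, resolved from libc.so by the dynamic linker.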
+#pragma dynimport libc·___errno ___errno "libc.so"
+#pragma dynimport libc·clock_gettime clock_gettime "libc.so"
+#pragma dynimport libc·close close "libc.so"
+#pragma dynimport libc·exit exit "libc.so"
+#pragma dynimport libc·fstat fstat "libc.so"
+#pragma dynimport libc·getcontext getcontext "libc.so"
+#pragma dynimport libc·getrlimit getrlimit "libc.so"
+#pragma dynimport libc·malloc malloc "libc.so"
+#pragma dynimport libc·mmap mmap "libc.so"
+#pragma dynimport libc·munmap munmap "libc.so"
+#pragma dynimport libc·open open "libc.so"
+#pragma dynimport libc·pthread_attr_destroy pthread_attr_destroy "libc.so"
+#pragma dynimport libc·pthread_attr_getstack pthread_attr_getstack "libc.so"
+#pragma dynimport libc·pthread_attr_init pthread_attr_init "libc.so"
+#pragma dynimport libc·pthread_attr_setdetachstate pthread_attr_setdetachstate "libc.so"
+#pragma dynimport libc·pthread_attr_setstack pthread_attr_setstack "libc.so"
+#pragma dynimport libc·pthread_create pthread_create "libc.so"
+#pragma dynimport libc·raise raise "libc.so"
+#pragma dynimport libc·read read "libc.so"
+#pragma dynimport libc·select select "libc.so"
+#pragma dynimport libc·sched_yield sched_yield "libc.so"
+#pragma dynimport libc·sem_init sem_init "libc.so"
+#pragma dynimport libc·sem_post sem_post "libc.so"
+#pragma dynimport libc·sem_reltimedwait_np sem_reltimedwait_np "libc.so"
+#pragma dynimport libc·sem_wait sem_wait "libc.so"
+#pragma dynimport libc·setitimer setitimer "libc.so"
+#pragma dynimport libc·sigaction sigaction "libc.so"
+#pragma dynimport libc·sigaltstack sigaltstack "libc.so"
+#pragma dynimport libc·sigprocmask sigprocmask "libc.so"
+#pragma dynimport libc·sysconf sysconf "libc.so"
+#pragma dynimport libc·usleep usleep "libc.so"
+#pragma dynimport libc·write write "libc.so"
+
+extern uintptr libc·___errno;
+extern uintptr libc·clock_gettime;
+extern uintptr libc·close;
+extern uintptr libc·exit;
+extern uintptr libc·fstat;
+extern uintptr libc·getcontext;
+extern uintptr libc·getrlimit;
+extern uintptr libc·malloc;
+extern uintptr libc·mmap;
+extern uintptr libc·munmap;
+extern uintptr libc·open;
+extern uintptr libc·pthread_attr_destroy;
+extern uintptr libc·pthread_attr_getstack;
+extern uintptr libc·pthread_attr_init;
+extern uintptr libc·pthread_attr_setdetachstate;
+extern uintptr libc·pthread_attr_setstack;
+extern uintptr libc·pthread_create;
+extern uintptr libc·raise;
+extern uintptr libc·read;
+extern uintptr libc·sched_yield;
+extern uintptr libc·select;
+extern uintptr libc·sem_init;
+extern uintptr libc·sem_post;
+extern uintptr libc·sem_reltimedwait_np;
+extern uintptr libc·sem_wait;
+extern uintptr libc·setitimer;
+extern uintptr libc·sigaction;
+extern uintptr libc·sigaltstack;
+extern uintptr libc·sigprocmask;
+extern uintptr libc·sysconf;
+extern uintptr libc·usleep;
+extern uintptr libc·write;
+
+void runtime·getcontext(Ucontext *context);
+int32 runtime·pthread_attr_destroy(PthreadAttr* attr);
+int32 runtime·pthread_attr_init(PthreadAttr* attr);
+int32 runtime·pthread_attr_getstack(PthreadAttr* attr, void** addr, uint64* size);
+int32 runtime·pthread_attr_setdetachstate(PthreadAttr* attr, int32 state);
+int32 runtime·pthread_attr_setstack(PthreadAttr* attr, void* addr, uint64 size);
+int32 runtime·pthread_create(Pthread* thread, PthreadAttr* attr, void(*fn)(void), void *arg);
+uint32 runtime·tstart_sysvicall(M *newm);
+int32 runtime·sem_init(SemT* sem, int32 pshared, uint32 value);
+int32 runtime·sem_post(SemT* sem);
+int32 runtime·sem_reltimedwait_np(SemT* sem, Timespec* timeout);
+int32 runtime·sem_wait(SemT* sem);
+int64 runtime·sysconf(int32 name);
+
+extern SigTab runtime·sigtab[];
+static Sigset sigset_none;
+static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, };
+
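+// getncpu returns the number of online processors, via sysconf(_SC_NPROCESSORS_ONLN).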
+static int32
+getncpu(void)
+{
+ int32 n;
+
+ n = (int32)runtime·sysconf(_SC_NPROCESSORS_ONLN);
+ if(n < 1)
+ return 1;
+ return n;
+}
+
+void
+runtime·osinit(void)
+{
+ runtime·ncpu = getncpu();
+}
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ PthreadAttr attr;
+ Sigset oset;
+ Pthread tid;
+ int32 ret;
+
+ USED(stk);
+ if(runtime·pthread_attr_init(&attr) != 0)
+ runtime·throw("pthread_attr_init");
+ if(runtime·pthread_attr_setstack(&attr, 0, 0x200000) != 0)
+ runtime·throw("pthread_attr_setstack");
+ if(runtime·pthread_attr_getstack(&attr, (void**)&mp->g0->stackbase, &mp->g0->stacksize) != 0)
+ runtime·throw("pthread_attr_getstack");
+ if(runtime·pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
+ runtime·throw("pthread_attr_setdetachstate");
+
+ // Disable signals during create, so that the new thread starts
+ // with signals disabled. It will enable them in minit.
+ runtime·sigprocmask(SIG_SETMASK, &sigset_all, &oset);
+ ret = runtime·pthread_create(&tid, &attr, (void (*)(void))runtime·tstart_sysvicall, mp);
+ runtime·sigprocmask(SIG_SETMASK, &oset, nil);
+ if(ret != 0) {
+ runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), ret);
+ runtime·throw("runtime.newosproc");
+ }
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ #pragma dataflag NOPTR
+ static byte urandom_data[HashRandomBytes];
+ int32 fd;
+ fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
+ if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
+ *rnd = urandom_data;
+ *rnd_len = HashRandomBytes;
+ } else {
+ *rnd = nil;
+ *rnd_len = 0;
+ }
+ runtime·close(fd);
+}
+
+void
+runtime·goenvs(void)
+{
+ runtime·goenvs_unix();
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ mp->gsignal = runtime·malg(32*1024);
+ mp->gsignal->m = mp;
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ runtime·asmcgocall(runtime·miniterrno, (void *)libc·___errno);
+ // Initialize signal handling
+ runtime·signalstack((byte*)g->m->gsignal->stackguard - StackGuard, 32*1024);
+ runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+ runtime·signalstack(nil, 0);
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case SIGBUS:
+ if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGSEGV:
+ if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case SIGFPE:
+ switch(g->sigcode0) {
+ case FPE_INTDIV:
+ runtime·panicstring("integer divide by zero");
+ case FPE_INTOVF:
+ runtime·panicstring("integer overflow");
+ }
+ runtime·panicstring("floating point error");
+ }
+ runtime·panicstring(runtime·sigtab[g->sig].name);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ Rlimit rl;
+ extern byte runtime·text[], runtime·end[];
+ uintptr used;
+
+ if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+ return 0;
+ if(rl.rlim_cur >= 0x7fffffff)
+ return 0;
+
+ // Estimate our VM footprint excluding the heap.
+ // Not an exact science: use size of binary plus
+ // some room for thread stacks.
+ used = runtime·end - runtime·text + (64<<20);
+ if(used >= rl.rlim_cur)
+ return 0;
+
+ // If there's not at least 16 MB left, we're probably
+ // not going to be able to do much. Treat as no limit.
+ rl.rlim_cur -= used;
+ if(rl.rlim_cur < (16<<20))
+ return 0;
+
+	return rl.rlim_cur;
+}
+
+void
+runtime·setprof(bool on)
+{
+ USED(on);
+}
+
+extern void runtime·sigtramp(void);
+
+void
+runtime·setsig(int32 i, GoSighandler *fn, bool restart)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
+ if(restart)
+ sa.sa_flags |= SA_RESTART;
+ sa.sa_mask.__sigbits[0] = ~(uint32)0;
+ sa.sa_mask.__sigbits[1] = ~(uint32)0;
+ sa.sa_mask.__sigbits[2] = ~(uint32)0;
+ sa.sa_mask.__sigbits[3] = ~(uint32)0;
+ if(fn == runtime·sighandler)
+ fn = (void*)runtime·sigtramp;
+ *((void**)&sa._funcptr[0]) = (void*)fn;
+ runtime·sigaction(i, &sa, nil);
+}
+
+GoSighandler*
+runtime·getsig(int32 i)
+{
+ SigactionT sa;
+
+ runtime·memclr((byte*)&sa, sizeof sa);
+ runtime·sigaction(i, nil, &sa);
+ if(*((void**)&sa._funcptr[0]) == runtime·sigtramp)
+ return runtime·sighandler;
+ return *((void**)&sa._funcptr[0]);
+}
+
+void
+runtime·signalstack(byte *p, int32 n)
+{
+ StackT st;
+
+ st.ss_sp = (void*)p;
+ st.ss_size = n;
+ st.ss_flags = 0;
+ if(p == nil)
+ st.ss_flags = SS_DISABLE;
+ runtime·sigaltstack(&st, nil);
+}
+
+void
+runtime·unblocksignals(void)
+{
+ runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·semacreate(void)
+{
+ SemT* sem;
+
+ // Call libc's malloc rather than runtime·malloc. This will
+ // allocate space on the C heap. We can't call runtime·malloc
+ // here because it could cause a deadlock.
+ g->m->libcall.fn = (void*)libc·malloc;
+ g->m->libcall.n = 1;
+ runtime·memclr((byte*)&g->m->scratch, sizeof(g->m->scratch));
+ g->m->scratch.v[0] = (uintptr)sizeof(*sem);
+ g->m->libcall.args = (uintptr*)&g->m->scratch;
+ runtime·asmcgocall(runtime·asmsysvicall6, &g->m->libcall);
+ sem = (void*)g->m->libcall.r1;
+ if(runtime·sem_init(sem, 0, 0) != 0)
+ runtime·throw("sem_init");
+ return (uintptr)sem;
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·semasleep(int64 ns)
+{
+ M *m;
+
+ m = g->m;
+ if(ns >= 0) {
+ m->ts.tv_sec = ns / 1000000000LL;
+ m->ts.tv_nsec = ns % 1000000000LL;
+
+ m->libcall.fn = (void*)libc·sem_reltimedwait_np;
+ m->libcall.n = 2;
+ runtime·memclr((byte*)&m->scratch, sizeof(m->scratch));
+ m->scratch.v[0] = m->waitsema;
+ m->scratch.v[1] = (uintptr)&m->ts;
+ m->libcall.args = (uintptr*)&m->scratch;
+ runtime·asmcgocall(runtime·asmsysvicall6, &m->libcall);
+ if(*m->perrno != 0) {
+ if(*m->perrno == ETIMEDOUT || *m->perrno == EAGAIN || *m->perrno == EINTR)
+ return -1;
+ runtime·throw("sem_reltimedwait_np");
+ }
+ return 0;
+ }
+ for(;;) {
+ m->libcall.fn = (void*)libc·sem_wait;
+ m->libcall.n = 1;
+ runtime·memclr((byte*)&m->scratch, sizeof(m->scratch));
+ m->scratch.v[0] = m->waitsema;
+ m->libcall.args = (uintptr*)&m->scratch;
+ runtime·asmcgocall(runtime·asmsysvicall6, &m->libcall);
+ if(m->libcall.r1 == 0)
+ break;
+ if(*m->perrno == EINTR)
+ continue;
+ runtime·throw("sem_wait");
+ }
+ return 0;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·semawakeup(M *mp)
+{
+ SemT* sem = (SemT*)mp->waitsema;
+ if(runtime·sem_post(sem) != 0)
+ runtime·throw("sem_post");
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·close(int32 fd)
+{
+ return runtime·sysvicall1(libc·close, (uintptr)fd);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·exit(int32 r)
+{
+ runtime·sysvicall1(libc·exit, (uintptr)r);
+}
+
+#pragma textflag NOSPLIT
+/* int32 */ void
+runtime·getcontext(Ucontext* context)
+{
+ runtime·sysvicall1(libc·getcontext, (uintptr)context);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·getrlimit(int32 res, Rlimit* rlp)
+{
+ return runtime·sysvicall2(libc·getrlimit, (uintptr)res, (uintptr)rlp);
+}
+
+#pragma textflag NOSPLIT
+uint8*
+runtime·mmap(byte* addr, uintptr len, int32 prot, int32 flags, int32 fildes, uint32 off)
+{
+ return (uint8*)runtime·sysvicall6(libc·mmap, (uintptr)addr, (uintptr)len, (uintptr)prot, (uintptr)flags, (uintptr)fildes, (uintptr)off);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·munmap(byte* addr, uintptr len)
+{
+ runtime·sysvicall2(libc·munmap, (uintptr)addr, (uintptr)len);
+}
+
+extern int64 runtime·nanotime1(void);
+#pragma textflag NOSPLIT
+int64
+runtime·nanotime(void)
+{
+ return runtime·sysvicall0((uintptr)runtime·nanotime1);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·open(int8* path, int32 oflag, int32 mode)
+{
+ return runtime·sysvicall3(libc·open, (uintptr)path, (uintptr)oflag, (uintptr)mode);
+}
+
+int32
+runtime·pthread_attr_destroy(PthreadAttr* attr)
+{
+ return runtime·sysvicall1(libc·pthread_attr_destroy, (uintptr)attr);
+}
+
+int32
+runtime·pthread_attr_getstack(PthreadAttr* attr, void** addr, uint64* size)
+{
+ return runtime·sysvicall3(libc·pthread_attr_getstack, (uintptr)attr, (uintptr)addr, (uintptr)size);
+}
+
+int32
+runtime·pthread_attr_init(PthreadAttr* attr)
+{
+ return runtime·sysvicall1(libc·pthread_attr_init, (uintptr)attr);
+}
+
+int32
+runtime·pthread_attr_setdetachstate(PthreadAttr* attr, int32 state)
+{
+ return runtime·sysvicall2(libc·pthread_attr_setdetachstate, (uintptr)attr, (uintptr)state);
+}
+
+int32
+runtime·pthread_attr_setstack(PthreadAttr* attr, void* addr, uint64 size)
+{
+ return runtime·sysvicall3(libc·pthread_attr_setstack, (uintptr)attr, (uintptr)addr, (uintptr)size);
+}
+
+int32
+runtime·pthread_create(Pthread* thread, PthreadAttr* attr, void(*fn)(void), void *arg)
+{
+ return runtime·sysvicall4(libc·pthread_create, (uintptr)thread, (uintptr)attr, (uintptr)fn, (uintptr)arg);
+}
+
+/* int32 */ void
+runtime·raise(int32 sig)
+{
+ runtime·sysvicall1(libc·raise, (uintptr)sig);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·read(int32 fd, void* buf, int32 nbyte)
+{
+ return runtime·sysvicall3(libc·read, (uintptr)fd, (uintptr)buf, (uintptr)nbyte);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·sem_init(SemT* sem, int32 pshared, uint32 value)
+{
+ return runtime·sysvicall3(libc·sem_init, (uintptr)sem, (uintptr)pshared, (uintptr)value);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·sem_post(SemT* sem)
+{
+ return runtime·sysvicall1(libc·sem_post, (uintptr)sem);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·sem_reltimedwait_np(SemT* sem, Timespec* timeout)
+{
+ return runtime·sysvicall2(libc·sem_reltimedwait_np, (uintptr)sem, (uintptr)timeout);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·sem_wait(SemT* sem)
+{
+ return runtime·sysvicall1(libc·sem_wait, (uintptr)sem);
+}
+
+/* int32 */ void
+runtime·setitimer(int32 which, Itimerval* value, Itimerval* ovalue)
+{
+ runtime·sysvicall3(libc·setitimer, (uintptr)which, (uintptr)value, (uintptr)ovalue);
+}
+
+/* int32 */ void
+runtime·sigaction(int32 sig, struct SigactionT* act, struct SigactionT* oact)
+{
+ runtime·sysvicall3(libc·sigaction, (uintptr)sig, (uintptr)act, (uintptr)oact);
+}
+
+/* int32 */ void
+runtime·sigaltstack(SigaltstackT* ss, SigaltstackT* oss)
+{
+ runtime·sysvicall2(libc·sigaltstack, (uintptr)ss, (uintptr)oss);
+}
+
+/* int32 */ void
+runtime·sigprocmask(int32 how, Sigset* set, Sigset* oset)
+{
+ runtime·sysvicall3(libc·sigprocmask, (uintptr)how, (uintptr)set, (uintptr)oset);
+}
+
+int64
+runtime·sysconf(int32 name)
+{
+ return runtime·sysvicall1(libc·sysconf, (uintptr)name);
+}
+
+extern void runtime·usleep1(uint32);
+
+#pragma textflag NOSPLIT
+void
+runtime·usleep(uint32 µs)
+{
+ runtime·usleep1(µs);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·write(uintptr fd, void* buf, int32 nbyte)
+{
+ return runtime·sysvicall3(libc·write, (uintptr)fd, (uintptr)buf, (uintptr)nbyte);
+}
+
+extern void runtime·osyield1(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·osyield(void)
+{
+	// Check the validity of m because we might be called on the cgo callback
+	// path early enough that there isn't an m available yet.
+ if(g && g->m != nil) {
+ runtime·sysvicall0(libc·sched_yield);
+ return;
+ }
+ runtime·osyield1();
+}
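
The semasleep/semawakeup pair above parks and wakes runtime threads on top of POSIX semaphores: a timed wait returns -1 on ETIMEDOUT (or EAGAIN/EINTR), an untimed wait retries on EINTR, and 0 means a wakeup was consumed. As a rough user-level illustration of that contract only, here is a self-contained Go sketch that uses a buffered channel as the semaphore; the names and the channel-based mechanism are invented for illustration and are not how the runtime itself is implemented.

package main

import (
	"fmt"
	"time"
)

// sema is a user-level stand-in for the per-thread wait semaphore.
type sema chan struct{}

// sleep mimics the semasleep contract: block until a wakeup arrives,
// or give up after ns nanoseconds (ns < 0 means wait forever).
// It returns 0 on wakeup and -1 on timeout.
func (s sema) sleep(ns int64) int32 {
	if ns < 0 {
		<-s
		return 0
	}
	select {
	case <-s:
		return 0
	case <-time.After(time.Duration(ns)):
		return -1
	}
}

// wakeup mimics semawakeup: post the semaphore so one sleeper can return.
func (s sema) wakeup() {
	select {
	case s <- struct{}{}:
	default: // already posted; like posting a binary semaphore twice
	}
}

func main() {
	s := make(sema, 1)
	go func() {
		time.Sleep(10 * time.Millisecond)
		s.wakeup()
	}()
	fmt.Println("short timed wait:", s.sleep(int64(time.Millisecond))) // usually -1 (timeout)
	fmt.Println("long wait:       ", s.sleep(int64(time.Second)))      // 0 once the wakeup lands
}
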
diff --git a/src/runtime/os_solaris.go b/src/runtime/os_solaris.go
new file mode 100644
index 000000000..a5e696678
--- /dev/null
+++ b/src/runtime/os_solaris.go
@@ -0,0 +1,101 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func setitimer(mode int32, new, old unsafe.Pointer)
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigaltstack(new, old unsafe.Pointer)
+func sigprocmask(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func getrlimit(kind int32, limit unsafe.Pointer)
+func miniterrno(fn unsafe.Pointer)
+func raise(sig int32)
+func getcontext(ctxt unsafe.Pointer)
+func tstart_sysvicall(mm unsafe.Pointer) uint32
+func nanotime1() int64
+func usleep1(usec uint32)
+func osyield1()
+func netpollinit()
+func netpollopen(fd uintptr, pd *pollDesc) int32
+func netpollclose(fd uintptr) int32
+func netpollarm(pd *pollDesc, mode int)
+
+type libcFunc byte
+
+var asmsysvicall6 libcFunc
+
+//go:nosplit
+func sysvicall0(fn *libcFunc) uintptr {
+ libcall := &getg().m.libcall
+ libcall.fn = unsafe.Pointer(fn)
+ libcall.n = 0
+ libcall.args = unsafe.Pointer(fn) // it's unused but must be non-nil, otherwise crashes
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
+ return libcall.r1
+}
+
+//go:nosplit
+func sysvicall1(fn *libcFunc, a1 uintptr) uintptr {
+ libcall := &getg().m.libcall
+ libcall.fn = unsafe.Pointer(fn)
+ libcall.n = 1
+ libcall.args = noescape(unsafe.Pointer(&a1))
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
+ return libcall.r1
+}
+
+//go:nosplit
+func sysvicall2(fn *libcFunc, a1, a2 uintptr) uintptr {
+ libcall := &getg().m.libcall
+ libcall.fn = unsafe.Pointer(fn)
+ libcall.n = 2
+ libcall.args = noescape(unsafe.Pointer(&a1))
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
+ return libcall.r1
+}
+
+//go:nosplit
+func sysvicall3(fn *libcFunc, a1, a2, a3 uintptr) uintptr {
+ libcall := &getg().m.libcall
+ libcall.fn = unsafe.Pointer(fn)
+ libcall.n = 3
+ libcall.args = noescape(unsafe.Pointer(&a1))
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
+ return libcall.r1
+}
+
+//go:nosplit
+func sysvicall4(fn *libcFunc, a1, a2, a3, a4 uintptr) uintptr {
+ libcall := &getg().m.libcall
+ libcall.fn = unsafe.Pointer(fn)
+ libcall.n = 4
+ libcall.args = noescape(unsafe.Pointer(&a1))
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
+ return libcall.r1
+}
+
+//go:nosplit
+func sysvicall5(fn *libcFunc, a1, a2, a3, a4, a5 uintptr) uintptr {
+ libcall := &getg().m.libcall
+ libcall.fn = unsafe.Pointer(fn)
+ libcall.n = 5
+ libcall.args = noescape(unsafe.Pointer(&a1))
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
+ return libcall.r1
+}
+
+//go:nosplit
+func sysvicall6(fn *libcFunc, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
+ libcall := &getg().m.libcall
+ libcall.fn = unsafe.Pointer(fn)
+ libcall.n = 6
+ libcall.args = noescape(unsafe.Pointer(&a1))
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
+ return libcall.r1
+}
+
+const stackSystem = 0
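
The sysvicallN helpers above all share one shape: fill in m.libcall with the target function, the argument count, and a pointer to the first argument, then enter a single assembly trampoline (asmsysvicall6) that performs the actual SysV call. The fragment below is only a user-level Go analogue of that dispatch shape, a call descriptor funneled through one dispatcher; the types and names are invented, and it does none of the stack switching or errno handling the real trampoline does.

package main

import "fmt"

// libcall mirrors the shape of the runtime's call descriptor:
// a function, an argument count, the arguments, and a result slot.
type libcall struct {
	fn   func(args []uintptr) uintptr
	n    int
	args []uintptr
	r1   uintptr
}

// dispatch plays the role of the single trampoline: every wrapper
// funnels through it with a filled-in descriptor.
func dispatch(c *libcall) {
	c.r1 = c.fn(c.args[:c.n])
}

// call1 is the analogue of sysvicall1: fill the descriptor, dispatch, return r1.
func call1(fn func([]uintptr) uintptr, a1 uintptr) uintptr {
	c := libcall{fn: fn, n: 1, args: []uintptr{a1}}
	dispatch(&c)
	return c.r1
}

func main() {
	double := func(args []uintptr) uintptr { return args[0] * 2 }
	fmt.Println(call1(double, 21)) // 42
}
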
diff --git a/src/runtime/os_solaris.h b/src/runtime/os_solaris.h
new file mode 100644
index 000000000..3d9e1a240
--- /dev/null
+++ b/src/runtime/os_solaris.h
@@ -0,0 +1,55 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+
+typedef uintptr kevent_udata;
+
+struct sigaction;
+
+void runtime·sigpanic(void);
+
+void runtime·setitimer(int32, Itimerval*, Itimerval*);
+void runtime·sigaction(int32, struct SigactionT*, struct SigactionT*);
+void runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
+void runtime·sigprocmask(int32, Sigset*, Sigset*);
+void runtime·unblocksignals(void);
+int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
+
+
+void runtime·raisesigpipe(void);
+void runtime·setsig(int32, void(*)(int32, Siginfo*, void*, G*), bool);
+void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp);
+void runtime·sigpanic(void);
+
+enum {
+ SS_DISABLE = 2,
+ SIG_BLOCK = 1,
+ SIG_UNBLOCK = 2,
+ SIG_SETMASK = 3,
+ NSIG = 73, /* number of signals in runtime·SigTab array */
+ SI_USER = 0,
+ _UC_SIGMASK = 0x01,
+ _UC_CPU = 0x04,
+ RLIMIT_AS = 10,
+};
+
+typedef struct Rlimit Rlimit;
+struct Rlimit {
+ int64 rlim_cur;
+ int64 rlim_max;
+};
+int32 runtime·getrlimit(int32, Rlimit*);
+
+// Call an external library function described by {fn, a0, ..., an}, with
+// SysV conventions, switching to os stack during the call, if necessary.
+uintptr runtime·sysvicall0(uintptr fn);
+uintptr runtime·sysvicall1(uintptr fn, uintptr a1);
+uintptr runtime·sysvicall2(uintptr fn, uintptr a1, uintptr a2);
+uintptr runtime·sysvicall3(uintptr fn, uintptr a1, uintptr a2, uintptr a3);
+uintptr runtime·sysvicall4(uintptr fn, uintptr a1, uintptr a2, uintptr a3, uintptr a4);
+uintptr runtime·sysvicall5(uintptr fn, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5);
+uintptr runtime·sysvicall6(uintptr fn, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6);
+void runtime·asmsysvicall6(void *c);
+
+void runtime·miniterrno(void *fn);
diff --git a/src/runtime/os_windows.c b/src/runtime/os_windows.c
new file mode 100644
index 000000000..a4d77f6b7
--- /dev/null
+++ b/src/runtime/os_windows.c
@@ -0,0 +1,619 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "type.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "textflag.h"
+
+#pragma dynimport runtime·AddVectoredExceptionHandler AddVectoredExceptionHandler "kernel32.dll"
+#pragma dynimport runtime·CloseHandle CloseHandle "kernel32.dll"
+#pragma dynimport runtime·CreateEvent CreateEventA "kernel32.dll"
+#pragma dynimport runtime·CreateThread CreateThread "kernel32.dll"
+#pragma dynimport runtime·CreateWaitableTimer CreateWaitableTimerA "kernel32.dll"
+#pragma dynimport runtime·CryptAcquireContextW CryptAcquireContextW "advapi32.dll"
+#pragma dynimport runtime·CryptGenRandom CryptGenRandom "advapi32.dll"
+#pragma dynimport runtime·CryptReleaseContext CryptReleaseContext "advapi32.dll"
+#pragma dynimport runtime·DuplicateHandle DuplicateHandle "kernel32.dll"
+#pragma dynimport runtime·ExitProcess ExitProcess "kernel32.dll"
+#pragma dynimport runtime·FreeEnvironmentStringsW FreeEnvironmentStringsW "kernel32.dll"
+#pragma dynimport runtime·GetEnvironmentStringsW GetEnvironmentStringsW "kernel32.dll"
+#pragma dynimport runtime·GetProcAddress GetProcAddress "kernel32.dll"
+#pragma dynimport runtime·GetStdHandle GetStdHandle "kernel32.dll"
+#pragma dynimport runtime·GetSystemInfo GetSystemInfo "kernel32.dll"
+#pragma dynimport runtime·GetThreadContext GetThreadContext "kernel32.dll"
+#pragma dynimport runtime·LoadLibrary LoadLibraryW "kernel32.dll"
+#pragma dynimport runtime·LoadLibraryA LoadLibraryA "kernel32.dll"
+#pragma dynimport runtime·NtWaitForSingleObject NtWaitForSingleObject "ntdll.dll"
+#pragma dynimport runtime·ResumeThread ResumeThread "kernel32.dll"
+#pragma dynimport runtime·SetConsoleCtrlHandler SetConsoleCtrlHandler "kernel32.dll"
+#pragma dynimport runtime·SetEvent SetEvent "kernel32.dll"
+#pragma dynimport runtime·SetProcessPriorityBoost SetProcessPriorityBoost "kernel32.dll"
+#pragma dynimport runtime·SetThreadPriority SetThreadPriority "kernel32.dll"
+#pragma dynimport runtime·SetWaitableTimer SetWaitableTimer "kernel32.dll"
+#pragma dynimport runtime·Sleep Sleep "kernel32.dll"
+#pragma dynimport runtime·SuspendThread SuspendThread "kernel32.dll"
+#pragma dynimport runtime·WaitForSingleObject WaitForSingleObject "kernel32.dll"
+#pragma dynimport runtime·WriteFile WriteFile "kernel32.dll"
+#pragma dynimport runtime·timeBeginPeriod timeBeginPeriod "winmm.dll"
+
+extern void *runtime·AddVectoredExceptionHandler;
+extern void *runtime·CloseHandle;
+extern void *runtime·CreateEvent;
+extern void *runtime·CreateThread;
+extern void *runtime·CreateWaitableTimer;
+extern void *runtime·CryptAcquireContextW;
+extern void *runtime·CryptGenRandom;
+extern void *runtime·CryptReleaseContext;
+extern void *runtime·DuplicateHandle;
+extern void *runtime·ExitProcess;
+extern void *runtime·FreeEnvironmentStringsW;
+extern void *runtime·GetEnvironmentStringsW;
+extern void *runtime·GetProcAddress;
+extern void *runtime·GetStdHandle;
+extern void *runtime·GetSystemInfo;
+extern void *runtime·GetThreadContext;
+extern void *runtime·LoadLibrary;
+extern void *runtime·LoadLibraryA;
+extern void *runtime·NtWaitForSingleObject;
+extern void *runtime·ResumeThread;
+extern void *runtime·SetConsoleCtrlHandler;
+extern void *runtime·SetEvent;
+extern void *runtime·SetProcessPriorityBoost;
+extern void *runtime·SetThreadPriority;
+extern void *runtime·SetWaitableTimer;
+extern void *runtime·Sleep;
+extern void *runtime·SuspendThread;
+extern void *runtime·WaitForSingleObject;
+extern void *runtime·WriteFile;
+extern void *runtime·timeBeginPeriod;
+
+void *runtime·GetQueuedCompletionStatusEx;
+
+extern uintptr runtime·externalthreadhandlerp;
+void runtime·externalthreadhandler(void);
+void runtime·sigtramp(void);
+
+static int32
+getproccount(void)
+{
+ SystemInfo info;
+
+ runtime·stdcall1(runtime·GetSystemInfo, (uintptr)&info);
+ return info.dwNumberOfProcessors;
+}
+
+void
+runtime·osinit(void)
+{
+ void *kernel32;
+
+ runtime·externalthreadhandlerp = (uintptr)runtime·externalthreadhandler;
+
+ runtime·stdcall2(runtime·AddVectoredExceptionHandler, 1, (uintptr)runtime·sigtramp);
+ runtime·stdcall2(runtime·SetConsoleCtrlHandler, (uintptr)runtime·ctrlhandler, 1);
+ runtime·stdcall1(runtime·timeBeginPeriod, 1);
+ runtime·ncpu = getproccount();
+
+ // Windows dynamic priority boosting assumes that a process has different types
+ // of dedicated threads -- GUI, IO, computational, etc. Go processes use
+ // equivalent threads that all do a mix of GUI, IO, computations, etc.
+ // In such context dynamic priority boosting does nothing but harm, so we turn it off.
+ runtime·stdcall2(runtime·SetProcessPriorityBoost, -1, 1);
+
+ kernel32 = runtime·stdcall1(runtime·LoadLibraryA, (uintptr)"kernel32.dll");
+ if(kernel32 != nil) {
+ runtime·GetQueuedCompletionStatusEx = runtime·stdcall2(runtime·GetProcAddress, (uintptr)kernel32, (uintptr)"GetQueuedCompletionStatusEx");
+ }
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·get_random_data(byte **rnd, int32 *rnd_len)
+{
+ uintptr handle;
+ *rnd = nil;
+ *rnd_len = 0;
+ if(runtime·stdcall5(runtime·CryptAcquireContextW, (uintptr)&handle, (uintptr)nil, (uintptr)nil,
+ 1 /* PROV_RSA_FULL */,
+ 0xf0000000U /* CRYPT_VERIFYCONTEXT */) != 0) {
+ static byte random_data[HashRandomBytes];
+ if(runtime·stdcall3(runtime·CryptGenRandom, handle, HashRandomBytes, (uintptr)&random_data[0])) {
+ *rnd = random_data;
+ *rnd_len = HashRandomBytes;
+ }
+ runtime·stdcall2(runtime·CryptReleaseContext, handle, 0);
+ }
+}
+
+void
+runtime·goenvs(void)
+{
+ extern Slice syscall·envs;
+
+ uint16 *env;
+ String *s;
+ int32 i, n;
+ uint16 *p;
+
+ env = runtime·stdcall0(runtime·GetEnvironmentStringsW);
+
+ n = 0;
+ for(p=env; *p; n++)
+ p += runtime·findnullw(p)+1;
+
+ s = runtime·mallocgc(n*sizeof s[0], nil, 0);
+
+ p = env;
+ for(i=0; i<n; i++) {
+ s[i] = runtime·gostringw(p);
+ p += runtime·findnullw(p)+1;
+ }
+ syscall·envs.array = (byte*)s;
+ syscall·envs.len = n;
+ syscall·envs.cap = n;
+
+ runtime·stdcall1(runtime·FreeEnvironmentStringsW, (uintptr)env);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·exit(int32 code)
+{
+ runtime·stdcall1(runtime·ExitProcess, code);
+}
+
+#pragma textflag NOSPLIT
+int32
+runtime·write(uintptr fd, void *buf, int32 n)
+{
+ void *handle;
+ uint32 written;
+
+ written = 0;
+ switch(fd) {
+ case 1:
+ handle = runtime·stdcall1(runtime·GetStdHandle, -11);
+ break;
+ case 2:
+ handle = runtime·stdcall1(runtime·GetStdHandle, -12);
+ break;
+ default:
+ // assume fd is real windows handle.
+ handle = (void*)fd;
+ break;
+ }
+ runtime·stdcall5(runtime·WriteFile, (uintptr)handle, (uintptr)buf, n, (uintptr)&written, 0);
+ return written;
+}
+
+#define INFINITE ((uintptr)0xFFFFFFFF)
+
+#pragma textflag NOSPLIT
+int32
+runtime·semasleep(int64 ns)
+{
+ // store ms in ns to save stack space
+ if(ns < 0)
+ ns = INFINITE;
+ else {
+ ns = runtime·timediv(ns, 1000000, nil);
+ if(ns == 0)
+ ns = 1;
+ }
+ if(runtime·stdcall2(runtime·WaitForSingleObject, (uintptr)g->m->waitsema, ns) != 0)
+ return -1; // timeout
+ return 0;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·semawakeup(M *mp)
+{
+ runtime·stdcall1(runtime·SetEvent, mp->waitsema);
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·semacreate(void)
+{
+ return (uintptr)runtime·stdcall4(runtime·CreateEvent, 0, 0, 0, 0);
+}
+
+#define STACK_SIZE_PARAM_IS_A_RESERVATION ((uintptr)0x00010000)
+
+void
+runtime·newosproc(M *mp, void *stk)
+{
+ void *thandle;
+
+ USED(stk);
+
+ thandle = runtime·stdcall6(runtime·CreateThread,
+ (uintptr)nil, 0x20000, (uintptr)runtime·tstart_stdcall, (uintptr)mp,
+ STACK_SIZE_PARAM_IS_A_RESERVATION, (uintptr)nil);
+ if(thandle == nil) {
+ runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), runtime·getlasterror());
+ runtime·throw("runtime.newosproc");
+ }
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+void
+runtime·mpreinit(M *mp)
+{
+ USED(mp);
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+void
+runtime·minit(void)
+{
+ void *thandle;
+
+ // -1 = current process, -2 = current thread
+ runtime·stdcall7(runtime·DuplicateHandle, -1, -2, -1, (uintptr)&thandle, 0, 0, DUPLICATE_SAME_ACCESS);
+ runtime·atomicstorep(&g->m->thread, thandle);
+}
+
+// Called from dropm to undo the effect of an minit.
+void
+runtime·unminit(void)
+{
+}
+
+// Described in http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/
+typedef struct KSYSTEM_TIME {
+ uint32 LowPart;
+ int32 High1Time;
+ int32 High2Time;
+} KSYSTEM_TIME;
+
+const KSYSTEM_TIME* INTERRUPT_TIME = (KSYSTEM_TIME*)0x7ffe0008;
+const KSYSTEM_TIME* SYSTEM_TIME = (KSYSTEM_TIME*)0x7ffe0014;
+
+static void badsystime(void);
+
+#pragma textflag NOSPLIT
+int64
+runtime·systime(KSYSTEM_TIME *timeaddr)
+{
+ KSYSTEM_TIME t;
+ int32 i;
+ void (*fn)(void);
+
+ for(i = 1; i < 10000; i++) {
+ // these fields must be read in that order (see URL above)
+ t.High1Time = timeaddr->High1Time;
+ t.LowPart = timeaddr->LowPart;
+ t.High2Time = timeaddr->High2Time;
+ if(t.High1Time == t.High2Time)
+ return (int64)t.High1Time<<32 | t.LowPart;
+ if((i%100) == 0)
+ runtime·osyield();
+ }
+ fn = badsystime;
+ runtime·onM(&fn);
+ return 0;
+}
+
+#pragma textflag NOSPLIT
+int64
+runtime·unixnano(void)
+{
+ return (runtime·systime(SYSTEM_TIME) - 116444736000000000LL) * 100LL;
+}
+
+static void
+badsystime(void)
+{
+ runtime·throw("interrupt/system time is changing too fast");
+}
+
+#pragma textflag NOSPLIT
+int64
+runtime·nanotime(void)
+{
+ return runtime·systime(INTERRUPT_TIME) * 100LL;
+}
+
+// Calling stdcall on os stack.
+#pragma textflag NOSPLIT
+static void*
+stdcall(void *fn)
+{
+ g->m->libcall.fn = fn;
+ if(g->m->profilehz != 0) {
+ // leave pc/sp for cpu profiler
+ g->m->libcallg = g;
+ g->m->libcallpc = (uintptr)runtime·getcallerpc(&fn);
+ // sp must be the last, because once async cpu profiler finds
+ // all three values to be non-zero, it will use them
+ g->m->libcallsp = (uintptr)runtime·getcallersp(&fn);
+ }
+ runtime·asmcgocall(runtime·asmstdcall, &g->m->libcall);
+ g->m->libcallsp = 0;
+ return (void*)g->m->libcall.r1;
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall0(void *fn)
+{
+ g->m->libcall.n = 0;
+ g->m->libcall.args = &fn; // it's unused but must be non-nil, otherwise crashes
+ return stdcall(fn);
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall1(void *fn, uintptr a0)
+{
+ USED(a0);
+ g->m->libcall.n = 1;
+ g->m->libcall.args = &a0;
+ return stdcall(fn);
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall2(void *fn, uintptr a0, uintptr a1)
+{
+ USED(a0, a1);
+ g->m->libcall.n = 2;
+ g->m->libcall.args = &a0;
+ return stdcall(fn);
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall3(void *fn, uintptr a0, uintptr a1, uintptr a2)
+{
+ USED(a0, a1, a2);
+ g->m->libcall.n = 3;
+ g->m->libcall.args = &a0;
+ return stdcall(fn);
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall4(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3)
+{
+ USED(a0, a1, a2, a3);
+ g->m->libcall.n = 4;
+ g->m->libcall.args = &a0;
+ return stdcall(fn);
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall5(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3, uintptr a4)
+{
+ USED(a0, a1, a2, a3, a4);
+ g->m->libcall.n = 5;
+ g->m->libcall.args = &a0;
+ return stdcall(fn);
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall6(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5)
+{
+ USED(a0, a1, a2, a3, a4, a5);
+ g->m->libcall.n = 6;
+ g->m->libcall.args = &a0;
+ return stdcall(fn);
+}
+
+#pragma textflag NOSPLIT
+void*
+runtime·stdcall7(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6)
+{
+ USED(a0, a1, a2, a3, a4, a5, a6);
+ g->m->libcall.n = 7;
+ g->m->libcall.args = &a0;
+ return stdcall(fn);
+}
+
+extern void runtime·usleep1(uint32);
+
+#pragma textflag NOSPLIT
+void
+runtime·osyield(void)
+{
+ runtime·usleep1(1);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·usleep(uint32 us)
+{
+ // Have 1us units; want 100ns units.
+ runtime·usleep1(10*us);
+}
+
+uint32
+runtime·issigpanic(uint32 code)
+{
+ switch(code) {
+ case EXCEPTION_ACCESS_VIOLATION:
+ case EXCEPTION_INT_DIVIDE_BY_ZERO:
+ case EXCEPTION_INT_OVERFLOW:
+ case EXCEPTION_FLT_DENORMAL_OPERAND:
+ case EXCEPTION_FLT_DIVIDE_BY_ZERO:
+ case EXCEPTION_FLT_INEXACT_RESULT:
+ case EXCEPTION_FLT_OVERFLOW:
+ case EXCEPTION_FLT_UNDERFLOW:
+ return 1;
+ }
+ return 0;
+}
+
+void
+runtime·sigpanic(void)
+{
+ if(!runtime·canpanic(g))
+ runtime·throw("unexpected signal during runtime execution");
+
+ switch(g->sig) {
+ case EXCEPTION_ACCESS_VIOLATION:
+ if(g->sigcode1 < 0x1000 || g->paniconfault) {
+ if(g->sigpc == 0)
+ runtime·panicstring("call of nil func value");
+ runtime·panicstring("invalid memory address or nil pointer dereference");
+ }
+ runtime·printf("unexpected fault address %p\n", g->sigcode1);
+ runtime·throw("fault");
+ case EXCEPTION_INT_DIVIDE_BY_ZERO:
+ runtime·panicstring("integer divide by zero");
+ case EXCEPTION_INT_OVERFLOW:
+ runtime·panicstring("integer overflow");
+ case EXCEPTION_FLT_DENORMAL_OPERAND:
+ case EXCEPTION_FLT_DIVIDE_BY_ZERO:
+ case EXCEPTION_FLT_INEXACT_RESULT:
+ case EXCEPTION_FLT_OVERFLOW:
+ case EXCEPTION_FLT_UNDERFLOW:
+ runtime·panicstring("floating point error");
+ }
+ runtime·throw("fault");
+}
+
+void
+runtime·initsig(void)
+{
+	// The following line keeps sigtramp alive at link stage;
+	// if there's a better way, please write it here.
+ void *p = runtime·sigtramp;
+ USED(p);
+}
+
+uint32
+runtime·ctrlhandler1(uint32 type)
+{
+ int32 s;
+
+ switch(type) {
+ case CTRL_C_EVENT:
+ case CTRL_BREAK_EVENT:
+ s = SIGINT;
+ break;
+ default:
+ return 0;
+ }
+
+ if(runtime·sigsend(s))
+ return 1;
+ runtime·exit(2); // SIGINT, SIGTERM, etc
+ return 0;
+}
+
+extern void runtime·dosigprof(Context *r, G *gp, M *mp);
+extern void runtime·profileloop(void);
+static void *profiletimer;
+
+static void
+profilem(M *mp)
+{
+ extern M runtime·m0;
+ extern uint32 runtime·tls0[];
+ byte rbuf[sizeof(Context)+15];
+ Context *r;
+ void *tls;
+ G *gp;
+
+ tls = mp->tls;
+ if(mp == &runtime·m0)
+ tls = runtime·tls0;
+ gp = *(G**)tls;
+
+ // align Context to 16 bytes
+ r = (Context*)((uintptr)(&rbuf[15]) & ~15);
+ r->ContextFlags = CONTEXT_CONTROL;
+ runtime·stdcall2(runtime·GetThreadContext, (uintptr)mp->thread, (uintptr)r);
+ runtime·dosigprof(r, gp, mp);
+}
+
+void
+runtime·profileloop1(void)
+{
+ M *mp, *allm;
+ void *thread;
+
+ runtime·stdcall2(runtime·SetThreadPriority, -2, THREAD_PRIORITY_HIGHEST);
+
+ for(;;) {
+ runtime·stdcall2(runtime·WaitForSingleObject, (uintptr)profiletimer, -1);
+ allm = runtime·atomicloadp(&runtime·allm);
+ for(mp = allm; mp != nil; mp = mp->alllink) {
+ thread = runtime·atomicloadp(&mp->thread);
+ // Do not profile threads blocked on Notes,
+ // this includes idle worker threads,
+ // idle timer thread, idle heap scavenger, etc.
+ if(thread == nil || mp->profilehz == 0 || mp->blocked)
+ continue;
+ runtime·stdcall1(runtime·SuspendThread, (uintptr)thread);
+ if(mp->profilehz != 0 && !mp->blocked)
+ profilem(mp);
+ runtime·stdcall1(runtime·ResumeThread, (uintptr)thread);
+ }
+ }
+}
+
+void
+runtime·resetcpuprofiler(int32 hz)
+{
+ static Mutex lock;
+ void *timer, *thread;
+ int32 ms;
+ int64 due;
+
+ runtime·lock(&lock);
+ if(profiletimer == nil) {
+ timer = runtime·stdcall3(runtime·CreateWaitableTimer, (uintptr)nil, (uintptr)nil, (uintptr)nil);
+ runtime·atomicstorep(&profiletimer, timer);
+ thread = runtime·stdcall6(runtime·CreateThread,
+ (uintptr)nil, (uintptr)nil, (uintptr)runtime·profileloop, (uintptr)nil, (uintptr)nil, (uintptr)nil);
+ runtime·stdcall2(runtime·SetThreadPriority, (uintptr)thread, THREAD_PRIORITY_HIGHEST);
+ runtime·stdcall1(runtime·CloseHandle, (uintptr)thread);
+ }
+ runtime·unlock(&lock);
+
+ ms = 0;
+ due = 1LL<<63;
+ if(hz > 0) {
+ ms = 1000 / hz;
+ if(ms == 0)
+ ms = 1;
+ due = ms * -10000;
+ }
+ runtime·stdcall6(runtime·SetWaitableTimer,
+ (uintptr)profiletimer, (uintptr)&due, ms, (uintptr)nil, (uintptr)nil, (uintptr)nil);
+ runtime·atomicstore((uint32*)&g->m->profilehz, hz);
+}
+
+uintptr
+runtime·memlimit(void)
+{
+ return 0;
+}
+
+#pragma dataflag NOPTR
+int8 runtime·badsignalmsg[] = "runtime: signal received on thread not created by Go.\n";
+int32 runtime·badsignallen = sizeof runtime·badsignalmsg - 1;
+
+void
+runtime·crash(void)
+{
+ // TODO: This routine should do whatever is needed
+ // to make the Windows program abort/crash as it
+ // would if Go was not intercepting signals.
+ // On Unix the routine would remove the custom signal
+ // handler and then raise a signal (like SIGABRT).
+ // Something like that should happen here.
+ // It's okay to leave this empty for now: if crash returns
+ // the ordinary exit-after-panic happens.
+}
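
runtime·systime above samples a 64-bit time value that Windows publishes as three 32-bit words (LowPart, High1Time, High2Time) and accepts a sample only when the two high words agree, retrying otherwise. The sketch below is a minimal, self-contained Go illustration of that read-until-consistent technique against an invented writer that stores the words in the opposite order; it is not the runtime's code and makes no claim about how the Windows kernel performs the update.

package main

import (
	"fmt"
	"sync/atomic"
)

// splitTime mimics a 64-bit value published as three 32-bit words.
// The (invented) writer stores high2, then low, then high1, so a reader
// that observes high1 == high2 has seen an untorn value.
type splitTime struct {
	low   uint32
	high1 int32
	high2 int32
}

func (t *splitTime) store(v int64) {
	atomic.StoreInt32(&t.high2, int32(v>>32))
	atomic.StoreUint32(&t.low, uint32(v))
	atomic.StoreInt32(&t.high1, int32(v>>32))
}

// load retries until the two high words agree, the same idea as the
// High1Time/High2Time comparison in runtime·systime.
func (t *splitTime) load() int64 {
	for {
		h1 := atomic.LoadInt32(&t.high1)
		lo := atomic.LoadUint32(&t.low)
		h2 := atomic.LoadInt32(&t.high2)
		if h1 == h2 {
			return int64(h1)<<32 | int64(lo)
		}
	}
}

func main() {
	var t splitTime
	t.store(0x123456789)
	fmt.Printf("%#x\n", t.load()) // 0x123456789
}
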
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
new file mode 100644
index 000000000..6a3bfca41
--- /dev/null
+++ b/src/runtime/os_windows.go
@@ -0,0 +1,33 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type stdFunction *byte
+
+func stdcall0(fn stdFunction) uintptr
+func stdcall1(fn stdFunction, a0 uintptr) uintptr
+func stdcall2(fn stdFunction, a0, a1 uintptr) uintptr
+func stdcall3(fn stdFunction, a0, a1, a2 uintptr) uintptr
+func stdcall4(fn stdFunction, a0, a1, a2, a3 uintptr) uintptr
+func stdcall5(fn stdFunction, a0, a1, a2, a3, a4 uintptr) uintptr
+func stdcall6(fn stdFunction, a0, a1, a2, a3, a4, a5 uintptr) uintptr
+func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr
+
+func asmstdcall(fn unsafe.Pointer)
+func getlasterror() uint32
+func setlasterror(err uint32)
+func usleep1(usec uint32)
+func netpollinit()
+func netpollopen(fd uintptr, pd *pollDesc) int32
+func netpollclose(fd uintptr) int32
+func netpollarm(pd *pollDesc, mode int)
+
+const stackSystem = 512 * ptrSize
+
+func os_sigpipe() {
+ gothrow("too many writes on closed pipe")
+}
diff --git a/src/runtime/os_windows.h b/src/runtime/os_windows.h
new file mode 100644
index 000000000..d5d168d77
--- /dev/null
+++ b/src/runtime/os_windows.h
@@ -0,0 +1,42 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+extern void *runtime·LoadLibrary;
+extern void *runtime·GetProcAddress;
+extern void *runtime·GetQueuedCompletionStatusEx;
+
+// Call a Windows function with stdcall conventions,
+// and switch to os stack during the call.
+void runtime·asmstdcall(void *c);
+void *runtime·stdcall0(void *fn);
+void *runtime·stdcall1(void *fn, uintptr a0);
+void *runtime·stdcall2(void *fn, uintptr a0, uintptr a1);
+void *runtime·stdcall3(void *fn, uintptr a0, uintptr a1, uintptr a2);
+void *runtime·stdcall4(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3);
+void *runtime·stdcall5(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3, uintptr a4);
+void *runtime·stdcall6(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5);
+void *runtime·stdcall7(void *fn, uintptr a0, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6);
+
+uint32 runtime·getlasterror(void);
+void runtime·setlasterror(uint32 err);
+
+// Function to be called by windows CreateThread
+// to start new os thread.
+uint32 runtime·tstart_stdcall(M *newm);
+
+uint32 runtime·issigpanic(uint32);
+void runtime·sigpanic(void);
+uint32 runtime·ctrlhandler(uint32 type);
+
+// Windows dll function to go callback entry.
+byte *runtime·compilecallback(Eface fn, bool cleanstack);
+void *runtime·callbackasm(void);
+
+void runtime·install_exception_handler(void);
+void runtime·remove_exception_handler(void);
+
+// TODO(brainman): should not need those
+enum {
+ NSIG = 65,
+};
diff --git a/src/runtime/os_windows_386.c b/src/runtime/os_windows_386.c
new file mode 100644
index 000000000..15a5ea5d1
--- /dev/null
+++ b/src/runtime/os_windows_386.c
@@ -0,0 +1,138 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+
+void
+runtime·dumpregs(Context *r)
+{
+ runtime·printf("eax %x\n", r->Eax);
+ runtime·printf("ebx %x\n", r->Ebx);
+ runtime·printf("ecx %x\n", r->Ecx);
+ runtime·printf("edx %x\n", r->Edx);
+ runtime·printf("edi %x\n", r->Edi);
+ runtime·printf("esi %x\n", r->Esi);
+ runtime·printf("ebp %x\n", r->Ebp);
+ runtime·printf("esp %x\n", r->Esp);
+ runtime·printf("eip %x\n", r->Eip);
+ runtime·printf("eflags %x\n", r->EFlags);
+ runtime·printf("cs %x\n", r->SegCs);
+ runtime·printf("fs %x\n", r->SegFs);
+ runtime·printf("gs %x\n", r->SegGs);
+}
+
+#define DBG_PRINTEXCEPTION_C 0x40010006
+
+// Called by sigtramp from Windows VEH handler.
+// Return value signals whether the exception has been handled (-1)
+// or should be made available to other handlers in the chain (0).
+uint32
+runtime·sighandler(ExceptionRecord *info, Context *r, G *gp)
+{
+ bool crash;
+ uintptr *sp;
+ extern byte runtime·text[], runtime·etext[];
+
+ if(info->ExceptionCode == DBG_PRINTEXCEPTION_C) {
+ // This exception is intended to be caught by debuggers.
+ // There is a not-very-informational message like
+ // "Invalid parameter passed to C runtime function"
+ // sitting at info->ExceptionInformation[0] (a wchar_t*),
+ // with length info->ExceptionInformation[1].
+ // The default behavior is to ignore this exception,
+ // but somehow returning 0 here (meaning keep going)
+ // makes the program crash instead. Maybe Windows has no
+ // other handler registered? In any event, ignore it.
+ return -1;
+ }
+
+ // Only handle exception if executing instructions in Go binary
+ // (not Windows library code).
+ if(r->Eip < (uint32)runtime·text || (uint32)runtime·etext < r->Eip)
+ return 0;
+
+ switch(info->ExceptionCode) {
+ case EXCEPTION_BREAKPOINT:
+ // It is unclear whether this is needed, unclear whether it
+ // would work, and unclear how to test it. Leave out for now.
+ // This only handles breakpoint instructions written in the
+ // assembly sources, not breakpoints set by a debugger, and
+ // there are very few of the former.
+ //
+ // r->Eip--; // because 8l generates 2 bytes for INT3
+ // return 0;
+ break;
+ }
+
+ if(gp != nil && runtime·issigpanic(info->ExceptionCode)) {
+ // Make it look like a call to the signal func.
+ // Have to pass arguments out of band since
+ // augmenting the stack frame would break
+ // the unwinding code.
+ gp->sig = info->ExceptionCode;
+ gp->sigcode0 = info->ExceptionInformation[0];
+ gp->sigcode1 = info->ExceptionInformation[1];
+ gp->sigpc = r->Eip;
+
+ // Only push runtime·sigpanic if r->eip != 0.
+ // If r->eip == 0, probably panicked because of a
+ // call to a nil func. Not pushing that onto sp will
+ // make the trace look like a call to runtime·sigpanic instead.
+ // (Otherwise the trace will end at runtime·sigpanic and we
+ // won't get to see who faulted.)
+ if(r->Eip != 0) {
+ sp = (uintptr*)r->Esp;
+ *--sp = r->Eip;
+ r->Esp = (uintptr)sp;
+ }
+ r->Eip = (uintptr)runtime·sigpanic;
+ return -1;
+ }
+
+ if(runtime·panicking) // traceback already printed
+ runtime·exit(2);
+ runtime·panicking = 1;
+
+ runtime·printf("Exception %x %p %p %p\n", info->ExceptionCode,
+ (uintptr)info->ExceptionInformation[0], (uintptr)info->ExceptionInformation[1], (uintptr)r->Eip);
+
+ runtime·printf("PC=%x\n", r->Eip);
+ if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
+ runtime·printf("signal arrived during cgo execution\n");
+ gp = g->m->lockedg;
+ }
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)){
+ runtime·traceback(r->Eip, r->Esp, 0, gp);
+ runtime·tracebackothers(gp);
+ runtime·dumpregs(r);
+ }
+
+ if(crash)
+ runtime·crash();
+
+ runtime·exit(2);
+ return -1; // not reached
+}
+
+void
+runtime·sigenable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·sigdisable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·dosigprof(Context *r, G *gp, M *mp)
+{
+ runtime·sigprof((uint8*)r->Eip, (uint8*)r->Esp, nil, gp, mp);
+}
diff --git a/src/runtime/os_windows_386.go b/src/runtime/os_windows_386.go
new file mode 100644
index 000000000..86a1906c0
--- /dev/null
+++ b/src/runtime/os_windows_386.go
@@ -0,0 +1,11 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// contextPC returns the EIP (program counter) register from the context.
+func contextPC(r *context) uintptr { return uintptr(r.eip) }
+
+// contextSP returns the ESP (stack pointer) register from the context.
+func contextSP(r *context) uintptr { return uintptr(r.esp) }
diff --git a/src/runtime/os_windows_amd64.c b/src/runtime/os_windows_amd64.c
new file mode 100644
index 000000000..9a69d73c0
--- /dev/null
+++ b/src/runtime/os_windows_amd64.c
@@ -0,0 +1,144 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+
+void
+runtime·dumpregs(Context *r)
+{
+ runtime·printf("rax %X\n", r->Rax);
+ runtime·printf("rbx %X\n", r->Rbx);
+ runtime·printf("rcx %X\n", r->Rcx);
+ runtime·printf("rdx %X\n", r->Rdx);
+ runtime·printf("rdi %X\n", r->Rdi);
+ runtime·printf("rsi %X\n", r->Rsi);
+ runtime·printf("rbp %X\n", r->Rbp);
+ runtime·printf("rsp %X\n", r->Rsp);
+ runtime·printf("r8 %X\n", r->R8 );
+ runtime·printf("r9 %X\n", r->R9 );
+ runtime·printf("r10 %X\n", r->R10);
+ runtime·printf("r11 %X\n", r->R11);
+ runtime·printf("r12 %X\n", r->R12);
+ runtime·printf("r13 %X\n", r->R13);
+ runtime·printf("r14 %X\n", r->R14);
+ runtime·printf("r15 %X\n", r->R15);
+ runtime·printf("rip %X\n", r->Rip);
+ runtime·printf("rflags %X\n", r->EFlags);
+ runtime·printf("cs %X\n", (uint64)r->SegCs);
+ runtime·printf("fs %X\n", (uint64)r->SegFs);
+ runtime·printf("gs %X\n", (uint64)r->SegGs);
+}
+
+#define DBG_PRINTEXCEPTION_C 0x40010006
+
+// Called by sigtramp from Windows VEH handler.
+// Return value signals whether the exception has been handled (-1)
+// or should be made available to other handlers in the chain (0).
+uint32
+runtime·sighandler(ExceptionRecord *info, Context *r, G *gp)
+{
+ bool crash;
+ uintptr *sp;
+ extern byte runtime·text[], runtime·etext[];
+
+ if(info->ExceptionCode == DBG_PRINTEXCEPTION_C) {
+ // This exception is intended to be caught by debuggers.
+ // There is a not-very-informational message like
+ // "Invalid parameter passed to C runtime function"
+ // sitting at info->ExceptionInformation[0] (a wchar_t*),
+ // with length info->ExceptionInformation[1].
+ // The default behavior is to ignore this exception,
+ // but somehow returning 0 here (meaning keep going)
+ // makes the program crash instead. Maybe Windows has no
+ // other handler registered? In any event, ignore it.
+ return -1;
+ }
+
+ // Only handle exception if executing instructions in Go binary
+ // (not Windows library code).
+ if(r->Rip < (uint64)runtime·text || (uint64)runtime·etext < r->Rip)
+ return 0;
+
+ switch(info->ExceptionCode) {
+ case EXCEPTION_BREAKPOINT:
+ // It is unclear whether this is needed, unclear whether it
+ // would work, and unclear how to test it. Leave out for now.
+ // This only handles breakpoint instructions written in the
+ // assembly sources, not breakpoints set by a debugger, and
+ // there are very few of the former.
+ break;
+ }
+
+ if(gp != nil && runtime·issigpanic(info->ExceptionCode)) {
+ // Make it look like a call to the signal func.
+ // Have to pass arguments out of band since
+ // augmenting the stack frame would break
+ // the unwinding code.
+ gp->sig = info->ExceptionCode;
+ gp->sigcode0 = info->ExceptionInformation[0];
+ gp->sigcode1 = info->ExceptionInformation[1];
+ gp->sigpc = r->Rip;
+
+ // Only push runtime·sigpanic if r->rip != 0.
+ // If r->rip == 0, probably panicked because of a
+ // call to a nil func. Not pushing that onto sp will
+ // make the trace look like a call to runtime·sigpanic instead.
+ // (Otherwise the trace will end at runtime·sigpanic and we
+ // won't get to see who faulted.)
+ if(r->Rip != 0) {
+ sp = (uintptr*)r->Rsp;
+ *--sp = r->Rip;
+ r->Rsp = (uintptr)sp;
+ }
+ r->Rip = (uintptr)runtime·sigpanic;
+ return -1;
+ }
+
+ if(runtime·panicking) // traceback already printed
+ runtime·exit(2);
+ runtime·panicking = 1;
+
+ runtime·printf("Exception %x %p %p %p\n", info->ExceptionCode,
+ info->ExceptionInformation[0], info->ExceptionInformation[1], r->Rip);
+
+ runtime·printf("PC=%X\n", r->Rip);
+ if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
+ runtime·printf("signal arrived during cgo execution\n");
+ gp = g->m->lockedg;
+ }
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)){
+ runtime·traceback(r->Rip, r->Rsp, 0, gp);
+ runtime·tracebackothers(gp);
+ runtime·dumpregs(r);
+ }
+
+ if(crash)
+ runtime·crash();
+
+ runtime·exit(2);
+ return -1; // not reached
+}
+
+void
+runtime·sigenable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·sigdisable(uint32 sig)
+{
+ USED(sig);
+}
+
+void
+runtime·dosigprof(Context *r, G *gp, M *mp)
+{
+ runtime·sigprof((uint8*)r->Rip, (uint8*)r->Rsp, nil, gp, mp);
+}
diff --git a/src/runtime/os_windows_amd64.go b/src/runtime/os_windows_amd64.go
new file mode 100644
index 000000000..3f4d4d07c
--- /dev/null
+++ b/src/runtime/os_windows_amd64.go
@@ -0,0 +1,11 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// contextPC returns the RIP (program counter) register from the context.
+func contextPC(r *context) uintptr { return uintptr(r.rip) }
+
+// contextSP returns the RSP (stack pointer) register from the context.
+func contextSP(r *context) uintptr { return uintptr(r.rsp) }
diff --git a/src/runtime/panic.c b/src/runtime/panic.c
new file mode 100644
index 000000000..e38ce740b
--- /dev/null
+++ b/src/runtime/panic.c
@@ -0,0 +1,219 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "stack.h"
+#include "malloc.h"
+#include "textflag.h"
+
+// Code related to defer, panic and recover.
+
+// TODO: remove once code is moved to Go
+extern Defer* runtime·newdefer(int32 siz);
+extern	void	runtime·freedefer(Defer *d);
+
+uint32 runtime·panicking;
+static Mutex paniclk;
+
+void
+runtime·deferproc_m(void) {
+ int32 siz;
+ FuncVal *fn;
+ uintptr argp;
+ uintptr callerpc;
+ Defer *d;
+
+ siz = g->m->scalararg[0];
+ fn = g->m->ptrarg[0];
+ argp = g->m->scalararg[1];
+ callerpc = g->m->scalararg[2];
+ g->m->ptrarg[0] = nil;
+
+ d = runtime·newdefer(siz);
+ d->fn = fn;
+ d->pc = callerpc;
+ d->argp = argp;
+ runtime·memmove(d->args, (void*)argp, siz);
+}
+
+// Unwind the stack after a deferred function calls recover
+// after a panic. Then arrange to continue running as though
+// the caller of the deferred function returned normally.
+void
+runtime·recovery_m(G *gp)
+{
+ void *argp;
+ uintptr pc;
+
+ // Info about defer passed in G struct.
+ argp = (void*)gp->sigcode0;
+ pc = (uintptr)gp->sigcode1;
+
+ // Unwind to the stack frame with d's arguments in it.
+ runtime·unwindstack(gp, argp);
+
+ // Make the deferproc for this d return again,
+ // this time returning 1. The calling function will
+ // jump to the standard return epilogue.
+ // The -2*sizeof(uintptr) makes up for the
+ // two extra words that are on the stack at
+ // each call to deferproc.
+	// (The pc we're returning to pops those two words
+	// before it tests the return value.)
+ // On the arm there are 2 saved LRs mixed in too.
+ if(thechar == '5')
+ gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr);
+ else
+ gp->sched.sp = (uintptr)argp - 2*sizeof(uintptr);
+ gp->sched.pc = pc;
+ gp->sched.lr = 0;
+ gp->sched.ret = 1;
+ runtime·gogo(&gp->sched);
+}
+
+// Free stack frames until we hit the last one
+// or until we find the one that contains the sp.
+void
+runtime·unwindstack(G *gp, byte *sp)
+{
+ Stktop *top;
+ byte *stk;
+
+ // Must be called from a different goroutine, usually m->g0.
+ if(g == gp)
+ runtime·throw("unwindstack on self");
+
+ while((top = (Stktop*)gp->stackbase) != 0 && top->stackbase != 0) {
+ stk = (byte*)gp->stackguard - StackGuard;
+ if(stk <= sp && sp < (byte*)gp->stackbase)
+ break;
+ gp->stackbase = top->stackbase;
+ gp->stackguard = top->stackguard;
+ gp->stackguard0 = gp->stackguard;
+ runtime·stackfree(gp, stk, top);
+ }
+
+ if(sp != nil && (sp < (byte*)gp->stackguard - StackGuard || (byte*)gp->stackbase < sp)) {
+ runtime·printf("recover: %p not in [%p, %p]\n", sp, gp->stackguard - StackGuard, gp->stackbase);
+ runtime·throw("bad unwindstack");
+ }
+}
+
+void
+runtime·startpanic_m(void)
+{
+ if(runtime·mheap.cachealloc.size == 0) { // very early
+ runtime·printf("runtime: panic before malloc heap initialized\n");
+ g->m->mallocing = 1; // tell rest of panic not to try to malloc
+ } else if(g->m->mcache == nil) // can happen if called from signal handler or throw
+ g->m->mcache = runtime·allocmcache();
+ switch(g->m->dying) {
+ case 0:
+ g->m->dying = 1;
+ if(g != nil) {
+ g->writebuf.array = nil;
+ g->writebuf.len = 0;
+ g->writebuf.cap = 0;
+ }
+ runtime·xadd(&runtime·panicking, 1);
+ runtime·lock(&paniclk);
+ if(runtime·debug.schedtrace > 0 || runtime·debug.scheddetail > 0)
+ runtime·schedtrace(true);
+ runtime·freezetheworld();
+ return;
+ case 1:
+		// Something failed while panicking, probably the print of the
+ // argument to panic(). Just print a stack trace and exit.
+ g->m->dying = 2;
+ runtime·printf("panic during panic\n");
+ runtime·dopanic(0);
+ runtime·exit(3);
+ case 2:
+ // This is a genuine bug in the runtime, we couldn't even
+ // print the stack trace successfully.
+ g->m->dying = 3;
+ runtime·printf("stack trace unavailable\n");
+ runtime·exit(4);
+ default:
+ // Can't even print! Just exit.
+ runtime·exit(5);
+ }
+}
+
+void
+runtime·dopanic_m(void)
+{
+ G *gp;
+ uintptr sp, pc;
+ static bool didothers;
+ bool crash;
+ int32 t;
+
+ gp = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ pc = g->m->scalararg[0];
+ sp = g->m->scalararg[1];
+ if(gp->sig != 0)
+ runtime·printf("[signal %x code=%p addr=%p pc=%p]\n",
+ gp->sig, gp->sigcode0, gp->sigcode1, gp->sigpc);
+
+ if((t = runtime·gotraceback(&crash)) > 0){
+ if(gp != gp->m->g0) {
+ runtime·printf("\n");
+ runtime·goroutineheader(gp);
+ runtime·traceback(pc, sp, 0, gp);
+ } else if(t >= 2 || g->m->throwing > 0) {
+ runtime·printf("\nruntime stack:\n");
+ runtime·traceback(pc, sp, 0, gp);
+ }
+ if(!didothers) {
+ didothers = true;
+ runtime·tracebackothers(gp);
+ }
+ }
+ runtime·unlock(&paniclk);
+ if(runtime·xadd(&runtime·panicking, -1) != 0) {
+ // Some other m is panicking too.
+ // Let it print what it needs to print.
+ // Wait forever without chewing up cpu.
+ // It will exit when it's done.
+ static Mutex deadlock;
+ runtime·lock(&deadlock);
+ runtime·lock(&deadlock);
+ }
+
+ if(crash)
+ runtime·crash();
+
+ runtime·exit(2);
+}
+
+bool
+runtime·canpanic(G *gp)
+{
+ M *m;
+ uint32 status;
+
+ // Note that g is m->gsignal, different from gp.
+ // Note also that g->m can change at preemption, so m can go stale
+ // if this function ever makes a function call.
+ m = g->m;
+
+ // Is it okay for gp to panic instead of crashing the program?
+ // Yes, as long as it is running Go code, not runtime code,
+ // and not stuck in a system call.
+ if(gp == nil || gp != m->curg)
+ return false;
+ if(m->locks-m->softfloat != 0 || m->mallocing != 0 || m->throwing != 0 || m->gcing != 0 || m->dying != 0)
+ return false;
+ status = runtime·readgstatus(gp);
+ if((status&~Gscan) != Grunning || gp->syscallsp != 0)
+ return false;
+#ifdef GOOS_windows
+ if(m->libcallsp != 0)
+ return false;
+#endif
+ return true;
+}
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
new file mode 100644
index 000000000..1e35561d1
--- /dev/null
+++ b/src/runtime/panic.go
@@ -0,0 +1,216 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+var indexError = error(errorString("index out of range"))
+
+func panicindex() {
+ panic(indexError)
+}
+
+var sliceError = error(errorString("slice bounds out of range"))
+
+func panicslice() {
+ panic(sliceError)
+}
+
+var divideError = error(errorString("integer divide by zero"))
+
+func panicdivide() {
+ panic(divideError)
+}
+
+func throwreturn() {
+ gothrow("no return at end of a typed function - compiler is broken")
+}
+
+func throwinit() {
+ gothrow("recursive call during initialization - linker skew")
+}
+
+// Create a new deferred function fn with siz bytes of arguments.
+// The compiler turns a defer statement into a call to this.
+//go:nosplit
+func deferproc(siz int32, fn *funcval) { // arguments of fn follow fn
+ // the arguments of fn are in a perilous state. The stack map
+ // for deferproc does not describe them. So we can't let garbage
+ // collection or stack copying trigger until we've copied them out
+ // to somewhere safe. deferproc_m does that. Until deferproc_m,
+ // we can only call nosplit routines.
+ argp := uintptr(unsafe.Pointer(&fn))
+ argp += unsafe.Sizeof(fn)
+ if GOARCH == "arm" {
+ argp += ptrSize // skip caller's saved link register
+ }
+ mp := acquirem()
+ mp.scalararg[0] = uintptr(siz)
+ mp.ptrarg[0] = unsafe.Pointer(fn)
+ mp.scalararg[1] = argp
+ mp.scalararg[2] = getcallerpc(unsafe.Pointer(&siz))
+
+ if mp.curg != getg() {
+ // go code on the m stack can't defer
+ gothrow("defer on m")
+ }
+
+ onM(deferproc_m)
+
+ releasem(mp)
+
+ // deferproc returns 0 normally.
+ // a deferred func that stops a panic
+ // makes the deferproc return 1.
+ // the code the compiler generates always
+ // checks the return value and jumps to the
+ // end of the function if deferproc returns != 0.
+ return0()
+ // No code can go here - the C return register has
+ // been set and must not be clobbered.
+}
+
+// Each P holds a pool of defers with arg sizes 8, 24, 40, 56 and 72 bytes.
+// The memory block is 40 (24 on 32-bit systems) bytes larger due to the Defer header.
+// This maps exactly to malloc size classes.
+
+// defer size class for arg size sz
+func deferclass(siz uintptr) uintptr {
+ return (siz + 7) >> 4
+}
+
+// total size of memory block for defer with arg size sz
+func totaldefersize(siz uintptr) uintptr {
+ return (unsafe.Sizeof(_defer{}) - unsafe.Sizeof(_defer{}.args)) + round(siz, ptrSize)
+}
+
+// Ensure that defer arg sizes that map to the same defer size class
+// also map to the same malloc size class.
+func testdefersizes() {
+ var m [len(p{}.deferpool)]int32
+
+ for i := range m {
+ m[i] = -1
+ }
+ for i := uintptr(0); ; i++ {
+ defersc := deferclass(i)
+ if defersc >= uintptr(len(m)) {
+ break
+ }
+ siz := goroundupsize(totaldefersize(i))
+ if m[defersc] < 0 {
+ m[defersc] = int32(siz)
+ continue
+ }
+ if m[defersc] != int32(siz) {
+ print("bad defer size class: i=", i, " siz=", siz, " defersc=", defersc, "\n")
+ gothrow("bad defer size class")
+ }
+ }
+}
+
+// Allocate a Defer, usually using per-P pool.
+// Each defer must be released with freedefer.
+// Note: runs on M stack
+func newdefer(siz int32) *_defer {
+ var d *_defer
+ sc := deferclass(uintptr(siz))
+ mp := acquirem()
+ if sc < uintptr(len(p{}.deferpool)) {
+ pp := mp.p
+ d = pp.deferpool[sc]
+ if d != nil {
+ pp.deferpool[sc] = d.link
+ }
+ }
+ if d == nil {
+ // deferpool is empty or just a big defer
+ total := goroundupsize(totaldefersize(uintptr(siz)))
+ d = (*_defer)(gomallocgc(total, conservative, 0))
+ }
+ d.siz = siz
+ d.special = false
+ gp := mp.curg
+ d.link = gp._defer
+ gp._defer = d
+ releasem(mp)
+ return d
+}
+
+// Free the given defer.
+// The defer cannot be used after this call.
+func freedefer(d *_defer) {
+ if d.special {
+ return
+ }
+ sc := deferclass(uintptr(d.siz))
+ if sc < uintptr(len(p{}.deferpool)) {
+ mp := acquirem()
+ pp := mp.p
+ d.link = pp.deferpool[sc]
+ pp.deferpool[sc] = d
+ releasem(mp)
+ // No need to wipe out pointers in argp/pc/fn/args,
+ // because we empty the pool before GC.
+ }
+}
+
+// Run a deferred function if there is one.
+// The compiler inserts a call to this at the end of any
+// function which calls defer.
+// If there is a deferred function, this will call runtime·jmpdefer,
+// which will jump to the deferred function such that it appears
+// to have been called by the caller of deferreturn at the point
+// just before deferreturn was called. The effect is that deferreturn
+// is called again and again until there are no more deferred functions.
+// Cannot split the stack because we reuse the caller's frame to
+// call the deferred function.
+
+// The single argument isn't actually used - it just has its address
+// taken so it can be matched against pending defers.
+//go:nosplit
+func deferreturn(arg0 uintptr) {
+ gp := getg()
+ d := gp._defer
+ if d == nil {
+ return
+ }
+ argp := uintptr(unsafe.Pointer(&arg0))
+ if d.argp != argp {
+ return
+ }
+
+ // Moving arguments around.
+ // Do not allow preemption here, because the garbage collector
+ // won't know the form of the arguments until the jmpdefer can
+ // flip the PC over to fn.
+ mp := acquirem()
+ memmove(unsafe.Pointer(argp), unsafe.Pointer(&d.args), uintptr(d.siz))
+ fn := d.fn
+ gp._defer = d.link
+ freedefer(d)
+ releasem(mp)
+ jmpdefer(fn, argp)
+}
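Because newdefer pushes each new record onto the front of gp._defer and deferreturn pops from the front, deferred calls within one function run in last-in-first-out order. A user-level illustration in ordinary Go (not runtime code):

	package main

	import "fmt"

	func main() {
		defer fmt.Println("registered first, runs last")
		defer fmt.Println("registered second, runs first")
	}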
+
+// Goexit terminates the goroutine that calls it. No other goroutine is affected.
+// Goexit runs all deferred calls before terminating the goroutine.
+//
+// Calling Goexit from the main goroutine terminates that goroutine
+// without func main returning. Since func main has not returned,
+// the program continues execution of other goroutines.
+// If all other goroutines exit, the program crashes.
+func Goexit() {
+ // Run all deferred functions for the current goroutine.
+ gp := getg()
+ for gp._defer != nil {
+ d := gp._defer
+ gp._defer = d.link
+ reflectcall(unsafe.Pointer(d.fn), unsafe.Pointer(&d.args), uint32(d.siz), uint32(d.siz))
+ freedefer(d)
+ // Note: we ignore recovers here because Goexit isn't a panic
+ }
+ goexit()
+}
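An illustrative use of Goexit from ordinary Go code, relying only on the documented behavior above: deferred calls still run, but statements after Goexit never execute.

	package main

	import (
		"fmt"
		"runtime"
		"sync"
	)

	func main() {
		var wg sync.WaitGroup
		wg.Add(1)
		go func() {
			defer wg.Done()
			defer fmt.Println("deferred call still runs")
			runtime.Goexit()
			fmt.Println("never reached")
		}()
		wg.Wait()
	}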
diff --git a/src/runtime/panic1.go b/src/runtime/panic1.go
new file mode 100644
index 000000000..e87743432
--- /dev/null
+++ b/src/runtime/panic1.go
@@ -0,0 +1,209 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Print all currently active panics. Used when crashing.
+func printpanics(p *_panic) {
+ if p.link != nil {
+ printpanics(p.link)
+ print("\t")
+ }
+ print("panic: ")
+ printany(p.arg)
+ if p.recovered {
+ print(" [recovered]")
+ }
+ print("\n")
+}
+
+// The implementation of the predeclared function panic.
+func gopanic(e interface{}) {
+ gp := getg()
+ if gp.m.curg != gp {
+ gothrow("panic on m stack")
+ }
+ var p _panic
+ var dabort _defer
+ p.arg = e
+ p.link = gp._panic
+ gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
+
+ fn := abortpanic
+ dabort.fn = *(**funcval)(unsafe.Pointer(&fn))
+ dabort.siz = ptrSize
+ dabort.args[0] = noescape((unsafe.Pointer)(&p)) // TODO(khr): why do I need noescape here?
+ dabort.argp = _NoArgs
+ dabort.special = true
+
+ for {
+ d := gp._defer
+ if d == nil {
+ break
+ }
+ // take defer off list in case of recursive panic
+ gp._defer = d.link
+ argp := unsafe.Pointer(d.argp) // must be pointer so it gets adjusted during stack copy
+ pc := d.pc
+
+ // The deferred function may cause another panic,
+ // so reflectcall may not return. Set up a defer
+ // to mark this panic aborted if that happens.
+ dabort.link = gp._defer
+ gp._defer = (*_defer)(noescape(unsafe.Pointer(&dabort)))
+ p._defer = d
+
+ p.argp = getargp(0)
+ reflectcall(unsafe.Pointer(d.fn), unsafe.Pointer(&d.args), uint32(d.siz), uint32(d.siz))
+ p.argp = 0
+
+ // reflectcall did not panic. Remove dabort.
+ if gp._defer != &dabort {
+ gothrow("bad defer entry in panic")
+ }
+ gp._defer = dabort.link
+
+ // trigger shrinkage to test stack copy. See stack_test.go:TestStackPanic
+ //GC()
+
+ freedefer(d)
+ if p.recovered {
+ gp._panic = p.link
+ // Aborted panics are marked but remain on the g.panic list.
+ // Remove them from the list and free the associated defers.
+ for gp._panic != nil && gp._panic.aborted {
+ freedefer(gp._panic._defer)
+ gp._panic = gp._panic.link
+ }
+ if gp._panic == nil { // must be done with signal
+ gp.sig = 0
+ }
+ // Pass information about recovering frame to recovery.
+ gp.sigcode0 = uintptr(argp)
+ gp.sigcode1 = pc
+ mcall(recovery_m)
+ gothrow("recovery failed") // mcall should not return
+ }
+ }
+
+ // ran out of deferred calls - old-school panic now
+ startpanic()
+ printpanics(gp._panic)
+ dopanic(0) // should not return
+ *(*int)(nil) = 0 // not reached
+}
+
+// getargp returns the location where the caller
+// writes outgoing function call arguments.
+//go:nosplit
+func getargp(x int) uintptr {
+ // x is an argument mainly so that we can return its address.
+ // However, we need to make the function complex enough
+ // that it won't be inlined. We always pass x = 0, so this code
+ // does nothing other than keep the compiler from thinking
+ // the function is simple enough to inline.
+ if x > 0 {
+ return getcallersp(unsafe.Pointer(&x)) * 0
+ }
+ return uintptr(noescape(unsafe.Pointer(&x)))
+}
+
+func abortpanic(p *_panic) {
+ p.aborted = true
+}
+
+// The implementation of the predeclared function recover.
+// Cannot split the stack because it needs to reliably
+// find the stack segment of its caller.
+//
+// TODO(rsc): Once we commit to CopyStackAlways,
+// this doesn't need to be nosplit.
+//go:nosplit
+func gorecover(argp uintptr) interface{} {
+ // Must be in a function running as part of a deferred call during the panic.
+ // Must be called from the topmost function of the call
+ // (the function used in the defer statement).
+ // p.argp is the argument pointer of that topmost deferred function call.
+ // Compare against argp reported by caller.
+ // If they match, the caller is the one who can recover.
+ gp := getg()
+ p := gp._panic
+ if p != nil && !p.recovered && argp == p.argp {
+ p.recovered = true
+ return p.arg
+ }
+ return nil
+}
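gorecover succeeds only when recover is called directly from the topmost deferred function of the panicking frame, because only then does the caller's argp match p.argp. A user-level illustration (ordinary Go, not runtime code):

	package main

	import "fmt"

	func safeDivide(a, b int) (q int, err error) {
		defer func() {
			// recover is called directly inside the deferred function,
			// so the argp comparison above succeeds and the panic is stopped.
			if r := recover(); r != nil {
				err = fmt.Errorf("recovered: %v", r)
			}
		}()
		return a / b, nil
	}

	func main() {
		fmt.Println(safeDivide(1, 0)) // 0 recovered: runtime error: integer divide by zero
	}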
+
+//go:nosplit
+func startpanic() {
+ onM(startpanic_m)
+}
+
+//go:nosplit
+func dopanic(unused int) {
+ gp := getg()
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(gp)
+ mp.scalararg[0] = getcallerpc((unsafe.Pointer)(&unused))
+ mp.scalararg[1] = getcallersp((unsafe.Pointer)(&unused))
+ onM(dopanic_m) // should never return
+ *(*int)(nil) = 0
+}
+
+//go:nosplit
+func throw(s *byte) {
+ gp := getg()
+ if gp.m.throwing == 0 {
+ gp.m.throwing = 1
+ }
+ startpanic()
+ print("fatal error: ", gostringnocopy(s), "\n")
+ dopanic(0)
+ *(*int)(nil) = 0 // not reached
+}
+
+//go:nosplit
+func gothrow(s string) {
+ gp := getg()
+ if gp.m.throwing == 0 {
+ gp.m.throwing = 1
+ }
+ startpanic()
+ print("fatal error: ", s, "\n")
+ dopanic(0)
+ *(*int)(nil) = 0 // not reached
+}
+
+func panicstring(s *int8) {
+ // m.softfloat is set during software floating point,
+ // which might cause a fault during a memory load.
+ // It increments m.locks to avoid preemption.
+ // If we're panicking, the software floating point frames
+ // will be unwound, so decrement m.locks as they would.
+ gp := getg()
+ if gp.m.softfloat != 0 {
+ gp.m.locks--
+ gp.m.softfloat = 0
+ }
+
+ if gp.m.mallocing != 0 {
+ print("panic: ", s, "\n")
+ gothrow("panic during malloc")
+ }
+ if gp.m.gcing != 0 {
+ print("panic: ", s, "\n")
+ gothrow("panic during gc")
+ }
+ if gp.m.locks != 0 {
+ print("panic: ", s, "\n")
+ gothrow("panic holding locks")
+ }
+
+ var err interface{}
+ newErrorCString(unsafe.Pointer(s), &err)
+ gopanic(err)
+}
diff --git a/src/runtime/parfor.c b/src/runtime/parfor.c
new file mode 100644
index 000000000..6023193b5
--- /dev/null
+++ b/src/runtime/parfor.c
@@ -0,0 +1,238 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Parallel for algorithm.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+
+struct ParForThread
+{
+ // the thread's iteration space [32lsb, 32msb)
+ uint64 pos;
+ // stats
+ uint64 nsteal;
+ uint64 nstealcnt;
+ uint64 nprocyield;
+ uint64 nosyield;
+ uint64 nsleep;
+ byte pad[CacheLineSize];
+};
+
+ParFor*
+runtime·parforalloc(uint32 nthrmax)
+{
+ ParFor *desc;
+
+ // The ParFor object is followed by CacheLineSize padding
+ // and then nthrmax ParForThread.
+ desc = (ParFor*)runtime·mallocgc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread), nil, 0);
+ desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
+ desc->nthrmax = nthrmax;
+ return desc;
+}
+
+void
+runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
+{
+ uint32 i, begin, end;
+ uint64 *pos;
+
+ if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
+ runtime·printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
+ runtime·throw("parfor: invalid args");
+ }
+
+ desc->body = body;
+ desc->done = 0;
+ desc->nthr = nthr;
+ desc->thrseq = 0;
+ desc->cnt = n;
+ desc->ctx = ctx;
+ desc->wait = wait;
+ desc->nsteal = 0;
+ desc->nstealcnt = 0;
+ desc->nprocyield = 0;
+ desc->nosyield = 0;
+ desc->nsleep = 0;
+ for(i=0; i<nthr; i++) {
+ begin = (uint64)n*i / nthr;
+ end = (uint64)n*(i+1) / nthr;
+ pos = &desc->thr[i].pos;
+ if(((uintptr)pos & 7) != 0)
+ runtime·throw("parforsetup: pos is not aligned");
+ *pos = (uint64)begin | (((uint64)end)<<32);
+ }
+}
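parforsetup hands each of the nthr threads a contiguous slice of the n iterations; the begin/end formulas above make the ranges differ in size by at most one. A Go sketch of the same split arithmetic (illustration only; the runtime uses the C code above):

	package main

	import "fmt"

	func main() {
		const n, nthr = 10, 3
		for i := uint64(0); i < nthr; i++ {
			begin := n * i / nthr
			end := n * (i + 1) / nthr
			fmt.Printf("thread %d: [%d, %d)\n", i, begin, end)
		}
		// thread 0: [0, 3)
		// thread 1: [3, 6)
		// thread 2: [6, 10)
	}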
+
+void
+runtime·parfordo(ParFor *desc)
+{
+ ParForThread *me;
+ uint32 tid, begin, end, begin2, try, victim, i;
+ uint64 *mypos, *victimpos, pos, newpos;
+ void (*body)(ParFor*, uint32);
+ bool idle;
+
+ // Obtain 0-based thread index.
+ tid = runtime·xadd(&desc->thrseq, 1) - 1;
+ if(tid >= desc->nthr) {
+ runtime·printf("tid=%d nthr=%d\n", tid, desc->nthr);
+ runtime·throw("parfor: invalid tid");
+ }
+
+ // If single-threaded, just execute the for serially.
+ if(desc->nthr==1) {
+ for(i=0; i<desc->cnt; i++)
+ desc->body(desc, i);
+ return;
+ }
+
+ body = desc->body;
+ me = &desc->thr[tid];
+ mypos = &me->pos;
+ for(;;) {
+ for(;;) {
+ // While there is local work,
+ // bump low index and execute the iteration.
+ pos = runtime·xadd64(mypos, 1);
+ begin = (uint32)pos-1;
+ end = (uint32)(pos>>32);
+ if(begin < end) {
+ body(desc, begin);
+ continue;
+ }
+ break;
+ }
+
+ // Out of work, need to steal something.
+ idle = false;
+ for(try=0;; try++) {
+ // If we don't see any work for long enough,
+ // increment the done counter...
+ if(try > desc->nthr*4 && !idle) {
+ idle = true;
+ runtime·xadd(&desc->done, 1);
+ }
+ // ...if all threads have incremented the counter,
+ // we are done.
+ if(desc->done + !idle == desc->nthr) {
+ if(!idle)
+ runtime·xadd(&desc->done, 1);
+ goto exit;
+ }
+ // Choose a random victim for stealing.
+ victim = runtime·fastrand1() % (desc->nthr-1);
+ if(victim >= tid)
+ victim++;
+ victimpos = &desc->thr[victim].pos;
+ for(;;) {
+ // See if it has any work.
+ pos = runtime·atomicload64(victimpos);
+ begin = (uint32)pos;
+ end = (uint32)(pos>>32);
+ if(begin+1 >= end) {
+ begin = end = 0;
+ break;
+ }
+ if(idle) {
+ runtime·xadd(&desc->done, -1);
+ idle = false;
+ }
+ begin2 = begin + (end-begin)/2;
+ newpos = (uint64)begin | (uint64)begin2<<32;
+ if(runtime·cas64(victimpos, pos, newpos)) {
+ begin = begin2;
+ break;
+ }
+ }
+ if(begin < end) {
+ // Has successfully stolen some work.
+ if(idle)
+ runtime·throw("parfor: should not be idle");
+ runtime·atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
+ me->nsteal++;
+ me->nstealcnt += end-begin;
+ break;
+ }
+ // Backoff.
+ if(try < desc->nthr) {
+ // nothing
+ } else if (try < 4*desc->nthr) {
+ me->nprocyield++;
+ runtime·procyield(20);
+ // If a caller asked not to wait for the others, exit now
+ // (assume that most work is already done at this point).
+ } else if (!desc->wait) {
+ if(!idle)
+ runtime·xadd(&desc->done, 1);
+ goto exit;
+ } else if (try < 6*desc->nthr) {
+ me->nosyield++;
+ runtime·osyield();
+ } else {
+ me->nsleep++;
+ runtime·usleep(1);
+ }
+ }
+ }
+exit:
+ runtime·xadd64(&desc->nsteal, me->nsteal);
+ runtime·xadd64(&desc->nstealcnt, me->nstealcnt);
+ runtime·xadd64(&desc->nprocyield, me->nprocyield);
+ runtime·xadd64(&desc->nosyield, me->nosyield);
+ runtime·xadd64(&desc->nsleep, me->nsleep);
+ me->nsteal = 0;
+ me->nstealcnt = 0;
+ me->nprocyield = 0;
+ me->nosyield = 0;
+ me->nsleep = 0;
+}
+
+// For testing from Go.
+void
+runtime·newparfor_m(void)
+{
+ g->m->ptrarg[0] = runtime·parforalloc(g->m->scalararg[0]);
+}
+
+void
+runtime·parforsetup_m(void)
+{
+ ParFor *desc;
+ void *ctx;
+ void (*body)(ParFor*, uint32);
+
+ desc = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ ctx = g->m->ptrarg[1];
+ g->m->ptrarg[1] = nil;
+ body = g->m->ptrarg[2];
+ g->m->ptrarg[2] = nil;
+
+ runtime·parforsetup(desc, g->m->scalararg[0], g->m->scalararg[1], ctx, g->m->scalararg[2], body);
+}
+
+void
+runtime·parfordo_m(void)
+{
+ ParFor *desc;
+
+ desc = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ runtime·parfordo(desc);
+}
+
+void
+runtime·parforiters_m(void)
+{
+ ParFor *desc;
+ uintptr tid;
+
+ desc = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ tid = g->m->scalararg[0];
+ g->m->scalararg[0] = desc->thr[tid].pos;
+ g->m->scalararg[1] = desc->thr[tid].pos>>32;
+}
diff --git a/src/runtime/parfor_test.go b/src/runtime/parfor_test.go
new file mode 100644
index 000000000..de64285b8
--- /dev/null
+++ b/src/runtime/parfor_test.go
@@ -0,0 +1,139 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The race detector does not understand ParFor synchronization.
+// +build !race
+
+package runtime_test
+
+import (
+ . "runtime"
+ "testing"
+ "unsafe"
+)
+
+var gdata []uint64
+
+// Simple serial sanity test for parallelfor.
+func TestParFor(t *testing.T) {
+ const P = 1
+ const N = 20
+ data := make([]uint64, N)
+ for i := uint64(0); i < N; i++ {
+ data[i] = i
+ }
+ desc := NewParFor(P)
+	// Avoid making the function a closure: parfor cannot invoke closures.
+	// Since that doesn't happen in the C code, it's not worth supporting
+	// just for the test.
+ gdata = data
+ ParForSetup(desc, P, N, nil, true, func(desc *ParFor, i uint32) {
+ data := gdata
+ data[i] = data[i]*data[i] + 1
+ })
+ ParForDo(desc)
+ for i := uint64(0); i < N; i++ {
+ if data[i] != i*i+1 {
+ t.Fatalf("Wrong element %d: %d", i, data[i])
+ }
+ }
+}
+
+// Test that nonblocking parallelfor does not block.
+func TestParFor2(t *testing.T) {
+ const P = 7
+ const N = 1003
+ data := make([]uint64, N)
+ for i := uint64(0); i < N; i++ {
+ data[i] = i
+ }
+ desc := NewParFor(P)
+ ParForSetup(desc, P, N, (*byte)(unsafe.Pointer(&data)), false, func(desc *ParFor, i uint32) {
+ d := *(*[]uint64)(unsafe.Pointer(desc.Ctx))
+ d[i] = d[i]*d[i] + 1
+ })
+ for p := 0; p < P; p++ {
+ ParForDo(desc)
+ }
+ for i := uint64(0); i < N; i++ {
+ if data[i] != i*i+1 {
+ t.Fatalf("Wrong element %d: %d", i, data[i])
+ }
+ }
+}
+
+// Test that iterations are properly distributed.
+func TestParForSetup(t *testing.T) {
+ const P = 11
+ const N = 101
+ desc := NewParFor(P)
+ for n := uint32(0); n < N; n++ {
+ for p := uint32(1); p <= P; p++ {
+ ParForSetup(desc, p, n, nil, true, func(desc *ParFor, i uint32) {})
+ sum := uint32(0)
+ size0 := uint32(0)
+ end0 := uint32(0)
+ for i := uint32(0); i < p; i++ {
+ begin, end := ParForIters(desc, i)
+ size := end - begin
+ sum += size
+ if i == 0 {
+ size0 = size
+ if begin != 0 {
+ t.Fatalf("incorrect begin: %d (n=%d, p=%d)", begin, n, p)
+ }
+ } else {
+ if size != size0 && size != size0+1 {
+ t.Fatalf("incorrect size: %d/%d (n=%d, p=%d)", size, size0, n, p)
+ }
+ if begin != end0 {
+ t.Fatalf("incorrect begin/end: %d/%d (n=%d, p=%d)", begin, end0, n, p)
+ }
+ }
+ end0 = end
+ }
+ if sum != n {
+ t.Fatalf("incorrect sum: %d/%d (p=%d)", sum, n, p)
+ }
+ }
+ }
+}
+
+// Test parallel parallelfor.
+func TestParForParallel(t *testing.T) {
+ N := uint64(1e7)
+ if testing.Short() {
+ N /= 10
+ }
+ data := make([]uint64, N)
+ for i := uint64(0); i < N; i++ {
+ data[i] = i
+ }
+ P := GOMAXPROCS(-1)
+ c := make(chan bool, P)
+ desc := NewParFor(uint32(P))
+ gdata = data
+ ParForSetup(desc, uint32(P), uint32(N), nil, false, func(desc *ParFor, i uint32) {
+ data := gdata
+ data[i] = data[i]*data[i] + 1
+ })
+ for p := 1; p < P; p++ {
+ go func() {
+ ParForDo(desc)
+ c <- true
+ }()
+ }
+ ParForDo(desc)
+ for p := 1; p < P; p++ {
+ <-c
+ }
+ for i := uint64(0); i < N; i++ {
+ if data[i] != i*i+1 {
+ t.Fatalf("Wrong element %d: %d", i, data[i])
+ }
+ }
+
+ data, desc = nil, nil
+ GC()
+}
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
new file mode 100644
index 000000000..236de54f3
--- /dev/null
+++ b/src/runtime/pprof/pprof.go
@@ -0,0 +1,673 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package pprof writes runtime profiling data in the format expected
+// by the pprof visualization tool.
+// For more information about pprof, see
+// http://code.google.com/p/google-perftools/.
+package pprof
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "runtime"
+ "sort"
+ "strings"
+ "sync"
+ "text/tabwriter"
+)
+
+// BUG(rsc): Profiles are incomplete and inaccurate on NetBSD and OS X.
+// See http://golang.org/issue/6047 for details.
+
+// A Profile is a collection of stack traces showing the call sequences
+// that led to instances of a particular event, such as allocation.
+// Packages can create and maintain their own profiles; the most common
+// use is for tracking resources that must be explicitly closed, such as files
+// or network connections.
+//
+// A Profile's methods can be called from multiple goroutines simultaneously.
+//
+// Each Profile has a unique name. A few profiles are predefined:
+//
+// goroutine - stack traces of all current goroutines
+// heap - a sampling of all heap allocations
+// threadcreate - stack traces that led to the creation of new OS threads
+// block - stack traces that led to blocking on synchronization primitives
+//
+// These predefined profiles maintain themselves and panic on an explicit
+// Add or Remove method call.
+//
+// The CPU profile is not available as a Profile. It has a special API,
+// the StartCPUProfile and StopCPUProfile functions, because it streams
+// output to a writer during profiling.
+//
+type Profile struct {
+ name string
+ mu sync.Mutex
+ m map[interface{}][]uintptr
+ count func() int
+ write func(io.Writer, int) error
+}
+
+// profiles records all registered profiles.
+var profiles struct {
+ mu sync.Mutex
+ m map[string]*Profile
+}
+
+var goroutineProfile = &Profile{
+ name: "goroutine",
+ count: countGoroutine,
+ write: writeGoroutine,
+}
+
+var threadcreateProfile = &Profile{
+ name: "threadcreate",
+ count: countThreadCreate,
+ write: writeThreadCreate,
+}
+
+var heapProfile = &Profile{
+ name: "heap",
+ count: countHeap,
+ write: writeHeap,
+}
+
+var blockProfile = &Profile{
+ name: "block",
+ count: countBlock,
+ write: writeBlock,
+}
+
+func lockProfiles() {
+ profiles.mu.Lock()
+ if profiles.m == nil {
+ // Initial built-in profiles.
+ profiles.m = map[string]*Profile{
+ "goroutine": goroutineProfile,
+ "threadcreate": threadcreateProfile,
+ "heap": heapProfile,
+ "block": blockProfile,
+ }
+ }
+}
+
+func unlockProfiles() {
+ profiles.mu.Unlock()
+}
+
+// NewProfile creates a new profile with the given name.
+// If a profile with that name already exists, NewProfile panics.
+// The convention is to use an 'import/path.' prefix to create
+// separate name spaces for each package.
+func NewProfile(name string) *Profile {
+ lockProfiles()
+ defer unlockProfiles()
+ if name == "" {
+ panic("pprof: NewProfile with empty name")
+ }
+ if profiles.m[name] != nil {
+ panic("pprof: NewProfile name already in use: " + name)
+ }
+ p := &Profile{
+ name: name,
+ m: map[interface{}][]uintptr{},
+ }
+ profiles.m[name] = p
+ return p
+}
+
+// Lookup returns the profile with the given name, or nil if no such profile exists.
+func Lookup(name string) *Profile {
+ lockProfiles()
+ defer unlockProfiles()
+ return profiles.m[name]
+}
+
+// Profiles returns a slice of all the known profiles, sorted by name.
+func Profiles() []*Profile {
+ lockProfiles()
+ defer unlockProfiles()
+
+ var all []*Profile
+ for _, p := range profiles.m {
+ all = append(all, p)
+ }
+
+ sort.Sort(byName(all))
+ return all
+}
+
+type byName []*Profile
+
+func (x byName) Len() int { return len(x) }
+func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x byName) Less(i, j int) bool { return x[i].name < x[j].name }
+
+// Name returns this profile's name, which can be passed to Lookup to reobtain the profile.
+func (p *Profile) Name() string {
+ return p.name
+}
+
+// Count returns the number of execution stacks currently in the profile.
+func (p *Profile) Count() int {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ if p.count != nil {
+ return p.count()
+ }
+ return len(p.m)
+}
+
+// Add adds the current execution stack to the profile, associated with value.
+// Add stores value in an internal map, so value must be suitable for use as
+// a map key and will not be garbage collected until the corresponding
+// call to Remove. Add panics if the profile already contains a stack for value.
+//
+// The skip parameter has the same meaning as runtime.Caller's skip
+// and controls where the stack trace begins. Passing skip=0 begins the
+// trace in the function calling Add. For example, given this
+// execution stack:
+//
+// Add
+// called from rpc.NewClient
+// called from mypkg.Run
+// called from main.main
+//
+// Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient.
+// Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run.
+//
+func (p *Profile) Add(value interface{}, skip int) {
+ if p.name == "" {
+ panic("pprof: use of uninitialized Profile")
+ }
+ if p.write != nil {
+ panic("pprof: Add called on built-in Profile " + p.name)
+ }
+
+ stk := make([]uintptr, 32)
+ n := runtime.Callers(skip+1, stk[:])
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ if p.m[value] != nil {
+ panic("pprof: Profile.Add of duplicate value")
+ }
+ p.m[value] = stk[:n]
+}
+
+// Remove removes the execution stack associated with value from the profile.
+// It is a no-op if the value is not in the profile.
+func (p *Profile) Remove(value interface{}) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ delete(p.m, value)
+}
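A hedged sketch of how a package might use a custom Profile to track open resources; the package and variable names here (fileutil, openFiles) are illustrative, not part of this change:

	package fileutil

	import (
		"os"
		"runtime/pprof"
	)

	// openFiles records the stack of every Open that has not yet been
	// matched by a Close.
	var openFiles = pprof.NewProfile("fileutil.openFiles")

	func Open(name string) (*os.File, error) {
		f, err := os.Open(name)
		if err == nil {
			openFiles.Add(f, 1) // skip=1: start the trace at the call to Open in its caller
		}
		return f, err
	}

	func Close(f *os.File) error {
		openFiles.Remove(f)
		return f.Close()
	}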
+
+// WriteTo writes a pprof-formatted snapshot of the profile to w.
+// If a write to w returns an error, WriteTo returns that error.
+// Otherwise, WriteTo returns nil.
+//
+// The debug parameter enables additional output.
+// Passing debug=0 prints only the hexadecimal addresses that pprof needs.
+// Passing debug=1 adds comments translating addresses to function names
+// and line numbers, so that a programmer can read the profile without tools.
+//
+// The predefined profiles may assign meaning to other debug values;
+// for example, when printing the "goroutine" profile, debug=2 means to
+// print the goroutine stacks in the same form that a Go program uses
+// when dying due to an unrecovered panic.
+func (p *Profile) WriteTo(w io.Writer, debug int) error {
+ if p.name == "" {
+ panic("pprof: use of zero Profile")
+ }
+ if p.write != nil {
+ return p.write(w, debug)
+ }
+
+ // Obtain consistent snapshot under lock; then process without lock.
+ var all [][]uintptr
+ p.mu.Lock()
+ for _, stk := range p.m {
+ all = append(all, stk)
+ }
+ p.mu.Unlock()
+
+ // Map order is non-deterministic; make output deterministic.
+ sort.Sort(stackProfile(all))
+
+ return printCountProfile(w, debug, p.name, stackProfile(all))
+}
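An illustrative dump of a predefined profile via Lookup and WriteTo; debug=1 adds the symbolized comment lines described above, and debug=2 on the goroutine profile prints full stacks:

	package main

	import (
		"os"
		"runtime/pprof"
	)

	func main() {
		// Write the goroutine profile to stderr in human-readable form.
		pprof.Lookup("goroutine").WriteTo(os.Stderr, 1)
	}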
+
+type stackProfile [][]uintptr
+
+func (x stackProfile) Len() int { return len(x) }
+func (x stackProfile) Stack(i int) []uintptr { return x[i] }
+func (x stackProfile) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x stackProfile) Less(i, j int) bool {
+ t, u := x[i], x[j]
+ for k := 0; k < len(t) && k < len(u); k++ {
+ if t[k] != u[k] {
+ return t[k] < u[k]
+ }
+ }
+ return len(t) < len(u)
+}
+
+// A countProfile is a set of stack traces to be printed as counts
+// grouped by stack trace. There are multiple implementations:
+// all that matters is that we can find out how many traces there are
+// and obtain each trace in turn.
+type countProfile interface {
+ Len() int
+ Stack(i int) []uintptr
+}
+
+// printCountProfile prints a countProfile at the specified debug level.
+func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
+ b := bufio.NewWriter(w)
+ var tw *tabwriter.Writer
+ w = b
+ if debug > 0 {
+ tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+ }
+
+ fmt.Fprintf(w, "%s profile: total %d\n", name, p.Len())
+
+ // Build count of each stack.
+ var buf bytes.Buffer
+ key := func(stk []uintptr) string {
+ buf.Reset()
+ fmt.Fprintf(&buf, "@")
+ for _, pc := range stk {
+ fmt.Fprintf(&buf, " %#x", pc)
+ }
+ return buf.String()
+ }
+ m := map[string]int{}
+ n := p.Len()
+ for i := 0; i < n; i++ {
+ m[key(p.Stack(i))]++
+ }
+
+ // Print stacks, listing count on first occurrence of a unique stack.
+ for i := 0; i < n; i++ {
+ stk := p.Stack(i)
+ s := key(stk)
+ if count := m[s]; count != 0 {
+ fmt.Fprintf(w, "%d %s\n", count, s)
+ if debug > 0 {
+ printStackRecord(w, stk, false)
+ }
+ delete(m, s)
+ }
+ }
+
+ if tw != nil {
+ tw.Flush()
+ }
+ return b.Flush()
+}
+
+// printStackRecord prints the function + source line information
+// for a single stack trace.
+func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) {
+ show := allFrames
+ wasPanic := false
+ for i, pc := range stk {
+ f := runtime.FuncForPC(pc)
+ if f == nil {
+ show = true
+ fmt.Fprintf(w, "#\t%#x\n", pc)
+ wasPanic = false
+ } else {
+ tracepc := pc
+ // Back up to call instruction.
+ if i > 0 && pc > f.Entry() && !wasPanic {
+ if runtime.GOARCH == "386" || runtime.GOARCH == "amd64" {
+ tracepc--
+ } else {
+ tracepc -= 4 // arm, etc
+ }
+ }
+ file, line := f.FileLine(tracepc)
+ name := f.Name()
+ // Hide runtime.goexit and any runtime functions at the beginning.
+ // This is useful mainly for allocation traces.
+ wasPanic = name == "runtime.panic"
+ if name == "runtime.goexit" || !show && strings.HasPrefix(name, "runtime.") {
+ continue
+ }
+ show = true
+ fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", pc, name, pc-f.Entry(), file, line)
+ }
+ }
+ if !show {
+ // We didn't print anything; do it again,
+ // and this time include runtime functions.
+ printStackRecord(w, stk, true)
+ return
+ }
+ fmt.Fprintf(w, "\n")
+}
+
+// Interface to system profiles.
+
+type byInUseBytes []runtime.MemProfileRecord
+
+func (x byInUseBytes) Len() int { return len(x) }
+func (x byInUseBytes) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x byInUseBytes) Less(i, j int) bool { return x[i].InUseBytes() > x[j].InUseBytes() }
+
+// WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0).
+// It is preserved for backwards compatibility.
+func WriteHeapProfile(w io.Writer) error {
+ return writeHeap(w, 0)
+}
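Typical use of WriteHeapProfile, assuming a caller that wants a point-in-time heap snapshot (the file name is illustrative):

	package main

	import (
		"log"
		"os"
		"runtime/pprof"
	)

	func main() {
		f, err := os.Create("heap.prof")
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		if err := pprof.WriteHeapProfile(f); err != nil {
			log.Fatal(err)
		}
	}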
+
+// countHeap returns the number of records in the heap profile.
+func countHeap() int {
+ n, _ := runtime.MemProfile(nil, true)
+ return n
+}
+
+// writeHeap writes the current runtime heap profile to w.
+func writeHeap(w io.Writer, debug int) error {
+ // Find out how many records there are (MemProfile(nil, true)),
+ // allocate that many records, and get the data.
+ // There's a race—more records might be added between
+ // the two calls—so allocate a few extra records for safety
+ // and also try again if we're very unlucky.
+ // The loop should only execute one iteration in the common case.
+ var p []runtime.MemProfileRecord
+ n, ok := runtime.MemProfile(nil, true)
+ for {
+ // Allocate room for a slightly bigger profile,
+ // in case a few more entries have been added
+ // since the call to MemProfile.
+ p = make([]runtime.MemProfileRecord, n+50)
+ n, ok = runtime.MemProfile(p, true)
+ if ok {
+ p = p[0:n]
+ break
+ }
+ // Profile grew; try again.
+ }
+
+ sort.Sort(byInUseBytes(p))
+
+ b := bufio.NewWriter(w)
+ var tw *tabwriter.Writer
+ w = b
+ if debug > 0 {
+ tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+ }
+
+ var total runtime.MemProfileRecord
+ for i := range p {
+ r := &p[i]
+ total.AllocBytes += r.AllocBytes
+ total.AllocObjects += r.AllocObjects
+ total.FreeBytes += r.FreeBytes
+ total.FreeObjects += r.FreeObjects
+ }
+
+ // Technically the rate is MemProfileRate not 2*MemProfileRate,
+ // but early versions of the C++ heap profiler reported 2*MemProfileRate,
+ // so that's what pprof has come to expect.
+ fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n",
+ total.InUseObjects(), total.InUseBytes(),
+ total.AllocObjects, total.AllocBytes,
+ 2*runtime.MemProfileRate)
+
+ for i := range p {
+ r := &p[i]
+ fmt.Fprintf(w, "%d: %d [%d: %d] @",
+ r.InUseObjects(), r.InUseBytes(),
+ r.AllocObjects, r.AllocBytes)
+ for _, pc := range r.Stack() {
+ fmt.Fprintf(w, " %#x", pc)
+ }
+ fmt.Fprintf(w, "\n")
+ if debug > 0 {
+ printStackRecord(w, r.Stack(), false)
+ }
+ }
+
+ // Print memstats information too.
+	// Pprof will ignore it, but it's useful for people.
+ if debug > 0 {
+ s := new(runtime.MemStats)
+ runtime.ReadMemStats(s)
+ fmt.Fprintf(w, "\n# runtime.MemStats\n")
+ fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
+ fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
+ fmt.Fprintf(w, "# Sys = %d\n", s.Sys)
+ fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups)
+ fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs)
+ fmt.Fprintf(w, "# Frees = %d\n", s.Frees)
+
+ fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc)
+ fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys)
+ fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle)
+ fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse)
+ fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased)
+ fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects)
+
+ fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys)
+ fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys)
+ fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys)
+ fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys)
+
+ fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC)
+ fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs)
+ fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC)
+ fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC)
+ fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)
+ }
+
+ if tw != nil {
+ tw.Flush()
+ }
+ return b.Flush()
+}
+
+// countThreadCreate returns the size of the current ThreadCreateProfile.
+func countThreadCreate() int {
+ n, _ := runtime.ThreadCreateProfile(nil)
+ return n
+}
+
+// writeThreadCreate writes the current runtime ThreadCreateProfile to w.
+func writeThreadCreate(w io.Writer, debug int) error {
+ return writeRuntimeProfile(w, debug, "threadcreate", runtime.ThreadCreateProfile)
+}
+
+// countGoroutine returns the number of goroutines.
+func countGoroutine() int {
+ return runtime.NumGoroutine()
+}
+
+// writeGoroutine writes the current runtime GoroutineProfile to w.
+func writeGoroutine(w io.Writer, debug int) error {
+ if debug >= 2 {
+ return writeGoroutineStacks(w)
+ }
+ return writeRuntimeProfile(w, debug, "goroutine", runtime.GoroutineProfile)
+}
+
+func writeGoroutineStacks(w io.Writer) error {
+ // We don't know how big the buffer needs to be to collect
+ // all the goroutines. Start with 1 MB and try a few times, doubling each time.
+ // Give up and use a truncated trace if 64 MB is not enough.
+ buf := make([]byte, 1<<20)
+ for i := 0; ; i++ {
+ n := runtime.Stack(buf, true)
+ if n < len(buf) {
+ buf = buf[:n]
+ break
+ }
+ if len(buf) >= 64<<20 {
+ // Filled 64 MB - stop there.
+ break
+ }
+ buf = make([]byte, 2*len(buf))
+ }
+ _, err := w.Write(buf)
+ return err
+}
+
+func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord) (int, bool)) error {
+ // Find out how many records there are (fetch(nil)),
+ // allocate that many records, and get the data.
+ // There's a race—more records might be added between
+ // the two calls—so allocate a few extra records for safety
+ // and also try again if we're very unlucky.
+ // The loop should only execute one iteration in the common case.
+ var p []runtime.StackRecord
+ n, ok := fetch(nil)
+ for {
+ // Allocate room for a slightly bigger profile,
+ // in case a few more entries have been added
+ // since the call to ThreadProfile.
+ p = make([]runtime.StackRecord, n+10)
+ n, ok = fetch(p)
+ if ok {
+ p = p[0:n]
+ break
+ }
+ // Profile grew; try again.
+ }
+
+ return printCountProfile(w, debug, name, runtimeProfile(p))
+}
+
+type runtimeProfile []runtime.StackRecord
+
+func (p runtimeProfile) Len() int { return len(p) }
+func (p runtimeProfile) Stack(i int) []uintptr { return p[i].Stack() }
+
+var cpu struct {
+ sync.Mutex
+ profiling bool
+ done chan bool
+}
+
+// StartCPUProfile enables CPU profiling for the current process.
+// While profiling, the profile will be buffered and written to w.
+// StartCPUProfile returns an error if profiling is already enabled.
+func StartCPUProfile(w io.Writer) error {
+ // The runtime routines allow a variable profiling rate,
+ // but in practice operating systems cannot trigger signals
+ // at more than about 500 Hz, and our processing of the
+ // signal is not cheap (mostly getting the stack trace).
+ // 100 Hz is a reasonable choice: it is frequent enough to
+ // produce useful data, rare enough not to bog down the
+ // system, and a nice round number to make it easy to
+ // convert sample counts to seconds. Instead of requiring
+ // each client to specify the frequency, we hard code it.
+ const hz = 100
+
+ cpu.Lock()
+ defer cpu.Unlock()
+ if cpu.done == nil {
+ cpu.done = make(chan bool)
+ }
+ // Double-check.
+ if cpu.profiling {
+ return fmt.Errorf("cpu profiling already in use")
+ }
+ cpu.profiling = true
+ runtime.SetCPUProfileRate(hz)
+ go profileWriter(w)
+ return nil
+}
+
+func profileWriter(w io.Writer) {
+ for {
+ data := runtime.CPUProfile()
+ if data == nil {
+ break
+ }
+ w.Write(data)
+ }
+ cpu.done <- true
+}
+
+// StopCPUProfile stops the current CPU profile, if any.
+// StopCPUProfile only returns after all the writes for the
+// profile have completed.
+func StopCPUProfile() {
+ cpu.Lock()
+ defer cpu.Unlock()
+
+ if !cpu.profiling {
+ return
+ }
+ cpu.profiling = false
+ runtime.SetCPUProfileRate(0)
+ <-cpu.done
+}
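The usual calling pattern for the CPU profile API is to start once and stop exactly once when the workload finishes (the file name is illustrative):

	package main

	import (
		"log"
		"os"
		"runtime/pprof"
	)

	func main() {
		f, err := os.Create("cpu.prof")
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()

		// ... run the workload being profiled ...
	}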
+
+type byCycles []runtime.BlockProfileRecord
+
+func (x byCycles) Len() int { return len(x) }
+func (x byCycles) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x byCycles) Less(i, j int) bool { return x[i].Cycles > x[j].Cycles }
+
+// countBlock returns the number of records in the blocking profile.
+func countBlock() int {
+ n, _ := runtime.BlockProfile(nil)
+ return n
+}
+
+// writeBlock writes the current blocking profile to w.
+func writeBlock(w io.Writer, debug int) error {
+ var p []runtime.BlockProfileRecord
+ n, ok := runtime.BlockProfile(nil)
+ for {
+ p = make([]runtime.BlockProfileRecord, n+50)
+ n, ok = runtime.BlockProfile(p)
+ if ok {
+ p = p[:n]
+ break
+ }
+ }
+
+ sort.Sort(byCycles(p))
+
+ b := bufio.NewWriter(w)
+ var tw *tabwriter.Writer
+ w = b
+ if debug > 0 {
+ tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
+ w = tw
+ }
+
+ fmt.Fprintf(w, "--- contention:\n")
+ fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
+ for i := range p {
+ r := &p[i]
+ fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
+ for _, pc := range r.Stack() {
+ fmt.Fprintf(w, " %#x", pc)
+ }
+ fmt.Fprint(w, "\n")
+ if debug > 0 {
+ printStackRecord(w, r.Stack(), true)
+ }
+ }
+
+ if tw != nil {
+ tw.Flush()
+ }
+ return b.Flush()
+}
+
+func runtime_cyclesPerSecond() int64
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
new file mode 100644
index 000000000..54f93f861
--- /dev/null
+++ b/src/runtime/pprof/pprof_test.go
@@ -0,0 +1,452 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !nacl
+
+package pprof_test
+
+import (
+ "bytes"
+ "fmt"
+ "math/big"
+ "os/exec"
+ "regexp"
+ "runtime"
+ . "runtime/pprof"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+ "unsafe"
+)
+
+func cpuHogger(f func()) {
+ // We only need to get one 100 Hz clock tick, so we've got
+ // a 100x safety buffer.
+ // But do at least 2000 iterations (which should take about 400ms),
+ // otherwise TestCPUProfileMultithreaded can fail if only one
+ // thread is scheduled during the 1 second period.
+ t0 := time.Now()
+ for i := 0; i < 2000 || time.Since(t0) < time.Second; i++ {
+ f()
+ }
+}
+
+var (
+ salt1 = 0
+ salt2 = 0
+)
+
+// The actual CPU hogging function.
+// Must not call other functions or access the heap/globals in the loop;
+// otherwise, under the race detector, the samples will land in the race runtime.
+func cpuHog1() {
+ foo := salt1
+ for i := 0; i < 1e5; i++ {
+ if foo > 0 {
+ foo *= foo
+ } else {
+ foo *= foo + 1
+ }
+ }
+ salt1 = foo
+}
+
+func cpuHog2() {
+ foo := salt2
+ for i := 0; i < 1e5; i++ {
+ if foo > 0 {
+ foo *= foo
+ } else {
+ foo *= foo + 2
+ }
+ }
+ salt2 = foo
+}
+
+func TestCPUProfile(t *testing.T) {
+ testCPUProfile(t, []string{"runtime/pprof_test.cpuHog1"}, func() {
+ cpuHogger(cpuHog1)
+ })
+}
+
+func TestCPUProfileMultithreaded(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
+ testCPUProfile(t, []string{"runtime/pprof_test.cpuHog1", "runtime/pprof_test.cpuHog2"}, func() {
+ c := make(chan int)
+ go func() {
+ cpuHogger(cpuHog1)
+ c <- 1
+ }()
+ cpuHogger(cpuHog2)
+ <-c
+ })
+}
+
+func parseProfile(t *testing.T, bytes []byte, f func(uintptr, []uintptr)) {
+ // Convert []byte to []uintptr.
+ l := len(bytes) / int(unsafe.Sizeof(uintptr(0)))
+ val := *(*[]uintptr)(unsafe.Pointer(&bytes))
+ val = val[:l]
+
+ // 5 for the header, 2 for the per-sample header on at least one sample, 3 for the trailer.
+ if l < 5+2+3 {
+ t.Logf("profile too short: %#x", val)
+ if badOS[runtime.GOOS] {
+ t.Skipf("ignoring failure on %s; see golang.org/issue/6047", runtime.GOOS)
+ return
+ }
+ t.FailNow()
+ }
+
+ hd, val, tl := val[:5], val[5:l-3], val[l-3:]
+ if hd[0] != 0 || hd[1] != 3 || hd[2] != 0 || hd[3] != 1e6/100 || hd[4] != 0 {
+ t.Fatalf("unexpected header %#x", hd)
+ }
+
+ if tl[0] != 0 || tl[1] != 1 || tl[2] != 0 {
+ t.Fatalf("malformed end-of-data marker %#x", tl)
+ }
+
+ for len(val) > 0 {
+ if len(val) < 2 || val[0] < 1 || val[1] < 1 || uintptr(len(val)) < 2+val[1] {
+ t.Fatalf("malformed profile. leftover: %#x", val)
+ }
+ f(val[0], val[2:2+val[1]])
+ val = val[2+val[1]:]
+ }
+}
+
+func testCPUProfile(t *testing.T, need []string, f func()) {
+ switch runtime.GOOS {
+ case "darwin":
+ out, err := exec.Command("uname", "-a").CombinedOutput()
+ if err != nil {
+ t.Fatal(err)
+ }
+ vers := string(out)
+ t.Logf("uname -a: %v", vers)
+ case "plan9":
+ // unimplemented
+ return
+ }
+
+ var prof bytes.Buffer
+ if err := StartCPUProfile(&prof); err != nil {
+ t.Fatal(err)
+ }
+ f()
+ StopCPUProfile()
+
+ // Check that profile is well formed and contains need.
+ have := make([]uintptr, len(need))
+ parseProfile(t, prof.Bytes(), func(count uintptr, stk []uintptr) {
+ for _, pc := range stk {
+ f := runtime.FuncForPC(pc)
+ if f == nil {
+ continue
+ }
+ for i, name := range need {
+ if strings.Contains(f.Name(), name) {
+ have[i] += count
+ }
+ }
+ }
+ })
+
+ if len(need) == 0 {
+ return
+ }
+
+ var total uintptr
+ for i, name := range need {
+ total += have[i]
+ t.Logf("%s: %d\n", name, have[i])
+ }
+ ok := true
+ if total == 0 {
+ t.Logf("no CPU profile samples collected")
+ ok = false
+ }
+ // We'd like to check a reasonable minimum, like
+ // total / len(have) / smallconstant, but this test is
+ // pretty flaky (see bug 7095). So we'll just test to
+ // make sure we got at least one sample.
+ min := uintptr(1)
+ for i, name := range need {
+ if have[i] < min {
+ t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have)))
+ ok = false
+ }
+ }
+
+ if !ok {
+ if badOS[runtime.GOOS] {
+ t.Skipf("ignoring failure on %s; see golang.org/issue/6047", runtime.GOOS)
+ return
+ }
+ t.FailNow()
+ }
+}
+
+func TestCPUProfileWithFork(t *testing.T) {
+ // Fork can hang if preempted with signals frequently enough (see issue 5517).
+ // Ensure that we do not do this.
+ heap := 1 << 30
+ if testing.Short() {
+ heap = 100 << 20
+ }
+ // This makes fork slower.
+ garbage := make([]byte, heap)
+ // Need to touch the slice, otherwise it won't be paged in.
+ done := make(chan bool)
+ go func() {
+ for i := range garbage {
+ garbage[i] = 42
+ }
+ done <- true
+ }()
+ <-done
+
+ var prof bytes.Buffer
+ if err := StartCPUProfile(&prof); err != nil {
+ t.Fatal(err)
+ }
+ defer StopCPUProfile()
+
+ for i := 0; i < 10; i++ {
+ exec.Command("go").CombinedOutput()
+ }
+}
+
+// Test that the profiler does not observe runtime.gogo as "user" goroutine execution.
+// If it did, it would see inconsistent state and would either record an incorrect stack
+// or crash because the stack was malformed.
+func TestGoroutineSwitch(t *testing.T) {
+	// How much to try. These defaults take about 1 second
+ // on a 2012 MacBook Pro. The ones in short mode take
+ // about 0.1 seconds.
+ tries := 10
+ count := 1000000
+ if testing.Short() {
+ tries = 1
+ }
+ for try := 0; try < tries; try++ {
+ var prof bytes.Buffer
+ if err := StartCPUProfile(&prof); err != nil {
+ t.Fatal(err)
+ }
+ for i := 0; i < count; i++ {
+ runtime.Gosched()
+ }
+ StopCPUProfile()
+
+		// Read the profile to look for entries for runtime.gogo with an attempt at a traceback.
+		// Special entries without a traceback (see below) are allowed.
+ parseProfile(t, prof.Bytes(), func(count uintptr, stk []uintptr) {
+ // An entry with two frames with 'System' in its top frame
+ // exists to record a PC without a traceback. Those are okay.
+ if len(stk) == 2 {
+ f := runtime.FuncForPC(stk[1])
+ if f != nil && (f.Name() == "System" || f.Name() == "ExternalCode") {
+ return
+ }
+ }
+
+ // Otherwise, should not see runtime.gogo.
+ // The place we'd see it would be the inner most frame.
+ f := runtime.FuncForPC(stk[0])
+ if f != nil && f.Name() == "runtime.gogo" {
+ var buf bytes.Buffer
+ for _, pc := range stk {
+ f := runtime.FuncForPC(pc)
+ if f == nil {
+ fmt.Fprintf(&buf, "%#x ?:0\n", pc)
+ } else {
+ file, line := f.FileLine(pc)
+ fmt.Fprintf(&buf, "%#x %s:%d\n", pc, file, line)
+ }
+ }
+ t.Fatalf("found profile entry for runtime.gogo:\n%s", buf.String())
+ }
+ })
+ }
+}
+
+// Test that profiling of division operations is okay, especially on ARM. See issue 6681.
+func TestMathBigDivide(t *testing.T) {
+ testCPUProfile(t, nil, func() {
+ t := time.After(5 * time.Second)
+ pi := new(big.Int)
+ for {
+ for i := 0; i < 100; i++ {
+ n := big.NewInt(2646693125139304345)
+ d := big.NewInt(842468587426513207)
+ pi.Div(n, d)
+ }
+ select {
+ case <-t:
+ return
+ default:
+ }
+ }
+ })
+}
+
+// Operating systems that are expected to fail the tests. See issue 6047.
+var badOS = map[string]bool{
+ "darwin": true,
+ "netbsd": true,
+ "plan9": true,
+}
+
+func TestBlockProfile(t *testing.T) {
+ type TestCase struct {
+ name string
+ f func()
+ re string
+ }
+ tests := [...]TestCase{
+ {"chan recv", blockChanRecv, `
+[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime\.chanrecv1\+0x[0-9,a-f]+ .*/src/runtime/chan.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.blockChanRecv\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+`},
+ {"chan send", blockChanSend, `
+[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime\.chansend1\+0x[0-9,a-f]+ .*/src/runtime/chan.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.blockChanSend\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+`},
+ {"chan close", blockChanClose, `
+[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime\.chanrecv1\+0x[0-9,a-f]+ .*/src/runtime/chan.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.blockChanClose\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+`},
+ {"select recv async", blockSelectRecvAsync, `
+[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime\.selectgo\+0x[0-9,a-f]+ .*/src/runtime/select.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.blockSelectRecvAsync\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+`},
+ {"select send sync", blockSelectSendSync, `
+[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ runtime\.selectgo\+0x[0-9,a-f]+ .*/src/runtime/select.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.blockSelectSendSync\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+`},
+ {"mutex", blockMutex, `
+[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9,a-f]+ .*/src/sync/mutex\.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.blockMutex\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+`},
+ {"cond", blockCond, `
+[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+# 0x[0-9,a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9,a-f]+ .*/src/sync/cond\.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.blockCond\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
+`},
+ }
+
+ runtime.SetBlockProfileRate(1)
+ defer runtime.SetBlockProfileRate(0)
+ for _, test := range tests {
+ test.f()
+ }
+ var w bytes.Buffer
+ Lookup("block").WriteTo(&w, 1)
+ prof := w.String()
+
+ if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
+ t.Fatalf("Bad profile header:\n%v", prof)
+ }
+
+ for _, test := range tests {
+ if !regexp.MustCompile(test.re).MatchString(prof) {
+ t.Fatalf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
+ }
+ }
+}
+
+const blockDelay = 10 * time.Millisecond
+
+func blockChanRecv() {
+ c := make(chan bool)
+ go func() {
+ time.Sleep(blockDelay)
+ c <- true
+ }()
+ <-c
+}
+
+func blockChanSend() {
+ c := make(chan bool)
+ go func() {
+ time.Sleep(blockDelay)
+ <-c
+ }()
+ c <- true
+}
+
+func blockChanClose() {
+ c := make(chan bool)
+ go func() {
+ time.Sleep(blockDelay)
+ close(c)
+ }()
+ <-c
+}
+
+func blockSelectRecvAsync() {
+ c := make(chan bool, 1)
+ c2 := make(chan bool, 1)
+ go func() {
+ time.Sleep(blockDelay)
+ c <- true
+ }()
+ select {
+ case <-c:
+ case <-c2:
+ }
+}
+
+func blockSelectSendSync() {
+ c := make(chan bool)
+ c2 := make(chan bool)
+ go func() {
+ time.Sleep(blockDelay)
+ <-c
+ }()
+ select {
+ case c <- true:
+ case c2 <- true:
+ }
+}
+
+func blockMutex() {
+ var mu sync.Mutex
+ mu.Lock()
+ go func() {
+ time.Sleep(blockDelay)
+ mu.Unlock()
+ }()
+ mu.Lock()
+}
+
+func blockCond() {
+ var mu sync.Mutex
+ c := sync.NewCond(&mu)
+ mu.Lock()
+ go func() {
+ time.Sleep(blockDelay)
+ mu.Lock()
+ c.Signal()
+ mu.Unlock()
+ }()
+ c.Wait()
+ mu.Unlock()
+}
diff --git a/src/runtime/print1.go b/src/runtime/print1.go
new file mode 100644
index 000000000..93f83ed26
--- /dev/null
+++ b/src/runtime/print1.go
@@ -0,0 +1,341 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// The compiler knows that a print of a value of this type
+// should use printhex instead of printuint (decimal).
+type hex uint64
+
+//go:noescape
+func gostring(*byte) string
+
+func bytes(s string) (ret []byte) {
+ rp := (*slice)(unsafe.Pointer(&ret))
+ sp := (*_string)(noescape(unsafe.Pointer(&s)))
+ rp.array = sp.str
+ rp.len = uint(sp.len)
+ rp.cap = uint(sp.len)
+ return
+}
+
+// goprintf is the function call that is actually deferred when you write
+// defer print(...)
+// It is otherwise unused. In particular it is not used for ordinary prints.
+// Right now a dynamically allocated string that is being passed as an
+// argument is invisible to the garbage collector and might be collected
+// if that argument list is the only reference. For now we ignore that possibility.
+// To fix, we should change to defer a call to vprintf with a pointer to
+// an argument list on the stack, stored in an appropriately typed
+// struct. golang.org/issue/8614.
+//go:nosplit
+func goprintf(s string) {
+ vprintf(s, add(unsafe.Pointer(&s), unsafe.Sizeof(s)))
+}
+
+// printf is only called from C code. It has the same problem as goprintf
+// with strings possibly being collected from underneath.
+// However, the runtime never prints dynamically allocated
+// Go strings using printf. The strings it prints come from the symbol
+// and type tables.
+//go:nosplit
+func printf(s *byte) {
+ vprintf(gostringnocopy(s), add(unsafe.Pointer(&s), unsafe.Sizeof(s)))
+}
+
+// snprintf is only called from C code.
+// It has the same problem as goprintf.
+//go:nosplit
+func snprintf(dst *byte, n int32, s *byte) {
+ buf := (*[1 << 30]byte)(unsafe.Pointer(dst))[0:n:n]
+
+ gp := getg()
+ gp.writebuf = buf[0:0 : n-1] // leave room for NUL, this is called from C
+ vprintf(gostringnocopy(s), add(unsafe.Pointer(&s), unsafe.Sizeof(s)))
+ buf[len(gp.writebuf)] = '\x00'
+ gp.writebuf = nil
+}
+
+//var debuglock mutex
+
+// write to goroutine-local buffer if diverting output,
+// or else standard error.
+func gwrite(b []byte) {
+ if len(b) == 0 {
+ return
+ }
+ gp := getg()
+ if gp == nil || gp.writebuf == nil {
+ write(2, unsafe.Pointer(&b[0]), int32(len(b)))
+ return
+ }
+
+ n := copy(gp.writebuf[len(gp.writebuf):cap(gp.writebuf)], b)
+ gp.writebuf = gp.writebuf[:len(gp.writebuf)+n]
+}
+
+func prints(s *byte) {
+ b := (*[1 << 30]byte)(unsafe.Pointer(s))
+ for i := 0; ; i++ {
+ if b[i] == 0 {
+ gwrite(b[:i])
+ return
+ }
+ }
+}
+
+func printsp() {
+ print(" ")
+}
+
+func printnl() {
+ print("\n")
+}
+
+// Very simple printf. Only for debugging prints.
+// Do not add to this without checking with Rob.
+func vprintf(str string, arg unsafe.Pointer) {
+ //lock(&debuglock);
+
+ s := bytes(str)
+ start := 0
+ i := 0
+ for ; i < len(s); i++ {
+ if s[i] != '%' {
+ continue
+ }
+ if i > start {
+ gwrite(s[start:i])
+ }
+ if i++; i >= len(s) {
+ break
+ }
+ var siz uintptr
+ switch s[i] {
+ case 't', 'c':
+ siz = 1
+ case 'd', 'x': // 32-bit
+ arg = roundup(arg, 4)
+ siz = 4
+ case 'D', 'U', 'X', 'f': // 64-bit
+ arg = roundup(arg, unsafe.Sizeof(uintreg(0)))
+ siz = 8
+ case 'C':
+ arg = roundup(arg, unsafe.Sizeof(uintreg(0)))
+ siz = 16
+ case 'p', 's': // pointer-sized
+ arg = roundup(arg, unsafe.Sizeof(uintptr(0)))
+ siz = unsafe.Sizeof(uintptr(0))
+ case 'S': // pointer-aligned but bigger
+ arg = roundup(arg, unsafe.Sizeof(uintptr(0)))
+ siz = unsafe.Sizeof(string(""))
+ case 'a': // pointer-aligned but bigger
+ arg = roundup(arg, unsafe.Sizeof(uintptr(0)))
+ siz = unsafe.Sizeof([]byte{})
+ case 'i', 'e': // pointer-aligned but bigger
+ arg = roundup(arg, unsafe.Sizeof(uintptr(0)))
+ siz = unsafe.Sizeof(interface{}(nil))
+ }
+ switch s[i] {
+ case 'a':
+ printslice(*(*[]byte)(arg))
+ case 'c':
+ printbyte(*(*byte)(arg))
+ case 'd':
+ printint(int64(*(*int32)(arg)))
+ case 'D':
+ printint(int64(*(*int64)(arg)))
+ case 'e':
+ printeface(*(*interface{})(arg))
+ case 'f':
+ printfloat(*(*float64)(arg))
+ case 'C':
+ printcomplex(*(*complex128)(arg))
+ case 'i':
+ printiface(*(*fInterface)(arg))
+ case 'p':
+ printpointer(*(*unsafe.Pointer)(arg))
+ case 's':
+ prints(*(**byte)(arg))
+ case 'S':
+ printstring(*(*string)(arg))
+ case 't':
+ printbool(*(*bool)(arg))
+ case 'U':
+ printuint(*(*uint64)(arg))
+ case 'x':
+ printhex(uint64(*(*uint32)(arg)))
+ case 'X':
+ printhex(*(*uint64)(arg))
+ }
+ arg = add(arg, siz)
+ start = i + 1
+ }
+ if start < i {
+ gwrite(s[start:i])
+ }
+
+ //unlock(&debuglock);
+}
+
+func printpc(p unsafe.Pointer) {
+ print("PC=", hex(uintptr(p)))
+}
+
+func printbool(v bool) {
+ if v {
+ print("true")
+ } else {
+ print("false")
+ }
+}
+
+func printbyte(c byte) {
+ gwrite((*[1]byte)(unsafe.Pointer(&c))[:])
+}
+
+func printfloat(v float64) {
+ switch {
+ case v != v:
+ print("NaN")
+ return
+ case v+v == v && v > 0:
+ print("+Inf")
+ return
+ case v+v == v && v < 0:
+ print("-Inf")
+ return
+ }
+
+ const n = 7 // digits printed
+ var buf [n + 7]byte
+ buf[0] = '+'
+ e := 0 // exp
+ if v == 0 {
+ if 1/v < 0 {
+ buf[0] = '-'
+ }
+ } else {
+ if v < 0 {
+ v = -v
+ buf[0] = '-'
+ }
+
+ // normalize
+ for v >= 10 {
+ e++
+ v /= 10
+ }
+ for v < 1 {
+ e--
+ v *= 10
+ }
+
+ // round
+ h := 5.0
+ for i := 0; i < n; i++ {
+ h /= 10
+ }
+ v += h
+ if v >= 10 {
+ e++
+ v /= 10
+ }
+ }
+
+ // format +d.dddd+edd
+ for i := 0; i < n; i++ {
+ s := int(v)
+ buf[i+2] = byte(s + '0')
+ v -= float64(s)
+ v *= 10
+ }
+ buf[1] = buf[2]
+ buf[2] = '.'
+
+ buf[n+2] = 'e'
+ buf[n+3] = '+'
+ if e < 0 {
+ e = -e
+ buf[n+3] = '-'
+ }
+
+ buf[n+4] = byte(e/100) + '0'
+ buf[n+5] = byte(e/10)%10 + '0'
+ buf[n+6] = byte(e%10) + '0'
+ gwrite(buf[:])
+}
+
+func printcomplex(c complex128) {
+ print("(", real(c), imag(c), "i)")
+}
+
+func printuint(v uint64) {
+ var buf [100]byte
+ i := len(buf)
+ for i--; i > 0; i-- {
+ buf[i] = byte(v%10 + '0')
+ if v < 10 {
+ break
+ }
+ v /= 10
+ }
+ gwrite(buf[i:])
+}
+
+func printint(v int64) {
+ if v < 0 {
+ print("-")
+ v = -v
+ }
+ printuint(uint64(v))
+}
+
+func printhex(v uint64) {
+ const dig = "0123456789abcdef"
+ var buf [100]byte
+ i := len(buf)
+ for i--; i > 0; i-- {
+ buf[i] = dig[v%16]
+ if v < 16 {
+ break
+ }
+ v /= 16
+ }
+ i--
+ buf[i] = 'x'
+ i--
+ buf[i] = '0'
+ gwrite(buf[i:])
+}
+
+func printpointer(p unsafe.Pointer) {
+ printhex(uint64(uintptr(p)))
+}
+
+func printstring(s string) {
+ if uintptr(len(s)) > maxstring {
+ gwrite(bytes("[string too long]"))
+ return
+ }
+ gwrite(bytes(s))
+}
+
+func printslice(s []byte) {
+ sp := (*slice)(unsafe.Pointer(&s))
+ print("[", len(s), "/", cap(s), "]")
+ printpointer(unsafe.Pointer(sp.array))
+}
+
+func printeface(e interface{}) {
+ ep := (*eface)(unsafe.Pointer(&e))
+ print("(", ep._type, ",", ep.data, ")")
+}
+
+func printiface(i fInterface) {
+ ip := (*iface)(unsafe.Pointer(&i))
+ print("(", ip.tab, ",", ip.data, ")")
+}
diff --git a/src/runtime/proc.c b/src/runtime/proc.c
new file mode 100644
index 000000000..698be9ffa
--- /dev/null
+++ b/src/runtime/proc.c
@@ -0,0 +1,3663 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "zaexperiment.h"
+#include "malloc.h"
+#include "stack.h"
+#include "race.h"
+#include "type.h"
+#include "mgc0.h"
+#include "textflag.h"
+
+// Goroutine scheduler
+// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
+//
+// The main concepts are:
+// G - goroutine.
+// M - worker thread, or machine.
+// P - processor, a resource that is required to execute Go code.
+// M must have an associated P to execute Go code, however it can be
+// blocked or in a syscall w/o an associated P.
+//
+// Design doc at http://golang.org/s/go11sched.
+
+typedef struct Sched Sched;
+struct Sched {
+ Mutex lock;
+
+ uint64 goidgen;
+
+ M* midle; // idle m's waiting for work
+ int32 nmidle; // number of idle m's waiting for work
+ int32 nmidlelocked; // number of locked m's waiting for work
+ int32 mcount; // number of m's that have been created
+ int32 maxmcount; // maximum number of m's allowed (or die)
+
+ P* pidle; // idle P's
+ uint32 npidle;
+ uint32 nmspinning;
+
+ // Global runnable queue.
+ G* runqhead;
+ G* runqtail;
+ int32 runqsize;
+
+ // Global cache of dead G's.
+ Mutex gflock;
+ G* gfree;
+ int32 ngfree;
+
+ uint32 gcwaiting; // gc is waiting to run
+ int32 stopwait;
+ Note stopnote;
+ uint32 sysmonwait;
+ Note sysmonnote;
+ uint64 lastpoll;
+
+ int32 profilehz; // cpu profiling rate
+};
+
+enum
+{
+ // Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
+	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
+ GoidCacheBatch = 16,
+};
+
+Sched runtime·sched;
+int32 runtime·gomaxprocs;
+uint32 runtime·needextram;
+bool runtime·iscgo;
+M runtime·m0;
+G runtime·g0; // idle goroutine for m0
+G* runtime·lastg;
+M* runtime·allm;
+M* runtime·extram;
+P* runtime·allp[MaxGomaxprocs+1];
+int8* runtime·goos;
+int32 runtime·ncpu;
+static int32 newprocs;
+
+static Mutex allglock; // the following vars are protected by this lock or by stoptheworld
+G** runtime·allg;
+Slice runtime·allgs;
+uintptr runtime·allglen;
+static uintptr allgcap;
+ForceGCState runtime·forcegc;
+
+void runtime·mstart(void);
+static void runqput(P*, G*);
+static G* runqget(P*);
+static bool runqputslow(P*, G*, uint32, uint32);
+static G* runqsteal(P*, P*);
+static void mput(M*);
+static M* mget(void);
+static void mcommoninit(M*);
+static void schedule(void);
+static void procresize(int32);
+static void acquirep(P*);
+static P* releasep(void);
+static void newm(void(*)(void), P*);
+static void stopm(void);
+static void startm(P*, bool);
+static void handoffp(P*);
+static void wakep(void);
+static void stoplockedm(void);
+static void startlockedm(G*);
+static void sysmon(void);
+static uint32 retake(int64);
+static void incidlelocked(int32);
+static void checkdead(void);
+static void exitsyscall0(G*);
+void runtime·park_m(G*);
+static void goexit0(G*);
+static void gfput(P*, G*);
+static G* gfget(P*);
+static void gfpurge(P*);
+static void globrunqput(G*);
+static void globrunqputbatch(G*, G*, int32);
+static G* globrunqget(P*, int32);
+static P* pidleget(void);
+static void pidleput(P*);
+static void injectglist(G*);
+static bool preemptall(void);
+static bool preemptone(P*);
+static bool exitsyscallfast(void);
+static bool haveexperiment(int8*);
+static void allgadd(G*);
+static void dropg(void);
+
+extern String runtime·buildVersion;
+
+// For cgo-using programs with external linking,
+// export "main" (defined in assembly) so that libc can handle basic
+// C runtime startup and call the Go program as if it were
+// the C main function.
+#pragma cgo_export_static main
+
+// Filled in by dynamic linker when Cgo is available.
+void* _cgo_init;
+void* _cgo_malloc;
+void* _cgo_free;
+
+// Copy for Go code.
+void* runtime·cgoMalloc;
+void* runtime·cgoFree;
+
+// The bootstrap sequence is:
+//
+// call osinit
+// call schedinit
+// make & queue new G
+// call runtime·mstart
+//
+// The new G calls runtime·main.
+void
+runtime·schedinit(void)
+{
+ int32 n, procs;
+ byte *p;
+ Eface i;
+
+ // raceinit must be the first call to race detector.
+ // In particular, it must be done before mallocinit below calls racemapshadow.
+ if(raceenabled)
+ g->racectx = runtime·raceinit();
+
+ runtime·sched.maxmcount = 10000;
+ runtime·precisestack = true; // haveexperiment("precisestack");
+
+ runtime·symtabinit();
+ runtime·stackinit();
+ runtime·mallocinit();
+ mcommoninit(g->m);
+
+ // Initialize the itable value for newErrorCString,
+ // so that the next time it gets called, possibly
+ // in a fault during a garbage collection, it will not
+	// need to allocate memory.
+ runtime·newErrorCString(0, &i);
+
+ runtime·goargs();
+ runtime·goenvs();
+ runtime·parsedebugvars();
+ runtime·gcinit();
+
+ runtime·sched.lastpoll = runtime·nanotime();
+ procs = 1;
+ p = runtime·getenv("GOMAXPROCS");
+ if(p != nil && (n = runtime·atoi(p)) > 0) {
+ if(n > MaxGomaxprocs)
+ n = MaxGomaxprocs;
+ procs = n;
+ }
+ procresize(procs);
+
+ runtime·copystack = runtime·precisestack;
+ p = runtime·getenv("GOCOPYSTACK");
+ if(p != nil && !runtime·strcmp(p, (byte*)"0"))
+ runtime·copystack = false;
+
+ if(runtime·buildVersion.str == nil) {
+ // Condition should never trigger. This code just serves
+ // to ensure runtime·buildVersion is kept in the resulting binary.
+ runtime·buildVersion.str = (uint8*)"unknown";
+ runtime·buildVersion.len = 7;
+ }
+
+ runtime·cgoMalloc = _cgo_malloc;
+ runtime·cgoFree = _cgo_free;
+}
+
+extern void main·init(void);
+extern void runtime·init(void);
+extern void main·main(void);
+
+static FuncVal initDone = { runtime·unlockOSThread };
+
+// The main goroutine.
+// Note: C frames in general are not copyable during stack growth, for two reasons:
+// 1) We don't know where in a frame to find pointers to other stack locations.
+// 2) There's no guarantee that globals or heap values do not point into the frame.
+//
+// The C frame for runtime.main is copyable, because:
+// 1) There are no pointers to other stack locations in the frame
+// (d.fn points at a global, d.link is nil, d.argp is -1).
+// 2) The only pointer into this frame is from the defer chain,
+// which is explicitly handled during stack copying.
+void
+runtime·main(void)
+{
+ Defer d;
+
+ // Racectx of m0->g0 is used only as the parent of the main goroutine.
+ // It must not be used for anything else.
+ g->m->g0->racectx = 0;
+
+ // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
+ // Using decimal instead of binary GB and MB because
+ // they look nicer in the stack overflow failure message.
+ if(sizeof(void*) == 8)
+ runtime·maxstacksize = 1000000000;
+ else
+ runtime·maxstacksize = 250000000;
+
+ newm(sysmon, nil);
+
+ // Lock the main goroutine onto this, the main OS thread,
+ // during initialization. Most programs won't care, but a few
+ // do require certain calls to be made by the main thread.
+ // Those can arrange for main.main to run in the main thread
+ // by calling runtime.LockOSThread during initialization
+ // to preserve the lock.
+ runtime·lockOSThread();
+
+ // Defer unlock so that runtime.Goexit during init does the unlock too.
+ d.fn = &initDone;
+ d.siz = 0;
+ d.link = g->defer;
+ d.argp = NoArgs;
+ d.special = true;
+ g->defer = &d;
+
+ if(g->m != &runtime·m0)
+ runtime·throw("runtime·main not on m0");
+
+ runtime·init();
+ mstats.enablegc = 1; // now that runtime is initialized, GC is okay
+
+ main·init();
+
+ if(g->defer != &d || d.fn != &initDone)
+ runtime·throw("runtime: bad defer entry after init");
+ g->defer = d.link;
+ runtime·unlockOSThread();
+
+ main·main();
+ if(raceenabled)
+ runtime·racefini();
+
+ // Make racy client program work: if panicking on
+ // another goroutine at the same time as main returns,
+ // let the other goroutine finish printing the panic trace.
+ // Once it does, it will exit. See issue 3934.
+ if(runtime·panicking)
+ runtime·park(nil, nil, runtime·gostringnocopy((byte*)"panicwait"));
+
+ runtime·exit(0);
+ for(;;)
+ *(int32*)runtime·main = 0;
+}
+
+static void
+dumpgstatus(G* gp)
+{
+ runtime·printf("runtime: gp: gp=%p, goid=%D, gp->atomicstatus=%x\n", gp, gp->goid, runtime·readgstatus(gp));
+ runtime·printf("runtime: g: g=%p, goid=%D, g->atomicstatus=%x\n", g, g->goid, runtime·readgstatus(g));
+}
+
+static void
+checkmcount(void)
+{
+ // sched lock is held
+ if(runtime·sched.mcount > runtime·sched.maxmcount){
+ runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
+ runtime·throw("thread exhaustion");
+ }
+}
+
+static void
+mcommoninit(M *mp)
+{
+	// g0 stack won't make sense for user (and is not necessarily unwindable).
+ if(g != g->m->g0)
+ runtime·callers(1, mp->createstack, nelem(mp->createstack));
+
+ mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
+
+ runtime·lock(&runtime·sched.lock);
+ mp->id = runtime·sched.mcount++;
+ checkmcount();
+ runtime·mpreinit(mp);
+
+ // Add to runtime·allm so garbage collector doesn't free g->m
+ // when it is just in a register or thread-local storage.
+ mp->alllink = runtime·allm;
+ // runtime·NumCgoCall() iterates over allm w/o schedlock,
+ // so we need to publish it safely.
+ runtime·atomicstorep(&runtime·allm, mp);
+ runtime·unlock(&runtime·sched.lock);
+}
+
+// Mark gp ready to run.
+void
+runtime·ready(G *gp)
+{
+ uint32 status;
+
+ status = runtime·readgstatus(gp);
+ // Mark runnable.
+ g->m->locks++; // disable preemption because it can be holding p in a local var
+ if((status&~Gscan) != Gwaiting){
+ dumpgstatus(gp);
+ runtime·throw("bad g->status in ready");
+ }
+ // status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
+ runtime·casgstatus(gp, Gwaiting, Grunnable);
+ runqput(g->m->p, gp);
+ if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0) // TODO: fast atomic
+ wakep();
+ g->m->locks--;
+ if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+}
+
+void
+runtime·ready_m(void)
+{
+ G *gp;
+
+ gp = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ runtime·ready(gp);
+}
+
+int32
+runtime·gcprocs(void)
+{
+ int32 n;
+
+ // Figure out how many CPUs to use during GC.
+ // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
+ runtime·lock(&runtime·sched.lock);
+ n = runtime·gomaxprocs;
+ if(n > runtime·ncpu)
+ n = runtime·ncpu;
+ if(n > MaxGcproc)
+ n = MaxGcproc;
+ if(n > runtime·sched.nmidle+1) // one M is currently running
+ n = runtime·sched.nmidle+1;
+ runtime·unlock(&runtime·sched.lock);
+ return n;
+}
+
+static bool
+needaddgcproc(void)
+{
+ int32 n;
+
+ runtime·lock(&runtime·sched.lock);
+ n = runtime·gomaxprocs;
+ if(n > runtime·ncpu)
+ n = runtime·ncpu;
+ if(n > MaxGcproc)
+ n = MaxGcproc;
+ n -= runtime·sched.nmidle+1; // one M is currently running
+ runtime·unlock(&runtime·sched.lock);
+ return n > 0;
+}
+
+void
+runtime·helpgc(int32 nproc)
+{
+ M *mp;
+ int32 n, pos;
+
+ runtime·lock(&runtime·sched.lock);
+ pos = 0;
+ for(n = 1; n < nproc; n++) { // one M is currently running
+ if(runtime·allp[pos]->mcache == g->m->mcache)
+ pos++;
+ mp = mget();
+ if(mp == nil)
+ runtime·throw("runtime·gcprocs inconsistency");
+ mp->helpgc = n;
+ mp->mcache = runtime·allp[pos]->mcache;
+ pos++;
+ runtime·notewakeup(&mp->park);
+ }
+ runtime·unlock(&runtime·sched.lock);
+}
+
+// Similar to stoptheworld but best-effort and can be called several times.
+// There is no reverse operation; it is used during crashing.
+// This function must not lock any mutexes.
+void
+runtime·freezetheworld(void)
+{
+ int32 i;
+
+ if(runtime·gomaxprocs == 1)
+ return;
+ // stopwait and preemption requests can be lost
+ // due to races with concurrently executing threads,
+ // so try several times
+ for(i = 0; i < 5; i++) {
+ // this should tell the scheduler to not start any new goroutines
+ runtime·sched.stopwait = 0x7fffffff;
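+		// The huge value means stopwait never reaches zero as P's stop,
+		// so stopnote is never signaled and the world stays frozen.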
+ runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
+ // this should stop running goroutines
+ if(!preemptall())
+ break; // no running goroutines
+ runtime·usleep(1000);
+ }
+ // to be sure
+ runtime·usleep(1000);
+ preemptall();
+ runtime·usleep(1000);
+}
+
+static bool
+isscanstatus(uint32 status)
+{
+ if(status == Gscan)
+ runtime·throw("isscanstatus: Bad status Gscan");
+ return (status&Gscan) == Gscan;
+}
+
+// All reads and writes of g's status go through readgstatus, casgstatus,
+// castogscanstatus, casfromgscanstatus.
+#pragma textflag NOSPLIT
+uint32
+runtime·readgstatus(G *gp)
+{
+ return runtime·atomicload(&gp->atomicstatus);
+}
+
+// The Gscanstatuses are acting like locks and this releases them.
+// If it proves to be a performance hit we should be able to make these
+// simple atomic stores but for now we are going to throw if
+// we see an inconsistent state.
+void
+runtime·casfromgscanstatus(G *gp, uint32 oldval, uint32 newval)
+{
+ bool success = false;
+
+ // Check that transition is valid.
+ switch(oldval) {
+ case Gscanrunnable:
+ case Gscanwaiting:
+ case Gscanrunning:
+ case Gscansyscall:
+ if(newval == (oldval&~Gscan))
+ success = runtime·cas(&gp->atomicstatus, oldval, newval);
+ break;
+ case Gscanenqueue:
+ if(newval == Gwaiting)
+ success = runtime·cas(&gp->atomicstatus, oldval, newval);
+ break;
+ }
+ if(!success){
+ runtime·printf("runtime: casfromgscanstatus failed gp=%p, oldval=%d, newval=%d\n",
+ gp, oldval, newval);
+ dumpgstatus(gp);
+ runtime·throw("casfromgscanstatus: gp->status is not in scan state");
+ }
+}
+
+// This will return false if the gp is not in the expected status and the cas fails.
+// This acts like a lock acquire while casfromgscanstatus acts like a lock release.
+bool
+runtime·castogscanstatus(G *gp, uint32 oldval, uint32 newval)
+{
+ switch(oldval) {
+ case Grunnable:
+ case Gwaiting:
+ case Gsyscall:
+ if(newval == (oldval|Gscan))
+ return runtime·cas(&gp->atomicstatus, oldval, newval);
+ break;
+ case Grunning:
+ if(newval == Gscanrunning || newval == Gscanenqueue)
+ return runtime·cas(&gp->atomicstatus, oldval, newval);
+ break;
+ }
+
+ runtime·printf("runtime: castogscanstatus oldval=%d newval=%d\n", oldval, newval);
+ runtime·throw("castogscanstatus");
+ return false; // not reached
+}
+
+static void badcasgstatus(void);
+static void helpcasgstatus(void);
+
+// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
+// and casfromgscanstatus instead.
+// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
+// put it in the Gscan state is finished.
+#pragma textflag NOSPLIT
+void
+runtime·casgstatus(G *gp, uint32 oldval, uint32 newval)
+{
+ void (*fn)(void);
+
+ if((oldval&Gscan) || (newval&Gscan) || oldval == newval) {
+ g->m->scalararg[0] = oldval;
+ g->m->scalararg[1] = newval;
+ fn = badcasgstatus;
+ runtime·onM(&fn);
+ }
+
+ // loop if gp->atomicstatus is in a scan state giving
+ // GC time to finish and change the state to oldval.
+ while(!runtime·cas(&gp->atomicstatus, oldval, newval)) {
+ // Help GC if needed.
+ if(gp->preemptscan && !gp->gcworkdone && (oldval == Grunning || oldval == Gsyscall)) {
+ gp->preemptscan = false;
+ g->m->ptrarg[0] = gp;
+ fn = helpcasgstatus;
+ runtime·onM(&fn);
+ }
+ }
+}
+
+static void
+badcasgstatus(void)
+{
+ uint32 oldval, newval;
+
+ oldval = g->m->scalararg[0];
+ newval = g->m->scalararg[1];
+ g->m->scalararg[0] = 0;
+ g->m->scalararg[1] = 0;
+
+ runtime·printf("casgstatus: oldval=%d, newval=%d\n", oldval, newval);
+ runtime·throw("casgstatus: bad incoming values");
+}
+
+static void
+helpcasgstatus(void)
+{
+ G *gp;
+
+ gp = g->m->ptrarg[0];
+ g->m->ptrarg[0] = 0;
+ runtime·gcphasework(gp);
+}
+
+// stopg ensures that gp is stopped at a GC safe point where its stack can be scanned
+// or, in the context of a moving collector, the pointers can be flipped from pointing
+// to old objects to pointing to new objects.
+// If stopg returns true, the caller knows gp is at a GC safe point and will remain there until
+// the caller calls restartg.
+// If stopg returns false, the caller is not responsible for calling restartg. This can happen
+// if another thread, either the gp itself or another GC thread, is taking the responsibility
+// to do the GC work related to this thread.
+bool
+runtime·stopg(G *gp)
+{
+ uint32 s;
+
+ for(;;) {
+ if(gp->gcworkdone)
+ return false;
+
+ s = runtime·readgstatus(gp);
+ switch(s) {
+ default:
+ dumpgstatus(gp);
+ runtime·throw("stopg: gp->atomicstatus is not valid");
+
+ case Gdead:
+ return false;
+
+ case Gcopystack:
+ // Loop until a new stack is in place.
+ break;
+
+ case Grunnable:
+ case Gsyscall:
+ case Gwaiting:
+ // Claim goroutine by setting scan bit.
+ if(!runtime·castogscanstatus(gp, s, s|Gscan))
+ break;
+ // In scan state, do work.
+ runtime·gcphasework(gp);
+ return true;
+
+ case Gscanrunnable:
+ case Gscanwaiting:
+ case Gscansyscall:
+ // Goroutine already claimed by another GC helper.
+ return false;
+
+ case Grunning:
+ // Claim goroutine, so we aren't racing with a status
+ // transition away from Grunning.
+ if(!runtime·castogscanstatus(gp, Grunning, Gscanrunning))
+ break;
+
+ // Mark gp for preemption.
+ if(!gp->gcworkdone) {
+ gp->preemptscan = true;
+ gp->preempt = true;
+ gp->stackguard0 = StackPreempt;
+ }
+
+ // Unclaim.
+ runtime·casfromgscanstatus(gp, Gscanrunning, Grunning);
+ return false;
+ }
+ }
+ // Should not be here....
+}
+
+// The GC requests that this routine be moved from a scanmumble state to a mumble state.
+void
+runtime·restartg(G *gp)
+{
+ uint32 s;
+
+ s = runtime·readgstatus(gp);
+ switch(s) {
+ default:
+ dumpgstatus(gp);
+ runtime·throw("restartg: unexpected status");
+
+ case Gdead:
+ break;
+
+ case Gscanrunnable:
+ case Gscanwaiting:
+ case Gscansyscall:
+ runtime·casfromgscanstatus(gp, s, s&~Gscan);
+ break;
+
+ case Gscanenqueue:
+ // Scan is now completed.
+ // Goroutine now needs to be made runnable.
+ // We put it on the global run queue; ready blocks on the global scheduler lock.
+ runtime·casfromgscanstatus(gp, Gscanenqueue, Gwaiting);
+ if(gp != g->m->curg)
+ runtime·throw("processing Gscanenqueue on wrong m");
+ dropg();
+ runtime·ready(gp);
+ break;
+ }
+}
+
+static void
+stopscanstart(G* gp)
+{
+ if(g == gp)
+ runtime·throw("GC not moved to G0");
+ if(runtime·stopg(gp)) {
+ if(!isscanstatus(runtime·readgstatus(gp))) {
+ dumpgstatus(gp);
+ runtime·throw("GC not in scan state");
+ }
+ runtime·restartg(gp);
+ }
+}
+
+// Runs on g0 and does the actual work after putting the g back on the run queue.
+static void
+mquiesce(G *gpmaster)
+{
+ G* gp;
+ uint32 i;
+ uint32 status;
+ uint32 activeglen;
+
+ activeglen = runtime·allglen;
+ // enqueue the calling goroutine.
+ runtime·restartg(gpmaster);
+ for(i = 0; i < activeglen; i++) {
+ gp = runtime·allg[i];
+ if(runtime·readgstatus(gp) == Gdead)
+ gp->gcworkdone = true; // noop scan.
+ else
+ gp->gcworkdone = false;
+ stopscanstart(gp);
+ }
+
+	// Check that the G's gcwork (such as scanning) has been done. If not, do it now.
+	// You can end up doing work here if the page trap on a Grunning goroutine has
+	// not been sprung or in some race situations. For example, a runnable goroutine
+	// goes dead and is started up again with gp->gcworkdone set to false.
+ for(i = 0; i < activeglen; i++) {
+ gp = runtime·allg[i];
+ while (!gp->gcworkdone) {
+ status = runtime·readgstatus(gp);
+ if(status == Gdead) {
+ gp->gcworkdone = true; // scan is a noop
+				break; // do nothing, scan not needed.
+ }
+ if(status == Grunning && gp->stackguard0 == (uintptr)StackPreempt && runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) // nanosecond arg
+ runtime·noteclear(&runtime·sched.stopnote);
+ else
+ stopscanstart(gp);
+ }
+ }
+
+ for(i = 0; i < activeglen; i++) {
+ gp = runtime·allg[i];
+ status = runtime·readgstatus(gp);
+ if(isscanstatus(status)) {
+ runtime·printf("mstopandscang:bottom: post scan bad status gp=%p has status %x\n", gp, status);
+ dumpgstatus(gp);
+ }
+ if(!gp->gcworkdone && status != Gdead) {
+ runtime·printf("mstopandscang:bottom: post scan gp=%p->gcworkdone still false\n", gp);
+ dumpgstatus(gp);
+ }
+ }
+
+ schedule(); // Never returns.
+}
+
+// quiesce moves all the goroutines to a GC safepoint, which for now is at a preemption point.
+// If the global runtime·gcphase is GCmark, quiesce will ensure that all of the goroutines' stacks
+// have been scanned before it returns.
+void
+runtime·quiesce(G* mastergp)
+{
+ void (*fn)(G*);
+
+ runtime·castogscanstatus(mastergp, Grunning, Gscanenqueue);
+ // Now move this to the g0 (aka m) stack.
+ // g0 will potentially scan this thread and put mastergp on the runqueue
+ fn = mquiesce;
+ runtime·mcall(&fn);
+}
+
+// This is used by the GC as well as the routines that do stack dumps. In the case
+// of GC, all the goroutines can be reliably stopped. This is not always the case
+// when the system is in panic or being exited.
+void
+runtime·stoptheworld(void)
+{
+ int32 i;
+ uint32 s;
+ P *p;
+ bool wait;
+
+ // If we hold a lock, then we won't be able to stop another M
+ // that is blocked trying to acquire the lock.
+ if(g->m->locks > 0)
+ runtime·throw("stoptheworld: holding locks");
+
+ runtime·lock(&runtime·sched.lock);
+ runtime·sched.stopwait = runtime·gomaxprocs;
+ runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
+ preemptall();
+ // stop current P
+ g->m->p->status = Pgcstop; // Pgcstop is only diagnostic.
+ runtime·sched.stopwait--;
+ // try to retake all P's in Psyscall status
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ s = p->status;
+ if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
+ runtime·sched.stopwait--;
+ }
+ // stop idle P's
+ while(p = pidleget()) {
+ p->status = Pgcstop;
+ runtime·sched.stopwait--;
+ }
+ wait = runtime·sched.stopwait > 0;
+ runtime·unlock(&runtime·sched.lock);
+
+ // wait for remaining P's to stop voluntarily
+ if(wait) {
+ for(;;) {
+ // wait for 100us, then try to re-preempt in case of any races
+ if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
+ runtime·noteclear(&runtime·sched.stopnote);
+ break;
+ }
+ preemptall();
+ }
+ }
+ if(runtime·sched.stopwait)
+ runtime·throw("stoptheworld: not stopped");
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ if(p->status != Pgcstop)
+ runtime·throw("stoptheworld: not stopped");
+ }
+}
+
+static void
+mhelpgc(void)
+{
+ g->m->helpgc = -1;
+}
+
+void
+runtime·starttheworld(void)
+{
+ P *p, *p1;
+ M *mp;
+ G *gp;
+ bool add;
+
+ g->m->locks++; // disable preemption because it can be holding p in a local var
+ gp = runtime·netpoll(false); // non-blocking
+ injectglist(gp);
+ add = needaddgcproc();
+ runtime·lock(&runtime·sched.lock);
+ if(newprocs) {
+ procresize(newprocs);
+ newprocs = 0;
+ } else
+ procresize(runtime·gomaxprocs);
+ runtime·sched.gcwaiting = 0;
+
+ p1 = nil;
+ while(p = pidleget()) {
+ // procresize() puts p's with work at the beginning of the list.
+ // Once we reach a p without a run queue, the rest don't have one either.
+ if(p->runqhead == p->runqtail) {
+ pidleput(p);
+ break;
+ }
+ p->m = mget();
+ p->link = p1;
+ p1 = p;
+ }
+ if(runtime·sched.sysmonwait) {
+ runtime·sched.sysmonwait = false;
+ runtime·notewakeup(&runtime·sched.sysmonnote);
+ }
+ runtime·unlock(&runtime·sched.lock);
+
+ while(p1) {
+ p = p1;
+ p1 = p1->link;
+ if(p->m) {
+ mp = p->m;
+ p->m = nil;
+ if(mp->nextp)
+ runtime·throw("starttheworld: inconsistent mp->nextp");
+ mp->nextp = p;
+ runtime·notewakeup(&mp->park);
+ } else {
+ // Start M to run P. Do not start another M below.
+ newm(nil, p);
+ add = false;
+ }
+ }
+
+ if(add) {
+ // If GC could have used another helper proc, start one now,
+ // in the hope that it will be available next time.
+ // It would have been even better to start it before the collection,
+ // but doing so requires allocating memory, so it's tricky to
+ // coordinate. This lazy approach works out in practice:
+ // we don't mind if the first couple gc rounds don't have quite
+ // the maximum number of procs.
+ newm(mhelpgc, nil);
+ }
+ g->m->locks--;
+ if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+}
+
+// Called to start an M.
+void
+runtime·mstart(void)
+{
+ if(g != g->m->g0)
+ runtime·throw("bad runtime·mstart");
+
+ // Record top of stack for use by mcall.
+ // Once we call schedule we're never coming back,
+ // so other calls can reuse this stack space.
+ runtime·gosave(&g->m->g0->sched);
+ g->m->g0->sched.pc = (uintptr)-1; // make sure it is never used
+ g->m->g0->stackguard = g->m->g0->stackguard0; // cgo sets only stackguard0, copy it to stackguard
+ runtime·asminit();
+ runtime·minit();
+
+ // Install signal handlers; after minit so that minit can
+ // prepare the thread to be able to handle the signals.
+ if(g->m == &runtime·m0)
+ runtime·initsig();
+
+ if(g->m->mstartfn)
+ g->m->mstartfn();
+
+ if(g->m->helpgc) {
+ g->m->helpgc = 0;
+ stopm();
+ } else if(g->m != &runtime·m0) {
+ acquirep(g->m->nextp);
+ g->m->nextp = nil;
+ }
+ schedule();
+
+ // TODO(brainman): This point is never reached, because scheduler
+ // does not release os threads at the moment. But once this path
+ // is enabled, we must remove our seh here.
+}
+
+// When running with cgo, we call _cgo_thread_start
+// to start threads for us so that we can play nicely with
+// foreign code.
+void (*_cgo_thread_start)(void*);
+
+typedef struct CgoThreadStart CgoThreadStart;
+struct CgoThreadStart
+{
+ G *g;
+ uintptr *tls;
+ void (*fn)(void);
+};
+
+// Allocate a new m unassociated with any thread.
+// Can use p for allocation context if needed.
+M*
+runtime·allocm(P *p)
+{
+ M *mp;
+ static Type *mtype; // The Go type M
+
+ g->m->locks++; // disable GC because it can be called from sysmon
+ if(g->m->p == nil)
+ acquirep(p); // temporarily borrow p for mallocs in this function
+ if(mtype == nil) {
+ Eface e;
+ runtime·gc_m_ptr(&e);
+ mtype = ((PtrType*)e.type)->elem;
+ }
+
+ mp = runtime·cnew(mtype);
+ mcommoninit(mp);
+
+ // In case of cgo or Solaris, pthread_create will make us a stack.
+	// Windows will lay out the sched stack on the OS stack.
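+	// (A negative size tells malg to skip allocating a Go stack.)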
+ if(runtime·iscgo || Solaris || Windows)
+ mp->g0 = runtime·malg(-1);
+ else
+ mp->g0 = runtime·malg(8192);
+ mp->g0->m = mp;
+
+ if(p == g->m->p)
+ releasep();
+ g->m->locks--;
+ if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+
+ return mp;
+}
+
+static G*
+allocg(void)
+{
+ G *gp;
+ static Type *gtype;
+
+ if(gtype == nil) {
+ Eface e;
+ runtime·gc_g_ptr(&e);
+ gtype = ((PtrType*)e.type)->elem;
+ }
+ gp = runtime·cnew(gtype);
+ return gp;
+}
+
+static M* lockextra(bool nilokay);
+static void unlockextra(M*);
+
+// needm is called when a cgo callback happens on a
+// thread without an m (a thread not created by Go).
+// In this case, needm is expected to find an m to use
+// and return with m, g initialized correctly.
+// Since m and g are not set now (likely nil, but see below)
+// needm is limited in what routines it can call. In particular
+// it can only call nosplit functions (textflag 7) and cannot
+// do any scheduling that requires an m.
+//
+// In order to avoid needing heavy lifting here, we adopt
+// the following strategy: there is a stack of available m's
+// that can be stolen. Using compare-and-swap
+// to pop from the stack has ABA races, so we simulate
+// a lock by doing an exchange (via casp) to steal the stack
+// head and replace the top pointer with MLOCKED (1).
+// This serves as a simple spin lock that we can use even
+// without an m. The thread that locks the stack in this way
+// unlocks the stack by storing a valid stack head pointer.
+//
+// In order to make sure that there is always an m structure
+// available to be stolen, we maintain the invariant that there
+// is always one more than needed. At the beginning of the
+// program (if cgo is in use) the list is seeded with a single m.
+// If needm finds that it has taken the last m off the list, its job
+// is - once it has installed its own m so that it can do things like
+// allocate memory - to create a spare m and put it on the list.
+//
+// Each of these extra m's also has a g0 and a curg that are
+// pressed into service as the scheduling stack and current
+// goroutine for the duration of the cgo callback.
+//
+// When the callback is done with the m, it calls dropm to
+// put the m back on the list.
+#pragma textflag NOSPLIT
+void
+runtime·needm(byte x)
+{
+ M *mp;
+
+ if(runtime·needextram) {
+ // Can happen if C/C++ code calls Go from a global ctor.
+ // Can not throw, because scheduler is not initialized yet.
+ runtime·write(2, "fatal error: cgo callback before cgo call\n",
+ sizeof("fatal error: cgo callback before cgo call\n")-1);
+ runtime·exit(1);
+ }
+
+ // Lock extra list, take head, unlock popped list.
+ // nilokay=false is safe here because of the invariant above,
+ // that the extra list always contains or will soon contain
+ // at least one m.
+ mp = lockextra(false);
+
+ // Set needextram when we've just emptied the list,
+ // so that the eventual call into cgocallbackg will
+ // allocate a new m for the extra list. We delay the
+ // allocation until then so that it can be done
+ // after exitsyscall makes sure it is okay to be
+ // running at all (that is, there's no garbage collection
+ // running right now).
+ mp->needextram = mp->schedlink == nil;
+ unlockextra(mp->schedlink);
+
+ // Install g (= m->g0) and set the stack bounds
+ // to match the current stack. We don't actually know
+ // how big the stack is, like we don't know how big any
+ // scheduling stack is, but we assume there's at least 32 kB,
+ // which is more than enough for us.
+ runtime·setg(mp->g0);
+ g->stackbase = (uintptr)(&x + 1024);
+ g->stackguard = (uintptr)(&x - 32*1024);
+ g->stackguard0 = g->stackguard;
+
+ // Initialize this thread to use the m.
+ runtime·asminit();
+ runtime·minit();
+}
+
+// newextram allocates an m and puts it on the extra list.
+// It is called with a working local m, so that it can do things
+// like call schedlock and allocate.
+void
+runtime·newextram(void)
+{
+ M *mp, *mnext;
+ G *gp;
+
+ // Create extra goroutine locked to extra m.
+ // The goroutine is the context in which the cgo callback will run.
+ // The sched.pc will never be returned to, but setting it to
+ // runtime.goexit makes clear to the traceback routines where
+ // the goroutine stack ends.
+ mp = runtime·allocm(nil);
+ gp = runtime·malg(4096);
+ gp->sched.pc = (uintptr)runtime·goexit;
+ gp->sched.sp = gp->stackbase;
+ gp->sched.lr = 0;
+ gp->sched.g = gp;
+ gp->syscallpc = gp->sched.pc;
+ gp->syscallsp = gp->sched.sp;
+ gp->syscallstack = gp->stackbase;
+ gp->syscallguard = gp->stackguard;
+ // malg returns status as Gidle, change to Gsyscall before adding to allg
+ // where GC will see it.
+ runtime·casgstatus(gp, Gidle, Gsyscall);
+ gp->m = mp;
+ mp->curg = gp;
+ mp->locked = LockInternal;
+ mp->lockedg = gp;
+ gp->lockedm = mp;
+ gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
+ if(raceenabled)
+ gp->racectx = runtime·racegostart(runtime·newextram);
+ // put on allg for garbage collector
+ allgadd(gp);
+
+ // Add m to the extra list.
+ mnext = lockextra(true);
+ mp->schedlink = mnext;
+ unlockextra(mp);
+}
+
+// dropm is called when a cgo callback has called needm but is now
+// done with the callback and returning back into the non-Go thread.
+// It puts the current m back onto the extra list.
+//
+// The main expense here is the call to signalstack to release the
+// m's signal stack, and then the call to needm on the next callback
+// from this thread. It is tempting to try to save the m for next time,
+// which would eliminate both these costs, but there might not be
+// a next time: the current thread (which Go does not control) might exit.
+// If we saved the m for that thread, there would be an m leak each time
+// such a thread exited. Instead, we acquire and release an m on each
+// call. These should typically not be scheduling operations, just a few
+// atomics, so the cost should be small.
+//
+// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
+// variable using pthread_key_create. Unlike the pthread keys we already use
+// on OS X, this dummy key would never be read by Go code. It would exist
+// only so that we could register a thread-exit-time destructor.
+// That destructor would put the m back onto the extra list.
+// This is purely a performance optimization. The current version,
+// in which dropm happens on each cgo call, is still correct too.
+// We may have to keep the current version on systems with cgo
+// but without pthreads, like Windows.
+void
+runtime·dropm(void)
+{
+ M *mp, *mnext;
+
+ // Undo whatever initialization minit did during needm.
+ runtime·unminit();
+
+ // Clear m and g, and return m to the extra list.
+ // After the call to setmg we can only call nosplit functions.
+ mp = g->m;
+ runtime·setg(nil);
+
+ mnext = lockextra(true);
+ mp->schedlink = mnext;
+ unlockextra(mp);
+}
+
+#define MLOCKED ((M*)1)
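+// MLOCKED is a sentinel stored in runtime·extram while a thread holds the
+// extra-M list: lockextra swaps it in to take the list, and unlockextra
+// releases the list by storing a real (possibly nil) head pointer.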
+
+// lockextra locks the extra list and returns the list head.
+// The caller must unlock the list by storing a new list head
+// to runtime.extram. If nilokay is true, then lockextra will
+// return a nil list head if that's what it finds. If nilokay is false,
+// lockextra will keep waiting until the list head is no longer nil.
+#pragma textflag NOSPLIT
+static M*
+lockextra(bool nilokay)
+{
+ M *mp;
+ void (*yield)(void);
+
+ for(;;) {
+ mp = runtime·atomicloadp(&runtime·extram);
+ if(mp == MLOCKED) {
+ yield = runtime·osyield;
+ yield();
+ continue;
+ }
+ if(mp == nil && !nilokay) {
+ runtime·usleep(1);
+ continue;
+ }
+ if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
+ yield = runtime·osyield;
+ yield();
+ continue;
+ }
+ break;
+ }
+ return mp;
+}
+
+#pragma textflag NOSPLIT
+static void
+unlockextra(M *mp)
+{
+ runtime·atomicstorep(&runtime·extram, mp);
+}
+
+
+// Create a new m. It will start off with a call to fn, or else the scheduler.
+static void
+newm(void(*fn)(void), P *p)
+{
+ M *mp;
+
+ mp = runtime·allocm(p);
+ mp->nextp = p;
+ mp->mstartfn = fn;
+
+ if(runtime·iscgo) {
+ CgoThreadStart ts;
+
+ if(_cgo_thread_start == nil)
+ runtime·throw("_cgo_thread_start missing");
+ ts.g = mp->g0;
+ ts.tls = mp->tls;
+ ts.fn = runtime·mstart;
+ runtime·asmcgocall(_cgo_thread_start, &ts);
+ return;
+ }
+ runtime·newosproc(mp, (byte*)mp->g0->stackbase);
+}
+
+// Stops execution of the current m until new work is available.
+// Returns with acquired P.
+static void
+stopm(void)
+{
+ if(g->m->locks)
+ runtime·throw("stopm holding locks");
+ if(g->m->p)
+ runtime·throw("stopm holding p");
+ if(g->m->spinning) {
+ g->m->spinning = false;
+ runtime·xadd(&runtime·sched.nmspinning, -1);
+ }
+
+retry:
+ runtime·lock(&runtime·sched.lock);
+ mput(g->m);
+ runtime·unlock(&runtime·sched.lock);
+ runtime·notesleep(&g->m->park);
+ runtime·noteclear(&g->m->park);
+ if(g->m->helpgc) {
+ runtime·gchelper();
+ g->m->helpgc = 0;
+ g->m->mcache = nil;
+ goto retry;
+ }
+ acquirep(g->m->nextp);
+ g->m->nextp = nil;
+}
+
+static void
+mspinning(void)
+{
+ g->m->spinning = true;
+}
+
+// Schedules some M to run the p (creates an M if necessary).
+// If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
+static void
+startm(P *p, bool spinning)
+{
+ M *mp;
+ void (*fn)(void);
+
+ runtime·lock(&runtime·sched.lock);
+ if(p == nil) {
+ p = pidleget();
+ if(p == nil) {
+ runtime·unlock(&runtime·sched.lock);
+ if(spinning)
+ runtime·xadd(&runtime·sched.nmspinning, -1);
+ return;
+ }
+ }
+ mp = mget();
+ runtime·unlock(&runtime·sched.lock);
+ if(mp == nil) {
+ fn = nil;
+ if(spinning)
+ fn = mspinning;
+ newm(fn, p);
+ return;
+ }
+ if(mp->spinning)
+ runtime·throw("startm: m is spinning");
+ if(mp->nextp)
+ runtime·throw("startm: m has p");
+ mp->spinning = spinning;
+ mp->nextp = p;
+ runtime·notewakeup(&mp->park);
+}
+
+// Hands off P from syscall or locked M.
+static void
+handoffp(P *p)
+{
+ // if it has local work, start it straight away
+ if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
+ startm(p, false);
+ return;
+ }
+ // no local work, check that there are no spinning/idle M's,
+ // otherwise our help is not required
+ if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 && // TODO: fast atomic
+ runtime·cas(&runtime·sched.nmspinning, 0, 1)){
+ startm(p, true);
+ return;
+ }
+ runtime·lock(&runtime·sched.lock);
+ if(runtime·sched.gcwaiting) {
+ p->status = Pgcstop;
+ if(--runtime·sched.stopwait == 0)
+ runtime·notewakeup(&runtime·sched.stopnote);
+ runtime·unlock(&runtime·sched.lock);
+ return;
+ }
+ if(runtime·sched.runqsize) {
+ runtime·unlock(&runtime·sched.lock);
+ startm(p, false);
+ return;
+ }
+ // If this is the last running P and nobody is polling network,
+	// need to wake up another M to poll the network.
+ if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
+ runtime·unlock(&runtime·sched.lock);
+ startm(p, false);
+ return;
+ }
+ pidleput(p);
+ runtime·unlock(&runtime·sched.lock);
+}
+
+// Tries to add one more P to execute G's.
+// Called when a G is made runnable (newproc, ready).
+static void
+wakep(void)
+{
+ // be conservative about spinning threads
+ if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
+ return;
+ startm(nil, true);
+}
+
+// Stops execution of the current m that is locked to a g until the g is runnable again.
+// Returns with acquired P.
+static void
+stoplockedm(void)
+{
+ P *p;
+ uint32 status;
+
+ if(g->m->lockedg == nil || g->m->lockedg->lockedm != g->m)
+ runtime·throw("stoplockedm: inconsistent locking");
+ if(g->m->p) {
+ // Schedule another M to run this p.
+ p = releasep();
+ handoffp(p);
+ }
+ incidlelocked(1);
+ // Wait until another thread schedules lockedg again.
+ runtime·notesleep(&g->m->park);
+ runtime·noteclear(&g->m->park);
+ status = runtime·readgstatus(g->m->lockedg);
+ if((status&~Gscan) != Grunnable){
+ runtime·printf("runtime:stoplockedm: g is not Grunnable or Gscanrunnable");
+ dumpgstatus(g);
+ runtime·throw("stoplockedm: not runnable");
+ }
+ acquirep(g->m->nextp);
+ g->m->nextp = nil;
+}
+
+// Schedules the locked m to run the locked gp.
+static void
+startlockedm(G *gp)
+{
+ M *mp;
+ P *p;
+
+ mp = gp->lockedm;
+ if(mp == g->m)
+ runtime·throw("startlockedm: locked to me");
+ if(mp->nextp)
+ runtime·throw("startlockedm: m has p");
+ // directly handoff current P to the locked m
+ incidlelocked(-1);
+ p = releasep();
+ mp->nextp = p;
+ runtime·notewakeup(&mp->park);
+ stopm();
+}
+
+// Stops the current m for stoptheworld.
+// Returns when the world is restarted.
+static void
+gcstopm(void)
+{
+ P *p;
+
+ if(!runtime·sched.gcwaiting)
+ runtime·throw("gcstopm: not waiting for gc");
+ if(g->m->spinning) {
+ g->m->spinning = false;
+ runtime·xadd(&runtime·sched.nmspinning, -1);
+ }
+ p = releasep();
+ runtime·lock(&runtime·sched.lock);
+ p->status = Pgcstop;
+ if(--runtime·sched.stopwait == 0)
+ runtime·notewakeup(&runtime·sched.stopnote);
+ runtime·unlock(&runtime·sched.lock);
+ stopm();
+}
+
+// Schedules gp to run on the current M.
+// Never returns.
+static void
+execute(G *gp)
+{
+ int32 hz;
+
+ runtime·casgstatus(gp, Grunnable, Grunning);
+ gp->waitsince = 0;
+ gp->preempt = false;
+ gp->stackguard0 = gp->stackguard;
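+	// (setting stackguard0 back to stackguard drops any pending preemption request)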
+ g->m->p->schedtick++;
+ g->m->curg = gp;
+ gp->m = g->m;
+
+ // Check whether the profiler needs to be turned on or off.
+ hz = runtime·sched.profilehz;
+ if(g->m->profilehz != hz)
+ runtime·resetcpuprofiler(hz);
+
+ runtime·gogo(&gp->sched);
+}
+
+// Finds a runnable goroutine to execute.
+// Tries to steal from other P's, get g from global queue, poll network.
+static G*
+findrunnable(void)
+{
+ G *gp;
+ P *p;
+ int32 i;
+
+top:
+ if(runtime·sched.gcwaiting) {
+ gcstopm();
+ goto top;
+ }
+ if(runtime·fingwait && runtime·fingwake && (gp = runtime·wakefing()) != nil)
+ runtime·ready(gp);
+ // local runq
+ gp = runqget(g->m->p);
+ if(gp)
+ return gp;
+ // global runq
+ if(runtime·sched.runqsize) {
+ runtime·lock(&runtime·sched.lock);
+ gp = globrunqget(g->m->p, 0);
+ runtime·unlock(&runtime·sched.lock);
+ if(gp)
+ return gp;
+ }
+ // poll network
+ gp = runtime·netpoll(false); // non-blocking
+ if(gp) {
+ injectglist(gp->schedlink);
+ runtime·casgstatus(gp, Gwaiting, Grunnable);
+ return gp;
+ }
+ // If number of spinning M's >= number of busy P's, block.
+ // This is necessary to prevent excessive CPU consumption
+ // when GOMAXPROCS>>1 but the program parallelism is low.
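+	// Concretely: block once 2*nmspinning >= gomaxprocs - npidle, that is, when
+	// spinning M's already account for at least half of the busy P's.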
+ if(!g->m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle)) // TODO: fast atomic
+ goto stop;
+ if(!g->m->spinning) {
+ g->m->spinning = true;
+ runtime·xadd(&runtime·sched.nmspinning, 1);
+ }
+ // random steal from other P's
+ for(i = 0; i < 2*runtime·gomaxprocs; i++) {
+ if(runtime·sched.gcwaiting)
+ goto top;
+ p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
+ if(p == g->m->p)
+ gp = runqget(p);
+ else
+ gp = runqsteal(g->m->p, p);
+ if(gp)
+ return gp;
+ }
+stop:
+ // return P and block
+ runtime·lock(&runtime·sched.lock);
+ if(runtime·sched.gcwaiting) {
+ runtime·unlock(&runtime·sched.lock);
+ goto top;
+ }
+ if(runtime·sched.runqsize) {
+ gp = globrunqget(g->m->p, 0);
+ runtime·unlock(&runtime·sched.lock);
+ return gp;
+ }
+ p = releasep();
+ pidleput(p);
+ runtime·unlock(&runtime·sched.lock);
+ if(g->m->spinning) {
+ g->m->spinning = false;
+ runtime·xadd(&runtime·sched.nmspinning, -1);
+ }
+ // check all runqueues once again
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ if(p && p->runqhead != p->runqtail) {
+ runtime·lock(&runtime·sched.lock);
+ p = pidleget();
+ runtime·unlock(&runtime·sched.lock);
+ if(p) {
+ acquirep(p);
+ goto top;
+ }
+ break;
+ }
+ }
+ // poll network
+ if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
+ if(g->m->p)
+ runtime·throw("findrunnable: netpoll with p");
+ if(g->m->spinning)
+ runtime·throw("findrunnable: netpoll with spinning");
+ gp = runtime·netpoll(true); // block until new work is available
+ runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
+ if(gp) {
+ runtime·lock(&runtime·sched.lock);
+ p = pidleget();
+ runtime·unlock(&runtime·sched.lock);
+ if(p) {
+ acquirep(p);
+ injectglist(gp->schedlink);
+ runtime·casgstatus(gp, Gwaiting, Grunnable);
+ return gp;
+ }
+ injectglist(gp);
+ }
+ }
+ stopm();
+ goto top;
+}
+
+static void
+resetspinning(void)
+{
+ int32 nmspinning;
+
+ if(g->m->spinning) {
+ g->m->spinning = false;
+ nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
+ if(nmspinning < 0)
+ runtime·throw("findrunnable: negative nmspinning");
+ } else
+ nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
+
+ // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+	// so see if we need to wake up another P here.
+ if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
+ wakep();
+}
+
+// Injects the list of runnable G's into the scheduler.
+// Can run concurrently with GC.
+static void
+injectglist(G *glist)
+{
+ int32 n;
+ G *gp;
+
+ if(glist == nil)
+ return;
+ runtime·lock(&runtime·sched.lock);
+ for(n = 0; glist; n++) {
+ gp = glist;
+ glist = gp->schedlink;
+ runtime·casgstatus(gp, Gwaiting, Grunnable);
+ globrunqput(gp);
+ }
+ runtime·unlock(&runtime·sched.lock);
+
+ for(; n && runtime·sched.npidle; n--)
+ startm(nil, false);
+}
+
+// One round of scheduler: find a runnable goroutine and execute it.
+// Never returns.
+static void
+schedule(void)
+{
+ G *gp;
+ uint32 tick;
+
+ if(g->m->locks)
+ runtime·throw("schedule: holding locks");
+
+ if(g->m->lockedg) {
+ stoplockedm();
+ execute(g->m->lockedg); // Never returns.
+ }
+
+top:
+ if(runtime·sched.gcwaiting) {
+ gcstopm();
+ goto top;
+ }
+
+ gp = nil;
+ // Check the global runnable queue once in a while to ensure fairness.
+ // Otherwise two goroutines can completely occupy the local runqueue
+ // by constantly respawning each other.
+ tick = g->m->p->schedtick;
+ // This is a fancy way to say tick%61==0,
+ // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
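+	// The constant 0x4325c53f is 2^36/61 rounded up, so ((uint64)tick*0x4325c53f)>>36
+	// equals tick/61 for any uint32 tick; subtracting 61 times that quotient leaves
+	// tick%61. For example, tick=61 yields quotient 1 and remainder 0.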
+ if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
+ runtime·lock(&runtime·sched.lock);
+ gp = globrunqget(g->m->p, 1);
+ runtime·unlock(&runtime·sched.lock);
+ if(gp)
+ resetspinning();
+ }
+ if(gp == nil) {
+ gp = runqget(g->m->p);
+ if(gp && g->m->spinning)
+ runtime·throw("schedule: spinning with local work");
+ }
+ if(gp == nil) {
+ gp = findrunnable(); // blocks until work is available
+ resetspinning();
+ }
+
+ if(gp->lockedm) {
+ // Hands off own p to the locked m,
+ // then blocks waiting for a new p.
+ startlockedm(gp);
+ goto top;
+ }
+
+ execute(gp);
+}
+
+// dropg removes the association between m and the current goroutine m->curg (gp for short).
+// Typically a caller sets gp's status away from Grunning and then
+// immediately calls dropg to finish the job. The caller is also responsible
+// for arranging that gp will be restarted using runtime·ready at an
+// appropriate time. After calling dropg and arranging for gp to be
+// readied later, the caller can do other work but eventually should
+// call schedule to restart the scheduling of goroutines on this m.
+static void
+dropg(void)
+{
+ if(g->m->lockedg == nil) {
+ g->m->curg->m = nil;
+ g->m->curg = nil;
+ }
+}
+
+// Puts the current goroutine into a waiting state and calls unlockf.
+// If unlockf returns false, the goroutine is resumed.
+void
+runtime·park(bool(*unlockf)(G*, void*), void *lock, String reason)
+{
+ void (*fn)(G*);
+
+ g->m->waitlock = lock;
+ g->m->waitunlockf = unlockf;
+ g->waitreason = reason;
+ fn = runtime·park_m;
+ runtime·mcall(&fn);
+}
+
+bool
+runtime·parkunlock_c(G *gp, void *lock)
+{
+ USED(gp);
+ runtime·unlock(lock);
+ return true;
+}
+
+// Puts the current goroutine into a waiting state and unlocks the lock.
+// The goroutine can be made runnable again by calling runtime·ready(gp).
+void
+runtime·parkunlock(Mutex *lock, String reason)
+{
+ runtime·park(runtime·parkunlock_c, lock, reason);
+}
+
+// runtime·park continuation on g0.
+void
+runtime·park_m(G *gp)
+{
+ bool ok;
+
+ runtime·casgstatus(gp, Grunning, Gwaiting);
+ dropg();
+
+ if(g->m->waitunlockf) {
+ ok = g->m->waitunlockf(gp, g->m->waitlock);
+ g->m->waitunlockf = nil;
+ g->m->waitlock = nil;
+ if(!ok) {
+ runtime·casgstatus(gp, Gwaiting, Grunnable);
+ execute(gp); // Schedule it back, never returns.
+ }
+ }
+
+ schedule();
+}
+
+// Scheduler yield.
+void
+runtime·gosched(void)
+{
+ void (*fn)(G*);
+
+ fn = runtime·gosched_m;
+ runtime·mcall(&fn);
+}
+
+// runtime·gosched continuation on g0.
+void
+runtime·gosched_m(G *gp)
+{
+ uint32 status;
+
+ status = runtime·readgstatus(gp);
+ if((status&~Gscan) != Grunning){
+ dumpgstatus(gp);
+ runtime·throw("bad g status");
+ }
+ runtime·casgstatus(gp, Grunning, Grunnable);
+ dropg();
+ runtime·lock(&runtime·sched.lock);
+ globrunqput(gp);
+ runtime·unlock(&runtime·sched.lock);
+
+ schedule();
+}
+
+// Finishes execution of the current goroutine.
+// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack).
+// Since it does not return, it does not matter. But if it is preempted
+// at the split stack check, GC will complain about inconsistent sp.
+#pragma textflag NOSPLIT
+void
+runtime·goexit(void)
+{
+ void (*fn)(G*);
+
+ if(raceenabled)
+ runtime·racegoend();
+ fn = goexit0;
+ runtime·mcall(&fn);
+}
+
+// runtime·goexit continuation on g0.
+static void
+goexit0(G *gp)
+{
+ runtime·casgstatus(gp, Grunning, Gdead);
+ gp->m = nil;
+ gp->lockedm = nil;
+ g->m->lockedg = nil;
+ gp->paniconfault = 0;
+	gp->defer = nil; // should already be nil, but just in case.
+ gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
+ gp->writebuf.array = nil;
+ gp->writebuf.len = 0;
+ gp->writebuf.cap = 0;
+ gp->waitreason.str = nil;
+ gp->waitreason.len = 0;
+ gp->param = nil;
+
+ dropg();
+
+ if(g->m->locked & ~LockExternal) {
+ runtime·printf("invalid m->locked = %d\n", g->m->locked);
+ runtime·throw("internal lockOSThread error");
+ }
+ g->m->locked = 0;
+ runtime·unwindstack(gp, nil);
+ gfput(g->m->p, gp);
+ schedule();
+}
+
+#pragma textflag NOSPLIT
+static void
+save(void *pc, uintptr sp)
+{
+ g->sched.pc = (uintptr)pc;
+ g->sched.sp = sp;
+ g->sched.lr = 0;
+ g->sched.ret = 0;
+ g->sched.ctxt = 0;
+ g->sched.g = g;
+}
+
+static void entersyscall_bad(void);
+static void entersyscall_sysmon(void);
+static void entersyscall_gcwait(void);
+
+// The goroutine g is about to enter a system call.
+// Record that it's not using the cpu anymore.
+// This is called only from the go syscall library and cgocall,
+// not from the low-level system calls used by the runtime.
+//
+// Entersyscall cannot split the stack: the runtime·gosave must
+// make g->sched refer to the caller's stack segment, because
+// entersyscall is going to return immediately after.
+//
+// Nothing entersyscall calls can split the stack either.
+// We cannot safely move the stack during an active call to syscall,
+// because we do not know which of the uintptr arguments are
+// really pointers (back into the stack).
+// In practice, this means that we make the fast path run through
+// entersyscall doing no-split things, and the slow path has to use onM
+// to run bigger things on the m stack.
+#pragma textflag NOSPLIT
+void
+·entersyscall(int32 dummy)
+{
+ void (*fn)(void);
+
+ // Disable preemption because during this function g is in Gsyscall status,
+ // but can have inconsistent g->sched, do not let GC observe it.
+ g->m->locks++;
+
+ // Entersyscall must not call any function that might split/grow the stack.
+ // (See details in comment above.)
+ // Catch calls that might, by replacing the stack guard with something that
+ // will trip any stack check and leaving a flag to tell newstack to die.
+ g->stackguard0 = StackPreempt;
+ g->throwsplit = 1;
+
+ // Leave SP around for GC and traceback.
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+ g->syscallsp = g->sched.sp;
+ g->syscallpc = g->sched.pc;
+ g->syscallstack = g->stackbase;
+ g->syscallguard = g->stackguard;
+ runtime·casgstatus(g, Grunning, Gsyscall);
+ if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
+ fn = entersyscall_bad;
+ runtime·onM(&fn);
+ }
+
+ if(runtime·atomicload(&runtime·sched.sysmonwait)) { // TODO: fast atomic
+ fn = entersyscall_sysmon;
+ runtime·onM(&fn);
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+ }
+
+ g->m->mcache = nil;
+ g->m->p->m = nil;
+ runtime·atomicstore(&g->m->p->status, Psyscall);
+ if(runtime·sched.gcwaiting) {
+ fn = entersyscall_gcwait;
+ runtime·onM(&fn);
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+ }
+
+ // Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
+ // We set stackguard to StackPreempt so that first split stack check calls morestack.
+ // Morestack detects this case and throws.
+ g->stackguard0 = StackPreempt;
+ g->m->locks--;
+}
+
+static void
+entersyscall_bad(void)
+{
+ G *gp;
+
+ gp = g->m->curg;
+ runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
+ gp->syscallsp, gp->syscallguard-StackGuard, gp->syscallstack);
+ runtime·throw("entersyscall");
+}
+
+static void
+entersyscall_sysmon(void)
+{
+ runtime·lock(&runtime·sched.lock);
+ if(runtime·atomicload(&runtime·sched.sysmonwait)) {
+ runtime·atomicstore(&runtime·sched.sysmonwait, 0);
+ runtime·notewakeup(&runtime·sched.sysmonnote);
+ }
+ runtime·unlock(&runtime·sched.lock);
+}
+
+static void
+entersyscall_gcwait(void)
+{
+ runtime·lock(&runtime·sched.lock);
+ if (runtime·sched.stopwait > 0 && runtime·cas(&g->m->p->status, Psyscall, Pgcstop)) {
+ if(--runtime·sched.stopwait == 0)
+ runtime·notewakeup(&runtime·sched.stopnote);
+ }
+ runtime·unlock(&runtime·sched.lock);
+}
+
+static void entersyscallblock_handoff(void);
+
+// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
+#pragma textflag NOSPLIT
+void
+·entersyscallblock(int32 dummy)
+{
+ void (*fn)(void);
+
+ g->m->locks++; // see comment in entersyscall
+ g->throwsplit = 1;
+ g->stackguard0 = StackPreempt; // see comment in entersyscall
+
+ // Leave SP around for GC and traceback.
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+ g->syscallsp = g->sched.sp;
+ g->syscallpc = g->sched.pc;
+ g->syscallstack = g->stackbase;
+ g->syscallguard = g->stackguard;
+ runtime·casgstatus(g, Grunning, Gsyscall);
+ if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
+ fn = entersyscall_bad;
+ runtime·onM(&fn);
+ }
+
+ fn = entersyscallblock_handoff;
+ runtime·onM(&fn);
+
+ // Resave for traceback during blocked call.
+ save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
+
+ g->m->locks--;
+}
+
+static void
+entersyscallblock_handoff(void)
+{
+ handoffp(releasep());
+}
+
+// The goroutine g exited its system call.
+// Arrange for it to run on a cpu again.
+// This is called only from the go syscall library, not
+// from the low-level system calls used by the runtime.
+#pragma textflag NOSPLIT
+void
+runtime·exitsyscall(void)
+{
+ void (*fn)(G*);
+
+ g->m->locks++; // see comment in entersyscall
+
+ g->waitsince = 0;
+ if(exitsyscallfast()) {
+ // There's a cpu for us, so we can run.
+ g->m->p->syscalltick++;
+ // We need to cas the status and scan before resuming...
+ runtime·casgstatus(g, Gsyscall, Grunning);
+
+ // Garbage collector isn't running (since we are),
+ // so okay to clear gcstack and gcsp.
+ g->syscallstack = (uintptr)nil;
+ g->syscallsp = (uintptr)nil;
+ g->m->locks--;
+ if(g->preempt) {
+ // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+ } else {
+		// otherwise restore the real stackguard; we've spoiled it in entersyscall/entersyscallblock
+ g->stackguard0 = g->stackguard;
+ }
+ g->throwsplit = 0;
+ return;
+ }
+
+ g->m->locks--;
+
+ // Call the scheduler.
+ fn = exitsyscall0;
+ runtime·mcall(&fn);
+
+ // Scheduler returned, so we're allowed to run now.
+ // Delete the gcstack information that we left for
+ // the garbage collector during the system call.
+ // Must wait until now because until gosched returns
+ // we don't know for sure that the garbage collector
+ // is not running.
+ g->syscallstack = (uintptr)nil;
+ g->syscallsp = (uintptr)nil;
+ g->m->p->syscalltick++;
+ g->throwsplit = 0;
+}
+
+static void exitsyscallfast_pidle(void);
+
+#pragma textflag NOSPLIT
+static bool
+exitsyscallfast(void)
+{
+ void (*fn)(void);
+
+ // Freezetheworld sets stopwait but does not retake P's.
+ if(runtime·sched.stopwait) {
+ g->m->p = nil;
+ return false;
+ }
+
+ // Try to re-acquire the last P.
+ if(g->m->p && g->m->p->status == Psyscall && runtime·cas(&g->m->p->status, Psyscall, Prunning)) {
+ // There's a cpu for us, so we can run.
+ g->m->mcache = g->m->p->mcache;
+ g->m->p->m = g->m;
+ return true;
+ }
+ // Try to get any other idle P.
+ g->m->p = nil;
+ if(runtime·sched.pidle) {
+ fn = exitsyscallfast_pidle;
+ runtime·onM(&fn);
+ if(g->m->scalararg[0]) {
+ g->m->scalararg[0] = 0;
+ return true;
+ }
+ }
+ return false;
+}
+
+static void
+exitsyscallfast_pidle(void)
+{
+ P *p;
+
+ runtime·lock(&runtime·sched.lock);
+ p = pidleget();
+ if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
+ runtime·atomicstore(&runtime·sched.sysmonwait, 0);
+ runtime·notewakeup(&runtime·sched.sysmonnote);
+ }
+ runtime·unlock(&runtime·sched.lock);
+ if(p) {
+ acquirep(p);
+ g->m->scalararg[0] = 1;
+ } else
+ g->m->scalararg[0] = 0;
+}
+
+// runtime·exitsyscall slow path on g0.
+// Failed to acquire P, enqueue gp as runnable.
+static void
+exitsyscall0(G *gp)
+{
+ P *p;
+
+ runtime·casgstatus(gp, Gsyscall, Grunnable);
+ dropg();
+ runtime·lock(&runtime·sched.lock);
+ p = pidleget();
+ if(p == nil)
+ globrunqput(gp);
+ else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
+ runtime·atomicstore(&runtime·sched.sysmonwait, 0);
+ runtime·notewakeup(&runtime·sched.sysmonnote);
+ }
+ runtime·unlock(&runtime·sched.lock);
+ if(p) {
+ acquirep(p);
+ execute(gp); // Never returns.
+ }
+ if(g->m->lockedg) {
+ // Wait until another thread schedules gp and so m again.
+ stoplockedm();
+ execute(gp); // Never returns.
+ }
+ stopm();
+ schedule(); // Never returns.
+}
+
+static void
+beforefork(void)
+{
+ G *gp;
+
+ gp = g->m->curg;
+ // Fork can hang if preempted with signals frequently enough (see issue 5517).
+ // Ensure that we stay on the same M where we disable profiling.
+ gp->m->locks++;
+ if(gp->m->profilehz != 0)
+ runtime·resetcpuprofiler(0);
+
+ // This function is called before fork in syscall package.
+ // Code between fork and exec must not allocate memory nor even try to grow stack.
+ // Here we spoil g->stackguard to reliably detect any attempts to grow stack.
+ // runtime_AfterFork will undo this in parent process, but not in child.
+ gp->m->forkstackguard = gp->stackguard;
+ gp->stackguard0 = StackPreempt-1;
+ gp->stackguard = StackPreempt-1;
+}
+
+// Called from syscall package before fork.
+#pragma textflag NOSPLIT
+void
+syscall·runtime_BeforeFork(void)
+{
+ void (*fn)(void);
+
+ fn = beforefork;
+ runtime·onM(&fn);
+}
+
+static void
+afterfork(void)
+{
+ int32 hz;
+ G *gp;
+
+ gp = g->m->curg;
+ // See the comment in runtime_BeforeFork.
+ gp->stackguard0 = gp->m->forkstackguard;
+ gp->stackguard = gp->m->forkstackguard;
+ gp->m->forkstackguard = 0;
+
+ hz = runtime·sched.profilehz;
+ if(hz != 0)
+ runtime·resetcpuprofiler(hz);
+ gp->m->locks--;
+}
+
+// Called from syscall package after fork in parent.
+#pragma textflag NOSPLIT
+void
+syscall·runtime_AfterFork(void)
+{
+ void (*fn)(void);
+
+ fn = afterfork;
+ runtime·onM(&fn);
+}
+
+// Hook used by runtime·malg to call runtime·stackalloc on the
+// scheduler stack. This exists because runtime·stackalloc insists
+// on being called on the scheduler stack, to avoid trying to grow
+// the stack while allocating a new stack segment.
+static void
+mstackalloc(G *gp)
+{
+ G *newg;
+ uintptr size;
+
+ newg = (G*)gp->param;
+ size = newg->stacksize;
+ newg->stacksize = 0;
+ gp->param = runtime·stackalloc(newg, size);
+ runtime·gogo(&gp->sched);
+}
+
+// Allocate a new g, with a stack big enough for stacksize bytes.
+G*
+runtime·malg(int32 stacksize)
+{
+ G *newg;
+ byte *stk;
+ void (*fn)(G*);
+
+ if(StackTop < sizeof(Stktop)) {
+ runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
+ runtime·throw("runtime: bad stack.h");
+ }
+
+ newg = allocg();
+ if(stacksize >= 0) {
+ stacksize = runtime·round2(StackSystem + stacksize);
+ if(g == g->m->g0) {
+ // running on scheduler stack already.
+ stk = runtime·stackalloc(newg, stacksize);
+ } else {
+ // have to call stackalloc on scheduler stack.
+ newg->stacksize = stacksize;
+ g->param = newg;
+ fn = mstackalloc;
+ runtime·mcall(&fn);
+ stk = g->param;
+ g->param = nil;
+ }
+ newg->stack0 = (uintptr)stk;
+ newg->stackguard = (uintptr)stk + StackGuard;
+ newg->stackguard0 = newg->stackguard;
+ newg->stackbase = (uintptr)stk + stacksize - sizeof(Stktop);
+ }
+ return newg;
+}
+
+static void
+newproc_m(void)
+{
+ byte *argp;
+ void *callerpc;
+ FuncVal *fn;
+ int32 siz;
+
+ siz = g->m->scalararg[0];
+ callerpc = (void*)g->m->scalararg[1];
+ argp = g->m->ptrarg[0];
+ fn = (FuncVal*)g->m->ptrarg[1];
+
+ runtime·newproc1(fn, argp, siz, 0, callerpc);
+ g->m->ptrarg[0] = nil;
+ g->m->ptrarg[1] = nil;
+}
+
+// Create a new g running fn with siz bytes of arguments.
+// Put it on the queue of g's waiting to run.
+// The compiler turns a go statement into a call to this.
+// Cannot split the stack because it assumes that the arguments
+// are available sequentially after &fn; they would not be
+// copied if a stack split occurred.
+#pragma textflag NOSPLIT
+void
+runtime·newproc(int32 siz, FuncVal* fn, ...)
+{
+ byte *argp;
+ void (*mfn)(void);
+
+ if(thechar == '5')
+ argp = (byte*)(&fn+2); // skip caller's saved LR
+ else
+ argp = (byte*)(&fn+1);
+
+ g->m->locks++;
+ g->m->scalararg[0] = siz;
+ g->m->scalararg[1] = (uintptr)runtime·getcallerpc(&siz);
+ g->m->ptrarg[0] = argp;
+ g->m->ptrarg[1] = fn;
+ mfn = newproc_m;
+ runtime·onM(&mfn);
+ g->m->locks--;
+}
+
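+// For illustration, a rough sketch of what the compiler emits for a go
+// statement: a statement such as
+//
+//	go f(x, y)
+//
+// is lowered to approximately
+//
+//	runtime·newproc(siz, f, x, y)
+//
+// where siz is the byte size of the arguments x and y; newproc1 below then
+// copies those siz bytes onto the new goroutine's stack.
+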
+// Create a new g running fn with narg bytes of arguments starting
+// at argp and returning nret bytes of results. callerpc is the
+// address of the go statement that created this. The new g is put
+// on the queue of g's waiting to run.
+G*
+runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
+{
+ byte *sp;
+ G *newg;
+ P *p;
+ int32 siz;
+
+ if(fn == nil) {
+ g->m->throwing = -1; // do not dump full stacks
+ runtime·throw("go of nil func value");
+ }
+ g->m->locks++; // disable preemption because it can be holding p in a local var
+ siz = narg + nret;
+ siz = (siz+7) & ~7;
+
+ // We could instead create a secondary stack frame
+ // and make it look like goexit was on the original but
+ // the call to the actual goroutine function was split.
+ // Not worth it: this is almost always an error.
+ if(siz > StackMin - 1024)
+ runtime·throw("runtime.newproc: function arguments too large for new goroutine");
+
+ p = g->m->p;
+ if((newg = gfget(p)) != nil) {
+ if(newg->stackguard - StackGuard != newg->stack0)
+ runtime·throw("invalid stack in newg");
+ } else {
+ newg = runtime·malg(StackMin);
+ runtime·casgstatus(newg, Gidle, Gdead);
+ allgadd(newg); // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
+ }
+
+ if(runtime·readgstatus(newg) != Gdead)
+ runtime·throw("newproc1: new g is not Gdead");
+
+ sp = (byte*)newg->stackbase;
+ sp -= siz;
+ runtime·memmove(sp, argp, narg);
+ if(thechar == '5') {
+ // caller's LR
+ sp -= sizeof(void*);
+ *(void**)sp = nil;
+ }
+
+ runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
+ newg->sched.sp = (uintptr)sp;
+ newg->sched.pc = (uintptr)runtime·goexit;
+ newg->sched.g = newg;
+ runtime·gostartcallfn(&newg->sched, fn);
+ newg->gopc = (uintptr)callerpc;
+ runtime·casgstatus(newg, Gdead, Grunnable);
+
+ if(p->goidcache == p->goidcacheend) {
+ // Sched.goidgen is the last allocated id,
+ // this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
+ // At startup sched.goidgen=0, so main goroutine receives goid=1.
+ p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch);
+ p->goidcache -= GoidCacheBatch - 1;
+ p->goidcacheend = p->goidcache + GoidCacheBatch;
+ }
+ newg->goid = p->goidcache++;
+ if(raceenabled)
+ newg->racectx = runtime·racegostart((void*)callerpc);
+ runqput(p, newg);
+
+ if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic
+ wakep();
+ g->m->locks--;
+ if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+ return newg;
+}
+
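+// A worked example of the goid cache in newproc1 above (assuming
+// GoidCacheBatch is 16): the first time a P's cache is empty,
+// xadd64(&sched.goidgen, 16) advances the global generator from 0 to 16 and
+// returns 16, so goidcache = 16 - 15 = 1 and goidcacheend = 17. That P then
+// hands out goids 1..16 without touching sched.goidgen again, which is how
+// the main goroutine ends up with goid 1.
+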
+static void
+allgadd(G *gp)
+{
+ G **new;
+ uintptr cap;
+
+ if(runtime·readgstatus(gp) == Gidle)
+ runtime·throw("allgadd: bad status Gidle");
+
+ runtime·lock(&allglock);
+ if(runtime·allglen >= allgcap) {
+ cap = 4096/sizeof(new[0]);
+ if(cap < 2*allgcap)
+ cap = 2*allgcap;
+ new = runtime·mallocgc(cap*sizeof(new[0]), nil, 0);
+ if(new == nil)
+ runtime·throw("runtime: cannot allocate memory");
+ if(runtime·allg != nil)
+ runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
+ runtime·allg = new;
+ runtime·allgs.array = (void*)runtime·allg;
+ allgcap = cap;
+ runtime·allgs.cap = allgcap;
+ }
+ runtime·allg[runtime·allglen++] = gp;
+ runtime·allgs.len = runtime·allglen;
+ runtime·unlock(&allglock);
+}
+
+// Put on gfree list.
+// If local list is too long, transfer a batch to the global list.
+static void
+gfput(P *p, G *gp)
+{
+ uintptr stksize;
+ Stktop *top;
+
+ if(runtime·readgstatus(gp) != Gdead)
+ runtime·throw("gfput: bad status (not Gdead)");
+
+ if(gp->stackguard - StackGuard != gp->stack0)
+ runtime·throw("invalid stack in gfput");
+ stksize = gp->stackbase + sizeof(Stktop) - gp->stack0;
+ if(stksize != gp->stacksize) {
+ runtime·printf("runtime: bad stacksize, goroutine %D, remain=%d, last=%d\n",
+ gp->goid, (int32)gp->stacksize, (int32)stksize);
+ runtime·throw("gfput: bad stacksize");
+ }
+ top = (Stktop*)gp->stackbase;
+ if(stksize != FixedStack) {
+ // non-standard stack size - free it.
+ runtime·stackfree(gp, (void*)gp->stack0, top);
+ gp->stack0 = 0;
+ gp->stackguard = 0;
+ gp->stackguard0 = 0;
+ gp->stackbase = 0;
+ }
+ gp->schedlink = p->gfree;
+ p->gfree = gp;
+ p->gfreecnt++;
+ if(p->gfreecnt >= 64) {
+ runtime·lock(&runtime·sched.gflock);
+ while(p->gfreecnt >= 32) {
+ p->gfreecnt--;
+ gp = p->gfree;
+ p->gfree = gp->schedlink;
+ gp->schedlink = runtime·sched.gfree;
+ runtime·sched.gfree = gp;
+ runtime·sched.ngfree++;
+ }
+ runtime·unlock(&runtime·sched.gflock);
+ }
+}
+
+// Get from gfree list.
+// If local list is empty, grab a batch from global list.
+static G*
+gfget(P *p)
+{
+ G *gp;
+ byte *stk;
+ void (*fn)(G*);
+
+retry:
+ gp = p->gfree;
+ if(gp == nil && runtime·sched.gfree) {
+ runtime·lock(&runtime·sched.gflock);
+ while(p->gfreecnt < 32 && runtime·sched.gfree != nil) {
+ p->gfreecnt++;
+ gp = runtime·sched.gfree;
+ runtime·sched.gfree = gp->schedlink;
+ runtime·sched.ngfree--;
+ gp->schedlink = p->gfree;
+ p->gfree = gp;
+ }
+ runtime·unlock(&runtime·sched.gflock);
+ goto retry;
+ }
+ if(gp) {
+ p->gfree = gp->schedlink;
+ p->gfreecnt--;
+
+ if(gp->stack0 == 0) {
+ // Stack was deallocated in gfput. Allocate a new one.
+ if(g == g->m->g0) {
+ stk = runtime·stackalloc(gp, FixedStack);
+ } else {
+ gp->stacksize = FixedStack;
+ g->param = gp;
+ fn = mstackalloc;
+ runtime·mcall(&fn);
+ stk = g->param;
+ g->param = nil;
+ }
+ gp->stack0 = (uintptr)stk;
+ gp->stackbase = (uintptr)stk + FixedStack - sizeof(Stktop);
+ gp->stackguard = (uintptr)stk + StackGuard;
+ gp->stackguard0 = gp->stackguard;
+ } else {
+ if(raceenabled)
+ runtime·racemalloc((void*)gp->stack0, gp->stackbase + sizeof(Stktop) - gp->stack0);
+ }
+ }
+ return gp;
+}
+
+// Purge all cached G's from gfree list to the global list.
+static void
+gfpurge(P *p)
+{
+ G *gp;
+
+ runtime·lock(&runtime·sched.gflock);
+ while(p->gfreecnt != 0) {
+ p->gfreecnt--;
+ gp = p->gfree;
+ p->gfree = gp->schedlink;
+ gp->schedlink = runtime·sched.gfree;
+ runtime·sched.gfree = gp;
+ runtime·sched.ngfree++;
+ }
+ runtime·unlock(&runtime·sched.gflock);
+}
+
+void
+runtime·Breakpoint(void)
+{
+ runtime·breakpoint();
+}
+
+// Implementation of runtime.GOMAXPROCS.
+// delete when scheduler is even stronger
+void
+runtime·gomaxprocs_m(void)
+{
+ int32 n, ret;
+
+ n = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+
+ if(n > MaxGomaxprocs)
+ n = MaxGomaxprocs;
+ runtime·lock(&runtime·sched.lock);
+ ret = runtime·gomaxprocs;
+ if(n <= 0 || n == ret) {
+ runtime·unlock(&runtime·sched.lock);
+ g->m->scalararg[0] = ret;
+ return;
+ }
+ runtime·unlock(&runtime·sched.lock);
+
+ runtime·semacquire(&runtime·worldsema, false);
+ g->m->gcing = 1;
+ runtime·stoptheworld();
+ newprocs = n;
+ g->m->gcing = 0;
+ runtime·semrelease(&runtime·worldsema);
+ runtime·starttheworld();
+
+ g->m->scalararg[0] = ret;
+ return;
+}
+
+// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
+// after they modify m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+#pragma textflag NOSPLIT
+static void
+lockOSThread(void)
+{
+ g->m->lockedg = g;
+ g->lockedm = g->m;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·LockOSThread(void)
+{
+ g->m->locked |= LockExternal;
+ lockOSThread();
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·lockOSThread(void)
+{
+ g->m->locked += LockInternal;
+ lockOSThread();
+}
+
+
+// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
+// after they update m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+#pragma textflag NOSPLIT
+static void
+unlockOSThread(void)
+{
+ if(g->m->locked != 0)
+ return;
+ g->m->lockedg = nil;
+ g->lockedm = nil;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·UnlockOSThread(void)
+{
+ g->m->locked &= ~LockExternal;
+ unlockOSThread();
+}
+
+static void badunlockOSThread(void);
+
+#pragma textflag NOSPLIT
+void
+runtime·unlockOSThread(void)
+{
+ void (*fn)(void);
+
+ if(g->m->locked < LockInternal) {
+ fn = badunlockOSThread;
+ runtime·onM(&fn);
+ }
+ g->m->locked -= LockInternal;
+ unlockOSThread();
+}
+
+static void
+badunlockOSThread(void)
+{
+ runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
+}
+
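+// The locked field keeps the two flavours separate: LockExternal is a single
+// bit set by the exported runtime.LockOSThread, while LockInternal is a
+// counter bumped by the runtime-internal lockOSThread, so internal
+// lock/unlock pairs nest independently of the user-visible lock. A rough
+// usage sketch from Go code (hypothetical caller):
+//
+//	runtime.LockOSThread()
+//	defer runtime.UnlockOSThread()
+//	// ... call into a C library or OS facility that keeps per-thread
+//	// state; the goroutine stays wired to this OS thread until unlocked.
+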
+#pragma textflag NOSPLIT
+int32
+runtime·gcount(void)
+{
+ P *p, **pp;
+ int32 n;
+
+ n = runtime·allglen - runtime·sched.ngfree;
+ for(pp=runtime·allp; p=*pp; pp++)
+ n -= p->gfreecnt;
+ // All these variables can be changed concurrently, so the result can be inconsistent.
+ // But at least the current goroutine is running.
+ if(n < 1)
+ n = 1;
+ return n;
+}
+
+int32
+runtime·mcount(void)
+{
+ return runtime·sched.mcount;
+}
+
+static struct {
+ uint32 lock;
+ int32 hz;
+} prof;
+
+static void System(void) {}
+static void ExternalCode(void) {}
+static void GC(void) {}
+extern void runtime·cpuproftick(uintptr*, int32);
+extern byte runtime·etext[];
+
+// Called if we receive a SIGPROF signal.
+void
+runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
+{
+ int32 n;
+ bool traceback;
+ // Do not use global m in this function, use mp instead.
+ // On windows one m is sending reports about all the g's, so m would refer to the wrong thing.
+ byte m;
+ uintptr stk[100];
+
+ m = 0;
+ USED(m);
+
+ if(prof.hz == 0)
+ return;
+
+ // Profiling runs concurrently with GC, so it must not allocate.
+ mp->mallocing++;
+
+ // Define that a "user g" is a user-created goroutine, and a "system g"
+ // is one that is m->g0 or m->gsignal. We've only made sure that we
+ // can unwind user g's, so exclude the system g's.
+ //
+ // It is not quite as easy as testing gp == m->curg (the current user g)
+ // because we might be interrupted for profiling halfway through a
+ // goroutine switch. The switch involves updating three (or four) values:
+ // g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
+ // because once it gets updated the new g is running.
+ //
+ // When switching from a user g to a system g, LR is not considered live,
+// so the update only affects g, SP, and PC. Since PC must be last,
+ // the possible partial transitions in ordinary execution are (1) g alone is updated,
+ // (2) both g and SP are updated, and (3) SP alone is updated.
+ // If g is updated, we'll see a system g and not look closer.
+ // If SP alone is updated, we can detect the partial transition by checking
+ // whether the SP is within g's stack bounds. (We could also require that SP
+ // be changed only after g, but the stack bounds check is needed by other
+ // cases, so there is no need to impose an additional requirement.)
+ //
+ // There is one exceptional transition to a system g, not in ordinary execution.
+ // When a signal arrives, the operating system starts the signal handler running
+ // with an updated PC and SP. The g is updated last, at the beginning of the
+ // handler. There are two reasons this is okay. First, until g is updated the
+ // g and SP do not match, so the stack bounds check detects the partial transition.
+ // Second, signal handlers currently run with signals disabled, so a profiling
+ // signal cannot arrive during the handler.
+ //
+ // When switching from a system g to a user g, there are three possibilities.
+ //
+ // First, it may be that the g switch has no PC update, because the SP
+ // either corresponds to a user g throughout (as in runtime.asmcgocall)
+ // or because it has been arranged to look like a user g frame
+ // (as in runtime.cgocallback_gofunc). In this case, since the entire
+ // transition is a g+SP update, a partial transition updating just one of
+ // those will be detected by the stack bounds check.
+ //
+ // Second, when returning from a signal handler, the PC and SP updates
+ // are performed by the operating system in an atomic update, so the g
+ // update must be done before them. The stack bounds check detects
+ // the partial transition here, and (again) signal handlers run with signals
+ // disabled, so a profiling signal cannot arrive then anyway.
+ //
+ // Third, the common case: it may be that the switch updates g, SP, and PC
+ // separately, as in runtime.gogo.
+ //
+ // Because runtime.gogo is the only instance, we check whether the PC lies
+// within that function, and if so, do not ask for a traceback. This approach
+ // requires knowing the size of the runtime.gogo function, which we
+ // record in arch_*.h and check in runtime_test.go.
+ //
+ // There is another apparently viable approach, recorded here in case
+ // the "PC within runtime.gogo" check turns out not to be usable.
+ // It would be possible to delay the update of either g or SP until immediately
+ // before the PC update instruction. Then, because of the stack bounds check,
+ // the only problematic interrupt point is just before that PC update instruction,
+ // and the sigprof handler can detect that instruction and simulate stepping past
+ // it in order to reach a consistent state. On ARM, the update of g must be made
+ // in two places (in R10 and also in a TLS slot), so the delayed update would
+ // need to be the SP update. The sigprof handler must read the instruction at
+ // the current PC and if it was the known instruction (for example, JMP BX or
+ // MOV R2, PC), use that other register in place of the PC value.
+ // The biggest drawback to this solution is that it requires that we can tell
+ // whether it's safe to read from the memory pointed at by PC.
+ // In a correct program, we can test PC == nil and otherwise read,
+ // but if a profiling signal happens at the instant that a program executes
+ // a bad jump (before the program manages to handle the resulting fault)
+ // the profiling handler could fault trying to read nonexistent memory.
+ //
+ // To recap, there are no constraints on the assembly being used for the
+ // transition. We simply require that g and SP match and that the PC is not
+ // in runtime.gogo.
+ traceback = true;
+ if(gp == nil || gp != mp->curg ||
+ (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
+ ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
+ traceback = false;
+
+ n = 0;
+ if(traceback)
+ n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, stk, nelem(stk), nil, nil, false);
+ if(!traceback || n <= 0) {
+ // Normal traceback is impossible or has failed.
+ // See if it falls into one of several common cases.
+ n = 0;
+ if(mp->ncgo > 0 && mp->curg != nil &&
+ mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) {
+ // Cgo, we can't unwind and symbolize arbitrary C code,
+ // so instead collect Go stack that leads to the cgo call.
+ // This is especially important on windows, since all syscalls are cgo calls.
+ n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, stk, nelem(stk), nil, nil, false);
+ }
+#ifdef GOOS_windows
+ if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) {
+ // Libcall, i.e. runtime syscall on windows.
+ // Collect Go stack that leads to the call.
+ n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, stk, nelem(stk), nil, nil, false);
+ }
+#endif
+ if(n == 0) {
+ // If all of the above has failed, account it against abstract "System" or "GC".
+ n = 2;
+ // "ExternalCode" is better than "etext".
+ if((uintptr)pc > (uintptr)runtime·etext)
+ pc = (byte*)ExternalCode + PCQuantum;
+ stk[0] = (uintptr)pc;
+ if(mp->gcing || mp->helpgc)
+ stk[1] = (uintptr)GC + PCQuantum;
+ else
+ stk[1] = (uintptr)System + PCQuantum;
+ }
+ }
+
+ if(prof.hz != 0) {
+ // Simple cas-lock to coordinate with setcpuprofilerate.
+ while(!runtime·cas(&prof.lock, 0, 1))
+ runtime·osyield();
+ if(prof.hz != 0)
+ runtime·cpuproftick(stk, n);
+ runtime·atomicstore(&prof.lock, 0);
+ }
+ mp->mallocing--;
+}
+
+// Arrange to call fn with a traceback hz times a second.
+void
+runtime·setcpuprofilerate_m(void)
+{
+ int32 hz;
+
+ hz = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+
+ // Force sane arguments.
+ if(hz < 0)
+ hz = 0;
+
+ // Disable preemption, otherwise we can be rescheduled to another thread
+ // that has profiling enabled.
+ g->m->locks++;
+
+ // Stop profiler on this thread so that it is safe to lock prof.
+ // If a profiling signal came in while we had prof locked,
+ // it would deadlock.
+ runtime·resetcpuprofiler(0);
+
+ while(!runtime·cas(&prof.lock, 0, 1))
+ runtime·osyield();
+ prof.hz = hz;
+ runtime·atomicstore(&prof.lock, 0);
+
+ runtime·lock(&runtime·sched.lock);
+ runtime·sched.profilehz = hz;
+ runtime·unlock(&runtime·sched.lock);
+
+ if(hz != 0)
+ runtime·resetcpuprofiler(hz);
+
+ g->m->locks--;
+}
+
+// Change number of processors. The world is stopped, sched is locked.
+static void
+procresize(int32 new)
+{
+ int32 i, old;
+ bool empty;
+ G *gp;
+ P *p;
+
+ old = runtime·gomaxprocs;
+ if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
+ runtime·throw("procresize: invalid arg");
+ // initialize new P's
+ for(i = 0; i < new; i++) {
+ p = runtime·allp[i];
+ if(p == nil) {
+ p = (P*)runtime·mallocgc(sizeof(*p), 0, 0);
+ p->id = i;
+ p->status = Pgcstop;
+ runtime·atomicstorep(&runtime·allp[i], p);
+ }
+ if(p->mcache == nil) {
+ if(old==0 && i==0)
+ p->mcache = g->m->mcache; // bootstrap
+ else
+ p->mcache = runtime·allocmcache();
+ }
+ }
+
+ // redistribute runnable G's evenly
+ // collect all runnable goroutines in global queue preserving FIFO order
+ // FIFO order is required to ensure fairness even during frequent GCs
+ // see http://golang.org/issue/7126
+ empty = false;
+ while(!empty) {
+ empty = true;
+ for(i = 0; i < old; i++) {
+ p = runtime·allp[i];
+ if(p->runqhead == p->runqtail)
+ continue;
+ empty = false;
+ // pop from tail of local queue
+ p->runqtail--;
+ gp = p->runq[p->runqtail%nelem(p->runq)];
+ // push onto head of global queue
+ gp->schedlink = runtime·sched.runqhead;
+ runtime·sched.runqhead = gp;
+ if(runtime·sched.runqtail == nil)
+ runtime·sched.runqtail = gp;
+ runtime·sched.runqsize++;
+ }
+ }
+ // fill local queues with at most nelem(p->runq)/2 goroutines
+ // start at 1 because current M already executes some G and will acquire allp[0] below,
+ // so if we have a spare G we want to put it into allp[1].
+ for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {
+ gp = runtime·sched.runqhead;
+ runtime·sched.runqhead = gp->schedlink;
+ if(runtime·sched.runqhead == nil)
+ runtime·sched.runqtail = nil;
+ runtime·sched.runqsize--;
+ runqput(runtime·allp[i%new], gp);
+ }
+
+ // free unused P's
+ for(i = new; i < old; i++) {
+ p = runtime·allp[i];
+ runtime·freemcache(p->mcache);
+ p->mcache = nil;
+ gfpurge(p);
+ p->status = Pdead;
+ // can't free P itself because it can be referenced by an M in syscall
+ }
+
+ if(g->m->p)
+ g->m->p->m = nil;
+ g->m->p = nil;
+ g->m->mcache = nil;
+ p = runtime·allp[0];
+ p->m = nil;
+ p->status = Pidle;
+ acquirep(p);
+ for(i = new-1; i > 0; i--) {
+ p = runtime·allp[i];
+ p->status = Pidle;
+ pidleput(p);
+ }
+ runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
+}
+
+// Associate p and the current m.
+static void
+acquirep(P *p)
+{
+ if(g->m->p || g->m->mcache)
+ runtime·throw("acquirep: already in go");
+ if(p->m || p->status != Pidle) {
+ runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
+ runtime·throw("acquirep: invalid p state");
+ }
+ g->m->mcache = p->mcache;
+ g->m->p = p;
+ p->m = g->m;
+ p->status = Prunning;
+}
+
+// Disassociate p and the current m.
+static P*
+releasep(void)
+{
+ P *p;
+
+ if(g->m->p == nil || g->m->mcache == nil)
+ runtime·throw("releasep: invalid arg");
+ p = g->m->p;
+ if(p->m != g->m || p->mcache != g->m->mcache || p->status != Prunning) {
+ runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
+ g->m, g->m->p, p->m, g->m->mcache, p->mcache, p->status);
+ runtime·throw("releasep: invalid p state");
+ }
+ g->m->p = nil;
+ g->m->mcache = nil;
+ p->m = nil;
+ p->status = Pidle;
+ return p;
+}
+
+static void
+incidlelocked(int32 v)
+{
+ runtime·lock(&runtime·sched.lock);
+ runtime·sched.nmidlelocked += v;
+ if(v > 0)
+ checkdead();
+ runtime·unlock(&runtime·sched.lock);
+}
+
+// Check for deadlock situation.
+// The check is based on the number of running M's; if it is 0, the program is deadlocked.
+static void
+checkdead(void)
+{
+ G *gp;
+ int32 run, grunning, s;
+ uintptr i;
+
+ // -1 for sysmon
+ run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
+ if(run > 0)
+ return;
+ // If we are dying because of a signal caught on an already idle thread,
+ // freezetheworld will cause all running threads to block.
+ // And runtime will essentially enter into deadlock state,
+ // except that there is a thread that will call runtime·exit soon.
+ if(runtime·panicking > 0)
+ return;
+ if(run < 0) {
+ runtime·printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
+ runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
+ runtime·throw("checkdead: inconsistent counts");
+ }
+ grunning = 0;
+ runtime·lock(&allglock);
+ for(i = 0; i < runtime·allglen; i++) {
+ gp = runtime·allg[i];
+ if(gp->issystem)
+ continue;
+ s = runtime·readgstatus(gp);
+ switch(s&~Gscan) {
+ case Gwaiting:
+ grunning++;
+ break;
+ case Grunnable:
+ case Grunning:
+ case Gsyscall:
+ runtime·unlock(&allglock);
+ runtime·printf("runtime: checkdead: found g %D in status %d\n", gp->goid, s);
+ runtime·throw("checkdead: runnable g");
+ break;
+ }
+ }
+ runtime·unlock(&allglock);
+ if(grunning == 0) // possible if main goroutine calls runtime·Goexit()
+ runtime·throw("no goroutines (main called runtime.Goexit) - deadlock!");
+ g->m->throwing = -1; // do not dump full stacks
+ runtime·throw("all goroutines are asleep - deadlock!");
+}
+
+static void
+sysmon(void)
+{
+ uint32 idle, delay, nscavenge;
+ int64 now, unixnow, lastpoll, lasttrace, lastgc;
+ int64 forcegcperiod, scavengelimit, lastscavenge, maxsleep;
+ G *gp;
+
+ // If we go two minutes without a garbage collection, force one to run.
+ forcegcperiod = 2*60*1e9;
+ // If a heap span goes unused for 5 minutes after a garbage collection,
+ // we hand it back to the operating system.
+ scavengelimit = 5*60*1e9;
+ if(runtime·debug.scavenge > 0) {
+ // Scavenge-a-lot for testing.
+ forcegcperiod = 10*1e6;
+ scavengelimit = 20*1e6;
+ }
+ lastscavenge = runtime·nanotime();
+ nscavenge = 0;
+ // Make wake-up period small enough for the sampling to be correct.
+ maxsleep = forcegcperiod/2;
+ if(scavengelimit < forcegcperiod)
+ maxsleep = scavengelimit/2;
+
+ lasttrace = 0;
+ idle = 0; // how many cycles in succession we have not woken anybody up
+ delay = 0;
+ for(;;) {
+ if(idle == 0) // start with 20us sleep...
+ delay = 20;
+ else if(idle > 50) // start doubling the sleep after 1ms...
+ delay *= 2;
+ if(delay > 10*1000) // up to 10ms
+ delay = 10*1000;
+ runtime·usleep(delay);
+ if(runtime·debug.schedtrace <= 0 &&
+ (runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) { // TODO: fast atomic
+ runtime·lock(&runtime·sched.lock);
+ if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
+ runtime·atomicstore(&runtime·sched.sysmonwait, 1);
+ runtime·unlock(&runtime·sched.lock);
+ runtime·notetsleep(&runtime·sched.sysmonnote, maxsleep);
+ runtime·lock(&runtime·sched.lock);
+ runtime·atomicstore(&runtime·sched.sysmonwait, 0);
+ runtime·noteclear(&runtime·sched.sysmonnote);
+ idle = 0;
+ delay = 20;
+ }
+ runtime·unlock(&runtime·sched.lock);
+ }
+ // poll network if not polled for more than 10ms
+ lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
+ now = runtime·nanotime();
+ unixnow = runtime·unixnanotime();
+ if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
+ runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
+ gp = runtime·netpoll(false); // non-blocking
+ if(gp) {
+ // Need to decrement number of idle locked M's
+ // (pretending that one more is running) before injectglist.
+ // Otherwise it can lead to the following situation:
+ // injectglist grabs all P's but before it starts M's to run the P's,
+ // another M returns from syscall, finishes running its G,
+ // observes that there is no work to do and no other running M's
+ // and reports deadlock.
+ incidlelocked(-1);
+ injectglist(gp);
+ incidlelocked(1);
+ }
+ }
+ // retake P's blocked in syscalls
+ // and preempt long running G's
+ if(retake(now))
+ idle = 0;
+ else
+ idle++;
+
+ // check if we need to force a GC
+ lastgc = runtime·atomicload64(&mstats.last_gc);
+ if(lastgc != 0 && unixnow - lastgc > forcegcperiod && runtime·atomicload(&runtime·forcegc.idle)) {
+ runtime·lock(&runtime·forcegc.lock);
+ runtime·forcegc.idle = 0;
+ runtime·forcegc.g->schedlink = nil;
+ injectglist(runtime·forcegc.g);
+ runtime·unlock(&runtime·forcegc.lock);
+ }
+
+ // scavenge heap once in a while
+ if(lastscavenge + scavengelimit/2 < now) {
+ runtime·MHeap_Scavenge(nscavenge, now, scavengelimit);
+ lastscavenge = now;
+ nscavenge++;
+ }
+
+ if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
+ lasttrace = now;
+ runtime·schedtrace(runtime·debug.scheddetail);
+ }
+ }
+}
+
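+// Back-off arithmetic in the sysmon loop above, as a concrete example: while
+// retake finds nothing to do, sysmon sleeps 20us per cycle for roughly the
+// first 50 idle cycles (about 1ms of sleeping in total), then doubles the
+// sleep each cycle (40us, 80us, ...), capped at 10ms per cycle.
+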
+typedef struct Pdesc Pdesc;
+struct Pdesc
+{
+ uint32 schedtick;
+ int64 schedwhen;
+ uint32 syscalltick;
+ int64 syscallwhen;
+};
+#pragma dataflag NOPTR
+static Pdesc pdesc[MaxGomaxprocs];
+
+static uint32
+retake(int64 now)
+{
+ uint32 i, s, n;
+ int64 t;
+ P *p;
+ Pdesc *pd;
+
+ n = 0;
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ if(p==nil)
+ continue;
+ pd = &pdesc[i];
+ s = p->status;
+ if(s == Psyscall) {
+ // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
+ t = p->syscalltick;
+ if(pd->syscalltick != t) {
+ pd->syscalltick = t;
+ pd->syscallwhen = now;
+ continue;
+ }
+ // On the one hand we don't want to retake Ps if there is no other work to do,
+ // but on the other hand we want to retake them eventually
+ // because they can prevent the sysmon thread from deep sleep.
+ if(p->runqhead == p->runqtail &&
+ runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 &&
+ pd->syscallwhen + 10*1000*1000 > now)
+ continue;
+ // Need to decrement number of idle locked M's
+ // (pretending that one more is running) before the CAS.
+ // Otherwise the M from which we retake can exit the syscall,
+ // increment nmidle and report deadlock.
+ incidlelocked(-1);
+ if(runtime·cas(&p->status, s, Pidle)) {
+ n++;
+ handoffp(p);
+ }
+ incidlelocked(1);
+ } else if(s == Prunning) {
+ // Preempt G if it's running for more than 10ms.
+ t = p->schedtick;
+ if(pd->schedtick != t) {
+ pd->schedtick = t;
+ pd->schedwhen = now;
+ continue;
+ }
+ if(pd->schedwhen + 10*1000*1000 > now)
+ continue;
+ preemptone(p);
+ }
+ }
+ return n;
+}
+
+// Tell all goroutines that they have been preempted and that they should stop.
+// This function is purely best-effort. It can fail to inform a goroutine if a
+// processor just started running it.
+// No locks need to be held.
+// Returns true if preemption request was issued to at least one goroutine.
+static bool
+preemptall(void)
+{
+ P *p;
+ int32 i;
+ bool res;
+
+ res = false;
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ if(p == nil || p->status != Prunning)
+ continue;
+ res |= preemptone(p);
+ }
+ return res;
+}
+
+// Tell the goroutine running on processor P to stop.
+// This function is purely best-effort. It can incorrectly fail to inform the
+// goroutine. It can inform the wrong goroutine. Even if it informs the
+// correct goroutine, that goroutine might ignore the request if it is
+// simultaneously executing runtime·newstack.
+// No lock needs to be held.
+// Returns true if preemption request was issued.
+// The actual preemption will happen at some point in the future
+// and will be indicated by the gp->status no longer being
+// Grunning.
+static bool
+preemptone(P *p)
+{
+ M *mp;
+ G *gp;
+
+ mp = p->m;
+ if(mp == nil || mp == g->m)
+ return false;
+ gp = mp->curg;
+ if(gp == nil || gp == mp->g0)
+ return false;
+ gp->preempt = true;
+ // Every call in a goroutine checks for stack overflow by
+ // comparing the current stack pointer to gp->stackguard0.
+ // Setting gp->stackguard0 to StackPreempt folds
+ // preemption into the normal stack overflow check.
+ gp->stackguard0 = StackPreempt;
+ return true;
+}
+
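+// How the stackguard0 trick above takes effect, roughly: StackPreempt is a
+// huge value, so the next function prologue the goroutine executes sees
+//
+//	SP < gp->stackguard0
+//
+// and calls runtime·morestack -> runtime·newstack, which recognizes
+// stackguard0 == StackPreempt and reschedules the goroutine instead of
+// actually growing its stack.
+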
+void
+runtime·schedtrace(bool detailed)
+{
+ static int64 starttime;
+ int64 now;
+ int64 id1, id2, id3;
+ int32 i, t, h;
+ uintptr gi;
+ int8 *fmt;
+ M *mp, *lockedm;
+ G *gp, *lockedg;
+ P *p;
+
+ now = runtime·nanotime();
+ if(starttime == 0)
+ starttime = now;
+
+ runtime·lock(&runtime·sched.lock);
+ runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d spinningthreads=%d idlethreads=%d runqueue=%d",
+ (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
+ runtime·sched.nmspinning, runtime·sched.nmidle, runtime·sched.runqsize);
+ if(detailed) {
+ runtime·printf(" gcwaiting=%d nmidlelocked=%d stopwait=%d sysmonwait=%d\n",
+ runtime·sched.gcwaiting, runtime·sched.nmidlelocked,
+ runtime·sched.stopwait, runtime·sched.sysmonwait);
+ }
+ // We must be careful while reading data from P's, M's and G's.
+ // Even if we hold schedlock, most data can be changed concurrently.
+ // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
+ for(i = 0; i < runtime·gomaxprocs; i++) {
+ p = runtime·allp[i];
+ if(p == nil)
+ continue;
+ mp = p->m;
+ h = runtime·atomicload(&p->runqhead);
+ t = runtime·atomicload(&p->runqtail);
+ if(detailed)
+ runtime·printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
+ i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
+ else {
+ // In non-detailed mode format lengths of per-P run queues as:
+ // [len1 len2 len3 len4]
+ fmt = " %d";
+ if(runtime·gomaxprocs == 1)
+ fmt = " [%d]\n";
+ else if(i == 0)
+ fmt = " [%d";
+ else if(i == runtime·gomaxprocs-1)
+ fmt = " %d]\n";
+ runtime·printf(fmt, t-h);
+ }
+ }
+ if(!detailed) {
+ runtime·unlock(&runtime·sched.lock);
+ return;
+ }
+ for(mp = runtime·allm; mp; mp = mp->alllink) {
+ p = mp->p;
+ gp = mp->curg;
+ lockedg = mp->lockedg;
+ id1 = -1;
+ if(p)
+ id1 = p->id;
+ id2 = -1;
+ if(gp)
+ id2 = gp->goid;
+ id3 = -1;
+ if(lockedg)
+ id3 = lockedg->goid;
+ runtime·printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
+ " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
+ mp->id, id1, id2,
+ mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
+ mp->spinning, g->m->blocked, id3);
+ }
+ runtime·lock(&allglock);
+ for(gi = 0; gi < runtime·allglen; gi++) {
+ gp = runtime·allg[gi];
+ mp = gp->m;
+ lockedm = gp->lockedm;
+ runtime·printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
+ gp->goid, runtime·readgstatus(gp), gp->waitreason, mp ? mp->id : -1,
+ lockedm ? lockedm->id : -1);
+ }
+ runtime·unlock(&allglock);
+ runtime·unlock(&runtime·sched.lock);
+}
+
+// Put mp on midle list.
+// Sched must be locked.
+static void
+mput(M *mp)
+{
+ mp->schedlink = runtime·sched.midle;
+ runtime·sched.midle = mp;
+ runtime·sched.nmidle++;
+ checkdead();
+}
+
+// Try to get an m from midle list.
+// Sched must be locked.
+static M*
+mget(void)
+{
+ M *mp;
+
+ if((mp = runtime·sched.midle) != nil){
+ runtime·sched.midle = mp->schedlink;
+ runtime·sched.nmidle--;
+ }
+ return mp;
+}
+
+// Put gp on the global runnable queue.
+// Sched must be locked.
+static void
+globrunqput(G *gp)
+{
+ gp->schedlink = nil;
+ if(runtime·sched.runqtail)
+ runtime·sched.runqtail->schedlink = gp;
+ else
+ runtime·sched.runqhead = gp;
+ runtime·sched.runqtail = gp;
+ runtime·sched.runqsize++;
+}
+
+// Put a batch of runnable goroutines on the global runnable queue.
+// Sched must be locked.
+static void
+globrunqputbatch(G *ghead, G *gtail, int32 n)
+{
+ gtail->schedlink = nil;
+ if(runtime·sched.runqtail)
+ runtime·sched.runqtail->schedlink = ghead;
+ else
+ runtime·sched.runqhead = ghead;
+ runtime·sched.runqtail = gtail;
+ runtime·sched.runqsize += n;
+}
+
+// Try to get a batch of G's from the global runnable queue.
+// Sched must be locked.
+static G*
+globrunqget(P *p, int32 max)
+{
+ G *gp, *gp1;
+ int32 n;
+
+ if(runtime·sched.runqsize == 0)
+ return nil;
+ n = runtime·sched.runqsize/runtime·gomaxprocs+1;
+ if(n > runtime·sched.runqsize)
+ n = runtime·sched.runqsize;
+ if(max > 0 && n > max)
+ n = max;
+ if(n > nelem(p->runq)/2)
+ n = nelem(p->runq)/2;
+ runtime·sched.runqsize -= n;
+ if(runtime·sched.runqsize == 0)
+ runtime·sched.runqtail = nil;
+ gp = runtime·sched.runqhead;
+ runtime·sched.runqhead = gp->schedlink;
+ n--;
+ while(n--) {
+ gp1 = runtime·sched.runqhead;
+ runtime·sched.runqhead = gp1->schedlink;
+ runqput(p, gp1);
+ }
+ return gp;
+}
+
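+// For example: with runqsize = 10 and gomaxprocs = 4, n = 10/4 + 1 = 3
+// (none of the caps above bind unless the caller passes a smaller max);
+// one G is returned to the caller and the other two are moved onto p's
+// local run queue, so the global queue drains roughly evenly across the P's.
+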
+// Put p on the pidle list.
+// Sched must be locked.
+static void
+pidleput(P *p)
+{
+ p->link = runtime·sched.pidle;
+ runtime·sched.pidle = p;
+ runtime·xadd(&runtime·sched.npidle, 1); // TODO: fast atomic
+}
+
+// Try to get a p from the pidle list.
+// Sched must be locked.
+static P*
+pidleget(void)
+{
+ P *p;
+
+ p = runtime·sched.pidle;
+ if(p) {
+ runtime·sched.pidle = p->link;
+ runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic
+ }
+ return p;
+}
+
+// Try to put g on local runnable queue.
+// If it's full, put onto global queue.
+// Executed only by the owner P.
+static void
+runqput(P *p, G *gp)
+{
+ uint32 h, t;
+
+retry:
+ h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with consumers
+ t = p->runqtail;
+ if(t - h < nelem(p->runq)) {
+ p->runq[t%nelem(p->runq)] = gp;
+ runtime·atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
+ return;
+ }
+ if(runqputslow(p, gp, h, t))
+ return;
+ // the queue is not full, so now the put above must succeed
+ goto retry;
+}
+
+// Put g and a batch of work from local runnable queue on global queue.
+// Executed only by the owner P.
+static bool
+runqputslow(P *p, G *gp, uint32 h, uint32 t)
+{
+ G *batch[nelem(p->runq)/2+1];
+ uint32 n, i;
+
+ // First, grab a batch from local queue.
+ n = t-h;
+ n = n/2;
+ if(n != nelem(p->runq)/2)
+ runtime·throw("runqputslow: queue is not full");
+ for(i=0; i<n; i++)
+ batch[i] = p->runq[(h+i)%nelem(p->runq)];
+ if(!runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits consume
+ return false;
+ batch[n] = gp;
+ // Link the goroutines.
+ for(i=0; i<n; i++)
+ batch[i]->schedlink = batch[i+1];
+ // Now put the batch on global queue.
+ runtime·lock(&runtime·sched.lock);
+ globrunqputbatch(batch[0], batch[n], n+1);
+ runtime·unlock(&runtime·sched.lock);
+ return true;
+}
+
+// Get g from local runnable queue.
+// Executed only by the owner P.
+static G*
+runqget(P *p)
+{
+ G *gp;
+ uint32 t, h;
+
+ for(;;) {
+ h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
+ t = p->runqtail;
+ if(t == h)
+ return nil;
+ gp = p->runq[h%nelem(p->runq)];
+ if(runtime·cas(&p->runqhead, h, h+1)) // cas-release, commits consume
+ return gp;
+ }
+}
+
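+// A worked example of the ring protocol above (assuming, say, a 256-slot
+// runq): runqhead and runqtail are free-running uint32 counters and only
+// t%nelem(p->runq) picks the slot, so with h = 254 and t = 258 there are
+// four queued G's in slots 254, 255, 0 and 1. runqput stores the new G at
+// slot 258%256 = 2 and then publishes it with a store-release of t+1, while
+// consumers (runqget on the owner P, runqgrab from stealing P's) claim items
+// by CASing h forward; the counters may wrap around 2^32, but t-h still
+// gives the queue length.
+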
+// Grabs a batch of goroutines from local runnable queue.
+// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
+// Can be executed by any P.
+static uint32
+runqgrab(P *p, G **batch)
+{
+ uint32 t, h, n, i;
+
+ for(;;) {
+ h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
+ t = runtime·atomicload(&p->runqtail); // load-acquire, synchronize with the producer
+ n = t-h;
+ n = n - n/2;
+ if(n == 0)
+ break;
+ if(n > nelem(p->runq)/2) // read inconsistent h and t
+ continue;
+ for(i=0; i<n; i++)
+ batch[i] = p->runq[(h+i)%nelem(p->runq)];
+ if(runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits consume
+ break;
+ }
+ return n;
+}
+
+// Steal half of elements from local runnable queue of p2
+// and put onto local runnable queue of p.
+// Returns one of the stolen elements (or nil if failed).
+static G*
+runqsteal(P *p, P *p2)
+{
+ G *gp;
+ G *batch[nelem(p->runq)/2];
+ uint32 t, h, n, i;
+
+ n = runqgrab(p2, batch);
+ if(n == 0)
+ return nil;
+ n--;
+ gp = batch[n];
+ if(n == 0)
+ return gp;
+ h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with consumers
+ t = p->runqtail;
+ if(t - h + n >= nelem(p->runq))
+ runtime·throw("runqsteal: runq overflow");
+ for(i=0; i<n; i++, t++)
+ p->runq[t%nelem(p->runq)] = batch[i];
+ runtime·atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
+ return gp;
+}
+
+void
+runtime·testSchedLocalQueue(void)
+{
+ P *p;
+ G *gs;
+ int32 i, j;
+
+ p = (P*)runtime·mallocgc(sizeof(*p), nil, FlagNoScan);
+ gs = (G*)runtime·mallocgc(nelem(p->runq)*sizeof(*gs), nil, FlagNoScan);
+
+ for(i = 0; i < nelem(p->runq); i++) {
+ if(runqget(p) != nil)
+ runtime·throw("runq is not empty initially");
+ for(j = 0; j < i; j++)
+ runqput(p, &gs[i]);
+ for(j = 0; j < i; j++) {
+ if(runqget(p) != &gs[i]) {
+ runtime·printf("bad element at iter %d/%d\n", i, j);
+ runtime·throw("bad element");
+ }
+ }
+ if(runqget(p) != nil)
+ runtime·throw("runq is not empty afterwards");
+ }
+}
+
+void
+runtime·testSchedLocalQueueSteal(void)
+{
+ P *p1, *p2;
+ G *gs, *gp;
+ int32 i, j, s;
+
+ p1 = (P*)runtime·mallocgc(sizeof(*p1), nil, FlagNoScan);
+ p2 = (P*)runtime·mallocgc(sizeof(*p2), nil, FlagNoScan);
+ gs = (G*)runtime·mallocgc(nelem(p1->runq)*sizeof(*gs), nil, FlagNoScan);
+
+ for(i = 0; i < nelem(p1->runq); i++) {
+ for(j = 0; j < i; j++) {
+ gs[j].sig = 0;
+ runqput(p1, &gs[j]);
+ }
+ gp = runqsteal(p2, p1);
+ s = 0;
+ if(gp) {
+ s++;
+ gp->sig++;
+ }
+ while(gp = runqget(p2)) {
+ s++;
+ gp->sig++;
+ }
+ while(gp = runqget(p1))
+ gp->sig++;
+ for(j = 0; j < i; j++) {
+ if(gs[j].sig != 1) {
+ runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
+ runtime·throw("bad element");
+ }
+ }
+ if(s != i/2 && s != i/2+1) {
+ runtime·printf("bad steal %d, want %d or %d, iter %d\n",
+ s, i/2, i/2+1, i);
+ runtime·throw("bad steal");
+ }
+ }
+}
+
+void
+runtime·setmaxthreads_m(void)
+{
+ int32 in;
+ int32 out;
+
+ in = g->m->scalararg[0];
+
+ runtime·lock(&runtime·sched.lock);
+ out = runtime·sched.maxmcount;
+ runtime·sched.maxmcount = in;
+ checkmcount();
+ runtime·unlock(&runtime·sched.lock);
+
+ g->m->scalararg[0] = out;
+}
+
+static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
+
+static bool
+haveexperiment(int8 *name)
+{
+ int32 i, j;
+
+ for(i=0; i<sizeof(experiment); i++) {
+ if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
+ for(j=0; name[j]; j++)
+ if(experiment[i+j] != name[j])
+ goto nomatch;
+ if(experiment[i+j] != '\0' && experiment[i+j] != ',')
+ goto nomatch;
+ return 1;
+ }
+ nomatch:;
+ }
+ return 0;
+}
+
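+// For example (hypothetical names): if the tree was built with
+// GOEXPERIMENT=foo,bar then experiment[] is "foo,bar", and
+// haveexperiment("bar") matches because the entry starts right after a
+// comma and ends at the terminating NUL; haveexperiment("ba") does not,
+// because the character after the match is 'r' rather than ',' or NUL.
+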
+#pragma textflag NOSPLIT
+void
+sync·runtime_procPin(intptr p)
+{
+ M *mp;
+
+ mp = g->m;
+ // Disable preemption.
+ mp->locks++;
+ p = mp->p->id;
+ FLUSH(&p);
+}
+
+#pragma textflag NOSPLIT
+void
+sync·runtime_procUnpin()
+{
+ g->m->locks--;
+}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
new file mode 100644
index 000000000..48b8cbe39
--- /dev/null
+++ b/src/runtime/proc.go
@@ -0,0 +1,123 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+var parkunlock_c byte
+
+// start forcegc helper goroutine
+func init() {
+ go forcegchelper()
+}
+
+func forcegchelper() {
+ forcegc.g = getg()
+ forcegc.g.issystem = true
+ for {
+ lock(&forcegc.lock)
+ if forcegc.idle != 0 {
+ gothrow("forcegc: phase error")
+ }
+ atomicstore(&forcegc.idle, 1)
+ goparkunlock(&forcegc.lock, "force gc (idle)")
+ // this goroutine is explicitly resumed by sysmon
+ if debug.gctrace > 0 {
+ println("GC forced")
+ }
+ gogc(1)
+ }
+}
+
+// Gosched yields the processor, allowing other goroutines to run. It does not
+// suspend the current goroutine, so execution resumes automatically.
+func Gosched() {
+ mcall(gosched_m)
+}
+
+// Puts the current goroutine into a waiting state and calls unlockf.
+// If unlockf returns false, the goroutine is resumed.
+func gopark(unlockf unsafe.Pointer, lock unsafe.Pointer, reason string) {
+ mp := acquirem()
+ gp := mp.curg
+ status := readgstatus(gp)
+ if status != _Grunning && status != _Gscanrunning {
+ gothrow("gopark: bad g status")
+ }
+ mp.waitlock = lock
+ mp.waitunlockf = unlockf
+ gp.waitreason = reason
+ releasem(mp)
+ // can't do anything that might move the G between Ms here.
+ mcall(park_m)
+}
+
+// Puts the current goroutine into a waiting state and unlocks the lock.
+// The goroutine can be made runnable again by calling goready(gp).
+func goparkunlock(lock *mutex, reason string) {
+ gopark(unsafe.Pointer(&parkunlock_c), unsafe.Pointer(lock), reason)
+}
+
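+// A rough usage sketch (hypothetical waiter): a blocking primitive records
+// the current g on a wait list under its own mutex, parks with
+// goparkunlock, and is later made runnable again by the waking side calling
+// goready:
+//
+//	lock(&l)
+//	// append getg() to the wait list guarded by l
+//	goparkunlock(&l, "example wait") // blocks; l is released for us
+//
+//	// on the waking side:
+//	lock(&l)
+//	gp := dequeueWaiter() // hypothetical helper
+//	unlock(&l)
+//	goready(gp)
+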
+func goready(gp *g) {
+ mp := acquirem()
+ mp.ptrarg[0] = unsafe.Pointer(gp)
+ onM(ready_m)
+ releasem(mp)
+}
+
+//go:nosplit
+func acquireSudog() *sudog {
+ c := gomcache()
+ s := c.sudogcache
+ if s != nil {
+ c.sudogcache = s.next
+ return s
+ }
+
+ // Delicate dance: the semaphore implementation calls
+ // acquireSudog, acquireSudog calls new(sudog),
+ // new calls malloc, malloc can call the garbage collector,
+ // and the garbage collector calls the semaphore implementation
+ // in stoptheworld.
+ // Break the cycle by doing acquirem/releasem around new(sudog).
+ // The acquirem/releasem increments m.locks during new(sudog),
+ // which keeps the garbage collector from being invoked.
+ mp := acquirem()
+ p := new(sudog)
+ releasem(mp)
+ return p
+}
+
+//go:nosplit
+func releaseSudog(s *sudog) {
+ c := gomcache()
+ s.next = c.sudogcache
+ c.sudogcache = s
+}
+
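+// Typical pairing, sketched: a blocking operation takes a sudog from the
+// cache, uses it to describe the waiting g while parked, and returns it
+// when it wakes up:
+//
+//	s := acquireSudog()
+//	s.g = getg()
+//	// enqueue s on the wait list, park, get woken ...
+//	releaseSudog(s)
+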
+// funcPC returns the entry PC of the function f.
+// It assumes that f is a func value. Otherwise the behavior is undefined.
+//go:nosplit
+func funcPC(f interface{}) uintptr {
+ return **(**uintptr)(add(unsafe.Pointer(&f), ptrSize))
+}
+
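+// For example, funcPC(forcegchelper) works as follows: &f points at the
+// interface header, add(..., ptrSize) skips the type word to reach the data
+// word, which holds a pointer to the func value for forcegchelper; the
+// first word of a func value is its entry PC, hence the double dereference.
+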
+// called from assembly
+func badmcall(fn func(*g)) {
+ gothrow("runtime: mcall called on m->g0 stack")
+}
+
+func badmcall2(fn func(*g)) {
+ gothrow("runtime: mcall function returned")
+}
+
+func badreflectcall() {
+ panic("runtime: arg size to reflect.call more than 1GB")
+}
+
+func lockedOSThread() bool {
+ gp := getg()
+ return gp.lockedm != nil && gp.m.lockedg != nil
+}
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
new file mode 100644
index 000000000..aa9bc81ac
--- /dev/null
+++ b/src/runtime/proc_test.go
@@ -0,0 +1,480 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "math"
+ "runtime"
+ "sync/atomic"
+ "syscall"
+ "testing"
+ "time"
+)
+
+var stop = make(chan bool, 1)
+
+func perpetuumMobile() {
+ select {
+ case <-stop:
+ default:
+ go perpetuumMobile()
+ }
+}
+
+func TestStopTheWorldDeadlock(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping during short test")
+ }
+ maxprocs := runtime.GOMAXPROCS(3)
+ compl := make(chan bool, 2)
+ go func() {
+ for i := 0; i != 1000; i += 1 {
+ runtime.GC()
+ }
+ compl <- true
+ }()
+ go func() {
+ for i := 0; i != 1000; i += 1 {
+ runtime.GOMAXPROCS(3)
+ }
+ compl <- true
+ }()
+ go perpetuumMobile()
+ <-compl
+ <-compl
+ stop <- true
+ runtime.GOMAXPROCS(maxprocs)
+}
+
+func TestYieldProgress(t *testing.T) {
+ testYieldProgress(t, false)
+}
+
+func TestYieldLockedProgress(t *testing.T) {
+ testYieldProgress(t, true)
+}
+
+func testYieldProgress(t *testing.T, locked bool) {
+ c := make(chan bool)
+ cack := make(chan bool)
+ go func() {
+ if locked {
+ runtime.LockOSThread()
+ }
+ for {
+ select {
+ case <-c:
+ cack <- true
+ return
+ default:
+ runtime.Gosched()
+ }
+ }
+ }()
+ time.Sleep(10 * time.Millisecond)
+ c <- true
+ <-cack
+}
+
+func TestYieldLocked(t *testing.T) {
+ const N = 10
+ c := make(chan bool)
+ go func() {
+ runtime.LockOSThread()
+ for i := 0; i < N; i++ {
+ runtime.Gosched()
+ time.Sleep(time.Millisecond)
+ }
+ c <- true
+ // runtime.UnlockOSThread() is deliberately omitted
+ }()
+ <-c
+}
+
+func TestGoroutineParallelism(t *testing.T) {
+ P := 4
+ N := 10
+ if testing.Short() {
+ P = 3
+ N = 3
+ }
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
+ // If runtime triggers a forced GC during this test then it will deadlock,
+ // since the goroutines can't be stopped/preempted.
+ // So give this test as much time as possible.
+ runtime.GC()
+ for try := 0; try < N; try++ {
+ done := make(chan bool)
+ x := uint32(0)
+ for p := 0; p < P; p++ {
+ // Test that all P goroutines are scheduled at the same time
+ go func(p int) {
+ for i := 0; i < 3; i++ {
+ expected := uint32(P*i + p)
+ for atomic.LoadUint32(&x) != expected {
+ }
+ atomic.StoreUint32(&x, expected+1)
+ }
+ done <- true
+ }(p)
+ }
+ for p := 0; p < P; p++ {
+ <-done
+ }
+ }
+}
+
+func TestBlockLocked(t *testing.T) {
+ const N = 10
+ c := make(chan bool)
+ go func() {
+ runtime.LockOSThread()
+ for i := 0; i < N; i++ {
+ c <- true
+ }
+ runtime.UnlockOSThread()
+ }()
+ for i := 0; i < N; i++ {
+ <-c
+ }
+}
+
+func TestTimerFairness(t *testing.T) {
+ done := make(chan bool)
+ c := make(chan bool)
+ for i := 0; i < 2; i++ {
+ go func() {
+ for {
+ select {
+ case c <- true:
+ case <-done:
+ return
+ }
+ }
+ }()
+ }
+
+ timer := time.After(20 * time.Millisecond)
+ for {
+ select {
+ case <-c:
+ case <-timer:
+ close(done)
+ return
+ }
+ }
+}
+
+func TestTimerFairness2(t *testing.T) {
+ done := make(chan bool)
+ c := make(chan bool)
+ for i := 0; i < 2; i++ {
+ go func() {
+ timer := time.After(20 * time.Millisecond)
+ var buf [1]byte
+ for {
+ syscall.Read(0, buf[0:0])
+ select {
+ case c <- true:
+ case <-c:
+ case <-timer:
+ done <- true
+ return
+ }
+ }
+ }()
+ }
+ <-done
+ <-done
+}
+
+// The function is used to test preemption at split stack checks.
+// Declaring a var avoids inlining at the call site.
+var preempt = func() int {
+ var a [128]int
+ sum := 0
+ for _, v := range a {
+ sum += v
+ }
+ return sum
+}
+
+func TestPreemption(t *testing.T) {
+ // Test that goroutines are preempted at function calls.
+ N := 5
+ if testing.Short() {
+ N = 2
+ }
+ c := make(chan bool)
+ var x uint32
+ for g := 0; g < 2; g++ {
+ go func(g int) {
+ for i := 0; i < N; i++ {
+ for atomic.LoadUint32(&x) != uint32(g) {
+ preempt()
+ }
+ atomic.StoreUint32(&x, uint32(1-g))
+ }
+ c <- true
+ }(g)
+ }
+ <-c
+ <-c
+}
+
+func TestPreemptionGC(t *testing.T) {
+ // Test that pending GC preempts running goroutines.
+ P := 5
+ N := 10
+ if testing.Short() {
+ P = 3
+ N = 2
+ }
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P + 1))
+ var stop uint32
+ for i := 0; i < P; i++ {
+ go func() {
+ for atomic.LoadUint32(&stop) == 0 {
+ preempt()
+ }
+ }()
+ }
+ for i := 0; i < N; i++ {
+ runtime.Gosched()
+ runtime.GC()
+ }
+ atomic.StoreUint32(&stop, 1)
+}
+
+func TestGCFairness(t *testing.T) {
+ output := executeTest(t, testGCFairnessSource, nil)
+ want := "OK\n"
+ if output != want {
+ t.Fatalf("want %s, got %s\n", want, output)
+ }
+}
+
+const testGCFairnessSource = `
+package main
+
+import (
+ "fmt"
+ "os"
+ "runtime"
+ "time"
+)
+
+func main() {
+ runtime.GOMAXPROCS(1)
+ f, err := os.Open("/dev/null")
+ if os.IsNotExist(err) {
+ // This test tests what it is intended to test only if writes are fast.
+ // If there is no /dev/null, we just don't execute the test.
+ fmt.Println("OK")
+ return
+ }
+ if err != nil {
+ fmt.Println(err)
+ os.Exit(1)
+ }
+ for i := 0; i < 2; i++ {
+ go func() {
+ for {
+ f.Write([]byte("."))
+ }
+ }()
+ }
+ time.Sleep(10 * time.Millisecond)
+ fmt.Println("OK")
+}
+`
+
+func stackGrowthRecursive(i int) {
+ var pad [128]uint64
+ if i != 0 && pad[0] == 0 {
+ stackGrowthRecursive(i - 1)
+ }
+}
+
+func TestPreemptSplitBig(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping in -short mode")
+ }
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
+ stop := make(chan int)
+ go big(stop)
+ for i := 0; i < 3; i++ {
+ time.Sleep(10 * time.Microsecond) // let big start running
+ runtime.GC()
+ }
+ close(stop)
+}
+
+func big(stop chan int) int {
+ n := 0
+ for {
+ // delay so that gc is sure to have asked for a preemption
+ for i := 0; i < 1e9; i++ {
+ n++
+ }
+
+ // call bigframe, which used to miss the preemption in its prologue.
+ bigframe(stop)
+
+ // check if we've been asked to stop.
+ select {
+ case <-stop:
+ return n
+ }
+ }
+}
+
+func bigframe(stop chan int) int {
+ // Without a stack split, this frame would overflow the stack.
+ // small will notice that it needs a stack split and will
+ // catch the overflow.
+ var x [8192]byte
+ return small(stop, &x)
+}
+
+func small(stop chan int, x *[8192]byte) int {
+ for i := range x {
+ x[i] = byte(i)
+ }
+ sum := 0
+ for i := range x {
+ sum += int(x[i])
+ }
+
+ // keep small from being a leaf function, which might
+ // make it not do any stack check at all.
+ nonleaf(stop)
+
+ return sum
+}
+
+func nonleaf(stop chan int) bool {
+ // do something that won't be inlined:
+ select {
+ case <-stop:
+ return true
+ default:
+ return false
+ }
+}
+
+func TestSchedLocalQueue(t *testing.T) {
+ runtime.RunSchedLocalQueueTest()
+}
+
+func TestSchedLocalQueueSteal(t *testing.T) {
+ runtime.RunSchedLocalQueueStealTest()
+}
+
+func benchmarkStackGrowth(b *testing.B, rec int) {
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ stackGrowthRecursive(rec)
+ }
+ })
+}
+
+func BenchmarkStackGrowth(b *testing.B) {
+ benchmarkStackGrowth(b, 10)
+}
+
+func BenchmarkStackGrowthDeep(b *testing.B) {
+ benchmarkStackGrowth(b, 1024)
+}
+
+func BenchmarkCreateGoroutines(b *testing.B) {
+ benchmarkCreateGoroutines(b, 1)
+}
+
+func BenchmarkCreateGoroutinesParallel(b *testing.B) {
+ benchmarkCreateGoroutines(b, runtime.GOMAXPROCS(-1))
+}
+
+func benchmarkCreateGoroutines(b *testing.B, procs int) {
+ c := make(chan bool)
+ var f func(n int)
+ f = func(n int) {
+ if n == 0 {
+ c <- true
+ return
+ }
+ go f(n - 1)
+ }
+ for i := 0; i < procs; i++ {
+ go f(b.N / procs)
+ }
+ for i := 0; i < procs; i++ {
+ <-c
+ }
+}
+
+type Matrix [][]float64
+
+func BenchmarkMatmult(b *testing.B) {
+ b.StopTimer()
+ // matmult is O(N**3) but testing expects O(b.N),
+ // so we need to take cube root of b.N
+ n := int(math.Cbrt(float64(b.N))) + 1
+ A := makeMatrix(n)
+ B := makeMatrix(n)
+ C := makeMatrix(n)
+ b.StartTimer()
+ matmult(nil, A, B, C, 0, n, 0, n, 0, n, 8)
+}
+
+func makeMatrix(n int) Matrix {
+ m := make(Matrix, n)
+ for i := 0; i < n; i++ {
+ m[i] = make([]float64, n)
+ for j := 0; j < n; j++ {
+ m[i][j] = float64(i*n + j)
+ }
+ }
+ return m
+}
+
+func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, threshold int) {
+ di := i1 - i0
+ dj := j1 - j0
+ dk := k1 - k0
+ if di >= dj && di >= dk && di >= threshold {
+ // divide in two by y axis
+ mi := i0 + di/2
+ done1 := make(chan struct{}, 1)
+ go matmult(done1, A, B, C, i0, mi, j0, j1, k0, k1, threshold)
+ matmult(nil, A, B, C, mi, i1, j0, j1, k0, k1, threshold)
+ <-done1
+ } else if dj >= dk && dj >= threshold {
+ // divide in two by x axis
+ mj := j0 + dj/2
+ done1 := make(chan struct{}, 1)
+ go matmult(done1, A, B, C, i0, i1, j0, mj, k0, k1, threshold)
+ matmult(nil, A, B, C, i0, i1, mj, j1, k0, k1, threshold)
+ <-done1
+ } else if dk >= threshold {
+ // divide in two by "k" axis
+ // deliberately not parallel because of data races
+ mk := k0 + dk/2
+ matmult(nil, A, B, C, i0, i1, j0, j1, k0, mk, threshold)
+ matmult(nil, A, B, C, i0, i1, j0, j1, mk, k1, threshold)
+ } else {
+ // the matrices are small enough, compute directly
+ for i := i0; i < i1; i++ {
+ for j := j0; j < j1; j++ {
+ for k := k0; k < k1; k++ {
+ C[i][j] += A[i][k] * B[k][j]
+ }
+ }
+ }
+ }
+ if done != nil {
+ done <- struct{}{}
+ }
+}
diff --git a/src/runtime/race.c b/src/runtime/race.c
new file mode 100644
index 000000000..9ac73fbcc
--- /dev/null
+++ b/src/runtime/race.c
@@ -0,0 +1,314 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implementation of the race detector API.
+// +build race
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "race.h"
+#include "type.h"
+#include "typekind.h"
+#include "textflag.h"
+
+// Race runtime functions called via runtime·racecall.
+void __tsan_init(void);
+void __tsan_fini(void);
+void __tsan_map_shadow(void);
+void __tsan_finalizer_goroutine(void);
+void __tsan_go_start(void);
+void __tsan_go_end(void);
+void __tsan_malloc(void);
+void __tsan_acquire(void);
+void __tsan_release(void);
+void __tsan_release_merge(void);
+void __tsan_go_ignore_sync_begin(void);
+void __tsan_go_ignore_sync_end(void);
+
+// Mimic what cmd/cgo would do.
+#pragma cgo_import_static __tsan_init
+#pragma cgo_import_static __tsan_fini
+#pragma cgo_import_static __tsan_map_shadow
+#pragma cgo_import_static __tsan_finalizer_goroutine
+#pragma cgo_import_static __tsan_go_start
+#pragma cgo_import_static __tsan_go_end
+#pragma cgo_import_static __tsan_malloc
+#pragma cgo_import_static __tsan_acquire
+#pragma cgo_import_static __tsan_release
+#pragma cgo_import_static __tsan_release_merge
+#pragma cgo_import_static __tsan_go_ignore_sync_begin
+#pragma cgo_import_static __tsan_go_ignore_sync_end
+
+// These are called from race_amd64.s.
+#pragma cgo_import_static __tsan_read
+#pragma cgo_import_static __tsan_read_pc
+#pragma cgo_import_static __tsan_read_range
+#pragma cgo_import_static __tsan_write
+#pragma cgo_import_static __tsan_write_pc
+#pragma cgo_import_static __tsan_write_range
+#pragma cgo_import_static __tsan_func_enter
+#pragma cgo_import_static __tsan_func_exit
+
+#pragma cgo_import_static __tsan_go_atomic32_load
+#pragma cgo_import_static __tsan_go_atomic64_load
+#pragma cgo_import_static __tsan_go_atomic32_store
+#pragma cgo_import_static __tsan_go_atomic64_store
+#pragma cgo_import_static __tsan_go_atomic32_exchange
+#pragma cgo_import_static __tsan_go_atomic64_exchange
+#pragma cgo_import_static __tsan_go_atomic32_fetch_add
+#pragma cgo_import_static __tsan_go_atomic64_fetch_add
+#pragma cgo_import_static __tsan_go_atomic32_compare_exchange
+#pragma cgo_import_static __tsan_go_atomic64_compare_exchange
+
+extern byte runtime·noptrdata[];
+extern byte runtime·enoptrbss[];
+
+// start/end of heap for race_amd64.s
+uintptr runtime·racearenastart;
+uintptr runtime·racearenaend;
+
+void runtime·racefuncenter(void *callpc);
+void runtime·racefuncexit(void);
+void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc);
+void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc);
+void runtime·racesymbolizethunk(void*);
+
+// racecall allows calling an arbitrary function f from C race runtime
+// with up to 4 uintptr arguments.
+void runtime·racecall(void(*f)(void), ...);
+
+// checks if the address has shadow (i.e. heap or data/bss)
+#pragma textflag NOSPLIT
+static bool
+isvalidaddr(uintptr addr)
+{
+ if(addr >= runtime·racearenastart && addr < runtime·racearenaend)
+ return true;
+ if(addr >= (uintptr)runtime·noptrdata && addr < (uintptr)runtime·enoptrbss)
+ return true;
+ return false;
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·raceinit(void)
+{
+ uintptr racectx, start, size;
+
+ // cgo is required to initialize libc, which is used by race runtime
+ if(!runtime·iscgo)
+ runtime·throw("raceinit: race build must use cgo");
+ runtime·racecall(__tsan_init, &racectx, runtime·racesymbolizethunk);
+ // Round data segment to page boundaries, because it's used in mmap().
+ start = (uintptr)runtime·noptrdata & ~(PageSize-1);
+ size = ROUND((uintptr)runtime·enoptrbss - start, PageSize);
+ runtime·racecall(__tsan_map_shadow, start, size);
+ return racectx;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racefini(void)
+{
+ runtime·racecall(__tsan_fini);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racemapshadow(void *addr, uintptr size)
+{
+ if(runtime·racearenastart == 0)
+ runtime·racearenastart = (uintptr)addr;
+ if(runtime·racearenaend < (uintptr)addr+size)
+ runtime·racearenaend = (uintptr)addr+size;
+ runtime·racecall(__tsan_map_shadow, addr, size);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racemalloc(void *p, uintptr sz)
+{
+ runtime·racecall(__tsan_malloc, p, sz);
+}
+
+#pragma textflag NOSPLIT
+uintptr
+runtime·racegostart(void *pc)
+{
+ uintptr racectx;
+ G *spawng;
+
+ if(g->m->curg != nil)
+ spawng = g->m->curg;
+ else
+ spawng = g;
+
+ runtime·racecall(__tsan_go_start, spawng->racectx, &racectx, pc);
+ return racectx;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racegoend(void)
+{
+ runtime·racecall(__tsan_go_end, g->racectx);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racewriterangepc(void *addr, uintptr sz, void *callpc, void *pc)
+{
+ if(g != g->m->curg) {
+ // The call is coming from manual instrumentation of Go code running on g0/gsignal.
+ // Not interesting.
+ return;
+ }
+ if(callpc != nil)
+ runtime·racefuncenter(callpc);
+ runtime·racewriterangepc1(addr, sz, pc);
+ if(callpc != nil)
+ runtime·racefuncexit();
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racereadrangepc(void *addr, uintptr sz, void *callpc, void *pc)
+{
+ if(g != g->m->curg) {
+ // The call is coming from manual instrumentation of Go code running on g0/gsignal.
+ // Not interesting.
+ return;
+ }
+ if(callpc != nil)
+ runtime·racefuncenter(callpc);
+ runtime·racereadrangepc1(addr, sz, pc);
+ if(callpc != nil)
+ runtime·racefuncexit();
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racewriteobjectpc(void *addr, Type *t, void *callpc, void *pc)
+{
+ uint8 kind;
+
+ kind = t->kind & KindMask;
+ if(kind == KindArray || kind == KindStruct)
+ runtime·racewriterangepc(addr, t->size, callpc, pc);
+ else
+ runtime·racewritepc(addr, callpc, pc);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racereadobjectpc(void *addr, Type *t, void *callpc, void *pc)
+{
+ uint8 kind;
+
+ kind = t->kind & KindMask;
+ if(kind == KindArray || kind == KindStruct)
+ runtime·racereadrangepc(addr, t->size, callpc, pc);
+ else
+ runtime·racereadpc(addr, callpc, pc);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·raceacquire(void *addr)
+{
+ runtime·raceacquireg(g, addr);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·raceacquireg(G *gp, void *addr)
+{
+ if(g->raceignore || !isvalidaddr((uintptr)addr))
+ return;
+ runtime·racecall(__tsan_acquire, gp->racectx, addr);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racerelease(void *addr)
+{
+ if(g->raceignore || !isvalidaddr((uintptr)addr))
+ return;
+ runtime·racereleaseg(g, addr);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racereleaseg(G *gp, void *addr)
+{
+ if(g->raceignore || !isvalidaddr((uintptr)addr))
+ return;
+ runtime·racecall(__tsan_release, gp->racectx, addr);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racereleasemerge(void *addr)
+{
+ runtime·racereleasemergeg(g, addr);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racereleasemergeg(G *gp, void *addr)
+{
+ if(g->raceignore || !isvalidaddr((uintptr)addr))
+ return;
+ runtime·racecall(__tsan_release_merge, gp->racectx, addr);
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·racefingo(void)
+{
+ runtime·racecall(__tsan_finalizer_goroutine, g->racectx);
+}
+
+// func RaceAcquire(addr unsafe.Pointer)
+#pragma textflag NOSPLIT
+void
+runtime·RaceAcquire(void *addr)
+{
+ runtime·raceacquire(addr);
+}
+
+// func RaceRelease(addr unsafe.Pointer)
+#pragma textflag NOSPLIT
+void
+runtime·RaceRelease(void *addr)
+{
+ runtime·racerelease(addr);
+}
+
+// func RaceReleaseMerge(addr unsafe.Pointer)
+#pragma textflag NOSPLIT
+void
+runtime·RaceReleaseMerge(void *addr)
+{
+ runtime·racereleasemerge(addr);
+}
+
+// func RaceDisable()
+#pragma textflag NOSPLIT
+void
+runtime·RaceDisable(void)
+{
+ if(g->raceignore++ == 0)
+ runtime·racecall(__tsan_go_ignore_sync_begin, g->racectx);
+}
+
+// func RaceEnable()
+#pragma textflag NOSPLIT
+void
+runtime·RaceEnable(void)
+{
+ if(--g->raceignore == 0)
+ runtime·racecall(__tsan_go_ignore_sync_end, g->racectx);
+}
diff --git a/src/runtime/race.go b/src/runtime/race.go
new file mode 100644
index 000000000..c7573517d
--- /dev/null
+++ b/src/runtime/race.go
@@ -0,0 +1,125 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build race
+
+// Public race detection API, present iff built with -race.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+// RaceDisable disables handling of race events in the current goroutine.
+func RaceDisable()
+
+// RaceEnable re-enables handling of race events in the current goroutine.
+func RaceEnable()
+
+func RaceAcquire(addr unsafe.Pointer)
+func RaceRelease(addr unsafe.Pointer)
+func RaceReleaseMerge(addr unsafe.Pointer)
+
+func RaceRead(addr unsafe.Pointer)
+func RaceWrite(addr unsafe.Pointer)
+func RaceReadRange(addr unsafe.Pointer, len int)
+func RaceWriteRange(addr unsafe.Pointer, len int)
+
+func RaceSemacquire(s *uint32)
+func RaceSemrelease(s *uint32)
+
+// private interface for the runtime
+const raceenabled = true
+
+func raceReadObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr) {
+ kind := t.kind & kindMask
+ if kind == kindArray || kind == kindStruct {
+ // for composite objects we have to read every address
+ // because a write might happen to any subobject.
+ racereadrangepc(addr, t.size, callerpc, pc)
+ } else {
+ // for non-composite objects we can read just the start
+ // address, as any write must write the first byte.
+ racereadpc(addr, callerpc, pc)
+ }
+}
+
+func raceWriteObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr) {
+ kind := t.kind & kindMask
+ if kind == kindArray || kind == kindStruct {
+ // for composite objects we have to write every address
+ // because a write might happen to any subobject.
+ racewriterangepc(addr, t.size, callerpc, pc)
+ } else {
+ // for non-composite objects we can write just the start
+ // address, as any write must write the first byte.
+ racewritepc(addr, callerpc, pc)
+ }
+}
+
+//go:noescape
+func racereadpc(addr unsafe.Pointer, callpc, pc uintptr)
+
+//go:noescape
+func racewritepc(addr unsafe.Pointer, callpc, pc uintptr)
+
+//go:noescape
+func racereadrangepc(addr unsafe.Pointer, len uintptr, callpc, pc uintptr)
+
+//go:noescape
+func racewriterangepc(addr unsafe.Pointer, len uintptr, callpc, pc uintptr)
+
+//go:noescape
+func raceacquire(addr unsafe.Pointer)
+
+//go:noescape
+func racerelease(addr unsafe.Pointer)
+
+//go:noescape
+func raceacquireg(gp *g, addr unsafe.Pointer)
+
+//go:noescape
+func racereleaseg(gp *g, addr unsafe.Pointer)
+
+func racefingo()
+
+//go:noescape
+func racemalloc(p unsafe.Pointer, size uintptr)
+
+//go:noescape
+func racereleasemerge(addr unsafe.Pointer)
+
+type symbolizeContext struct {
+ pc uintptr
+ fn *byte
+ file *byte
+ line uintptr
+ off uintptr
+ res uintptr
+}
+
+var qq = [...]byte{'?', '?', 0}
+var dash = [...]byte{'-', 0}
+
+// Callback from C into Go, runs on g0.
+func racesymbolize(ctx *symbolizeContext) {
+ f := findfunc(ctx.pc)
+ if f == nil {
+ ctx.fn = &qq[0]
+ ctx.file = &dash[0]
+ ctx.line = 0
+ ctx.off = ctx.pc
+ ctx.res = 1
+ return
+ }
+
+ ctx.fn = funcname(f)
+ var file string
+ ctx.line = uintptr(funcline(f, ctx.pc, &file))
+ ctx.file = &bytes(file)[0] // assume NUL-terminated
+ ctx.off = ctx.pc - f.entry
+ ctx.res = 1
+ return
+}
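
The file above declares the public annotation API (RaceAcquire, RaceRelease, RaceReleaseMerge, RaceDisable, RaceEnable). As a minimal sketch, not part of this change, the snippet below shows where such annotations would go in a hand-rolled lock; the spinlock package, type, and method names are invented for the illustration, and the code compiles only under the race build tag, since the declarations above are guarded by +build race.

package spinlock

import (
	"runtime"
	"sync/atomic"
	"unsafe"
)

// Mutex is a toy spinlock used only to show where the annotations go.
type Mutex struct{ state int32 }

func (m *Mutex) Lock() {
	for !atomic.CompareAndSwapInt32(&m.state, 0, 1) {
		// spin
	}
	// Everything published by the previous Unlock happens-before this point.
	runtime.RaceAcquire(unsafe.Pointer(m))
}

func (m *Mutex) Unlock() {
	// Publish the writes made while holding the lock to the next Lock.
	runtime.RaceRelease(unsafe.Pointer(m))
	atomic.StoreInt32(&m.state, 0)
}

In this toy example the atomic operations are themselves visible to the detector, so the annotations are redundant; they matter when the synchronization happens somewhere the detector cannot see, for example in C code reached through cgo.
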
diff --git a/src/runtime/race.h b/src/runtime/race.h
new file mode 100644
index 000000000..fee31e09f
--- /dev/null
+++ b/src/runtime/race.h
@@ -0,0 +1,34 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Definitions related to data race detection.
+
+#ifdef RACE
+enum { raceenabled = 1 };
+#else
+enum { raceenabled = 0 };
+#endif
+
+// Initialize race detection subsystem.
+uintptr runtime·raceinit(void);
+// Finalize race detection subsystem, does not return.
+void runtime·racefini(void);
+
+void runtime·racemapshadow(void *addr, uintptr size);
+void runtime·racemalloc(void *p, uintptr sz);
+uintptr runtime·racegostart(void *pc);
+void runtime·racegoend(void);
+void runtime·racewritepc(void *addr, void *callpc, void *pc);
+void runtime·racereadpc(void *addr, void *callpc, void *pc);
+void runtime·racewriterangepc(void *addr, uintptr sz, void *callpc, void *pc);
+void runtime·racereadrangepc(void *addr, uintptr sz, void *callpc, void *pc);
+void runtime·racereadobjectpc(void *addr, Type *t, void *callpc, void *pc);
+void runtime·racewriteobjectpc(void *addr, Type *t, void *callpc, void *pc);
+void runtime·racefingo(void);
+void runtime·raceacquire(void *addr);
+void runtime·raceacquireg(G *gp, void *addr);
+void runtime·racerelease(void *addr);
+void runtime·racereleaseg(G *gp, void *addr);
+void runtime·racereleasemerge(void *addr);
+void runtime·racereleasemergeg(G *gp, void *addr);
diff --git a/src/runtime/race/README b/src/runtime/race/README
new file mode 100644
index 000000000..7f185359f
--- /dev/null
+++ b/src/runtime/race/README
@@ -0,0 +1,12 @@
+The runtime/race package contains the data race detector runtime library.
+It is based on the ThreadSanitizer race detector, which is currently part of
+the LLVM project.
+
+To update the .syso files you need to:
+$ svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk
+$ cd compiler-rt/lib/tsan/go
+$ ./buildgo.sh
+
+Tested with gcc 4.6.1 and 4.7.0. On Windows it's built with 64-bit MinGW.
+
+Current runtime is built on rev 215000.
diff --git a/src/runtime/race/doc.go b/src/runtime/race/doc.go
new file mode 100644
index 000000000..aef805dad
--- /dev/null
+++ b/src/runtime/race/doc.go
@@ -0,0 +1,9 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package race implements data race detection logic.
+// No public interface is provided.
+// For details about the race detector see
+// http://golang.org/doc/articles/race_detector.html
+package race
diff --git a/src/runtime/race/output_test.go b/src/runtime/race/output_test.go
new file mode 100644
index 000000000..d2303f7af
--- /dev/null
+++ b/src/runtime/race/output_test.go
@@ -0,0 +1,156 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build race
+
+package race_test
+
+import (
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strings"
+ "testing"
+)
+
+func TestOutput(t *testing.T) {
+ for _, test := range tests {
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+ src := filepath.Join(dir, "main.go")
+ f, err := os.Create(src)
+ if err != nil {
+ t.Fatalf("failed to create file: %v", err)
+ }
+ _, err = f.WriteString(test.source)
+ if err != nil {
+ f.Close()
+ t.Fatalf("failed to write: %v", err)
+ }
+ if err := f.Close(); err != nil {
+ t.Fatalf("failed to close file: %v", err)
+ }
+ // Pass -l to the compiler to test stack traces.
+ cmd := exec.Command("go", "run", "-race", "-gcflags=-l", src)
+ // GODEBUG spoils program output, GOMAXPROCS makes it flaky.
+ for _, env := range os.Environ() {
+ if strings.HasPrefix(env, "GODEBUG=") ||
+ strings.HasPrefix(env, "GOMAXPROCS=") ||
+ strings.HasPrefix(env, "GORACE=") {
+ continue
+ }
+ cmd.Env = append(cmd.Env, env)
+ }
+ cmd.Env = append(cmd.Env, "GORACE="+test.gorace)
+ got, _ := cmd.CombinedOutput()
+ if !regexp.MustCompile(test.re).MatchString(string(got)) {
+ t.Fatalf("failed test case %v, expect:\n%v\ngot:\n%s",
+ test.name, test.re, got)
+ }
+ }
+}
+
+var tests = []struct {
+ name string
+ gorace string
+ source string
+ re string
+}{
+ {"simple", "atexit_sleep_ms=0", `
+package main
+import "time"
+func main() {
+ done := make(chan bool)
+ x := 0
+ startRacer(&x, done)
+ store(&x, 43)
+ <-done
+}
+func store(x *int, v int) {
+ *x = v
+}
+func startRacer(x *int, done chan bool) {
+ go racer(x, done)
+}
+func racer(x *int, done chan bool) {
+ time.Sleep(10*time.Millisecond)
+ store(x, 42)
+ done <- true
+}
+`, `==================
+WARNING: DATA RACE
+Write by goroutine [0-9]:
+ main\.store\(\)
+ .+/main\.go:12 \+0x[0-9,a-f]+
+ main\.racer\(\)
+ .+/main\.go:19 \+0x[0-9,a-f]+
+
+Previous write by main goroutine:
+ main\.store\(\)
+ .+/main\.go:12 \+0x[0-9,a-f]+
+ main\.main\(\)
+ .+/main\.go:8 \+0x[0-9,a-f]+
+
+Goroutine [0-9] \(running\) created at:
+ main\.startRacer\(\)
+ .+/main\.go:15 \+0x[0-9,a-f]+
+ main\.main\(\)
+ .+/main\.go:7 \+0x[0-9,a-f]+
+==================
+Found 1 data race\(s\)
+exit status 66
+`},
+
+ {"exitcode", "atexit_sleep_ms=0 exitcode=13", `
+package main
+func main() {
+ done := make(chan bool)
+ x := 0
+ go func() {
+ x = 42
+ done <- true
+ }()
+ x = 43
+ <-done
+}
+`, `exit status 13`},
+
+ {"strip_path_prefix", "atexit_sleep_ms=0 strip_path_prefix=/main.", `
+package main
+func main() {
+ done := make(chan bool)
+ x := 0
+ go func() {
+ x = 42
+ done <- true
+ }()
+ x = 43
+ <-done
+}
+`, `
+ go:7 \+0x[0-9,a-f]+
+`},
+
+ {"halt_on_error", "atexit_sleep_ms=0 halt_on_error=1", `
+package main
+func main() {
+ done := make(chan bool)
+ x := 0
+ go func() {
+ x = 42
+ done <- true
+ }()
+ x = 43
+ <-done
+}
+`, `
+==================
+exit status 66
+`},
+}
diff --git a/src/runtime/race/race.go b/src/runtime/race/race.go
new file mode 100644
index 000000000..31deedd73
--- /dev/null
+++ b/src/runtime/race/race.go
@@ -0,0 +1,15 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build race,linux,amd64 race,freebsd,amd64 race,darwin,amd64 race,windows,amd64
+
+package race
+
+// This file merely ensures that we link in runtime/cgo in race builds,
+// which in turn ensures that the runtime uses pthread_create to create threads.
+// The prebuilt race runtime lives in race_GOOS_GOARCH.syso.
+// Calls to the runtime are done directly from src/runtime/race.c.
+
+// void __race_unused_func(void);
+import "C"
diff --git a/src/runtime/race/race_darwin_amd64.syso b/src/runtime/race/race_darwin_amd64.syso
new file mode 100644
index 000000000..81b48c6c9
--- /dev/null
+++ b/src/runtime/race/race_darwin_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_freebsd_amd64.syso b/src/runtime/race/race_freebsd_amd64.syso
new file mode 100644
index 000000000..5bbe32229
--- /dev/null
+++ b/src/runtime/race/race_freebsd_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_linux_amd64.syso b/src/runtime/race/race_linux_amd64.syso
new file mode 100644
index 000000000..49bf08ef3
--- /dev/null
+++ b/src/runtime/race/race_linux_amd64.syso
Binary files differ
diff --git a/src/runtime/race/race_test.go b/src/runtime/race/race_test.go
new file mode 100644
index 000000000..7e0ee866a
--- /dev/null
+++ b/src/runtime/race/race_test.go
@@ -0,0 +1,172 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build race
+
+// This program is used to verify the race detector
+// by running the tests and parsing their output.
+// It does not check stack correctness, completeness or anything else:
+// it merely verifies that if a test is expected to be racy
+// then the race is detected.
+package race_test
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+var (
+ passedTests = 0
+ totalTests = 0
+ falsePos = 0
+ falseNeg = 0
+ failingPos = 0
+ failingNeg = 0
+ failed = false
+)
+
+const (
+ visibleLen = 40
+ testPrefix = "=== RUN Test"
+)
+
+func TestRace(t *testing.T) {
+ testOutput, err := runTests()
+ if err != nil {
+ t.Fatalf("Failed to run tests: %v\n%v", err, string(testOutput))
+ }
+ reader := bufio.NewReader(bytes.NewReader(testOutput))
+
+ funcName := ""
+ var tsanLog []string
+ for {
+ s, err := nextLine(reader)
+ if err != nil {
+ fmt.Printf("%s\n", processLog(funcName, tsanLog))
+ break
+ }
+ if strings.HasPrefix(s, testPrefix) {
+ fmt.Printf("%s\n", processLog(funcName, tsanLog))
+ tsanLog = make([]string, 0, 100)
+ funcName = s[len(testPrefix):]
+ } else {
+ tsanLog = append(tsanLog, s)
+ }
+ }
+
+ fmt.Printf("\nPassed %d of %d tests (%.02f%%, %d+, %d-)\n",
+ passedTests, totalTests, 100*float64(passedTests)/float64(totalTests), falsePos, falseNeg)
+ fmt.Printf("%d expected failures (%d have not failed)\n", failingPos+failingNeg, failingNeg)
+ if failed {
+ t.Fail()
+ }
+}
+
+// nextLine is a wrapper around bufio.Reader.ReadString.
+// It reads a line up to the next '\n' character. Error
+// is non-nil if there are no lines left, and nil
+// otherwise.
+func nextLine(r *bufio.Reader) (string, error) {
+ s, err := r.ReadString('\n')
+ if err != nil {
+ if err != io.EOF {
+ log.Fatalf("nextLine: expected EOF, received %v", err)
+ }
+ return s, err
+ }
+ return s[:len(s)-1], nil
+}
+
+// processLog verifies whether the given ThreadSanitizer log
+// contains a race report, checks that information against
+// the name of the test case, and returns the result of the
+// comparison.
+func processLog(testName string, tsanLog []string) string {
+ if !strings.HasPrefix(testName, "Race") && !strings.HasPrefix(testName, "NoRace") {
+ return ""
+ }
+ gotRace := false
+ for _, s := range tsanLog {
+ if strings.Contains(s, "DATA RACE") {
+ gotRace = true
+ break
+ }
+ }
+
+ failing := strings.Contains(testName, "Failing")
+ expRace := !strings.HasPrefix(testName, "No")
+ for len(testName) < visibleLen {
+ testName += " "
+ }
+ if expRace == gotRace {
+ passedTests++
+ totalTests++
+ if failing {
+ failed = true
+ failingNeg++
+ }
+ return fmt.Sprintf("%s .", testName)
+ }
+ pos := ""
+ if expRace {
+ falseNeg++
+ } else {
+ falsePos++
+ pos = "+"
+ }
+ if failing {
+ failingPos++
+ } else {
+ failed = true
+ }
+ totalTests++
+ return fmt.Sprintf("%s %s%s", testName, "FAILED", pos)
+}
+
+// runTests ensures that the package and its dependencies are
+// built with instrumentation enabled and returns the output of 'go test',
+// which includes possible data race reports from ThreadSanitizer.
+func runTests() ([]byte, error) {
+ tests, err := filepath.Glob("./testdata/*_test.go")
+ if err != nil {
+ return nil, err
+ }
+ args := []string{"test", "-race", "-v"}
+ args = append(args, tests...)
+ cmd := exec.Command("go", args...)
+ // The following flags turn off heuristics that suppress seemingly identical reports.
+ // This is required because the tests contain a lot of data races on the same addresses
+ // (the tests are simple and the memory is constantly reused).
+ for _, env := range os.Environ() {
+ if strings.HasPrefix(env, "GOMAXPROCS=") || strings.HasPrefix(env, "GODEBUG=") {
+ continue
+ }
+ cmd.Env = append(cmd.Env, env)
+ }
+ cmd.Env = append(cmd.Env, `GORACE="suppress_equal_stacks=0 suppress_equal_addresses=0 exitcode=0"`)
+ return cmd.CombinedOutput()
+}
+
+func TestIssue8102(t *testing.T) {
+ // If this compiles with -race, the test passes.
+ type S struct {
+ x interface{}
+ i int
+ }
+ c := make(chan int)
+ a := [2]*int{}
+ for ; ; c <- *a[S{}.i] {
+ if t != nil {
+ break
+ }
+ }
+}
diff --git a/src/runtime/race/race_windows_amd64.syso b/src/runtime/race/race_windows_amd64.syso
new file mode 100644
index 000000000..a4eae9bdd
--- /dev/null
+++ b/src/runtime/race/race_windows_amd64.syso
Binary files differ
diff --git a/src/runtime/race/testdata/atomic_test.go b/src/runtime/race/testdata/atomic_test.go
new file mode 100644
index 000000000..232744b3d
--- /dev/null
+++ b/src/runtime/race/testdata/atomic_test.go
@@ -0,0 +1,288 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "runtime"
+ "sync"
+ "sync/atomic"
+ "testing"
+ "unsafe"
+)
+
+func TestNoRaceAtomicAddInt64(t *testing.T) {
+ var x1, x2 int8
+ var s int64
+ ch := make(chan bool, 2)
+ go func() {
+ x1 = 1
+ if atomic.AddInt64(&s, 1) == 2 {
+ x2 = 1
+ }
+ ch <- true
+ }()
+ go func() {
+ x2 = 1
+ if atomic.AddInt64(&s, 1) == 2 {
+ x1 = 1
+ }
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceAtomicAddInt64(t *testing.T) {
+ var x1, x2 int8
+ var s int64
+ ch := make(chan bool, 2)
+ go func() {
+ x1 = 1
+ if atomic.AddInt64(&s, 1) == 1 {
+ x2 = 1
+ }
+ ch <- true
+ }()
+ go func() {
+ x2 = 1
+ if atomic.AddInt64(&s, 1) == 1 {
+ x1 = 1
+ }
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceAtomicAddInt32(t *testing.T) {
+ var x1, x2 int8
+ var s int32
+ ch := make(chan bool, 2)
+ go func() {
+ x1 = 1
+ if atomic.AddInt32(&s, 1) == 2 {
+ x2 = 1
+ }
+ ch <- true
+ }()
+ go func() {
+ x2 = 1
+ if atomic.AddInt32(&s, 1) == 2 {
+ x1 = 1
+ }
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceAtomicLoadAddInt32(t *testing.T) {
+ var x int64
+ var s int32
+ go func() {
+ x = 2
+ atomic.AddInt32(&s, 1)
+ }()
+ for atomic.LoadInt32(&s) != 1 {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicLoadStoreInt32(t *testing.T) {
+ var x int64
+ var s int32
+ go func() {
+ x = 2
+ atomic.StoreInt32(&s, 1)
+ }()
+ for atomic.LoadInt32(&s) != 1 {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicStoreCASInt32(t *testing.T) {
+ var x int64
+ var s int32
+ go func() {
+ x = 2
+ atomic.StoreInt32(&s, 1)
+ }()
+ for !atomic.CompareAndSwapInt32(&s, 1, 0) {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicCASLoadInt32(t *testing.T) {
+ var x int64
+ var s int32
+ go func() {
+ x = 2
+ if !atomic.CompareAndSwapInt32(&s, 0, 1) {
+ panic("")
+ }
+ }()
+ for atomic.LoadInt32(&s) != 1 {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicCASCASInt32(t *testing.T) {
+ var x int64
+ var s int32
+ go func() {
+ x = 2
+ if !atomic.CompareAndSwapInt32(&s, 0, 1) {
+ panic("")
+ }
+ }()
+ for !atomic.CompareAndSwapInt32(&s, 1, 0) {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicCASCASInt32_2(t *testing.T) {
+ var x1, x2 int8
+ var s int32
+ ch := make(chan bool, 2)
+ go func() {
+ x1 = 1
+ if !atomic.CompareAndSwapInt32(&s, 0, 1) {
+ x2 = 1
+ }
+ ch <- true
+ }()
+ go func() {
+ x2 = 1
+ if !atomic.CompareAndSwapInt32(&s, 0, 1) {
+ x1 = 1
+ }
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceAtomicLoadInt64(t *testing.T) {
+ var x int32
+ var s int64
+ go func() {
+ x = 2
+ atomic.AddInt64(&s, 1)
+ }()
+ for atomic.LoadInt64(&s) != 1 {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicCASCASUInt64(t *testing.T) {
+ var x int64
+ var s uint64
+ go func() {
+ x = 2
+ if !atomic.CompareAndSwapUint64(&s, 0, 1) {
+ panic("")
+ }
+ }()
+ for !atomic.CompareAndSwapUint64(&s, 1, 0) {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicLoadStorePointer(t *testing.T) {
+ var x int64
+ var s unsafe.Pointer
+ var y int = 2
+ var p unsafe.Pointer = unsafe.Pointer(&y)
+ go func() {
+ x = 2
+ atomic.StorePointer(&s, p)
+ }()
+ for atomic.LoadPointer(&s) != p {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestNoRaceAtomicStoreCASUint64(t *testing.T) {
+ var x int64
+ var s uint64
+ go func() {
+ x = 2
+ atomic.StoreUint64(&s, 1)
+ }()
+ for !atomic.CompareAndSwapUint64(&s, 1, 0) {
+ runtime.Gosched()
+ }
+ x = 1
+}
+
+func TestRaceAtomicStoreLoad(t *testing.T) {
+ c := make(chan bool)
+ var a uint64
+ go func() {
+ atomic.StoreUint64(&a, 1)
+ c <- true
+ }()
+ _ = a
+ <-c
+}
+
+func TestRaceAtomicLoadStore(t *testing.T) {
+ c := make(chan bool)
+ var a uint64
+ go func() {
+ _ = atomic.LoadUint64(&a)
+ c <- true
+ }()
+ a = 1
+ <-c
+}
+
+func TestRaceAtomicAddLoad(t *testing.T) {
+ c := make(chan bool)
+ var a uint64
+ go func() {
+ atomic.AddUint64(&a, 1)
+ c <- true
+ }()
+ _ = a
+ <-c
+}
+
+func TestRaceAtomicAddStore(t *testing.T) {
+ c := make(chan bool)
+ var a uint64
+ go func() {
+ atomic.AddUint64(&a, 1)
+ c <- true
+ }()
+ a = 42
+ <-c
+}
+
+// A nil pointer in an atomic operation should not deadlock
+// the rest of the program. Used to hang indefinitely.
+func TestNoRaceAtomicCrash(t *testing.T) {
+ var mutex sync.Mutex
+ var nilptr *int32
+ panics := 0
+ defer func() {
+ if x := recover(); x != nil {
+ mutex.Lock()
+ panics++
+ mutex.Unlock()
+ } else {
+ panic("no panic")
+ }
+ }()
+ atomic.AddInt32(nilptr, 1)
+}
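
A hypothetical companion test, not part of this change, for contrast with TestRaceAtomicStoreLoad above (it assumes the same package and imports as atomic_test.go): if the reader also goes through sync/atomic, the detector treats the two accesses as synchronized and no report is produced.

func TestNoRaceAtomicStoreAtomicLoad(t *testing.T) {
	c := make(chan bool)
	var a uint64
	go func() {
		atomic.StoreUint64(&a, 1)
		c <- true
	}()
	_ = atomic.LoadUint64(&a)
	<-c
}
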
diff --git a/src/runtime/race/testdata/cgo_test.go b/src/runtime/race/testdata/cgo_test.go
new file mode 100644
index 000000000..ba7e7b562
--- /dev/null
+++ b/src/runtime/race/testdata/cgo_test.go
@@ -0,0 +1,20 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "os"
+ "os/exec"
+ "testing"
+)
+
+func TestNoRaceCgoSync(t *testing.T) {
+ cmd := exec.Command("go", "run", "-race", "cgo_test_main.go")
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ if err := cmd.Run(); err != nil {
+ t.Fatalf("program exited with error: %v\n", err)
+ }
+}
diff --git a/src/runtime/race/testdata/cgo_test_main.go b/src/runtime/race/testdata/cgo_test_main.go
new file mode 100644
index 000000000..620cea18b
--- /dev/null
+++ b/src/runtime/race/testdata/cgo_test_main.go
@@ -0,0 +1,30 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+/*
+int sync;
+
+void Notify(void)
+{
+ __sync_fetch_and_add(&sync, 1);
+}
+
+void Wait(void)
+{
+ while(__sync_fetch_and_add(&sync, 0) == 0) {}
+}
+*/
+import "C"
+
+func main() {
+ data := 0
+ go func() {
+ data = 1
+ C.Notify()
+ }()
+ C.Wait()
+ _ = data
+}
diff --git a/src/runtime/race/testdata/chan_test.go b/src/runtime/race/testdata/chan_test.go
new file mode 100644
index 000000000..eabd81f40
--- /dev/null
+++ b/src/runtime/race/testdata/chan_test.go
@@ -0,0 +1,659 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "runtime"
+ "testing"
+ "time"
+)
+
+func TestNoRaceChanSync(t *testing.T) {
+ v := 0
+ c := make(chan int)
+ go func() {
+ v = 1
+ c <- 0
+ }()
+ <-c
+ v = 2
+}
+
+func TestNoRaceChanSyncRev(t *testing.T) {
+ v := 0
+ c := make(chan int)
+ go func() {
+ c <- 0
+ v = 2
+ }()
+ v = 1
+ <-c
+}
+
+func TestNoRaceChanAsync(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ go func() {
+ v = 1
+ c <- 0
+ }()
+ <-c
+ v = 2
+}
+
+func TestRaceChanAsyncRev(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ go func() {
+ c <- 0
+ v = 1
+ }()
+ v = 2
+ <-c
+}
+
+func TestNoRaceChanAsyncCloseRecv(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ func() {
+ defer func() {
+ recover()
+ v = 2
+ }()
+ <-c
+ }()
+}
+
+func TestNoRaceChanAsyncCloseRecv2(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ _, _ = <-c
+ v = 2
+}
+
+func TestNoRaceChanAsyncCloseRecv3(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ for range c {
+ }
+ v = 2
+}
+
+func TestNoRaceChanSyncCloseRecv(t *testing.T) {
+ v := 0
+ c := make(chan int)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ func() {
+ defer func() {
+ recover()
+ v = 2
+ }()
+ <-c
+ }()
+}
+
+func TestNoRaceChanSyncCloseRecv2(t *testing.T) {
+ v := 0
+ c := make(chan int)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ _, _ = <-c
+ v = 2
+}
+
+func TestNoRaceChanSyncCloseRecv3(t *testing.T) {
+ v := 0
+ c := make(chan int)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ for range c {
+ }
+ v = 2
+}
+
+func TestRaceChanSyncCloseSend(t *testing.T) {
+ v := 0
+ c := make(chan int)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ func() {
+ defer func() {
+ recover()
+ }()
+ c <- 0
+ }()
+ v = 2
+}
+
+func TestRaceChanAsyncCloseSend(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ go func() {
+ v = 1
+ close(c)
+ }()
+ func() {
+ defer func() {
+ recover()
+ }()
+ for {
+ c <- 0
+ }
+ }()
+ v = 2
+}
+
+func TestRaceChanCloseClose(t *testing.T) {
+ compl := make(chan bool, 2)
+ v1 := 0
+ v2 := 0
+ c := make(chan int)
+ go func() {
+ defer func() {
+ if recover() != nil {
+ v2 = 2
+ }
+ compl <- true
+ }()
+ v1 = 1
+ close(c)
+ }()
+ go func() {
+ defer func() {
+ if recover() != nil {
+ v1 = 2
+ }
+ compl <- true
+ }()
+ v2 = 1
+ close(c)
+ }()
+ <-compl
+ <-compl
+}
+
+func TestRaceChanSendLen(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ go func() {
+ v = 1
+ c <- 1
+ }()
+ for len(c) == 0 {
+ runtime.Gosched()
+ }
+ v = 2
+}
+
+func TestRaceChanRecvLen(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ c <- 1
+ go func() {
+ v = 1
+ <-c
+ }()
+ for len(c) != 0 {
+ runtime.Gosched()
+ }
+ v = 2
+}
+
+func TestRaceChanSendSend(t *testing.T) {
+ compl := make(chan bool, 2)
+ v1 := 0
+ v2 := 0
+ c := make(chan int, 1)
+ go func() {
+ v1 = 1
+ select {
+ case c <- 1:
+ default:
+ v2 = 2
+ }
+ compl <- true
+ }()
+ go func() {
+ v2 = 1
+ select {
+ case c <- 1:
+ default:
+ v1 = 2
+ }
+ compl <- true
+ }()
+ <-compl
+ <-compl
+}
+
+func TestNoRaceChanPtr(t *testing.T) {
+ type msg struct {
+ x int
+ }
+ c := make(chan *msg)
+ go func() {
+ c <- &msg{1}
+ }()
+ m := <-c
+ m.x = 2
+}
+
+func TestRaceChanWrongSend(t *testing.T) {
+ v1 := 0
+ v2 := 0
+ c := make(chan int, 2)
+ go func() {
+ v1 = 1
+ c <- 1
+ }()
+ go func() {
+ v2 = 2
+ c <- 2
+ }()
+ time.Sleep(1e7)
+ if <-c == 1 {
+ v2 = 3
+ } else {
+ v1 = 3
+ }
+}
+
+func TestRaceChanWrongClose(t *testing.T) {
+ v1 := 0
+ v2 := 0
+ c := make(chan int, 1)
+ go func() {
+ defer func() {
+ recover()
+ }()
+ v1 = 1
+ c <- 1
+ }()
+ go func() {
+ time.Sleep(1e7)
+ v2 = 2
+ close(c)
+ }()
+ time.Sleep(2e7)
+ if _, who := <-c; who {
+ v2 = 2
+ } else {
+ v1 = 2
+ }
+}
+
+func TestRaceChanSendClose(t *testing.T) {
+ compl := make(chan bool, 2)
+ c := make(chan int, 1)
+ go func() {
+ defer func() {
+ recover()
+ compl <- true
+ }()
+ c <- 1
+ }()
+ go func() {
+ time.Sleep(10 * time.Millisecond)
+ close(c)
+ compl <- true
+ }()
+ <-compl
+ <-compl
+}
+
+func TestRaceChanSendSelectClose(t *testing.T) {
+ compl := make(chan bool, 2)
+ c := make(chan int, 1)
+ c1 := make(chan int)
+ go func() {
+ defer func() {
+ recover()
+ compl <- true
+ }()
+ time.Sleep(10 * time.Millisecond)
+ select {
+ case c <- 1:
+ case <-c1:
+ }
+ }()
+ go func() {
+ close(c)
+ compl <- true
+ }()
+ <-compl
+ <-compl
+}
+
+func TestRaceSelectReadWriteAsync(t *testing.T) {
+ done := make(chan bool)
+ x := 0
+ c1 := make(chan int, 10)
+ c2 := make(chan int, 10)
+ c3 := make(chan int)
+ c2 <- 1
+ go func() {
+ select {
+ case c1 <- x: // read of x races with...
+ case c3 <- 1:
+ }
+ done <- true
+ }()
+ select {
+ case x = <-c2: // ... write to x here
+ case c3 <- 1:
+ }
+ <-done
+}
+
+func TestRaceSelectReadWriteSync(t *testing.T) {
+ done := make(chan bool)
+ x := 0
+ c1 := make(chan int)
+ c2 := make(chan int)
+ c3 := make(chan int)
+ // make c1 and c2 ready for communication
+ go func() {
+ <-c1
+ }()
+ go func() {
+ c2 <- 1
+ }()
+ go func() {
+ select {
+ case c1 <- x: // read of x races with...
+ case c3 <- 1:
+ }
+ done <- true
+ }()
+ select {
+ case x = <-c2: // ... write to x here
+ case c3 <- 1:
+ }
+ <-done
+}
+
+func TestNoRaceSelectReadWriteAsync(t *testing.T) {
+ done := make(chan bool)
+ x := 0
+ c1 := make(chan int)
+ c2 := make(chan int)
+ go func() {
+ select {
+ case c1 <- x: // read of x does not race with...
+ case c2 <- 1:
+ }
+ done <- true
+ }()
+ select {
+ case x = <-c1: // ... write to x here
+ case c2 <- 1:
+ }
+ <-done
+}
+
+func TestRaceChanReadWriteAsync(t *testing.T) {
+ done := make(chan bool)
+ c1 := make(chan int, 10)
+ c2 := make(chan int, 10)
+ c2 <- 10
+ x := 0
+ go func() {
+ c1 <- x // read of x races with...
+ done <- true
+ }()
+ x = <-c2 // ... write to x here
+ <-done
+}
+
+func TestRaceChanReadWriteSync(t *testing.T) {
+ done := make(chan bool)
+ c1 := make(chan int)
+ c2 := make(chan int)
+ // make c1 and c2 ready for communication
+ go func() {
+ <-c1
+ }()
+ go func() {
+ c2 <- 10
+ }()
+ x := 0
+ go func() {
+ c1 <- x // read of x races with...
+ done <- true
+ }()
+ x = <-c2 // ... write to x here
+ <-done
+}
+
+func TestNoRaceChanReadWriteAsync(t *testing.T) {
+ done := make(chan bool)
+ c1 := make(chan int, 10)
+ x := 0
+ go func() {
+ c1 <- x // read of x does not race with...
+ done <- true
+ }()
+ x = <-c1 // ... write to x here
+ <-done
+}
+
+func TestNoRaceProducerConsumerUnbuffered(t *testing.T) {
+ type Task struct {
+ f func()
+ done chan bool
+ }
+
+ queue := make(chan Task)
+
+ go func() {
+ t := <-queue
+ t.f()
+ t.done <- true
+ }()
+
+ doit := func(f func()) {
+ done := make(chan bool, 1)
+ queue <- Task{f, done}
+ <-done
+ }
+
+ x := 0
+ doit(func() {
+ x = 1
+ })
+ _ = x
+}
+
+func TestRaceChanItselfSend(t *testing.T) {
+ compl := make(chan bool, 1)
+ c := make(chan int, 10)
+ go func() {
+ c <- 0
+ compl <- true
+ }()
+ c = make(chan int, 20)
+ <-compl
+}
+
+func TestRaceChanItselfRecv(t *testing.T) {
+ compl := make(chan bool, 1)
+ c := make(chan int, 10)
+ c <- 1
+ go func() {
+ <-c
+ compl <- true
+ }()
+ time.Sleep(1e7)
+ c = make(chan int, 20)
+ <-compl
+}
+
+func TestRaceChanItselfNil(t *testing.T) {
+ c := make(chan int, 10)
+ go func() {
+ c <- 0
+ }()
+ time.Sleep(1e7)
+ c = nil
+ _ = c
+}
+
+func TestRaceChanItselfClose(t *testing.T) {
+ compl := make(chan bool, 1)
+ c := make(chan int)
+ go func() {
+ close(c)
+ compl <- true
+ }()
+ c = make(chan int)
+ <-compl
+}
+
+func TestRaceChanItselfLen(t *testing.T) {
+ compl := make(chan bool, 1)
+ c := make(chan int)
+ go func() {
+ _ = len(c)
+ compl <- true
+ }()
+ c = make(chan int)
+ <-compl
+}
+
+func TestRaceChanItselfCap(t *testing.T) {
+ compl := make(chan bool, 1)
+ c := make(chan int)
+ go func() {
+ _ = cap(c)
+ compl <- true
+ }()
+ c = make(chan int)
+ <-compl
+}
+
+func TestRaceChanCloseLen(t *testing.T) {
+ v := 0
+ c := make(chan int, 10)
+ c <- 0
+ go func() {
+ v = 1
+ close(c)
+ }()
+ time.Sleep(1e7)
+ _ = len(c)
+ v = 2
+}
+
+func TestRaceChanCloseSend(t *testing.T) {
+ compl := make(chan bool, 1)
+ c := make(chan int, 10)
+ go func() {
+ close(c)
+ compl <- true
+ }()
+ c <- 0
+ <-compl
+}
+
+func TestNoRaceChanMutex(t *testing.T) {
+ done := make(chan struct{})
+ mtx := make(chan struct{}, 1)
+ data := 0
+ go func() {
+ mtx <- struct{}{}
+ data = 42
+ <-mtx
+ done <- struct{}{}
+ }()
+ mtx <- struct{}{}
+ data = 43
+ <-mtx
+ <-done
+}
+
+func TestNoRaceSelectMutex(t *testing.T) {
+ done := make(chan struct{})
+ mtx := make(chan struct{}, 1)
+ aux := make(chan bool)
+ data := 0
+ go func() {
+ select {
+ case mtx <- struct{}{}:
+ case <-aux:
+ }
+ data = 42
+ select {
+ case <-mtx:
+ case <-aux:
+ }
+ done <- struct{}{}
+ }()
+ select {
+ case mtx <- struct{}{}:
+ case <-aux:
+ }
+ data = 43
+ select {
+ case <-mtx:
+ case <-aux:
+ }
+ <-done
+}
+
+func TestRaceChanSem(t *testing.T) {
+ done := make(chan struct{})
+ mtx := make(chan bool, 2)
+ data := 0
+ go func() {
+ mtx <- true
+ data = 42
+ <-mtx
+ done <- struct{}{}
+ }()
+ mtx <- true
+ data = 43
+ <-mtx
+ <-done
+}
+
+func TestNoRaceChanWaitGroup(t *testing.T) {
+ const N = 10
+ chanWg := make(chan bool, N/2)
+ data := make([]int, N)
+ for i := 0; i < N; i++ {
+ chanWg <- true
+ go func(i int) {
+ data[i] = 42
+ <-chanWg
+ }(i)
+ }
+ for i := 0; i < cap(chanWg); i++ {
+ chanWg <- true
+ }
+ for i := 0; i < N; i++ {
+ _ = data[i]
+ }
+}
diff --git a/src/runtime/race/testdata/comp_test.go b/src/runtime/race/testdata/comp_test.go
new file mode 100644
index 000000000..27b2d0081
--- /dev/null
+++ b/src/runtime/race/testdata/comp_test.go
@@ -0,0 +1,186 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "testing"
+)
+
+type P struct {
+ x, y int
+}
+
+type S struct {
+ s1, s2 P
+}
+
+func TestNoRaceComp(t *testing.T) {
+ c := make(chan bool, 1)
+ var s S
+ go func() {
+ s.s2.x = 1
+ c <- true
+ }()
+ s.s2.y = 2
+ <-c
+}
+
+func TestNoRaceComp2(t *testing.T) {
+ c := make(chan bool, 1)
+ var s S
+ go func() {
+ s.s1.x = 1
+ c <- true
+ }()
+ s.s1.y = 2
+ <-c
+}
+
+func TestRaceComp(t *testing.T) {
+ c := make(chan bool, 1)
+ var s S
+ go func() {
+ s.s2.y = 1
+ c <- true
+ }()
+ s.s2.y = 2
+ <-c
+}
+
+func TestRaceComp2(t *testing.T) {
+ c := make(chan bool, 1)
+ var s S
+ go func() {
+ s.s1.x = 1
+ c <- true
+ }()
+ s = S{}
+ <-c
+}
+
+func TestRaceComp3(t *testing.T) {
+ c := make(chan bool, 1)
+ var s S
+ go func() {
+ s.s2.y = 1
+ c <- true
+ }()
+ s = S{}
+ <-c
+}
+
+func TestRaceCompArray(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]S, 10)
+ x := 4
+ go func() {
+ s[x].s2.y = 1
+ c <- true
+ }()
+ x = 5
+ <-c
+}
+
+type P2 P
+type S2 S
+
+func TestRaceConv1(t *testing.T) {
+ c := make(chan bool, 1)
+ var p P2
+ go func() {
+ p.x = 1
+ c <- true
+ }()
+ _ = P(p).x
+ <-c
+}
+
+func TestRaceConv2(t *testing.T) {
+ c := make(chan bool, 1)
+ var p P2
+ go func() {
+ p.x = 1
+ c <- true
+ }()
+ ptr := &p
+ _ = P(*ptr).x
+ <-c
+}
+
+func TestRaceConv3(t *testing.T) {
+ c := make(chan bool, 1)
+ var s S2
+ go func() {
+ s.s1.x = 1
+ c <- true
+ }()
+ _ = P2(S(s).s1).x
+ <-c
+}
+
+type X struct {
+ V [4]P
+}
+
+type X2 X
+
+func TestRaceConv4(t *testing.T) {
+ c := make(chan bool, 1)
+ var x X2
+ go func() {
+ x.V[1].x = 1
+ c <- true
+ }()
+ _ = P2(X(x).V[1]).x
+ <-c
+}
+
+type Ptr struct {
+ s1, s2 *P
+}
+
+func TestNoRaceCompPtr(t *testing.T) {
+ c := make(chan bool, 1)
+ p := Ptr{&P{}, &P{}}
+ go func() {
+ p.s1.x = 1
+ c <- true
+ }()
+ p.s1.y = 2
+ <-c
+}
+
+func TestNoRaceCompPtr2(t *testing.T) {
+ c := make(chan bool, 1)
+ p := Ptr{&P{}, &P{}}
+ go func() {
+ p.s1.x = 1
+ c <- true
+ }()
+ _ = p
+ <-c
+}
+
+func TestRaceCompPtr(t *testing.T) {
+ c := make(chan bool, 1)
+ p := Ptr{&P{}, &P{}}
+ go func() {
+ p.s2.x = 1
+ c <- true
+ }()
+ p.s2.x = 2
+ <-c
+}
+
+func TestRaceCompPtr2(t *testing.T) {
+ c := make(chan bool, 1)
+ p := Ptr{&P{}, &P{}}
+ go func() {
+ p.s2.x = 1
+ c <- true
+ }()
+ p.s2 = &P{}
+ <-c
+}
diff --git a/src/runtime/race/testdata/finalizer_test.go b/src/runtime/race/testdata/finalizer_test.go
new file mode 100644
index 000000000..222cbf67a
--- /dev/null
+++ b/src/runtime/race/testdata/finalizer_test.go
@@ -0,0 +1,67 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "runtime"
+ "sync"
+ "testing"
+ "time"
+)
+
+func TestNoRaceFin(t *testing.T) {
+ c := make(chan bool)
+ go func() {
+ x := new(string)
+ runtime.SetFinalizer(x, func(x *string) {
+ *x = "foo"
+ })
+ *x = "bar"
+ c <- true
+ }()
+ <-c
+ runtime.GC()
+ time.Sleep(100 * time.Millisecond)
+}
+
+var finVar struct {
+ sync.Mutex
+ cnt int
+}
+
+func TestNoRaceFinGlobal(t *testing.T) {
+ c := make(chan bool)
+ go func() {
+ x := new(string)
+ runtime.SetFinalizer(x, func(x *string) {
+ finVar.Lock()
+ finVar.cnt++
+ finVar.Unlock()
+ })
+ c <- true
+ }()
+ <-c
+ runtime.GC()
+ time.Sleep(100 * time.Millisecond)
+ finVar.Lock()
+ finVar.cnt++
+ finVar.Unlock()
+}
+
+func TestRaceFin(t *testing.T) {
+ c := make(chan bool)
+ y := 0
+ go func() {
+ x := new(string)
+ runtime.SetFinalizer(x, func(x *string) {
+ y = 42
+ })
+ c <- true
+ }()
+ <-c
+ runtime.GC()
+ time.Sleep(100 * time.Millisecond)
+ y = 66
+}
diff --git a/src/runtime/race/testdata/io_test.go b/src/runtime/race/testdata/io_test.go
new file mode 100644
index 000000000..9eb3552dc
--- /dev/null
+++ b/src/runtime/race/testdata/io_test.go
@@ -0,0 +1,69 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "fmt"
+ "io/ioutil"
+ "net/http"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+)
+
+func TestNoRaceIOFile(t *testing.T) {
+ x := 0
+ path, _ := ioutil.TempDir("", "race_test")
+ fname := filepath.Join(path, "data")
+ go func() {
+ x = 42
+ f, _ := os.Create(fname)
+ f.Write([]byte("done"))
+ f.Close()
+ }()
+ for {
+ f, err := os.Open(fname)
+ if err != nil {
+ time.Sleep(1e6)
+ continue
+ }
+ buf := make([]byte, 100)
+ count, err := f.Read(buf)
+ if count == 0 {
+ time.Sleep(1e6)
+ continue
+ }
+ break
+ }
+ _ = x
+}
+
+func TestNoRaceIOHttp(t *testing.T) {
+ x := 0
+ go func() {
+ http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+ x = 41
+ fmt.Fprintf(w, "test")
+ x = 42
+ })
+ err := http.ListenAndServe(":23651", nil)
+ if err != nil {
+ t.Fatalf("http.ListenAndServe: %v", err)
+ }
+ }()
+ time.Sleep(1e7)
+ x = 1
+ _, err := http.Get("http://127.0.0.1:23651")
+ if err != nil {
+ t.Fatalf("http.Get: %v", err)
+ }
+ x = 2
+ _, err = http.Get("http://127.0.0.1:23651")
+ if err != nil {
+ t.Fatalf("http.Get: %v", err)
+ }
+ x = 3
+}
diff --git a/src/runtime/race/testdata/map_test.go b/src/runtime/race/testdata/map_test.go
new file mode 100644
index 000000000..a8d8148d0
--- /dev/null
+++ b/src/runtime/race/testdata/map_test.go
@@ -0,0 +1,333 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "testing"
+)
+
+func TestRaceMapRW(t *testing.T) {
+ m := make(map[int]int)
+ ch := make(chan bool, 1)
+ go func() {
+ _ = m[1]
+ ch <- true
+ }()
+ m[1] = 1
+ <-ch
+}
+
+func TestRaceMapRW2(t *testing.T) {
+ m := make(map[int]int)
+ ch := make(chan bool, 1)
+ go func() {
+ _, _ = m[1]
+ ch <- true
+ }()
+ m[1] = 1
+ <-ch
+}
+
+func TestRaceMapRWArray(t *testing.T) {
+ // Check instrumentation of unaddressable arrays (issue 4578).
+ m := make(map[int][2]int)
+ ch := make(chan bool, 1)
+ go func() {
+ _ = m[1][1]
+ ch <- true
+ }()
+ m[2] = [2]int{1, 2}
+ <-ch
+}
+
+func TestNoRaceMapRR(t *testing.T) {
+ m := make(map[int]int)
+ ch := make(chan bool, 1)
+ go func() {
+ _, _ = m[1]
+ ch <- true
+ }()
+ _ = m[1]
+ <-ch
+}
+
+func TestRaceMapRange(t *testing.T) {
+ m := make(map[int]int)
+ ch := make(chan bool, 1)
+ go func() {
+ for range m {
+ }
+ ch <- true
+ }()
+ m[1] = 1
+ <-ch
+}
+
+func TestRaceMapRange2(t *testing.T) {
+ m := make(map[int]int)
+ ch := make(chan bool, 1)
+ go func() {
+ for range m {
+ }
+ ch <- true
+ }()
+ m[1] = 1
+ <-ch
+}
+
+func TestNoRaceMapRangeRange(t *testing.T) {
+ m := make(map[int]int)
+ // Now the map is not empty, so range triggers an event.
+ // The test should work without this (as in the other tests),
+ // so it is suspicious if this test passes and the others don't.
+ m[0] = 0
+ ch := make(chan bool, 1)
+ go func() {
+ for range m {
+ }
+ ch <- true
+ }()
+ for range m {
+ }
+ <-ch
+}
+
+func TestRaceMapLen(t *testing.T) {
+ m := make(map[string]bool)
+ ch := make(chan bool, 1)
+ go func() {
+ _ = len(m)
+ ch <- true
+ }()
+ m[""] = true
+ <-ch
+}
+
+func TestRaceMapDelete(t *testing.T) {
+ m := make(map[string]bool)
+ ch := make(chan bool, 1)
+ go func() {
+ delete(m, "")
+ ch <- true
+ }()
+ m[""] = true
+ <-ch
+}
+
+func TestRaceMapLenDelete(t *testing.T) {
+ m := make(map[string]bool)
+ ch := make(chan bool, 1)
+ go func() {
+ delete(m, "a")
+ ch <- true
+ }()
+ _ = len(m)
+ <-ch
+}
+
+func TestRaceMapVariable(t *testing.T) {
+ ch := make(chan bool, 1)
+ m := make(map[int]int)
+ go func() {
+ m = make(map[int]int)
+ ch <- true
+ }()
+ m = make(map[int]int)
+ <-ch
+}
+
+func TestRaceMapVariable2(t *testing.T) {
+ ch := make(chan bool, 1)
+ m := make(map[int]int)
+ go func() {
+ m[1] = 1
+ ch <- true
+ }()
+ m = make(map[int]int)
+ <-ch
+}
+
+func TestRaceMapVariable3(t *testing.T) {
+ ch := make(chan bool, 1)
+ m := make(map[int]int)
+ go func() {
+ _ = m[1]
+ ch <- true
+ }()
+ m = make(map[int]int)
+ <-ch
+}
+
+type Big struct {
+ x [17]int32
+}
+
+func TestRaceMapLookupPartKey(t *testing.T) {
+ k := &Big{}
+ m := make(map[Big]bool)
+ ch := make(chan bool, 1)
+ go func() {
+ k.x[8] = 1
+ ch <- true
+ }()
+ _ = m[*k]
+ <-ch
+}
+
+func TestRaceMapLookupPartKey2(t *testing.T) {
+ k := &Big{}
+ m := make(map[Big]bool)
+ ch := make(chan bool, 1)
+ go func() {
+ k.x[8] = 1
+ ch <- true
+ }()
+ _, _ = m[*k]
+ <-ch
+}
+func TestRaceMapDeletePartKey(t *testing.T) {
+ k := &Big{}
+ m := make(map[Big]bool)
+ ch := make(chan bool, 1)
+ go func() {
+ k.x[8] = 1
+ ch <- true
+ }()
+ delete(m, *k)
+ <-ch
+}
+
+func TestRaceMapInsertPartKey(t *testing.T) {
+ k := &Big{}
+ m := make(map[Big]bool)
+ ch := make(chan bool, 1)
+ go func() {
+ k.x[8] = 1
+ ch <- true
+ }()
+ m[*k] = true
+ <-ch
+}
+
+func TestRaceMapInsertPartVal(t *testing.T) {
+ v := &Big{}
+ m := make(map[int]Big)
+ ch := make(chan bool, 1)
+ go func() {
+ v.x[8] = 1
+ ch <- true
+ }()
+ m[1] = *v
+ <-ch
+}
+
+// Test for issue 7561.
+func TestRaceMapAssignMultipleReturn(t *testing.T) {
+ connect := func() (int, error) { return 42, nil }
+ conns := make(map[int][]int)
+ conns[1] = []int{0}
+ ch := make(chan bool, 1)
+ var err error
+ go func() {
+ conns[1][0], err = connect()
+ ch <- true
+ }()
+ x := conns[1][0]
+ _ = x
+ <-ch
+}
+
+// BigKey and BigVal must be larger than 256 bytes,
+// so that the compiler sets KindGCProg for them.
+type BigKey [1000]*int
+
+type BigVal struct {
+ x int
+ y [1000]*int
+}
+
+func TestRaceMapBigKeyAccess1(t *testing.T) {
+ m := make(map[BigKey]int)
+ var k BigKey
+ ch := make(chan bool, 1)
+ go func() {
+ _ = m[k]
+ ch <- true
+ }()
+ k[30] = new(int)
+ <-ch
+}
+
+func TestRaceMapBigKeyAccess2(t *testing.T) {
+ m := make(map[BigKey]int)
+ var k BigKey
+ ch := make(chan bool, 1)
+ go func() {
+ _, _ = m[k]
+ ch <- true
+ }()
+ k[30] = new(int)
+ <-ch
+}
+
+func TestRaceMapBigKeyInsert(t *testing.T) {
+ m := make(map[BigKey]int)
+ var k BigKey
+ ch := make(chan bool, 1)
+ go func() {
+ m[k] = 1
+ ch <- true
+ }()
+ k[30] = new(int)
+ <-ch
+}
+
+func TestRaceMapBigKeyDelete(t *testing.T) {
+ m := make(map[BigKey]int)
+ var k BigKey
+ ch := make(chan bool, 1)
+ go func() {
+ delete(m, k)
+ ch <- true
+ }()
+ k[30] = new(int)
+ <-ch
+}
+
+func TestRaceMapBigValInsert(t *testing.T) {
+ m := make(map[int]BigVal)
+ var v BigVal
+ ch := make(chan bool, 1)
+ go func() {
+ m[1] = v
+ ch <- true
+ }()
+ v.y[30] = new(int)
+ <-ch
+}
+
+func TestRaceMapBigValAccess1(t *testing.T) {
+ m := make(map[int]BigVal)
+ var v BigVal
+ ch := make(chan bool, 1)
+ go func() {
+ v = m[1]
+ ch <- true
+ }()
+ v.y[30] = new(int)
+ <-ch
+}
+
+func TestRaceMapBigValAccess2(t *testing.T) {
+ m := make(map[int]BigVal)
+ var v BigVal
+ ch := make(chan bool, 1)
+ go func() {
+ v, _ = m[1]
+ ch <- true
+ }()
+ v.y[30] = new(int)
+ <-ch
+}
diff --git a/src/runtime/race/testdata/mop_test.go b/src/runtime/race/testdata/mop_test.go
new file mode 100644
index 000000000..cb17a27d3
--- /dev/null
+++ b/src/runtime/race/testdata/mop_test.go
@@ -0,0 +1,1957 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "runtime"
+ "sync"
+ "testing"
+ "time"
+ "unsafe"
+)
+
+type Point struct {
+ x, y int
+}
+
+type NamedPoint struct {
+ name string
+ p Point
+}
+
+type DummyWriter struct {
+ state int
+}
+type Writer interface {
+ Write(p []byte) (n int)
+}
+
+func (d DummyWriter) Write(p []byte) (n int) {
+ return 0
+}
+
+var GlobalX, GlobalY int = 0, 0
+var GlobalCh chan int = make(chan int, 2)
+
+func GlobalFunc1() {
+ GlobalY = GlobalX
+ GlobalCh <- 1
+}
+
+func GlobalFunc2() {
+ GlobalX = 1
+ GlobalCh <- 1
+}
+
+func TestRaceIntRWGlobalFuncs(t *testing.T) {
+ go GlobalFunc1()
+ go GlobalFunc2()
+ <-GlobalCh
+ <-GlobalCh
+}
+
+func TestRaceIntRWClosures(t *testing.T) {
+ var x, y int
+ ch := make(chan int, 2)
+
+ go func() {
+ y = x
+ ch <- 1
+ }()
+ go func() {
+ x = 1
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceIntRWClosures(t *testing.T) {
+ var x, y int
+ ch := make(chan int, 1)
+
+ go func() {
+ y = x
+ ch <- 1
+ }()
+ <-ch
+ go func() {
+ x = 1
+ ch <- 1
+ }()
+ <-ch
+
+}
+
+func TestRaceInt32RWClosures(t *testing.T) {
+ var x, y int32
+ ch := make(chan bool, 2)
+
+ go func() {
+ y = x
+ ch <- true
+ }()
+ go func() {
+ x = 1
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceCase(t *testing.T) {
+ var y int
+ for x := -1; x <= 1; x++ {
+ switch {
+ case x < 0:
+ y = -1
+ case x == 0:
+ y = 0
+ case x > 0:
+ y = 1
+ }
+ }
+ y++
+}
+
+func TestRaceCaseCondition(t *testing.T) {
+ var x int = 0
+ ch := make(chan int, 2)
+
+ go func() {
+ x = 2
+ ch <- 1
+ }()
+ go func() {
+ switch x < 2 {
+ case true:
+ x = 1
+ //case false:
+ // x = 5
+ }
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceCaseCondition2(t *testing.T) {
+ // The switch body is rearranged by the compiler, so the test
+ // passes even if we don't instrument '<'.
+ var x int = 0
+ ch := make(chan int, 2)
+
+ go func() {
+ x = 2
+ ch <- 1
+ }()
+ go func() {
+ switch x < 2 {
+ case true:
+ x = 1
+ case false:
+ x = 5
+ }
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceCaseBody(t *testing.T) {
+ var x, y int
+ ch := make(chan int, 2)
+
+ go func() {
+ y = x
+ ch <- 1
+ }()
+ go func() {
+ switch {
+ default:
+ x = 1
+ case x == 100:
+ x = -x
+ }
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceCaseFallthrough(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+ z = 1
+
+ go func() {
+ y = x
+ ch <- 1
+ }()
+ go func() {
+ switch {
+ case z == 1:
+ case z == 2:
+ x = 2
+ }
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceCaseFallthrough(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+ z = 1
+
+ go func() {
+ y = x
+ ch <- 1
+ }()
+ go func() {
+ switch {
+ case z == 1:
+ fallthrough
+ case z == 2:
+ x = 2
+ }
+ ch <- 1
+ }()
+
+ <-ch
+ <-ch
+}
+
+func TestRaceCaseIssue6418(t *testing.T) {
+ m := map[string]map[string]string{
+ "a": {
+ "b": "c",
+ },
+ }
+ ch := make(chan int)
+ go func() {
+ m["a"]["x"] = "y"
+ ch <- 1
+ }()
+ switch m["a"]["b"] {
+ }
+ <-ch
+}
+
+func TestRaceCaseType(t *testing.T) {
+ var x, y int
+ var i interface{} = x
+ c := make(chan int, 1)
+ go func() {
+ switch i.(type) {
+ case nil:
+ case int:
+ }
+ c <- 1
+ }()
+ i = y
+ <-c
+}
+
+func TestRaceCaseTypeBody(t *testing.T) {
+ var x, y int
+ var i interface{} = &x
+ c := make(chan int, 1)
+ go func() {
+ switch i := i.(type) {
+ case nil:
+ case *int:
+ *i = y
+ }
+ c <- 1
+ }()
+ x = y
+ <-c
+}
+
+func TestRaceCaseTypeIssue5890(t *testing.T) {
+ // Tests for spurious extra instrumentation of the initial
+ // interface value.
+ var x, y int
+ m := make(map[int]map[int]interface{})
+ m[0] = make(map[int]interface{})
+ c := make(chan int, 1)
+ go func() {
+ switch i := m[0][1].(type) {
+ case nil:
+ case *int:
+ *i = x
+ }
+ c <- 1
+ }()
+ m[0][1] = y
+ <-c
+}
+
+func TestNoRaceRange(t *testing.T) {
+ ch := make(chan int, 3)
+ a := [...]int{1, 2, 3}
+ for _, v := range a {
+ ch <- v
+ }
+ close(ch)
+}
+
+func TestNoRaceRangeIssue5446(t *testing.T) {
+ ch := make(chan int, 3)
+ a := []int{1, 2, 3}
+ b := []int{4}
+ // The compiler used to insert spurious instrumentation of a[i]
+ // and crash.
+ i := 1
+ for i, a[i] = range b {
+ ch <- i
+ }
+ close(ch)
+}
+
+func TestRaceRange(t *testing.T) {
+ const N = 2
+ var a [N]int
+ var x, y int
+ done := make(chan bool, N)
+ for i, v := range a {
+ go func(i int) {
+ // we don't want a write-vs-write race
+ // so there is no array b here
+ if i == 0 {
+ x = v
+ } else {
+ y = v
+ }
+ done <- true
+ }(i)
+ }
+ for i := 0; i < N; i++ {
+ <-done
+ }
+}
+
+func TestRaceForInit(t *testing.T) {
+ c := make(chan int)
+ x := 0
+ go func() {
+ c <- x
+ }()
+ for x = 42; false; {
+ }
+ <-c
+}
+
+func TestNoRaceForInit(t *testing.T) {
+ done := make(chan bool)
+ c := make(chan bool)
+ x := 0
+ go func() {
+ for {
+ _, ok := <-c
+ if !ok {
+ done <- true
+ return
+ }
+ x++
+ }
+ }()
+ i := 0
+ for x = 42; i < 10; i++ {
+ c <- true
+ }
+ close(c)
+ <-done
+}
+
+func TestRaceForTest(t *testing.T) {
+ done := make(chan bool)
+ c := make(chan bool)
+ stop := false
+ go func() {
+ for {
+ _, ok := <-c
+ if !ok {
+ done <- true
+ return
+ }
+ stop = true
+ }
+ }()
+ for !stop {
+ c <- true
+ }
+ close(c)
+ <-done
+}
+
+func TestRaceForIncr(t *testing.T) {
+ done := make(chan bool)
+ c := make(chan bool)
+ x := 0
+ go func() {
+ for {
+ _, ok := <-c
+ if !ok {
+ done <- true
+ return
+ }
+ x++
+ }
+ }()
+ for i := 0; i < 10; x++ {
+ i++
+ c <- true
+ }
+ close(c)
+ <-done
+}
+
+func TestNoRaceForIncr(t *testing.T) {
+ done := make(chan bool)
+ x := 0
+ go func() {
+ x++
+ done <- true
+ }()
+ for i := 0; i < 0; x++ {
+ }
+ <-done
+}
+
+func TestRacePlus(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+
+ go func() {
+ y = x + z
+ ch <- 1
+ }()
+ go func() {
+ y = x + z + z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRacePlus2(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+
+ go func() {
+ x = 1
+ ch <- 1
+ }()
+ go func() {
+ y = +x + z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRacePlus(t *testing.T) {
+ var x, y, z, f int
+ ch := make(chan int, 2)
+
+ go func() {
+ y = x + z
+ ch <- 1
+ }()
+ go func() {
+ f = z + x
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceComplement(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+
+ go func() {
+ x = ^y
+ ch <- 1
+ }()
+ go func() {
+ y = ^z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceDiv(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+
+ go func() {
+ x = y / (z + 1)
+ ch <- 1
+ }()
+ go func() {
+ y = z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceDivConst(t *testing.T) {
+ var x, y, z uint32
+ ch := make(chan int, 2)
+
+ go func() {
+ x = y / 3 // involves only a HMUL node
+ ch <- 1
+ }()
+ go func() {
+ y = z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceMod(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+
+ go func() {
+ x = y % (z + 1)
+ ch <- 1
+ }()
+ go func() {
+ y = z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceModConst(t *testing.T) {
+ var x, y, z int
+ ch := make(chan int, 2)
+
+ go func() {
+ x = y % 3
+ ch <- 1
+ }()
+ go func() {
+ y = z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceRotate(t *testing.T) {
+ var x, y, z uint32
+ ch := make(chan int, 2)
+
+ go func() {
+ x = y<<12 | y>>20
+ ch <- 1
+ }()
+ go func() {
+ y = z
+ ch <- 1
+ }()
+ <-ch
+ <-ch
+}
+
+// May crash if the instrumentation is reckless.
+func TestNoRaceEnoughRegisters(t *testing.T) {
+ // from erf.go
+ const (
+ sa1 = 1
+ sa2 = 2
+ sa3 = 3
+ sa4 = 4
+ sa5 = 5
+ sa6 = 6
+ sa7 = 7
+ sa8 = 8
+ )
+ var s, S float64
+ s = 3.1415
+ S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8)))))))
+ s = S
+}
+
+// emptyFunc should not be inlined.
+func emptyFunc(x int) {
+ if false {
+ fmt.Println(x)
+ }
+}
+
+func TestRaceFuncArgument(t *testing.T) {
+ var x int
+ ch := make(chan bool, 1)
+ go func() {
+ emptyFunc(x)
+ ch <- true
+ }()
+ x = 1
+ <-ch
+}
+
+func TestRaceFuncArgument2(t *testing.T) {
+ var x int
+ ch := make(chan bool, 2)
+ go func() {
+ x = 42
+ ch <- true
+ }()
+ go func(y int) {
+ ch <- true
+ }(x)
+ <-ch
+ <-ch
+}
+
+func TestRaceSprint(t *testing.T) {
+ var x int
+ ch := make(chan bool, 1)
+ go func() {
+ fmt.Sprint(x)
+ ch <- true
+ }()
+ x = 1
+ <-ch
+}
+
+func TestRaceArrayCopy(t *testing.T) {
+ ch := make(chan bool, 1)
+ var a [5]int
+ go func() {
+ a[3] = 1
+ ch <- true
+ }()
+ a = [5]int{1, 2, 3, 4, 5}
+ <-ch
+}
+
+// Blows up a naive compiler.
+func TestRaceNestedArrayCopy(t *testing.T) {
+ ch := make(chan bool, 1)
+ type (
+ Point32 [2][2][2][2][2]Point
+ Point1024 [2][2][2][2][2]Point32
+ Point32k [2][2][2][2][2]Point1024
+ Point1M [2][2][2][2][2]Point32k
+ )
+ var a, b Point1M
+ go func() {
+ a[0][1][0][1][0][1][0][1][0][1][0][1][0][1][0][1][0][1][0][1].y = 1
+ ch <- true
+ }()
+ a = b
+ <-ch
+}
+
+func TestRaceStructRW(t *testing.T) {
+ p := Point{0, 0}
+ ch := make(chan bool, 1)
+ go func() {
+ p = Point{1, 1}
+ ch <- true
+ }()
+ q := p
+ <-ch
+ p = q
+}
+
+func TestRaceStructFieldRW1(t *testing.T) {
+ p := Point{0, 0}
+ ch := make(chan bool, 1)
+ go func() {
+ p.x = 1
+ ch <- true
+ }()
+ _ = p.x
+ <-ch
+}
+
+func TestNoRaceStructFieldRW1(t *testing.T) {
+ // Same struct, different fields, no
+ // pointers. The layout is known (at compile time?),
+ // so there is no read on p,
+ // only writes on x and y.
+ p := Point{0, 0}
+ ch := make(chan bool, 1)
+ go func() {
+ p.x = 1
+ ch <- true
+ }()
+ p.y = 1
+ <-ch
+ _ = p
+}
+
+func TestNoRaceStructFieldRW2(t *testing.T) {
+ // Same as NoRaceStructFieldRW1
+ // but p is a pointer, so there is a read on p
+ p := Point{0, 0}
+ ch := make(chan bool, 1)
+ go func() {
+ p.x = 1
+ ch <- true
+ }()
+ p.y = 1
+ <-ch
+ _ = p
+}
+
+func TestRaceStructFieldRW2(t *testing.T) {
+ p := &Point{0, 0}
+ ch := make(chan bool, 1)
+ go func() {
+ p.x = 1
+ ch <- true
+ }()
+ _ = p.x
+ <-ch
+}
+
+func TestRaceStructFieldRW3(t *testing.T) {
+ p := NamedPoint{name: "a", p: Point{0, 0}}
+ ch := make(chan bool, 1)
+ go func() {
+ p.p.x = 1
+ ch <- true
+ }()
+ _ = p.p.x
+ <-ch
+}
+
+func TestRaceEfaceWW(t *testing.T) {
+ var a, b interface{}
+ ch := make(chan bool, 1)
+ go func() {
+ a = 1
+ ch <- true
+ }()
+ a = 2
+ <-ch
+ _, _ = a, b
+}
+
+func TestRaceIfaceWW(t *testing.T) {
+ var a, b Writer
+ ch := make(chan bool, 1)
+ go func() {
+ a = DummyWriter{1}
+ ch <- true
+ }()
+ a = DummyWriter{2}
+ <-ch
+ b = a
+ a = b
+}
+
+func TestRaceIfaceCmp(t *testing.T) {
+ var a, b Writer
+ a = DummyWriter{1}
+ ch := make(chan bool, 1)
+ go func() {
+ a = DummyWriter{1}
+ ch <- true
+ }()
+ _ = a == b
+ <-ch
+}
+
+func TestRaceIfaceCmpNil(t *testing.T) {
+ var a Writer
+ a = DummyWriter{1}
+ ch := make(chan bool, 1)
+ go func() {
+ a = DummyWriter{1}
+ ch <- true
+ }()
+ _ = a == nil
+ <-ch
+}
+
+func TestRaceEfaceConv(t *testing.T) {
+ c := make(chan bool)
+ v := 0
+ go func() {
+ go func(x interface{}) {
+ }(v)
+ c <- true
+ }()
+ v = 42
+ <-c
+}
+
+type OsFile struct{}
+
+func (*OsFile) Read() {
+}
+
+type IoReader interface {
+ Read()
+}
+
+func TestRaceIfaceConv(t *testing.T) {
+ c := make(chan bool)
+ f := &OsFile{}
+ go func() {
+ go func(x IoReader) {
+ }(f)
+ c <- true
+ }()
+ f = &OsFile{}
+ <-c
+}
+
+func TestRaceError(t *testing.T) {
+ ch := make(chan bool, 1)
+ var err error
+ go func() {
+ err = nil
+ ch <- true
+ }()
+ _ = err
+ <-ch
+}
+
+func TestRaceIntptrRW(t *testing.T) {
+ var x, y int
+ var p *int = &x
+ ch := make(chan bool, 1)
+ go func() {
+ *p = 5
+ ch <- true
+ }()
+ y = *p
+ x = y
+ <-ch
+}
+
+func TestRaceStringRW(t *testing.T) {
+ ch := make(chan bool, 1)
+ s := ""
+ go func() {
+ s = "abacaba"
+ ch <- true
+ }()
+ _ = s
+ <-ch
+}
+
+func TestRaceStringPtrRW(t *testing.T) {
+ ch := make(chan bool, 1)
+ var x string
+ p := &x
+ go func() {
+ *p = "a"
+ ch <- true
+ }()
+ _ = *p
+ <-ch
+}
+
+func TestRaceFloat64WW(t *testing.T) {
+ var x, y float64
+ ch := make(chan bool, 1)
+ go func() {
+ x = 1.0
+ ch <- true
+ }()
+ x = 2.0
+ <-ch
+
+ y = x
+ x = y
+}
+
+func TestRaceComplex128WW(t *testing.T) {
+ var x, y complex128
+ ch := make(chan bool, 1)
+ go func() {
+ x = 2 + 2i
+ ch <- true
+ }()
+ x = 4 + 4i
+ <-ch
+
+ y = x
+ x = y
+}
+
+func TestRaceUnsafePtrRW(t *testing.T) {
+ var x, y, z int
+ x, y, z = 1, 2, 3
+ var p unsafe.Pointer = unsafe.Pointer(&x)
+ ch := make(chan bool, 1)
+ go func() {
+ p = (unsafe.Pointer)(&z)
+ ch <- true
+ }()
+ y = *(*int)(p)
+ x = y
+ <-ch
+}
+
+func TestRaceFuncVariableRW(t *testing.T) {
+ var f func(x int) int
+ f = func(x int) int {
+ return x * x
+ }
+ ch := make(chan bool, 1)
+ go func() {
+ f = func(x int) int {
+ return x
+ }
+ ch <- true
+ }()
+ y := f(1)
+ <-ch
+ x := y
+ y = x
+}
+
+func TestRaceFuncVariableWW(t *testing.T) {
+ var f func(x int) int
+ ch := make(chan bool, 1)
+ go func() {
+ f = func(x int) int {
+ return x
+ }
+ ch <- true
+ }()
+ f = func(x int) int {
+ return x * x
+ }
+ <-ch
+}
+
+// This one does not really belong in mop_test.
+func TestRacePanic(t *testing.T) {
+ var x int
+ var zero int = 0
+ ch := make(chan bool, 2)
+ go func() {
+ defer func() {
+ err := recover()
+ if err == nil {
+ panic("should be panicking")
+ }
+ x = 1
+ ch <- true
+ }()
+ var y int = 1 / zero
+ zero = y
+ }()
+ go func() {
+ defer func() {
+ err := recover()
+ if err == nil {
+ panic("should be panicking")
+ }
+ x = 2
+ ch <- true
+ }()
+ var y int = 1 / zero
+ zero = y
+ }()
+
+ <-ch
+ <-ch
+ if zero != 0 {
+ panic("zero has changed")
+ }
+}
+
+func TestNoRaceBlank(t *testing.T) {
+ var a [5]int
+ ch := make(chan bool, 1)
+ go func() {
+ _, _ = a[0], a[1]
+ ch <- true
+ }()
+ _, _ = a[2], a[3]
+ <-ch
+ a[1] = a[0]
+}
+
+func TestRaceAppendRW(t *testing.T) {
+ a := make([]int, 10)
+ ch := make(chan bool)
+ go func() {
+ _ = append(a, 1)
+ ch <- true
+ }()
+ a[0] = 1
+ <-ch
+}
+
+func TestRaceAppendLenRW(t *testing.T) {
+ a := make([]int, 0)
+ ch := make(chan bool)
+ go func() {
+ a = append(a, 1)
+ ch <- true
+ }()
+ _ = len(a)
+ <-ch
+}
+
+func TestRaceAppendCapRW(t *testing.T) {
+ a := make([]int, 0)
+ ch := make(chan string)
+ go func() {
+ a = append(a, 1)
+ ch <- ""
+ }()
+ _ = cap(a)
+ <-ch
+}
+
+func TestNoRaceFuncArgsRW(t *testing.T) {
+ ch := make(chan byte, 1)
+ var x byte
+ go func(y byte) {
+ _ = y
+ ch <- 0
+ }(x)
+ x = 1
+ <-ch
+}
+
+func TestRaceFuncArgsRW(t *testing.T) {
+ ch := make(chan byte, 1)
+ var x byte
+ go func(y *byte) {
+ _ = *y
+ ch <- 0
+ }(&x)
+ x = 1
+ <-ch
+}
+
+// from the mailing list, slightly modified
+// unprotected concurrent access to seen[]
+func TestRaceCrawl(t *testing.T) {
+ url := "dummyurl"
+ depth := 3
+ seen := make(map[string]bool)
+ ch := make(chan int, 100)
+ var wg sync.WaitGroup
+ var crawl func(string, int)
+ crawl = func(u string, d int) {
+ nurl := 0
+ defer func() {
+ ch <- nurl
+ }()
+ seen[u] = true
+ if d <= 0 {
+ return
+ }
+ urls := [...]string{"a", "b", "c"}
+ for _, uu := range urls {
+ if _, ok := seen[uu]; !ok {
+ wg.Add(1)
+ go crawl(uu, d-1)
+ nurl++
+ }
+ }
+ wg.Done()
+ }
+ wg.Add(1)
+ go crawl(url, depth)
+ wg.Wait()
+}
+
+func TestRaceIndirection(t *testing.T) {
+ ch := make(chan struct{}, 1)
+ var y int
+ var x *int = &y
+ go func() {
+ *x = 1
+ ch <- struct{}{}
+ }()
+ *x = 2
+ <-ch
+ _ = *x
+}
+
+func TestRaceRune(t *testing.T) {
+ c := make(chan bool)
+ var x rune
+ go func() {
+ x = 1
+ c <- true
+ }()
+ _ = x
+ <-c
+}
+
+func TestRaceEmptyInterface1(t *testing.T) {
+ c := make(chan bool)
+ var x interface{}
+ go func() {
+ x = nil
+ c <- true
+ }()
+ _ = x
+ <-c
+}
+
+func TestRaceEmptyInterface2(t *testing.T) {
+ c := make(chan bool)
+ var x interface{}
+ go func() {
+ x = &Point{}
+ c <- true
+ }()
+ _ = x
+ <-c
+}
+
+func TestRaceTLS(t *testing.T) {
+ comm := make(chan *int)
+ done := make(chan bool, 2)
+ go func() {
+ var x int
+ comm <- &x
+ x = 1
+ x = *(<-comm)
+ done <- true
+ }()
+ go func() {
+ p := <-comm
+ *p = 2
+ comm <- p
+ done <- true
+ }()
+ <-done
+ <-done
+}
+
+func TestNoRaceHeapReallocation(t *testing.T) {
+ // It is possible that a future implementation
+ // of memory allocation will ruin this test.
+ // Increasing n might help in this case, so
+ // this test is a bit more generic than most of the
+ // others.
+ const n = 2
+ done := make(chan bool, n)
+ empty := func(p *int) {}
+ for i := 0; i < n; i++ {
+ ms := i
+ go func() {
+ <-time.After(time.Duration(ms) * time.Millisecond)
+ runtime.GC()
+ var x int
+ empty(&x) // x goes to the heap
+ done <- true
+ }()
+ }
+ for i := 0; i < n; i++ {
+ <-done
+ }
+}
+
+func TestRaceAnd(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ x = 1
+ c <- true
+ }()
+ if x == 1 && y == 1 {
+ }
+ <-c
+}
+
+func TestRaceAnd2(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ x = 1
+ c <- true
+ }()
+ if y == 0 && x == 1 {
+ }
+ <-c
+}
+
+func TestNoRaceAnd(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ x = 1
+ c <- true
+ }()
+ if y == 1 && x == 1 {
+ }
+ <-c
+}
+
+func TestRaceOr(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ x = 1
+ c <- true
+ }()
+ if x == 1 || y == 1 {
+ }
+ <-c
+}
+
+func TestRaceOr2(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ x = 1
+ c <- true
+ }()
+ if y == 1 || x == 1 {
+ }
+ <-c
+}
+
+func TestNoRaceOr(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ x = 1
+ c <- true
+ }()
+ if y == 0 || x == 1 {
+ }
+ <-c
+}
+
+func TestNoRaceShortCalc(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ y = 1
+ c <- true
+ }()
+ if x == 0 || y == 0 {
+ }
+ <-c
+}
+
+func TestNoRaceShortCalc2(t *testing.T) {
+ c := make(chan bool)
+ x, y := 0, 0
+ go func() {
+ y = 1
+ c <- true
+ }()
+ if x == 1 && y == 0 {
+ }
+ <-c
+}
+
+func TestRaceFuncItself(t *testing.T) {
+ c := make(chan bool)
+ f := func() {}
+ go func() {
+ f()
+ c <- true
+ }()
+ f = func() {}
+ <-c
+}
+
+func TestNoRaceFuncUnlock(t *testing.T) {
+ ch := make(chan bool, 1)
+ var mu sync.Mutex
+ x := 0
+ go func() {
+ mu.Lock()
+ x = 42
+ mu.Unlock()
+ ch <- true
+ }()
+ x = func(mu *sync.Mutex) int {
+ mu.Lock()
+ return 43
+ }(&mu)
+ mu.Unlock()
+ <-ch
+}
+
+func TestRaceStructInit(t *testing.T) {
+ type X struct {
+ x, y int
+ }
+ c := make(chan bool, 1)
+ y := 0
+ go func() {
+ y = 42
+ c <- true
+ }()
+ x := X{x: y}
+ _ = x
+ <-c
+}
+
+func TestRaceArrayInit(t *testing.T) {
+ c := make(chan bool, 1)
+ y := 0
+ go func() {
+ y = 42
+ c <- true
+ }()
+ x := []int{0, y, 42}
+ _ = x
+ <-c
+}
+
+func TestRaceMapInit(t *testing.T) {
+ c := make(chan bool, 1)
+ y := 0
+ go func() {
+ y = 42
+ c <- true
+ }()
+ x := map[int]int{0: 42, y: 42}
+ _ = x
+ <-c
+}
+
+func TestRaceMapInit2(t *testing.T) {
+ c := make(chan bool, 1)
+ y := 0
+ go func() {
+ y = 42
+ c <- true
+ }()
+ x := map[int]int{0: 42, 42: y}
+ _ = x
+ <-c
+}
+
+type Inter interface {
+ Foo(x int)
+}
+type InterImpl struct {
+ x, y int
+}
+
+func (p InterImpl) Foo(x int) {
+ // prevent inlining
+ z := 42
+ x = 85
+ y := x / z
+ z = y * z
+ x = z * y
+ _, _, _ = x, y, z
+}
+
+type InterImpl2 InterImpl
+
+func (p *InterImpl2) Foo(x int) {
+ if p == nil {
+ InterImpl{}.Foo(x)
+ }
+ InterImpl(*p).Foo(x)
+}
+
+func TestRaceInterCall(t *testing.T) {
+ c := make(chan bool, 1)
+ p := InterImpl{}
+ var x Inter = p
+ go func() {
+ p2 := InterImpl{}
+ x = p2
+ c <- true
+ }()
+ x.Foo(0)
+ <-c
+}
+
+func TestRaceInterCall2(t *testing.T) {
+ c := make(chan bool, 1)
+ p := InterImpl{}
+ var x Inter = p
+ z := 0
+ go func() {
+ z = 42
+ c <- true
+ }()
+ x.Foo(z)
+ <-c
+}
+
+func TestRaceFuncCall(t *testing.T) {
+ c := make(chan bool, 1)
+ f := func(x, y int) {}
+ x, y := 0, 0
+ go func() {
+ y = 42
+ c <- true
+ }()
+ f(x, y)
+ <-c
+}
+
+func TestRaceMethodCall(t *testing.T) {
+ c := make(chan bool, 1)
+ i := InterImpl{}
+ x := 0
+ go func() {
+ x = 42
+ c <- true
+ }()
+ i.Foo(x)
+ <-c
+}
+
+func TestRaceMethodCall2(t *testing.T) {
+ c := make(chan bool, 1)
+ i := &InterImpl{}
+ go func() {
+ i = &InterImpl{}
+ c <- true
+ }()
+ i.Foo(0)
+ <-c
+}
+
+// Method value with concrete value receiver.
+func TestRaceMethodValue(t *testing.T) {
+ c := make(chan bool, 1)
+ i := InterImpl{}
+ go func() {
+ i = InterImpl{}
+ c <- true
+ }()
+ _ = i.Foo
+ <-c
+}
+
+// Method value with interface receiver.
+func TestRaceMethodValue2(t *testing.T) {
+ c := make(chan bool, 1)
+ var i Inter = InterImpl{}
+ go func() {
+ i = InterImpl{}
+ c <- true
+ }()
+ _ = i.Foo
+ <-c
+}
+
+// Method value with implicit dereference.
+func TestRaceMethodValue3(t *testing.T) {
+ c := make(chan bool, 1)
+ i := &InterImpl{}
+ go func() {
+ *i = InterImpl{}
+ c <- true
+ }()
+ _ = i.Foo // dereferences i.
+ <-c
+}
+
+// Method value implicitly taking receiver address.
+func TestNoRaceMethodValue(t *testing.T) {
+ c := make(chan bool, 1)
+ i := InterImpl2{}
+ go func() {
+ i = InterImpl2{}
+ c <- true
+ }()
+ _ = i.Foo // takes the address of i only.
+ <-c
+}
+
+func TestRacePanicArg(t *testing.T) {
+ c := make(chan bool, 1)
+ err := errors.New("err")
+ go func() {
+ err = errors.New("err2")
+ c <- true
+ }()
+ defer func() {
+ recover()
+ <-c
+ }()
+ panic(err)
+}
+
+func TestRaceDeferArg(t *testing.T) {
+ c := make(chan bool, 1)
+ x := 0
+ go func() {
+ x = 42
+ c <- true
+ }()
+ func() {
+ defer func(x int) {
+ }(x)
+ }()
+ <-c
+}
+
+type DeferT int
+
+func (d DeferT) Foo() {
+}
+
+func TestRaceDeferArg2(t *testing.T) {
+ c := make(chan bool, 1)
+ var x DeferT
+ go func() {
+ var y DeferT
+ x = y
+ c <- true
+ }()
+ func() {
+ defer x.Foo()
+ }()
+ <-c
+}
+
+func TestNoRaceAddrExpr(t *testing.T) {
+ c := make(chan bool, 1)
+ x := 0
+ go func() {
+ x = 42
+ c <- true
+ }()
+ _ = &x
+ <-c
+}
+
+type AddrT struct {
+ _ [256]byte
+ x int
+}
+
+type AddrT2 struct {
+ _ [512]byte
+ p *AddrT
+}
+
+func TestRaceAddrExpr(t *testing.T) {
+ c := make(chan bool, 1)
+ a := AddrT2{p: &AddrT{x: 42}}
+ go func() {
+ a.p = &AddrT{x: 43}
+ c <- true
+ }()
+ _ = &a.p.x
+ <-c
+}
+
+func TestRaceTypeAssert(t *testing.T) {
+ c := make(chan bool, 1)
+ x := 0
+ var i interface{} = x
+ go func() {
+ y := 0
+ i = y
+ c <- true
+ }()
+ _ = i.(int)
+ <-c
+}
+
+func TestRaceBlockAs(t *testing.T) {
+ c := make(chan bool, 1)
+ var x, y int
+ go func() {
+ x = 42
+ c <- true
+ }()
+ x, y = y, x
+ <-c
+}
+
+func TestRaceSliceSlice(t *testing.T) {
+ c := make(chan bool, 1)
+ x := make([]int, 10)
+ go func() {
+ x = make([]int, 20)
+ c <- true
+ }()
+ _ = x[2:3]
+ <-c
+}
+
+func TestRaceSliceSlice2(t *testing.T) {
+ c := make(chan bool, 1)
+ x := make([]int, 10)
+ i := 2
+ go func() {
+ i = 3
+ c <- true
+ }()
+ _ = x[i:4]
+ <-c
+}
+
+func TestRaceSliceString(t *testing.T) {
+ c := make(chan bool, 1)
+ x := "hello"
+ go func() {
+ x = "world"
+ c <- true
+ }()
+ _ = x[2:3]
+ <-c
+}
+
+func TestRaceSliceStruct(t *testing.T) {
+ type X struct {
+ x, y int
+ }
+ c := make(chan bool, 1)
+ x := make([]X, 10)
+ go func() {
+ y := make([]X, 10)
+ copy(y, x)
+ c <- true
+ }()
+ x[1].y = 42
+ <-c
+}
+
+func TestRaceAppendSliceStruct(t *testing.T) {
+ type X struct {
+ x, y int
+ }
+ c := make(chan bool, 1)
+ x := make([]X, 10)
+ go func() {
+ y := make([]X, 0, 10)
+ y = append(y, x...)
+ c <- true
+ }()
+ x[1].y = 42
+ <-c
+}
+
+func TestRaceStructInd(t *testing.T) {
+ c := make(chan bool, 1)
+ type Item struct {
+ x, y int
+ }
+ i := Item{}
+ go func(p *Item) {
+ *p = Item{}
+ c <- true
+ }(&i)
+ i.y = 42
+ <-c
+}
+
+func TestRaceAsFunc1(t *testing.T) {
+ var s []byte
+ c := make(chan bool, 1)
+ go func() {
+ var err error
+ s, err = func() ([]byte, error) {
+ t := []byte("hello world")
+ return t, nil
+ }()
+ c <- true
+ _ = err
+ }()
+ _ = string(s)
+ <-c
+}
+
+func TestRaceAsFunc2(t *testing.T) {
+ c := make(chan bool, 1)
+ x := 0
+ go func() {
+ func(x int) {
+ }(x)
+ c <- true
+ }()
+ x = 42
+ <-c
+}
+
+func TestRaceAsFunc3(t *testing.T) {
+ c := make(chan bool, 1)
+ var mu sync.Mutex
+ x := 0
+ go func() {
+ func(x int) {
+ mu.Lock()
+ }(x) // Read of x must be outside of the mutex.
+ mu.Unlock()
+ c <- true
+ }()
+ mu.Lock()
+ x = 42
+ mu.Unlock()
+ <-c
+}
+
+func TestNoRaceAsFunc4(t *testing.T) {
+ c := make(chan bool, 1)
+ var mu sync.Mutex
+ x := 0
+ go func() {
+ x = func() int { // Write of x must be under the mutex.
+ mu.Lock()
+ return 42
+ }()
+ mu.Unlock()
+ c <- true
+ }()
+ mu.Lock()
+ x = 42
+ mu.Unlock()
+ <-c
+}
+
+func TestRaceHeapParam(t *testing.T) {
+ x := func() (x int) {
+ go func() {
+ x = 42
+ }()
+ return
+ }()
+ _ = x
+}
+
+func TestNoRaceEmptyStruct(t *testing.T) {
+ type Empty struct{}
+ type X struct {
+ y int64
+ Empty
+ }
+ type Y struct {
+ x X
+ y int64
+ }
+ c := make(chan X)
+ var y Y
+ go func() {
+ x := y.x
+ c <- x
+ }()
+ y.y = 42
+ <-c
+}
+
+func TestRaceNestedStruct(t *testing.T) {
+ type X struct {
+ x, y int
+ }
+ type Y struct {
+ x X
+ }
+ c := make(chan Y)
+ var y Y
+ go func() {
+ c <- y
+ }()
+ y.x.y = 42
+ <-c
+}
+
+func TestRaceIssue5567(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
+ in := make(chan []byte)
+ res := make(chan error)
+ go func() {
+ var err error
+ defer func() {
+ close(in)
+ res <- err
+ }()
+ path := "mop_test.go"
+ f, err := os.Open(path)
+ if err != nil {
+ return
+ }
+ defer f.Close()
+ var n, total int
+ b := make([]byte, 17) // the race is on the b buffer
+ for err == nil {
+ n, err = f.Read(b)
+ total += n
+ if n > 0 {
+ in <- b[:n]
+ }
+ }
+ if err == io.EOF {
+ err = nil
+ }
+ }()
+ h := sha1.New()
+ for b := range in {
+ h.Write(b)
+ }
+ _ = h.Sum(nil)
+ err := <-res
+ if err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestRaceIssue5654(t *testing.T) {
+ text := `Friends, Romans, countrymen, lend me your ears;
+I come to bury Caesar, not to praise him.
+The evil that men do lives after them;
+The good is oft interred with their bones;
+So let it be with Caesar. The noble Brutus
+Hath told you Caesar was ambitious:
+If it were so, it was a grievous fault,
+And grievously hath Caesar answer'd it.
+Here, under leave of Brutus and the rest -
+For Brutus is an honourable man;
+So are they all, all honourable men -
+Come I to speak in Caesar's funeral.
+He was my friend, faithful and just to me:
+But Brutus says he was ambitious;
+And Brutus is an honourable man.`
+
+ data := bytes.NewBufferString(text)
+ in := make(chan []byte)
+
+ go func() {
+ buf := make([]byte, 16)
+ var n int
+ var err error
+ for ; err == nil; n, err = data.Read(buf) {
+ in <- buf[:n]
+ }
+ close(in)
+ }()
+ res := ""
+ for s := range in {
+ res += string(s)
+ }
+ _ = res
+}
+
+type Base int
+
+func (b *Base) Foo() int {
+ return 42
+}
+
+func (b Base) Bar() int {
+ return int(b)
+}
+
+func TestNoRaceMethodThunk(t *testing.T) {
+ type Derived struct {
+ pad int
+ Base
+ }
+ var d Derived
+ done := make(chan bool)
+ go func() {
+ _ = d.Foo()
+ done <- true
+ }()
+ d = Derived{}
+ <-done
+}
+
+func TestRaceMethodThunk(t *testing.T) {
+ type Derived struct {
+ pad int
+ *Base
+ }
+ var d Derived
+ done := make(chan bool)
+ go func() {
+ _ = d.Foo()
+ done <- true
+ }()
+ d = Derived{}
+ <-done
+}
+
+func TestRaceMethodThunk2(t *testing.T) {
+ type Derived struct {
+ pad int
+ Base
+ }
+ var d Derived
+ done := make(chan bool)
+ go func() {
+ _ = d.Bar()
+ done <- true
+ }()
+ d = Derived{}
+ <-done
+}
+
+func TestRaceMethodThunk3(t *testing.T) {
+ type Derived struct {
+ pad int
+ *Base
+ }
+ var d Derived
+ d.Base = new(Base)
+ done := make(chan bool)
+ go func() {
+ _ = d.Bar()
+ done <- true
+ }()
+ d.Base = new(Base)
+ <-done
+}
+
+func TestRaceMethodThunk4(t *testing.T) {
+ type Derived struct {
+ pad int
+ *Base
+ }
+ var d Derived
+ d.Base = new(Base)
+ done := make(chan bool)
+ go func() {
+ _ = d.Bar()
+ done <- true
+ }()
+ *(*int)(d.Base) = 42
+ <-done
+}
+
+func TestNoRaceTinyAlloc(t *testing.T) {
+ const P = 4
+ const N = 1e6
+ var tinySink *byte
+ done := make(chan bool)
+ for p := 0; p < P; p++ {
+ go func() {
+ for i := 0; i < N; i++ {
+ var b byte
+ if b != 0 {
+ tinySink = &b // make it heap allocated
+ }
+ b = 42
+ }
+ done <- true
+ }()
+ }
+ for p := 0; p < P; p++ {
+ <-done
+ }
+}
diff --git a/src/runtime/race/testdata/mutex_test.go b/src/runtime/race/testdata/mutex_test.go
new file mode 100644
index 000000000..3cf03ae6b
--- /dev/null
+++ b/src/runtime/race/testdata/mutex_test.go
@@ -0,0 +1,138 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "sync"
+ "testing"
+ "time"
+)
+
+func TestNoRaceMutex(t *testing.T) {
+ var mu sync.Mutex
+ var x int16 = 0
+ ch := make(chan bool, 2)
+ go func() {
+ mu.Lock()
+ defer mu.Unlock()
+ x = 1
+ ch <- true
+ }()
+ go func() {
+ mu.Lock()
+ x = 2
+ mu.Unlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceMutex(t *testing.T) {
+ var mu sync.Mutex
+ var x int16 = 0
+ ch := make(chan bool, 2)
+ go func() {
+ x = 1
+ mu.Lock()
+ defer mu.Unlock()
+ ch <- true
+ }()
+ go func() {
+ x = 2
+ mu.Lock()
+ mu.Unlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceMutex2(t *testing.T) {
+ var mu1 sync.Mutex
+ var mu2 sync.Mutex
+ var x int8 = 0
+ ch := make(chan bool, 2)
+ go func() {
+ mu1.Lock()
+ defer mu1.Unlock()
+ x = 1
+ ch <- true
+ }()
+ go func() {
+ mu2.Lock()
+ x = 2
+ mu2.Unlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceMutexPureHappensBefore(t *testing.T) {
+ var mu sync.Mutex
+ var x int16 = 0
+ ch := make(chan bool, 2)
+ go func() {
+ x = 1
+ mu.Lock()
+ mu.Unlock()
+ ch <- true
+ }()
+ go func() {
+ <-time.After(1e5)
+ mu.Lock()
+ mu.Unlock()
+ x = 1
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceMutexSemaphore(t *testing.T) {
+ var mu sync.Mutex
+ ch := make(chan bool, 2)
+ x := 0
+ mu.Lock()
+ go func() {
+ x = 1
+ mu.Unlock()
+ ch <- true
+ }()
+ go func() {
+ mu.Lock()
+ x = 2
+ mu.Unlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+// from doc/go_mem.html
+func TestNoRaceMutexExampleFromHtml(t *testing.T) {
+ var l sync.Mutex
+ a := ""
+
+ l.Lock()
+ go func() {
+ a = "hello, world"
+ l.Unlock()
+ }()
+ l.Lock()
+ _ = a
+}
+
+func TestRaceMutexOverwrite(t *testing.T) {
+ c := make(chan bool, 1)
+ var mu sync.Mutex
+ go func() {
+ mu = sync.Mutex{}
+ c <- true
+ }()
+ mu.Lock()
+ <-c
+}
diff --git a/src/runtime/race/testdata/regression_test.go b/src/runtime/race/testdata/regression_test.go
new file mode 100644
index 000000000..d461269d9
--- /dev/null
+++ b/src/runtime/race/testdata/regression_test.go
@@ -0,0 +1,194 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code patterns that caused problems in the past.
+
+package race_test
+
+import (
+ "testing"
+)
+
+type LogImpl struct {
+ x int
+}
+
+func NewLog() (l LogImpl) {
+ c := make(chan bool)
+ go func() {
+ _ = l
+ c <- true
+ }()
+ l = LogImpl{}
+ <-c
+ return
+}
+
+var _ LogImpl = NewLog()
+
+func MakeMap() map[int]int {
+ return make(map[int]int)
+}
+
+func InstrumentMapLen() {
+ _ = len(MakeMap())
+}
+
+func InstrumentMapLen2() {
+ m := make(map[int]map[int]int)
+ _ = len(m[0])
+}
+
+func InstrumentMapLen3() {
+ m := make(map[int]*map[int]int)
+ _ = len(*m[0])
+}
+
+func TestRaceUnaddressableMapLen(t *testing.T) {
+ m := make(map[int]map[int]int)
+ ch := make(chan int, 1)
+ m[0] = make(map[int]int)
+ go func() {
+ _ = len(m[0])
+ ch <- 0
+ }()
+ m[0][0] = 1
+ <-ch
+}
+
+type Rect struct {
+ x, y int
+}
+
+type Image struct {
+ min, max Rect
+}
+
+func NewImage() Image {
+ var pleaseDoNotInlineMe stack
+ pleaseDoNotInlineMe.push(1)
+ _ = pleaseDoNotInlineMe.pop()
+ return Image{}
+}
+
+func AddrOfTemp() {
+ _ = NewImage().min
+}
+
+type TypeID int
+
+func (t *TypeID) encodeType(x int) (tt TypeID, err error) {
+ switch x {
+ case 0:
+ return t.encodeType(x * x)
+ }
+ return 0, nil
+}
+
+type stack []int
+
+func (s *stack) push(x int) {
+ *s = append(*s, x)
+}
+
+func (s *stack) pop() int {
+ i := len(*s)
+ n := (*s)[i-1]
+ *s = (*s)[:i-1]
+ return n
+}
+
+func TestNoRaceStackPushPop(t *testing.T) {
+ var s stack
+ go func(s *stack) {}(&s)
+ s.push(1)
+ x := s.pop()
+ _ = x
+}
+
+type RpcChan struct {
+ c chan bool
+}
+
+var makeChanCalls int
+
+func makeChan() *RpcChan {
+ var pleaseDoNotInlineMe stack
+ pleaseDoNotInlineMe.push(1)
+ _ = pleaseDoNotInlineMe.pop()
+
+ makeChanCalls++
+ c := &RpcChan{make(chan bool, 1)}
+ c.c <- true
+ return c
+}
+
+func call() bool {
+ x := <-makeChan().c
+ return x
+}
+
+func TestNoRaceRpcChan(t *testing.T) {
+ makeChanCalls = 0
+ _ = call()
+ if makeChanCalls != 1 {
+ t.Fatalf("makeChanCalls %d, expected 1\n", makeChanCalls)
+ }
+}
+
+func divInSlice() {
+ v := make([]int64, 10)
+ i := 1
+ _ = v[(i*4)/3]
+}
+
+func TestNoRaceReturn(t *testing.T) {
+ c := make(chan int)
+ noRaceReturn(c)
+ <-c
+}
+
+// Return used to do an implicit a = a, causing a read/write race
+// with the goroutine. The compiler now has an optimization to avoid that.
+// See issue 4014.
+func noRaceReturn(c chan int) (a, b int) {
+ a = 42
+ go func() {
+ _ = a
+ c <- 1
+ }()
+ return a, 10
+}
+
+func issue5431() {
+ var p **inltype
+ if inlinetest(p).x && inlinetest(p).y {
+ } else if inlinetest(p).x || inlinetest(p).y {
+ }
+}
+
+type inltype struct {
+ x, y bool
+}
+
+func inlinetest(p **inltype) *inltype {
+ return *p
+}
+
+type iface interface {
+ Foo() *struct{ b bool }
+}
+
+type Int int
+
+func (i Int) Foo() *struct{ b bool } {
+ return &struct{ b bool }{false}
+}
+
+func TestNoRaceForInfiniteLoop(t *testing.T) {
+ var x Int
+ // interface conversion causes nodes to be put on init list
+ for iface(x).Foo().b {
+ }
+}
diff --git a/src/runtime/race/testdata/rwmutex_test.go b/src/runtime/race/testdata/rwmutex_test.go
new file mode 100644
index 000000000..85cb5df3c
--- /dev/null
+++ b/src/runtime/race/testdata/rwmutex_test.go
@@ -0,0 +1,134 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "sync"
+ "testing"
+ "time"
+)
+
+func TestRaceMutexRWMutex(t *testing.T) {
+ var mu1 sync.Mutex
+ var mu2 sync.RWMutex
+ var x int16 = 0
+ ch := make(chan bool, 2)
+ go func() {
+ mu1.Lock()
+ defer mu1.Unlock()
+ x = 1
+ ch <- true
+ }()
+ go func() {
+ mu2.Lock()
+ x = 2
+ mu2.Unlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestNoRaceRWMutex(t *testing.T) {
+ var mu sync.RWMutex
+ var x, y int64 = 0, 1
+ ch := make(chan bool, 2)
+ go func() {
+ mu.Lock()
+ defer mu.Unlock()
+ x = 2
+ ch <- true
+ }()
+ go func() {
+ mu.RLock()
+ y = x
+ mu.RUnlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+}
+
+func TestRaceRWMutexMultipleReaders(t *testing.T) {
+ var mu sync.RWMutex
+ var x, y int64 = 0, 1
+ ch := make(chan bool, 3)
+ go func() {
+ mu.Lock()
+ defer mu.Unlock()
+ x = 2
+ ch <- true
+ }()
+ go func() {
+ mu.RLock()
+ y = x + 1
+ mu.RUnlock()
+ ch <- true
+ }()
+ go func() {
+ mu.RLock()
+ y = x + 2
+ mu.RUnlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+ <-ch
+ _ = y
+}
+
+func TestNoRaceRWMutexMultipleReaders(t *testing.T) {
+ var mu sync.RWMutex
+ x := int64(0)
+ ch := make(chan bool, 3)
+ go func() {
+ mu.Lock()
+ defer mu.Unlock()
+ x = 2
+ ch <- true
+ }()
+ go func() {
+ mu.RLock()
+ y := x + 1
+ _ = y
+ mu.RUnlock()
+ ch <- true
+ }()
+ go func() {
+ mu.RLock()
+ y := x + 2
+ _ = y
+ mu.RUnlock()
+ ch <- true
+ }()
+ <-ch
+ <-ch
+ <-ch
+}
+
+func TestNoRaceRWMutexTransitive(t *testing.T) {
+ var mu sync.RWMutex
+ x := int64(0)
+ ch := make(chan bool, 2)
+ go func() {
+ mu.RLock()
+ _ = x
+ mu.RUnlock()
+ ch <- true
+ }()
+ go func() {
+ time.Sleep(1e7)
+ mu.RLock()
+ _ = x
+ mu.RUnlock()
+ ch <- true
+ }()
+ time.Sleep(2e7)
+ mu.Lock()
+ x = 42
+ mu.Unlock()
+ <-ch
+ <-ch
+}
diff --git a/src/runtime/race/testdata/select_test.go b/src/runtime/race/testdata/select_test.go
new file mode 100644
index 000000000..4a3a23647
--- /dev/null
+++ b/src/runtime/race/testdata/select_test.go
@@ -0,0 +1,286 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "runtime"
+ "testing"
+)
+
+func TestNoRaceSelect1(t *testing.T) {
+ var x int
+ compl := make(chan bool)
+ c := make(chan bool)
+ c1 := make(chan bool)
+
+ go func() {
+ x = 1
+ // At least two channels are needed because
+ // otherwise the compiler optimizes select out.
+ // See comment in runtime/chan.c:^selectgo.
+ select {
+ case c <- true:
+ case c1 <- true:
+ }
+ compl <- true
+ }()
+ select {
+ case <-c:
+ case c1 <- true:
+ }
+ x = 2
+ <-compl
+}
+
+func TestNoRaceSelect2(t *testing.T) {
+ var x int
+ compl := make(chan bool)
+ c := make(chan bool)
+ c1 := make(chan bool)
+ go func() {
+ select {
+ case <-c:
+ case <-c1:
+ }
+ x = 1
+ compl <- true
+ }()
+ x = 2
+ close(c)
+ runtime.Gosched()
+ <-compl
+}
+
+func TestNoRaceSelect3(t *testing.T) {
+ var x int
+ compl := make(chan bool)
+ c := make(chan bool, 10)
+ c1 := make(chan bool)
+ go func() {
+ x = 1
+ select {
+ case c <- true:
+ case <-c1:
+ }
+ compl <- true
+ }()
+ <-c
+ x = 2
+ <-compl
+}
+
+func TestNoRaceSelect4(t *testing.T) {
+ type Task struct {
+ f func()
+ done chan bool
+ }
+
+ queue := make(chan Task)
+ dummy := make(chan bool)
+
+ go func() {
+ for {
+ select {
+ case t := <-queue:
+ t.f()
+ t.done <- true
+ }
+ }
+ }()
+
+ doit := func(f func()) {
+ done := make(chan bool, 1)
+ select {
+ case queue <- Task{f, done}:
+ case <-dummy:
+ }
+ select {
+ case <-done:
+ case <-dummy:
+ }
+ }
+
+ var x int
+ doit(func() {
+ x = 1
+ })
+ _ = x
+}
+
+func TestNoRaceSelect5(t *testing.T) {
+ test := func(sel, needSched bool) {
+ var x int
+ ch := make(chan bool)
+ c1 := make(chan bool)
+
+ done := make(chan bool, 2)
+ go func() {
+ if needSched {
+ runtime.Gosched()
+ }
+ // println(1)
+ x = 1
+ if sel {
+ select {
+ case ch <- true:
+ case <-c1:
+ }
+ } else {
+ ch <- true
+ }
+ done <- true
+ }()
+
+ go func() {
+ // println(2)
+ if sel {
+ select {
+ case <-ch:
+ case <-c1:
+ }
+ } else {
+ <-ch
+ }
+ x = 1
+ done <- true
+ }()
+ <-done
+ <-done
+ }
+
+ test(true, true)
+ test(true, false)
+ test(false, true)
+ test(false, false)
+}
+
+func TestRaceSelect1(t *testing.T) {
+ var x int
+ compl := make(chan bool, 2)
+ c := make(chan bool)
+ c1 := make(chan bool)
+
+ go func() {
+ <-c
+ <-c
+ }()
+ f := func() {
+ select {
+ case c <- true:
+ case c1 <- true:
+ }
+ x = 1
+ compl <- true
+ }
+ go f()
+ go f()
+ <-compl
+ <-compl
+}
+
+func TestRaceSelect2(t *testing.T) {
+ var x int
+ compl := make(chan bool)
+ c := make(chan bool)
+ c1 := make(chan bool)
+ go func() {
+ x = 1
+ select {
+ case <-c:
+ case <-c1:
+ }
+ compl <- true
+ }()
+ close(c)
+ x = 2
+ <-compl
+}
+
+func TestRaceSelect3(t *testing.T) {
+ var x int
+ compl := make(chan bool)
+ c := make(chan bool)
+ c1 := make(chan bool)
+ go func() {
+ x = 1
+ select {
+ case c <- true:
+ case c1 <- true:
+ }
+ compl <- true
+ }()
+ x = 2
+ select {
+ case <-c:
+ }
+ <-compl
+}
+
+func TestRaceSelect4(t *testing.T) {
+ done := make(chan bool, 1)
+ var x int
+ go func() {
+ select {
+ default:
+ x = 2
+ }
+ done <- true
+ }()
+ _ = x
+ <-done
+}
+
+// The idea behind this test:
+// there are two variables, access to one
+// of them is synchronized, access to the other
+// is not.
+// Select must (unconditionally) choose the non-synchronized variable,
+// thus causing exactly one race.
+// Currently this test doesn't look like it accomplishes
+// this goal.
+func TestRaceSelect5(t *testing.T) {
+ done := make(chan bool, 1)
+ c1 := make(chan bool, 1)
+ c2 := make(chan bool)
+ var x, y int
+ go func() {
+ select {
+ case c1 <- true:
+ x = 1
+ case c2 <- true:
+ y = 1
+ }
+ done <- true
+ }()
+ _ = x
+ _ = y
+ <-done
+}
+
+// select statements may introduce
+// flakiness: whether this test contains
+// a race depends on the scheduling
+// (some may argue that the code contains
+// this race by definition)
+/*
+func TestFlakyDefault(t *testing.T) {
+ var x int
+ c := make(chan bool, 1)
+ done := make(chan bool, 1)
+ go func() {
+ select {
+ case <-c:
+ x = 2
+ default:
+ x = 3
+ }
+ done <- true
+ }()
+ x = 1
+ c <- true
+ _ = x
+ <-done
+}
+*/
diff --git a/src/runtime/race/testdata/slice_test.go b/src/runtime/race/testdata/slice_test.go
new file mode 100644
index 000000000..5702d1ac8
--- /dev/null
+++ b/src/runtime/race/testdata/slice_test.go
@@ -0,0 +1,485 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "testing"
+)
+
+func TestRaceSliceRW(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]int, 2)
+ go func() {
+ a[1] = 1
+ ch <- true
+ }()
+ _ = a[1]
+ <-ch
+}
+
+func TestNoRaceSliceRW(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]int, 2)
+ go func() {
+ a[0] = 1
+ ch <- true
+ }()
+ _ = a[1]
+ <-ch
+}
+
+func TestRaceSliceWW(t *testing.T) {
+ a := make([]int, 10)
+ ch := make(chan bool, 1)
+ go func() {
+ a[1] = 1
+ ch <- true
+ }()
+ a[1] = 2
+ <-ch
+}
+
+func TestNoRaceArrayWW(t *testing.T) {
+ var a [5]int
+ ch := make(chan bool, 1)
+ go func() {
+ a[0] = 1
+ ch <- true
+ }()
+ a[1] = 2
+ <-ch
+}
+
+func TestRaceArrayWW(t *testing.T) {
+ var a [5]int
+ ch := make(chan bool, 1)
+ go func() {
+ a[1] = 1
+ ch <- true
+ }()
+ a[1] = 2
+ <-ch
+}
+
+func TestNoRaceSliceWriteLen(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]bool, 1)
+ go func() {
+ a[0] = true
+ ch <- true
+ }()
+ _ = len(a)
+ <-ch
+}
+
+func TestNoRaceSliceWriteCap(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]uint64, 100)
+ go func() {
+ a[50] = 123
+ ch <- true
+ }()
+ _ = cap(a)
+ <-ch
+}
+
+func TestRaceSliceCopyRead(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]int, 10)
+ b := make([]int, 10)
+ go func() {
+ _ = a[5]
+ ch <- true
+ }()
+ copy(a, b)
+ <-ch
+}
+
+func TestNoRaceSliceWriteCopy(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]int, 10)
+ b := make([]int, 10)
+ go func() {
+ a[5] = 1
+ ch <- true
+ }()
+ copy(a[:5], b[:5])
+ <-ch
+}
+
+func TestRaceSliceCopyWrite2(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]int, 10)
+ b := make([]int, 10)
+ go func() {
+ b[5] = 1
+ ch <- true
+ }()
+ copy(a, b)
+ <-ch
+}
+
+func TestRaceSliceCopyWrite3(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]byte, 10)
+ go func() {
+ a[7] = 1
+ ch <- true
+ }()
+ copy(a, "qwertyqwerty")
+ <-ch
+}
+
+func TestNoRaceSliceCopyRead(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]int, 10)
+ b := make([]int, 10)
+ go func() {
+ _ = b[5]
+ ch <- true
+ }()
+ copy(a, b)
+ <-ch
+}
+
+func TestNoRaceSliceWriteSlice2(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]float64, 10)
+ go func() {
+ a[2] = 1.0
+ ch <- true
+ }()
+ _ = a[0:5]
+ <-ch
+}
+
+func TestRaceSliceWriteSlice(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]float64, 10)
+ go func() {
+ a[2] = 1.0
+ ch <- true
+ }()
+ a = a[5:10]
+ <-ch
+}
+
+func TestNoRaceSliceWriteSlice(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]float64, 10)
+ go func() {
+ a[2] = 1.0
+ ch <- true
+ }()
+ _ = a[5:10]
+ <-ch
+}
+
+func TestNoRaceSliceLenCap(t *testing.T) {
+ ch := make(chan bool, 1)
+ a := make([]struct{}, 10)
+ go func() {
+ _ = len(a)
+ ch <- true
+ }()
+ _ = cap(a)
+ <-ch
+}
+
+func TestNoRaceStructSlicesRangeWrite(t *testing.T) {
+ type Str struct {
+ a []int
+ b []int
+ }
+ ch := make(chan bool, 1)
+ var s Str
+ s.a = make([]int, 10)
+ s.b = make([]int, 10)
+ go func() {
+ for range s.a {
+ }
+ ch <- true
+ }()
+ s.b[5] = 5
+ <-ch
+}
+
+func TestRaceSliceDifferent(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ s2 := s
+ go func() {
+ s[3] = 3
+ c <- true
+ }()
+ // false negative because s2 is PAUTO w/o PHEAP
+ // so we do not instrument it
+ s2[3] = 3
+ <-c
+}
+
+func TestRaceSliceRangeWrite(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ s[3] = 3
+ c <- true
+ }()
+ for _, v := range s {
+ _ = v
+ }
+ <-c
+}
+
+func TestNoRaceSliceRangeWrite(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ s[3] = 3
+ c <- true
+ }()
+ for range s {
+ }
+ <-c
+}
+
+func TestRaceSliceRangeAppend(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ s = append(s, 3)
+ c <- true
+ }()
+ for range s {
+ }
+ <-c
+}
+
+func TestNoRaceSliceRangeAppend(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ _ = append(s, 3)
+ c <- true
+ }()
+ for range s {
+ }
+ <-c
+}
+
+func TestRaceSliceVarWrite(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ s[3] = 3
+ c <- true
+ }()
+ s = make([]int, 20)
+ <-c
+}
+
+func TestRaceSliceVarRead(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ _ = s[3]
+ c <- true
+ }()
+ s = make([]int, 20)
+ <-c
+}
+
+func TestRaceSliceVarRange(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ for range s {
+ }
+ c <- true
+ }()
+ s = make([]int, 20)
+ <-c
+}
+
+func TestRaceSliceVarAppend(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ _ = append(s, 10)
+ c <- true
+ }()
+ s = make([]int, 20)
+ <-c
+}
+
+func TestRaceSliceVarCopy(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ s2 := make([]int, 10)
+ copy(s, s2)
+ c <- true
+ }()
+ s = make([]int, 20)
+ <-c
+}
+
+func TestRaceSliceVarCopy2(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ s2 := make([]int, 10)
+ copy(s2, s)
+ c <- true
+ }()
+ s = make([]int, 20)
+ <-c
+}
+
+func TestRaceSliceAppend(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10, 20)
+ go func() {
+ _ = append(s, 1)
+ c <- true
+ }()
+ _ = append(s, 2)
+ <-c
+}
+
+func TestRaceSliceAppendWrite(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ _ = append(s, 1)
+ c <- true
+ }()
+ s[0] = 42
+ <-c
+}
+
+func TestRaceSliceAppendSlice(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ go func() {
+ s2 := make([]int, 10)
+ _ = append(s, s2...)
+ c <- true
+ }()
+ s[0] = 42
+ <-c
+}
+
+func TestRaceSliceAppendSlice2(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ s2foobar := make([]int, 10)
+ go func() {
+ _ = append(s, s2foobar...)
+ c <- true
+ }()
+ s2foobar[5] = 42
+ <-c
+}
+
+func TestRaceSliceAppendString(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]byte, 10)
+ go func() {
+ _ = append(s, "qwerty"...)
+ c <- true
+ }()
+ s[0] = 42
+ <-c
+}
+
+func TestNoRaceSliceIndexAccess(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ v := 0
+ go func() {
+ _ = v
+ c <- true
+ }()
+ s[v] = 1
+ <-c
+}
+
+func TestNoRaceSliceIndexAccess2(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ v := 0
+ go func() {
+ _ = v
+ c <- true
+ }()
+ _ = s[v]
+ <-c
+}
+
+func TestRaceSliceIndexAccess(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ v := 0
+ go func() {
+ v = 1
+ c <- true
+ }()
+ s[v] = 1
+ <-c
+}
+
+func TestRaceSliceIndexAccess2(t *testing.T) {
+ c := make(chan bool, 1)
+ s := make([]int, 10)
+ v := 0
+ go func() {
+ v = 1
+ c <- true
+ }()
+ _ = s[v]
+ <-c
+}
+
+func TestRaceSliceByteToString(t *testing.T) {
+ c := make(chan string)
+ s := make([]byte, 10)
+ go func() {
+ c <- string(s)
+ }()
+ s[0] = 42
+ <-c
+}
+
+func TestRaceSliceRuneToString(t *testing.T) {
+ c := make(chan string)
+ s := make([]rune, 10)
+ go func() {
+ c <- string(s)
+ }()
+ s[9] = 42
+ <-c
+}
+
+func TestRaceConcatString(t *testing.T) {
+ s := "hello"
+ c := make(chan string, 1)
+ go func() {
+ c <- s + " world"
+ }()
+ s = "world"
+ <-c
+}
+
+func TestRaceCompareString(t *testing.T) {
+ s1 := "hello"
+ s2 := "world"
+ c := make(chan bool, 1)
+ go func() {
+ c <- s1 == s2
+ }()
+ s1 = s2
+ <-c
+}
diff --git a/src/runtime/race/testdata/sync_test.go b/src/runtime/race/testdata/sync_test.go
new file mode 100644
index 000000000..93af0b1e6
--- /dev/null
+++ b/src/runtime/race/testdata/sync_test.go
@@ -0,0 +1,216 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "sync"
+ "testing"
+ "time"
+)
+
+func TestNoRaceCond(t *testing.T) { // tsan's test02
+ ch := make(chan bool, 1)
+ var x int = 0
+ var mu sync.Mutex
+ var cond *sync.Cond = sync.NewCond(&mu)
+ var condition int = 0
+ var waker func()
+ waker = func() {
+ x = 1
+ mu.Lock()
+ condition = 1
+ cond.Signal()
+ mu.Unlock()
+ }
+
+ var waiter func()
+ waiter = func() {
+ go waker()
+ cond.L.Lock()
+ for condition != 1 {
+ cond.Wait()
+ }
+ cond.L.Unlock()
+ x = 2
+ ch <- true
+ }
+ go waiter()
+ <-ch
+}
+
+func TestRaceCond(t *testing.T) { // tsan's test50
+ ch := make(chan bool, 2)
+
+ var x int = 0
+ var mu sync.Mutex
+ var condition int = 0
+ var cond *sync.Cond = sync.NewCond(&mu)
+
+ var waker func() = func() {
+ <-time.After(1e5)
+ x = 1
+ mu.Lock()
+ condition = 1
+ cond.Signal()
+ mu.Unlock()
+ <-time.After(1e5)
+ mu.Lock()
+ x = 3
+ mu.Unlock()
+ ch <- true
+ }
+
+ var waiter func() = func() {
+ mu.Lock()
+ for condition != 1 {
+ cond.Wait()
+ }
+ mu.Unlock()
+ x = 2
+ ch <- true
+ }
+ x = 0
+ go waker()
+ go waiter()
+ <-ch
+ <-ch
+}
+
+// We do not currently parse this test
+// automatically. The intent is that the goroutine
+// creation stack is inspected manually and verified
+// to contain no off-by-one errors.
+func TestRaceAnnounceThreads(t *testing.T) {
+ const N = 7
+ allDone := make(chan bool, N)
+
+ var x int
+
+ var f, g, h func()
+ f = func() {
+ x = 1
+ go g()
+ go func() {
+ x = 1
+ allDone <- true
+ }()
+ x = 2
+ allDone <- true
+ }
+
+ g = func() {
+ for i := 0; i < 2; i++ {
+ go func() {
+ x = 1
+ allDone <- true
+ }()
+ allDone <- true
+ }
+ }
+
+ h = func() {
+ x = 1
+ x = 2
+ go f()
+ allDone <- true
+ }
+
+ go h()
+
+ for i := 0; i < N; i++ {
+ <-allDone
+ }
+}
+
+func TestNoRaceAfterFunc1(t *testing.T) {
+ i := 2
+ c := make(chan bool)
+ var f func()
+ f = func() {
+ i--
+ if i >= 0 {
+ time.AfterFunc(0, f)
+ } else {
+ c <- true
+ }
+ }
+
+ time.AfterFunc(0, f)
+ <-c
+}
+
+func TestNoRaceAfterFunc2(t *testing.T) {
+ var x int
+ timer := time.AfterFunc(10, func() {
+ x = 1
+ })
+ defer timer.Stop()
+ _ = x
+}
+
+func TestNoRaceAfterFunc3(t *testing.T) {
+ c := make(chan bool, 1)
+ x := 0
+ time.AfterFunc(1e7, func() {
+ x = 1
+ c <- true
+ })
+ <-c
+}
+
+func TestRaceAfterFunc3(t *testing.T) {
+ c := make(chan bool, 2)
+ x := 0
+ time.AfterFunc(1e7, func() {
+ x = 1
+ c <- true
+ })
+ time.AfterFunc(2e7, func() {
+ x = 2
+ c <- true
+ })
+ <-c
+ <-c
+}
+
+// This test's output is intended to be
+// observed manually. One should check
+// that the goroutine creation stack is
+// comprehensible.
+func TestRaceGoroutineCreationStack(t *testing.T) {
+ var x int
+ var ch = make(chan bool, 1)
+
+ f1 := func() {
+ x = 1
+ ch <- true
+ }
+ f2 := func() { go f1() }
+ f3 := func() { go f2() }
+ f4 := func() { go f3() }
+
+ go f4()
+ x = 2
+ <-ch
+}
+
+// A nil pointer in a mutex method call should not
+// corrupt the race detector state.
+// Used to hang indefinitely.
+func TestNoRaceNilMutexCrash(t *testing.T) {
+ var mutex sync.Mutex
+ panics := 0
+ defer func() {
+ if x := recover(); x != nil {
+ mutex.Lock()
+ panics++
+ mutex.Unlock()
+ } else {
+ panic("no panic")
+ }
+ }()
+ var othermutex *sync.RWMutex
+ othermutex.RLock()
+}
diff --git a/src/runtime/race/testdata/waitgroup_test.go b/src/runtime/race/testdata/waitgroup_test.go
new file mode 100644
index 000000000..ff152b0ab
--- /dev/null
+++ b/src/runtime/race/testdata/waitgroup_test.go
@@ -0,0 +1,352 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package race_test
+
+import (
+ "runtime"
+ "sync"
+ "testing"
+ "time"
+)
+
+func TestNoRaceWaitGroup(t *testing.T) {
+ var x int
+ var wg sync.WaitGroup
+ n := 1
+ for i := 0; i < n; i++ {
+ wg.Add(1)
+ j := i
+ go func() {
+ x = j
+ wg.Done()
+ }()
+ }
+ wg.Wait()
+}
+
+func TestRaceWaitGroup(t *testing.T) {
+ var x int
+ var wg sync.WaitGroup
+ n := 2
+ for i := 0; i < n; i++ {
+ wg.Add(1)
+ j := i
+ go func() {
+ x = j
+ wg.Done()
+ }()
+ }
+ wg.Wait()
+}
+
+func TestNoRaceWaitGroup2(t *testing.T) {
+ var x int
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ x = 1
+ wg.Done()
+ }()
+ wg.Wait()
+ x = 2
+}
+
+// Incrementing the counter in Add and locking wg's mutex.
+func TestRaceWaitGroupAsMutex(t *testing.T) {
+ var x int
+ var wg sync.WaitGroup
+ c := make(chan bool, 2)
+ go func() {
+ wg.Wait()
+ time.Sleep(100 * time.Millisecond)
+ wg.Add(+1)
+ x = 1
+ wg.Add(-1)
+ c <- true
+ }()
+ go func() {
+ wg.Wait()
+ time.Sleep(100 * time.Millisecond)
+ wg.Add(+1)
+ x = 2
+ wg.Add(-1)
+ c <- true
+ }()
+ <-c
+ <-c
+}
+
+// Incorrect usage: Add is too late.
+func TestRaceWaitGroupWrongWait(t *testing.T) {
+ c := make(chan bool, 2)
+ var x int
+ var wg sync.WaitGroup
+ go func() {
+ wg.Add(1)
+ runtime.Gosched()
+ x = 1
+ wg.Done()
+ c <- true
+ }()
+ go func() {
+ wg.Add(1)
+ runtime.Gosched()
+ x = 2
+ wg.Done()
+ c <- true
+ }()
+ wg.Wait()
+ <-c
+ <-c
+}
+
+func TestRaceWaitGroupWrongAdd(t *testing.T) {
+ c := make(chan bool, 2)
+ var wg sync.WaitGroup
+ go func() {
+ wg.Add(1)
+ time.Sleep(100 * time.Millisecond)
+ wg.Done()
+ c <- true
+ }()
+ go func() {
+ wg.Add(1)
+ time.Sleep(100 * time.Millisecond)
+ wg.Done()
+ c <- true
+ }()
+ time.Sleep(50 * time.Millisecond)
+ wg.Wait()
+ <-c
+ <-c
+}
+
+func TestNoRaceWaitGroupMultipleWait(t *testing.T) {
+ c := make(chan bool, 2)
+ var wg sync.WaitGroup
+ go func() {
+ wg.Wait()
+ c <- true
+ }()
+ go func() {
+ wg.Wait()
+ c <- true
+ }()
+ wg.Wait()
+ <-c
+ <-c
+}
+
+func TestNoRaceWaitGroupMultipleWait2(t *testing.T) {
+ c := make(chan bool, 2)
+ var wg sync.WaitGroup
+ wg.Add(2)
+ go func() {
+ wg.Done()
+ wg.Wait()
+ c <- true
+ }()
+ go func() {
+ wg.Done()
+ wg.Wait()
+ c <- true
+ }()
+ wg.Wait()
+ <-c
+ <-c
+}
+
+func TestNoRaceWaitGroupMultipleWait3(t *testing.T) {
+ const P = 3
+ var data [P]int
+ done := make(chan bool, P)
+ var wg sync.WaitGroup
+ wg.Add(P)
+ for p := 0; p < P; p++ {
+ go func(p int) {
+ data[p] = 42
+ wg.Done()
+ }(p)
+ }
+ for p := 0; p < P; p++ {
+ go func() {
+ wg.Wait()
+ for p1 := 0; p1 < P; p1++ {
+ _ = data[p1]
+ }
+ done <- true
+ }()
+ }
+ for p := 0; p < P; p++ {
+ <-done
+ }
+}
+
+// Correct usage but still a race
+func TestRaceWaitGroup2(t *testing.T) {
+ var x int
+ var wg sync.WaitGroup
+ wg.Add(2)
+ go func() {
+ x = 1
+ wg.Done()
+ }()
+ go func() {
+ x = 2
+ wg.Done()
+ }()
+ wg.Wait()
+}
+
+func TestNoRaceWaitGroupPanicRecover(t *testing.T) {
+ var x int
+ var wg sync.WaitGroup
+ defer func() {
+ err := recover()
+ if err != "sync: negative WaitGroup counter" {
+ t.Fatalf("Unexpected panic: %#v", err)
+ }
+ x = 2
+ }()
+ x = 1
+ wg.Add(-1)
+}
+
+// TODO: this is actually a panic-synchronization test, not a
+// WaitGroup test. Move it to another *_test file.
+// Is it possible to get a race by synchronization via panic?
+func TestNoRaceWaitGroupPanicRecover2(t *testing.T) {
+ var x int
+ var wg sync.WaitGroup
+ ch := make(chan bool, 1)
+ var f func() = func() {
+ x = 2
+ ch <- true
+ }
+ go func() {
+ defer func() {
+ err := recover()
+ if err != "sync: negative WaitGroup counter" {
+ }
+ go f()
+ }()
+ x = 1
+ wg.Add(-1)
+ }()
+
+ <-ch
+}
+
+func TestNoRaceWaitGroupTransitive(t *testing.T) {
+ x, y := 0, 0
+ var wg sync.WaitGroup
+ wg.Add(2)
+ go func() {
+ x = 42
+ wg.Done()
+ }()
+ go func() {
+ time.Sleep(1e7)
+ y = 42
+ wg.Done()
+ }()
+ wg.Wait()
+ _ = x
+ _ = y
+}
+
+func TestNoRaceWaitGroupReuse(t *testing.T) {
+ const P = 3
+ var data [P]int
+ var wg sync.WaitGroup
+ for try := 0; try < 3; try++ {
+ wg.Add(P)
+ for p := 0; p < P; p++ {
+ go func(p int) {
+ data[p]++
+ wg.Done()
+ }(p)
+ }
+ wg.Wait()
+ for p := 0; p < P; p++ {
+ data[p]++
+ }
+ }
+}
+
+func TestNoRaceWaitGroupReuse2(t *testing.T) {
+ const P = 3
+ var data [P]int
+ var wg sync.WaitGroup
+ for try := 0; try < 3; try++ {
+ wg.Add(P)
+ for p := 0; p < P; p++ {
+ go func(p int) {
+ data[p]++
+ wg.Done()
+ }(p)
+ }
+ done := make(chan bool)
+ go func() {
+ wg.Wait()
+ for p := 0; p < P; p++ {
+ data[p]++
+ }
+ done <- true
+ }()
+ wg.Wait()
+ <-done
+ for p := 0; p < P; p++ {
+ data[p]++
+ }
+ }
+}
+
+func TestRaceWaitGroupReuse(t *testing.T) {
+ const P = 3
+ const T = 3
+ done := make(chan bool, T)
+ var wg sync.WaitGroup
+ for try := 0; try < T; try++ {
+ var data [P]int
+ wg.Add(P)
+ for p := 0; p < P; p++ {
+ go func(p int) {
+ time.Sleep(50 * time.Millisecond)
+ data[p]++
+ wg.Done()
+ }(p)
+ }
+ go func() {
+ wg.Wait()
+ for p := 0; p < P; p++ {
+ data[p]++
+ }
+ done <- true
+ }()
+ time.Sleep(100 * time.Millisecond)
+ wg.Wait()
+ }
+ for try := 0; try < T; try++ {
+ <-done
+ }
+}
+
+func TestNoRaceWaitGroupConcurrentAdd(t *testing.T) {
+ const P = 4
+ waiting := make(chan bool, P)
+ var wg sync.WaitGroup
+ for p := 0; p < P; p++ {
+ go func() {
+ wg.Add(1)
+ waiting <- true
+ wg.Done()
+ }()
+ }
+ for p := 0; p < P; p++ {
+ <-waiting
+ }
+ wg.Wait()
+}
diff --git a/src/runtime/race0.go b/src/runtime/race0.go
new file mode 100644
index 000000000..5d90cc859
--- /dev/null
+++ b/src/runtime/race0.go
@@ -0,0 +1,37 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !race
+
+// Dummy race detection API, used when not built with -race.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+const raceenabled = false
+
+// Because raceenabled is false, none of these functions should be called.
+
+func raceReadObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr) { gothrow("race") }
+func raceWriteObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr) { gothrow("race") }
+func raceinit() { gothrow("race") }
+func racefini() { gothrow("race") }
+func racemapshadow(addr unsafe.Pointer, size uintptr) { gothrow("race") }
+func racewritepc(addr unsafe.Pointer, callerpc, pc uintptr) { gothrow("race") }
+func racereadpc(addr unsafe.Pointer, callerpc, pc uintptr) { gothrow("race") }
+func racereadrangepc(addr unsafe.Pointer, sz, callerpc, pc uintptr) { gothrow("race") }
+func racewriterangepc(addr unsafe.Pointer, sz, callerpc, pc uintptr) { gothrow("race") }
+func raceacquire(addr unsafe.Pointer) { gothrow("race") }
+func raceacquireg(gp *g, addr unsafe.Pointer) { gothrow("race") }
+func racerelease(addr unsafe.Pointer) { gothrow("race") }
+func racereleaseg(gp *g, addr unsafe.Pointer) { gothrow("race") }
+func racereleasemerge(addr unsafe.Pointer) { gothrow("race") }
+func racereleasemergeg(gp *g, addr unsafe.Pointer) { gothrow("race") }
+func racefingo() { gothrow("race") }
+func racemalloc(p unsafe.Pointer, sz uintptr) { gothrow("race") }
+func racegostart(pc uintptr) uintptr { gothrow("race"); return 0 }
+func racegoend() { gothrow("race") }
diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s
new file mode 100644
index 000000000..bdea28c7c
--- /dev/null
+++ b/src/runtime/race_amd64.s
@@ -0,0 +1,383 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build race
+
+#include "zasm_GOOS_GOARCH.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// The following thunks allow calling the gcc-compiled race runtime directly
+// from Go code without going all the way through cgo.
+// First, it's much faster (up to 50% speedup for real Go programs).
+// Second, it eliminates race-related special cases from cgocall and the scheduler.
+// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
+
+// A brief recap of the amd64 calling convention.
+// Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
+// Callee-saved registers are: BX, BP, R12-R15.
+// SP must be 16-byte aligned.
+// On Windows:
+// Arguments are passed in CX, DX, R8, R9, the rest is on stack.
+// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
+// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
+// http://msdn.microsoft.com/en-us/library/ms235286.aspx
+// We do not do this, because it seems to be intended for vararg/unprototyped functions.
+// The gcc-compiled race runtime does not try to use that space.
+
+#ifdef GOOS_windows
+#define RARG0 CX
+#define RARG1 DX
+#define RARG2 R8
+#define RARG3 R9
+#else
+#define RARG0 DI
+#define RARG1 SI
+#define RARG2 DX
+#define RARG3 CX
+#endif
+
+// func runtime·raceread(addr uintptr)
+// Called from instrumented code.
+TEXT runtime·raceread(SB), NOSPLIT, $0-8
+ MOVQ addr+0(FP), RARG1
+ MOVQ (SP), RARG2
+ // void __tsan_read(ThreadState *thr, void *addr, void *pc);
+ MOVQ $__tsan_read(SB), AX
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceRead(addr uintptr)
+TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
+ // This needs to be a tail call, because raceread reads caller pc.
+ JMP runtime·raceread(SB)
+
+// void runtime·racereadpc(void *addr, void *callpc, void *pc)
+TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
+ MOVQ addr+0(FP), RARG1
+ MOVQ callpc+8(FP), RARG2
+ MOVQ pc+16(FP), RARG3
+ // void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
+ MOVQ $__tsan_read_pc(SB), AX
+ JMP racecalladdr<>(SB)
+
+// func runtime·racewrite(addr uintptr)
+// Called from instrumented code.
+TEXT runtime·racewrite(SB), NOSPLIT, $0-8
+ MOVQ addr+0(FP), RARG1
+ MOVQ (SP), RARG2
+ // void __tsan_write(ThreadState *thr, void *addr, void *pc);
+ MOVQ $__tsan_write(SB), AX
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceWrite(addr uintptr)
+TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
+ // This needs to be a tail call, because racewrite reads caller pc.
+ JMP runtime·racewrite(SB)
+
+// void runtime·racewritepc(void *addr, void *callpc, void *pc)
+TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
+ MOVQ addr+0(FP), RARG1
+ MOVQ callpc+8(FP), RARG2
+ MOVQ pc+16(FP), RARG3
+ // void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
+ MOVQ $__tsan_write_pc(SB), AX
+ JMP racecalladdr<>(SB)
+
+// func runtime·racereadrange(addr, size uintptr)
+// Called from instrumented code.
+TEXT runtime·racereadrange(SB), NOSPLIT, $0-16
+ MOVQ addr+0(FP), RARG1
+ MOVQ size+8(FP), RARG2
+ MOVQ (SP), RARG3
+ // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVQ $__tsan_read_range(SB), AX
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceReadRange(addr, size uintptr)
+TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
+ // This needs to be a tail call, because racereadrange reads caller pc.
+ JMP runtime·racereadrange(SB)
+
+// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
+TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
+ MOVQ addr+0(FP), RARG1
+ MOVQ size+8(FP), RARG2
+ MOVQ pc+16(FP), RARG3
+ // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVQ $__tsan_read_range(SB), AX
+ JMP racecalladdr<>(SB)
+
+// func runtime·racewriterange(addr, size uintptr)
+// Called from instrumented code.
+TEXT runtime·racewriterange(SB), NOSPLIT, $0-16
+ MOVQ addr+0(FP), RARG1
+ MOVQ size+8(FP), RARG2
+ MOVQ (SP), RARG3
+ // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVQ $__tsan_write_range(SB), AX
+ JMP racecalladdr<>(SB)
+
+// func runtime·RaceWriteRange(addr, size uintptr)
+TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
+ // This needs to be a tail call, because racewriterange reads caller pc.
+ JMP runtime·racewriterange(SB)
+
+// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
+TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
+ MOVQ addr+0(FP), RARG1
+ MOVQ size+8(FP), RARG2
+ MOVQ pc+16(FP), RARG3
+ // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
+ MOVQ $__tsan_write_range(SB), AX
+ JMP racecalladdr<>(SB)
+
+// If addr (RARG1) is out of range, do nothing.
+// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
+TEXT racecalladdr<>(SB), NOSPLIT, $0-0
+ get_tls(R12)
+ MOVQ g(R12), R14
+ MOVQ g_racectx(R14), RARG0 // goroutine context
+ // Check that addr is within [arenastart, arenaend) or within [noptrdata, enoptrbss).
+ CMPQ RARG1, runtime·racearenastart(SB)
+ JB racecalladdr_data
+ CMPQ RARG1, runtime·racearenaend(SB)
+ JB racecalladdr_call
+racecalladdr_data:
+ MOVQ $runtime·noptrdata(SB), R13
+ CMPQ RARG1, R13
+ JB racecalladdr_ret
+ MOVQ $runtime·enoptrbss(SB), R13
+ CMPQ RARG1, R13
+ JAE racecalladdr_ret
+racecalladdr_call:
+ MOVQ AX, AX // w/o this 6a miscompiles this function
+ JMP racecall<>(SB)
+racecalladdr_ret:
+ RET
+
+// func runtime·racefuncenter(pc uintptr)
+// Called from instrumented code.
+TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
+ MOVQ DX, R15 // save function entry context (for closures)
+ get_tls(R12)
+ MOVQ g(R12), R14
+ MOVQ g_racectx(R14), RARG0 // goroutine context
+ MOVQ callpc+0(FP), RARG1
+ // void __tsan_func_enter(ThreadState *thr, void *pc);
+ MOVQ $__tsan_func_enter(SB), AX
+ CALL racecall<>(SB)
+ MOVQ R15, DX // restore function entry context
+ RET
+
+// func runtime·racefuncexit()
+// Called from instrumented code.
+TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
+ get_tls(R12)
+ MOVQ g(R12), R14
+ MOVQ g_racectx(R14), RARG0 // goroutine context
+ // void __tsan_func_exit(ThreadState *thr);
+ MOVQ $__tsan_func_exit(SB), AX
+ JMP racecall<>(SB)
+
+// Atomic operations for sync/atomic package.
+
+// Load
+TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic32_load(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic64_load(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·LoadInt32(SB)
+
+TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·LoadInt64(SB)
+
+TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·LoadInt64(SB)
+
+TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·LoadInt64(SB)
+
+// Store
+TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic32_store(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic64_store(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·StoreInt32(SB)
+
+TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·StoreInt64(SB)
+
+TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·StoreInt64(SB)
+
+TEXT sync∕atomic·StorePointer(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·StoreInt64(SB)
+
+// Swap
+TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic32_exchange(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic64_exchange(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·SwapInt32(SB)
+
+TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·SwapInt64(SB)
+
+TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·SwapInt64(SB)
+
+TEXT sync∕atomic·SwapPointer(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·SwapInt64(SB)
+
+// Add
+TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic32_fetch_add(SB), AX
+ CALL racecallatomic<>(SB)
+ MOVL add+8(FP), AX // convert fetch_add to add_fetch
+ ADDL AX, ret+16(FP)
+ RET
+
+TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic64_fetch_add(SB), AX
+ CALL racecallatomic<>(SB)
+ MOVQ add+8(FP), AX // convert fetch_add to add_fetch
+ ADDQ AX, ret+16(FP)
+ RET
+
+TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·AddInt32(SB)
+
+TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·AddInt64(SB)
+
+TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·AddInt64(SB)
+
+TEXT sync∕atomic·AddPointer(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·AddInt64(SB)
+
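The Add stubs above compensate for a semantic mismatch: __tsan_go_atomic32_fetch_add and __tsan_go_atomic64_fetch_add return the value the word held before the addition, while sync/atomic.AddInt32 and AddInt64 must return the value after it, which is why the stubs add the delta to the result slot after racecallatomic returns. A minimal Go sketch of that conversion, where fetchAdd is a hypothetical stand-in for the TSan call:

	// addFetch converts fetch-add semantics (returns the old value) into
	// add-fetch semantics (returns the new value), as the stubs do.
	func addFetch(fetchAdd func(addr *int64, delta int64) int64, addr *int64, delta int64) int64 {
		old := fetchAdd(addr, delta)
		return old + delta
	}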
+// CompareAndSwap
+TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic32_compare_exchange(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
+ MOVQ $__tsan_go_atomic64_compare_exchange(SB), AX
+ CALL racecallatomic<>(SB)
+ RET
+
+TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·CompareAndSwapInt32(SB)
+
+TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·CompareAndSwapInt64(SB)
+
+TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·CompareAndSwapInt64(SB)
+
+TEXT sync∕atomic·CompareAndSwapPointer(SB), NOSPLIT, $0-0
+ JMP sync∕atomic·CompareAndSwapInt64(SB)
+
+// Generic atomic operation implementation.
+// AX already contains target function.
+TEXT racecallatomic<>(SB), NOSPLIT, $0-0
+ // Trigger SIGSEGV early.
+ MOVQ 16(SP), R12
+ MOVL (R12), R12
+ get_tls(R12)
+ MOVQ g(R12), R14
+ MOVQ g_racectx(R14), RARG0 // goroutine context
+ MOVQ 8(SP), RARG1 // caller pc
+ MOVQ (SP), RARG2 // pc
+ LEAQ 16(SP), RARG3 // arguments
+ JMP racecall<>(SB)
+
+// void runtime·racecall(void(*f)(...), ...)
+// Calls C function f from race runtime and passes up to 4 arguments to it.
+// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
+TEXT runtime·racecall(SB), NOSPLIT, $0-0
+ MOVQ fn+0(FP), AX
+ MOVQ arg0+8(FP), RARG0
+ MOVQ arg1+16(FP), RARG1
+ MOVQ arg2+24(FP), RARG2
+ MOVQ arg3+32(FP), RARG3
+ JMP racecall<>(SB)
+
+// Switches SP to g0 stack and calls (AX). Arguments already set.
+TEXT racecall<>(SB), NOSPLIT, $0-0
+ get_tls(R12)
+ MOVQ g(R12), R14
+ MOVQ g_m(R14), R13
+ // Switch to g0 stack.
+ MOVQ SP, R12 // callee-saved, preserved across the CALL
+ MOVQ m_g0(R13), R10
+ CMPQ R10, R14
+ JE racecall_cont // already on g0
+ MOVQ (g_sched+gobuf_sp)(R10), SP
+racecall_cont:
+ ANDQ $~15, SP // alignment for gcc ABI
+ CALL AX
+ MOVQ R12, SP
+ RET
+
+// C->Go callback thunk that allows C code to call runtime·racesymbolize.
+// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
+// The overall effect of Go->C->Go call chain is similar to that of mcall.
+TEXT runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
+ // Save callee-saved registers (Go code won't respect that).
+	// This is a superset of the darwin/linux/windows callee-saved registers.
+ PUSHQ BX
+ PUSHQ BP
+ PUSHQ DI
+ PUSHQ SI
+ PUSHQ R12
+ PUSHQ R13
+ PUSHQ R14
+ PUSHQ R15
+ // Set g = g0.
+ get_tls(R12)
+ MOVQ g(R12), R13
+ MOVQ g_m(R13), R13
+ MOVQ m_g0(R13), R14
+ MOVQ R14, g(R12) // g = m->g0
+ MOVQ RARG0, 0(SP) // func arg
+ CALL runtime·racesymbolize(SB)
+ // All registers are smashed after Go code, reload.
+ get_tls(R12)
+ MOVQ g(R12), R13
+ MOVQ g_m(R13), R13
+ MOVQ m_curg(R13), R14
+ MOVQ R14, g(R12) // g = m->curg
+ // Restore callee-saved registers.
+ POPQ R15
+ POPQ R14
+ POPQ R13
+ POPQ R12
+ POPQ SI
+ POPQ DI
+ POPQ BP
+ POPQ BX
+ RET
diff --git a/src/runtime/rdebug.go b/src/runtime/rdebug.go
new file mode 100644
index 000000000..e5e691122
--- /dev/null
+++ b/src/runtime/rdebug.go
@@ -0,0 +1,37 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+func setMaxStack(in int) (out int) {
+ out = int(maxstacksize)
+ maxstacksize = uintptr(in)
+ return out
+}
+
+func setGCPercent(in int32) (out int32) {
+ mp := acquirem()
+ mp.scalararg[0] = uintptr(int(in))
+ onM(setgcpercent_m)
+ out = int32(int(mp.scalararg[0]))
+ releasem(mp)
+ return out
+}
+
+func setPanicOnFault(new bool) (old bool) {
+ mp := acquirem()
+ old = mp.curg.paniconfault
+ mp.curg.paniconfault = new
+ releasem(mp)
+ return old
+}
+
+func setMaxThreads(in int) (out int) {
+ mp := acquirem()
+ mp.scalararg[0] = uintptr(in)
+ onM(setmaxthreads_m)
+ out = int(mp.scalararg[0])
+ releasem(mp)
+ return out
+}
diff --git a/src/runtime/rt0_android_arm.s b/src/runtime/rt0_android_arm.s
new file mode 100644
index 000000000..6b65fb47b
--- /dev/null
+++ b/src/runtime/rt0_android_arm.s
@@ -0,0 +1,11 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_arm_android(SB),NOSPLIT,$-4
+ MOVW (R13), R0 // argc
+ MOVW $4(R13), R1 // argv
+ MOVW $_rt0_arm_linux1(SB), R4
+ B (R4)
diff --git a/src/runtime/rt0_darwin_386.s b/src/runtime/rt0_darwin_386.s
new file mode 100644
index 000000000..4c8c92dda
--- /dev/null
+++ b/src/runtime/rt0_darwin_386.s
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_darwin(SB),NOSPLIT,$8
+ MOVL 8(SP), AX
+ LEAL 12(SP), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_darwin_amd64.s b/src/runtime/rt0_darwin_amd64.s
new file mode 100644
index 000000000..452d85455
--- /dev/null
+++ b/src/runtime/rt0_darwin_amd64.s
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_darwin(SB),NOSPLIT,$-8
+ LEAQ 8(SP), SI // argv
+ MOVQ 0(SP), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
diff --git a/src/runtime/rt0_dragonfly_386.s b/src/runtime/rt0_dragonfly_386.s
new file mode 100644
index 000000000..548ba796a
--- /dev/null
+++ b/src/runtime/rt0_dragonfly_386.s
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_dragonfly(SB),NOSPLIT,$8
+ MOVL 8(SP), AX
+ LEAL 12(SP), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_dragonfly_amd64.s b/src/runtime/rt0_dragonfly_amd64.s
new file mode 100644
index 000000000..fb56618d8
--- /dev/null
+++ b/src/runtime/rt0_dragonfly_amd64.s
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_dragonfly(SB),NOSPLIT,$-8
+ LEAQ 8(DI), SI // argv
+ MOVQ 0(DI), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
diff --git a/src/runtime/rt0_freebsd_386.s b/src/runtime/rt0_freebsd_386.s
new file mode 100644
index 000000000..cd7a915f8
--- /dev/null
+++ b/src/runtime/rt0_freebsd_386.s
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_freebsd(SB),NOSPLIT,$8
+ MOVL 8(SP), AX
+ LEAL 12(SP), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_freebsd_amd64.s b/src/runtime/rt0_freebsd_amd64.s
new file mode 100644
index 000000000..7989f7c3e
--- /dev/null
+++ b/src/runtime/rt0_freebsd_amd64.s
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_freebsd(SB),NOSPLIT,$-8
+ LEAQ 8(DI), SI // argv
+ MOVQ 0(DI), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
diff --git a/src/runtime/rt0_freebsd_arm.s b/src/runtime/rt0_freebsd_arm.s
new file mode 100644
index 000000000..f31252698
--- /dev/null
+++ b/src/runtime/rt0_freebsd_arm.s
@@ -0,0 +1,18 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// FreeBSD and Linux use the same linkage to main
+
+TEXT _rt0_arm_freebsd(SB),NOSPLIT,$-4
+ MOVW (R13), R0 // argc
+ MOVW $4(R13), R1 // argv
+ MOVM.DB.W [R0-R1], (R13)
+ B runtime·rt0_go(SB)
+
+TEXT main(SB),NOSPLIT,$-4
+ MOVM.DB.W [R0-R1], (R13)
+ MOVW $runtime·rt0_go(SB), R4
+ B (R4)
diff --git a/src/runtime/rt0_linux_386.s b/src/runtime/rt0_linux_386.s
new file mode 100644
index 000000000..74ddc94da
--- /dev/null
+++ b/src/runtime/rt0_linux_386.s
@@ -0,0 +1,25 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_linux(SB),NOSPLIT,$8
+ MOVL 8(SP), AX
+ LEAL 12(SP), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL runtime·linux_setup_vdso(SB)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
+
+TEXT _fallback_vdso(SB),NOSPLIT,$0
+ INT $0x80
+ RET
+
+DATA runtime·_vdso(SB)/4, $_fallback_vdso(SB)
+GLOBL runtime·_vdso(SB), $4
+
diff --git a/src/runtime/rt0_linux_amd64.s b/src/runtime/rt0_linux_amd64.s
new file mode 100644
index 000000000..985426acc
--- /dev/null
+++ b/src/runtime/rt0_linux_amd64.s
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8
+ LEAQ 8(SP), SI // argv
+ MOVQ 0(SP), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
diff --git a/src/runtime/rt0_linux_arm.s b/src/runtime/rt0_linux_arm.s
new file mode 100644
index 000000000..8af3d3505
--- /dev/null
+++ b/src/runtime/rt0_linux_arm.s
@@ -0,0 +1,91 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_arm_linux(SB),NOSPLIT,$-4
+ MOVW (R13), R0 // argc
+ MOVW $4(R13), R1 // argv
+ MOVW $_rt0_arm_linux1(SB), R4
+ B (R4)
+
+TEXT _rt0_arm_linux1(SB),NOSPLIT,$-4
+	// We first need to detect the kernel ABI, and warn the user
+	// if the system only supports OABI.
+	// The strategy here is to make an EABI syscall and see whether
+	// SIGILL is received.
+	// To catch SIGILL we would have to set up sigaction first, which is
+	// a chicken-and-egg problem: we can't make a syscall if we don't
+	// know the kernel ABI... except that we can, by making the syscall
+	// in Thumb mode.
+
+ // Save argc and argv
+ MOVM.DB.W [R0-R1], (R13)
+
+ // Thumb mode OABI check disabled because there are some
+ // EABI systems that do not support Thumb execution.
+ // We can run on them except for this check!
+
+ // // set up sa_handler
+ // MOVW $bad_abi<>(SB), R0 // sa_handler
+ // MOVW $0, R1 // sa_flags
+ // MOVW $0, R2 // sa_restorer
+ // MOVW $0, R3 // sa_mask
+ // MOVM.DB.W [R0-R3], (R13)
+ // MOVW $4, R0 // SIGILL
+ // MOVW R13, R1 // sa
+ // SUB $16, R13
+ // MOVW R13, R2 // old_sa
+ // MOVW $8, R3 // c
+ // MOVW $174, R7 // sys_sigaction
+ // BL oabi_syscall<>(SB)
+
+ // do an EABI syscall
+ MOVW $20, R7 // sys_getpid
+ SWI $0 // this will trigger SIGILL on OABI systems
+
+ // MOVW $4, R0 // SIGILL
+ // MOVW R13, R1 // sa
+ // MOVW $0, R2 // old_sa
+ // MOVW $8, R3 // c
+ // MOVW $174, R7 // sys_sigaction
+ // SWI $0 // restore signal handler
+ // ADD $32, R13
+
+ SUB $4, R13 // fake a stack frame for runtime·setup_auxv
+ BL runtime·setup_auxv(SB)
+ ADD $4, R13
+ B runtime·rt0_go(SB)
+
+TEXT bad_abi<>(SB),NOSPLIT,$-4
+ // give diagnosis and exit
+ MOVW $2, R0 // stderr
+ MOVW $bad_abi_msg(SB), R1 // data
+ MOVW $45, R2 // len
+ MOVW $4, R7 // sys_write
+ BL oabi_syscall<>(SB)
+ MOVW $1, R0
+ MOVW $1, R7 // sys_exit
+ BL oabi_syscall<>(SB)
+ B 0(PC)
+
+DATA bad_abi_msg+0x00(SB)/8, $"This pro"
+DATA bad_abi_msg+0x08(SB)/8, $"gram can"
+DATA bad_abi_msg+0x10(SB)/8, $" only be"
+DATA bad_abi_msg+0x18(SB)/8, $" run on "
+DATA bad_abi_msg+0x20(SB)/8, $"EABI ker"
+DATA bad_abi_msg+0x28(SB)/4, $"nels"
+DATA bad_abi_msg+0x2c(SB)/1, $0xa
+GLOBL bad_abi_msg(SB), $45
+
+TEXT oabi_syscall<>(SB),NOSPLIT,$-4
+ ADD $1, PC, R4
+ WORD $0xe12fff14 //BX (R4) // enter thumb mode
+ // TODO(minux): only supports little-endian CPUs
+ WORD $0x4770df01 // swi $1; bx lr
+
+TEXT main(SB),NOSPLIT,$-4
+ MOVW $_rt0_arm_linux1(SB), R4
+ B (R4)
+
diff --git a/src/runtime/rt0_nacl_386.s b/src/runtime/rt0_nacl_386.s
new file mode 100644
index 000000000..d4ba06306
--- /dev/null
+++ b/src/runtime/rt0_nacl_386.s
@@ -0,0 +1,22 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// NaCl entry has:
+// 0(FP) - arg block == SP+8
+// 4(FP) - cleanup function pointer, always 0
+// 8(FP) - envc
+// 12(FP) - argc
+// 16(FP) - argv, then 0, then envv, then 0, then auxv
+TEXT _rt0_386_nacl(SB),NOSPLIT,$8
+ MOVL argc+12(FP), AX
+ LEAL argv+16(FP), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_nacl_amd64p32.s b/src/runtime/rt0_nacl_amd64p32.s
new file mode 100644
index 000000000..d8703dc0f
--- /dev/null
+++ b/src/runtime/rt0_nacl_amd64p32.s
@@ -0,0 +1,30 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// NaCl entry on 32-bit x86 has DI pointing at the arg block, which contains:
+//
+// 0(DI) - cleanup function pointer, always 0
+// 4(DI) - envc
+// 8(DI) - argc
+// 12(DI) - argv, then 0, then envv, then 0, then auxv
+// NaCl entry here is almost the same, except that there
+// is no saved caller PC, so 0(FP) is -8(FP) and so on.
+TEXT _rt0_amd64p32_nacl(SB),NOSPLIT,$16
+ MOVL DI, 0(SP)
+ CALL runtime·nacl_sysinfo(SB)
+ MOVL 0(SP), DI
+ MOVL 8(DI), AX
+ LEAL 12(DI), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ // Uncomment for fake time like on Go Playground.
+ //MOVQ $1257894000000000000, AX
+ //MOVQ AX, runtime·timens(SB)
+ JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_nacl_arm.s b/src/runtime/rt0_nacl_arm.s
new file mode 100644
index 000000000..eadb4782d
--- /dev/null
+++ b/src/runtime/rt0_nacl_arm.s
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// NaCl entry has:
+// 0(FP) - 0
+// 4(FP) - cleanup function pointer, always 0
+// 8(FP) - envc
+// 12(FP) - argc
+// 16(FP) - argv, then 0, then envv, then 0, then auxv
+TEXT _rt0_arm_nacl(SB),NOSPLIT,$-4
+ MOVW 8(R13), R0
+ MOVW $12(R13), R1
+ MOVM.DB.W [R0-R1], (R13)
+ B main(SB)
+
+TEXT main(SB),NOSPLIT,$0
+ B runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_netbsd_386.s b/src/runtime/rt0_netbsd_386.s
new file mode 100644
index 000000000..70b853253
--- /dev/null
+++ b/src/runtime/rt0_netbsd_386.s
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_netbsd(SB),NOSPLIT,$8
+ MOVL 8(SP), AX
+ LEAL 12(SP), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_netbsd_amd64.s b/src/runtime/rt0_netbsd_amd64.s
new file mode 100644
index 000000000..fad56614e
--- /dev/null
+++ b/src/runtime/rt0_netbsd_amd64.s
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_netbsd(SB),NOSPLIT,$-8
+ LEAQ 8(SP), SI // argv
+ MOVQ 0(SP), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
diff --git a/src/runtime/rt0_netbsd_arm.s b/src/runtime/rt0_netbsd_arm.s
new file mode 100644
index 000000000..bad66e06c
--- /dev/null
+++ b/src/runtime/rt0_netbsd_arm.s
@@ -0,0 +1,13 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// FreeBSD/NetBSD and Linux use the same linkage to main
+
+TEXT _rt0_arm_netbsd(SB),NOSPLIT,$-4
+ MOVW (R13), R0 // argc
+ MOVW $4(R13), R1 // argv
+ MOVM.DB.W [R0-R1], (R13)
+ B runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_openbsd_386.s b/src/runtime/rt0_openbsd_386.s
new file mode 100644
index 000000000..f25d2e1cf
--- /dev/null
+++ b/src/runtime/rt0_openbsd_386.s
@@ -0,0 +1,16 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_openbsd(SB),NOSPLIT,$8
+ MOVL 8(SP), AX
+ LEAL 12(SP), BX
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ CALL main(SB)
+ INT $3
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
diff --git a/src/runtime/rt0_openbsd_amd64.s b/src/runtime/rt0_openbsd_amd64.s
new file mode 100644
index 000000000..58fe66639
--- /dev/null
+++ b/src/runtime/rt0_openbsd_amd64.s
@@ -0,0 +1,15 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_openbsd(SB),NOSPLIT,$-8
+ LEAQ 8(SP), SI // argv
+ MOVQ 0(SP), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
diff --git a/src/runtime/rt0_plan9_386.s b/src/runtime/rt0_plan9_386.s
new file mode 100644
index 000000000..7e2887b85
--- /dev/null
+++ b/src/runtime/rt0_plan9_386.s
@@ -0,0 +1,23 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_plan9(SB),NOSPLIT,$12
+ MOVL AX, _tos(SB)
+ LEAL 8(SP), AX
+ MOVL AX, _privates(SB)
+ MOVL $1, _nprivates(SB)
+ CALL runtime·asminit(SB)
+ MOVL inargc-4(FP), AX
+ MOVL AX, 0(SP)
+ LEAL inargv+0(FP), AX
+ MOVL AX, 4(SP)
+ CALL runtime·rt0_go(SB)
+
+DATA runtime·isplan9(SB)/4, $1
+GLOBL runtime·isplan9(SB), $4
+GLOBL _tos(SB), $4
+GLOBL _privates(SB), $4
+GLOBL _nprivates(SB), $4
diff --git a/src/runtime/rt0_plan9_amd64.s b/src/runtime/rt0_plan9_amd64.s
new file mode 100644
index 000000000..a372a0ba8
--- /dev/null
+++ b/src/runtime/rt0_plan9_amd64.s
@@ -0,0 +1,21 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_plan9(SB),NOSPLIT,$24
+ MOVQ AX, _tos(SB)
+ LEAQ 16(SP), AX
+ MOVQ AX, _privates(SB)
+ MOVL $1, _nprivates(SB)
+ MOVL inargc-8(FP), DI
+ LEAQ inargv+0(FP), SI
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
+
+DATA runtime·isplan9(SB)/4, $1
+GLOBL runtime·isplan9(SB), $4
+GLOBL _tos(SB), $8
+GLOBL _privates(SB), $8
+GLOBL _nprivates(SB), $4
diff --git a/src/runtime/rt0_solaris_amd64.s b/src/runtime/rt0_solaris_amd64.s
new file mode 100644
index 000000000..92a9fc295
--- /dev/null
+++ b/src/runtime/rt0_solaris_amd64.s
@@ -0,0 +1,18 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_amd64_solaris(SB),NOSPLIT,$-8
+ LEAQ 8(SP), SI // argv
+ MOVQ 0(SP), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
+
+DATA runtime·issolaris(SB)/4, $1
+GLOBL runtime·issolaris(SB), $4
diff --git a/src/runtime/rt0_windows_386.s b/src/runtime/rt0_windows_386.s
new file mode 100644
index 000000000..00604372f
--- /dev/null
+++ b/src/runtime/rt0_windows_386.s
@@ -0,0 +1,20 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT _rt0_386_windows(SB),NOSPLIT,$12
+ MOVL 12(SP), AX
+ LEAL 16(SP), BX
+ MOVL AX, 4(SP)
+ MOVL BX, 8(SP)
+ MOVL $-1, 0(SP) // return PC for main
+ JMP main(SB)
+
+TEXT main(SB),NOSPLIT,$0
+ JMP runtime·rt0_go(SB)
+
+
+DATA runtime·iswindows(SB)/4, $1
+GLOBL runtime·iswindows(SB), $4
diff --git a/src/runtime/rt0_windows_amd64.s b/src/runtime/rt0_windows_amd64.s
new file mode 100644
index 000000000..890a570d1
--- /dev/null
+++ b/src/runtime/rt0_windows_amd64.s
@@ -0,0 +1,19 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+TEXT _rt0_amd64_windows(SB),NOSPLIT,$-8
+ LEAQ 8(SP), SI // argv
+ MOVQ 0(SP), DI // argc
+ MOVQ $main(SB), AX
+ JMP AX
+
+TEXT main(SB),NOSPLIT,$-8
+ MOVQ $runtime·rt0_go(SB), AX
+ JMP AX
+
+DATA runtime·iswindows(SB)/4, $1
+GLOBL runtime·iswindows(SB), $4
diff --git a/src/runtime/rune.go b/src/runtime/rune.go
new file mode 100644
index 000000000..a9f683581
--- /dev/null
+++ b/src/runtime/rune.go
@@ -0,0 +1,219 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Portions Copyright 2009 The Go Authors. All rights reserved.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+
+/*
+ * This code is copied, with slight editing due to type differences,
+ * from a subset of ../lib9/utf/rune.c
+ */
+
+package runtime
+
+const (
+ bit1 = 7
+ bitx = 6
+ bit2 = 5
+ bit3 = 4
+ bit4 = 3
+ bit5 = 2
+
+ t1 = ((1 << (bit1 + 1)) - 1) ^ 0xFF /* 0000 0000 */
+ tx = ((1 << (bitx + 1)) - 1) ^ 0xFF /* 1000 0000 */
+ t2 = ((1 << (bit2 + 1)) - 1) ^ 0xFF /* 1100 0000 */
+ t3 = ((1 << (bit3 + 1)) - 1) ^ 0xFF /* 1110 0000 */
+ t4 = ((1 << (bit4 + 1)) - 1) ^ 0xFF /* 1111 0000 */
+ t5 = ((1 << (bit5 + 1)) - 1) ^ 0xFF /* 1111 1000 */
+
+ rune1 = (1 << (bit1 + 0*bitx)) - 1 /* 0000 0000 0111 1111 */
+ rune2 = (1 << (bit2 + 1*bitx)) - 1 /* 0000 0111 1111 1111 */
+ rune3 = (1 << (bit3 + 2*bitx)) - 1 /* 1111 1111 1111 1111 */
+ rune4 = (1 << (bit4 + 3*bitx)) - 1 /* 0001 1111 1111 1111 1111 1111 */
+
+ maskx = (1 << bitx) - 1 /* 0011 1111 */
+ testx = maskx ^ 0xFF /* 1100 0000 */
+
+ runeerror = 0xFFFD
+ runeself = 0x80
+
+ surrogateMin = 0xD800
+ surrogateMax = 0xDFFF
+
+ bad = runeerror
+
+ runemax = 0x10FFFF /* maximum rune value */
+)
+
+/*
+ * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
+ * This is a slower but "safe" version of the old chartorune
+ * that works on strings that are not necessarily null-terminated.
+ *
+ * If you know for sure that your string is null-terminated,
+ * chartorune will be a bit faster.
+ *
+ * It is guaranteed not to attempt to access "length"
+ * past the incoming pointer. This is to avoid
+ * possible access violations. If the string appears to be
+ * well-formed but incomplete (i.e., to get the whole Rune
+ * we'd need to read past str+length) then we'll set the Rune
+ * to Bad and return 0.
+ *
+ * Note that if we have decoding problems for other
+ * reasons, we return 1 instead of 0.
+ */
+func charntorune(s string) (rune, int) {
+ /* When we're not allowed to read anything */
+ if len(s) <= 0 {
+ return bad, 1
+ }
+
+ /*
+ * one character sequence (7-bit value)
+ * 00000-0007F => T1
+ */
+ c := s[0]
+ if c < tx {
+ return rune(c), 1
+ }
+
+ // If we can't read more than one character we must stop
+ if len(s) <= 1 {
+ return bad, 1
+ }
+
+ /*
+ * two character sequence (11-bit value)
+ * 0080-07FF => t2 tx
+ */
+ c1 := s[1] ^ tx
+ if (c1 & testx) != 0 {
+ return bad, 1
+ }
+ if c < t3 {
+ if c < t2 {
+ return bad, 1
+ }
+ l := ((rune(c) << bitx) | rune(c1)) & rune2
+ if l <= rune1 {
+ return bad, 1
+ }
+ return l, 2
+ }
+
+ // If we can't read more than two characters we must stop
+ if len(s) <= 2 {
+ return bad, 1
+ }
+
+ /*
+ * three character sequence (16-bit value)
+ * 0800-FFFF => t3 tx tx
+ */
+ c2 := s[2] ^ tx
+ if (c2 & testx) != 0 {
+ return bad, 1
+ }
+ if c < t4 {
+ l := ((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) & rune3
+ if l <= rune2 {
+ return bad, 1
+ }
+ if surrogateMin <= l && l <= surrogateMax {
+ return bad, 1
+ }
+ return l, 3
+ }
+
+ if len(s) <= 3 {
+ return bad, 1
+ }
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => t4 tx tx tx
+ */
+ c3 := s[3] ^ tx
+ if (c3 & testx) != 0 {
+ return bad, 1
+ }
+ if c < t5 {
+ l := ((((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) << bitx) | rune(c3)) & rune4
+ if l <= rune3 || l > runemax {
+ return bad, 1
+ }
+ return l, 4
+ }
+
+ // Support for 5-byte or longer UTF-8 would go here, but
+ // since we don't have that, we'll just return bad.
+ return bad, 1
+}
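For comparison, the standard library's unicode/utf8 package exposes roughly the same contract as charntorune: decode one rune, report how many bytes it consumed, and signal malformed or truncated input via RuneError with size 1. This is a small self-contained sketch (not runtime code) exercising both a valid and a truncated sequence:

	package main

	import (
		"fmt"
		"unicode/utf8"
	)

	func main() {
		inputs := []string{"日本語", string([]byte{0xE6, 0x97})} // second input is a truncated 3-byte sequence
		for _, s := range inputs {
			r, size := utf8.DecodeRuneInString(s)
			if r == utf8.RuneError && size <= 1 {
				fmt.Println("bad or incomplete encoding")
				continue
			}
			fmt.Printf("first rune %q uses %d bytes\n", r, size)
		}
	}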
+
+// runetochar converts r to bytes and writes the result to str.
+// returns the number of bytes generated.
+func runetochar(str []byte, r rune) int {
+ /* runes are signed, so convert to unsigned for range check. */
+ c := uint32(r)
+ /*
+ * one character sequence
+ * 00000-0007F => 00-7F
+ */
+ if c <= rune1 {
+ str[0] = byte(c)
+ return 1
+ }
+ /*
+ * two character sequence
+ * 0080-07FF => t2 tx
+ */
+ if c <= rune2 {
+ str[0] = byte(t2 | (c >> (1 * bitx)))
+ str[1] = byte(tx | (c & maskx))
+ return 2
+ }
+
+ /*
+ * If the rune is out of range or a surrogate half, convert it to the error rune.
+ * Do this test here because the error rune encodes to three bytes.
+ * Doing it earlier would duplicate work, since an out of range
+ * rune wouldn't have fit in one or two bytes.
+ */
+ if c > runemax {
+ c = runeerror
+ }
+ if surrogateMin <= c && c <= surrogateMax {
+ c = runeerror
+ }
+
+ /*
+ * three character sequence
+ * 0800-FFFF => t3 tx tx
+ */
+ if c <= rune3 {
+ str[0] = byte(t3 | (c >> (2 * bitx)))
+ str[1] = byte(tx | ((c >> (1 * bitx)) & maskx))
+ str[2] = byte(tx | (c & maskx))
+ return 3
+ }
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => t4 tx tx tx
+ */
+ str[0] = byte(t4 | (c >> (3 * bitx)))
+ str[1] = byte(tx | ((c >> (2 * bitx)) & maskx))
+ str[2] = byte(tx | ((c >> (1 * bitx)) & maskx))
+ str[3] = byte(tx | (c & maskx))
+ return 4
+}
diff --git a/src/runtime/runtime-gdb.py b/src/runtime/runtime-gdb.py
new file mode 100644
index 000000000..eedac7cf4
--- /dev/null
+++ b/src/runtime/runtime-gdb.py
@@ -0,0 +1,478 @@
+# Copyright 2010 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+"""GDB Pretty printers and convenience functions for Go's runtime structures.
+
+This script is loaded by GDB when it finds a .debug_gdb_scripts
+section in the compiled binary. The [68]l linkers emit this with a
+path to this file based on the path to the runtime package.
+"""
+
+# Known issues:
+# - pretty printing only works for the 'native' strings. E.g. 'type
+# foo string' will make foo a plain struct in the eyes of gdb,
+# circumventing the pretty print triggering.
+
+
+from __future__ import print_function
+import re
+import sys
+
+print("Loading Go Runtime support.", file=sys.stderr)
+#http://python3porting.com/differences.html
+if sys.version > '3':
+ xrange = range
+# allow manual reloading while developing
+goobjfile = gdb.current_objfile() or gdb.objfiles()[0]
+goobjfile.pretty_printers = []
+
+#
+# Pretty Printers
+#
+
+
+class StringTypePrinter:
+ "Pretty print Go strings."
+
+ pattern = re.compile(r'^struct string$')
+
+ def __init__(self, val):
+ self.val = val
+
+ def display_hint(self):
+ return 'string'
+
+ def to_string(self):
+ l = int(self.val['len'])
+ return self.val['str'].string("utf-8", "ignore", l)
+
+
+class SliceTypePrinter:
+ "Pretty print slices."
+
+ pattern = re.compile(r'^struct \[\]')
+
+ def __init__(self, val):
+ self.val = val
+
+ def display_hint(self):
+ return 'array'
+
+ def to_string(self):
+ return str(self.val.type)[6:] # skip 'struct '
+
+ def children(self):
+ if self.val["len"] > self.val["cap"]:
+ return
+ ptr = self.val["array"]
+ for idx in range(int(self.val["len"])):
+ yield ('[{0}]'.format(idx), (ptr + idx).dereference())
+
+
+class MapTypePrinter:
+ """Pretty print map[K]V types.
+
+	Map-typed Go variables are really pointers. Dereference them in gdb
+	to inspect their contents with this pretty printer.
+ """
+
+ pattern = re.compile(r'^map\[.*\].*$')
+
+ def __init__(self, val):
+ self.val = val
+
+ def display_hint(self):
+ return 'map'
+
+ def to_string(self):
+ return str(self.val.type)
+
+ def children(self):
+ B = self.val['b']
+ buckets = self.val['buckets']
+ oldbuckets = self.val['oldbuckets']
+ flags = self.val['flags']
+ inttype = self.val['hash0'].type
+ cnt = 0
+ for bucket in xrange(2 ** int(B)):
+ bp = buckets + bucket
+ if oldbuckets:
+ oldbucket = bucket & (2 ** (B - 1) - 1)
+ oldbp = oldbuckets + oldbucket
+ oldb = oldbp.dereference()
+ if (oldb['overflow'].cast(inttype) & 1) == 0: # old bucket not evacuated yet
+ if bucket >= 2 ** (B - 1):
+ continue # already did old bucket
+ bp = oldbp
+ while bp:
+ b = bp.dereference()
+ for i in xrange(8):
+ if b['tophash'][i] != 0:
+ k = b['keys'][i]
+ v = b['values'][i]
+ if flags & 1:
+ k = k.dereference()
+ if flags & 2:
+ v = v.dereference()
+ yield str(cnt), k
+ yield str(cnt + 1), v
+ cnt += 2
+ bp = b['overflow']
+
+
+class ChanTypePrinter:
+ """Pretty print chan[T] types.
+
+	Chan-typed Go variables are really pointers. Dereference them in gdb
+	to inspect their contents with this pretty printer.
+ """
+
+ pattern = re.compile(r'^struct hchan<.*>$')
+
+ def __init__(self, val):
+ self.val = val
+
+ def display_hint(self):
+ return 'array'
+
+ def to_string(self):
+ return str(self.val.type)
+
+ def children(self):
+ # see chan.c chanbuf(). et is the type stolen from hchan<T>::recvq->first->elem
+ et = [x.type for x in self.val['recvq']['first'].type.target().fields() if x.name == 'elem'][0]
+ ptr = (self.val.address + 1).cast(et.pointer())
+ for i in range(self.val["qcount"]):
+ j = (self.val["recvx"] + i) % self.val["dataqsiz"]
+ yield ('[{0}]'.format(i), (ptr + j).dereference())
+
+
+#
+# Register all the *Printer classes above.
+#
+
+def makematcher(klass):
+ def matcher(val):
+ try:
+ if klass.pattern.match(str(val.type)):
+ return klass(val)
+ except Exception:
+ pass
+ return matcher
+
+goobjfile.pretty_printers.extend([makematcher(var) for var in vars().values() if hasattr(var, 'pattern')])
+
+#
+# For reference, this is what we're trying to do:
+# eface: p *(*(struct 'runtime.rtype'*)'main.e'->type_->data)->string
+# iface: p *(*(struct 'runtime.rtype'*)'main.s'->tab->Type->data)->string
+#
+# Interface types can't be recognized by their name; instead we check
+# whether they have the expected fields.  Unfortunately the mapping of
+# fields to python attributes in gdb.py isn't complete: you can't test
+# for presence other than by trapping.
+
+
+def is_iface(val):
+ try:
+ return str(val['tab'].type) == "struct runtime.itab *" and str(val['data'].type) == "void *"
+ except gdb.error:
+ pass
+
+
+def is_eface(val):
+ try:
+ return str(val['_type'].type) == "struct runtime._type *" and str(val['data'].type) == "void *"
+ except gdb.error:
+ pass
+
+
+def lookup_type(name):
+ try:
+ return gdb.lookup_type(name)
+ except gdb.error:
+ pass
+ try:
+ return gdb.lookup_type('struct ' + name)
+ except gdb.error:
+ pass
+ try:
+ return gdb.lookup_type('struct ' + name[1:]).pointer()
+ except gdb.error:
+ pass
+
+_rctp_type = gdb.lookup_type("struct runtime.rtype").pointer()
+
+
+def iface_commontype(obj):
+ if is_iface(obj):
+ go_type_ptr = obj['tab']['_type']
+ elif is_eface(obj):
+ go_type_ptr = obj['_type']
+ else:
+ return
+
+ return go_type_ptr.cast(_rctp_type).dereference()
+
+
+def iface_dtype(obj):
+ "Decode type of the data field of an eface or iface struct."
+ # known issue: dtype_name decoded from runtime.rtype is "nested.Foo"
+ # but the dwarf table lists it as "full/path/to/nested.Foo"
+
+ dynamic_go_type = iface_commontype(obj)
+ if dynamic_go_type is None:
+ return
+ dtype_name = dynamic_go_type['string'].dereference()['str'].string()
+
+ dynamic_gdb_type = lookup_type(dtype_name)
+ if dynamic_gdb_type is None:
+ return
+
+ type_size = int(dynamic_go_type['size'])
+ uintptr_size = int(dynamic_go_type['size'].type.sizeof) # size is itself an uintptr
+ if type_size > uintptr_size:
+ dynamic_gdb_type = dynamic_gdb_type.pointer()
+
+ return dynamic_gdb_type
+
+
+def iface_dtype_name(obj):
+ "Decode type name of the data field of an eface or iface struct."
+
+ dynamic_go_type = iface_commontype(obj)
+ if dynamic_go_type is None:
+ return
+ return dynamic_go_type['string'].dereference()['str'].string()
+
+
+class IfacePrinter:
+ """Pretty print interface values
+
+ Casts the data field to the appropriate dynamic type."""
+
+ def __init__(self, val):
+ self.val = val
+
+ def display_hint(self):
+ return 'string'
+
+ def to_string(self):
+ if self.val['data'] == 0:
+ return 0x0
+ try:
+ dtype = iface_dtype(self.val)
+ except Exception:
+ return "<bad dynamic type>"
+
+ if dtype is None: # trouble looking up, print something reasonable
+ return "({0}){0}".format(iface_dtype_name(self.val), self.val['data'])
+
+ try:
+ return self.val['data'].cast(dtype).dereference()
+ except Exception:
+ pass
+ return self.val['data'].cast(dtype)
+
+
+def ifacematcher(val):
+ if is_iface(val) or is_eface(val):
+ return IfacePrinter(val)
+
+goobjfile.pretty_printers.append(ifacematcher)
+
+#
+# Convenience Functions
+#
+
+
+class GoLenFunc(gdb.Function):
+ "Length of strings, slices, maps or channels"
+
+ how = ((StringTypePrinter, 'len'), (SliceTypePrinter, 'len'), (MapTypePrinter, 'count'), (ChanTypePrinter, 'qcount'))
+
+ def __init__(self):
+ gdb.Function.__init__(self, "len")
+
+ def invoke(self, obj):
+ typename = str(obj.type)
+ for klass, fld in self.how:
+ if klass.pattern.match(typename):
+ return obj[fld]
+
+
+class GoCapFunc(gdb.Function):
+ "Capacity of slices or channels"
+
+ how = ((SliceTypePrinter, 'cap'), (ChanTypePrinter, 'dataqsiz'))
+
+ def __init__(self):
+ gdb.Function.__init__(self, "cap")
+
+ def invoke(self, obj):
+ typename = str(obj.type)
+ for klass, fld in self.how:
+ if klass.pattern.match(typename):
+ return obj[fld]
+
+
+class DTypeFunc(gdb.Function):
+ """Cast Interface values to their dynamic type.
+
+ For non-interface types this behaves as the identity operation.
+ """
+
+ def __init__(self):
+ gdb.Function.__init__(self, "dtype")
+
+ def invoke(self, obj):
+ try:
+ return obj['data'].cast(iface_dtype(obj))
+ except gdb.error:
+ pass
+ return obj
+
+#
+# Commands
+#
+
+sts = ('idle', 'runnable', 'running', 'syscall', 'waiting', 'moribund', 'dead', 'recovery')
+
+
+def linked_list(ptr, linkfield):
+ while ptr:
+ yield ptr
+ ptr = ptr[linkfield]
+
+
+class GoroutinesCmd(gdb.Command):
+ "List all goroutines."
+
+ def __init__(self):
+ gdb.Command.__init__(self, "info goroutines", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)
+
+ def invoke(self, _arg, _from_tty):
+ # args = gdb.string_to_argv(arg)
+ vp = gdb.lookup_type('void').pointer()
+ for ptr in linked_list(gdb.parse_and_eval("'runtime.allg'"), 'alllink'):
+ if ptr['status'] == 6: # 'gdead'
+ continue
+ s = ' '
+ if ptr['m']:
+ s = '*'
+ pc = ptr['sched']['pc'].cast(vp)
+ # python2 will not cast pc (type void*) to an int cleanly
+ # instead python2 and python3 work with the hex string representation
+ # of the void pointer which we can parse back into an int.
+ # int(pc) will not work.
+ try:
+ #python3 / newer versions of gdb
+ pc = int(pc)
+ except gdb.error:
+ pc = int(str(pc), 16)
+ blk = gdb.block_for_pc(pc)
+ print(s, ptr['goid'], "{0:8s}".format(sts[int(ptr['status'])]), blk.function)
+
+
+def find_goroutine(goid):
+ """
+ find_goroutine attempts to find the goroutine identified by goid.
+	It returns a tuple of gdb.Value objects representing the program
+	counter and stack pointer for the goroutine.
+
+ @param int goid
+
+ @return tuple (gdb.Value, gdb.Value)
+ """
+ vp = gdb.lookup_type('void').pointer()
+ for ptr in linked_list(gdb.parse_and_eval("'runtime.allg'"), 'alllink'):
+ if ptr['status'] == 6: # 'gdead'
+ continue
+ if ptr['goid'] == goid:
+ return (ptr['sched'][x].cast(vp) for x in ('pc', 'sp'))
+ return None, None
+
+
+class GoroutineCmd(gdb.Command):
+ """Execute gdb command in the context of goroutine <goid>.
+
+ Switch PC and SP to the ones in the goroutine's G structure,
+ execute an arbitrary gdb command, and restore PC and SP.
+
+ Usage: (gdb) goroutine <goid> <gdbcmd>
+
+ Note that it is ill-defined to modify state in the context of a goroutine.
+ Restrict yourself to inspecting values.
+ """
+
+ def __init__(self):
+ gdb.Command.__init__(self, "goroutine", gdb.COMMAND_STACK, gdb.COMPLETE_NONE)
+
+ def invoke(self, arg, _from_tty):
+ goid, cmd = arg.split(None, 1)
+ goid = gdb.parse_and_eval(goid)
+ pc, sp = find_goroutine(int(goid))
+ if not pc:
+ print("No such goroutine: ", goid)
+ return
+ try:
+ #python3 / newer versions of gdb
+ pc = int(pc)
+ except gdb.error:
+ pc = int(str(pc), 16)
+ save_frame = gdb.selected_frame()
+ gdb.parse_and_eval('$save_pc = $pc')
+ gdb.parse_and_eval('$save_sp = $sp')
+ gdb.parse_and_eval('$pc = {0}'.format(str(pc)))
+ gdb.parse_and_eval('$sp = {0}'.format(str(sp)))
+ try:
+ gdb.execute(cmd)
+ finally:
+ gdb.parse_and_eval('$pc = $save_pc')
+ gdb.parse_and_eval('$sp = $save_sp')
+ save_frame.select()
+
+
+class GoIfaceCmd(gdb.Command):
+    "Print static and dynamic interface types"
+
+ def __init__(self):
+ gdb.Command.__init__(self, "iface", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL)
+
+ def invoke(self, arg, _from_tty):
+ for obj in gdb.string_to_argv(arg):
+ try:
+ #TODO fix quoting for qualified variable names
+ obj = gdb.parse_and_eval(str(obj))
+ except Exception as e:
+ print("Can't parse ", obj, ": ", e)
+ continue
+
+ if obj['data'] == 0:
+ dtype = "nil"
+ else:
+ dtype = iface_dtype(obj)
+
+ if dtype is None:
+ print("Not an interface: ", obj.type)
+ continue
+
+ print("{0}: {1}".format(obj.type, dtype))
+
+# TODO: print interface's methods and dynamic type's func pointers thereof.
+#rsc: "to find the number of entries in the itab's Fn field look at
+# itab.inter->numMethods
+# i am sure i have the names wrong but look at the interface type
+# and its method count"
+# so Itype will start with a commontype which has kind = interface
+
+#
+# Register all convenience functions and CLI commands
+#
+GoLenFunc()
+GoCapFunc()
+DTypeFunc()
+GoroutinesCmd()
+GoroutineCmd()
+GoIfaceCmd()
diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c
new file mode 100644
index 000000000..42ce1dadf
--- /dev/null
+++ b/src/runtime/runtime.c
@@ -0,0 +1,391 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "stack.h"
+#include "arch_GOARCH.h"
+#include "textflag.h"
+
+// Keep a cached value to make gotraceback fast,
+// since we call it on every call to gentraceback.
+// The cached value is a uint32 in which the low bit
+// is the "crash" setting and the top 31 bits are the
+// gotraceback value.
+static uint32 traceback_cache = 2<<1;
+
+// The GOTRACEBACK environment variable controls the
+// behavior of a Go program that is crashing and exiting.
+// GOTRACEBACK=0 suppress all tracebacks
+// GOTRACEBACK=1 default behavior - show tracebacks but exclude runtime frames
+// GOTRACEBACK=2 show tracebacks including runtime frames
+// GOTRACEBACK=crash show tracebacks including runtime frames, then crash (core dump etc)
+#pragma textflag NOSPLIT
+int32
+runtime·gotraceback(bool *crash)
+{
+ if(crash != nil)
+ *crash = false;
+ if(g->m->traceback != 0)
+ return g->m->traceback;
+ if(crash != nil)
+ *crash = traceback_cache&1;
+ return traceback_cache>>1;
+}
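The traceback_cache encoding described in the comment above is just a shift plus a flag bit: GOTRACEBACK=crash is stored as (2<<1)|1, which decodes to level 2 with crash set. The helpers below are an illustrative Go sketch of that layout, not runtime API.

	// packTraceback and unpackTraceback model the uint32 layout used by
	// traceback_cache: level in bits 1..31, crash flag in bit 0.
	func packTraceback(level uint32, crash bool) uint32 {
		v := level << 1
		if crash {
			v |= 1
		}
		return v
	}

	func unpackTraceback(cache uint32) (level uint32, crash bool) {
		return cache >> 1, cache&1 != 0
	}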
+
+int32
+runtime·mcmp(byte *s1, byte *s2, uintptr n)
+{
+ uintptr i;
+ byte c1, c2;
+
+ for(i=0; i<n; i++) {
+ c1 = s1[i];
+ c2 = s2[i];
+ if(c1 < c2)
+ return -1;
+ if(c1 > c2)
+ return +1;
+ }
+ return 0;
+}
+
+
+byte*
+runtime·mchr(byte *p, byte c, byte *ep)
+{
+ for(; p < ep; p++)
+ if(*p == c)
+ return p;
+ return nil;
+}
+
+static int32 argc;
+static uint8** argv;
+
+Slice os·Args;
+Slice syscall·envs;
+
+void (*runtime·sysargs)(int32, uint8**);
+
+void
+runtime·args(int32 c, uint8 **v)
+{
+ argc = c;
+ argv = v;
+ if(runtime·sysargs != nil)
+ runtime·sysargs(c, v);
+}
+
+int32 runtime·isplan9;
+int32 runtime·issolaris;
+int32 runtime·iswindows;
+
+// Information about what cpu features are available.
+// Set on startup in asm_{x86/amd64}.s.
+uint32 runtime·cpuid_ecx;
+uint32 runtime·cpuid_edx;
+
+void
+runtime·goargs(void)
+{
+ String *s;
+ int32 i;
+
+ // for windows implementation see "os" package
+ if(Windows)
+ return;
+
+ s = runtime·mallocgc(argc*sizeof s[0], nil, 0);
+ for(i=0; i<argc; i++)
+ s[i] = runtime·gostringnocopy(argv[i]);
+ os·Args.array = (byte*)s;
+ os·Args.len = argc;
+ os·Args.cap = argc;
+}
+
+void
+runtime·goenvs_unix(void)
+{
+ String *s;
+ int32 i, n;
+
+ for(n=0; argv[argc+1+n] != 0; n++)
+ ;
+
+ s = runtime·mallocgc(n*sizeof s[0], nil, 0);
+ for(i=0; i<n; i++)
+ s[i] = runtime·gostringnocopy(argv[argc+1+i]);
+ syscall·envs.array = (byte*)s;
+ syscall·envs.len = n;
+ syscall·envs.cap = n;
+}
+
+#pragma textflag NOSPLIT
+Slice
+runtime·environ()
+{
+ return syscall·envs;
+}
+
+int32
+runtime·atoi(byte *p)
+{
+ int32 n;
+
+ n = 0;
+ while('0' <= *p && *p <= '9')
+ n = n*10 + *p++ - '0';
+ return n;
+}
+
+static void
+TestAtomic64(void)
+{
+ uint64 z64, x64;
+
+ z64 = 42;
+ x64 = 0;
+ PREFETCH(&z64);
+ if(runtime·cas64(&z64, x64, 1))
+ runtime·throw("cas64 failed");
+ if(x64 != 0)
+ runtime·throw("cas64 failed");
+ x64 = 42;
+ if(!runtime·cas64(&z64, x64, 1))
+ runtime·throw("cas64 failed");
+ if(x64 != 42 || z64 != 1)
+ runtime·throw("cas64 failed");
+ if(runtime·atomicload64(&z64) != 1)
+ runtime·throw("load64 failed");
+ runtime·atomicstore64(&z64, (1ull<<40)+1);
+ if(runtime·atomicload64(&z64) != (1ull<<40)+1)
+ runtime·throw("store64 failed");
+ if(runtime·xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2)
+ runtime·throw("xadd64 failed");
+ if(runtime·atomicload64(&z64) != (2ull<<40)+2)
+ runtime·throw("xadd64 failed");
+ if(runtime·xchg64(&z64, (3ull<<40)+3) != (2ull<<40)+2)
+ runtime·throw("xchg64 failed");
+ if(runtime·atomicload64(&z64) != (3ull<<40)+3)
+ runtime·throw("xchg64 failed");
+}
+
+void
+runtime·check(void)
+{
+ int8 a;
+ uint8 b;
+ int16 c;
+ uint16 d;
+ int32 e;
+ uint32 f;
+ int64 g;
+ uint64 h;
+ float32 i, i1;
+ float64 j, j1;
+ byte *k, *k1;
+ uint16* l;
+ struct x1 {
+ byte x;
+ };
+ struct y1 {
+ struct x1 x1;
+ byte y;
+ };
+
+ if(sizeof(a) != 1) runtime·throw("bad a");
+ if(sizeof(b) != 1) runtime·throw("bad b");
+ if(sizeof(c) != 2) runtime·throw("bad c");
+ if(sizeof(d) != 2) runtime·throw("bad d");
+ if(sizeof(e) != 4) runtime·throw("bad e");
+ if(sizeof(f) != 4) runtime·throw("bad f");
+ if(sizeof(g) != 8) runtime·throw("bad g");
+ if(sizeof(h) != 8) runtime·throw("bad h");
+ if(sizeof(i) != 4) runtime·throw("bad i");
+ if(sizeof(j) != 8) runtime·throw("bad j");
+ if(sizeof(k) != sizeof(uintptr)) runtime·throw("bad k");
+ if(sizeof(l) != sizeof(uintptr)) runtime·throw("bad l");
+ if(sizeof(struct x1) != 1) runtime·throw("bad sizeof x1");
+ if(offsetof(struct y1, y) != 1) runtime·throw("bad offsetof y1.y");
+ if(sizeof(struct y1) != 2) runtime·throw("bad sizeof y1");
+
+ if(runtime·timediv(12345LL*1000000000+54321, 1000000000, &e) != 12345 || e != 54321)
+ runtime·throw("bad timediv");
+
+ uint32 z;
+ z = 1;
+ if(!runtime·cas(&z, 1, 2))
+ runtime·throw("cas1");
+ if(z != 2)
+ runtime·throw("cas2");
+
+ z = 4;
+ if(runtime·cas(&z, 5, 6))
+ runtime·throw("cas3");
+ if(z != 4)
+ runtime·throw("cas4");
+
+ k = (byte*)0xfedcb123;
+ if(sizeof(void*) == 8)
+ k = (byte*)((uintptr)k<<10);
+ if(runtime·casp((void**)&k, nil, nil))
+ runtime·throw("casp1");
+ k1 = k+1;
+ if(!runtime·casp((void**)&k, k, k1))
+ runtime·throw("casp2");
+ if(k != k1)
+ runtime·throw("casp3");
+
+ *(uint64*)&j = ~0ULL;
+ if(j == j)
+ runtime·throw("float64nan");
+ if(!(j != j))
+ runtime·throw("float64nan1");
+
+ *(uint64*)&j1 = ~1ULL;
+ if(j == j1)
+ runtime·throw("float64nan2");
+ if(!(j != j1))
+ runtime·throw("float64nan3");
+
+ *(uint32*)&i = ~0UL;
+ if(i == i)
+ runtime·throw("float32nan");
+ if(!(i != i))
+ runtime·throw("float32nan1");
+
+ *(uint32*)&i1 = ~1UL;
+ if(i == i1)
+ runtime·throw("float32nan2");
+ if(!(i != i1))
+ runtime·throw("float32nan3");
+
+ TestAtomic64();
+
+ if(FixedStack != runtime·round2(FixedStack))
+ runtime·throw("FixedStack is not power-of-2");
+}
+
+#pragma dataflag NOPTR
+DebugVars runtime·debug;
+
+static struct {
+ int8* name;
+ int32* value;
+} dbgvar[] = {
+ {"allocfreetrace", &runtime·debug.allocfreetrace},
+ {"efence", &runtime·debug.efence},
+ {"gctrace", &runtime·debug.gctrace},
+ {"gcdead", &runtime·debug.gcdead},
+ {"scheddetail", &runtime·debug.scheddetail},
+ {"schedtrace", &runtime·debug.schedtrace},
+ {"scavenge", &runtime·debug.scavenge},
+};
+
+void
+runtime·parsedebugvars(void)
+{
+ byte *p;
+ intgo i, n;
+
+ p = runtime·getenv("GODEBUG");
+ if(p == nil)
+ return;
+ for(;;) {
+ for(i=0; i<nelem(dbgvar); i++) {
+ n = runtime·findnull((byte*)dbgvar[i].name);
+ if(runtime·mcmp(p, (byte*)dbgvar[i].name, n) == 0 && p[n] == '=')
+ *dbgvar[i].value = runtime·atoi(p+n+1);
+ }
+ p = runtime·strstr(p, (byte*)",");
+ if(p == nil)
+ break;
+ p++;
+ }
+
+ p = runtime·getenv("GOTRACEBACK");
+ if(p == nil)
+ p = (byte*)"";
+ if(p[0] == '\0')
+ traceback_cache = 1<<1;
+ else if(runtime·strcmp(p, (byte*)"crash") == 0)
+ traceback_cache = (2<<1) | 1;
+ else
+ traceback_cache = runtime·atoi(p)<<1;
+}
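The GODEBUG loop above scans a comma-separated list of name=value pairs and stores each recognized value through the dbgvar table. Here is a hedged Go sketch of the same parsing; knownVars and parseGodebug are stand-ins for the table and loop, not runtime code.

	package main

	import (
		"strconv"
		"strings"
	)

	// parseGodebug assigns each recognized "name=value" pair to its variable.
	func parseGodebug(s string, knownVars map[string]*int32) {
		for _, kv := range strings.Split(s, ",") {
			i := strings.IndexByte(kv, '=')
			if i < 0 {
				continue
			}
			if p, ok := knownVars[kv[:i]]; ok {
				n, _ := strconv.Atoi(kv[i+1:])
				*p = int32(n)
			}
		}
	}

	func main() {
		var gctrace int32
		parseGodebug("gctrace=1,scavenge=0", map[string]*int32{"gctrace": &gctrace})
		println(gctrace) // prints 1
	}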
+
+// Poor man's 64-bit division.
+// This is a very special function, do not use it if you are not sure what you are doing.
+// int64 division is lowered into _divv() call on 386, which does not fit into nosplit functions.
+// Handles overflow in a time-specific manner.
+#pragma textflag NOSPLIT
+int32
+runtime·timediv(int64 v, int32 div, int32 *rem)
+{
+ int32 res, bit;
+
+ res = 0;
+ for(bit = 30; bit >= 0; bit--) {
+ if(v >= ((int64)div<<bit)) {
+ v = v - ((int64)div<<bit);
+ res += 1<<bit;
+ }
+ }
+ if(v >= (int64)div) {
+ if(rem != nil)
+ *rem = 0;
+ return 0x7fffffff;
+ }
+ if(rem != nil)
+ *rem = v;
+ return res;
+}
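timediv is ordinary binary long division written out by hand so that 386 never needs the compiler's 64-bit divide helper inside a nosplit function; quotients that need more than 31 bits saturate rather than trap. A direct Go transcription, for reading convenience only (the runtime keeps the C version):

	// timediv divides v by div using shift-and-subtract, producing at most
	// 31 quotient bits; larger quotients saturate to 0x7fffffff with rem 0.
	func timediv(v int64, div int32) (quo, rem int32) {
		for bit := 30; bit >= 0; bit-- {
			if v >= int64(div)<<uint(bit) {
				v -= int64(div) << uint(bit)
				quo += 1 << uint(bit)
			}
		}
		if v >= int64(div) {
			return 0x7fffffff, 0
		}
		return quo, int32(v)
	}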
+
+// Helpers for Go. Must be NOSPLIT, must only call NOSPLIT functions, and must not block.
+
+#pragma textflag NOSPLIT
+G*
+runtime·getg(void)
+{
+ return g;
+}
+
+#pragma textflag NOSPLIT
+M*
+runtime·acquirem(void)
+{
+ g->m->locks++;
+ return g->m;
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·releasem(M *mp)
+{
+ mp->locks--;
+ if(mp->locks == 0 && g->preempt) {
+ // restore the preemption request in case we've cleared it in newstack
+ g->stackguard0 = StackPreempt;
+ }
+}
+
+#pragma textflag NOSPLIT
+MCache*
+runtime·gomcache(void)
+{
+ return g->m->mcache;
+}
+
+#pragma textflag NOSPLIT
+Slice
+reflect·typelinks(void)
+{
+ extern Type *runtime·typelink[], *runtime·etypelink[];
+ Slice ret;
+
+ ret.array = (byte*)runtime·typelink;
+ ret.len = runtime·etypelink - runtime·typelink;
+ ret.cap = ret.len;
+ return ret;
+}
diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go
new file mode 100644
index 000000000..d5b31559a
--- /dev/null
+++ b/src/runtime/runtime.go
@@ -0,0 +1,37 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+var ticks struct {
+ lock mutex
+ val uint64
+}
+
+// Note: Called by runtime/pprof in addition to runtime code.
+func tickspersecond() int64 {
+ r := int64(atomicload64(&ticks.val))
+ if r != 0 {
+ return r
+ }
+ lock(&ticks.lock)
+ r = int64(ticks.val)
+ if r == 0 {
+ t0 := nanotime()
+ c0 := cputicks()
+ usleep(100 * 1000)
+ t1 := nanotime()
+ c1 := cputicks()
+ if t1 == t0 {
+ t1++
+ }
+ r = (c1 - c0) * 1000 * 1000 * 1000 / (t1 - t0)
+ if r == 0 {
+ r++
+ }
+ atomicstore64(&ticks.val, uint64(r))
+ }
+ unlock(&ticks.lock)
+ return r
+}
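The calibration above is a plain rate computation: sleep roughly 100ms, then scale the observed cycle-counter delta by 1e9 divided by the elapsed nanoseconds. With illustrative numbers, 3.2e8 ticks over 1e8 ns gives 3.2e9 ticks per second. A small sketch of just that arithmetic, with the same guards against a zero interval and a zero result:

	// ticksPerSecond computes the tick rate from two (cputicks, nanotime)
	// samples, guarding against zero the way tickspersecond does.
	func ticksPerSecond(c0, c1, t0, t1 int64) int64 {
		if t1 == t0 {
			t1++
		}
		r := (c1 - c0) * 1000 * 1000 * 1000 / (t1 - t0)
		if r == 0 {
			r++
		}
		return r
	}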
diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h
new file mode 100644
index 000000000..02563fd36
--- /dev/null
+++ b/src/runtime/runtime.h
@@ -0,0 +1,1079 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * basic types
+ */
+typedef signed char int8;
+typedef unsigned char uint8;
+typedef signed short int16;
+typedef unsigned short uint16;
+typedef signed int int32;
+typedef unsigned int uint32;
+typedef signed long long int int64;
+typedef unsigned long long int uint64;
+typedef float float32;
+typedef double float64;
+
+#ifdef _64BIT
+typedef uint64 uintptr;
+typedef int64 intptr;
+typedef int64 intgo; // Go's int
+typedef uint64 uintgo; // Go's uint
+#else
+typedef uint32 uintptr;
+typedef int32 intptr;
+typedef int32 intgo; // Go's int
+typedef uint32 uintgo; // Go's uint
+#endif
+
+#ifdef _64BITREG
+typedef uint64 uintreg;
+#else
+typedef uint32 uintreg;
+#endif
+
+/*
+ * get rid of C types
+ * the / / / forces a syntax error immediately,
+ * which will show "last name: XXunsigned".
+ */
+#define unsigned XXunsigned / / /
+#define signed XXsigned / / /
+#define char XXchar / / /
+#define short XXshort / / /
+#define int XXint / / /
+#define long XXlong / / /
+#define float XXfloat / / /
+#define double XXdouble / / /
+
+/*
+ * defined types
+ */
+typedef uint8 bool;
+typedef uint8 byte;
+typedef struct Func Func;
+typedef struct G G;
+typedef struct Gobuf Gobuf;
+typedef struct SudoG SudoG;
+typedef struct Mutex Mutex;
+typedef struct M M;
+typedef struct P P;
+typedef struct Note Note;
+typedef struct Slice Slice;
+typedef struct Stktop Stktop;
+typedef struct String String;
+typedef struct FuncVal FuncVal;
+typedef struct SigTab SigTab;
+typedef struct MCache MCache;
+typedef struct FixAlloc FixAlloc;
+typedef struct Iface Iface;
+typedef struct Itab Itab;
+typedef struct InterfaceType InterfaceType;
+typedef struct Eface Eface;
+typedef struct Type Type;
+typedef struct PtrType PtrType;
+typedef struct ChanType ChanType;
+typedef struct MapType MapType;
+typedef struct Defer Defer;
+typedef struct Panic Panic;
+typedef struct Hmap Hmap;
+typedef struct Hiter Hiter;
+typedef struct Hchan Hchan;
+typedef struct Complex64 Complex64;
+typedef struct Complex128 Complex128;
+typedef struct LibCall LibCall;
+typedef struct WinCallbackContext WinCallbackContext;
+typedef struct GCStats GCStats;
+typedef struct LFNode LFNode;
+typedef struct ParFor ParFor;
+typedef struct ParForThread ParForThread;
+typedef struct CgoMal CgoMal;
+typedef struct PollDesc PollDesc;
+typedef struct DebugVars DebugVars;
+typedef struct ForceGCState ForceGCState;
+
+/*
+ * Per-CPU declaration.
+ *
+ * "extern register" is a special storage class implemented by 6c, 8c, etc.
+ * On the ARM, it is an actual register; elsewhere it is a slot in thread-
+ * local storage indexed by a pseudo-register TLS. See zasmhdr in
+ * src/cmd/dist/buildruntime.c for details, and be aware that the linker may
+ * make further OS-specific changes to the compiler's output. For example,
+ * 6l/linux rewrites 0(TLS) as -8(FS).
+ *
+ * Every C file linked into a Go program must include runtime.h so that the
+ * C compiler (6c, 8c, etc.) knows to avoid other uses of these dedicated
+ * registers. The Go compiler (6g, 8g, etc.) knows to avoid them.
+ */
+extern register G* g;
+
+/*
+ * defined constants
+ */
+enum
+{
+ // G status
+ //
+ // If you add to this list, add to the list
+ // of "okay during garbage collection" status
+ // in mgc0.c too.
+ Gidle, // 0
+ Grunnable, // 1 runnable and on a run queue
+ Grunning, // 2
+ Gsyscall, // 3
+ Gwaiting, // 4
+ Gmoribund_unused, // 5 currently unused, but hardcoded in gdb scripts
+ Gdead, // 6
+ Genqueue, // 7 Only the Gscanenqueue is used.
+ Gcopystack, // 8 in this state when newstack is moving the stack
+ // the following encode that the GC is scanning the stack and what to do when it is done
+ Gscan = 0x1000, // atomicstatus&~Gscan = the non-scan state,
+ // Gscanidle = Gscan + Gidle, // Not used. Gidle only used with newly malloced gs
+	Gscanrunnable = Gscan + Grunnable, // 0x1001 When scanning completes, make it Grunnable (it is already on the run queue)
+	Gscanrunning = Gscan + Grunning, // 0x1002 Used to tell preemption newstack routine to scan preempted stack.
+	Gscansyscall = Gscan + Gsyscall, // 0x1003 When scanning completes, make it Gsyscall
+	Gscanwaiting = Gscan + Gwaiting, // 0x1004 When scanning completes, make it Gwaiting
+ // Gscanmoribund_unused, // not possible
+ // Gscandead, // not possible
+ Gscanenqueue = Gscan + Genqueue, // When scanning completes make it Grunnable and put on runqueue
+};
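As the comments above spell out, the scan states are not independent values: Gscan is a flag bit ORed onto the base status for the duration of a stack scan, which is why Gscanrunnable is 0x1001, Gscansyscall 0x1003, and so on. A tiny Go sketch of splitting an atomicstatus word back into its parts (the constant is copied from the enum; the helper is illustrative):

	const gScan = 0x1000 // mirrors Gscan above

	// splitStatus separates the scan flag from the base G status.
	func splitStatus(atomicstatus uint32) (base uint32, scanning bool) {
		return atomicstatus &^ gScan, atomicstatus&gScan != 0
	}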
+enum
+{
+ // P status
+ Pidle,
+ Prunning,
+ Psyscall,
+ Pgcstop,
+ Pdead,
+};
+enum
+{
+ true = 1,
+ false = 0,
+};
+enum
+{
+ PtrSize = sizeof(void*),
+};
+/*
+ * structures
+ */
+struct Mutex
+{
+ // Futex-based impl treats it as uint32 key,
+ // while sema-based impl as M* waitm.
+ // Used to be a union, but unions break precise GC.
+ uintptr key;
+};
+struct Note
+{
+ // Futex-based impl treats it as uint32 key,
+ // while sema-based impl as M* waitm.
+ // Used to be a union, but unions break precise GC.
+ uintptr key;
+};
+struct String
+{
+ byte* str;
+ intgo len;
+};
+struct FuncVal
+{
+ void (*fn)(void);
+ // variable-size, fn-specific data here
+};
+struct Iface
+{
+ Itab* tab;
+ void* data;
+};
+struct Eface
+{
+ Type* type;
+ void* data;
+};
+struct Complex64
+{
+ float32 real;
+ float32 imag;
+};
+struct Complex128
+{
+ float64 real;
+ float64 imag;
+};
+
+struct Slice
+{ // must not move anything
+ byte* array; // actual data
+ uintgo len; // number of elements
+ uintgo cap; // allocated number of elements
+};
+struct Gobuf
+{
+ // The offsets of sp, pc, and g are known to (hard-coded in) libmach.
+ uintptr sp;
+ uintptr pc;
+ G* g;
+ void* ctxt; // this has to be a pointer so that GC scans it
+ uintreg ret;
+ uintptr lr;
+};
+// Known to compiler.
+// Changes here must also be made in src/cmd/gc/select.c's selecttype.
+struct SudoG
+{
+ G* g;
+ uint32* selectdone;
+ SudoG* next;
+ SudoG* prev;
+ void* elem; // data element
+ int64 releasetime;
+ int32 nrelease; // -1 for acquire
+ SudoG* waitlink; // G.waiting list
+};
+struct GCStats
+{
+ // the struct must consist of only uint64's,
+	// because it is cast to uint64[].
+ uint64 nhandoff;
+ uint64 nhandoffcnt;
+ uint64 nprocyield;
+ uint64 nosyield;
+ uint64 nsleep;
+};
+
+struct LibCall
+{
+ void* fn;
+ uintptr n; // number of parameters
+ void* args; // parameters
+ uintptr r1; // return values
+ uintptr r2;
+ uintptr err; // error number
+};
+
+// describes how to handle callback
+struct WinCallbackContext
+{
+ void* gobody; // Go function to call
+ uintptr argsize; // callback arguments size (in bytes)
+ uintptr restorestack; // adjust stack on return by (in bytes) (386 only)
+ bool cleanstack;
+};
+
+struct G
+{
+ // stackguard0 can be set to StackPreempt as opposed to stackguard
+ uintptr stackguard0; // cannot move - also known to liblink, libmach, runtime/cgo
+ uintptr stackbase; // cannot move - also known to libmach, runtime/cgo
+ Panic* panic; // cannot move - also known to liblink
+ Defer* defer;
+ Gobuf sched;
+ uintptr syscallstack; // if status==Gsyscall, syscallstack = stackbase to use during gc
+ uintptr syscallsp; // if status==Gsyscall, syscallsp = sched.sp to use during gc
+ uintptr syscallpc; // if status==Gsyscall, syscallpc = sched.pc to use during gc
+ uintptr syscallguard; // if status==Gsyscall, syscallguard = stackguard to use during gc
+ uintptr stackguard; // same as stackguard0, but not set to StackPreempt
+ uintptr stack0;
+ uintptr stacksize;
+ void* param; // passed parameter on wakeup
+ uint32 atomicstatus;
+ int64 goid;
+	int64	waitsince;	// approx time when the G became blocked
+ String waitreason; // if status==Gwaiting
+ G* schedlink;
+ bool issystem; // do not output in stack dump, ignore in deadlock detector
+ bool preempt; // preemption signal, duplicates stackguard0 = StackPreempt
+ bool paniconfault; // panic (instead of crash) on unexpected fault address
+ bool preemptscan; // preempted g does scan for GC
+	bool	gcworkdone;	// debug: cleared at beginning of gc work phase cycle, set by gcphasework, tested at end of cycle
+ bool throwsplit; // must not split stack
+ int8 raceignore; // ignore race detection events
+ M* m; // for debuggers, but offset not hard-coded
+ M* lockedm;
+ int32 sig;
+ int32 writenbuf;
+ Slice writebuf;
+ uintptr sigcode0;
+ uintptr sigcode1;
+ uintptr sigpc;
+ uintptr gopc; // pc of go statement that created this goroutine
+ uintptr racectx;
+ SudoG *waiting; // sudog structures this G is waiting on (that have a valid elem ptr)
+ uintptr end[];
+};
+
+struct M
+{
+ G* g0; // goroutine with scheduling stack
+ void* moreargp; // argument pointer for more stack
+ Gobuf morebuf; // gobuf arg to morestack
+
+ // Fields not known to debuggers.
+ uint32 moreframesize; // size arguments to morestack
+ uint32 moreargsize; // known by amd64 asm to follow moreframesize
+ uintreg cret; // return value from C
+ uint64 procid; // for debuggers, but offset not hard-coded
+ G* gsignal; // signal-handling G
+ uintptr tls[4]; // thread-local storage (for x86 extern register)
+ void (*mstartfn)(void);
+ G* curg; // current running goroutine
+ G* caughtsig; // goroutine running during fatal signal
+ P* p; // attached P for executing Go code (nil if not executing Go code)
+ P* nextp;
+ int32 id;
+ int32 mallocing;
+ int32 throwing;
+ int32 gcing;
+ int32 locks;
+ int32 softfloat;
+ int32 dying;
+ int32 profilehz;
+ int32 helpgc;
+ bool spinning; // M is out of work and is actively looking for work
+ bool blocked; // M is blocked on a Note
+ uint32 fastrand;
+ uint64 ncgocall; // number of cgo calls in total
+ int32 ncgo; // number of cgo calls currently in progress
+ CgoMal* cgomal;
+ Note park;
+ M* alllink; // on allm
+ M* schedlink;
+ uint32 machport; // Return address for Mach IPC (OS X)
+ MCache* mcache;
+ G* lockedg;
+ uintptr createstack[32];// Stack that created this thread.
+ uint32 freglo[16]; // D[i] lsb and F[i]
+ uint32 freghi[16]; // D[i] msb and F[i+16]
+ uint32 fflag; // floating point compare flags
+ uint32 locked; // tracking for LockOSThread
+ M* nextwaitm; // next M waiting for lock
+ uintptr waitsema; // semaphore for parking on locks
+ uint32 waitsemacount;
+ uint32 waitsemalock;
+ GCStats gcstats;
+ bool needextram;
+ uint8 traceback;
+ bool (*waitunlockf)(G*, void*);
+ void* waitlock;
+ uintptr forkstackguard;
+ uintptr scalararg[4]; // scalar argument/return for mcall
+ void* ptrarg[4]; // pointer argument/return for mcall
+#ifdef GOOS_windows
+ void* thread; // thread handle
+ // these are here because they are too large to be on the stack
+ // of low-level NOSPLIT functions.
+ LibCall libcall;
+ uintptr libcallpc; // for cpu profiler
+ uintptr libcallsp;
+ G* libcallg;
+#endif
+#ifdef GOOS_solaris
+ int32* perrno; // pointer to TLS errno
+ // these are here because they are too large to be on the stack
+ // of low-level NOSPLIT functions.
+ LibCall libcall;
+ struct {
+ int64 tv_sec;
+ int64 tv_nsec;
+ } ts;
+ struct {
+ uintptr v[6];
+ } scratch;
+#endif
+#ifdef GOOS_plan9
+ int8* notesig;
+ byte* errstr;
+#endif
+ uintptr end[];
+};
+
+struct P
+{
+ Mutex lock;
+
+ int32 id;
+ uint32 status; // one of Pidle/Prunning/...
+ P* link;
+ uint32 schedtick; // incremented on every scheduler call
+ uint32 syscalltick; // incremented on every system call
+ M* m; // back-link to associated M (nil if idle)
+ MCache* mcache;
+ Defer* deferpool[5]; // pool of available Defer structs of different sizes (see panic.c)
+
+ // Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen.
+ uint64 goidcache;
+ uint64 goidcacheend;
+
+ // Queue of runnable goroutines.
+ uint32 runqhead;
+ uint32 runqtail;
+ G* runq[256];
+
+ // Available G's (status == Gdead)
+ G* gfree;
+ int32 gfreecnt;
+
+ byte pad[64];
+};
+
+enum {
+ // The max value of GOMAXPROCS.
+ // There are no fundamental restrictions on the value.
+ MaxGomaxprocs = 1<<8,
+};
+
+// The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
+// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
+// External locks are not recursive; a second lock is silently ignored.
+// The upper bits of m->locked record the nesting depth of calls to lockOSThread
+// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
+// Internal locks can be recursive. For instance, a lock for cgo can occur while the main
+// goroutine is holding the lock during the initialization phase.
+enum
+{
+ LockExternal = 1,
+ LockInternal = 2,
+};
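+// For example, m->locked == LockExternal + 2*LockInternal (that is, 5) would
+// mean one active LockOSThread call plus an internal lockOSThread nesting
+// depth of two.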
+
+struct Stktop
+{
+ // The offsets of these fields are known to (hard-coded in) libmach.
+ uintptr stackguard;
+ uintptr stackbase;
+ Gobuf gobuf;
+ uint32 argsize;
+
+ uint8* argp; // pointer to arguments in old frame
+};
+struct SigTab
+{
+ int32 flags;
+ int8 *name;
+};
+enum
+{
+ SigNotify = 1<<0, // let signal.Notify have signal, even if from kernel
+ SigKill = 1<<1, // if signal.Notify doesn't take it, exit quietly
+ SigThrow = 1<<2, // if signal.Notify doesn't take it, exit loudly
+ SigPanic = 1<<3, // if the signal is from the kernel, panic
+ SigDefault = 1<<4, // if the signal isn't explicitly requested, don't monitor it
+ SigHandling = 1<<5, // our signal handler is registered
+ SigIgnored = 1<<6, // the signal was ignored before we registered for it
+ SigGoExit = 1<<7, // cause all runtime procs to exit (only used on Plan 9).
+};
+
+// Layout of in-memory per-function information prepared by linker
+// See http://golang.org/s/go12symtab.
+// Keep in sync with linker and with ../../libmach/sym.c
+// and with package debug/gosym and with symtab.go in package runtime.
+struct Func
+{
+ uintptr entry; // start pc
+ int32 nameoff;// function name
+
+ int32 args; // in/out args size
+ int32 frame; // legacy frame size; use pcsp if possible
+
+ int32 pcsp;
+ int32 pcfile;
+ int32 pcln;
+ int32 npcdata;
+ int32 nfuncdata;
+};
+
+// layout of Itab known to compilers
+// allocated in non-garbage-collected memory
+struct Itab
+{
+ InterfaceType* inter;
+ Type* type;
+ Itab* link;
+ int32 bad;
+ int32 unused;
+ void (*fun[])(void);
+};
+
+#ifdef GOOS_nacl
+enum {
+ NaCl = 1,
+};
+#else
+enum {
+ NaCl = 0,
+};
+#endif
+
+#ifdef GOOS_windows
+enum {
+ Windows = 1
+};
+#else
+enum {
+ Windows = 0
+};
+#endif
+#ifdef GOOS_solaris
+enum {
+ Solaris = 1
+};
+#else
+enum {
+ Solaris = 0
+};
+#endif
+
+// Lock-free stack node.
+struct LFNode
+{
+ LFNode *next;
+ uintptr pushcnt;
+};
+
+// Parallel for descriptor.
+struct ParFor
+{
+ void (*body)(ParFor*, uint32); // executed for each element
+ uint32 done; // number of idle threads
+ uint32 nthr; // total number of threads
+ uint32 nthrmax; // maximum number of threads
+ uint32 thrseq; // thread id sequencer
+ uint32 cnt; // iteration space [0, cnt)
+ void *ctx; // arbitrary user context
+ bool wait; // if true, wait while all threads finish processing,
+ // otherwise parfor may return while other threads are still working
+ ParForThread *thr; // array of thread descriptors
+ uint32 pad; // to align ParForThread.pos for 64-bit atomic operations
+ // stats
+ uint64 nsteal;
+ uint64 nstealcnt;
+ uint64 nprocyield;
+ uint64 nosyield;
+ uint64 nsleep;
+};
+
+// Track memory allocated by code not written in Go during a cgo call,
+// so that the garbage collector can see these allocations.
+struct CgoMal
+{
+ CgoMal *next;
+ void *alloc;
+};
+
+// Holds variables parsed from GODEBUG env var.
+struct DebugVars
+{
+ int32 allocfreetrace;
+ int32 efence;
+ int32 gctrace;
+ int32 gcdead;
+ int32 scheddetail;
+ int32 schedtrace;
+ int32 scavenge;
+};
+
+// Indicates which synchronization and write barrier task to perform.
+enum
+{ // Synchronization Write barrier
+ GCoff, // stop and start nop
+ GCquiesce, // stop and start nop
+ GCstw, // stop the ps nop
+ GCmark, // scan the stacks and start no white to black
+ GCsweep, // stop and start nop
+};
+
+struct ForceGCState
+{
+ Mutex lock;
+ G* g;
+ uint32 idle;
+};
+
+extern uint32 runtime·gcphase;
+extern bool runtime·precisestack;
+extern bool runtime·copystack;
+
+/*
+ * defined macros
+ * you need super-gopher-guru privilege
+ * to add to this list.
+ */
+#define nelem(x) (sizeof(x)/sizeof((x)[0]))
+#define nil ((void*)0)
+#define offsetof(s,m) (uint32)(&(((s*)0)->m))
+#define ROUND(x, n) (((x)+(n)-1)&~(uintptr)((n)-1)) /* all-caps to mark as macro: it evaluates n twice */
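+/* e.g. with n a power of two, ROUND(13, 8) == 16 and ROUND(16, 8) == 16 */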
+
+/*
+ * known to compiler
+ */
+enum {
+ Structrnd = sizeof(uintreg),
+};
+
+byte* runtime·startup_random_data;
+uint32 runtime·startup_random_data_len;
+
+enum {
+ // hashinit wants this many random bytes
+ HashRandomBytes = 32
+};
+
+uint32 runtime·readgstatus(G*);
+void runtime·casgstatus(G*, uint32, uint32);
+void runtime·quiesce(G*);
+bool runtime·stopg(G*);
+void runtime·restartg(G*);
+void runtime·gcphasework(G*);
+
+/*
+ * deferred subroutine calls
+ */
+struct Defer
+{
+ int32 siz;
+ bool special; // not part of defer frame
+ uintptr argp; // where args were copied from
+ uintptr pc;
+ FuncVal* fn;
+ Defer* link;
+ void* args[1]; // padded to actual size
+};
+
+// argp used in Defer structs when there is no argp.
+#define NoArgs ((uintptr)-1)
+
+/*
+ * panics
+ */
+struct Panic
+{
+ uintptr argp; // pointer to arguments of deferred call run during panic; cannot move - known to liblink
+ Eface arg; // argument to panic
+ Panic* link; // link to earlier panic
+ Defer* defer; // current executing defer
+ bool recovered; // whether this panic is over
+ bool aborted; // the panic was aborted
+};
+
+/*
+ * stack traces
+ */
+typedef struct Stkframe Stkframe;
+struct Stkframe
+{
+ Func* fn; // function being run
+ uintptr pc; // program counter within fn
+ uintptr continpc; // program counter where execution can continue, or 0 if not
+ uintptr lr; // program counter at caller aka link register
+ uintptr sp; // stack pointer at pc
+ uintptr fp; // stack pointer at caller aka frame pointer
+ uintptr varp; // top of local variables
+ uintptr argp; // pointer to function arguments
+ uintptr arglen; // number of bytes at argp
+};
+
+intgo runtime·gentraceback(uintptr, uintptr, uintptr, G*, intgo, uintptr*, intgo, bool(**)(Stkframe*, void*), void*, bool);
+void runtime·traceback(uintptr pc, uintptr sp, uintptr lr, G* gp);
+void runtime·tracebackothers(G*);
+bool runtime·haszeroargs(uintptr pc);
+bool runtime·topofstack(Func*);
+enum
+{
+ // The maximum number of frames we print for a traceback
+ TracebackMaxFrames = 100,
+};
+
+/*
+ * external data
+ */
+extern String runtime·emptystring;
+extern uintptr runtime·zerobase;
+extern G** runtime·allg;
+extern Slice runtime·allgs; // []*G
+extern uintptr runtime·allglen;
+extern G* runtime·lastg;
+extern M* runtime·allm;
+extern P* runtime·allp[MaxGomaxprocs+1];
+extern int32 runtime·gomaxprocs;
+extern uint32 runtime·needextram;
+extern uint32 runtime·panicking;
+extern int8* runtime·goos;
+extern int32 runtime·ncpu;
+extern bool runtime·iscgo;
+extern void (*runtime·sysargs)(int32, uint8**);
+extern uintptr runtime·maxstring;
+extern uint32 runtime·cpuid_ecx;
+extern uint32 runtime·cpuid_edx;
+extern DebugVars runtime·debug;
+extern uintptr runtime·maxstacksize;
+extern Note runtime·signote;
+extern ForceGCState runtime·forcegc;
+
+/*
+ * common functions and data
+ */
+int32 runtime·strcmp(byte*, byte*);
+int32 runtime·strncmp(byte*, byte*, uintptr);
+byte* runtime·strstr(byte*, byte*);
+intgo runtime·findnull(byte*);
+intgo runtime·findnullw(uint16*);
+void runtime·dump(byte*, int32);
+int32 runtime·runetochar(byte*, int32);
+int32 runtime·charntorune(int32*, uint8*, int32);
+
+
+/*
+ * This macro is used when writing C functions
+ * called as if they were Go functions.
+ * Passed the address of a result before a return statement,
+ * it makes sure the result has been flushed to memory
+ * before the return.
+ *
+ * It is difficult to write such functions portably, because
+ * of the varying requirements on the alignment of the
+ * first output value. Almost all code should write such
+ * functions in .goc files, where goc2c (part of cmd/dist)
+ * can arrange the correct alignment for the target system.
+ * Goc2c also takes care of conveying to the garbage collector
+ * which parts of the argument list are inputs vs outputs.
+ *
+ * Therefore, do NOT use this macro if at all possible.
+ */
+#define FLUSH(x) USED(x)
+
+/*
+ * GoOutput is a type with the same alignment requirements as the
+ * initial output argument from a Go function. Only for use in cases
+ * where using goc2c is not possible. See comment on FLUSH above.
+ */
+typedef uint64 GoOutput;
+
+void runtime·gogo(Gobuf*);
+void runtime·gostartcall(Gobuf*, void(*)(void), void*);
+void runtime·gostartcallfn(Gobuf*, FuncVal*);
+void runtime·gosave(Gobuf*);
+void runtime·lessstack(void);
+void runtime·goargs(void);
+void runtime·goenvs(void);
+void runtime·goenvs_unix(void);
+void* runtime·getu(void);
+void runtime·throw(int8*);
+void runtime·panicstring(int8*);
+bool runtime·canpanic(G*);
+void runtime·prints(int8*);
+void runtime·printf(int8*, ...);
+void runtime·snprintf(byte*, int32, int8*, ...);
+byte* runtime·mchr(byte*, byte, byte*);
+int32 runtime·mcmp(byte*, byte*, uintptr);
+void runtime·memmove(void*, void*, uintptr);
+String runtime·catstring(String, String);
+String runtime·gostring(byte*);
+String runtime·gostringn(byte*, intgo);
+Slice runtime·gobytes(byte*, intgo);
+String runtime·gostringnocopy(byte*);
+String runtime·gostringw(uint16*);
+void runtime·initsig(void);
+void runtime·sigenable(uint32 sig);
+void runtime·sigdisable(uint32 sig);
+int32 runtime·gotraceback(bool *crash);
+void runtime·goroutineheader(G*);
+int32 runtime·open(int8*, int32, int32);
+int32 runtime·read(int32, void*, int32);
+int32 runtime·write(uintptr, void*, int32); // use uintptr to accommodate windows.
+int32 runtime·close(int32);
+int32 runtime·mincore(void*, uintptr, byte*);
+void runtime·jmpdefer(FuncVal*, uintptr);
+void runtime·exit1(int32);
+void runtime·ready(G*);
+byte* runtime·getenv(int8*);
+int32 runtime·atoi(byte*);
+void runtime·newosproc(M *mp, void *stk);
+void runtime·mstart(void);
+G* runtime·malg(int32);
+void runtime·asminit(void);
+void runtime·mpreinit(M*);
+void runtime·minit(void);
+void runtime·unminit(void);
+void runtime·signalstack(byte*, int32);
+void runtime·symtabinit(void);
+Func* runtime·findfunc(uintptr);
+int32 runtime·funcline(Func*, uintptr, String*);
+int32 runtime·funcarglen(Func*, uintptr);
+int32 runtime·funcspdelta(Func*, uintptr);
+int8* runtime·funcname(Func*);
+int32 runtime·pcdatavalue(Func*, int32, uintptr);
+void runtime·stackinit(void);
+void* runtime·stackalloc(G*, uint32);
+void runtime·stackfree(G*, void*, Stktop*);
+void runtime·shrinkstack(G*);
+MCache* runtime·allocmcache(void);
+void runtime·freemcache(MCache*);
+void runtime·mallocinit(void);
+void runtime·gcinit(void);
+void* runtime·mallocgc(uintptr size, Type* typ, uint32 flag);
+void runtime·runpanic(Panic*);
+uintptr runtime·getcallersp(void*);
+int32 runtime·mcount(void);
+int32 runtime·gcount(void);
+void runtime·mcall(void(**)(G*));
+void runtime·onM(void(**)(void));
+uint32 runtime·fastrand1(void);
+void runtime·rewindmorestack(Gobuf*);
+int32 runtime·timediv(int64, int32, int32*);
+int32 runtime·round2(int32 x); // round x up to a power of 2.
+
+// atomic operations
+bool runtime·cas(uint32*, uint32, uint32);
+bool runtime·cas64(uint64*, uint64, uint64);
+bool runtime·casp(void**, void*, void*);
+// Don't confuse with the XADD x86 instruction;
+// this one is actually 'addx', that is, add-and-fetch.
+uint32 runtime·xadd(uint32 volatile*, int32);
+uint64 runtime·xadd64(uint64 volatile*, int64);
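+// (For example, given a uint32 counter c, runtime·xadd(&c, 1) atomically
+// increments c and returns the new value.)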
+uint32 runtime·xchg(uint32 volatile*, uint32);
+uint64 runtime·xchg64(uint64 volatile*, uint64);
+void* runtime·xchgp(void* volatile*, void*);
+uint32 runtime·atomicload(uint32 volatile*);
+void runtime·atomicstore(uint32 volatile*, uint32);
+void runtime·atomicstore64(uint64 volatile*, uint64);
+uint64 runtime·atomicload64(uint64 volatile*);
+void* runtime·atomicloadp(void* volatile*);
+void runtime·atomicstorep(void* volatile*, void*);
+void runtime·atomicor8(byte volatile*, byte);
+
+void runtime·setg(G*);
+void runtime·newextram(void);
+void runtime·exit(int32);
+void runtime·breakpoint(void);
+void runtime·gosched(void);
+void runtime·gosched_m(G*);
+void runtime·schedtrace(bool);
+void runtime·park(bool(*)(G*, void*), void*, String);
+void runtime·parkunlock(Mutex*, String);
+void runtime·tsleep(int64, String);
+M* runtime·newm(void);
+void runtime·goexit(void);
+void runtime·asmcgocall(void (*fn)(void*), void*);
+int32 runtime·asmcgocall_errno(void (*fn)(void*), void*);
+void runtime·entersyscall(void);
+void runtime·entersyscallblock(void);
+void runtime·exitsyscall(void);
+G* runtime·newproc1(FuncVal*, byte*, int32, int32, void*);
+bool runtime·sigsend(int32 sig);
+intgo runtime·callers(intgo, uintptr*, intgo);
+intgo runtime·gcallers(G*, intgo, uintptr*, intgo);
+int64 runtime·nanotime(void); // monotonic time
+int64 runtime·unixnanotime(void); // real time, can skip
+void runtime·dopanic(int32);
+void runtime·startpanic(void);
+void runtime·freezetheworld(void);
+void runtime·unwindstack(G*, byte*);
+void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp);
+void runtime·resetcpuprofiler(int32);
+void runtime·setcpuprofilerate(int32);
+void runtime·usleep(uint32);
+int64 runtime·cputicks(void);
+int64 runtime·tickspersecond(void);
+void runtime·blockevent(int64, intgo);
+G* runtime·netpoll(bool);
+void runtime·netpollready(G**, PollDesc*, int32);
+uintptr runtime·netpollfd(PollDesc*);
+void** runtime·netpolluser(PollDesc*);
+bool runtime·netpollclosing(PollDesc*);
+void runtime·netpolllock(PollDesc*);
+void runtime·netpollunlock(PollDesc*);
+void runtime·crash(void);
+void runtime·parsedebugvars(void);
+void* runtime·funcdata(Func*, int32);
+void runtime·setmaxthreads_m(void);
+G* runtime·timejump(void);
+void runtime·iterate_itabs(void (**callback)(Itab*));
+void runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*));
+
+#pragma varargck argpos runtime·printf 1
+#pragma varargck type "c" int32
+#pragma varargck type "d" int32
+#pragma varargck type "d" uint32
+#pragma varargck type "D" int64
+#pragma varargck type "D" uint64
+#pragma varargck type "x" int32
+#pragma varargck type "x" uint32
+#pragma varargck type "X" int64
+#pragma varargck type "X" uint64
+#pragma varargck type "p" void*
+#pragma varargck type "p" uintptr
+#pragma varargck type "s" int8*
+#pragma varargck type "s" uint8*
+#pragma varargck type "S" String
+
+void runtime·stoptheworld(void);
+void runtime·starttheworld(void);
+extern uint32 runtime·worldsema;
+
+/*
+ * mutual exclusion locks. in the uncontended case,
+ * as fast as spin locks (just a few user-level instructions),
+ * but on the contention path they sleep in the kernel.
+ * a zeroed Mutex is unlocked (no need to initialize each lock).
+ */
+void runtime·lock(Mutex*);
+void runtime·unlock(Mutex*);
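+// A minimal usage sketch (examplelock and the surrounding code are
+// illustrative only, not part of the runtime):
+//
+//	static Mutex examplelock;	// a zeroed Mutex starts out unlocked
+//
+//	runtime·lock(&examplelock);
+//	// ... touch the state guarded by examplelock ...
+//	runtime·unlock(&examplelock);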
+
+/*
+ * sleep and wakeup on one-time events.
+ * before any calls to notesleep or notewakeup,
+ * must call noteclear to initialize the Note.
+ * then, exactly one thread can call notesleep
+ * and exactly one thread can call notewakeup (once).
+ * once notewakeup has been called, the notesleep
+ * will return. future notesleep will return immediately.
+ * subsequent noteclear must be called only after
+ * previous notesleep has returned, e.g. it's disallowed
+ * to call noteclear straight after notewakeup.
+ *
+ * notetsleep is like notesleep but wakes up after
+ * a given number of nanoseconds even if the event
+ * has not yet happened. if a goroutine uses notetsleep to
+ * wake up early, it must wait to call noteclear until it
+ * can be sure that no other goroutine is calling
+ * notewakeup.
+ *
+ * notesleep/notetsleep are generally called on g0,
+ * notetsleepg is similar to notetsleep but is called on user g.
+ */
+void runtime·noteclear(Note*);
+void runtime·notesleep(Note*);
+void runtime·notewakeup(Note*);
+bool runtime·notetsleep(Note*, int64); // false - timeout
+bool runtime·notetsleepg(Note*, int64); // false - timeout
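+// A minimal usage sketch (examplenote is illustrative only):
+//
+//	static Note examplenote;
+//
+//	runtime·noteclear(&examplenote);	// initialize before any sleep/wakeup
+//	// in one thread:
+//	runtime·notesleep(&examplenote);	// returns once notewakeup has run
+//	// in another thread:
+//	runtime·notewakeup(&examplenote);	// called exactly once per noteclear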
+
+/*
+ * low-level synchronization for implementing the above
+ */
+uintptr runtime·semacreate(void);
+int32 runtime·semasleep(int64);
+void runtime·semawakeup(M*);
+// or
+void runtime·futexsleep(uint32*, uint32, int64);
+void runtime·futexwakeup(uint32*, uint32);
+
+/*
+ * Mutex-free stack.
+ * Initialize uint64 head to 0, compare with 0 to test for emptiness.
+ * The stack does not keep pointers to nodes,
+ * so they can be garbage collected if there are no other pointers to nodes.
+ */
+void runtime·lfstackpush(uint64 *head, LFNode *node);
+LFNode* runtime·lfstackpop(uint64 *head);
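+// A minimal usage sketch (examplehead and examplenode are illustrative only):
+//
+//	static uint64 examplehead;	// zero head = empty stack
+//	LFNode *examplenode, *popped;
+//
+//	runtime·lfstackpush(&examplehead, examplenode);
+//	popped = runtime·lfstackpop(&examplehead);	// nil when the stack is empty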
+
+/*
+ * Parallel for over [0, n).
+ * body() is executed for each iteration.
+ * nthr - total number of worker threads.
+ * ctx - arbitrary user context.
+ * if wait=true, threads return from parfor() when all work is done;
+ * otherwise, threads can return while other threads are still finishing processing.
+ */
+ParFor* runtime·parforalloc(uint32 nthrmax);
+void runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32));
+void runtime·parfordo(ParFor *desc);
+void runtime·parforiters(ParFor*, uintptr, uintptr*, uintptr*);
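+// A minimal usage sketch (examplebody and the counts are illustrative only;
+// each participating worker thread is assumed to call parfordo on the same
+// descriptor):
+//
+//	static void
+//	examplebody(ParFor *desc, uint32 i)
+//	{
+//		// process iteration i
+//	}
+//
+//	desc = runtime·parforalloc(nthrmax);
+//	runtime·parforsetup(desc, nthr, n, nil, true, examplebody);
+//	runtime·parfordo(desc);	// called by each of the nthr worker threads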
+
+/*
+ * low level C-called
+ */
+// for mmap, we only pass the lower 32 bits of file offset to the
+// assembly routine; the higher bits (if required) should be provided
+// by the assembly routine as 0.
+uint8* runtime·mmap(byte*, uintptr, int32, int32, int32, uint32);
+void runtime·munmap(byte*, uintptr);
+void runtime·madvise(byte*, uintptr, int32);
+void runtime·memclr(byte*, uintptr);
+void runtime·setcallerpc(void*, void*);
+void* runtime·getcallerpc(void*);
+void runtime·printbool(bool);
+void runtime·printbyte(int8);
+void runtime·printfloat(float64);
+void runtime·printint(int64);
+void runtime·printiface(Iface);
+void runtime·printeface(Eface);
+void runtime·printstring(String);
+void runtime·printpc(void*);
+void runtime·printpointer(void*);
+void runtime·printuint(uint64);
+void runtime·printhex(uint64);
+void runtime·printslice(Slice);
+void runtime·printcomplex(Complex128);
+
+/*
+ * runtime go-called
+ */
+void runtime·gopanic(Eface);
+void runtime·panicindex(void);
+void runtime·panicslice(void);
+void runtime·panicdivide(void);
+
+/*
+ * runtime c-called (but written in Go)
+ */
+void runtime·printany(Eface);
+void runtime·newTypeAssertionError(String*, String*, String*, String*, Eface*);
+void runtime·newErrorString(String, Eface*);
+void runtime·newErrorCString(int8*, Eface*);
+void runtime·fadd64c(uint64, uint64, uint64*);
+void runtime·fsub64c(uint64, uint64, uint64*);
+void runtime·fmul64c(uint64, uint64, uint64*);
+void runtime·fdiv64c(uint64, uint64, uint64*);
+void runtime·fneg64c(uint64, uint64*);
+void runtime·f32to64c(uint32, uint64*);
+void runtime·f64to32c(uint64, uint32*);
+void runtime·fcmp64c(uint64, uint64, int32*, bool*);
+void runtime·fintto64c(int64, uint64*);
+void runtime·f64tointc(uint64, int64*, bool*);
+
+/*
+ * wrapped for go users
+ */
+float64 runtime·Inf(int32 sign);
+float64 runtime·NaN(void);
+float32 runtime·float32frombits(uint32 i);
+uint32 runtime·float32tobits(float32 f);
+float64 runtime·float64frombits(uint64 i);
+uint64 runtime·float64tobits(float64 f);
+float64 runtime·frexp(float64 d, int32 *ep);
+bool runtime·isInf(float64 f, int32 sign);
+bool runtime·isNaN(float64 f);
+float64 runtime·ldexp(float64 d, int32 e);
+float64 runtime·modf(float64 d, float64 *ip);
+void runtime·semacquire(uint32*, bool);
+void runtime·semrelease(uint32*);
+int32 runtime·gomaxprocsfunc(int32 n);
+void runtime·procyield(uint32);
+void runtime·osyield(void);
+void runtime·lockOSThread(void);
+void runtime·unlockOSThread(void);
+
+bool runtime·showframe(Func*, G*);
+void runtime·printcreatedby(G*);
+
+void runtime·ifaceE2I(InterfaceType*, Eface, Iface*);
+bool runtime·ifaceE2I2(InterfaceType*, Eface, Iface*);
+uintptr runtime·memlimit(void);
+
+// float.c
+extern float64 runtime·nan;
+extern float64 runtime·posinf;
+extern float64 runtime·neginf;
+extern uint64 ·nan;
+extern uint64 ·posinf;
+extern uint64 ·neginf;
+#define ISNAN(f) ((f) != (f))
+
+enum
+{
+ UseSpanType = 1,
+};
diff --git a/src/runtime/runtime_linux_test.go b/src/runtime/runtime_linux_test.go
new file mode 100644
index 000000000..5344ed205
--- /dev/null
+++ b/src/runtime/runtime_linux_test.go
@@ -0,0 +1,29 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "syscall"
+ "testing"
+)
+
+var pid, tid int
+
+func init() {
+ // Record pid and tid of init thread for use during test.
+ // The call to LockOSThread is just to exercise it;
+ // we can't test that it does anything.
+ // Instead we're testing that the conditions are good
+ // for how it is used in init (must be on main thread).
+ pid, tid = syscall.Getpid(), syscall.Gettid()
+ LockOSThread()
+}
+
+func TestLockOSThread(t *testing.T) {
+ if pid != tid {
+ t.Fatalf("pid=%d but tid=%d", pid, tid)
+ }
+}
diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go
new file mode 100644
index 000000000..cffc9f7d3
--- /dev/null
+++ b/src/runtime/runtime_test.go
@@ -0,0 +1,243 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "io"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ . "runtime"
+ "runtime/debug"
+ "strconv"
+ "strings"
+ "testing"
+ "unsafe"
+)
+
+var errf error
+
+func errfn() error {
+ return errf
+}
+
+func errfn1() error {
+ return io.EOF
+}
+
+func BenchmarkIfaceCmp100(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ for j := 0; j < 100; j++ {
+ if errfn() == io.EOF {
+ b.Fatal("bad comparison")
+ }
+ }
+ }
+}
+
+func BenchmarkIfaceCmpNil100(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ for j := 0; j < 100; j++ {
+ if errfn1() == nil {
+ b.Fatal("bad comparison")
+ }
+ }
+ }
+}
+
+func BenchmarkDefer(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ defer1()
+ }
+}
+
+func defer1() {
+ defer func(x, y, z int) {
+ if recover() != nil || x != 1 || y != 2 || z != 3 {
+ panic("bad recover")
+ }
+ }(1, 2, 3)
+ return
+}
+
+func BenchmarkDefer10(b *testing.B) {
+ for i := 0; i < b.N/10; i++ {
+ defer2()
+ }
+}
+
+func defer2() {
+ for i := 0; i < 10; i++ {
+ defer func(x, y, z int) {
+ if recover() != nil || x != 1 || y != 2 || z != 3 {
+ panic("bad recover")
+ }
+ }(1, 2, 3)
+ }
+}
+
+func BenchmarkDeferMany(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ defer func(x, y, z int) {
+ if recover() != nil || x != 1 || y != 2 || z != 3 {
+ panic("bad recover")
+ }
+ }(1, 2, 3)
+ }
+}
+
+// The profiling signal handler needs to know whether it is executing runtime.gogo.
+// The constant RuntimeGogoBytes in arch_*.h gives the size of the function;
+// we don't have a way to obtain it from the linker (perhaps someday).
+// Test that the constant matches the size determined by 'go tool nm -S'.
+// The value reported will include the padding between runtime.gogo and the
+// next function in memory. That's fine.
+func TestRuntimeGogoBytes(t *testing.T) {
+ switch GOOS {
+ case "android", "nacl":
+ t.Skipf("skipping on %s", GOOS)
+ }
+
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ out, err := exec.Command("go", "build", "-o", dir+"/hello", "../../test/helloworld.go").CombinedOutput()
+ if err != nil {
+ t.Fatalf("building hello world: %v\n%s", err, out)
+ }
+
+ out, err = exec.Command("go", "tool", "nm", "-size", dir+"/hello").CombinedOutput()
+ if err != nil {
+ t.Fatalf("go tool nm: %v\n%s", err, out)
+ }
+
+ for _, line := range strings.Split(string(out), "\n") {
+ f := strings.Fields(line)
+ if len(f) == 4 && f[3] == "runtime.gogo" {
+ size, _ := strconv.Atoi(f[1])
+ if GogoBytes() != int32(size) {
+ t.Fatalf("RuntimeGogoBytes = %d, should be %d", GogoBytes(), size)
+ }
+ return
+ }
+ }
+
+ t.Fatalf("go tool nm did not report size for runtime.gogo")
+}
+
+// golang.org/issue/7063
+func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
+ SetCPUProfileRate(0)
+}
+
+// Addresses to test for faulting behavior.
+// This is less a test of SetPanicOnFault and more a check that
+// the operating system and the runtime can process these faults
+// correctly. That is, we're indirectly testing that without SetPanicOnFault
+// these would manage to turn into ordinary crashes.
+// Note that these are truncated on 32-bit systems, so the bottom 32 bits
+// of the larger addresses must themselves be invalid addresses.
+// We might get unlucky and the OS might have mapped one of these
+// addresses, but probably not: they're all in the first page, very high
+// addresses that normally an OS would reserve for itself, or malformed
+// addresses. Even so, we might have to remove one or two on different
+// systems. We will see.
+
+var faultAddrs = []uint64{
+ // low addresses
+ 0,
+ 1,
+ 0xfff,
+ // high (kernel) addresses
+ // or else malformed.
+ 0xffffffffffffffff,
+ 0xfffffffffffff001,
+ // no 0xffffffffffff0001; 0xffff0001 is mapped for 32-bit user space on OS X
+ // no 0xfffffffffff00001; 0xfff00001 is mapped for 32-bit user space sometimes on Linux
+ 0xffffffffff000001,
+ 0xfffffffff0000001,
+ 0xffffffff00000001,
+ 0xfffffff000000001,
+ 0xffffff0000000001,
+ 0xfffff00000000001,
+ 0xffff000000000001,
+ 0xfff0000000000001,
+ 0xff00000000000001,
+ 0xf000000000000001,
+ 0x8000000000000001,
+}
+
+func TestSetPanicOnFault(t *testing.T) {
+ // This currently results in a fault in the signal trampoline on
+ // dragonfly/386 - see issue 7421.
+ if GOOS == "dragonfly" && GOARCH == "386" {
+ t.Skip("skipping test on dragonfly/386")
+ }
+
+ old := debug.SetPanicOnFault(true)
+ defer debug.SetPanicOnFault(old)
+
+ for _, addr := range faultAddrs {
+ testSetPanicOnFault(t, uintptr(addr))
+ }
+}
+
+func testSetPanicOnFault(t *testing.T, addr uintptr) {
+ if GOOS == "nacl" {
+ t.Skip("nacl doesn't seem to fault on high addresses")
+ }
+
+ defer func() {
+ if err := recover(); err == nil {
+ t.Fatalf("did not find error in recover")
+ }
+ }()
+
+ var p *int
+ p = (*int)(unsafe.Pointer(addr))
+ println(*p)
+ t.Fatalf("still here - should have faulted on address %#x", addr)
+}
+
+func eqstring_generic(s1, s2 string) bool {
+ if len(s1) != len(s2) {
+ return false
+ }
+ // optimization in assembly versions:
+ // if s1.str == s2.str { return true }
+ for i := 0; i < len(s1); i++ {
+ if s1[i] != s2[i] {
+ return false
+ }
+ }
+ return true
+}
+
+func TestEqString(t *testing.T) {
+ // This isn't really an exhaustive test of eqstring, it's
+ // just a convenient way of documenting (via eqstring_generic)
+ // what eqstring does.
+ s := []string{
+ "",
+ "a",
+ "c",
+ "aaa",
+ "ccc",
+ "cccc"[:3], // same contents, different string
+ "1234567890",
+ }
+ for _, s1 := range s {
+ for _, s2 := range s {
+ x := s1 == s2
+ y := eqstring_generic(s1, s2)
+ if x != y {
+ t.Errorf(`eqstring("%s","%s") = %t, want %t`, s1, s2, x, y)
+ }
+ }
+ }
+}
diff --git a/src/runtime/runtime_unix_test.go b/src/runtime/runtime_unix_test.go
new file mode 100644
index 000000000..963de8cdb
--- /dev/null
+++ b/src/runtime/runtime_unix_test.go
@@ -0,0 +1,56 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Only works on systems with syscall.Close.
+// We need a fast system call to provoke the race,
+// and Close(-1) is nearly universally fast.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd plan9
+
+package runtime_test
+
+import (
+ "runtime"
+ "sync"
+ "sync/atomic"
+ "syscall"
+ "testing"
+)
+
+func TestGoroutineProfile(t *testing.T) {
+ // GoroutineProfile used to use the wrong starting sp for
+ // goroutines coming out of system calls, causing possible
+ // crashes.
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(100))
+
+ var stop uint32
+ defer atomic.StoreUint32(&stop, 1) // in case of panic
+
+ var wg sync.WaitGroup
+ for i := 0; i < 4; i++ {
+ wg.Add(1)
+ go func() {
+ for atomic.LoadUint32(&stop) == 0 {
+ syscall.Close(-1)
+ }
+ wg.Done()
+ }()
+ }
+
+ max := 10000
+ if testing.Short() {
+ max = 100
+ }
+ stk := make([]runtime.StackRecord, 100)
+ for n := 0; n < max; n++ {
+ _, ok := runtime.GoroutineProfile(stk)
+ if !ok {
+ t.Fatalf("GoroutineProfile failed")
+ }
+ }
+
+ // If the program didn't crash, we passed.
+ atomic.StoreUint32(&stop, 1)
+ wg.Wait()
+}
diff --git a/src/runtime/select.go b/src/runtime/select.go
new file mode 100644
index 000000000..6d2531e7f
--- /dev/null
+++ b/src/runtime/select.go
@@ -0,0 +1,636 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// This file contains the implementation of Go select statements.
+
+import "unsafe"
+
+const (
+ debugSelect = false
+)
+
+var (
+ chansendpc = funcPC(chansend)
+ chanrecvpc = funcPC(chanrecv)
+)
+
+func selectsize(size uintptr) uintptr {
+ selsize := unsafe.Sizeof(_select{}) +
+ (size-1)*unsafe.Sizeof(_select{}.scase[0]) +
+ size*unsafe.Sizeof(*_select{}.lockorder) +
+ size*unsafe.Sizeof(*_select{}.pollorder)
+ return round(selsize, _Int64Align)
+}
+
+func newselect(sel *_select, selsize int64, size int32) {
+ if selsize != int64(selectsize(uintptr(size))) {
+ print("runtime: bad select size ", selsize, ", want ", selectsize(uintptr(size)), "\n")
+ gothrow("bad select size")
+ }
+ sel.tcase = uint16(size)
+ sel.ncase = 0
+ sel.lockorder = (**hchan)(add(unsafe.Pointer(&sel.scase), uintptr(size)*unsafe.Sizeof(_select{}.scase[0])))
+ sel.pollorder = (*uint16)(add(unsafe.Pointer(sel.lockorder), uintptr(size)*unsafe.Sizeof(*_select{}.lockorder)))
+
+ if debugSelect {
+ print("newselect s=", sel, " size=", size, "\n")
+ }
+}
+
+//go:nosplit
+func selectsend(sel *_select, c *hchan, elem unsafe.Pointer) (selected bool) {
+ // nil cases do not compete
+ if c != nil {
+ selectsendImpl(sel, c, getcallerpc(unsafe.Pointer(&sel)), elem, uintptr(unsafe.Pointer(&selected))-uintptr(unsafe.Pointer(&sel)))
+ }
+ return
+}
+
+// cut in half to give stack a chance to split
+func selectsendImpl(sel *_select, c *hchan, pc uintptr, elem unsafe.Pointer, so uintptr) {
+ i := sel.ncase
+ if i >= sel.tcase {
+ gothrow("selectsend: too many cases")
+ }
+ sel.ncase = i + 1
+ cas := (*scase)(add(unsafe.Pointer(&sel.scase), uintptr(i)*unsafe.Sizeof(sel.scase[0])))
+
+ cas.pc = pc
+ cas._chan = c
+ cas.so = uint16(so)
+ cas.kind = _CaseSend
+ cas.elem = elem
+
+ if debugSelect {
+ print("selectsend s=", sel, " pc=", hex(cas.pc), " chan=", cas._chan, " so=", cas.so, "\n")
+ }
+}
+
+//go:nosplit
+func selectrecv(sel *_select, c *hchan, elem unsafe.Pointer) (selected bool) {
+ // nil cases do not compete
+ if c != nil {
+ selectrecvImpl(sel, c, getcallerpc(unsafe.Pointer(&sel)), elem, nil, uintptr(unsafe.Pointer(&selected))-uintptr(unsafe.Pointer(&sel)))
+ }
+ return
+}
+
+//go:nosplit
+func selectrecv2(sel *_select, c *hchan, elem unsafe.Pointer, received *bool) (selected bool) {
+ // nil cases do not compete
+ if c != nil {
+ selectrecvImpl(sel, c, getcallerpc(unsafe.Pointer(&sel)), elem, received, uintptr(unsafe.Pointer(&selected))-uintptr(unsafe.Pointer(&sel)))
+ }
+ return
+}
+
+func selectrecvImpl(sel *_select, c *hchan, pc uintptr, elem unsafe.Pointer, received *bool, so uintptr) {
+ i := sel.ncase
+ if i >= sel.tcase {
+ gothrow("selectrecv: too many cases")
+ }
+ sel.ncase = i + 1
+ cas := (*scase)(add(unsafe.Pointer(&sel.scase), uintptr(i)*unsafe.Sizeof(sel.scase[0])))
+ cas.pc = pc
+ cas._chan = c
+ cas.so = uint16(so)
+ cas.kind = _CaseRecv
+ cas.elem = elem
+ cas.receivedp = received
+
+ if debugSelect {
+ print("selectrecv s=", sel, " pc=", hex(cas.pc), " chan=", cas._chan, " so=", cas.so, "\n")
+ }
+}
+
+//go:nosplit
+func selectdefault(sel *_select) (selected bool) {
+ selectdefaultImpl(sel, getcallerpc(unsafe.Pointer(&sel)), uintptr(unsafe.Pointer(&selected))-uintptr(unsafe.Pointer(&sel)))
+ return
+}
+
+func selectdefaultImpl(sel *_select, callerpc uintptr, so uintptr) {
+ i := sel.ncase
+ if i >= sel.tcase {
+ gothrow("selectdefault: too many cases")
+ }
+ sel.ncase = i + 1
+ cas := (*scase)(add(unsafe.Pointer(&sel.scase), uintptr(i)*unsafe.Sizeof(sel.scase[0])))
+ cas.pc = callerpc
+ cas._chan = nil
+ cas.so = uint16(so)
+ cas.kind = _CaseDefault
+
+ if debugSelect {
+ print("selectdefault s=", sel, " pc=", hex(cas.pc), " so=", cas.so, "\n")
+ }
+}
+
+func sellock(sel *_select) {
+ lockslice := sliceStruct{unsafe.Pointer(sel.lockorder), int(sel.ncase), int(sel.ncase)}
+ lockorder := *(*[]*hchan)(unsafe.Pointer(&lockslice))
+ var c *hchan
+ for _, c0 := range lockorder {
+ if c0 != nil && c0 != c {
+ c = c0
+ lock(&c.lock)
+ }
+ }
+}
+
+func selunlock(sel *_select) {
+ // We must be very careful here to not touch sel after we have unlocked
+ // the last lock, because sel can be freed right after the last unlock.
+ // Consider the following situation.
+ // First M calls runtime·park() in runtime·selectgo() passing the sel.
+ // Once runtime·park() has unlocked the last lock, another M makes
+ // the G that calls select runnable again and schedules it for execution.
+ // When the G runs on another M, it locks all the locks and frees sel.
+ // Now if the first M touches sel, it will access freed memory.
+ n := int(sel.ncase)
+ r := 0
+ lockslice := sliceStruct{unsafe.Pointer(sel.lockorder), n, n}
+ lockorder := *(*[]*hchan)(unsafe.Pointer(&lockslice))
+ // skip the default case
+ if n > 0 && lockorder[0] == nil {
+ r = 1
+ }
+ for i := n - 1; i >= r; i-- {
+ c := lockorder[i]
+ if i > 0 && c == lockorder[i-1] {
+ continue // will unlock it on the next iteration
+ }
+ unlock(&c.lock)
+ }
+}
+
+func selparkcommit(gp *g, sel *_select) bool {
+ selunlock(sel)
+ return true
+}
+
+func block() {
+ gopark(nil, nil, "select (no cases)") // forever
+}
+
+// overwrites return pc on stack to signal which case of the select
+// to run, so cannot appear at the top of a split stack.
+//go:nosplit
+func selectgo(sel *_select) {
+ pc, offset := selectgoImpl(sel)
+ *(*bool)(add(unsafe.Pointer(&sel), uintptr(offset))) = true
+ setcallerpc(unsafe.Pointer(&sel), pc)
+}
+
+// selectgoImpl returns scase.pc and scase.so for the select
+// case which fired.
+func selectgoImpl(sel *_select) (uintptr, uint16) {
+ if debugSelect {
+ print("select: sel=", sel, "\n")
+ }
+
+ scaseslice := sliceStruct{unsafe.Pointer(&sel.scase), int(sel.ncase), int(sel.ncase)}
+ scases := *(*[]scase)(unsafe.Pointer(&scaseslice))
+
+ var t0 int64
+ if blockprofilerate > 0 {
+ t0 = cputicks()
+ for i := 0; i < int(sel.ncase); i++ {
+ scases[i].releasetime = -1
+ }
+ }
+
+ // The compiler rewrites selects that statically have
+ // only 0 or 1 cases plus default into simpler constructs.
+ // The only way we can end up with such small sel.ncase
+ // values here is for a larger select in which most channels
+ // have been nilled out. The general code handles those
+ // cases correctly, and they are rare enough not to bother
+ // optimizing (and needing to test).
+
+ // generate permuted order
+ pollslice := sliceStruct{unsafe.Pointer(sel.pollorder), int(sel.ncase), int(sel.ncase)}
+ pollorder := *(*[]uint16)(unsafe.Pointer(&pollslice))
+ for i := 0; i < int(sel.ncase); i++ {
+ pollorder[i] = uint16(i)
+ }
+ for i := 1; i < int(sel.ncase); i++ {
+ o := pollorder[i]
+ j := int(fastrand1()) % (i + 1)
+ pollorder[i] = pollorder[j]
+ pollorder[j] = o
+ }
+
+ // sort the cases by Hchan address to get the locking order.
+ // simple heap sort, to guarantee n log n time and constant stack footprint.
+ lockslice := sliceStruct{unsafe.Pointer(sel.lockorder), int(sel.ncase), int(sel.ncase)}
+ lockorder := *(*[]*hchan)(unsafe.Pointer(&lockslice))
+ for i := 0; i < int(sel.ncase); i++ {
+ j := i
+ c := scases[j]._chan
+ for j > 0 && lockorder[(j-1)/2].sortkey() < c.sortkey() {
+ k := (j - 1) / 2
+ lockorder[j] = lockorder[k]
+ j = k
+ }
+ lockorder[j] = c
+ }
+ for i := int(sel.ncase) - 1; i >= 0; i-- {
+ c := lockorder[i]
+ lockorder[i] = lockorder[0]
+ j := 0
+ for {
+ k := j*2 + 1
+ if k >= i {
+ break
+ }
+ if k+1 < i && lockorder[k].sortkey() < lockorder[k+1].sortkey() {
+ k++
+ }
+ if c.sortkey() < lockorder[k].sortkey() {
+ lockorder[j] = lockorder[k]
+ j = k
+ continue
+ }
+ break
+ }
+ lockorder[j] = c
+ }
+ /*
+ for i := 0; i+1 < int(sel.ncase); i++ {
+ if lockorder[i].sortkey() > lockorder[i+1].sortkey() {
+ print("i=", i, " x=", lockorder[i], " y=", lockorder[i+1], "\n")
+ gothrow("select: broken sort")
+ }
+ }
+ */
+
+ // lock all the channels involved in the select
+ sellock(sel)
+
+ var (
+ gp *g
+ done uint32
+ sg *sudog
+ c *hchan
+ k *scase
+ sglist *sudog
+ sgnext *sudog
+ )
+
+loop:
+ // pass 1 - look for something already waiting
+ var dfl *scase
+ var cas *scase
+ for i := 0; i < int(sel.ncase); i++ {
+ cas = &scases[pollorder[i]]
+ c = cas._chan
+
+ switch cas.kind {
+ case _CaseRecv:
+ if c.dataqsiz > 0 {
+ if c.qcount > 0 {
+ goto asyncrecv
+ }
+ } else {
+ sg = c.sendq.dequeue()
+ if sg != nil {
+ goto syncrecv
+ }
+ }
+ if c.closed != 0 {
+ goto rclose
+ }
+
+ case _CaseSend:
+ if raceenabled {
+ racereadpc(unsafe.Pointer(c), cas.pc, chansendpc)
+ }
+ if c.closed != 0 {
+ goto sclose
+ }
+ if c.dataqsiz > 0 {
+ if c.qcount < c.dataqsiz {
+ goto asyncsend
+ }
+ } else {
+ sg = c.recvq.dequeue()
+ if sg != nil {
+ goto syncsend
+ }
+ }
+
+ case _CaseDefault:
+ dfl = cas
+ }
+ }
+
+ if dfl != nil {
+ selunlock(sel)
+ cas = dfl
+ goto retc
+ }
+
+ // pass 2 - enqueue on all chans
+ gp = getg()
+ done = 0
+ for i := 0; i < int(sel.ncase); i++ {
+ cas = &scases[pollorder[i]]
+ c = cas._chan
+ sg := acquireSudog()
+ sg.g = gp
+ // Note: selectdone is adjusted for stack copies in stack.c:adjustsudogs
+ sg.selectdone = (*uint32)(noescape(unsafe.Pointer(&done)))
+ sg.elem = cas.elem
+ sg.releasetime = 0
+ if t0 != 0 {
+ sg.releasetime = -1
+ }
+ sg.waitlink = gp.waiting
+ gp.waiting = sg
+
+ switch cas.kind {
+ case _CaseRecv:
+ c.recvq.enqueue(sg)
+
+ case _CaseSend:
+ c.sendq.enqueue(sg)
+ }
+ }
+
+ // wait for someone to wake us up
+ gp.param = nil
+ gopark(unsafe.Pointer(funcPC(selparkcommit)), unsafe.Pointer(sel), "select")
+
+ // someone woke us up
+ sellock(sel)
+ sg = (*sudog)(gp.param)
+
+ // pass 3 - dequeue from unsuccessful chans
+ // otherwise they stack up on quiet channels
+ // record the successful case, if any.
+ // We singly-linked up the SudoGs in case order, so when
+ // iterating through the linked list they are in reverse order.
+ cas = nil
+ sglist = gp.waiting
+ gp.waiting = nil
+ for i := int(sel.ncase) - 1; i >= 0; i-- {
+ k = &scases[pollorder[i]]
+ if sglist.releasetime > 0 {
+ k.releasetime = sglist.releasetime
+ }
+ if sg == sglist {
+ cas = k
+ } else {
+ c = k._chan
+ if k.kind == _CaseSend {
+ c.sendq.dequeueg(gp)
+ } else {
+ c.recvq.dequeueg(gp)
+ }
+ }
+ sgnext = sglist.waitlink
+ releaseSudog(sglist)
+ sglist = sgnext
+ }
+
+ if cas == nil {
+ goto loop
+ }
+
+ c = cas._chan
+
+ if c.dataqsiz > 0 {
+ gothrow("selectgo: shouldn't happen")
+ }
+
+ if debugSelect {
+ print("wait-return: sel=", sel, " c=", c, " cas=", cas, " kind=", cas.kind, "\n")
+ }
+
+ if cas.kind == _CaseRecv {
+ if cas.receivedp != nil {
+ *cas.receivedp = true
+ }
+ }
+
+ if raceenabled {
+ if cas.kind == _CaseRecv && cas.elem != nil {
+ raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc)
+ } else if cas.kind == _CaseSend {
+ raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc)
+ }
+ }
+
+ selunlock(sel)
+ goto retc
+
+asyncrecv:
+ // can receive from buffer
+ if raceenabled {
+ if cas.elem != nil {
+ raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc)
+ }
+ raceacquire(chanbuf(c, c.recvx))
+ racerelease(chanbuf(c, c.recvx))
+ }
+ if cas.receivedp != nil {
+ *cas.receivedp = true
+ }
+ if cas.elem != nil {
+ memmove(cas.elem, chanbuf(c, c.recvx), uintptr(c.elemsize))
+ }
+ memclr(chanbuf(c, c.recvx), uintptr(c.elemsize))
+ c.recvx++
+ if c.recvx == c.dataqsiz {
+ c.recvx = 0
+ }
+ c.qcount--
+ sg = c.sendq.dequeue()
+ if sg != nil {
+ gp = sg.g
+ selunlock(sel)
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+ } else {
+ selunlock(sel)
+ }
+ goto retc
+
+asyncsend:
+ // can send to buffer
+ if raceenabled {
+ raceacquire(chanbuf(c, c.sendx))
+ racerelease(chanbuf(c, c.sendx))
+ raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc)
+ }
+ memmove(chanbuf(c, c.sendx), cas.elem, uintptr(c.elemsize))
+ c.sendx++
+ if c.sendx == c.dataqsiz {
+ c.sendx = 0
+ }
+ c.qcount++
+ sg = c.recvq.dequeue()
+ if sg != nil {
+ gp = sg.g
+ selunlock(sel)
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+ } else {
+ selunlock(sel)
+ }
+ goto retc
+
+syncrecv:
+ // can receive from sleeping sender (sg)
+ if raceenabled {
+ if cas.elem != nil {
+ raceWriteObjectPC(c.elemtype, cas.elem, cas.pc, chanrecvpc)
+ }
+ racesync(c, sg)
+ }
+ selunlock(sel)
+ if debugSelect {
+ print("syncrecv: sel=", sel, " c=", c, "\n")
+ }
+ if cas.receivedp != nil {
+ *cas.receivedp = true
+ }
+ if cas.elem != nil {
+ memmove(cas.elem, sg.elem, uintptr(c.elemsize))
+ }
+ gp = sg.g
+ gp.param = unsafe.Pointer(sg)
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+ goto retc
+
+rclose:
+ // read at end of closed channel
+ selunlock(sel)
+ if cas.receivedp != nil {
+ *cas.receivedp = false
+ }
+ if cas.elem != nil {
+ memclr(cas.elem, uintptr(c.elemsize))
+ }
+ if raceenabled {
+ raceacquire(unsafe.Pointer(c))
+ }
+ goto retc
+
+syncsend:
+ // can send to sleeping receiver (sg)
+ if raceenabled {
+ raceReadObjectPC(c.elemtype, cas.elem, cas.pc, chansendpc)
+ racesync(c, sg)
+ }
+ selunlock(sel)
+ if debugSelect {
+ print("syncsend: sel=", sel, " c=", c, "\n")
+ }
+ if sg.elem != nil {
+ memmove(sg.elem, cas.elem, uintptr(c.elemsize))
+ }
+ gp = sg.g
+ gp.param = unsafe.Pointer(sg)
+ if sg.releasetime != 0 {
+ sg.releasetime = cputicks()
+ }
+ goready(gp)
+
+retc:
+ if cas.releasetime > 0 {
+ blockevent(cas.releasetime-t0, 2)
+ }
+ return cas.pc, cas.so
+
+sclose:
+ // send on closed channel
+ selunlock(sel)
+ panic("send on closed channel")
+}
+
+func (c *hchan) sortkey() uintptr {
+ // TODO(khr): if we have a moving garbage collector, we'll need to
+ // change this function.
+ return uintptr(unsafe.Pointer(c))
+}
+
+// A runtimeSelect is a single case passed to rselect.
+// This must match ../reflect/value.go:/runtimeSelect
+type runtimeSelect struct {
+ dir selectDir
+ typ unsafe.Pointer // channel type (not used here)
+ ch *hchan // channel
+ val unsafe.Pointer // ptr to data (SendDir) or ptr to receive buffer (RecvDir)
+}
+
+// These values must match ../reflect/value.go:/SelectDir.
+type selectDir int
+
+const (
+ _ selectDir = iota
+ selectSend // case Chan <- Send
+ selectRecv // case <-Chan:
+ selectDefault // default
+)
+
+func reflect_rselect(cases []runtimeSelect) (chosen int, recvOK bool) {
+ // flagNoScan is safe here, because all objects are also referenced from cases.
+ size := selectsize(uintptr(len(cases)))
+ sel := (*_select)(gomallocgc(size, nil, flagNoScan))
+ newselect(sel, int64(size), int32(len(cases)))
+ r := new(bool)
+ for i := range cases {
+ rc := &cases[i]
+ switch rc.dir {
+ case selectDefault:
+ selectdefaultImpl(sel, uintptr(i), 0)
+ case selectSend:
+ if rc.ch == nil {
+ break
+ }
+ selectsendImpl(sel, rc.ch, uintptr(i), rc.val, 0)
+ case selectRecv:
+ if rc.ch == nil {
+ break
+ }
+ selectrecvImpl(sel, rc.ch, uintptr(i), rc.val, r, 0)
+ }
+ }
+
+ pc, _ := selectgoImpl(sel)
+ chosen = int(pc)
+ recvOK = *r
+ return
+}
+
+func (q *waitq) dequeueg(gp *g) {
+ var prevsgp *sudog
+ l := &q.first
+ for {
+ sgp := *l
+ if sgp == nil {
+ return
+ }
+ if sgp.g == gp {
+ *l = sgp.next
+ if q.last == sgp {
+ q.last = prevsgp
+ }
+ return
+ }
+ l = &sgp.next
+ prevsgp = sgp
+ }
+}
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
new file mode 100644
index 000000000..87ba5463b
--- /dev/null
+++ b/src/runtime/sema.go
@@ -0,0 +1,266 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Semaphore implementation exposed to Go.
+// Intended use is to provide a sleep and wakeup
+// primitive that can be used in the contended case
+// of other synchronization primitives.
+// Thus it targets the same goal as Linux's futex,
+// but it has much simpler semantics.
+//
+// That is, don't think of these as semaphores.
+// Think of them as a way to implement sleep and wakeup
+// such that every sleep is paired with a single wakeup,
+// even if, due to races, the wakeup happens before the sleep.
+//
+// See Mullender and Cox, ``Semaphores in Plan 9,''
+// http://swtch.com/semaphore.pdf
+
+package runtime
+
+import "unsafe"
+
+// Asynchronous semaphore for sync.Mutex.
+
+type semaRoot struct {
+ lock mutex
+ head *sudog
+ tail *sudog
+ nwait uint32 // Number of waiters. Read w/o the lock.
+}
+
+// Prime to not correlate with any user patterns.
+const semTabSize = 251
+
+var semtable [semTabSize]struct {
+ root semaRoot
+ pad [cacheLineSize - unsafe.Sizeof(semaRoot{})]byte
+}
+
+// Called from sync/net packages.
+func asyncsemacquire(addr *uint32) {
+ semacquire(addr, true)
+}
+
+func asyncsemrelease(addr *uint32) {
+ semrelease(addr)
+}
+
+// Called from runtime.
+func semacquire(addr *uint32, profile bool) {
+ // Easy case.
+ if cansemacquire(addr) {
+ return
+ }
+
+ // Harder case:
+ // increment waiter count
+ // try cansemacquire one more time, return if succeeded
+ // enqueue itself as a waiter
+ // sleep
+ // (waiter descriptor is dequeued by signaler)
+ s := acquireSudog()
+ root := semroot(addr)
+ t0 := int64(0)
+ s.releasetime = 0
+ if profile && blockprofilerate > 0 {
+ t0 = cputicks()
+ s.releasetime = -1
+ }
+ for {
+ lock(&root.lock)
+ // Add ourselves to nwait to disable "easy case" in semrelease.
+ xadd(&root.nwait, 1)
+ // Check cansemacquire to avoid missed wakeup.
+ if cansemacquire(addr) {
+ xadd(&root.nwait, -1)
+ unlock(&root.lock)
+ break
+ }
+ // Any semrelease after the cansemacquire knows we're waiting
+ // (we set nwait above), so go to sleep.
+ root.queue(addr, s)
+ goparkunlock(&root.lock, "semacquire")
+ if cansemacquire(addr) {
+ break
+ }
+ }
+ if s.releasetime > 0 {
+ blockevent(int64(s.releasetime)-t0, 3)
+ }
+ releaseSudog(s)
+}
+
+func semrelease(addr *uint32) {
+ root := semroot(addr)
+ xadd(addr, 1)
+
+ // Easy case: no waiters?
+ // This check must happen after the xadd, to avoid a missed wakeup
+ // (see loop in semacquire).
+ if atomicload(&root.nwait) == 0 {
+ return
+ }
+
+ // Harder case: search for a waiter and wake it.
+ lock(&root.lock)
+ if atomicload(&root.nwait) == 0 {
+ // The count is already consumed by another goroutine,
+ // so no need to wake up another goroutine.
+ unlock(&root.lock)
+ return
+ }
+ s := root.head
+ for ; s != nil; s = s.next {
+ if s.elem == unsafe.Pointer(addr) {
+ xadd(&root.nwait, -1)
+ root.dequeue(s)
+ break
+ }
+ }
+ unlock(&root.lock)
+ if s != nil {
+ if s.releasetime != 0 {
+ s.releasetime = cputicks()
+ }
+ goready(s.g)
+ }
+}
+
+func semroot(addr *uint32) *semaRoot {
+ return &semtable[(uintptr(unsafe.Pointer(addr))>>3)%semTabSize].root
+}
+
+func cansemacquire(addr *uint32) bool {
+ for {
+ v := atomicload(addr)
+ if v == 0 {
+ return false
+ }
+ if cas(addr, v, v-1) {
+ return true
+ }
+ }
+}
+
+func (root *semaRoot) queue(addr *uint32, s *sudog) {
+ s.g = getg()
+ s.elem = unsafe.Pointer(addr)
+ s.next = nil
+ s.prev = root.tail
+ if root.tail != nil {
+ root.tail.next = s
+ } else {
+ root.head = s
+ }
+ root.tail = s
+}
+
+func (root *semaRoot) dequeue(s *sudog) {
+ if s.next != nil {
+ s.next.prev = s.prev
+ } else {
+ root.tail = s.prev
+ }
+ if s.prev != nil {
+ s.prev.next = s.next
+ } else {
+ root.head = s.next
+ }
+ s.next = nil
+ s.prev = nil
+}
+
+// Synchronous semaphore for sync.Cond.
+type syncSema struct {
+ lock mutex
+ head *sudog
+ tail *sudog
+}
+
+// Syncsemacquire waits for a pairing syncsemrelease on the same semaphore s.
+func syncsemacquire(s *syncSema) {
+ lock(&s.lock)
+ if s.head != nil && s.head.nrelease > 0 {
+ // Have pending release, consume it.
+ var wake *sudog
+ s.head.nrelease--
+ if s.head.nrelease == 0 {
+ wake = s.head
+ s.head = wake.next
+ if s.head == nil {
+ s.tail = nil
+ }
+ }
+ unlock(&s.lock)
+ if wake != nil {
+ goready(wake.g)
+ }
+ } else {
+ // Enqueue itself.
+ w := acquireSudog()
+ w.g = getg()
+ w.nrelease = -1
+ w.next = nil
+ w.releasetime = 0
+ t0 := int64(0)
+ if blockprofilerate > 0 {
+ t0 = cputicks()
+ w.releasetime = -1
+ }
+ if s.tail == nil {
+ s.head = w
+ } else {
+ s.tail.next = w
+ }
+ s.tail = w
+ goparkunlock(&s.lock, "semacquire")
+ if t0 != 0 {
+ blockevent(int64(w.releasetime)-t0, 2)
+ }
+ releaseSudog(w)
+ }
+}
+
+// Syncsemrelease waits for n pairing syncsemacquire calls on the same semaphore s.
+func syncsemrelease(s *syncSema, n uint32) {
+ lock(&s.lock)
+ for n > 0 && s.head != nil && s.head.nrelease < 0 {
+ // Have pending acquire, satisfy it.
+ wake := s.head
+ s.head = wake.next
+ if s.head == nil {
+ s.tail = nil
+ }
+ if wake.releasetime != 0 {
+ wake.releasetime = cputicks()
+ }
+ goready(wake.g)
+ n--
+ }
+ if n > 0 {
+ // enqueue itself
+ w := acquireSudog()
+ w.g = getg()
+ w.nrelease = int32(n)
+ w.next = nil
+ w.releasetime = 0
+ if s.tail == nil {
+ s.head = w
+ } else {
+ s.tail.next = w
+ }
+ s.tail = w
+ goparkunlock(&s.lock, "semarelease")
+ } else {
+ unlock(&s.lock)
+ }
+}
+
+func syncsemcheck(sz uintptr) {
+ if sz != unsafe.Sizeof(syncSema{}) {
+ print("runtime: bad syncSema size - sync=", sz, " runtime=", unsafe.Sizeof(syncSema{}), "\n")
+ gothrow("bad syncSema size")
+ }
+}
diff --git a/src/runtime/signal.c b/src/runtime/signal.c
new file mode 100644
index 000000000..0674bfb22
--- /dev/null
+++ b/src/runtime/signal.c
@@ -0,0 +1,25 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+void
+runtime·sigenable_m(void)
+{
+ uint32 s;
+
+ s = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+ runtime·sigenable(s);
+}
+
+void
+runtime·sigdisable_m(void)
+{
+ uint32 s;
+
+ s = g->m->scalararg[0];
+ g->m->scalararg[0] = 0;
+ runtime·sigdisable(s);
+}
diff --git a/src/runtime/signal_386.c b/src/runtime/signal_386.c
new file mode 100644
index 000000000..d55e30452
--- /dev/null
+++ b/src/runtime/signal_386.c
@@ -0,0 +1,122 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_GOOS_GOARCH.h"
+#include "signals_GOOS.h"
+
+void
+runtime·dumpregs(Siginfo *info, void *ctxt)
+{
+ USED(info);
+ USED(ctxt);
+
+ runtime·printf("eax %x\n", SIG_EAX(info, ctxt));
+ runtime·printf("ebx %x\n", SIG_EBX(info, ctxt));
+ runtime·printf("ecx %x\n", SIG_ECX(info, ctxt));
+ runtime·printf("edx %x\n", SIG_EDX(info, ctxt));
+ runtime·printf("edi %x\n", SIG_EDI(info, ctxt));
+ runtime·printf("esi %x\n", SIG_ESI(info, ctxt));
+ runtime·printf("ebp %x\n", SIG_EBP(info, ctxt));
+ runtime·printf("esp %x\n", SIG_ESP(info, ctxt));
+ runtime·printf("eip %x\n", SIG_EIP(info, ctxt));
+ runtime·printf("eflags %x\n", SIG_EFLAGS(info, ctxt));
+ runtime·printf("cs %x\n", SIG_CS(info, ctxt));
+ runtime·printf("fs %x\n", SIG_FS(info, ctxt));
+ runtime·printf("gs %x\n", SIG_GS(info, ctxt));
+}
+
+void
+runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
+{
+ uintptr *sp;
+ SigTab *t;
+ bool crash;
+
+ if(sig == SIGPROF) {
+ runtime·sigprof((byte*)SIG_EIP(info, ctxt), (byte*)SIG_ESP(info, ctxt), nil, gp, g->m);
+ return;
+ }
+
+ t = &runtime·sigtab[sig];
+ if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
+ // Make it look like a call to the signal func.
+ // Have to pass arguments out of band since
+ // augmenting the stack frame would break
+ // the unwinding code.
+ gp->sig = sig;
+ gp->sigcode0 = SIG_CODE0(info, ctxt);
+ gp->sigcode1 = SIG_CODE1(info, ctxt);
+ gp->sigpc = SIG_EIP(info, ctxt);
+
+#ifdef GOOS_darwin
+ // Work around Leopard bug that doesn't set FPE_INTDIV.
+ // Look at instruction to see if it is a divide.
+ // Not necessary in Snow Leopard (si_code will be != 0).
+ if(sig == SIGFPE && gp->sigcode0 == 0) {
+ byte *pc;
+ pc = (byte*)gp->sigpc;
+ if(pc[0] == 0x66) // 16-bit instruction prefix
+ pc++;
+ if(pc[0] == 0xF6 || pc[0] == 0xF7)
+ gp->sigcode0 = FPE_INTDIV;
+ }
+#endif
+
+ // Only push runtime·sigpanic if eip != 0.
+ // If eip == 0, probably panicked because of a
+ // call to a nil func. Not pushing that onto sp will
+ // make the trace look like a call to runtime·sigpanic instead.
+ // (Otherwise the trace will end at runtime·sigpanic and we
+ // won't get to see who faulted.)
+ if(SIG_EIP(info, ctxt) != 0) {
+ sp = (uintptr*)SIG_ESP(info, ctxt);
+ *--sp = SIG_EIP(info, ctxt);
+ SIG_ESP(info, ctxt) = (uintptr)sp;
+ }
+ SIG_EIP(info, ctxt) = (uintptr)runtime·sigpanic;
+ return;
+ }
+
+ if(SIG_CODE0(info, ctxt) == SI_USER || (t->flags & SigNotify))
+ if(runtime·sigsend(sig))
+ return;
+ if(t->flags & SigKill)
+ runtime·exit(2);
+ if(!(t->flags & SigThrow))
+ return;
+
+ g->m->throwing = 1;
+ g->m->caughtsig = gp;
+ runtime·startpanic();
+
+ if(sig < 0 || sig >= NSIG)
+ runtime·printf("Signal %d\n", sig);
+ else
+ runtime·printf("%s\n", runtime·sigtab[sig].name);
+
+ runtime·printf("PC=%x\n", SIG_EIP(info, ctxt));
+ if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
+ runtime·printf("signal arrived during cgo execution\n");
+ gp = g->m->lockedg;
+ }
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)){
+ runtime·goroutineheader(gp);
+ runtime·traceback(SIG_EIP(info, ctxt), SIG_ESP(info, ctxt), 0, gp);
+ runtime·tracebackothers(gp);
+ runtime·printf("\n");
+ runtime·dumpregs(info, ctxt);
+ }
+
+ if(crash)
+ runtime·crash();
+
+ runtime·exit(2);
+}
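
In the handler above, a signal whose sigtab entry carries SigNotify (or one that was user-generated) is handed to runtime·sigsend, which is how deliveries reach the os/signal package. From Go code that path is driven by signal.Notify; a minimal sketch, assuming a Unix platform where SIGHUP and SIGINT are marked N in the signal tables later in this change:

	package main

	import (
		"fmt"
		"os"
		"os/signal"
		"syscall"
	)

	func main() {
		// Route SIGHUP and SIGINT to a channel instead of the default
		// action; with a listener registered, sigsend succeeds in the
		// handler and the runtime neither kills nor throws.
		c := make(chan os.Signal, 1)
		signal.Notify(c, syscall.SIGHUP, syscall.SIGINT)
		fmt.Println("got", <-c)
	}
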
diff --git a/src/runtime/signal_amd64x.c b/src/runtime/signal_amd64x.c
new file mode 100644
index 000000000..44e68cecf
--- /dev/null
+++ b/src/runtime/signal_amd64x.c
@@ -0,0 +1,156 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_GOOS_GOARCH.h"
+#include "signals_GOOS.h"
+
+void
+runtime·dumpregs(Siginfo *info, void *ctxt)
+{
+ USED(info);
+ USED(ctxt);
+
+ runtime·printf("rax %X\n", SIG_RAX(info, ctxt));
+ runtime·printf("rbx %X\n", SIG_RBX(info, ctxt));
+ runtime·printf("rcx %X\n", SIG_RCX(info, ctxt));
+ runtime·printf("rdx %X\n", SIG_RDX(info, ctxt));
+ runtime·printf("rdi %X\n", SIG_RDI(info, ctxt));
+ runtime·printf("rsi %X\n", SIG_RSI(info, ctxt));
+ runtime·printf("rbp %X\n", SIG_RBP(info, ctxt));
+ runtime·printf("rsp %X\n", SIG_RSP(info, ctxt));
+ runtime·printf("r8 %X\n", SIG_R8(info, ctxt) );
+ runtime·printf("r9 %X\n", SIG_R9(info, ctxt) );
+ runtime·printf("r10 %X\n", SIG_R10(info, ctxt));
+ runtime·printf("r11 %X\n", SIG_R11(info, ctxt));
+ runtime·printf("r12 %X\n", SIG_R12(info, ctxt));
+ runtime·printf("r13 %X\n", SIG_R13(info, ctxt));
+ runtime·printf("r14 %X\n", SIG_R14(info, ctxt));
+ runtime·printf("r15 %X\n", SIG_R15(info, ctxt));
+ runtime·printf("rip %X\n", SIG_RIP(info, ctxt));
+ runtime·printf("rflags %X\n", SIG_RFLAGS(info, ctxt));
+ runtime·printf("cs %X\n", SIG_CS(info, ctxt));
+ runtime·printf("fs %X\n", SIG_FS(info, ctxt));
+ runtime·printf("gs %X\n", SIG_GS(info, ctxt));
+}
+
+void
+runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
+{
+ uintptr *sp;
+ SigTab *t;
+ bool crash;
+
+ if(sig == SIGPROF) {
+ runtime·sigprof((byte*)SIG_RIP(info, ctxt), (byte*)SIG_RSP(info, ctxt), nil, gp, g->m);
+ return;
+ }
+
+#ifdef GOOS_darwin
+ // x86-64 has 48-bit virtual addresses. The top 16 bits must echo bit 47.
+ // The hardware delivers a different kind of fault for a malformed address
+ // than it does for an attempt to access a valid but unmapped address.
+ // OS X 10.9.2 mishandles the malformed address case, making it look like
+ // a user-generated signal (like someone ran kill -SEGV ourpid).
+ // We pass user-generated signals to os/signal, or else ignore them.
+ // Doing that here - and returning to the faulting code - results in an
+ // infinite loop. It appears the best we can do is rewrite what the kernel
+ // delivers into something more like the truth. The address used below
+ // has very little chance of being the one that caused the fault, but it is
+ // malformed, it is clearly not a real pointer, and if it does get printed
+ // in real life, people will probably search for it and find this code.
+ // There are no Google hits for b01dfacedebac1e or 0xb01dfacedebac1e
+ // as I type this comment.
+ if(sig == SIGSEGV && SIG_CODE0(info, ctxt) == SI_USER) {
+ SIG_CODE0(info, ctxt) = SI_USER+1;
+ info->si_addr = (void*)(uintptr)0xb01dfacedebac1eULL;
+ }
+#endif
+
+ t = &runtime·sigtab[sig];
+ if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
+ // Make it look like a call to the signal func.
+ // Have to pass arguments out of band since
+ // augmenting the stack frame would break
+ // the unwinding code.
+ gp->sig = sig;
+ gp->sigcode0 = SIG_CODE0(info, ctxt);
+ gp->sigcode1 = SIG_CODE1(info, ctxt);
+ gp->sigpc = SIG_RIP(info, ctxt);
+
+#ifdef GOOS_darwin
+ // Work around Leopard bug that doesn't set FPE_INTDIV.
+ // Look at instruction to see if it is a divide.
+ // Not necessary in Snow Leopard (si_code will be != 0).
+ if(sig == SIGFPE && gp->sigcode0 == 0) {
+ byte *pc;
+ pc = (byte*)gp->sigpc;
+ if((pc[0]&0xF0) == 0x40) // 64-bit REX prefix
+ pc++;
+ else if(pc[0] == 0x66) // 16-bit instruction prefix
+ pc++;
+ if(pc[0] == 0xF6 || pc[0] == 0xF7)
+ gp->sigcode0 = FPE_INTDIV;
+ }
+#endif
+
+ // Only push runtime·sigpanic if rip != 0.
+ // If rip == 0, probably panicked because of a
+ // call to a nil func. Not pushing that onto sp will
+ // make the trace look like a call to runtime·sigpanic instead.
+ // (Otherwise the trace will end at runtime·sigpanic and we
+ // won't get to see who faulted.)
+ if(SIG_RIP(info, ctxt) != 0) {
+ sp = (uintptr*)SIG_RSP(info, ctxt);
+ if(sizeof(uintreg) > sizeof(uintptr))
+ *--sp = 0;
+ *--sp = SIG_RIP(info, ctxt);
+ SIG_RSP(info, ctxt) = (uintptr)sp;
+ }
+ SIG_RIP(info, ctxt) = (uintptr)runtime·sigpanic;
+ return;
+ }
+
+ if(SIG_CODE0(info, ctxt) == SI_USER || (t->flags & SigNotify))
+ if(runtime·sigsend(sig))
+ return;
+ if(t->flags & SigKill)
+ runtime·exit(2);
+ if(!(t->flags & SigThrow))
+ return;
+
+ g->m->throwing = 1;
+ g->m->caughtsig = gp;
+ runtime·startpanic();
+
+ if(sig < 0 || sig >= NSIG)
+ runtime·printf("Signal %d\n", sig);
+ else
+ runtime·printf("%s\n", runtime·sigtab[sig].name);
+
+ runtime·printf("PC=%X\n", SIG_RIP(info, ctxt));
+ if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
+ runtime·printf("signal arrived during cgo execution\n");
+ gp = g->m->lockedg;
+ }
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)){
+ runtime·goroutineheader(gp);
+ runtime·traceback(SIG_RIP(info, ctxt), SIG_RSP(info, ctxt), 0, gp);
+ runtime·tracebackothers(gp);
+ runtime·printf("\n");
+ runtime·dumpregs(info, ctxt);
+ }
+
+ if(crash)
+ runtime·crash();
+
+ runtime·exit(2);
+}
diff --git a/src/runtime/signal_android_386.h b/src/runtime/signal_android_386.h
new file mode 100644
index 000000000..2a1bb4b3e
--- /dev/null
+++ b/src/runtime/signal_android_386.h
@@ -0,0 +1 @@
+#include "signal_linux_386.h"
diff --git a/src/runtime/signal_android_arm.h b/src/runtime/signal_android_arm.h
new file mode 100644
index 000000000..8a05e21e5
--- /dev/null
+++ b/src/runtime/signal_android_arm.h
@@ -0,0 +1 @@
+#include "signal_linux_arm.h"
diff --git a/src/runtime/signal_arm.c b/src/runtime/signal_arm.c
new file mode 100644
index 000000000..3571cf3ac
--- /dev/null
+++ b/src/runtime/signal_arm.c
@@ -0,0 +1,121 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_GOOS_GOARCH.h"
+#include "signals_GOOS.h"
+
+void
+runtime·dumpregs(Siginfo *info, void *ctxt)
+{
+ USED(info);
+ USED(ctxt);
+
+ runtime·printf("trap %x\n", SIG_TRAP(info, ctxt));
+ runtime·printf("error %x\n", SIG_ERROR(info, ctxt));
+ runtime·printf("oldmask %x\n", SIG_OLDMASK(info, ctxt));
+ runtime·printf("r0 %x\n", SIG_R0(info, ctxt));
+ runtime·printf("r1 %x\n", SIG_R1(info, ctxt));
+ runtime·printf("r2 %x\n", SIG_R2(info, ctxt));
+ runtime·printf("r3 %x\n", SIG_R3(info, ctxt));
+ runtime·printf("r4 %x\n", SIG_R4(info, ctxt));
+ runtime·printf("r5 %x\n", SIG_R5(info, ctxt));
+ runtime·printf("r6 %x\n", SIG_R6(info, ctxt));
+ runtime·printf("r7 %x\n", SIG_R7(info, ctxt));
+ runtime·printf("r8 %x\n", SIG_R8(info, ctxt));
+ runtime·printf("r9 %x\n", SIG_R9(info, ctxt));
+ runtime·printf("r10 %x\n", SIG_R10(info, ctxt));
+ runtime·printf("fp %x\n", SIG_FP(info, ctxt));
+ runtime·printf("ip %x\n", SIG_IP(info, ctxt));
+ runtime·printf("sp %x\n", SIG_SP(info, ctxt));
+ runtime·printf("lr %x\n", SIG_LR(info, ctxt));
+ runtime·printf("pc %x\n", SIG_PC(info, ctxt));
+ runtime·printf("cpsr %x\n", SIG_CPSR(info, ctxt));
+ runtime·printf("fault %x\n", SIG_FAULT(info, ctxt));
+}
+
+void
+runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
+{
+ SigTab *t;
+ bool crash;
+
+ if(sig == SIGPROF) {
+ runtime·sigprof((uint8*)SIG_PC(info, ctxt), (uint8*)SIG_SP(info, ctxt), (uint8*)SIG_LR(info, ctxt), gp, g->m);
+ return;
+ }
+
+ t = &runtime·sigtab[sig];
+ if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
+ // Make it look like a call to the signal func.
+ // Have to pass arguments out of band since
+ // augmenting the stack frame would break
+ // the unwinding code.
+ gp->sig = sig;
+ gp->sigcode0 = SIG_CODE0(info, ctxt);
+ gp->sigcode1 = SIG_FAULT(info, ctxt);
+ gp->sigpc = SIG_PC(info, ctxt);
+
+	// We arrange lr and pc to pretend the panicking
+ // function calls sigpanic directly.
+ // Always save LR to stack so that panics in leaf
+ // functions are correctly handled. This smashes
+ // the stack frame but we're not going back there
+ // anyway.
+ SIG_SP(info, ctxt) -= 4;
+ *(uint32*)SIG_SP(info, ctxt) = SIG_LR(info, ctxt);
+ // Don't bother saving PC if it's zero, which is
+ // probably a call to a nil func: the old link register
+ // is more useful in the stack trace.
+ if(gp->sigpc != 0)
+ SIG_LR(info, ctxt) = gp->sigpc;
+ // In case we are panicking from external C code
+ SIG_R10(info, ctxt) = (uintptr)gp;
+ SIG_PC(info, ctxt) = (uintptr)runtime·sigpanic;
+ return;
+ }
+
+ if(SIG_CODE0(info, ctxt) == SI_USER || (t->flags & SigNotify))
+ if(runtime·sigsend(sig))
+ return;
+ if(t->flags & SigKill)
+ runtime·exit(2);
+ if(!(t->flags & SigThrow))
+ return;
+
+ g->m->throwing = 1;
+ g->m->caughtsig = gp;
+ if(runtime·panicking) // traceback already printed
+ runtime·exit(2);
+ runtime·panicking = 1;
+
+ if(sig < 0 || sig >= NSIG)
+ runtime·printf("Signal %d\n", sig);
+ else
+ runtime·printf("%s\n", runtime·sigtab[sig].name);
+
+ runtime·printf("PC=%x\n", SIG_PC(info, ctxt));
+ if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
+ runtime·printf("signal arrived during cgo execution\n");
+ gp = g->m->lockedg;
+ }
+ runtime·printf("\n");
+
+ if(runtime·gotraceback(&crash)){
+ runtime·goroutineheader(gp);
+ runtime·traceback(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LR(info, ctxt), gp);
+ runtime·tracebackothers(gp);
+ runtime·printf("\n");
+ runtime·dumpregs(info, ctxt);
+ }
+
+ if(crash)
+ runtime·crash();
+
+ runtime·exit(2);
+}
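
On 386, amd64, and ARM alike, the SigPanic branch rewrites the saved context so the faulting goroutine appears to call runtime·sigpanic, which converts the hardware fault into an ordinary Go panic. That panic is then recoverable like any other; a short sketch (the nil dereference below is delivered as SIGSEGV, one of the P entries in the signal tables):

	package main

	import "fmt"

	func main() {
		defer func() {
			// The SIGSEGV from the nil dereference surfaces here as a
			// runtime error panic, via the faked call to sigpanic.
			if r := recover(); r != nil {
				fmt.Println("recovered:", r)
			}
		}()
		var p *int
		_ = *p // faults; the handler redirects execution to sigpanic
	}
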
diff --git a/src/runtime/signal_darwin_386.h b/src/runtime/signal_darwin_386.h
new file mode 100644
index 000000000..5459e10a1
--- /dev/null
+++ b/src/runtime/signal_darwin_386.h
@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext->ss)
+
+#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).eax)
+#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).ebx)
+#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).ecx)
+#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).edx)
+#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).edi)
+#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).esi)
+#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).ebp)
+#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).esp)
+#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).eip)
+#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).eflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_darwin_amd64.h b/src/runtime/signal_darwin_amd64.h
new file mode 100644
index 000000000..e3da6de3a
--- /dev/null
+++ b/src/runtime/signal_darwin_amd64.h
@@ -0,0 +1,31 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext->ss)
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).rax)
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).rbx)
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).rcx)
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).rdx)
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).rdi)
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).rsi)
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).rbp)
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).rsp)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).r10)
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).r11)
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).r12)
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).r13)
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).r14)
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).r15)
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).rip)
+#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).rflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_dragonfly_386.h b/src/runtime/signal_dragonfly_386.h
new file mode 100644
index 000000000..a24f1ee96
--- /dev/null
+++ b/src/runtime/signal_dragonfly_386.h
@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
+
+#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).mc_eax)
+#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).mc_ebx)
+#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).mc_ecx)
+#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).mc_edx)
+#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).mc_edi)
+#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).mc_esi)
+#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).mc_ebp)
+#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).mc_esp)
+#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).mc_eip)
+#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).mc_eflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).mc_cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).mc_fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).mc_gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_dragonfly_amd64.h b/src/runtime/signal_dragonfly_amd64.h
new file mode 100644
index 000000000..5b4f97782
--- /dev/null
+++ b/src/runtime/signal_dragonfly_amd64.h
@@ -0,0 +1,31 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).mc_rax)
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).mc_rbx)
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).mc_rcx)
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).mc_rdx)
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).mc_rdi)
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).mc_rsi)
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).mc_rbp)
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).mc_rsp)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).mc_r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).mc_r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).mc_r10)
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).mc_r11)
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).mc_r12)
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).mc_r13)
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).mc_r14)
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).mc_r15)
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).mc_rip)
+#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).mc_rflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).mc_cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).mc_ss)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).mc_ss)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_freebsd_386.h b/src/runtime/signal_freebsd_386.h
new file mode 100644
index 000000000..a24f1ee96
--- /dev/null
+++ b/src/runtime/signal_freebsd_386.h
@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
+
+#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).mc_eax)
+#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).mc_ebx)
+#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).mc_ecx)
+#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).mc_edx)
+#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).mc_edi)
+#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).mc_esi)
+#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).mc_ebp)
+#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).mc_esp)
+#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).mc_eip)
+#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).mc_eflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).mc_cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).mc_fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).mc_gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_freebsd_amd64.h b/src/runtime/signal_freebsd_amd64.h
new file mode 100644
index 000000000..7d35b7f85
--- /dev/null
+++ b/src/runtime/signal_freebsd_amd64.h
@@ -0,0 +1,31 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).mc_rax)
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).mc_rbx)
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).mc_rcx)
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).mc_rdx)
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).mc_rdi)
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).mc_rsi)
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).mc_rbp)
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).mc_rsp)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).mc_r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).mc_r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).mc_r10)
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).mc_r11)
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).mc_r12)
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).mc_r13)
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).mc_r14)
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).mc_r15)
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).mc_rip)
+#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).mc_rflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).mc_cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).mc_fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).mc_gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_freebsd_arm.h b/src/runtime/signal_freebsd_arm.h
new file mode 100644
index 000000000..87a45aa27
--- /dev/null
+++ b/src/runtime/signal_freebsd_arm.h
@@ -0,0 +1,28 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
+
+#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).__gregs[0])
+#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).__gregs[1])
+#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).__gregs[2])
+#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).__gregs[3])
+#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).__gregs[4])
+#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).__gregs[5])
+#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).__gregs[6])
+#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).__gregs[7])
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).__gregs[8])
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).__gregs[9])
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).__gregs[10])
+#define SIG_FP(info, ctxt) (SIG_REGS(ctxt).__gregs[11])
+#define SIG_IP(info, ctxt) (SIG_REGS(ctxt).__gregs[12])
+#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).__gregs[13])
+#define SIG_LR(info, ctxt) (SIG_REGS(ctxt).__gregs[14])
+#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).__gregs[15])
+#define SIG_CPSR(info, ctxt) (SIG_REGS(ctxt).__gregs[16])
+#define SIG_FAULT(info, ctxt) ((uintptr)(info)->si_addr)
+#define SIG_TRAP(info, ctxt) (0)
+#define SIG_ERROR(info, ctxt) (0)
+#define SIG_OLDMASK(info, ctxt) (0)
+#define SIG_CODE0(info, ctxt) ((uintptr)(info)->si_code)
diff --git a/src/runtime/signal_linux_386.h b/src/runtime/signal_linux_386.h
new file mode 100644
index 000000000..f77f1c9d5
--- /dev/null
+++ b/src/runtime/signal_linux_386.h
@@ -0,0 +1,24 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext))
+
+#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).eax)
+#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).ebx)
+#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).ecx)
+#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).edx)
+#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).edi)
+#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).esi)
+#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).ebp)
+#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).esp)
+#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).eip)
+#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).eflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) (((uintptr*)(info))[2])
+
diff --git a/src/runtime/signal_linux_amd64.h b/src/runtime/signal_linux_amd64.h
new file mode 100644
index 000000000..5a9a3e5da
--- /dev/null
+++ b/src/runtime/signal_linux_amd64.h
@@ -0,0 +1,32 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext))
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).rax)
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).rbx)
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).rcx)
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).rdx)
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).rdi)
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).rsi)
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).rbp)
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).rsp)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).r10)
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).r11)
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).r12)
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).r13)
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).r14)
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).r15)
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).rip)
+#define SIG_RFLAGS(info, ctxt) ((uint64)SIG_REGS(ctxt).eflags)
+
+#define SIG_CS(info, ctxt) ((uint64)SIG_REGS(ctxt).cs)
+#define SIG_FS(info, ctxt) ((uint64)SIG_REGS(ctxt).fs)
+#define SIG_GS(info, ctxt) ((uint64)SIG_REGS(ctxt).gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) (((uintptr*)(info))[2])
+
diff --git a/src/runtime/signal_linux_arm.h b/src/runtime/signal_linux_arm.h
new file mode 100644
index 000000000..a674c0d57
--- /dev/null
+++ b/src/runtime/signal_linux_arm.h
@@ -0,0 +1,28 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext))
+
+#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).arm_r0)
+#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).arm_r1)
+#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).arm_r2)
+#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).arm_r3)
+#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).arm_r4)
+#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).arm_r5)
+#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).arm_r6)
+#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).arm_r7)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).arm_r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).arm_r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).arm_r10)
+#define SIG_FP(info, ctxt) (SIG_REGS(ctxt).arm_fp)
+#define SIG_IP(info, ctxt) (SIG_REGS(ctxt).arm_ip)
+#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).arm_sp)
+#define SIG_LR(info, ctxt) (SIG_REGS(ctxt).arm_lr)
+#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).arm_pc)
+#define SIG_CPSR(info, ctxt) (SIG_REGS(ctxt).arm_cpsr)
+#define SIG_FAULT(info, ctxt) (SIG_REGS(ctxt).fault_address)
+#define SIG_TRAP(info, ctxt) (SIG_REGS(ctxt).trap_no)
+#define SIG_ERROR(info, ctxt) (SIG_REGS(ctxt).error_code)
+#define SIG_OLDMASK(info, ctxt) (SIG_REGS(ctxt).oldmask)
+#define SIG_CODE0(info, ctxt) ((uintptr)(info)->si_code)
diff --git a/src/runtime/signal_nacl_386.h b/src/runtime/signal_nacl_386.h
new file mode 100644
index 000000000..c9481b5f4
--- /dev/null
+++ b/src/runtime/signal_nacl_386.h
@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((ExcContext*)(ctxt))->regs)
+
+#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).eax)
+#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).ebx)
+#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).ecx)
+#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).edx)
+#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).edi)
+#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).esi)
+#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).ebp)
+#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).esp)
+#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).eip)
+#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).eflags)
+
+#define SIG_CS(info, ctxt) (~0)
+#define SIG_FS(info, ctxt) (~0)
+#define SIG_GS(info, ctxt) (~0)
+
+#define SIG_CODE0(info, ctxt) (~0)
+#define SIG_CODE1(info, ctxt) (0)
diff --git a/src/runtime/signal_nacl_amd64p32.h b/src/runtime/signal_nacl_amd64p32.h
new file mode 100644
index 000000000..f62305cb5
--- /dev/null
+++ b/src/runtime/signal_nacl_amd64p32.h
@@ -0,0 +1,31 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((ExcContext*)(ctxt))->regs.regs64)
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).rax)
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).rbx)
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).rcx)
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).rdx)
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).rdi)
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).rsi)
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).rbp)
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).rsp)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).r10)
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).r11)
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).r12)
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).r13)
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).r14)
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).r15)
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).rip)
+#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).rflags)
+
+#define SIG_CS(info, ctxt) (~0)
+#define SIG_FS(info, ctxt) (~0)
+#define SIG_GS(info, ctxt) (~0)
+
+#define SIG_CODE0(info, ctxt) (~0)
+#define SIG_CODE1(info, ctxt) (0)
diff --git a/src/runtime/signal_nacl_arm.h b/src/runtime/signal_nacl_arm.h
new file mode 100644
index 000000000..e5bbb211d
--- /dev/null
+++ b/src/runtime/signal_nacl_arm.h
@@ -0,0 +1,28 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((ExcContext*)(ctxt))->regs)
+
+#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).r0)
+#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).r1)
+#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).r2)
+#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).r3)
+#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).r4)
+#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).r5)
+#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).r6)
+#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).r7)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).r10)
+#define SIG_FP(info, ctxt) (SIG_REGS(ctxt).r11)
+#define SIG_IP(info, ctxt) (SIG_REGS(ctxt).r12)
+#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).sp)
+#define SIG_LR(info, ctxt) (SIG_REGS(ctxt).lr)
+#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).pc)
+#define SIG_CPSR(info, ctxt) (SIG_REGS(ctxt).cpsr)
+#define SIG_FAULT(info, ctxt) (~0)
+#define SIG_TRAP(info, ctxt) (~0)
+#define SIG_ERROR(info, ctxt) (~0)
+#define SIG_OLDMASK(info, ctxt) (~0)
+#define SIG_CODE0(info, ctxt) (~0)
diff --git a/src/runtime/signal_netbsd_386.h b/src/runtime/signal_netbsd_386.h
new file mode 100644
index 000000000..d5a8a0c4b
--- /dev/null
+++ b/src/runtime/signal_netbsd_386.h
@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((UcontextT*)(ctxt))->uc_mcontext)
+
+#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_EAX])
+#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_EBX])
+#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_ECX])
+#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_EDX])
+#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_EDI])
+#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_ESI])
+#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_EBP])
+#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_UESP])
+#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_EIP])
+#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_EFL])
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_CS])
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_FS])
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_GS])
+
+#define SIG_CODE0(info, ctxt) ((info)->_code)
+#define SIG_CODE1(info, ctxt) (*(uintptr*)&(info)->_reason[0])
diff --git a/src/runtime/signal_netbsd_amd64.h b/src/runtime/signal_netbsd_amd64.h
new file mode 100644
index 000000000..7ec4cd98c
--- /dev/null
+++ b/src/runtime/signal_netbsd_amd64.h
@@ -0,0 +1,31 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((UcontextT*)(ctxt))->uc_mcontext)
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RAX])
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RBX])
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RCX])
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RDX])
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RDI])
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RSI])
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RBP])
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RSP])
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R8])
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R9])
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R10])
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R11])
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R12])
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R13])
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R14])
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R15])
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RIP])
+#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_RFLAGS])
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_CS])
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_FS])
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_GS])
+
+#define SIG_CODE0(info, ctxt) ((info)->_code)
+#define SIG_CODE1(info, ctxt) (*(uintptr*)&(info)->_reason[0])
diff --git a/src/runtime/signal_netbsd_arm.h b/src/runtime/signal_netbsd_arm.h
new file mode 100644
index 000000000..12f5827a6
--- /dev/null
+++ b/src/runtime/signal_netbsd_arm.h
@@ -0,0 +1,30 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((UcontextT*)(ctxt))->uc_mcontext)
+
+#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R0])
+#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R1])
+#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R2])
+#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R3])
+#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R4])
+#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R5])
+#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R6])
+#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R7])
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R8])
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R9])
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R10])
+#define SIG_FP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R11])
+#define SIG_IP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R12])
+#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R13])
+#define SIG_LR(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R14])
+#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_R15])
+#define SIG_CPSR(info, ctxt) (SIG_REGS(ctxt).__gregs[REG_CPSR])
+#define SIG_FAULT(info, ctxt) (*(uintptr*)&(info)->_reason[0])
+#define SIG_TRAP(info, ctxt) (0)
+#define SIG_ERROR(info, ctxt) (0)
+#define SIG_OLDMASK(info, ctxt) (0)
+
+#define SIG_CODE0(info, ctxt) ((info)->_code)
+#define SIG_CODE1(info, ctxt) (*(uintptr*)&(info)->_reason[0])
diff --git a/src/runtime/signal_openbsd_386.h b/src/runtime/signal_openbsd_386.h
new file mode 100644
index 000000000..6742db8d4
--- /dev/null
+++ b/src/runtime/signal_openbsd_386.h
@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (*(Sigcontext*)(ctxt))
+
+#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).sc_eax)
+#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).sc_ebx)
+#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).sc_ecx)
+#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).sc_edx)
+#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).sc_edi)
+#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).sc_esi)
+#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).sc_ebp)
+#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).sc_esp)
+#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).sc_eip)
+#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).sc_eflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).sc_cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).sc_fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).sc_gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) (*(uintptr*)((byte*)info + 12))
diff --git a/src/runtime/signal_openbsd_amd64.h b/src/runtime/signal_openbsd_amd64.h
new file mode 100644
index 000000000..b46a5dfa6
--- /dev/null
+++ b/src/runtime/signal_openbsd_amd64.h
@@ -0,0 +1,31 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (*(Sigcontext*)(ctxt))
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).sc_rax)
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).sc_rbx)
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).sc_rcx)
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).sc_rdx)
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).sc_rdi)
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).sc_rsi)
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).sc_rbp)
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).sc_rsp)
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).sc_r8)
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).sc_r9)
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).sc_r10)
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).sc_r11)
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).sc_r12)
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).sc_r13)
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).sc_r14)
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).sc_r15)
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).sc_rip)
+#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).sc_rflags)
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).sc_cs)
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).sc_fs)
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).sc_gs)
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) (*(uintptr*)((byte*)(info) + 16))
diff --git a/src/runtime/signal_solaris_amd64.h b/src/runtime/signal_solaris_amd64.h
new file mode 100644
index 000000000..c2e0a1549
--- /dev/null
+++ b/src/runtime/signal_solaris_amd64.h
@@ -0,0 +1,31 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
+
+#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RAX])
+#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RBX])
+#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RCX])
+#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RDX])
+#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RDI])
+#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RSI])
+#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RBP])
+#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RSP])
+#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R8])
+#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R9])
+#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R10])
+#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R11])
+#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R12])
+#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R13])
+#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R14])
+#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R15])
+#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RIP])
+#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RFLAGS])
+
+#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_CS])
+#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_FS])
+#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_GS])
+
+#define SIG_CODE0(info, ctxt) ((info)->si_code)
+#define SIG_CODE1(info, ctxt) (*(uintptr*)&(info)->__data[0])
diff --git a/src/runtime/signal_unix.c b/src/runtime/signal_unix.c
new file mode 100644
index 000000000..0e33ece49
--- /dev/null
+++ b/src/runtime/signal_unix.c
@@ -0,0 +1,119 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
+
+#include "runtime.h"
+#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
+#include "signal_unix.h"
+
+extern SigTab runtime·sigtab[];
+
+void
+runtime·initsig(void)
+{
+ int32 i;
+ SigTab *t;
+
+ // First call: basic setup.
+ for(i = 0; i<NSIG; i++) {
+ t = &runtime·sigtab[i];
+ if((t->flags == 0) || (t->flags & SigDefault))
+ continue;
+
+ // For some signals, we respect an inherited SIG_IGN handler
+ // rather than insist on installing our own default handler.
+ // Even these signals can be fetched using the os/signal package.
+ switch(i) {
+ case SIGHUP:
+ case SIGINT:
+ if(runtime·getsig(i) == SIG_IGN) {
+ t->flags = SigNotify | SigIgnored;
+ continue;
+ }
+ }
+
+ t->flags |= SigHandling;
+ runtime·setsig(i, runtime·sighandler, true);
+ }
+}
+
+void
+runtime·sigenable(uint32 sig)
+{
+ SigTab *t;
+
+ if(sig >= NSIG)
+ return;
+
+ t = &runtime·sigtab[sig];
+ if((t->flags & SigNotify) && !(t->flags & SigHandling)) {
+ t->flags |= SigHandling;
+ if(runtime·getsig(sig) == SIG_IGN)
+ t->flags |= SigIgnored;
+ runtime·setsig(sig, runtime·sighandler, true);
+ }
+}
+
+void
+runtime·sigdisable(uint32 sig)
+{
+ SigTab *t;
+
+ if(sig >= NSIG)
+ return;
+
+ t = &runtime·sigtab[sig];
+ if((t->flags & SigNotify) && (t->flags & SigHandling)) {
+ t->flags &= ~SigHandling;
+ if(t->flags & SigIgnored)
+ runtime·setsig(sig, SIG_IGN, true);
+ else
+ runtime·setsig(sig, SIG_DFL, true);
+ }
+}
+
+void
+runtime·resetcpuprofiler(int32 hz)
+{
+ Itimerval it;
+
+ runtime·memclr((byte*)&it, sizeof it);
+ if(hz == 0) {
+ runtime·setitimer(ITIMER_PROF, &it, nil);
+ } else {
+ it.it_interval.tv_sec = 0;
+ it.it_interval.tv_usec = 1000000 / hz;
+ it.it_value = it.it_interval;
+ runtime·setitimer(ITIMER_PROF, &it, nil);
+ }
+ g->m->profilehz = hz;
+}
+
+void
+runtime·sigpipe(void)
+{
+ runtime·setsig(SIGPIPE, SIG_DFL, false);
+ runtime·raise(SIGPIPE);
+}
+
+void
+runtime·crash(void)
+{
+#ifdef GOOS_darwin
+ // OS X core dumps are linear dumps of the mapped memory,
+ // from the first virtual byte to the last, with zeros in the gaps.
+ // Because of the way we arrange the address space on 64-bit systems,
+ // this means the OS X core file will be >128 GB and even on a zippy
+ // workstation can take OS X well over an hour to write (uninterruptible).
+ // Save users from making that mistake.
+ if(sizeof(void*) == 8)
+ return;
+#endif
+
+ runtime·unblocksignals();
+ runtime·setsig(SIGABRT, SIG_DFL, false);
+ runtime·raise(SIGABRT);
+}
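
runtime·resetcpuprofiler above arms ITIMER_PROF so SIGPROF fires hz times per second, and the SIGPROF check at the top of each sighandler records a sample via runtime·sigprof. The usual way to drive this from Go is runtime/pprof, which requests a 100 Hz rate; a hedged sketch:

	package main

	import (
		"os"
		"runtime/pprof"
	)

	func main() {
		f, err := os.Create("cpu.prof")
		if err != nil {
			panic(err)
		}
		defer f.Close()

		// StartCPUProfile asks the runtime for CPU profiling, which ends
		// up in resetcpuprofiler: setitimer(ITIMER_PROF) with an interval
		// of 1000000/hz microseconds (10ms at the default 100 Hz).
		if err := pprof.StartCPUProfile(f); err != nil {
			panic(err)
		}
		defer pprof.StopCPUProfile()

		sum := 0
		for i := 0; i < 100000000; i++ {
			sum += i
		}
		_ = sum
	}
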
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
new file mode 100644
index 000000000..ba77b6e7b
--- /dev/null
+++ b/src/runtime/signal_unix.go
@@ -0,0 +1,13 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package runtime
+
+func sigpipe()
+
+func os_sigpipe() {
+ onM(sigpipe)
+}
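
os_sigpipe is the runtime hook behind the os package's broken-pipe handling: onM switches to the scheduler stack, and sigpipe (in signal_unix.c above) restores the default SIGPIPE disposition and re-raises it, so the process exits with the conventional broken-pipe status. A sketch of a program that can exercise this path, assuming writes to standard output that fail with EPIPE are routed here (run as, say, `go run main.go | head -n 1`):

	package main

	import "fmt"

	func main() {
		// Once the reader exits, further writes to stdout fail with
		// EPIPE; the os package then calls os_sigpipe and the process
		// terminates on SIGPIPE rather than looping forever.
		for i := 0; ; i++ {
			fmt.Println(i)
		}
	}
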
diff --git a/src/runtime/signal_unix.h b/src/runtime/signal_unix.h
new file mode 100644
index 000000000..2d84a0186
--- /dev/null
+++ b/src/runtime/signal_unix.h
@@ -0,0 +1,14 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define SIG_DFL ((void*)0)
+#define SIG_IGN ((void*)1)
+
+typedef void GoSighandler(int32, Siginfo*, void*, G*);
+void runtime·setsig(int32, GoSighandler*, bool);
+GoSighandler* runtime·getsig(int32);
+
+void runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp);
+void runtime·raise(int32);
+
diff --git a/src/runtime/signals_android.h b/src/runtime/signals_android.h
new file mode 100644
index 000000000..5140d8a18
--- /dev/null
+++ b/src/runtime/signals_android.h
@@ -0,0 +1 @@
+#include "signals_linux.h"
diff --git a/src/runtime/signals_darwin.h b/src/runtime/signals_darwin.h
new file mode 100644
index 000000000..229b58590
--- /dev/null
+++ b/src/runtime/signals_darwin.h
@@ -0,0 +1,50 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: terminal line hangup",
+ /* 2 */ N+K, "SIGINT: interrupt",
+ /* 3 */ N+T, "SIGQUIT: quit",
+ /* 4 */ T, "SIGILL: illegal instruction",
+ /* 5 */ T, "SIGTRAP: trace trap",
+ /* 6 */ N+T, "SIGABRT: abort",
+ /* 7 */ T, "SIGEMT: emulate instruction executed",
+ /* 8 */ P, "SIGFPE: floating-point exception",
+ /* 9 */ 0, "SIGKILL: kill",
+ /* 10 */ P, "SIGBUS: bus error",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ T, "SIGSYS: bad system call",
+ /* 13 */ N, "SIGPIPE: write to broken pipe",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: termination",
+ /* 16 */ N, "SIGURG: urgent condition on socket",
+ /* 17 */ 0, "SIGSTOP: stop",
+ /* 18 */ N+D, "SIGTSTP: keyboard stop",
+ /* 19 */ 0, "SIGCONT: continue after stop",
+ /* 20 */ N, "SIGCHLD: child status has changed",
+ /* 21 */ N+D, "SIGTTIN: background read from tty",
+ /* 22 */ N+D, "SIGTTOU: background write to tty",
+ /* 23 */ N, "SIGIO: i/o now possible",
+ /* 24 */ N, "SIGXCPU: cpu limit exceeded",
+ /* 25 */ N, "SIGXFSZ: file size limit exceeded",
+ /* 26 */ N, "SIGVTALRM: virtual alarm clock",
+ /* 27 */ N, "SIGPROF: profiling alarm clock",
+ /* 28 */ N, "SIGWINCH: window size change",
+ /* 29 */ N, "SIGINFO: status request from keyboard",
+ /* 30 */ N, "SIGUSR1: user-defined signal 1",
+ /* 31 */ N, "SIGUSR2: user-defined signal 2",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
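
The single-letter flags in these tables encode the handler's policy: N forwards the signal to os/signal when a listener is registered, K exits the process otherwise, T throws (crashes with a traceback) otherwise, P turns the fault into a Go panic, and D leaves the signal at its default disposition unless os/signal asks for it. SIGTERM, marked N+K, is therefore catchable for graceful shutdown; a brief sketch:

	package main

	import (
		"fmt"
		"os"
		"os/signal"
		"syscall"
	)

	func main() {
		// With a listener registered, the N path wins and SIGTERM is
		// delivered on the channel; without one, the K path exits the
		// process as usual.
		c := make(chan os.Signal, 1)
		signal.Notify(c, syscall.SIGTERM)

		<-c
		fmt.Println("shutting down cleanly")
	}
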
diff --git a/src/runtime/signals_dragonfly.h b/src/runtime/signals_dragonfly.h
new file mode 100644
index 000000000..4d27e050d
--- /dev/null
+++ b/src/runtime/signals_dragonfly.h
@@ -0,0 +1,51 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: terminal line hangup",
+ /* 2 */ N+K, "SIGINT: interrupt",
+ /* 3 */ N+T, "SIGQUIT: quit",
+ /* 4 */ T, "SIGILL: illegal instruction",
+ /* 5 */ T, "SIGTRAP: trace trap",
+ /* 6 */ N+T, "SIGABRT: abort",
+ /* 7 */ T, "SIGEMT: emulate instruction executed",
+ /* 8 */ P, "SIGFPE: floating-point exception",
+ /* 9 */ 0, "SIGKILL: kill",
+ /* 10 */ P, "SIGBUS: bus error",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ T, "SIGSYS: bad system call",
+ /* 13 */ N, "SIGPIPE: write to broken pipe",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: termination",
+ /* 16 */ N, "SIGURG: urgent condition on socket",
+ /* 17 */ 0, "SIGSTOP: stop",
+ /* 18 */ N+D, "SIGTSTP: keyboard stop",
+ /* 19 */ 0, "SIGCONT: continue after stop",
+ /* 20 */ N, "SIGCHLD: child status has changed",
+ /* 21 */ N+D, "SIGTTIN: background read from tty",
+ /* 22 */ N+D, "SIGTTOU: background write to tty",
+ /* 23 */ N, "SIGIO: i/o now possible",
+ /* 24 */ N, "SIGXCPU: cpu limit exceeded",
+ /* 25 */ N, "SIGXFSZ: file size limit exceeded",
+ /* 26 */ N, "SIGVTALRM: virtual alarm clock",
+ /* 27 */ N, "SIGPROF: profiling alarm clock",
+ /* 28 */ N, "SIGWINCH: window size change",
+ /* 29 */ N, "SIGINFO: status request from keyboard",
+ /* 30 */ N, "SIGUSR1: user-defined signal 1",
+ /* 31 */ N, "SIGUSR2: user-defined signal 2",
+ /* 32 */ N, "SIGTHR: reserved",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
diff --git a/src/runtime/signals_freebsd.h b/src/runtime/signals_freebsd.h
new file mode 100644
index 000000000..8d45c50c3
--- /dev/null
+++ b/src/runtime/signals_freebsd.h
@@ -0,0 +1,51 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: terminal line hangup",
+ /* 2 */ N+K, "SIGINT: interrupt",
+ /* 3 */ N+T, "SIGQUIT: quit",
+ /* 4 */ T, "SIGILL: illegal instruction",
+ /* 5 */ T, "SIGTRAP: trace trap",
+ /* 6 */ N+T, "SIGABRT: abort",
+ /* 7 */ T, "SIGEMT: emulate instruction executed",
+ /* 8 */ P, "SIGFPE: floating-point exception",
+ /* 9 */ 0, "SIGKILL: kill",
+ /* 10 */ P, "SIGBUS: bus error",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ N, "SIGSYS: bad system call",
+ /* 13 */ N, "SIGPIPE: write to broken pipe",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: termination",
+ /* 16 */ N, "SIGURG: urgent condition on socket",
+ /* 17 */ 0, "SIGSTOP: stop",
+ /* 18 */ N+D, "SIGTSTP: keyboard stop",
+ /* 19 */ 0, "SIGCONT: continue after stop",
+ /* 20 */ N, "SIGCHLD: child status has changed",
+ /* 21 */ N+D, "SIGTTIN: background read from tty",
+ /* 22 */ N+D, "SIGTTOU: background write to tty",
+ /* 23 */ N, "SIGIO: i/o now possible",
+ /* 24 */ N, "SIGXCPU: cpu limit exceeded",
+ /* 25 */ N, "SIGXFSZ: file size limit exceeded",
+ /* 26 */ N, "SIGVTALRM: virtual alarm clock",
+ /* 27 */ N, "SIGPROF: profiling alarm clock",
+ /* 28 */ N, "SIGWINCH: window size change",
+ /* 29 */ N, "SIGINFO: status request from keyboard",
+ /* 30 */ N, "SIGUSR1: user-defined signal 1",
+ /* 31 */ N, "SIGUSR2: user-defined signal 2",
+ /* 32 */ N, "SIGTHR: reserved",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
diff --git a/src/runtime/signals_linux.h b/src/runtime/signals_linux.h
new file mode 100644
index 000000000..368afc1c8
--- /dev/null
+++ b/src/runtime/signals_linux.h
@@ -0,0 +1,83 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: terminal line hangup",
+ /* 2 */ N+K, "SIGINT: interrupt",
+ /* 3 */ N+T, "SIGQUIT: quit",
+ /* 4 */ T, "SIGILL: illegal instruction",
+ /* 5 */ T, "SIGTRAP: trace trap",
+ /* 6 */ N+T, "SIGABRT: abort",
+ /* 7 */ P, "SIGBUS: bus error",
+ /* 8 */ P, "SIGFPE: floating-point exception",
+ /* 9 */ 0, "SIGKILL: kill",
+ /* 10 */ N, "SIGUSR1: user-defined signal 1",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ N, "SIGUSR2: user-defined signal 2",
+ /* 13 */ N, "SIGPIPE: write to broken pipe",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: termination",
+ /* 16 */ T, "SIGSTKFLT: stack fault",
+ /* 17 */ N, "SIGCHLD: child status has changed",
+ /* 18 */ 0, "SIGCONT: continue",
+ /* 19 */ 0, "SIGSTOP: stop, unblockable",
+ /* 20 */ N+D, "SIGTSTP: keyboard stop",
+ /* 21 */ N+D, "SIGTTIN: background read from tty",
+ /* 22 */ N+D, "SIGTTOU: background write to tty",
+ /* 23 */ N, "SIGURG: urgent condition on socket",
+ /* 24 */ N, "SIGXCPU: cpu limit exceeded",
+ /* 25 */ N, "SIGXFSZ: file size limit exceeded",
+ /* 26 */ N, "SIGVTALRM: virtual alarm clock",
+ /* 27 */ N, "SIGPROF: profiling alarm clock",
+ /* 28 */ N, "SIGWINCH: window size change",
+ /* 29 */ N, "SIGIO: i/o now possible",
+ /* 30 */ N, "SIGPWR: power failure restart",
+ /* 31 */ N, "SIGSYS: bad system call",
+ /* 32 */ 0, "signal 32", /* SIGCANCEL; see issue 6997 */
+ /* 33 */ 0, "signal 33", /* SIGSETXID; see issue 3871 */
+ /* 34 */ N, "signal 34",
+ /* 35 */ N, "signal 35",
+ /* 36 */ N, "signal 36",
+ /* 37 */ N, "signal 37",
+ /* 38 */ N, "signal 38",
+ /* 39 */ N, "signal 39",
+ /* 40 */ N, "signal 40",
+ /* 41 */ N, "signal 41",
+ /* 42 */ N, "signal 42",
+ /* 43 */ N, "signal 43",
+ /* 44 */ N, "signal 44",
+ /* 45 */ N, "signal 45",
+ /* 46 */ N, "signal 46",
+ /* 47 */ N, "signal 47",
+ /* 48 */ N, "signal 48",
+ /* 49 */ N, "signal 49",
+ /* 50 */ N, "signal 50",
+ /* 51 */ N, "signal 51",
+ /* 52 */ N, "signal 52",
+ /* 53 */ N, "signal 53",
+ /* 54 */ N, "signal 54",
+ /* 55 */ N, "signal 55",
+ /* 56 */ N, "signal 56",
+ /* 57 */ N, "signal 57",
+ /* 58 */ N, "signal 58",
+ /* 59 */ N, "signal 59",
+ /* 60 */ N, "signal 60",
+ /* 61 */ N, "signal 61",
+ /* 62 */ N, "signal 62",
+ /* 63 */ N, "signal 63",
+ /* 64 */ N, "signal 64",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
diff --git a/src/runtime/signals_nacl.h b/src/runtime/signals_nacl.h
new file mode 100644
index 000000000..229b58590
--- /dev/null
+++ b/src/runtime/signals_nacl.h
@@ -0,0 +1,50 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: terminal line hangup",
+ /* 2 */ N+K, "SIGINT: interrupt",
+ /* 3 */ N+T, "SIGQUIT: quit",
+ /* 4 */ T, "SIGILL: illegal instruction",
+ /* 5 */ T, "SIGTRAP: trace trap",
+ /* 6 */ N+T, "SIGABRT: abort",
+ /* 7 */ T, "SIGEMT: emulate instruction executed",
+ /* 8 */ P, "SIGFPE: floating-point exception",
+ /* 9 */ 0, "SIGKILL: kill",
+ /* 10 */ P, "SIGBUS: bus error",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ T, "SIGSYS: bad system call",
+ /* 13 */ N, "SIGPIPE: write to broken pipe",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: termination",
+ /* 16 */ N, "SIGURG: urgent condition on socket",
+ /* 17 */ 0, "SIGSTOP: stop",
+ /* 18 */ N+D, "SIGTSTP: keyboard stop",
+ /* 19 */ 0, "SIGCONT: continue after stop",
+ /* 20 */ N, "SIGCHLD: child status has changed",
+ /* 21 */ N+D, "SIGTTIN: background read from tty",
+ /* 22 */ N+D, "SIGTTOU: background write to tty",
+ /* 23 */ N, "SIGIO: i/o now possible",
+ /* 24 */ N, "SIGXCPU: cpu limit exceeded",
+ /* 25 */ N, "SIGXFSZ: file size limit exceeded",
+ /* 26 */ N, "SIGVTALRM: virtual alarm clock",
+ /* 27 */ N, "SIGPROF: profiling alarm clock",
+ /* 28 */ N, "SIGWINCH: window size change",
+ /* 29 */ N, "SIGINFO: status request from keyboard",
+ /* 30 */ N, "SIGUSR1: user-defined signal 1",
+ /* 31 */ N, "SIGUSR2: user-defined signal 2",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
diff --git a/src/runtime/signals_netbsd.h b/src/runtime/signals_netbsd.h
new file mode 100644
index 000000000..7140de86f
--- /dev/null
+++ b/src/runtime/signals_netbsd.h
@@ -0,0 +1,51 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: terminal line hangup",
+ /* 2 */ N+K, "SIGINT: interrupt",
+ /* 3 */ N+T, "SIGQUIT: quit",
+ /* 4 */ T, "SIGILL: illegal instruction",
+ /* 5 */ T, "SIGTRAP: trace trap",
+ /* 6 */ N+T, "SIGABRT: abort",
+ /* 7 */ T, "SIGEMT: emulate instruction executed",
+ /* 8 */ P, "SIGFPE: floating-point exception",
+ /* 9 */ 0, "SIGKILL: kill",
+ /* 10 */ P, "SIGBUS: bus error",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ T, "SIGSYS: bad system call",
+ /* 13 */ N, "SIGPIPE: write to broken pipe",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: termination",
+ /* 16 */ N, "SIGURG: urgent condition on socket",
+ /* 17 */ 0, "SIGSTOP: stop",
+ /* 18 */ N+D, "SIGTSTP: keyboard stop",
+ /* 19 */ 0, "SIGCONT: continue after stop",
+ /* 20 */ N, "SIGCHLD: child status has changed",
+ /* 21 */ N+D, "SIGTTIN: background read from tty",
+ /* 22 */ N+D, "SIGTTOU: background write to tty",
+ /* 23 */ N, "SIGIO: i/o now possible",
+ /* 24 */ N, "SIGXCPU: cpu limit exceeded",
+ /* 25 */ N, "SIGXFSZ: file size limit exceeded",
+ /* 26 */ N, "SIGVTALRM: virtual alarm clock",
+ /* 27 */ N, "SIGPROF: profiling alarm clock",
+ /* 28 */ N, "SIGWINCH: window size change",
+ /* 29 */ N, "SIGINFO: status request from keyboard",
+ /* 30 */ N, "SIGUSR1: user-defined signal 1",
+ /* 31 */ N, "SIGUSR2: user-defined signal 2",
+ /* 32 */ N, "SIGTHR: reserved",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
diff --git a/src/runtime/signals_openbsd.h b/src/runtime/signals_openbsd.h
new file mode 100644
index 000000000..7140de86f
--- /dev/null
+++ b/src/runtime/signals_openbsd.h
@@ -0,0 +1,51 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: terminal line hangup",
+ /* 2 */ N+K, "SIGINT: interrupt",
+ /* 3 */ N+T, "SIGQUIT: quit",
+ /* 4 */ T, "SIGILL: illegal instruction",
+ /* 5 */ T, "SIGTRAP: trace trap",
+ /* 6 */ N+T, "SIGABRT: abort",
+ /* 7 */ T, "SIGEMT: emulate instruction executed",
+ /* 8 */ P, "SIGFPE: floating-point exception",
+ /* 9 */ 0, "SIGKILL: kill",
+ /* 10 */ P, "SIGBUS: bus error",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ T, "SIGSYS: bad system call",
+ /* 13 */ N, "SIGPIPE: write to broken pipe",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: termination",
+ /* 16 */ N, "SIGURG: urgent condition on socket",
+ /* 17 */ 0, "SIGSTOP: stop",
+ /* 18 */ N+D, "SIGTSTP: keyboard stop",
+ /* 19 */ 0, "SIGCONT: continue after stop",
+ /* 20 */ N, "SIGCHLD: child status has changed",
+ /* 21 */ N+D, "SIGTTIN: background read from tty",
+ /* 22 */ N+D, "SIGTTOU: background write to tty",
+ /* 23 */ N, "SIGIO: i/o now possible",
+ /* 24 */ N, "SIGXCPU: cpu limit exceeded",
+ /* 25 */ N, "SIGXFSZ: file size limit exceeded",
+ /* 26 */ N, "SIGVTALRM: virtual alarm clock",
+ /* 27 */ N, "SIGPROF: profiling alarm clock",
+ /* 28 */ N, "SIGWINCH: window size change",
+ /* 29 */ N, "SIGINFO: status request from keyboard",
+ /* 30 */ N, "SIGUSR1: user-defined signal 1",
+ /* 31 */ N, "SIGUSR2: user-defined signal 2",
+ /* 32 */ N, "SIGTHR: reserved",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
diff --git a/src/runtime/signals_plan9.h b/src/runtime/signals_plan9.h
new file mode 100644
index 000000000..818f508cf
--- /dev/null
+++ b/src/runtime/signals_plan9.h
@@ -0,0 +1,60 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define E SigGoExit
+
+// Incoming notes are compared against this table using strncmp, so the
+// order matters: longer patterns must appear before their prefixes.
+// There are #defined SIG constants in os_plan9.h for the table index of
+// some of these.
+//
+// If you add entries to this table, you must respect the prefix ordering
+// and also update the constant values in os_plan9.h.
+
+SigTab runtime·sigtab[] = {
+ // Traps that we cannot recover from.
+ T, "sys: trap: debug exception",
+ T, "sys: trap: invalid opcode",
+
+ // We can recover from some memory errors in runtime·sigpanic.
+ P, "sys: trap: fault read addr", // SIGRFAULT
+ P, "sys: trap: fault write addr", // SIGWFAULT
+
+ // We can also recover from math errors.
+ P, "sys: trap: divide error", // SIGINTDIV
+ P, "sys: fp:", // SIGFLOAT
+
+ // All other traps are normally handled as if they were marked SigThrow.
+ // We mark them SigPanic here so that debug.SetPanicOnFault will work.
+ P, "sys: trap:", // SIGTRAP
+
+ // Writes to a closed pipe can be handled if desired, otherwise they're ignored.
+ N, "sys: write on closed pipe",
+
+ // Other system notes are more serious and cannot be recovered.
+ T, "sys:",
+
+ // Issued to all other procs when calling runtime·exit.
+ E, "go: exit ",
+
+ // Kill is sent by external programs to cause an exit.
+ K, "kill",
+
+ // Interrupts can be handled if desired, otherwise they cause an exit.
+ N+K, "interrupt",
+ N+K, "hangup",
+
+ // Alarms can be handled if desired, otherwise they're ignored.
+ N, "alarm",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef E
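Because this Plan 9 table is matched by prefix, entry order is load-bearing: the specific "sys: trap: fault read addr" patterns must come before the catch-all "sys:". A hedged Go sketch of that first-prefix-wins lookup (table contents abridged):

package main

import (
	"fmt"
	"strings"
)

var notes = []struct {
	flags string
	pat   string
}{
	{"P", "sys: trap: fault read addr"},
	{"T", "sys:"},
	{"N+K", "interrupt"},
}

func classify(note string) string {
	for _, e := range notes {
		if strings.HasPrefix(note, e.pat) {
			return e.flags
		}
	}
	return "unknown"
}

func main() {
	fmt.Println(classify("sys: trap: fault read addr 0x0")) // P: recovered in sigpanic
	fmt.Println(classify("sys: some other trouble"))        // T: treated as fatal
}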
diff --git a/src/runtime/signals_solaris.h b/src/runtime/signals_solaris.h
new file mode 100644
index 000000000..c272cad29
--- /dev/null
+++ b/src/runtime/signals_solaris.h
@@ -0,0 +1,94 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#define N SigNotify
+#define K SigKill
+#define T SigThrow
+#define P SigPanic
+#define D SigDefault
+
+SigTab runtime·sigtab[] = {
+ /* 0 */ 0, "SIGNONE: no trap",
+ /* 1 */ N+K, "SIGHUP: hangup",
+ /* 2 */ N+K, "SIGINT: interrupt (rubout)",
+ /* 3 */ N+T, "SIGQUIT: quit (ASCII FS)",
+ /* 4 */ T, "SIGILL: illegal instruction (not reset when caught)",
+ /* 5 */ T, "SIGTRAP: trace trap (not reset when caught)",
+ /* 6 */ N+T, "SIGABRT: used by abort, replace SIGIOT in the future",
+ /* 7 */ T, "SIGEMT: EMT instruction",
+ /* 8 */ P, "SIGFPE: floating point exception",
+ /* 9 */ 0, "SIGKILL: kill (cannot be caught or ignored)",
+ /* 10 */ P, "SIGBUS: bus error",
+ /* 11 */ P, "SIGSEGV: segmentation violation",
+ /* 12 */ T, "SIGSYS: bad argument to system call",
+ /* 13 */ N, "SIGPIPE: write on a pipe with no one to read it",
+ /* 14 */ N, "SIGALRM: alarm clock",
+ /* 15 */ N+K, "SIGTERM: software termination signal from kill",
+ /* 16 */ N, "SIGUSR1: user defined signal 1",
+ /* 17 */ N, "SIGUSR2: user defined signal 2",
+ /* 18 */ N, "SIGCLD: child status change",
+ /* 18 */ N, "SIGCHLD: child status change alias (POSIX)",
+ /* 19 */ N, "SIGPWR: power-fail restart",
+ /* 20 */ N, "SIGWINCH: window size change",
+ /* 21 */ N, "SIGURG: urgent socket condition",
+ /* 22 */ N, "SIGPOLL: pollable event occurred",
+ /* 23 */ N+D, "SIGSTOP: stop (cannot be caught or ignored)",
+ /* 24 */ 0, "SIGTSTP: user stop requested from tty",
+ /* 25 */ 0, "SIGCONT: stopped process has been continued",
+ /* 26 */ N+D, "SIGTTIN: background tty read attempted",
+ /* 27 */ N+D, "SIGTTOU: background tty write attempted",
+ /* 28 */ N, "SIGVTALRM: virtual timer expired",
+ /* 29 */ N, "SIGPROF: profiling timer expired",
+ /* 30 */ N, "SIGXCPU: exceeded cpu limit",
+ /* 31 */ N, "SIGXFSZ: exceeded file size limit",
+ /* 32 */ N, "SIGWAITING: reserved signal no longer used by",
+ /* 33 */ N, "SIGLWP: reserved signal no longer used by",
+ /* 34 */ N, "SIGFREEZE: special signal used by CPR",
+ /* 35 */ N, "SIGTHAW: special signal used by CPR",
+ /* 36 */ 0, "SIGCANCEL: reserved signal for thread cancellation",
+ /* 37 */ N, "SIGLOST: resource lost (eg, record-lock lost)",
+ /* 38 */ N, "SIGXRES: resource control exceeded",
+ /* 39 */ N, "SIGJVM1: reserved signal for Java Virtual Machine",
+ /* 40 */ N, "SIGJVM2: reserved signal for Java Virtual Machine",
+
+ /* TODO(aram): what should we do about these signals? D or N? is this set static? */
+ /* 41 */ N, "real time signal",
+ /* 42 */ N, "real time signal",
+ /* 43 */ N, "real time signal",
+ /* 44 */ N, "real time signal",
+ /* 45 */ N, "real time signal",
+ /* 46 */ N, "real time signal",
+ /* 47 */ N, "real time signal",
+ /* 48 */ N, "real time signal",
+ /* 49 */ N, "real time signal",
+ /* 50 */ N, "real time signal",
+ /* 51 */ N, "real time signal",
+ /* 52 */ N, "real time signal",
+ /* 53 */ N, "real time signal",
+ /* 54 */ N, "real time signal",
+ /* 55 */ N, "real time signal",
+ /* 56 */ N, "real time signal",
+ /* 57 */ N, "real time signal",
+ /* 58 */ N, "real time signal",
+ /* 59 */ N, "real time signal",
+ /* 60 */ N, "real time signal",
+ /* 61 */ N, "real time signal",
+ /* 62 */ N, "real time signal",
+ /* 63 */ N, "real time signal",
+ /* 64 */ N, "real time signal",
+ /* 65 */ N, "real time signal",
+ /* 66 */ N, "real time signal",
+ /* 67 */ N, "real time signal",
+ /* 68 */ N, "real time signal",
+ /* 69 */ N, "real time signal",
+ /* 70 */ N, "real time signal",
+ /* 71 */ N, "real time signal",
+ /* 72 */ N, "real time signal",
+};
+
+#undef N
+#undef K
+#undef T
+#undef P
+#undef D
diff --git a/src/runtime/signals_windows.h b/src/runtime/signals_windows.h
new file mode 100644
index 000000000..6943714b0
--- /dev/null
+++ b/src/runtime/signals_windows.h
@@ -0,0 +1,3 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go
new file mode 100644
index 000000000..2d9c24d2d
--- /dev/null
+++ b/src/runtime/sigqueue.go
@@ -0,0 +1,173 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements runtime support for signal handling.
+//
+// Most synchronization primitives are not available from
+// the signal handler (it cannot block, allocate memory, or use locks)
+// so the handler communicates with a processing goroutine
+// via struct sig, below.
+//
+// sigsend is called by the signal handler to queue a new signal.
+// signal_recv is called by the Go program to receive a newly queued signal.
+// Synchronization between sigsend and signal_recv is based on the sig.state
+// variable. It can be in 3 states: sigIdle, sigReceiving and sigSending.
+// sigReceiving means that signal_recv is blocked on sig.Note and there are no
+// new pending signals.
+// sigSending means that sig.mask *may* contain new pending signals,
+// signal_recv can't be blocked in this state.
+// sigIdle means that there are no new pending signals and signal_recv is not blocked.
+// Transitions between states are done atomically with CAS.
+// When signal_recv is unblocked, it resets sig.Note and rechecks sig.mask.
+// If several sigsends and signal_recv execute concurrently, it can lead to
+// unnecessary rechecks of sig.mask, but it cannot lead to missed signals
+// nor deadlocks.
+
+package runtime
+
+import "unsafe"
+
+var sig struct {
+ note note
+ mask [(_NSIG + 31) / 32]uint32
+ wanted [(_NSIG + 31) / 32]uint32
+ recv [(_NSIG + 31) / 32]uint32
+ state uint32
+ inuse bool
+}
+
+const (
+ sigIdle = iota
+ sigReceiving
+ sigSending
+)
+
+// Called from sighandler to send a signal back out of the signal handling thread.
+// Reports whether the signal was sent. If not, the caller typically crashes the program.
+func sigsend(s int32) bool {
+ bit := uint32(1) << uint(s&31)
+ if !sig.inuse || s < 0 || int(s) >= 32*len(sig.wanted) || sig.wanted[s/32]&bit == 0 {
+ return false
+ }
+
+ // Add signal to outgoing queue.
+ for {
+ mask := sig.mask[s/32]
+ if mask&bit != 0 {
+ return true // signal already in queue
+ }
+ if cas(&sig.mask[s/32], mask, mask|bit) {
+ break
+ }
+ }
+
+ // Notify receiver that queue has new bit.
+Send:
+ for {
+ switch atomicload(&sig.state) {
+ default:
+ gothrow("sigsend: inconsistent state")
+ case sigIdle:
+ if cas(&sig.state, sigIdle, sigSending) {
+ break Send
+ }
+ case sigSending:
+ // notification already pending
+ break Send
+ case sigReceiving:
+ if cas(&sig.state, sigReceiving, sigIdle) {
+ notewakeup(&sig.note)
+ break Send
+ }
+ }
+ }
+
+ return true
+}
+
+// Called to receive the next queued signal.
+// Must only be called from a single goroutine at a time.
+func signal_recv() uint32 {
+ for {
+ // Serve any signals from local copy.
+ for i := uint32(0); i < _NSIG; i++ {
+ if sig.recv[i/32]&(1<<(i&31)) != 0 {
+ sig.recv[i/32] &^= 1 << (i & 31)
+ return i
+ }
+ }
+
+ // Wait for updates to be available from signal sender.
+ Receive:
+ for {
+ switch atomicload(&sig.state) {
+ default:
+ gothrow("signal_recv: inconsistent state")
+ case sigIdle:
+ if cas(&sig.state, sigIdle, sigReceiving) {
+ notetsleepg(&sig.note, -1)
+ noteclear(&sig.note)
+ break Receive
+ }
+ case sigSending:
+ if cas(&sig.state, sigSending, sigIdle) {
+ break Receive
+ }
+ }
+ }
+
+ // Incorporate updates from sender into local copy.
+ for i := range sig.mask {
+ sig.recv[i] = xchg(&sig.mask[i], 0)
+ }
+ }
+}
+
+// Must only be called from a single goroutine at a time.
+func signal_enable(s uint32) {
+ if !sig.inuse {
+ // The first call to signal_enable is for us
+ // to use for initialization. It does not pass
+ // signal information in m.
+ sig.inuse = true // enable reception of signals; cannot disable
+ noteclear(&sig.note)
+ return
+ }
+
+ if int(s) >= len(sig.wanted)*32 {
+ return
+ }
+ sig.wanted[s/32] |= 1 << (s & 31)
+ sigenable_go(s)
+}
+
+// Must only be called from a single goroutine at a time.
+func signal_disable(s uint32) {
+ if int(s) >= len(sig.wanted)*32 {
+ return
+ }
+ sig.wanted[s/32] &^= 1 << (s & 31)
+ sigdisable_go(s)
+}
+
+// This runs on a foreign stack, without an m or a g. No stack split.
+//go:nosplit
+func badsignal(sig uintptr) {
+ cgocallback(unsafe.Pointer(funcPC(sigsend)), noescape(unsafe.Pointer(&sig)), unsafe.Sizeof(sig))
+}
+
+func sigenable_m()
+func sigdisable_m()
+
+func sigenable_go(s uint32) {
+ g := getg()
+ g.m.scalararg[0] = uintptr(s)
+ onM(sigenable_m)
+}
+
+func sigdisable_go(s uint32) {
+ g := getg()
+ g.m.scalararg[0] = uintptr(s)
+ onM(sigdisable_m)
+}
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
new file mode 100644
index 000000000..3b88927c6
--- /dev/null
+++ b/src/runtime/slice.go
@@ -0,0 +1,139 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+type sliceStruct struct {
+ array unsafe.Pointer
+ len int
+ cap int
+}
+
+// TODO: take uintptrs instead of int64s?
+func makeslice(t *slicetype, len64 int64, cap64 int64) sliceStruct {
+ // NOTE: The len > MaxMem/elemsize check here is not strictly necessary,
+ // but it produces a 'len out of range' error instead of a 'cap out of range' error
+ // when someone does make([]T, bignumber). 'cap out of range' is true too,
+ // but since the cap is only being supplied implicitly, saying len is clearer.
+ // See issue 4085.
+ len := int(len64)
+ if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > maxMem/uintptr(t.elem.size) {
+ panic(errorString("makeslice: len out of range"))
+ }
+ cap := int(cap64)
+ if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > maxMem/uintptr(t.elem.size) {
+ panic(errorString("makeslice: cap out of range"))
+ }
+ p := newarray(t.elem, uintptr(cap))
+ return sliceStruct{p, len, cap}
+}
+
+// TODO: take uintptr instead of int64?
+func growslice(t *slicetype, old sliceStruct, n int64) sliceStruct {
+ if n < 1 {
+ panic(errorString("growslice: invalid n"))
+ }
+
+ cap64 := int64(old.cap) + n
+ cap := int(cap64)
+
+ if int64(cap) != cap64 || cap < old.cap || t.elem.size > 0 && uintptr(cap) > maxMem/uintptr(t.elem.size) {
+ panic(errorString("growslice: cap out of range"))
+ }
+
+ if raceenabled {
+ callerpc := getcallerpc(unsafe.Pointer(&t))
+ racereadrangepc(old.array, uintptr(old.len*int(t.elem.size)), callerpc, funcPC(growslice))
+ }
+
+ et := t.elem
+ if et.size == 0 {
+ return sliceStruct{old.array, old.len, cap}
+ }
+
+ newcap := old.cap
+ if newcap+newcap < cap {
+ newcap = cap
+ } else {
+ for {
+ if old.len < 1024 {
+ newcap += newcap
+ } else {
+ newcap += newcap / 4
+ }
+ if newcap >= cap {
+ break
+ }
+ }
+ }
+
+ if uintptr(newcap) >= maxMem/uintptr(et.size) {
+ panic(errorString("growslice: cap out of range"))
+ }
+ lenmem := uintptr(old.len) * uintptr(et.size)
+ capmem := goroundupsize(uintptr(newcap) * uintptr(et.size))
+ newcap = int(capmem / uintptr(et.size))
+ var p unsafe.Pointer
+ if et.kind&kindNoPointers != 0 {
+ p = rawmem(capmem)
+ memclr(add(p, lenmem), capmem-lenmem)
+ } else {
+ // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory
+ p = newarray(et, uintptr(newcap))
+ }
+ memmove(p, old.array, lenmem)
+
+ return sliceStruct{p, old.len, newcap}
+}
+
+func slicecopy(to sliceStruct, fm sliceStruct, width uintptr) int {
+ if fm.len == 0 || to.len == 0 || width == 0 {
+ return 0
+ }
+
+ n := fm.len
+ if to.len < n {
+ n = to.len
+ }
+
+ if raceenabled {
+ callerpc := getcallerpc(unsafe.Pointer(&to))
+ pc := funcPC(slicecopy)
+ racewriterangepc(to.array, uintptr(n*int(width)), callerpc, pc)
+ racereadrangepc(fm.array, uintptr(n*int(width)), callerpc, pc)
+ }
+
+ size := uintptr(n) * width
+ if size == 1 { // common case worth about 2x to do here
+ // TODO: is this still worth it with new memmove impl?
+ *(*byte)(to.array) = *(*byte)(fm.array) // known to be a byte pointer
+ } else {
+ memmove(to.array, fm.array, size)
+ }
+ return int(n)
+}
+
+func slicestringcopy(to []byte, fm string) int {
+ if len(fm) == 0 || len(to) == 0 {
+ return 0
+ }
+
+ n := len(fm)
+ if len(to) < n {
+ n = len(to)
+ }
+
+ if raceenabled {
+ callerpc := getcallerpc(unsafe.Pointer(&to))
+ pc := funcPC(slicestringcopy)
+ racewriterangepc(unsafe.Pointer(&to[0]), uintptr(n), callerpc, pc)
+ }
+
+ memmove(unsafe.Pointer(&to[0]), unsafe.Pointer((*stringStruct)(unsafe.Pointer(&fm)).str), uintptr(n))
+ return n
+}
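growslice's capacity policy is the part worth internalizing: ask for more than double the old capacity and you get exactly what you asked for; otherwise small slices double and slices past 1024 elements grow by roughly 25% per step. A standalone sketch of just that policy (the rounding to allocation size classes done by goroundupsize is omitted):

package main

import "fmt"

// nextCap mirrors the growth policy in growslice above.
func nextCap(oldLen, oldCap, needed int) int {
	newcap := oldCap
	if newcap+newcap < needed {
		return needed
	}
	for newcap < needed {
		if oldLen < 1024 {
			newcap += newcap
		} else {
			newcap += newcap / 4
		}
	}
	return newcap
}

func main() {
	fmt.Println(nextCap(4, 4, 5))          // 8: small slices double
	fmt.Println(nextCap(2000, 2000, 2001)) // 2500: large slices grow by ~25%
}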
diff --git a/src/runtime/softfloat64.go b/src/runtime/softfloat64.go
new file mode 100644
index 000000000..4fcf8f269
--- /dev/null
+++ b/src/runtime/softfloat64.go
@@ -0,0 +1,498 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Software IEEE754 64-bit floating point.
+// Only referred to (and thus linked in) by arm port
+// and by tests in this directory.
+
+package runtime
+
+const (
+ mantbits64 uint = 52
+ expbits64 uint = 11
+ bias64 = -1<<(expbits64-1) + 1
+
+ nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1
+ inf64 uint64 = (1<<expbits64 - 1) << mantbits64
+ neg64 uint64 = 1 << (expbits64 + mantbits64)
+
+ mantbits32 uint = 23
+ expbits32 uint = 8
+ bias32 = -1<<(expbits32-1) + 1
+
+ nan32 uint32 = (1<<expbits32-1)<<mantbits32 + 1
+ inf32 uint32 = (1<<expbits32 - 1) << mantbits32
+ neg32 uint32 = 1 << (expbits32 + mantbits32)
+)
+
+func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
+ sign = f & (1 << (mantbits64 + expbits64))
+ mant = f & (1<<mantbits64 - 1)
+ exp = int(f>>mantbits64) & (1<<expbits64 - 1)
+
+ switch exp {
+ case 1<<expbits64 - 1:
+ if mant != 0 {
+ nan = true
+ return
+ }
+ inf = true
+ return
+
+ case 0:
+ // denormalized
+ if mant != 0 {
+ exp += bias64 + 1
+ for mant < 1<<mantbits64 {
+ mant <<= 1
+ exp--
+ }
+ }
+
+ default:
+ // add implicit top bit
+ mant |= 1 << mantbits64
+ exp += bias64
+ }
+ return
+}
+
+func funpack32(f uint32) (sign, mant uint32, exp int, inf, nan bool) {
+ sign = f & (1 << (mantbits32 + expbits32))
+ mant = f & (1<<mantbits32 - 1)
+ exp = int(f>>mantbits32) & (1<<expbits32 - 1)
+
+ switch exp {
+ case 1<<expbits32 - 1:
+ if mant != 0 {
+ nan = true
+ return
+ }
+ inf = true
+ return
+
+ case 0:
+ // denormalized
+ if mant != 0 {
+ exp += bias32 + 1
+ for mant < 1<<mantbits32 {
+ mant <<= 1
+ exp--
+ }
+ }
+
+ default:
+ // add implicit top bit
+ mant |= 1 << mantbits32
+ exp += bias32
+ }
+ return
+}
+
+func fpack64(sign, mant uint64, exp int, trunc uint64) uint64 {
+ mant0, exp0, trunc0 := mant, exp, trunc
+ if mant == 0 {
+ return sign
+ }
+ for mant < 1<<mantbits64 {
+ mant <<= 1
+ exp--
+ }
+ for mant >= 4<<mantbits64 {
+ trunc |= mant & 1
+ mant >>= 1
+ exp++
+ }
+ if mant >= 2<<mantbits64 {
+ if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+ mant++
+ if mant >= 4<<mantbits64 {
+ mant >>= 1
+ exp++
+ }
+ }
+ mant >>= 1
+ exp++
+ }
+ if exp >= 1<<expbits64-1+bias64 {
+ return sign ^ inf64
+ }
+ if exp < bias64+1 {
+ if exp < bias64-int(mantbits64) {
+ return sign | 0
+ }
+ // repeat expecting denormal
+ mant, exp, trunc = mant0, exp0, trunc0
+ for exp < bias64 {
+ trunc |= mant & 1
+ mant >>= 1
+ exp++
+ }
+ if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+ mant++
+ }
+ mant >>= 1
+ exp++
+ if mant < 1<<mantbits64 {
+ return sign | mant
+ }
+ }
+ return sign | uint64(exp-bias64)<<mantbits64 | mant&(1<<mantbits64-1)
+}
+
+func fpack32(sign, mant uint32, exp int, trunc uint32) uint32 {
+ mant0, exp0, trunc0 := mant, exp, trunc
+ if mant == 0 {
+ return sign
+ }
+ for mant < 1<<mantbits32 {
+ mant <<= 1
+ exp--
+ }
+ for mant >= 4<<mantbits32 {
+ trunc |= mant & 1
+ mant >>= 1
+ exp++
+ }
+ if mant >= 2<<mantbits32 {
+ if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+ mant++
+ if mant >= 4<<mantbits32 {
+ mant >>= 1
+ exp++
+ }
+ }
+ mant >>= 1
+ exp++
+ }
+ if exp >= 1<<expbits32-1+bias32 {
+ return sign ^ inf32
+ }
+ if exp < bias32+1 {
+ if exp < bias32-int(mantbits32) {
+ return sign | 0
+ }
+ // repeat expecting denormal
+ mant, exp, trunc = mant0, exp0, trunc0
+ for exp < bias32 {
+ trunc |= mant & 1
+ mant >>= 1
+ exp++
+ }
+ if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
+ mant++
+ }
+ mant >>= 1
+ exp++
+ if mant < 1<<mantbits32 {
+ return sign | mant
+ }
+ }
+ return sign | uint32(exp-bias32)<<mantbits32 | mant&(1<<mantbits32-1)
+}
+
+func fadd64(f, g uint64) uint64 {
+ fs, fm, fe, fi, fn := funpack64(f)
+ gs, gm, ge, gi, gn := funpack64(g)
+
+ // Special cases.
+ switch {
+ case fn || gn: // NaN + x or x + NaN = NaN
+ return nan64
+
+ case fi && gi && fs != gs: // +Inf + -Inf or -Inf + +Inf = NaN
+ return nan64
+
+ case fi: // ±Inf + g = ±Inf
+ return f
+
+ case gi: // f + ±Inf = ±Inf
+ return g
+
+ case fm == 0 && gm == 0 && fs != 0 && gs != 0: // -0 + -0 = -0
+ return f
+
+ case fm == 0: // 0 + g = g but 0 + -0 = +0
+ if gm == 0 {
+ g ^= gs
+ }
+ return g
+
+ case gm == 0: // f + 0 = f
+ return f
+
+ }
+
+ if fe < ge || fe == ge && fm < gm {
+ f, g, fs, fm, fe, gs, gm, ge = g, f, gs, gm, ge, fs, fm, fe
+ }
+
+ shift := uint(fe - ge)
+ fm <<= 2
+ gm <<= 2
+ trunc := gm & (1<<shift - 1)
+ gm >>= shift
+ if fs == gs {
+ fm += gm
+ } else {
+ fm -= gm
+ if trunc != 0 {
+ fm--
+ }
+ }
+ if fm == 0 {
+ fs = 0
+ }
+ return fpack64(fs, fm, fe-2, trunc)
+}
+
+func fsub64(f, g uint64) uint64 {
+ return fadd64(f, fneg64(g))
+}
+
+func fneg64(f uint64) uint64 {
+ return f ^ (1 << (mantbits64 + expbits64))
+}
+
+func fmul64(f, g uint64) uint64 {
+ fs, fm, fe, fi, fn := funpack64(f)
+ gs, gm, ge, gi, gn := funpack64(g)
+
+ // Special cases.
+ switch {
+ case fn || gn: // NaN * g or f * NaN = NaN
+ return nan64
+
+ case fi && gi: // Inf * Inf = Inf (with sign adjusted)
+ return f ^ gs
+
+ case fi && gm == 0, fm == 0 && gi: // 0 * Inf = Inf * 0 = NaN
+ return nan64
+
+ case fm == 0: // 0 * x = 0 (with sign adjusted)
+ return f ^ gs
+
+ case gm == 0: // x * 0 = 0 (with sign adjusted)
+ return g ^ fs
+ }
+
+ // 53-bit * 53-bit = 107- or 108-bit
+ lo, hi := mullu(fm, gm)
+ shift := mantbits64 - 1
+ trunc := lo & (1<<shift - 1)
+ mant := hi<<(64-shift) | lo>>shift
+ return fpack64(fs^gs, mant, fe+ge-1, trunc)
+}
+
+func fdiv64(f, g uint64) uint64 {
+ fs, fm, fe, fi, fn := funpack64(f)
+ gs, gm, ge, gi, gn := funpack64(g)
+
+ // Special cases.
+ switch {
+ case fn || gn: // NaN / g = f / NaN = NaN
+ return nan64
+
+ case fi && gi: // ±Inf / ±Inf = NaN
+ return nan64
+
+ case !fi && !gi && fm == 0 && gm == 0: // 0 / 0 = NaN
+ return nan64
+
+ case fi, !gi && gm == 0: // Inf / g = f / 0 = Inf
+ return fs ^ gs ^ inf64
+
+ case gi, fm == 0: // f / Inf = 0 / g = Inf
+ return fs ^ gs ^ 0
+ }
+ _, _, _, _ = fi, fn, gi, gn
+
+ // 53-bit<<54 / 53-bit = 53- or 54-bit.
+ shift := mantbits64 + 2
+ q, r := divlu(fm>>(64-shift), fm<<shift, gm)
+ return fpack64(fs^gs, q, fe-ge-2, r)
+}
+
+func f64to32(f uint64) uint32 {
+ fs, fm, fe, fi, fn := funpack64(f)
+ if fn {
+ return nan32
+ }
+ fs32 := uint32(fs >> 32)
+ if fi {
+ return fs32 ^ inf32
+ }
+ const d = mantbits64 - mantbits32 - 1
+ return fpack32(fs32, uint32(fm>>d), fe-1, uint32(fm&(1<<d-1)))
+}
+
+func f32to64(f uint32) uint64 {
+ const d = mantbits64 - mantbits32
+ fs, fm, fe, fi, fn := funpack32(f)
+ if fn {
+ return nan64
+ }
+ fs64 := uint64(fs) << 32
+ if fi {
+ return fs64 ^ inf64
+ }
+ return fpack64(fs64, uint64(fm)<<d, fe, 0)
+}
+
+func fcmp64(f, g uint64) (cmp int, isnan bool) {
+ fs, fm, _, fi, fn := funpack64(f)
+ gs, gm, _, gi, gn := funpack64(g)
+
+ switch {
+ case fn, gn: // flag NaN
+ return 0, true
+
+ case !fi && !gi && fm == 0 && gm == 0: // ±0 == ±0
+ return 0, false
+
+ case fs > gs: // f < 0, g > 0
+ return -1, false
+
+ case fs < gs: // f > 0, g < 0
+ return +1, false
+
+ // Same sign, not NaN.
+ // Can compare encodings directly now.
+ // Reverse for sign.
+ case fs == 0 && f < g, fs != 0 && f > g:
+ return -1, false
+
+ case fs == 0 && f > g, fs != 0 && f < g:
+ return +1, false
+ }
+
+ // f == g
+ return 0, false
+}
+
+func f64toint(f uint64) (val int64, ok bool) {
+ fs, fm, fe, fi, fn := funpack64(f)
+
+ switch {
+ case fi, fn: // NaN
+ return 0, false
+
+ case fe < -1: // f < 0.5
+ return 0, false
+
+ case fe > 63: // f >= 2^63
+ if fs != 0 && fm == 0 { // f == -2^63
+ return -1 << 63, true
+ }
+ if fs != 0 {
+ return 0, false
+ }
+ return 0, false
+ }
+
+ for fe > int(mantbits64) {
+ fe--
+ fm <<= 1
+ }
+ for fe < int(mantbits64) {
+ fe++
+ fm >>= 1
+ }
+ val = int64(fm)
+ if fs != 0 {
+ val = -val
+ }
+ return val, true
+}
+
+func fintto64(val int64) (f uint64) {
+ fs := uint64(val) & (1 << 63)
+ mant := uint64(val)
+ if fs != 0 {
+ mant = -mant
+ }
+ return fpack64(fs, mant, int(mantbits64), 0)
+}
+
+// 64x64 -> 128 multiply.
+// adapted from hacker's delight.
+func mullu(u, v uint64) (lo, hi uint64) {
+ const (
+ s = 32
+ mask = 1<<s - 1
+ )
+ u0 := u & mask
+ u1 := u >> s
+ v0 := v & mask
+ v1 := v >> s
+ w0 := u0 * v0
+ t := u1*v0 + w0>>s
+ w1 := t & mask
+ w2 := t >> s
+ w1 += u0 * v1
+ return u * v, u1*v1 + w2 + w1>>s
+}
+
+// 128/64 -> 64 quotient, 64 remainder.
+// adapted from hacker's delight
+func divlu(u1, u0, v uint64) (q, r uint64) {
+ const b = 1 << 32
+
+ if u1 >= v {
+ return 1<<64 - 1, 1<<64 - 1
+ }
+
+ // s = nlz(v); v <<= s
+ s := uint(0)
+ for v&(1<<63) == 0 {
+ s++
+ v <<= 1
+ }
+
+ vn1 := v >> 32
+ vn0 := v & (1<<32 - 1)
+ un32 := u1<<s | u0>>(64-s)
+ un10 := u0 << s
+ un1 := un10 >> 32
+ un0 := un10 & (1<<32 - 1)
+ q1 := un32 / vn1
+ rhat := un32 - q1*vn1
+
+again1:
+ if q1 >= b || q1*vn0 > b*rhat+un1 {
+ q1--
+ rhat += vn1
+ if rhat < b {
+ goto again1
+ }
+ }
+
+ un21 := un32*b + un1 - q1*v
+ q0 := un21 / vn1
+ rhat = un21 - q0*vn1
+
+again2:
+ if q0 >= b || q0*vn0 > b*rhat+un0 {
+ q0--
+ rhat += vn1
+ if rhat < b {
+ goto again2
+ }
+ }
+
+ return q1*b + q0, (un21*b + un0 - q0*v) >> s
+}
+
+// callable from C
+
+func fadd64c(f, g uint64, ret *uint64) { *ret = fadd64(f, g) }
+func fsub64c(f, g uint64, ret *uint64) { *ret = fsub64(f, g) }
+func fmul64c(f, g uint64, ret *uint64) { *ret = fmul64(f, g) }
+func fdiv64c(f, g uint64, ret *uint64) { *ret = fdiv64(f, g) }
+func fneg64c(f uint64, ret *uint64) { *ret = fneg64(f) }
+func f32to64c(f uint32, ret *uint64) { *ret = f32to64(f) }
+func f64to32c(f uint64, ret *uint32) { *ret = f64to32(f) }
+func fcmp64c(f, g uint64, ret *int, retnan *bool) { *ret, *retnan = fcmp64(f, g) }
+func fintto64c(val int64, ret *uint64) { *ret = fintto64(val) }
+func f64tointc(f uint64, ret *int64, retok *bool) { *ret, *retok = f64toint(f) }
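The helpers at the bottom (mullu, divlu) are the portable wide arithmetic the packer leans on. A hedged check of the 64x64->128 multiply against the hardware; bits.Mul64 postdates this code and is used here purely as an independent comparison:

package main

import (
	"fmt"
	"math/bits"
)

// mullu repeats the 32-bit-split multiply from softfloat64.go above.
func mullu(u, v uint64) (lo, hi uint64) {
	const s, mask = 32, 1<<32 - 1
	u0, u1 := u&mask, u>>s
	v0, v1 := v&mask, v>>s
	w0 := u0 * v0
	t := u1*v0 + w0>>s
	w1, w2 := t&mask, t>>s
	w1 += u0 * v1
	return u * v, u1*v1 + w2 + w1>>s
}

func main() {
	u, v := uint64(0xfedcba9876543210), uint64(0x0123456789abcdef)
	lo, hi := mullu(u, v)
	rhi, rlo := bits.Mul64(u, v)
	fmt.Println(lo == rlo && hi == rhi) // true
}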
diff --git a/src/runtime/softfloat64_test.go b/src/runtime/softfloat64_test.go
new file mode 100644
index 000000000..df63010fb
--- /dev/null
+++ b/src/runtime/softfloat64_test.go
@@ -0,0 +1,198 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "math"
+ "math/rand"
+ . "runtime"
+ "testing"
+)
+
+// turn uint64 op into float64 op
+func fop(f func(x, y uint64) uint64) func(x, y float64) float64 {
+ return func(x, y float64) float64 {
+ bx := math.Float64bits(x)
+ by := math.Float64bits(y)
+ return math.Float64frombits(f(bx, by))
+ }
+}
+
+func add(x, y float64) float64 { return x + y }
+func sub(x, y float64) float64 { return x - y }
+func mul(x, y float64) float64 { return x * y }
+func div(x, y float64) float64 { return x / y }
+
+func TestFloat64(t *testing.T) {
+ base := []float64{
+ 0,
+ math.Copysign(0, -1),
+ -1,
+ 1,
+ math.NaN(),
+ math.Inf(+1),
+ math.Inf(-1),
+ 0.1,
+ 1.5,
+ 1.9999999999999998, // all 1s mantissa
+ 1.3333333333333333, // 1.010101010101...
+ 1.1428571428571428, // 1.001001001001...
+ 1.112536929253601e-308, // first normal
+ 2,
+ 4,
+ 8,
+ 16,
+ 32,
+ 64,
+ 128,
+ 256,
+ 3,
+ 12,
+ 1234,
+ 123456,
+ -0.1,
+ -1.5,
+ -1.9999999999999998,
+ -1.3333333333333333,
+ -1.1428571428571428,
+ -2,
+ -3,
+ 1e-200,
+ 1e-300,
+ 1e-310,
+ 5e-324,
+ 1e-105,
+ 1e-305,
+ 1e+200,
+ 1e+306,
+ 1e+307,
+ 1e+308,
+ }
+ all := make([]float64, 200)
+ copy(all, base)
+ for i := len(base); i < len(all); i++ {
+ all[i] = rand.NormFloat64()
+ }
+
+ test(t, "+", add, fop(Fadd64), all)
+ test(t, "-", sub, fop(Fsub64), all)
+ if GOARCH != "386" { // 386 is not precise!
+ test(t, "*", mul, fop(Fmul64), all)
+ test(t, "/", div, fop(Fdiv64), all)
+ }
+}
+
+// 64 -hw-> 32 -hw-> 64
+func trunc32(f float64) float64 {
+ return float64(float32(f))
+}
+
+// 64 -sw->32 -hw-> 64
+func to32sw(f float64) float64 {
+ return float64(math.Float32frombits(F64to32(math.Float64bits(f))))
+}
+
+// 64 -hw->32 -sw-> 64
+func to64sw(f float64) float64 {
+ return math.Float64frombits(F32to64(math.Float32bits(float32(f))))
+}
+
+// float64 -hw-> int64 -hw-> float64
+func hwint64(f float64) float64 {
+ return float64(int64(f))
+}
+
+// float64 -hw-> int32 -hw-> float64
+func hwint32(f float64) float64 {
+ return float64(int32(f))
+}
+
+// float64 -sw-> int64 -hw-> float64
+func toint64sw(f float64) float64 {
+ i, ok := F64toint(math.Float64bits(f))
+ if !ok {
+ // There's no right answer for out of range.
+ // Match the hardware to pass the test.
+ i = int64(f)
+ }
+ return float64(i)
+}
+
+// float64 -hw-> int64 -sw-> float64
+func fromint64sw(f float64) float64 {
+ return math.Float64frombits(Fintto64(int64(f)))
+}
+
+var nerr int
+
+func err(t *testing.T, format string, args ...interface{}) {
+ t.Errorf(format, args...)
+
+ // cut errors off after a while.
+ // otherwise we spend all our time
+ // allocating memory to hold the
+ // formatted output.
+ if nerr++; nerr >= 10 {
+ t.Fatal("too many errors")
+ }
+}
+
+func test(t *testing.T, op string, hw, sw func(float64, float64) float64, all []float64) {
+ for _, f := range all {
+ for _, g := range all {
+ h := hw(f, g)
+ s := sw(f, g)
+ if !same(h, s) {
+ err(t, "%g %s %g = sw %g, hw %g\n", f, op, g, s, h)
+ }
+ testu(t, "to32", trunc32, to32sw, h)
+ testu(t, "to64", trunc32, to64sw, h)
+ testu(t, "toint64", hwint64, toint64sw, h)
+ testu(t, "fromint64", hwint64, fromint64sw, h)
+ testcmp(t, f, h)
+ testcmp(t, h, f)
+ testcmp(t, g, h)
+ testcmp(t, h, g)
+ }
+ }
+}
+
+func testu(t *testing.T, op string, hw, sw func(float64) float64, v float64) {
+ h := hw(v)
+ s := sw(v)
+ if !same(h, s) {
+ err(t, "%s %g = sw %g, hw %g\n", op, v, s, h)
+ }
+}
+
+func hwcmp(f, g float64) (cmp int, isnan bool) {
+ switch {
+ case f < g:
+ return -1, false
+ case f > g:
+ return +1, false
+ case f == g:
+ return 0, false
+ }
+ return 0, true // must be NaN
+}
+
+func testcmp(t *testing.T, f, g float64) {
+ hcmp, hisnan := hwcmp(f, g)
+ scmp, sisnan := Fcmp64(math.Float64bits(f), math.Float64bits(g))
+ if hcmp != scmp || hisnan != sisnan {
+ err(t, "cmp(%g, %g) = sw %v, %v, hw %v, %v\n", f, g, scmp, sisnan, hcmp, hisnan)
+ }
+}
+
+func same(f, g float64) bool {
+ if math.IsNaN(f) && math.IsNaN(g) {
+ return true
+ }
+ if math.Copysign(1, f) != math.Copysign(1, g) {
+ return false
+ }
+ return f == g
+}
diff --git a/src/runtime/softfloat_arm.c b/src/runtime/softfloat_arm.c
new file mode 100644
index 000000000..3f3f33a19
--- /dev/null
+++ b/src/runtime/softfloat_arm.c
@@ -0,0 +1,687 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Software floating point interpretation of ARM 7500 FP instructions.
+// The interpretation is not bit compatible with the 7500.
+// It uses true little-endian doubles, while the 7500 used mixed-endian.
+
+#include "runtime.h"
+#include "textflag.h"
+
+#define CPSR 14
+#define FLAGS_N (1U << 31)
+#define FLAGS_Z (1U << 30)
+#define FLAGS_C (1U << 29)
+#define FLAGS_V (1U << 28)
+
+void runtime·abort(void);
+void runtime·sqrtC(uint64, uint64*);
+
+static uint32 trace = 0;
+
+static void
+fabort(void)
+{
+ if (1) {
+ runtime·printf("Unsupported floating point instruction\n");
+ runtime·abort();
+ }
+}
+
+static void
+putf(uint32 reg, uint32 val)
+{
+ g->m->freglo[reg] = val;
+}
+
+static void
+putd(uint32 reg, uint64 val)
+{
+ g->m->freglo[reg] = (uint32)val;
+ g->m->freghi[reg] = (uint32)(val>>32);
+}
+
+static uint64
+getd(uint32 reg)
+{
+ return (uint64)g->m->freglo[reg] | ((uint64)g->m->freghi[reg]<<32);
+}
+
+static void
+fprint(void)
+{
+ uint32 i;
+ for (i = 0; i < 16; i++) {
+ runtime·printf("\tf%d:\t%X %X\n", i, g->m->freghi[i], g->m->freglo[i]);
+ }
+}
+
+static uint32
+d2f(uint64 d)
+{
+ uint32 x;
+
+ runtime·f64to32c(d, &x);
+ return x;
+}
+
+static uint64
+f2d(uint32 f)
+{
+ uint64 x;
+
+ runtime·f32to64c(f, &x);
+ return x;
+}
+
+static uint32
+fstatus(bool nan, int32 cmp)
+{
+ if(nan)
+ return FLAGS_C | FLAGS_V;
+ if(cmp == 0)
+ return FLAGS_Z | FLAGS_C;
+ if(cmp < 0)
+ return FLAGS_N;
+ return FLAGS_C;
+}
+
+// The conditions array records the required CPSR cond field for the
+// first 5 pairs of conditional execution opcodes; in each entry the
+// high 4 bits are the flags that must be set, the low 4 bits those that must be clear.
+#pragma dataflag NOPTR
+static const uint8 conditions[10/2] = {
+ [0/2] = (FLAGS_Z >> 24) | 0, // 0: EQ (Z set), 1: NE (Z clear)
+ [2/2] = (FLAGS_C >> 24) | 0, // 2: CS/HS (C set), 3: CC/LO (C clear)
+ [4/2] = (FLAGS_N >> 24) | 0, // 4: MI (N set), 5: PL (N clear)
+ [6/2] = (FLAGS_V >> 24) | 0, // 6: VS (V set), 7: VC (V clear)
+ [8/2] = (FLAGS_C >> 24) |
+ (FLAGS_Z >> 28), // 8: HI (C set and Z clear), 9: LS (C clear and Z set)
+};
+
+#define FAULT (0x80000000U) // impossible PC offset
+
+// returns number of words that the fp instruction
+// is occupying, 0 if next instruction isn't float.
+static uint32
+stepflt(uint32 *pc, uint32 *regs)
+{
+ uint32 i, opc, regd, regm, regn, cpsr;
+ int32 delta;
+ uint32 *addr;
+ uint64 uval;
+ int64 sval;
+ bool nan, ok;
+ int32 cmp;
+ M *m;
+
+ // m is locked in vlop_arm.s, so g->m cannot change during this function call,
+ // so caching it in a local variable is safe.
+ m = g->m;
+ i = *pc;
+
+ if(trace)
+ runtime·printf("stepflt %p %x (cpsr %x)\n", pc, i, regs[CPSR] >> 28);
+
+ opc = i >> 28;
+ if(opc == 14) // common case first
+ goto execute;
+ cpsr = regs[CPSR] >> 28;
+ switch(opc) {
+ case 0: case 1: case 2: case 3: case 4:
+ case 5: case 6: case 7: case 8: case 9:
+ if(((cpsr & (conditions[opc/2] >> 4)) == (conditions[opc/2] >> 4)) &&
+ ((cpsr & (conditions[opc/2] & 0xf)) == 0)) {
+ if(opc & 1) return 1;
+ } else {
+ if(!(opc & 1)) return 1;
+ }
+ break;
+ case 10: // GE (N == V)
+ case 11: // LT (N != V)
+ if((cpsr & (FLAGS_N >> 28)) == (cpsr & (FLAGS_V >> 28))) {
+ if(opc & 1) return 1;
+ } else {
+ if(!(opc & 1)) return 1;
+ }
+ break;
+ case 12: // GT (N == V and Z == 0)
+ case 13: // LE (N != V or Z == 1)
+ if((cpsr & (FLAGS_N >> 28)) == (cpsr & (FLAGS_V >> 28)) &&
+ (cpsr & (FLAGS_Z >> 28)) == 0) {
+ if(opc & 1) return 1;
+ } else {
+ if(!(opc & 1)) return 1;
+ }
+ break;
+ case 14: // AL
+ break;
+ case 15: // shouldn't happen
+ return 0;
+ }
+ if(trace)
+ runtime·printf("conditional %x (cpsr %x) pass\n", opc, cpsr);
+ i = (0xeU << 28) | (i & 0xfffffff);
+
+execute:
+ // special cases
+ if((i&0xfffff000) == 0xe59fb000) {
+ // load r11 from pc-relative address.
+ // might be part of a floating point move
+ // (or might not, but no harm in simulating
+ // one instruction too many).
+ addr = (uint32*)((uint8*)pc + (i&0xfff) + 8);
+ regs[11] = addr[0];
+
+ if(trace)
+ runtime·printf("*** cpu R[%d] = *(%p) %x\n",
+ 11, addr, regs[11]);
+ return 1;
+ }
+ if(i == 0xe08bb00d) {
+ // add sp to r11.
+ // might be part of a large stack offset address
+ // (or might not, but again no harm done).
+ regs[11] += regs[13];
+
+ if(trace)
+ runtime·printf("*** cpu R[%d] += R[%d] %x\n",
+ 11, 13, regs[11]);
+ return 1;
+ }
+ if(i == 0xeef1fa10) {
+ regs[CPSR] = (regs[CPSR]&0x0fffffff) | m->fflag;
+
+ if(trace)
+ runtime·printf("*** fpsr R[CPSR] = F[CPSR] %x\n", regs[CPSR]);
+ return 1;
+ }
+ if((i&0xff000000) == 0xea000000) {
+ // unconditional branch
+ // can happen in the middle of floating point
+ // if the linker decides it is time to lay down
+ // a sequence of instruction stream constants.
+ delta = i&0xffffff;
+ delta = (delta<<8) >> 8; // sign extend
+
+ if(trace)
+ runtime·printf("*** cpu PC += %x\n", (delta+2)*4);
+ return delta+2;
+ }
+
+ goto stage1;
+
+stage1: // load/store regn is cpureg, regm is 8bit offset
+ regd = i>>12 & 0xf;
+ regn = i>>16 & 0xf;
+ regm = (i & 0xff) << 2; // PLUS or MINUS ??
+
+ switch(i & 0xfff00f00) {
+ default:
+ goto stage2;
+
+ case 0xed900a00: // single load
+ addr = (uint32*)(regs[regn] + regm);
+ if((uintptr)addr < 4096) {
+ if(trace)
+ runtime·printf("*** load @%p => fault\n", addr);
+ return FAULT;
+ }
+ m->freglo[regd] = addr[0];
+
+ if(trace)
+ runtime·printf("*** load F[%d] = %x\n",
+ regd, m->freglo[regd]);
+ break;
+
+ case 0xed900b00: // double load
+ addr = (uint32*)(regs[regn] + regm);
+ if((uintptr)addr < 4096) {
+ if(trace)
+ runtime·printf("*** double load @%p => fault\n", addr);
+ return FAULT;
+ }
+ m->freglo[regd] = addr[0];
+ m->freghi[regd] = addr[1];
+
+ if(trace)
+ runtime·printf("*** load D[%d] = %x-%x\n",
+ regd, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xed800a00: // single store
+ addr = (uint32*)(regs[regn] + regm);
+ if((uintptr)addr < 4096) {
+ if(trace)
+ runtime·printf("*** store @%p => fault\n", addr);
+ return FAULT;
+ }
+ addr[0] = m->freglo[regd];
+
+ if(trace)
+ runtime·printf("*** *(%p) = %x\n",
+ addr, addr[0]);
+ break;
+
+ case 0xed800b00: // double store
+ addr = (uint32*)(regs[regn] + regm);
+ if((uintptr)addr < 4096) {
+ if(trace)
+ runtime·printf("*** double store @%p => fault\n", addr);
+ return FAULT;
+ }
+ addr[0] = m->freglo[regd];
+ addr[1] = m->freghi[regd];
+
+ if(trace)
+ runtime·printf("*** *(%p) = %x-%x\n",
+ addr, addr[1], addr[0]);
+ break;
+ }
+ return 1;
+
+stage2: // regd, regm, regn are 4bit variables
+ regm = i>>0 & 0xf;
+ switch(i & 0xfff00ff0) {
+ default:
+ goto stage3;
+
+ case 0xf3000110: // veor
+ m->freglo[regd] = m->freglo[regm]^m->freglo[regn];
+ m->freghi[regd] = m->freghi[regm]^m->freghi[regn];
+
+ if(trace)
+ runtime·printf("*** veor D[%d] = %x-%x\n",
+ regd, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb00b00: // D[regd] = const(regn,regm)
+ regn = (regn<<4) | regm;
+ regm = 0x40000000UL;
+ if(regn & 0x80)
+ regm |= 0x80000000UL;
+ if(regn & 0x40)
+ regm ^= 0x7fc00000UL;
+ regm |= (regn & 0x3f) << 16;
+ m->freglo[regd] = 0;
+ m->freghi[regd] = regm;
+
+ if(trace)
+ runtime·printf("*** immed D[%d] = %x-%x\n",
+ regd, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb00a00: // F[regd] = const(regn,regm)
+ regn = (regn<<4) | regm;
+ regm = 0x40000000UL;
+ if(regn & 0x80)
+ regm |= 0x80000000UL;
+ if(regn & 0x40)
+ regm ^= 0x7e000000UL;
+ regm |= (regn & 0x3f) << 19;
+ m->freglo[regd] = regm;
+
+ if(trace)
+ runtime·printf("*** immed D[%d] = %x\n",
+ regd, m->freglo[regd]);
+ break;
+
+ case 0xee300b00: // D[regd] = D[regn]+D[regm]
+ runtime·fadd64c(getd(regn), getd(regm), &uval);
+ putd(regd, uval);
+
+ if(trace)
+ runtime·printf("*** add D[%d] = D[%d]+D[%d] %x-%x\n",
+ regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xee300a00: // F[regd] = F[regn]+F[regm]
+ runtime·fadd64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+ m->freglo[regd] = d2f(uval);
+
+ if(trace)
+ runtime·printf("*** add F[%d] = F[%d]+F[%d] %x\n",
+ regd, regn, regm, m->freglo[regd]);
+ break;
+
+ case 0xee300b40: // D[regd] = D[regn]-D[regm]
+ runtime·fsub64c(getd(regn), getd(regm), &uval);
+ putd(regd, uval);
+
+ if(trace)
+ runtime·printf("*** sub D[%d] = D[%d]-D[%d] %x-%x\n",
+ regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xee300a40: // F[regd] = F[regn]-F[regm]
+ runtime·fsub64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+ m->freglo[regd] = d2f(uval);
+
+ if(trace)
+ runtime·printf("*** sub F[%d] = F[%d]-F[%d] %x\n",
+ regd, regn, regm, m->freglo[regd]);
+ break;
+
+ case 0xee200b00: // D[regd] = D[regn]*D[regm]
+ runtime·fmul64c(getd(regn), getd(regm), &uval);
+ putd(regd, uval);
+
+ if(trace)
+ runtime·printf("*** mul D[%d] = D[%d]*D[%d] %x-%x\n",
+ regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xee200a00: // F[regd] = F[regn]*F[regm]
+ runtime·fmul64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+ m->freglo[regd] = d2f(uval);
+
+ if(trace)
+ runtime·printf("*** mul F[%d] = F[%d]*F[%d] %x\n",
+ regd, regn, regm, m->freglo[regd]);
+ break;
+
+ case 0xee800b00: // D[regd] = D[regn]/D[regm]
+ runtime·fdiv64c(getd(regn), getd(regm), &uval);
+ putd(regd, uval);
+
+ if(trace)
+ runtime·printf("*** div D[%d] = D[%d]/D[%d] %x-%x\n",
+ regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xee800a00: // F[regd] = F[regn]/F[regm]
+ runtime·fdiv64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+ m->freglo[regd] = d2f(uval);
+
+ if(trace)
+ runtime·printf("*** div F[%d] = F[%d]/F[%d] %x\n",
+ regd, regn, regm, m->freglo[regd]);
+ break;
+
+ case 0xee000b10: // S[regn] = R[regd] (MOVW) (regm ignored)
+ m->freglo[regn] = regs[regd];
+
+ if(trace)
+ runtime·printf("*** cpy S[%d] = R[%d] %x\n",
+ regn, regd, m->freglo[regn]);
+ break;
+
+ case 0xee100b10: // R[regd] = S[regn] (MOVW) (regm ignored)
+ regs[regd] = m->freglo[regn];
+
+ if(trace)
+ runtime·printf("*** cpy R[%d] = S[%d] %x\n",
+ regd, regn, regs[regd]);
+ break;
+ }
+ return 1;
+
+stage3: // regd, regm are 4bit variables
+ switch(i & 0xffff0ff0) {
+ default:
+ goto done;
+
+ case 0xeeb00a40: // F[regd] = F[regm] (MOVF)
+ m->freglo[regd] = m->freglo[regm];
+
+ if(trace)
+ runtime·printf("*** F[%d] = F[%d] %x\n",
+ regd, regm, m->freglo[regd]);
+ break;
+
+ case 0xeeb00b40: // D[regd] = D[regm] (MOVD)
+ m->freglo[regd] = m->freglo[regm];
+ m->freghi[regd] = m->freghi[regm];
+
+ if(trace)
+ runtime·printf("*** D[%d] = D[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb10bc0: // D[regd] = sqrt D[regm]
+ runtime·sqrtC(getd(regm), &uval);
+ putd(regd, uval);
+
+ if(trace)
+ runtime·printf("*** D[%d] = sqrt D[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb00bc0: // D[regd] = abs D[regm]
+ m->freglo[regd] = m->freglo[regm];
+ m->freghi[regd] = m->freghi[regm] & ((1<<31)-1);
+
+ if(trace)
+ runtime·printf("*** D[%d] = abs D[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb00ac0: // F[regd] = abs F[regm]
+ m->freglo[regd] = m->freglo[regm] & ((1<<31)-1);
+
+ if(trace)
+ runtime·printf("*** F[%d] = abs F[%d] %x\n",
+ regd, regm, m->freglo[regd]);
+ break;
+
+ case 0xeeb40bc0: // D[regd] :: D[regm] (CMPD)
+ runtime·fcmp64c(getd(regd), getd(regm), &cmp, &nan);
+ m->fflag = fstatus(nan, cmp);
+
+ if(trace)
+ runtime·printf("*** cmp D[%d]::D[%d] %x\n",
+ regd, regm, m->fflag);
+ break;
+
+ case 0xeeb40ac0: // F[regd] :: F[regm] (CMPF)
+ runtime·fcmp64c(f2d(m->freglo[regd]), f2d(m->freglo[regm]), &cmp, &nan);
+ m->fflag = fstatus(nan, cmp);
+
+ if(trace)
+ runtime·printf("*** cmp F[%d]::F[%d] %x\n",
+ regd, regm, m->fflag);
+ break;
+
+ case 0xeeb70ac0: // D[regd] = F[regm] (MOVFD)
+ putd(regd, f2d(m->freglo[regm]));
+
+ if(trace)
+ runtime·printf("*** f2d D[%d]=F[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb70bc0: // F[regd] = D[regm] (MOVDF)
+ m->freglo[regd] = d2f(getd(regm));
+
+ if(trace)
+ runtime·printf("*** d2f F[%d]=D[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeebd0ac0: // S[regd] = F[regm] (MOVFW)
+ runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
+ if(!ok || (int32)sval != sval)
+ sval = 0;
+ m->freglo[regd] = sval;
+
+ if(trace)
+ runtime·printf("*** fix S[%d]=F[%d] %x\n",
+ regd, regm, m->freglo[regd]);
+ break;
+
+ case 0xeebc0ac0: // S[regd] = F[regm] (MOVFW.U)
+ runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
+ if(!ok || (uint32)sval != sval)
+ sval = 0;
+ m->freglo[regd] = sval;
+
+ if(trace)
+ runtime·printf("*** fix unsigned S[%d]=F[%d] %x\n",
+ regd, regm, m->freglo[regd]);
+ break;
+
+ case 0xeebd0bc0: // S[regd] = D[regm] (MOVDW)
+ runtime·f64tointc(getd(regm), &sval, &ok);
+ if(!ok || (int32)sval != sval)
+ sval = 0;
+ m->freglo[regd] = sval;
+
+ if(trace)
+ runtime·printf("*** fix S[%d]=D[%d] %x\n",
+ regd, regm, m->freglo[regd]);
+ break;
+
+ case 0xeebc0bc0: // S[regd] = D[regm] (MOVDW.U)
+ runtime·f64tointc(getd(regm), &sval, &ok);
+ if(!ok || (uint32)sval != sval)
+ sval = 0;
+ m->freglo[regd] = sval;
+
+ if(trace)
+ runtime·printf("*** fix unsigned S[%d]=D[%d] %x\n",
+ regd, regm, m->freglo[regd]);
+ break;
+
+ case 0xeeb80ac0: // D[regd] = S[regm] (MOVWF)
+ cmp = m->freglo[regm];
+ if(cmp < 0) {
+ runtime·fintto64c(-cmp, &uval);
+ putf(regd, d2f(uval));
+ m->freglo[regd] ^= 0x80000000;
+ } else {
+ runtime·fintto64c(cmp, &uval);
+ putf(regd, d2f(uval));
+ }
+
+ if(trace)
+ runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb80a40: // D[regd] = S[regm] (MOVWF.U)
+ runtime·fintto64c(m->freglo[regm], &uval);
+ putf(regd, d2f(uval));
+
+ if(trace)
+ runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb80bc0: // D[regd] = S[regm] (MOVWD)
+ cmp = m->freglo[regm];
+ if(cmp < 0) {
+ runtime·fintto64c(-cmp, &uval);
+ putd(regd, uval);
+ m->freghi[regd] ^= 0x80000000;
+ } else {
+ runtime·fintto64c(cmp, &uval);
+ putd(regd, uval);
+ }
+
+ if(trace)
+ runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+
+ case 0xeeb80b40: // D[regd] = S[regm] (MOVWD.U)
+ runtime·fintto64c(m->freglo[regm], &uval);
+ putd(regd, uval);
+
+ if(trace)
+ runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
+ regd, regm, m->freghi[regd], m->freglo[regd]);
+ break;
+ }
+ return 1;
+
+done:
+ if((i&0xff000000) == 0xee000000 ||
+ (i&0xff000000) == 0xed000000) {
+ runtime·printf("stepflt %p %x\n", pc, i);
+ fabort();
+ }
+ return 0;
+}
+
+typedef struct Sfregs Sfregs;
+
+// NOTE: These are all recorded as pointers because they are possibly live registers,
+// and we don't know what they contain. Recording them as pointers should be
+// safer than not.
+struct Sfregs
+{
+ uint32 *r0;
+ uint32 *r1;
+ uint32 *r2;
+ uint32 *r3;
+ uint32 *r4;
+ uint32 *r5;
+ uint32 *r6;
+ uint32 *r7;
+ uint32 *r8;
+ uint32 *r9;
+ uint32 *r10;
+ uint32 *r11;
+ uint32 *r12;
+ uint32 *r13;
+ uint32 cspr;
+};
+
+static void sfloat2(void);
+void _sfloatpanic(void);
+
+#pragma textflag NOSPLIT
+uint32*
+runtime·_sfloat2(uint32 *pc, Sfregs regs)
+{
+ void (*fn)(void);
+
+ g->m->ptrarg[0] = pc;
+ g->m->ptrarg[1] = &regs;
+ fn = sfloat2;
+ runtime·onM(&fn);
+ pc = g->m->ptrarg[0];
+ g->m->ptrarg[0] = nil;
+ return pc;
+}
+
+static void
+sfloat2(void)
+{
+ uint32 *pc;
+ G *curg;
+ Sfregs *regs;
+ int32 skip;
+ bool first;
+
+ pc = g->m->ptrarg[0];
+ regs = g->m->ptrarg[1];
+ g->m->ptrarg[0] = nil;
+ g->m->ptrarg[1] = nil;
+
+ first = true;
+ while(skip = stepflt(pc, (uint32*)&regs->r0)) {
+ first = false;
+ if(skip == FAULT) {
+ // Encountered bad address in store/load.
+ // Record signal information and return to assembly
+ // trampoline that fakes the call.
+ enum { SIGSEGV = 11 };
+ curg = g->m->curg;
+ curg->sig = SIGSEGV;
+ curg->sigcode0 = 0;
+ curg->sigcode1 = 0;
+ curg->sigpc = (uint32)pc;
+ pc = (uint32*)_sfloatpanic;
+ break;
+ }
+ pc += skip;
+ }
+ if(first) {
+ runtime·printf("sfloat2 %p %x\n", pc, *pc);
+ fabort(); // not ok to fail first instruction
+ }
+
+ g->m->ptrarg[0] = pc;
+}
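Before emulating anything, stepflt decides whether the instruction would execute at all, by testing its 4-bit condition field against the saved CPSR flags via the conditions table. A hedged Go sketch of that check for the simplest EQ/NE pair:

package main

import "fmt"

const flagZ = 1 << 30 // CPSR Z bit, as in the FLAGS_Z define above

// executes reports whether an instruction with condition code cond
// (0 = EQ "Z set", 1 = NE "Z clear") would run under the given CPSR.
func executes(cond int, cpsr uint32) bool {
	zSet := cpsr&flagZ != 0
	if cond == 0 {
		return zSet
	}
	return !zSet
}

func main() {
	fmt.Println(executes(0, flagZ), executes(1, flagZ)) // true false
}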
diff --git a/src/runtime/sqrt.go b/src/runtime/sqrt.go
new file mode 100644
index 000000000..34a8c3806
--- /dev/null
+++ b/src/runtime/sqrt.go
@@ -0,0 +1,150 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Copy of math/sqrt.go, here for use by ARM softfloat.
+
+package runtime
+
+import "unsafe"
+
+// The original C code and the long comment below are
+// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
+// came with this notice. The go code is a simplified
+// version of the original C.
+//
+// ====================================================
+// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+//
+// Developed at SunPro, a Sun Microsystems, Inc. business.
+// Permission to use, copy, modify, and distribute this
+// software is freely granted, provided that this notice
+// is preserved.
+// ====================================================
+//
+// __ieee754_sqrt(x)
+// Return correctly rounded sqrt.
+// -----------------------------------------
+// | Use the hardware sqrt if you have one |
+// -----------------------------------------
+// Method:
+// Bit by bit method using integer arithmetic. (Slow, but portable)
+// 1. Normalization
+// Scale x to y in [1,4) with even powers of 2:
+// find an integer k such that 1 <= (y=x*2**(2k)) < 4, then
+// sqrt(x) = 2**k * sqrt(y)
+// 2. Bit by bit computation
+// Let q = sqrt(y) truncated to i bit after binary point (q = 1),
+// i 0
+// i+1 2
+// s = 2*q , and y = 2 * ( y - q ). (1)
+// i i i i
+//
+// To compute q from q , one checks whether
+// i+1 i
+//
+// -(i+1) 2
+// (q + 2 ) <= y. (2)
+// i
+// -(i+1)
+// If (2) is false, then q = q ; otherwise q = q + 2 .
+// i+1 i i+1 i
+//
+// With some algebraic manipulation, it is not difficult to see
+// that (2) is equivalent to
+// -(i+1)
+// s + 2 <= y (3)
+// i i
+//
+// The advantage of (3) is that s and y can be computed by
+// i i
+// the following recurrence formula:
+// if (3) is false
+//
+// s = s , y = y ; (4)
+// i+1 i i+1 i
+//
+// otherwise,
+// -i -(i+1)
+// s = s + 2 , y = y - s - 2 (5)
+// i+1 i i+1 i i
+//
+// One may easily use induction to prove (4) and (5).
+// Note. Since the left hand side of (3) contain only i+2 bits,
+// it does not necessary to do a full (53-bit) comparison
+// in (3).
+// 3. Final rounding
+// After generating the 53 bits result, we compute one more bit.
+// Together with the remainder, we can decide whether the
+// result is exact, bigger than 1/2ulp, or less than 1/2ulp
+// (it will never equal to 1/2ulp).
+// The rounding mode can be detected by checking whether
+// huge + tiny is equal to huge, and whether huge - tiny is
+// equal to huge for some floating point number "huge" and "tiny".
+//
+//
+// Notes: Rounding mode detection omitted.
+
+const (
+ uvnan = 0x7FF8000000000001
+ uvinf = 0x7FF0000000000000
+ uvneginf = 0xFFF0000000000000
+ mask = 0x7FF
+ shift = 64 - 11 - 1
+ bias = 1023
+ maxFloat64 = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52
+)
+
+func float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) }
+func float64frombits(b uint64) float64 { return *(*float64)(unsafe.Pointer(&b)) }
+
+func sqrt(x float64) float64 {
+ // special cases
+ switch {
+ case x == 0 || x != x || x > maxFloat64:
+ return x
+ case x < 0:
+ return nan
+ }
+ ix := float64bits(x)
+ // normalize x
+ exp := int((ix >> shift) & mask)
+ if exp == 0 { // subnormal x
+ for ix&1<<shift == 0 {
+ ix <<= 1
+ exp--
+ }
+ exp++
+ }
+ exp -= bias // unbias exponent
+ ix &^= mask << shift
+ ix |= 1 << shift
+ if exp&1 == 1 { // odd exp, double x to make it even
+ ix <<= 1
+ }
+ exp >>= 1 // exp = exp/2, exponent of square root
+ // generate sqrt(x) bit by bit
+ ix <<= 1
+ var q, s uint64 // q = sqrt(x)
+ r := uint64(1 << (shift + 1)) // r = moving bit from MSB to LSB
+ for r != 0 {
+ t := s + r
+ if t <= ix {
+ s = t + r
+ ix -= t
+ q += r
+ }
+ ix <<= 1
+ r >>= 1
+ }
+ // final rounding
+ if ix != 0 { // remainder, result not exact
+ q += q & 1 // round according to extra bit
+ }
+ ix = q>>1 + uint64(exp-1+bias)<<shift // significand + biased exponent
+ return float64frombits(ix)
+}
+
+func sqrtC(f float64, r *float64) {
+ *r = sqrt(f)
+}
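The driving idea in the long comment is step 1, the even-power-of-two normalization; a small numeric check of that identity (the bit-by-bit loop itself is best read directly in sqrt above):

package main

import (
	"fmt"
	"math"
)

func main() {
	// Scale x into [1,4) by an even power of two, so sqrt(x) == 2**k * sqrt(y).
	x, y, k := 1800.0, 1800.0, 0
	for y >= 4 {
		y /= 4
		k++
	}
	for y < 1 {
		y *= 4
		k--
	}
	fmt.Println(y >= 1 && y < 4)                                    // true
	fmt.Println(math.Abs(math.Ldexp(math.Sqrt(y), k)-math.Sqrt(x))) // ~0
}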
diff --git a/src/runtime/stack.c b/src/runtime/stack.c
new file mode 100644
index 000000000..18b3f4064
--- /dev/null
+++ b/src/runtime/stack.c
@@ -0,0 +1,1128 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "stack.h"
+#include "funcdata.h"
+#include "typekind.h"
+#include "type.h"
+#include "race.h"
+#include "mgc0.h"
+#include "textflag.h"
+
+enum
+{
+ // StackDebug == 0: no logging
+ // == 1: logging of per-stack operations
+ // == 2: logging of per-frame operations
+ // == 3: logging of per-word updates
+ // == 4: logging of per-word reads
+ StackDebug = 0,
+ StackFromSystem = 0, // allocate stacks from system memory instead of the heap
+ StackFaultOnFree = 0, // old stacks are mapped noaccess to detect use after free
+
+ StackCache = 1,
+
+ StackCopyAlways = 0, // expect to be able to copy stacks 100% of the time
+};
+
+// Global pool of spans that have free stacks.
+// Stacks are assigned an order according to size.
+// order = log_2(size/FixedStack)
+// There is a free list for each order.
+static MSpan stackpool[NumStackOrders];
+static Mutex stackpoolmu;
+// TODO: one lock per order?
+
+void
+runtime·stackinit(void)
+{
+ int32 i;
+
+ if((StackCacheSize & PageMask) != 0)
+ runtime·throw("cache size must be a multiple of page size");
+
+ for(i = 0; i < NumStackOrders; i++)
+ runtime·MSpanList_Init(&stackpool[i]);
+}
+
+// Allocates a stack from the free pool. Must be called with
+// stackpoolmu held.
+static MLink*
+poolalloc(uint8 order)
+{
+ MSpan *list;
+ MSpan *s;
+ MLink *x;
+ uintptr i;
+
+ list = &stackpool[order];
+ s = list->next;
+ if(s == list) {
+ // no free stacks. Allocate another span worth.
+ s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift);
+ if(s == nil)
+ runtime·throw("out of memory");
+ if(s->ref != 0)
+ runtime·throw("bad ref");
+ if(s->freelist != nil)
+ runtime·throw("bad freelist");
+ for(i = 0; i < StackCacheSize; i += FixedStack << order) {
+ x = (MLink*)((s->start << PageShift) + i);
+ x->next = s->freelist;
+ s->freelist = x;
+ }
+ runtime·MSpanList_Insert(list, s);
+ }
+ x = s->freelist;
+ if(x == nil)
+ runtime·throw("span has no free stacks");
+ s->freelist = x->next;
+ s->ref++;
+ if(s->freelist == nil) {
+ // all stacks in s are allocated.
+ runtime·MSpanList_Remove(s);
+ }
+ return x;
+}
+
+// Adds stack x to the free pool. Must be called with stackpoolmu held.
+static void
+poolfree(MLink *x, uint8 order)
+{
+ MSpan *s;
+
+ s = runtime·MHeap_Lookup(&runtime·mheap, x);
+ if(s->state != MSpanStack)
+ runtime·throw("freeing stack not in a stack span");
+ if(s->freelist == nil) {
+ // s will now have a free stack
+ runtime·MSpanList_Insert(&stackpool[order], s);
+ }
+ x->next = s->freelist;
+ s->freelist = x;
+ s->ref--;
+ if(s->ref == 0) {
+ // span is completely free - return to heap
+ runtime·MSpanList_Remove(s);
+ s->freelist = nil;
+ runtime·MHeap_FreeStack(&runtime·mheap, s);
+ }
+}
+
+// stackcacherefill/stackcacherelease implement a global pool of stack segments.
+// The pool is required to prevent unlimited growth of per-thread caches.
+static void
+stackcacherefill(MCache *c, uint8 order)
+{
+ MLink *x, *list;
+ uintptr size;
+
+ if(StackDebug >= 1)
+ runtime·printf("stackcacherefill order=%d\n", order);
+
+ // Grab some stacks from the global cache.
+ // Grab half of the allowed capacity (to prevent thrashing).
+ list = nil;
+ size = 0;
+ runtime·lock(&stackpoolmu);
+ while(size < StackCacheSize/2) {
+ x = poolalloc(order);
+ x->next = list;
+ list = x;
+ size += FixedStack << order;
+ }
+ runtime·unlock(&stackpoolmu);
+
+ c->stackcache[order].list = list;
+ c->stackcache[order].size = size;
+}
+
+static void
+stackcacherelease(MCache *c, uint8 order)
+{
+ MLink *x, *y;
+ uintptr size;
+
+ if(StackDebug >= 1)
+ runtime·printf("stackcacherelease order=%d\n", order);
+ x = c->stackcache[order].list;
+ size = c->stackcache[order].size;
+ runtime·lock(&stackpoolmu);
+ while(size > StackCacheSize/2) {
+ y = x->next;
+ poolfree(x, order);
+ x = y;
+ size -= FixedStack << order;
+ }
+ runtime·unlock(&stackpoolmu);
+ c->stackcache[order].list = x;
+ c->stackcache[order].size = size;
+}
+
+void
+runtime·stackcache_clear(MCache *c)
+{
+ uint8 order;
+ MLink *x, *y;
+
+ if(StackDebug >= 1)
+ runtime·printf("stackcache clear\n");
+ runtime·lock(&stackpoolmu);
+ for(order = 0; order < NumStackOrders; order++) {
+ x = c->stackcache[order].list;
+ while(x != nil) {
+ y = x->next;
+ poolfree(x, order);
+ x = y;
+ }
+ c->stackcache[order].list = nil;
+ c->stackcache[order].size = 0;
+ }
+ runtime·unlock(&stackpoolmu);
+}
+
+void*
+runtime·stackalloc(G *gp, uint32 n)
+{
+ uint8 order;
+ uint32 n2;
+ void *v;
+ Stktop *top;
+ MLink *x;
+ MSpan *s;
+ MCache *c;
+
+ // Stackalloc must be called on scheduler stack, so that we
+ // never try to grow the stack during the code that stackalloc runs.
+ // Doing so would cause a deadlock (issue 1547).
+ if(g != g->m->g0)
+ runtime·throw("stackalloc not on scheduler stack");
+ if((n & (n-1)) != 0)
+ runtime·throw("stack size not a power of 2");
+ if(StackDebug >= 1)
+ runtime·printf("stackalloc %d\n", n);
+
+ gp->stacksize += n;
+ if(runtime·debug.efence || StackFromSystem) {
+ v = runtime·sysAlloc(ROUND(n, PageSize), &mstats.stacks_sys);
+ if(v == nil)
+ runtime·throw("out of memory (stackalloc)");
+ return v;
+ }
+
+ // Small stacks are allocated with a fixed-size free-list allocator.
+ // If we need a stack of a bigger size, we fall back on allocating
+ // a dedicated span.
+ if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) {
+ order = 0;
+ n2 = n;
+ while(n2 > FixedStack) {
+ order++;
+ n2 >>= 1;
+ }
+ c = g->m->mcache;
+ if(c == nil || g->m->gcing || g->m->helpgc) {
+ // c == nil can happen in the guts of exitsyscall or
+ // procresize. Just get a stack from the global pool.
+ // Also don't touch stackcache during gc
+ // as it's flushed concurrently.
+ runtime·lock(&stackpoolmu);
+ x = poolalloc(order);
+ runtime·unlock(&stackpoolmu);
+ } else {
+ x = c->stackcache[order].list;
+ if(x == nil) {
+ stackcacherefill(c, order);
+ x = c->stackcache[order].list;
+ }
+ c->stackcache[order].list = x->next;
+ c->stackcache[order].size -= n;
+ }
+ v = (byte*)x;
+ } else {
+ s = runtime·MHeap_AllocStack(&runtime·mheap, ROUND(n, PageSize) >> PageShift);
+ if(s == nil)
+ runtime·throw("out of memory");
+ v = (byte*)(s->start<<PageShift);
+ }
+ top = (Stktop*)((byte*)v+n-sizeof(Stktop));
+ runtime·memclr((byte*)top, sizeof(*top));
+ if(raceenabled)
+ runtime·racemalloc(v, n);
+ if(StackDebug >= 1)
+ runtime·printf(" allocated %p\n", v);
+ return v;
+}
+
+void
+runtime·stackfree(G *gp, void *v, Stktop *top)
+{
+ uint8 order;
+ uintptr n, n2;
+ MSpan *s;
+ MLink *x;
+ MCache *c;
+
+ n = (uintptr)(top+1) - (uintptr)v;
+ if(n & (n-1))
+ runtime·throw("stack not a power of 2");
+ if(StackDebug >= 1) {
+ runtime·printf("stackfree %p %d\n", v, (int32)n);
+ runtime·memclr(v, n); // for testing, clobber stack data
+ }
+ gp->stacksize -= n;
+ if(runtime·debug.efence || StackFromSystem) {
+ if(runtime·debug.efence || StackFaultOnFree)
+ runtime·SysFault(v, n);
+ else
+ runtime·SysFree(v, n, &mstats.stacks_sys);
+ return;
+ }
+ if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) {
+ order = 0;
+ n2 = n;
+ while(n2 > FixedStack) {
+ order++;
+ n2 >>= 1;
+ }
+ x = (MLink*)v;
+ c = g->m->mcache;
+ if(c == nil || g->m->gcing || g->m->helpgc) {
+ runtime·lock(&stackpoolmu);
+ poolfree(x, order);
+ runtime·unlock(&stackpoolmu);
+ } else {
+ if(c->stackcache[order].size >= StackCacheSize)
+ stackcacherelease(c, order);
+ x->next = c->stackcache[order].list;
+ c->stackcache[order].list = x;
+ c->stackcache[order].size += n;
+ }
+ } else {
+ s = runtime·MHeap_Lookup(&runtime·mheap, v);
+ if(s->state != MSpanStack) {
+ runtime·printf("%p %p\n", s->start<<PageShift, v);
+ runtime·throw("bad span state");
+ }
+ runtime·MHeap_FreeStack(&runtime·mheap, s);
+ }
+}
+
+// Called from runtime·lessstack when returning from a function which
+// allocated a new stack segment. The function's return value is in
+// m->cret.
+void
+runtime·oldstack(void)
+{
+ Stktop *top;
+ uint32 argsize;
+ byte *sp, *old;
+ uintptr *src, *dst, *dstend;
+ G *gp;
+ int64 goid;
+ int32 oldstatus;
+
+ if(StackCopyAlways)
+ runtime·throw("unexpected call to oldstack");
+
+ gp = g->m->curg;
+ top = (Stktop*)gp->stackbase;
+ if(top == nil)
+ runtime·throw("nil stackbase");
+ old = (byte*)gp->stackguard - StackGuard;
+ sp = (byte*)top;
+ argsize = top->argsize;
+
+ if(StackDebug >= 1) {
+ runtime·printf("runtime: oldstack gobuf={pc:%p sp:%p lr:%p} cret=%p argsize=%p\n",
+ top->gobuf.pc, top->gobuf.sp, top->gobuf.lr, (uintptr)g->m->cret, (uintptr)argsize);
+ }
+
+ gp->sched = top->gobuf;
+ gp->sched.ret = g->m->cret;
+ g->m->cret = 0; // drop reference
+ // gp->status is usually Grunning, but it could be Gsyscall if a stack overflow
+ // happens during a function call inside entersyscall.
+
+ oldstatus = runtime·readgstatus(gp);
+ oldstatus &= ~Gscan;
+ if(oldstatus != Grunning && oldstatus != Gsyscall) {
+ runtime·printf("runtime: oldstack status=%d\n", oldstatus);
+ runtime·throw("oldstack");
+ }
+ runtime·casgstatus(gp, oldstatus, Gcopystack);
+ gp->waitreason = runtime·gostringnocopy((byte*)"stack unsplit");
+
+ if(argsize > 0) {
+ sp -= argsize;
+ dst = (uintptr*)top->argp;
+ dstend = dst + argsize/sizeof(*dst);
+ src = (uintptr*)sp;
+ while(dst < dstend)
+ *dst++ = *src++;
+ }
+ goid = top->gobuf.g->goid; // fault if g is bad, before gogo
+ USED(goid);
+
+ gp->stackbase = top->stackbase;
+ gp->stackguard = top->stackguard;
+ gp->stackguard0 = gp->stackguard;
+ runtime·stackfree(gp, old, top);
+ runtime·casgstatus(gp, Gcopystack, oldstatus); // oldstatus is Grunning or Gsyscall
+ runtime·gogo(&gp->sched);
+}
+
+uintptr runtime·maxstacksize = 1<<20; // enough until runtime.main sets it for real
+
+static uint8*
+mapnames[] = {
+ (uint8*)"---",
+ (uint8*)"scalar",
+ (uint8*)"ptr",
+ (uint8*)"multi",
+};
+
+// Stack frame layout
+//
+// (x86)
+// +------------------+
+// | args from caller |
+// +------------------+ <- frame->argp
+// | return address |
+// +------------------+ <- frame->varp
+// | locals |
+// +------------------+
+// | args to callee |
+// +------------------+ <- frame->sp
+//
+// (arm: TODO)
+
+typedef struct CopyableInfo CopyableInfo;
+struct CopyableInfo {
+ byte *stk; // bottom address of segment
+ byte *base; // top address of segment (including Stktop)
+ int32 frames; // count of copyable frames (-1 = not copyable)
+};
+
+void runtime·main(void);
+void runtime·switchtoM(void(*)(void));
+
+static bool
+checkframecopy(Stkframe *frame, void *arg)
+{
+ CopyableInfo *cinfo;
+ Func *f;
+ StackMap *stackmap;
+
+ cinfo = arg;
+ f = frame->fn;
+ if(StackDebug >= 2)
+ runtime·printf(" checking %s frame=[%p,%p] stk=[%p,%p]\n", runtime·funcname(f), frame->sp, frame->fp, cinfo->stk, cinfo->base);
+ // if we're not in the segment any more, return immediately.
+ if((byte*)frame->varp < cinfo->stk || (byte*)frame->varp >= cinfo->base) {
+ if(StackDebug >= 2)
+ runtime·printf(" <next segment>\n");
+ return false; // stop traceback
+ }
+ if(f->entry == (uintptr)runtime·main) {
+ // A special routine at the TOS of the main routine.
+ // We will allow it to be copied even though we don't
+ // have full GC info for it (because it is written in C).
+ cinfo->frames++;
+ return false; // stop traceback
+ }
+ if(f->entry == (uintptr)runtime·switchtoM) {
+	// A special routine at the bottom of the stack of a goroutine that does an onM call.
+ // We will allow it to be copied even though we don't
+ // have full GC info for it (because it is written in asm).
+ cinfo->frames++;
+ return true;
+ }
+ if((byte*)frame->varp != (byte*)frame->sp) { // not in prologue (and has at least one local or outarg)
+ stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
+ if(stackmap == nil) {
+ cinfo->frames = -1;
+ if(StackDebug >= 1 || StackCopyAlways)
+ runtime·printf("runtime: copystack: no locals info for %s\n", runtime·funcname(f));
+ return false;
+ }
+ if(stackmap->n <= 0) {
+ cinfo->frames = -1;
+ if(StackDebug >= 1 || StackCopyAlways)
+ runtime·printf("runtime: copystack: locals size info only for %s\n", runtime·funcname(f));
+ return false;
+ }
+ }
+ if(frame->arglen != 0) {
+ stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
+ if(stackmap == nil) {
+ cinfo->frames = -1;
+ if(StackDebug >= 1 || StackCopyAlways)
+ runtime·printf("runtime: copystack: no arg info for %s\n", runtime·funcname(f));
+ return false;
+ }
+ }
+ cinfo->frames++;
+ return true; // this frame is ok; keep going
+}
+
+// If the top segment of the stack contains an uncopyable
+// frame, return -1. Otherwise return the number of frames
+// in the top segment, all of which are copyable.
+static int32
+copyabletopsegment(G *gp)
+{
+ CopyableInfo cinfo;
+ Defer *d;
+ Func *f;
+ FuncVal *fn;
+ StackMap *stackmap;
+ bool (*cb)(Stkframe*, void*);
+
+ if(gp->stackbase == 0)
+ runtime·throw("stackbase == 0");
+ cinfo.stk = (byte*)gp->stackguard - StackGuard;
+ cinfo.base = (byte*)gp->stackbase + sizeof(Stktop);
+ cinfo.frames = 0;
+
+ // Check that each frame is copyable. As a side effect,
+ // count the frames.
+ cb = checkframecopy;
+ runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, &cb, &cinfo, false);
+ if(StackDebug >= 1 && cinfo.frames != -1)
+ runtime·printf("copystack: %d copyable frames\n", cinfo.frames);
+
+ if(cinfo.frames == -1)
+ return -1;
+
+ // Check to make sure all Defers are copyable
+ for(d = gp->defer; d != nil; d = d->link) {
+ if(cinfo.stk <= (byte*)d && (byte*)d < cinfo.base) {
+ // Defer is on the stack. Its copyableness has
+ // been established during stack walking.
+ // For now, this only happens with the Defer in runtime.main.
+ continue;
+ }
+ if((byte*)d->argp < cinfo.stk || cinfo.base <= (byte*)d->argp)
+ break; // a defer for the next segment
+ fn = d->fn;
+ if(fn == nil) // See issue 8047
+ continue;
+ f = runtime·findfunc((uintptr)fn->fn);
+ if(f == nil) {
+ if(StackDebug >= 1 || StackCopyAlways)
+ runtime·printf("runtime: copystack: no func for deferred pc %p\n", fn->fn);
+ return -1;
+ }
+
+ // Check to make sure we have an args pointer map for the defer's args.
+ // We only need the args map, but we check
+ // for the locals map also, because when the locals map
+ // isn't provided it means the ptr map came from C and
+ // C (particularly, cgo) lies to us. See issue 7695.
+ stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
+ if(stackmap == nil || stackmap->n <= 0) {
+ if(StackDebug >= 1 || StackCopyAlways)
+ runtime·printf("runtime: copystack: no arg info for deferred %s\n", runtime·funcname(f));
+ return -1;
+ }
+ stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
+ if(stackmap == nil || stackmap->n <= 0) {
+ if(StackDebug >= 1 || StackCopyAlways)
+ runtime·printf("runtime: copystack: no local info for deferred %s\n", runtime·funcname(f));
+ return -1;
+ }
+
+ if(cinfo.stk <= (byte*)fn && (byte*)fn < cinfo.base) {
+ // FuncVal is on the stack. Again, its copyableness
+ // was established during stack walking.
+ continue;
+ }
+		// The FuncVal may have pointers in it, but fortunately for us
+		// the compiler won't put pointers that point into the stack
+		// inside a heap-allocated FuncVal.
+ // One day if we do need to check this, we'll need maps of the
+ // pointerness of the closure args. The only place we have that map
+ // right now is in the gc program for the FuncVal. Ugh.
+ }
+
+ return cinfo.frames;
+}
+
+typedef struct AdjustInfo AdjustInfo;
+struct AdjustInfo {
+ byte *oldstk; // bottom address of segment
+ byte *oldbase; // top address of segment (after Stktop)
+ uintptr delta; // ptr distance from old to new stack (newbase - oldbase)
+};
+
+// bv describes the memory starting at address scanp.
+// Adjust any pointers contained therein.
+static void
+adjustpointers(byte **scanp, BitVector *bv, AdjustInfo *adjinfo, Func *f)
+{
+ uintptr delta;
+ int32 num, i;
+ byte *p, *minp, *maxp;
+ Type *t;
+ Itab *tab;
+
+ minp = adjinfo->oldstk;
+ maxp = adjinfo->oldbase;
+ delta = adjinfo->delta;
+ num = bv->n / BitsPerPointer;
+ for(i = 0; i < num; i++) {
+ if(StackDebug >= 4)
+ runtime·printf(" %p:%s:%p\n", &scanp[i], mapnames[bv->data[i / (32 / BitsPerPointer)] >> (i * BitsPerPointer & 31) & 3], scanp[i]);
+ switch(bv->data[i / (32 / BitsPerPointer)] >> (i * BitsPerPointer & 31) & 3) {
+ case BitsDead:
+ if(runtime·debug.gcdead)
+ scanp[i] = (byte*)PoisonStack;
+ break;
+ case BitsScalar:
+ break;
+ case BitsPointer:
+ p = scanp[i];
+ if(f != nil && (byte*)0 < p && (p < (byte*)PageSize || (uintptr)p == PoisonGC || (uintptr)p == PoisonStack)) {
+ // Looks like a junk value in a pointer slot.
+ // Live analysis wrong?
+ g->m->traceback = 2;
+ runtime·printf("runtime: bad pointer in frame %s at %p: %p\n", runtime·funcname(f), &scanp[i], p);
+ runtime·throw("bad pointer!");
+ }
+ if(minp <= p && p < maxp) {
+ if(StackDebug >= 3)
+ runtime·printf("adjust ptr %p %s\n", p, runtime·funcname(f));
+ scanp[i] = p + delta;
+ }
+ break;
+ case BitsMultiWord:
+ switch(bv->data[(i+1) / (32 / BitsPerPointer)] >> ((i+1) * BitsPerPointer & 31) & 3) {
+ default:
+ runtime·throw("unexpected garbage collection bits");
+ case BitsEface:
+ t = (Type*)scanp[i];
+ if(t != nil && ((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0)) {
+ p = scanp[i+1];
+ if(minp <= p && p < maxp) {
+ if(StackDebug >= 3)
+ runtime·printf("adjust eface %p\n", p);
+ if(t->size > PtrSize) // currently we always allocate such objects on the heap
+ runtime·throw("large interface value found on stack");
+ scanp[i+1] = p + delta;
+ }
+ }
+ i++;
+ break;
+ case BitsIface:
+ tab = (Itab*)scanp[i];
+ if(tab != nil) {
+ t = tab->type;
+ //runtime·printf(" type=%p\n", t);
+ if((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0) {
+ p = scanp[i+1];
+ if(minp <= p && p < maxp) {
+ if(StackDebug >= 3)
+ runtime·printf("adjust iface %p\n", p);
+ if(t->size > PtrSize) // currently we always allocate such objects on the heap
+ runtime·throw("large interface value found on stack");
+ scanp[i+1] = p + delta;
+ }
+ }
+ }
+ i++;
+ break;
+ }
+ break;
+ }
+ }
+}
+
+// Note: the argument/return area is adjusted by the callee.
+static bool
+adjustframe(Stkframe *frame, void *arg)
+{
+ AdjustInfo *adjinfo;
+ Func *f;
+ StackMap *stackmap;
+ int32 pcdata;
+ BitVector bv;
+ uintptr targetpc;
+
+ adjinfo = arg;
+ f = frame->fn;
+ if(StackDebug >= 2)
+ runtime·printf(" adjusting %s frame=[%p,%p] pc=%p continpc=%p\n", runtime·funcname(f), frame->sp, frame->fp, frame->pc, frame->continpc);
+ if(f->entry == (uintptr)runtime·main ||
+ f->entry == (uintptr)runtime·switchtoM)
+ return true;
+ targetpc = frame->continpc;
+ if(targetpc == 0) {
+ // Frame is dead.
+ return true;
+ }
+ if(targetpc != f->entry)
+ targetpc--;
+ pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
+ if(pcdata == -1)
+ pcdata = 0; // in prologue
+
+ // adjust local pointers
+ if((byte*)frame->varp != (byte*)frame->sp) {
+ stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
+ if(stackmap == nil)
+ runtime·throw("no locals info");
+ if(stackmap->n <= 0)
+ runtime·throw("locals size info only");
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ if(StackDebug >= 3)
+ runtime·printf(" locals\n");
+ adjustpointers((byte**)frame->varp - bv.n / BitsPerPointer, &bv, adjinfo, f);
+ }
+ // adjust inargs and outargs
+ if(frame->arglen != 0) {
+ stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
+ if(stackmap == nil) {
+ runtime·printf("size %d\n", (int32)frame->arglen);
+ runtime·throw("no arg info");
+ }
+ bv = runtime·stackmapdata(stackmap, pcdata);
+ if(StackDebug >= 3)
+ runtime·printf(" args\n");
+ adjustpointers((byte**)frame->argp, &bv, adjinfo, nil);
+ }
+ return true;
+}
+
+static void
+adjustctxt(G *gp, AdjustInfo *adjinfo)
+{
+ if(adjinfo->oldstk <= (byte*)gp->sched.ctxt && (byte*)gp->sched.ctxt < adjinfo->oldbase)
+ gp->sched.ctxt = (byte*)gp->sched.ctxt + adjinfo->delta;
+}
+
+static void
+adjustdefers(G *gp, AdjustInfo *adjinfo)
+{
+ Defer *d, **dp;
+ Func *f;
+ FuncVal *fn;
+ StackMap *stackmap;
+ BitVector bv;
+
+ for(dp = &gp->defer, d = *dp; d != nil; dp = &d->link, d = *dp) {
+ if(adjinfo->oldstk <= (byte*)d && (byte*)d < adjinfo->oldbase) {
+ // The Defer record is on the stack. Its fields will
+ // get adjusted appropriately.
+ // This only happens for runtime.main and runtime.gopanic now,
+ // but a compiler optimization could do more of this.
+ *dp = (Defer*)((byte*)d + adjinfo->delta);
+ continue;
+ }
+ if((byte*)d->argp < adjinfo->oldstk || adjinfo->oldbase <= (byte*)d->argp)
+ break; // a defer for the next segment
+ fn = d->fn;
+ if(fn == nil) {
+ // Defer of nil function. It will panic when run, and there
+ // aren't any args to adjust. See issue 8047.
+ d->argp += adjinfo->delta;
+ continue;
+ }
+ f = runtime·findfunc((uintptr)fn->fn);
+ if(f == nil)
+ runtime·throw("can't adjust unknown defer");
+ if(StackDebug >= 4)
+ runtime·printf(" checking defer %s\n", runtime·funcname(f));
+ // Defer's FuncVal might be on the stack
+ if(adjinfo->oldstk <= (byte*)fn && (byte*)fn < adjinfo->oldbase) {
+ if(StackDebug >= 3)
+ runtime·printf(" adjust defer fn %s\n", runtime·funcname(f));
+ d->fn = (FuncVal*)((byte*)fn + adjinfo->delta);
+ } else {
+ // deferred function's args might point into the stack.
+ if(StackDebug >= 3)
+ runtime·printf(" adjust deferred args for %s\n", runtime·funcname(f));
+ stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
+ if(stackmap == nil)
+ runtime·throw("runtime: deferred function has no arg ptr map");
+ bv = runtime·stackmapdata(stackmap, 0);
+ adjustpointers(d->args, &bv, adjinfo, f);
+ }
+ d->argp += adjinfo->delta;
+ }
+}
+
+static void
+adjustpanics(G *gp, AdjustInfo *adjinfo)
+{
+ Panic *p, **l;
+
+ // only the topmost panic is on the current stack
+ for(l = &gp->panic; (p = *l) != nil; ) {
+ if(adjinfo->oldstk <= (byte*)p && (byte*)p < adjinfo->oldbase)
+ *l = (Panic*)((byte*)p + adjinfo->delta);
+ l = &p->link;
+
+ if(adjinfo->oldstk <= (byte*)p->argp && (byte*)p->argp < adjinfo->oldbase)
+ p->argp += adjinfo->delta;
+ }
+}
+
+static void
+adjustsudogs(G *gp, AdjustInfo *adjinfo)
+{
+ SudoG *s;
+ byte *e;
+
+ // the data elements pointed to by a SudoG structure
+ // might be in the stack.
+ for(s = gp->waiting; s != nil; s = s->waitlink) {
+ e = s->elem;
+ if(adjinfo->oldstk <= e && e < adjinfo->oldbase)
+ s->elem = e + adjinfo->delta;
+ e = (byte*)s->selectdone;
+ if(adjinfo->oldstk <= e && e < adjinfo->oldbase)
+ s->selectdone = (uint32*)(e + adjinfo->delta);
+ }
+}
+
+// Copies the top stack segment of gp to a new stack segment of a
+// different size. The top segment must contain nframes frames.
+static void
+copystack(G *gp, uintptr nframes, uintptr newsize)
+{
+ byte *oldstk, *oldbase, *newstk, *newbase;
+ uintptr oldsize, used;
+ AdjustInfo adjinfo;
+ Stktop *oldtop, *newtop;
+ uint32 oldstatus;
+ bool (*cb)(Stkframe*, void*);
+
+ if(gp->syscallstack != 0)
+ runtime·throw("can't handle stack copy in syscall yet");
+ oldstk = (byte*)gp->stackguard - StackGuard;
+ if(gp->stackbase == 0)
+ runtime·throw("nil stackbase");
+ oldbase = (byte*)gp->stackbase + sizeof(Stktop);
+ oldsize = oldbase - oldstk;
+ used = oldbase - (byte*)gp->sched.sp;
+ oldtop = (Stktop*)gp->stackbase;
+
+ // allocate new stack
+ newstk = runtime·stackalloc(gp, newsize);
+ newbase = newstk + newsize;
+ newtop = (Stktop*)(newbase - sizeof(Stktop));
+
+ if(StackDebug >= 1)
+ runtime·printf("copystack gp=%p [%p %p]/%d -> [%p %p]/%d\n", gp, oldstk, oldbase, (int32)oldsize, newstk, newbase, (int32)newsize);
+ USED(oldsize);
+
+ // adjust pointers in the to-be-copied frames
+ adjinfo.oldstk = oldstk;
+ adjinfo.oldbase = oldbase;
+ adjinfo.delta = newbase - oldbase;
+ cb = adjustframe;
+ runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, nframes, &cb, &adjinfo, false);
+
+ // adjust other miscellaneous things that have pointers into stacks.
+ adjustctxt(gp, &adjinfo);
+ adjustdefers(gp, &adjinfo);
+ adjustpanics(gp, &adjinfo);
+ adjustsudogs(gp, &adjinfo);
+
+ // copy the stack (including Stktop) to the new location
+ runtime·memmove(newbase - used, oldbase - used, used);
+ oldstatus = runtime·readgstatus(gp);
+ oldstatus &= ~Gscan;
+ if (oldstatus == Gwaiting || oldstatus == Grunnable)
+ runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
+ else
+ runtime·throw("copystack: bad status, not Gwaiting or Grunnable");
+ // Swap out old stack for new one
+ gp->stackbase = (uintptr)newtop;
+ gp->stackguard = (uintptr)newstk + StackGuard;
+ gp->stackguard0 = (uintptr)newstk + StackGuard; // NOTE: might clobber a preempt request
+ if(gp->stack0 == (uintptr)oldstk)
+ gp->stack0 = (uintptr)newstk;
+ gp->sched.sp = (uintptr)(newbase - used);
+
+ runtime·casgstatus(gp, Gcopystack, oldstatus); // oldstatus is Gwaiting or Grunnable
+
+ // free old stack
+ runtime·stackfree(gp, oldstk, oldtop);
+}
+
+// round x up to a power of 2.
+int32
+runtime·round2(int32 x)
+{
+ int32 s;
+
+ s = 0;
+ while((1 << s) < x)
+ s++;
+ return 1 << s;
+}
+
+// Called from runtime·morestack when a new stack segment is needed.
+// Allocate a new stack big enough for m->moreframesize bytes,
+// copy m->moreargsize bytes to the new frame,
+// and then act as though runtime·lessstack called the function at m->morepc.
+//
+// g->atomicstatus will be Grunning or Gsyscall (or, due to GC, Gscanrunning or Gscansyscall) upon entry.
+// If the GC is trying to stop this g then it will set preemptscan to true.
+void
+runtime·newstack(void)
+{
+ int32 framesize, argsize, oldstatus, oldsize, newsize, nframes;
+ Stktop *top;
+ byte *stk, *oldstk, *oldbase;
+ uintptr sp;
+ uintptr *src, *dst, *dstend;
+ G *gp;
+ Gobuf label, morebuf;
+ void *moreargp;
+
+ if(g->m->forkstackguard)
+ runtime·throw("split stack after fork");
+ if(g->m->morebuf.g != g->m->curg) {
+ runtime·printf("runtime: newstack called from g=%p\n"
+ "\tm=%p m->curg=%p m->g0=%p m->gsignal=%p\n",
+ g->m->morebuf.g, g->m, g->m->curg, g->m->g0, g->m->gsignal);
+ morebuf = g->m->morebuf;
+ runtime·traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g);
+ runtime·throw("runtime: wrong goroutine in newstack");
+ }
+ if(g->throwsplit)
+ runtime·throw("runtime: stack split at bad time");
+
+ // The goroutine must be executing in order to call newstack, so the possible states are
+ // Grunning and Gsyscall (and, due to GC, also Gscanrunning and Gscansyscall).
+
+ // gp->status is usually Grunning, but it could be Gsyscall if a stack overflow
+ // happens during a function call inside entersyscall.
+ gp = g->m->curg;
+ oldstatus = runtime·readgstatus(gp) & ~Gscan;
+ framesize = g->m->moreframesize;
+ argsize = g->m->moreargsize;
+ moreargp = g->m->moreargp;
+ g->m->moreargp = nil;
+ morebuf = g->m->morebuf;
+ g->m->morebuf.pc = (uintptr)nil;
+ g->m->morebuf.lr = (uintptr)nil;
+ g->m->morebuf.sp = (uintptr)nil;
+
+ runtime·casgstatus(gp, oldstatus, Gwaiting); // oldstatus is not in a Gscan status
+ gp->waitreason = runtime·gostringnocopy((byte*)"stack growth");
+
+ runtime·rewindmorestack(&gp->sched);
+
+ if(gp->stackbase == 0)
+ runtime·throw("nil stackbase");
+ sp = gp->sched.sp;
+ if(thechar == '6' || thechar == '8') {
+ // The call to morestack cost a word.
+ sp -= sizeof(uintreg);
+ }
+ if(StackDebug >= 1 || sp < gp->stackguard - StackGuard) {
+ runtime·printf("runtime: newstack framesize=%p argsize=%p sp=%p stack=[%p, %p]\n"
+ "\tmorebuf={pc:%p sp:%p lr:%p}\n"
+ "\tsched={pc:%p sp:%p lr:%p ctxt:%p}\n",
+ (uintptr)framesize, (uintptr)argsize, sp, gp->stackguard - StackGuard, gp->stackbase,
+ g->m->morebuf.pc, g->m->morebuf.sp, g->m->morebuf.lr,
+ gp->sched.pc, gp->sched.sp, gp->sched.lr, gp->sched.ctxt);
+ }
+ if(sp < gp->stackguard - StackGuard) {
+ runtime·printf("runtime: gp=%p, gp->status=%d, oldstatus=%d\n ", (void*)gp, runtime·readgstatus(gp), oldstatus);
+ runtime·printf("runtime: split stack overflow: %p < %p\n", sp, gp->stackguard - StackGuard);
+ runtime·throw("runtime: split stack overflow");
+ }
+
+ if(argsize % sizeof(uintptr) != 0) {
+ runtime·printf("runtime: stack growth with misaligned argsize %d\n", argsize);
+ runtime·throw("runtime: stack growth argsize");
+ }
+
+ if(gp->stackguard0 == (uintptr)StackPreempt) {
+ if(gp == g->m->g0)
+ runtime·throw("runtime: preempt g0");
+ if(oldstatus == Grunning && g->m->p == nil && g->m->locks == 0)
+ runtime·throw("runtime: g is running but p is not");
+ if(oldstatus == Gsyscall && g->m->locks == 0)
+ runtime·throw("runtime: stack growth during syscall");
+ if(oldstatus == Grunning && gp->preemptscan) {
+ runtime·gcphasework(gp);
+ runtime·casgstatus(gp, Gwaiting, Grunning);
+ gp->stackguard0 = gp->stackguard;
+ gp->preempt = false;
+			gp->preemptscan = false; // Tells the GC that preemption was successful.
+ runtime·gogo(&gp->sched); // never return
+ }
+
+ // Be conservative about where we preempt.
+ // We are interested in preempting user Go code, not runtime code.
+ if(oldstatus != Grunning || g->m->locks || g->m->mallocing || g->m->gcing || g->m->p->status != Prunning) {
+ // Let the goroutine keep running for now.
+ // gp->preempt is set, so it will be preempted next time.
+ gp->stackguard0 = gp->stackguard;
+ runtime·casgstatus(gp, Gwaiting, oldstatus); // oldstatus is Gsyscall or Grunning
+ runtime·gogo(&gp->sched); // never return
+ }
+ // Act like goroutine called runtime.Gosched.
+ runtime·casgstatus(gp, Gwaiting, oldstatus); // oldstatus is Gsyscall or Grunning
+ runtime·gosched_m(gp); // never return
+ }
+
+ // If every frame on the top segment is copyable, allocate a bigger segment
+ // and move the segment instead of allocating a new segment.
+ if(runtime·copystack) {
+ if(!runtime·precisestack)
+ runtime·throw("can't copy stacks without precise stacks");
+ nframes = copyabletopsegment(gp);
+ if(nframes != -1) {
+ oldstk = (byte*)gp->stackguard - StackGuard;
+ oldbase = (byte*)gp->stackbase + sizeof(Stktop);
+ oldsize = oldbase - oldstk;
+ newsize = oldsize * 2;
+ // Note that the concurrent GC might be scanning the stack as we try to replace it.
+ // copystack takes care of the appropriate coordination with the stack scanner.
+ copystack(gp, nframes, newsize);
+ if(StackDebug >= 1)
+ runtime·printf("stack grow done\n");
+ if(gp->stacksize > runtime·maxstacksize) {
+ runtime·printf("runtime: goroutine stack exceeds %D-byte limit\n", (uint64)runtime·maxstacksize);
+ runtime·throw("stack overflow");
+ }
+ runtime·casgstatus(gp, Gwaiting, oldstatus); // oldstatus is Gsyscall or Grunning
+ runtime·gogo(&gp->sched);
+ }
+ // TODO: if stack is uncopyable because we're in C code, patch return value at
+ // end of C code to trigger a copy as soon as C code exits. That way, we'll
+ // have stack available if we get this deep again.
+ }
+
+ if(StackCopyAlways)
+ runtime·throw("split stack not allowed");
+
+ // allocate new segment.
+ framesize += argsize;
+ framesize += StackExtra; // room for more functions, Stktop.
+ if(framesize < StackMin)
+ framesize = StackMin;
+ framesize += StackSystem;
+ framesize = runtime·round2(framesize);
+ stk = runtime·stackalloc(gp, framesize);
+ if(gp->stacksize > runtime·maxstacksize) {
+ runtime·printf("runtime: goroutine stack exceeds %D-byte limit\n", (uint64)runtime·maxstacksize);
+ runtime·throw("stack overflow");
+ }
+ top = (Stktop*)(stk+framesize-sizeof(*top));
+
+ if(StackDebug >= 1) {
+ runtime·printf("\t-> new stack gp=%p [%p, %p]\n", gp, stk, top);
+ }
+
+ top->stackbase = gp->stackbase;
+ top->stackguard = gp->stackguard;
+ top->gobuf = morebuf;
+ top->argp = moreargp;
+ top->argsize = argsize;
+
+ gp->stackbase = (uintptr)top;
+ gp->stackguard = (uintptr)stk + StackGuard;
+ gp->stackguard0 = gp->stackguard;
+
+ sp = (uintptr)top;
+ if(argsize > 0) {
+ sp -= argsize;
+ dst = (uintptr*)sp;
+ dstend = dst + argsize/sizeof(*dst);
+ src = (uintptr*)top->argp;
+ while(dst < dstend)
+ *dst++ = *src++;
+ }
+
+ if(thechar == '5') {
+ // caller would have saved its LR below args.
+ sp -= sizeof(void*);
+ *(void**)sp = nil;
+ }
+
+ // Continue as if lessstack had just called m->morepc
+ // (the PC that decided to grow the stack).
+ runtime·memclr((byte*)&label, sizeof label);
+ label.sp = sp;
+ label.pc = (uintptr)runtime·lessstack;
+ label.g = g->m->curg;
+ runtime·gostartcall(&label, (void(*)(void))gp->sched.pc, gp->sched.ctxt);
+ gp->sched.ctxt = nil;
+ runtime·casgstatus(gp, Gwaiting, oldstatus); // oldstatus is Grunning or Gsyscall
+ runtime·gogo(&label);
+
+ *(int32*)345 = 123; // never return
+}
+
+#pragma textflag NOSPLIT
+void
+runtime·nilfunc(void)
+{
+ *(byte*)0 = 0;
+}
+
+// adjust Gobuf as if it executed a call to fn
+// and then did an immediate gosave.
+void
+runtime·gostartcallfn(Gobuf *gobuf, FuncVal *fv)
+{
+ void *fn;
+
+ if(fv != nil)
+ fn = fv->fn;
+ else
+ fn = runtime·nilfunc;
+ runtime·gostartcall(gobuf, fn, fv);
+}
+
+// Maybe shrink the stack being used by gp.
+// Called at garbage collection time.
+void
+runtime·shrinkstack(G *gp)
+{
+ int32 nframes;
+ byte *oldstk, *oldbase;
+ uintptr used, oldsize, newsize;
+
+ if(!runtime·copystack)
+ return;
+ if(runtime·readgstatus(gp) == Gdead)
+ return;
+ if(gp->stackbase == 0)
+ runtime·throw("stackbase == 0");
+ //return; // TODO: why does this happen?
+ oldstk = (byte*)gp->stackguard - StackGuard;
+ oldbase = (byte*)gp->stackbase + sizeof(Stktop);
+ oldsize = oldbase - oldstk;
+ newsize = oldsize / 2;
+ if(newsize < FixedStack)
+ return; // don't shrink below the minimum-sized stack
+ used = oldbase - (byte*)gp->sched.sp;
+ if(used >= oldsize / 4)
+ return; // still using at least 1/4 of the segment.
+
+ if(gp->syscallstack != (uintptr)nil) // TODO: can we handle this case?
+ return;
+#ifdef GOOS_windows
+ if(gp->m != nil && gp->m->libcallsp != 0)
+ return;
+#endif
+ if(StackDebug > 0)
+ runtime·printf("shrinking stack %D->%D\n", (uint64)oldsize, (uint64)newsize);
+ nframes = copyabletopsegment(gp);
+ if(nframes == -1)
+ return;
+ copystack(gp, nframes, newsize);
+}
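runtime·stackalloc above sorts small stack requests into size classes: order = log2(n/FixedStack), found by shifting n down until it reaches FixedStack. A minimal Go sketch of that classification, assuming FixedStack is 8192 (StackMin + StackSystemRounded with StackSystem = 0, per stack.h below):

	const fixedStack = 8192 // assumed value of FixedStack for illustration

	// stackOrder mirrors the loop in runtime·stackalloc: a request of
	// n bytes (always a power of two) lands in free list number
	// log2(n/fixedStack).
	func stackOrder(n uint32) uint8 {
		var order uint8
		for n2 := n; n2 > fixedStack; n2 >>= 1 {
			order++
		}
		return order // stackOrder(8192) == 0, stackOrder(32768) == 2
	}

Requests at or above FixedStack << NumStackOrders (or StackCacheSize) skip the pooled path entirely and get a dedicated span instead.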
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
new file mode 100644
index 000000000..f1b7d32d2
--- /dev/null
+++ b/src/runtime/stack.go
@@ -0,0 +1,13 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ // Goroutine preemption request.
+ // Stored into g->stackguard0 to cause split stack check failure.
+ // Must be greater than any real sp.
+ // 0xfffffade in hex.
+ stackPreempt = ^uintptr(1313)
+)
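stackPreempt is the bit-complement spelling of the value that stack.h below defines as StackPreempt = -1314: ^uintptr(1313) is -1314 in two's complement, which is 0xfffffade when truncated to 32 bits. A quick illustrative check:

	package main

	import "fmt"

	func main() {
		fmt.Printf("%#x\n", uint32(^uintptr(1313))) // 0xfffffade
		fmt.Printf("%#x\n", ^uintptr(1313))         // 0xfffffffffffffade on 64-bit
	}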
diff --git a/src/runtime/stack.h b/src/runtime/stack.h
new file mode 100644
index 000000000..7f8c43ee5
--- /dev/null
+++ b/src/runtime/stack.h
@@ -0,0 +1,119 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Stack layout parameters.
+Included both by runtime (compiled via 6c) and linkers (compiled via gcc).
+
+The per-goroutine g->stackguard is set to point StackGuard bytes
+above the bottom of the stack. Each function compares its stack
+pointer against g->stackguard to check for overflow. To cut one
+instruction from the check sequence for functions with tiny frames,
+the stack is allowed to protrude StackSmall bytes below the stack
+guard. Functions with large frames don't bother with the check and
+always call morestack. The sequences are (for amd64, others are
+similar):
+
+ guard = g->stackguard
+ frame = function's stack frame size
+ argsize = size of function arguments (call + return)
+
+ stack frame size <= StackSmall:
+ CMPQ guard, SP
+ JHI 3(PC)
+ MOVQ m->morearg, $(argsize << 32)
+ CALL morestack(SB)
+
+	stack frame size > StackSmall but < StackBig:
+ LEAQ (frame-StackSmall)(SP), R0
+ CMPQ guard, R0
+ JHI 3(PC)
+ MOVQ m->morearg, $(argsize << 32)
+ CALL morestack(SB)
+
+ stack frame size >= StackBig:
+ MOVQ m->morearg, $((argsize << 32) | frame)
+ CALL morestack(SB)
+
+The bottom StackGuard - StackSmall bytes are important: there has
+to be enough room to execute functions that refuse to check for
+stack overflow, either because they need to be adjacent to the
+actual caller's frame (deferproc) or because they handle the imminent
+stack overflow (morestack).
+
+For example, deferproc might call malloc, which does one of the
+above checks (without allocating a full frame), which might trigger
+a call to morestack. This sequence needs to fit in the bottom
+section of the stack. On amd64, morestack's frame is 40 bytes, and
+deferproc's frame is 56 bytes. That fits well within the
+StackGuard - StackSmall bytes at the bottom.
+The linkers explore all possible call traces involving non-splitting
+functions to make sure that this limit cannot be violated.
+ */
+
+enum {
+ // StackSystem is a number of additional bytes to add
+ // to each stack below the usual guard area for OS-specific
+ // purposes like signal handling. Used on Windows and on
+ // Plan 9 because they do not use a separate stack.
+#ifdef GOOS_windows
+ StackSystem = 512 * sizeof(uintptr),
+#else
+#ifdef GOOS_plan9
+ // The size of the note handler frame varies among architectures,
+ // but 512 bytes should be enough for every implementation.
+ StackSystem = 512,
+#else
+ StackSystem = 0,
+#endif // Plan 9
+#endif // Windows
+
+ // The amount of extra stack to allocate beyond the size
+ // needed for the single frame that triggered the split.
+ StackExtra = 2048,
+
+ // The minimum stack segment size to allocate.
+ // If the amount needed for the splitting frame + StackExtra
+ // is less than this number, the stack will have this size instead.
+ StackMin = 8192,
+ StackSystemRounded = StackSystem + (-StackSystem & (StackMin-1)),
+ FixedStack = StackMin + StackSystemRounded,
+
+ // Functions that need frames bigger than this use an extra
+ // instruction to do the stack split check, to avoid overflow
+ // in case SP - framesize wraps below zero.
+ // This value can be no bigger than the size of the unmapped
+ // space at zero.
+ StackBig = 4096,
+
+ // The stack guard is a pointer this many bytes above the
+ // bottom of the stack.
+ StackGuard = 512 + StackSystem,
+
+ // After a stack split check the SP is allowed to be this
+ // many bytes below the stack guard. This saves an instruction
+ // in the checking sequence for tiny frames.
+ StackSmall = 128,
+
+ // The maximum number of bytes that a chain of NOSPLIT
+ // functions can use.
+ StackLimit = StackGuard - StackSystem - StackSmall,
+
+ // The assumed size of the top-of-stack data block.
+ // The actual size can be smaller than this but cannot be larger.
+ // Checked in proc.c's runtime.malg.
+ StackTop = 88,
+};
+
+// Goroutine preemption request.
+// Stored into g->stackguard0 to cause split stack check failure.
+// Must be greater than any real sp.
+// 0xfffffade in hex.
+#define StackPreempt ((uint64)-1314)
+/*c2go
+enum
+{
+ StackPreempt = -1314,
+};
+*/
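The three check sequences described above boil down to two logical tests plus a wraparound-safe variant for very large frames. A hedged Go sketch of that logic (not the generated assembly; constants follow the enum above with StackSystem = 0):

	const (
		stackGuard = 512 // StackGuard when StackSystem == 0
		stackSmall = 128 // slack a tiny frame may use below the guard
	)

	// needsMorestack reports whether a function with the given frame size
	// must call morestack, given the current SP and the goroutine's guard.
	// Frames >= StackBig need the wraparound-safe sequence shown in the
	// comment above and are not modeled here.
	func needsMorestack(sp, guard, frame uintptr) bool {
		if frame <= stackSmall {
			// Tiny frames compare SP directly against the guard and may
			// protrude up to stackSmall bytes below it.
			return sp < guard
		}
		// Larger frames first account for the part of the frame beyond
		// the stackSmall slack.
		return sp-(frame-stackSmall) < guard
	}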
diff --git a/src/runtime/stack_gen_test.go b/src/runtime/stack_gen_test.go
new file mode 100644
index 000000000..28101062c
--- /dev/null
+++ b/src/runtime/stack_gen_test.go
@@ -0,0 +1,1473 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+)
+
+var splitTests = []func() (uintptr, uintptr){
+ // Edit .+1,/^}/-1|seq 4 4 5000 | sed 's/.*/ stack&,/' | fmt
+ stack4, stack8, stack12, stack16, stack20, stack24, stack28,
+ stack32, stack36, stack40, stack44, stack48, stack52, stack56,
+ stack60, stack64, stack68, stack72, stack76, stack80, stack84,
+ stack88, stack92, stack96, stack100, stack104, stack108, stack112,
+ stack116, stack120, stack124, stack128, stack132, stack136,
+ stack140, stack144, stack148, stack152, stack156, stack160,
+ stack164, stack168, stack172, stack176, stack180, stack184,
+ stack188, stack192, stack196, stack200, stack204, stack208,
+ stack212, stack216, stack220, stack224, stack228, stack232,
+ stack236, stack240, stack244, stack248, stack252, stack256,
+ stack260, stack264, stack268, stack272, stack276, stack280,
+ stack284, stack288, stack292, stack296, stack300, stack304,
+ stack308, stack312, stack316, stack320, stack324, stack328,
+ stack332, stack336, stack340, stack344, stack348, stack352,
+ stack356, stack360, stack364, stack368, stack372, stack376,
+ stack380, stack384, stack388, stack392, stack396, stack400,
+ stack404, stack408, stack412, stack416, stack420, stack424,
+ stack428, stack432, stack436, stack440, stack444, stack448,
+ stack452, stack456, stack460, stack464, stack468, stack472,
+ stack476, stack480, stack484, stack488, stack492, stack496,
+ stack500, stack504, stack508, stack512, stack516, stack520,
+ stack524, stack528, stack532, stack536, stack540, stack544,
+ stack548, stack552, stack556, stack560, stack564, stack568,
+ stack572, stack576, stack580, stack584, stack588, stack592,
+ stack596, stack600, stack604, stack608, stack612, stack616,
+ stack620, stack624, stack628, stack632, stack636, stack640,
+ stack644, stack648, stack652, stack656, stack660, stack664,
+ stack668, stack672, stack676, stack680, stack684, stack688,
+ stack692, stack696, stack700, stack704, stack708, stack712,
+ stack716, stack720, stack724, stack728, stack732, stack736,
+ stack740, stack744, stack748, stack752, stack756, stack760,
+ stack764, stack768, stack772, stack776, stack780, stack784,
+ stack788, stack792, stack796, stack800, stack804, stack808,
+ stack812, stack816, stack820, stack824, stack828, stack832,
+ stack836, stack840, stack844, stack848, stack852, stack856,
+ stack860, stack864, stack868, stack872, stack876, stack880,
+ stack884, stack888, stack892, stack896, stack900, stack904,
+ stack908, stack912, stack916, stack920, stack924, stack928,
+ stack932, stack936, stack940, stack944, stack948, stack952,
+ stack956, stack960, stack964, stack968, stack972, stack976,
+ stack980, stack984, stack988, stack992, stack996, stack1000,
+ stack1004, stack1008, stack1012, stack1016, stack1020, stack1024,
+ stack1028, stack1032, stack1036, stack1040, stack1044, stack1048,
+ stack1052, stack1056, stack1060, stack1064, stack1068, stack1072,
+ stack1076, stack1080, stack1084, stack1088, stack1092, stack1096,
+ stack1100, stack1104, stack1108, stack1112, stack1116, stack1120,
+ stack1124, stack1128, stack1132, stack1136, stack1140, stack1144,
+ stack1148, stack1152, stack1156, stack1160, stack1164, stack1168,
+ stack1172, stack1176, stack1180, stack1184, stack1188, stack1192,
+ stack1196, stack1200, stack1204, stack1208, stack1212, stack1216,
+ stack1220, stack1224, stack1228, stack1232, stack1236, stack1240,
+ stack1244, stack1248, stack1252, stack1256, stack1260, stack1264,
+ stack1268, stack1272, stack1276, stack1280, stack1284, stack1288,
+ stack1292, stack1296, stack1300, stack1304, stack1308, stack1312,
+ stack1316, stack1320, stack1324, stack1328, stack1332, stack1336,
+ stack1340, stack1344, stack1348, stack1352, stack1356, stack1360,
+ stack1364, stack1368, stack1372, stack1376, stack1380, stack1384,
+ stack1388, stack1392, stack1396, stack1400, stack1404, stack1408,
+ stack1412, stack1416, stack1420, stack1424, stack1428, stack1432,
+ stack1436, stack1440, stack1444, stack1448, stack1452, stack1456,
+ stack1460, stack1464, stack1468, stack1472, stack1476, stack1480,
+ stack1484, stack1488, stack1492, stack1496, stack1500, stack1504,
+ stack1508, stack1512, stack1516, stack1520, stack1524, stack1528,
+ stack1532, stack1536, stack1540, stack1544, stack1548, stack1552,
+ stack1556, stack1560, stack1564, stack1568, stack1572, stack1576,
+ stack1580, stack1584, stack1588, stack1592, stack1596, stack1600,
+ stack1604, stack1608, stack1612, stack1616, stack1620, stack1624,
+ stack1628, stack1632, stack1636, stack1640, stack1644, stack1648,
+ stack1652, stack1656, stack1660, stack1664, stack1668, stack1672,
+ stack1676, stack1680, stack1684, stack1688, stack1692, stack1696,
+ stack1700, stack1704, stack1708, stack1712, stack1716, stack1720,
+ stack1724, stack1728, stack1732, stack1736, stack1740, stack1744,
+ stack1748, stack1752, stack1756, stack1760, stack1764, stack1768,
+ stack1772, stack1776, stack1780, stack1784, stack1788, stack1792,
+ stack1796, stack1800, stack1804, stack1808, stack1812, stack1816,
+ stack1820, stack1824, stack1828, stack1832, stack1836, stack1840,
+ stack1844, stack1848, stack1852, stack1856, stack1860, stack1864,
+ stack1868, stack1872, stack1876, stack1880, stack1884, stack1888,
+ stack1892, stack1896, stack1900, stack1904, stack1908, stack1912,
+ stack1916, stack1920, stack1924, stack1928, stack1932, stack1936,
+ stack1940, stack1944, stack1948, stack1952, stack1956, stack1960,
+ stack1964, stack1968, stack1972, stack1976, stack1980, stack1984,
+ stack1988, stack1992, stack1996, stack2000, stack2004, stack2008,
+ stack2012, stack2016, stack2020, stack2024, stack2028, stack2032,
+ stack2036, stack2040, stack2044, stack2048, stack2052, stack2056,
+ stack2060, stack2064, stack2068, stack2072, stack2076, stack2080,
+ stack2084, stack2088, stack2092, stack2096, stack2100, stack2104,
+ stack2108, stack2112, stack2116, stack2120, stack2124, stack2128,
+ stack2132, stack2136, stack2140, stack2144, stack2148, stack2152,
+ stack2156, stack2160, stack2164, stack2168, stack2172, stack2176,
+ stack2180, stack2184, stack2188, stack2192, stack2196, stack2200,
+ stack2204, stack2208, stack2212, stack2216, stack2220, stack2224,
+ stack2228, stack2232, stack2236, stack2240, stack2244, stack2248,
+ stack2252, stack2256, stack2260, stack2264, stack2268, stack2272,
+ stack2276, stack2280, stack2284, stack2288, stack2292, stack2296,
+ stack2300, stack2304, stack2308, stack2312, stack2316, stack2320,
+ stack2324, stack2328, stack2332, stack2336, stack2340, stack2344,
+ stack2348, stack2352, stack2356, stack2360, stack2364, stack2368,
+ stack2372, stack2376, stack2380, stack2384, stack2388, stack2392,
+ stack2396, stack2400, stack2404, stack2408, stack2412, stack2416,
+ stack2420, stack2424, stack2428, stack2432, stack2436, stack2440,
+ stack2444, stack2448, stack2452, stack2456, stack2460, stack2464,
+ stack2468, stack2472, stack2476, stack2480, stack2484, stack2488,
+ stack2492, stack2496, stack2500, stack2504, stack2508, stack2512,
+ stack2516, stack2520, stack2524, stack2528, stack2532, stack2536,
+ stack2540, stack2544, stack2548, stack2552, stack2556, stack2560,
+ stack2564, stack2568, stack2572, stack2576, stack2580, stack2584,
+ stack2588, stack2592, stack2596, stack2600, stack2604, stack2608,
+ stack2612, stack2616, stack2620, stack2624, stack2628, stack2632,
+ stack2636, stack2640, stack2644, stack2648, stack2652, stack2656,
+ stack2660, stack2664, stack2668, stack2672, stack2676, stack2680,
+ stack2684, stack2688, stack2692, stack2696, stack2700, stack2704,
+ stack2708, stack2712, stack2716, stack2720, stack2724, stack2728,
+ stack2732, stack2736, stack2740, stack2744, stack2748, stack2752,
+ stack2756, stack2760, stack2764, stack2768, stack2772, stack2776,
+ stack2780, stack2784, stack2788, stack2792, stack2796, stack2800,
+ stack2804, stack2808, stack2812, stack2816, stack2820, stack2824,
+ stack2828, stack2832, stack2836, stack2840, stack2844, stack2848,
+ stack2852, stack2856, stack2860, stack2864, stack2868, stack2872,
+ stack2876, stack2880, stack2884, stack2888, stack2892, stack2896,
+ stack2900, stack2904, stack2908, stack2912, stack2916, stack2920,
+ stack2924, stack2928, stack2932, stack2936, stack2940, stack2944,
+ stack2948, stack2952, stack2956, stack2960, stack2964, stack2968,
+ stack2972, stack2976, stack2980, stack2984, stack2988, stack2992,
+ stack2996, stack3000, stack3004, stack3008, stack3012, stack3016,
+ stack3020, stack3024, stack3028, stack3032, stack3036, stack3040,
+ stack3044, stack3048, stack3052, stack3056, stack3060, stack3064,
+ stack3068, stack3072, stack3076, stack3080, stack3084, stack3088,
+ stack3092, stack3096, stack3100, stack3104, stack3108, stack3112,
+ stack3116, stack3120, stack3124, stack3128, stack3132, stack3136,
+ stack3140, stack3144, stack3148, stack3152, stack3156, stack3160,
+ stack3164, stack3168, stack3172, stack3176, stack3180, stack3184,
+ stack3188, stack3192, stack3196, stack3200, stack3204, stack3208,
+ stack3212, stack3216, stack3220, stack3224, stack3228, stack3232,
+ stack3236, stack3240, stack3244, stack3248, stack3252, stack3256,
+ stack3260, stack3264, stack3268, stack3272, stack3276, stack3280,
+ stack3284, stack3288, stack3292, stack3296, stack3300, stack3304,
+ stack3308, stack3312, stack3316, stack3320, stack3324, stack3328,
+ stack3332, stack3336, stack3340, stack3344, stack3348, stack3352,
+ stack3356, stack3360, stack3364, stack3368, stack3372, stack3376,
+ stack3380, stack3384, stack3388, stack3392, stack3396, stack3400,
+ stack3404, stack3408, stack3412, stack3416, stack3420, stack3424,
+ stack3428, stack3432, stack3436, stack3440, stack3444, stack3448,
+ stack3452, stack3456, stack3460, stack3464, stack3468, stack3472,
+ stack3476, stack3480, stack3484, stack3488, stack3492, stack3496,
+ stack3500, stack3504, stack3508, stack3512, stack3516, stack3520,
+ stack3524, stack3528, stack3532, stack3536, stack3540, stack3544,
+ stack3548, stack3552, stack3556, stack3560, stack3564, stack3568,
+ stack3572, stack3576, stack3580, stack3584, stack3588, stack3592,
+ stack3596, stack3600, stack3604, stack3608, stack3612, stack3616,
+ stack3620, stack3624, stack3628, stack3632, stack3636, stack3640,
+ stack3644, stack3648, stack3652, stack3656, stack3660, stack3664,
+ stack3668, stack3672, stack3676, stack3680, stack3684, stack3688,
+ stack3692, stack3696, stack3700, stack3704, stack3708, stack3712,
+ stack3716, stack3720, stack3724, stack3728, stack3732, stack3736,
+ stack3740, stack3744, stack3748, stack3752, stack3756, stack3760,
+ stack3764, stack3768, stack3772, stack3776, stack3780, stack3784,
+ stack3788, stack3792, stack3796, stack3800, stack3804, stack3808,
+ stack3812, stack3816, stack3820, stack3824, stack3828, stack3832,
+ stack3836, stack3840, stack3844, stack3848, stack3852, stack3856,
+ stack3860, stack3864, stack3868, stack3872, stack3876, stack3880,
+ stack3884, stack3888, stack3892, stack3896, stack3900, stack3904,
+ stack3908, stack3912, stack3916, stack3920, stack3924, stack3928,
+ stack3932, stack3936, stack3940, stack3944, stack3948, stack3952,
+ stack3956, stack3960, stack3964, stack3968, stack3972, stack3976,
+ stack3980, stack3984, stack3988, stack3992, stack3996, stack4000,
+ stack4004, stack4008, stack4012, stack4016, stack4020, stack4024,
+ stack4028, stack4032, stack4036, stack4040, stack4044, stack4048,
+ stack4052, stack4056, stack4060, stack4064, stack4068, stack4072,
+ stack4076, stack4080, stack4084, stack4088, stack4092, stack4096,
+ stack4100, stack4104, stack4108, stack4112, stack4116, stack4120,
+ stack4124, stack4128, stack4132, stack4136, stack4140, stack4144,
+ stack4148, stack4152, stack4156, stack4160, stack4164, stack4168,
+ stack4172, stack4176, stack4180, stack4184, stack4188, stack4192,
+ stack4196, stack4200, stack4204, stack4208, stack4212, stack4216,
+ stack4220, stack4224, stack4228, stack4232, stack4236, stack4240,
+ stack4244, stack4248, stack4252, stack4256, stack4260, stack4264,
+ stack4268, stack4272, stack4276, stack4280, stack4284, stack4288,
+ stack4292, stack4296, stack4300, stack4304, stack4308, stack4312,
+ stack4316, stack4320, stack4324, stack4328, stack4332, stack4336,
+ stack4340, stack4344, stack4348, stack4352, stack4356, stack4360,
+ stack4364, stack4368, stack4372, stack4376, stack4380, stack4384,
+ stack4388, stack4392, stack4396, stack4400, stack4404, stack4408,
+ stack4412, stack4416, stack4420, stack4424, stack4428, stack4432,
+ stack4436, stack4440, stack4444, stack4448, stack4452, stack4456,
+ stack4460, stack4464, stack4468, stack4472, stack4476, stack4480,
+ stack4484, stack4488, stack4492, stack4496, stack4500, stack4504,
+ stack4508, stack4512, stack4516, stack4520, stack4524, stack4528,
+ stack4532, stack4536, stack4540, stack4544, stack4548, stack4552,
+ stack4556, stack4560, stack4564, stack4568, stack4572, stack4576,
+ stack4580, stack4584, stack4588, stack4592, stack4596, stack4600,
+ stack4604, stack4608, stack4612, stack4616, stack4620, stack4624,
+ stack4628, stack4632, stack4636, stack4640, stack4644, stack4648,
+ stack4652, stack4656, stack4660, stack4664, stack4668, stack4672,
+ stack4676, stack4680, stack4684, stack4688, stack4692, stack4696,
+ stack4700, stack4704, stack4708, stack4712, stack4716, stack4720,
+ stack4724, stack4728, stack4732, stack4736, stack4740, stack4744,
+ stack4748, stack4752, stack4756, stack4760, stack4764, stack4768,
+ stack4772, stack4776, stack4780, stack4784, stack4788, stack4792,
+ stack4796, stack4800, stack4804, stack4808, stack4812, stack4816,
+ stack4820, stack4824, stack4828, stack4832, stack4836, stack4840,
+ stack4844, stack4848, stack4852, stack4856, stack4860, stack4864,
+ stack4868, stack4872, stack4876, stack4880, stack4884, stack4888,
+ stack4892, stack4896, stack4900, stack4904, stack4908, stack4912,
+ stack4916, stack4920, stack4924, stack4928, stack4932, stack4936,
+ stack4940, stack4944, stack4948, stack4952, stack4956, stack4960,
+ stack4964, stack4968, stack4972, stack4976, stack4980, stack4984,
+ stack4988, stack4992, stack4996, stack5000,
+}
+
+// Edit .+1,$ | seq 4 4 5000 | sed 's/.*/func stack&()(uintptr, uintptr) { var buf [&]byte; use(buf[:]); return Stackguard() }/'
+func stack4() (uintptr, uintptr) { var buf [4]byte; use(buf[:]); return Stackguard() }
+func stack8() (uintptr, uintptr) { var buf [8]byte; use(buf[:]); return Stackguard() }
+func stack12() (uintptr, uintptr) { var buf [12]byte; use(buf[:]); return Stackguard() }
+func stack16() (uintptr, uintptr) { var buf [16]byte; use(buf[:]); return Stackguard() }
+func stack20() (uintptr, uintptr) { var buf [20]byte; use(buf[:]); return Stackguard() }
+func stack24() (uintptr, uintptr) { var buf [24]byte; use(buf[:]); return Stackguard() }
+func stack28() (uintptr, uintptr) { var buf [28]byte; use(buf[:]); return Stackguard() }
+func stack32() (uintptr, uintptr) { var buf [32]byte; use(buf[:]); return Stackguard() }
+func stack36() (uintptr, uintptr) { var buf [36]byte; use(buf[:]); return Stackguard() }
+func stack40() (uintptr, uintptr) { var buf [40]byte; use(buf[:]); return Stackguard() }
+func stack44() (uintptr, uintptr) { var buf [44]byte; use(buf[:]); return Stackguard() }
+func stack48() (uintptr, uintptr) { var buf [48]byte; use(buf[:]); return Stackguard() }
+func stack52() (uintptr, uintptr) { var buf [52]byte; use(buf[:]); return Stackguard() }
+func stack56() (uintptr, uintptr) { var buf [56]byte; use(buf[:]); return Stackguard() }
+func stack60() (uintptr, uintptr) { var buf [60]byte; use(buf[:]); return Stackguard() }
+func stack64() (uintptr, uintptr) { var buf [64]byte; use(buf[:]); return Stackguard() }
+func stack68() (uintptr, uintptr) { var buf [68]byte; use(buf[:]); return Stackguard() }
+func stack72() (uintptr, uintptr) { var buf [72]byte; use(buf[:]); return Stackguard() }
+func stack76() (uintptr, uintptr) { var buf [76]byte; use(buf[:]); return Stackguard() }
+func stack80() (uintptr, uintptr) { var buf [80]byte; use(buf[:]); return Stackguard() }
+func stack84() (uintptr, uintptr) { var buf [84]byte; use(buf[:]); return Stackguard() }
+func stack88() (uintptr, uintptr) { var buf [88]byte; use(buf[:]); return Stackguard() }
+func stack92() (uintptr, uintptr) { var buf [92]byte; use(buf[:]); return Stackguard() }
+func stack96() (uintptr, uintptr) { var buf [96]byte; use(buf[:]); return Stackguard() }
+func stack100() (uintptr, uintptr) { var buf [100]byte; use(buf[:]); return Stackguard() }
+func stack104() (uintptr, uintptr) { var buf [104]byte; use(buf[:]); return Stackguard() }
+func stack108() (uintptr, uintptr) { var buf [108]byte; use(buf[:]); return Stackguard() }
+func stack112() (uintptr, uintptr) { var buf [112]byte; use(buf[:]); return Stackguard() }
+func stack116() (uintptr, uintptr) { var buf [116]byte; use(buf[:]); return Stackguard() }
+func stack120() (uintptr, uintptr) { var buf [120]byte; use(buf[:]); return Stackguard() }
+func stack124() (uintptr, uintptr) { var buf [124]byte; use(buf[:]); return Stackguard() }
+func stack128() (uintptr, uintptr) { var buf [128]byte; use(buf[:]); return Stackguard() }
+func stack132() (uintptr, uintptr) { var buf [132]byte; use(buf[:]); return Stackguard() }
+func stack136() (uintptr, uintptr) { var buf [136]byte; use(buf[:]); return Stackguard() }
+func stack140() (uintptr, uintptr) { var buf [140]byte; use(buf[:]); return Stackguard() }
+func stack144() (uintptr, uintptr) { var buf [144]byte; use(buf[:]); return Stackguard() }
+func stack148() (uintptr, uintptr) { var buf [148]byte; use(buf[:]); return Stackguard() }
+func stack152() (uintptr, uintptr) { var buf [152]byte; use(buf[:]); return Stackguard() }
+func stack156() (uintptr, uintptr) { var buf [156]byte; use(buf[:]); return Stackguard() }
+func stack160() (uintptr, uintptr) { var buf [160]byte; use(buf[:]); return Stackguard() }
+func stack164() (uintptr, uintptr) { var buf [164]byte; use(buf[:]); return Stackguard() }
+func stack168() (uintptr, uintptr) { var buf [168]byte; use(buf[:]); return Stackguard() }
+func stack172() (uintptr, uintptr) { var buf [172]byte; use(buf[:]); return Stackguard() }
+func stack176() (uintptr, uintptr) { var buf [176]byte; use(buf[:]); return Stackguard() }
+func stack180() (uintptr, uintptr) { var buf [180]byte; use(buf[:]); return Stackguard() }
+func stack184() (uintptr, uintptr) { var buf [184]byte; use(buf[:]); return Stackguard() }
+func stack188() (uintptr, uintptr) { var buf [188]byte; use(buf[:]); return Stackguard() }
+func stack192() (uintptr, uintptr) { var buf [192]byte; use(buf[:]); return Stackguard() }
+func stack196() (uintptr, uintptr) { var buf [196]byte; use(buf[:]); return Stackguard() }
+func stack200() (uintptr, uintptr) { var buf [200]byte; use(buf[:]); return Stackguard() }
+func stack204() (uintptr, uintptr) { var buf [204]byte; use(buf[:]); return Stackguard() }
+func stack208() (uintptr, uintptr) { var buf [208]byte; use(buf[:]); return Stackguard() }
+func stack212() (uintptr, uintptr) { var buf [212]byte; use(buf[:]); return Stackguard() }
+func stack216() (uintptr, uintptr) { var buf [216]byte; use(buf[:]); return Stackguard() }
+func stack220() (uintptr, uintptr) { var buf [220]byte; use(buf[:]); return Stackguard() }
+func stack224() (uintptr, uintptr) { var buf [224]byte; use(buf[:]); return Stackguard() }
+func stack228() (uintptr, uintptr) { var buf [228]byte; use(buf[:]); return Stackguard() }
+func stack232() (uintptr, uintptr) { var buf [232]byte; use(buf[:]); return Stackguard() }
+func stack236() (uintptr, uintptr) { var buf [236]byte; use(buf[:]); return Stackguard() }
+func stack240() (uintptr, uintptr) { var buf [240]byte; use(buf[:]); return Stackguard() }
+func stack244() (uintptr, uintptr) { var buf [244]byte; use(buf[:]); return Stackguard() }
+func stack248() (uintptr, uintptr) { var buf [248]byte; use(buf[:]); return Stackguard() }
+func stack252() (uintptr, uintptr) { var buf [252]byte; use(buf[:]); return Stackguard() }
+func stack256() (uintptr, uintptr) { var buf [256]byte; use(buf[:]); return Stackguard() }
+func stack260() (uintptr, uintptr) { var buf [260]byte; use(buf[:]); return Stackguard() }
+func stack264() (uintptr, uintptr) { var buf [264]byte; use(buf[:]); return Stackguard() }
+func stack268() (uintptr, uintptr) { var buf [268]byte; use(buf[:]); return Stackguard() }
+func stack272() (uintptr, uintptr) { var buf [272]byte; use(buf[:]); return Stackguard() }
+func stack276() (uintptr, uintptr) { var buf [276]byte; use(buf[:]); return Stackguard() }
+func stack280() (uintptr, uintptr) { var buf [280]byte; use(buf[:]); return Stackguard() }
+func stack284() (uintptr, uintptr) { var buf [284]byte; use(buf[:]); return Stackguard() }
+func stack288() (uintptr, uintptr) { var buf [288]byte; use(buf[:]); return Stackguard() }
+func stack292() (uintptr, uintptr) { var buf [292]byte; use(buf[:]); return Stackguard() }
+func stack296() (uintptr, uintptr) { var buf [296]byte; use(buf[:]); return Stackguard() }
+func stack300() (uintptr, uintptr) { var buf [300]byte; use(buf[:]); return Stackguard() }
+func stack304() (uintptr, uintptr) { var buf [304]byte; use(buf[:]); return Stackguard() }
+func stack308() (uintptr, uintptr) { var buf [308]byte; use(buf[:]); return Stackguard() }
+func stack312() (uintptr, uintptr) { var buf [312]byte; use(buf[:]); return Stackguard() }
+func stack316() (uintptr, uintptr) { var buf [316]byte; use(buf[:]); return Stackguard() }
+func stack320() (uintptr, uintptr) { var buf [320]byte; use(buf[:]); return Stackguard() }
+func stack324() (uintptr, uintptr) { var buf [324]byte; use(buf[:]); return Stackguard() }
+func stack328() (uintptr, uintptr) { var buf [328]byte; use(buf[:]); return Stackguard() }
+func stack332() (uintptr, uintptr) { var buf [332]byte; use(buf[:]); return Stackguard() }
+func stack336() (uintptr, uintptr) { var buf [336]byte; use(buf[:]); return Stackguard() }
+func stack340() (uintptr, uintptr) { var buf [340]byte; use(buf[:]); return Stackguard() }
+func stack344() (uintptr, uintptr) { var buf [344]byte; use(buf[:]); return Stackguard() }
+func stack348() (uintptr, uintptr) { var buf [348]byte; use(buf[:]); return Stackguard() }
+func stack352() (uintptr, uintptr) { var buf [352]byte; use(buf[:]); return Stackguard() }
+func stack356() (uintptr, uintptr) { var buf [356]byte; use(buf[:]); return Stackguard() }
+func stack360() (uintptr, uintptr) { var buf [360]byte; use(buf[:]); return Stackguard() }
+func stack364() (uintptr, uintptr) { var buf [364]byte; use(buf[:]); return Stackguard() }
+func stack368() (uintptr, uintptr) { var buf [368]byte; use(buf[:]); return Stackguard() }
+func stack372() (uintptr, uintptr) { var buf [372]byte; use(buf[:]); return Stackguard() }
+func stack376() (uintptr, uintptr) { var buf [376]byte; use(buf[:]); return Stackguard() }
+func stack380() (uintptr, uintptr) { var buf [380]byte; use(buf[:]); return Stackguard() }
+func stack384() (uintptr, uintptr) { var buf [384]byte; use(buf[:]); return Stackguard() }
+func stack388() (uintptr, uintptr) { var buf [388]byte; use(buf[:]); return Stackguard() }
+func stack392() (uintptr, uintptr) { var buf [392]byte; use(buf[:]); return Stackguard() }
+func stack396() (uintptr, uintptr) { var buf [396]byte; use(buf[:]); return Stackguard() }
+func stack400() (uintptr, uintptr) { var buf [400]byte; use(buf[:]); return Stackguard() }
+func stack404() (uintptr, uintptr) { var buf [404]byte; use(buf[:]); return Stackguard() }
+func stack408() (uintptr, uintptr) { var buf [408]byte; use(buf[:]); return Stackguard() }
+func stack412() (uintptr, uintptr) { var buf [412]byte; use(buf[:]); return Stackguard() }
+func stack416() (uintptr, uintptr) { var buf [416]byte; use(buf[:]); return Stackguard() }
+func stack420() (uintptr, uintptr) { var buf [420]byte; use(buf[:]); return Stackguard() }
+func stack424() (uintptr, uintptr) { var buf [424]byte; use(buf[:]); return Stackguard() }
+func stack428() (uintptr, uintptr) { var buf [428]byte; use(buf[:]); return Stackguard() }
+func stack432() (uintptr, uintptr) { var buf [432]byte; use(buf[:]); return Stackguard() }
+func stack436() (uintptr, uintptr) { var buf [436]byte; use(buf[:]); return Stackguard() }
+func stack440() (uintptr, uintptr) { var buf [440]byte; use(buf[:]); return Stackguard() }
+func stack444() (uintptr, uintptr) { var buf [444]byte; use(buf[:]); return Stackguard() }
+func stack448() (uintptr, uintptr) { var buf [448]byte; use(buf[:]); return Stackguard() }
+func stack452() (uintptr, uintptr) { var buf [452]byte; use(buf[:]); return Stackguard() }
+func stack456() (uintptr, uintptr) { var buf [456]byte; use(buf[:]); return Stackguard() }
+func stack460() (uintptr, uintptr) { var buf [460]byte; use(buf[:]); return Stackguard() }
+func stack464() (uintptr, uintptr) { var buf [464]byte; use(buf[:]); return Stackguard() }
+func stack468() (uintptr, uintptr) { var buf [468]byte; use(buf[:]); return Stackguard() }
+func stack472() (uintptr, uintptr) { var buf [472]byte; use(buf[:]); return Stackguard() }
+func stack476() (uintptr, uintptr) { var buf [476]byte; use(buf[:]); return Stackguard() }
+func stack480() (uintptr, uintptr) { var buf [480]byte; use(buf[:]); return Stackguard() }
+func stack484() (uintptr, uintptr) { var buf [484]byte; use(buf[:]); return Stackguard() }
+func stack488() (uintptr, uintptr) { var buf [488]byte; use(buf[:]); return Stackguard() }
+func stack492() (uintptr, uintptr) { var buf [492]byte; use(buf[:]); return Stackguard() }
+func stack496() (uintptr, uintptr) { var buf [496]byte; use(buf[:]); return Stackguard() }
+func stack500() (uintptr, uintptr) { var buf [500]byte; use(buf[:]); return Stackguard() }
+func stack504() (uintptr, uintptr) { var buf [504]byte; use(buf[:]); return Stackguard() }
+func stack508() (uintptr, uintptr) { var buf [508]byte; use(buf[:]); return Stackguard() }
+func stack512() (uintptr, uintptr) { var buf [512]byte; use(buf[:]); return Stackguard() }
+func stack516() (uintptr, uintptr) { var buf [516]byte; use(buf[:]); return Stackguard() }
+func stack520() (uintptr, uintptr) { var buf [520]byte; use(buf[:]); return Stackguard() }
+func stack524() (uintptr, uintptr) { var buf [524]byte; use(buf[:]); return Stackguard() }
+func stack528() (uintptr, uintptr) { var buf [528]byte; use(buf[:]); return Stackguard() }
+func stack532() (uintptr, uintptr) { var buf [532]byte; use(buf[:]); return Stackguard() }
+func stack536() (uintptr, uintptr) { var buf [536]byte; use(buf[:]); return Stackguard() }
+func stack540() (uintptr, uintptr) { var buf [540]byte; use(buf[:]); return Stackguard() }
+func stack544() (uintptr, uintptr) { var buf [544]byte; use(buf[:]); return Stackguard() }
+func stack548() (uintptr, uintptr) { var buf [548]byte; use(buf[:]); return Stackguard() }
+func stack552() (uintptr, uintptr) { var buf [552]byte; use(buf[:]); return Stackguard() }
+func stack556() (uintptr, uintptr) { var buf [556]byte; use(buf[:]); return Stackguard() }
+func stack560() (uintptr, uintptr) { var buf [560]byte; use(buf[:]); return Stackguard() }
+func stack564() (uintptr, uintptr) { var buf [564]byte; use(buf[:]); return Stackguard() }
+func stack568() (uintptr, uintptr) { var buf [568]byte; use(buf[:]); return Stackguard() }
+func stack572() (uintptr, uintptr) { var buf [572]byte; use(buf[:]); return Stackguard() }
+func stack576() (uintptr, uintptr) { var buf [576]byte; use(buf[:]); return Stackguard() }
+func stack580() (uintptr, uintptr) { var buf [580]byte; use(buf[:]); return Stackguard() }
+func stack584() (uintptr, uintptr) { var buf [584]byte; use(buf[:]); return Stackguard() }
+func stack588() (uintptr, uintptr) { var buf [588]byte; use(buf[:]); return Stackguard() }
+func stack592() (uintptr, uintptr) { var buf [592]byte; use(buf[:]); return Stackguard() }
+func stack596() (uintptr, uintptr) { var buf [596]byte; use(buf[:]); return Stackguard() }
+func stack600() (uintptr, uintptr) { var buf [600]byte; use(buf[:]); return Stackguard() }
+func stack604() (uintptr, uintptr) { var buf [604]byte; use(buf[:]); return Stackguard() }
+func stack608() (uintptr, uintptr) { var buf [608]byte; use(buf[:]); return Stackguard() }
+func stack612() (uintptr, uintptr) { var buf [612]byte; use(buf[:]); return Stackguard() }
+func stack616() (uintptr, uintptr) { var buf [616]byte; use(buf[:]); return Stackguard() }
+func stack620() (uintptr, uintptr) { var buf [620]byte; use(buf[:]); return Stackguard() }
+func stack624() (uintptr, uintptr) { var buf [624]byte; use(buf[:]); return Stackguard() }
+func stack628() (uintptr, uintptr) { var buf [628]byte; use(buf[:]); return Stackguard() }
+func stack632() (uintptr, uintptr) { var buf [632]byte; use(buf[:]); return Stackguard() }
+func stack636() (uintptr, uintptr) { var buf [636]byte; use(buf[:]); return Stackguard() }
+func stack640() (uintptr, uintptr) { var buf [640]byte; use(buf[:]); return Stackguard() }
+func stack644() (uintptr, uintptr) { var buf [644]byte; use(buf[:]); return Stackguard() }
+func stack648() (uintptr, uintptr) { var buf [648]byte; use(buf[:]); return Stackguard() }
+func stack652() (uintptr, uintptr) { var buf [652]byte; use(buf[:]); return Stackguard() }
+func stack656() (uintptr, uintptr) { var buf [656]byte; use(buf[:]); return Stackguard() }
+func stack660() (uintptr, uintptr) { var buf [660]byte; use(buf[:]); return Stackguard() }
+func stack664() (uintptr, uintptr) { var buf [664]byte; use(buf[:]); return Stackguard() }
+func stack668() (uintptr, uintptr) { var buf [668]byte; use(buf[:]); return Stackguard() }
+func stack672() (uintptr, uintptr) { var buf [672]byte; use(buf[:]); return Stackguard() }
+func stack676() (uintptr, uintptr) { var buf [676]byte; use(buf[:]); return Stackguard() }
+func stack680() (uintptr, uintptr) { var buf [680]byte; use(buf[:]); return Stackguard() }
+func stack684() (uintptr, uintptr) { var buf [684]byte; use(buf[:]); return Stackguard() }
+func stack688() (uintptr, uintptr) { var buf [688]byte; use(buf[:]); return Stackguard() }
+func stack692() (uintptr, uintptr) { var buf [692]byte; use(buf[:]); return Stackguard() }
+func stack696() (uintptr, uintptr) { var buf [696]byte; use(buf[:]); return Stackguard() }
+func stack700() (uintptr, uintptr) { var buf [700]byte; use(buf[:]); return Stackguard() }
+func stack704() (uintptr, uintptr) { var buf [704]byte; use(buf[:]); return Stackguard() }
+func stack708() (uintptr, uintptr) { var buf [708]byte; use(buf[:]); return Stackguard() }
+func stack712() (uintptr, uintptr) { var buf [712]byte; use(buf[:]); return Stackguard() }
+func stack716() (uintptr, uintptr) { var buf [716]byte; use(buf[:]); return Stackguard() }
+func stack720() (uintptr, uintptr) { var buf [720]byte; use(buf[:]); return Stackguard() }
+func stack724() (uintptr, uintptr) { var buf [724]byte; use(buf[:]); return Stackguard() }
+func stack728() (uintptr, uintptr) { var buf [728]byte; use(buf[:]); return Stackguard() }
+func stack732() (uintptr, uintptr) { var buf [732]byte; use(buf[:]); return Stackguard() }
+func stack736() (uintptr, uintptr) { var buf [736]byte; use(buf[:]); return Stackguard() }
+func stack740() (uintptr, uintptr) { var buf [740]byte; use(buf[:]); return Stackguard() }
+func stack744() (uintptr, uintptr) { var buf [744]byte; use(buf[:]); return Stackguard() }
+func stack748() (uintptr, uintptr) { var buf [748]byte; use(buf[:]); return Stackguard() }
+func stack752() (uintptr, uintptr) { var buf [752]byte; use(buf[:]); return Stackguard() }
+func stack756() (uintptr, uintptr) { var buf [756]byte; use(buf[:]); return Stackguard() }
+func stack760() (uintptr, uintptr) { var buf [760]byte; use(buf[:]); return Stackguard() }
+func stack764() (uintptr, uintptr) { var buf [764]byte; use(buf[:]); return Stackguard() }
+func stack768() (uintptr, uintptr) { var buf [768]byte; use(buf[:]); return Stackguard() }
+func stack772() (uintptr, uintptr) { var buf [772]byte; use(buf[:]); return Stackguard() }
+func stack776() (uintptr, uintptr) { var buf [776]byte; use(buf[:]); return Stackguard() }
+func stack780() (uintptr, uintptr) { var buf [780]byte; use(buf[:]); return Stackguard() }
+func stack784() (uintptr, uintptr) { var buf [784]byte; use(buf[:]); return Stackguard() }
+func stack788() (uintptr, uintptr) { var buf [788]byte; use(buf[:]); return Stackguard() }
+func stack792() (uintptr, uintptr) { var buf [792]byte; use(buf[:]); return Stackguard() }
+func stack796() (uintptr, uintptr) { var buf [796]byte; use(buf[:]); return Stackguard() }
+func stack800() (uintptr, uintptr) { var buf [800]byte; use(buf[:]); return Stackguard() }
+func stack804() (uintptr, uintptr) { var buf [804]byte; use(buf[:]); return Stackguard() }
+func stack808() (uintptr, uintptr) { var buf [808]byte; use(buf[:]); return Stackguard() }
+func stack812() (uintptr, uintptr) { var buf [812]byte; use(buf[:]); return Stackguard() }
+func stack816() (uintptr, uintptr) { var buf [816]byte; use(buf[:]); return Stackguard() }
+func stack820() (uintptr, uintptr) { var buf [820]byte; use(buf[:]); return Stackguard() }
+func stack824() (uintptr, uintptr) { var buf [824]byte; use(buf[:]); return Stackguard() }
+func stack828() (uintptr, uintptr) { var buf [828]byte; use(buf[:]); return Stackguard() }
+func stack832() (uintptr, uintptr) { var buf [832]byte; use(buf[:]); return Stackguard() }
+func stack836() (uintptr, uintptr) { var buf [836]byte; use(buf[:]); return Stackguard() }
+func stack840() (uintptr, uintptr) { var buf [840]byte; use(buf[:]); return Stackguard() }
+func stack844() (uintptr, uintptr) { var buf [844]byte; use(buf[:]); return Stackguard() }
+func stack848() (uintptr, uintptr) { var buf [848]byte; use(buf[:]); return Stackguard() }
+func stack852() (uintptr, uintptr) { var buf [852]byte; use(buf[:]); return Stackguard() }
+func stack856() (uintptr, uintptr) { var buf [856]byte; use(buf[:]); return Stackguard() }
+func stack860() (uintptr, uintptr) { var buf [860]byte; use(buf[:]); return Stackguard() }
+func stack864() (uintptr, uintptr) { var buf [864]byte; use(buf[:]); return Stackguard() }
+func stack868() (uintptr, uintptr) { var buf [868]byte; use(buf[:]); return Stackguard() }
+func stack872() (uintptr, uintptr) { var buf [872]byte; use(buf[:]); return Stackguard() }
+func stack876() (uintptr, uintptr) { var buf [876]byte; use(buf[:]); return Stackguard() }
+func stack880() (uintptr, uintptr) { var buf [880]byte; use(buf[:]); return Stackguard() }
+func stack884() (uintptr, uintptr) { var buf [884]byte; use(buf[:]); return Stackguard() }
+func stack888() (uintptr, uintptr) { var buf [888]byte; use(buf[:]); return Stackguard() }
+func stack892() (uintptr, uintptr) { var buf [892]byte; use(buf[:]); return Stackguard() }
+func stack896() (uintptr, uintptr) { var buf [896]byte; use(buf[:]); return Stackguard() }
+func stack900() (uintptr, uintptr) { var buf [900]byte; use(buf[:]); return Stackguard() }
+func stack904() (uintptr, uintptr) { var buf [904]byte; use(buf[:]); return Stackguard() }
+func stack908() (uintptr, uintptr) { var buf [908]byte; use(buf[:]); return Stackguard() }
+func stack912() (uintptr, uintptr) { var buf [912]byte; use(buf[:]); return Stackguard() }
+func stack916() (uintptr, uintptr) { var buf [916]byte; use(buf[:]); return Stackguard() }
+func stack920() (uintptr, uintptr) { var buf [920]byte; use(buf[:]); return Stackguard() }
+func stack924() (uintptr, uintptr) { var buf [924]byte; use(buf[:]); return Stackguard() }
+func stack928() (uintptr, uintptr) { var buf [928]byte; use(buf[:]); return Stackguard() }
+func stack932() (uintptr, uintptr) { var buf [932]byte; use(buf[:]); return Stackguard() }
+func stack936() (uintptr, uintptr) { var buf [936]byte; use(buf[:]); return Stackguard() }
+func stack940() (uintptr, uintptr) { var buf [940]byte; use(buf[:]); return Stackguard() }
+func stack944() (uintptr, uintptr) { var buf [944]byte; use(buf[:]); return Stackguard() }
+func stack948() (uintptr, uintptr) { var buf [948]byte; use(buf[:]); return Stackguard() }
+func stack952() (uintptr, uintptr) { var buf [952]byte; use(buf[:]); return Stackguard() }
+func stack956() (uintptr, uintptr) { var buf [956]byte; use(buf[:]); return Stackguard() }
+func stack960() (uintptr, uintptr) { var buf [960]byte; use(buf[:]); return Stackguard() }
+func stack964() (uintptr, uintptr) { var buf [964]byte; use(buf[:]); return Stackguard() }
+func stack968() (uintptr, uintptr) { var buf [968]byte; use(buf[:]); return Stackguard() }
+func stack972() (uintptr, uintptr) { var buf [972]byte; use(buf[:]); return Stackguard() }
+func stack976() (uintptr, uintptr) { var buf [976]byte; use(buf[:]); return Stackguard() }
+func stack980() (uintptr, uintptr) { var buf [980]byte; use(buf[:]); return Stackguard() }
+func stack984() (uintptr, uintptr) { var buf [984]byte; use(buf[:]); return Stackguard() }
+func stack988() (uintptr, uintptr) { var buf [988]byte; use(buf[:]); return Stackguard() }
+func stack992() (uintptr, uintptr) { var buf [992]byte; use(buf[:]); return Stackguard() }
+func stack996() (uintptr, uintptr) { var buf [996]byte; use(buf[:]); return Stackguard() }
+func stack1000() (uintptr, uintptr) { var buf [1000]byte; use(buf[:]); return Stackguard() }
+func stack1004() (uintptr, uintptr) { var buf [1004]byte; use(buf[:]); return Stackguard() }
+func stack1008() (uintptr, uintptr) { var buf [1008]byte; use(buf[:]); return Stackguard() }
+func stack1012() (uintptr, uintptr) { var buf [1012]byte; use(buf[:]); return Stackguard() }
+func stack1016() (uintptr, uintptr) { var buf [1016]byte; use(buf[:]); return Stackguard() }
+func stack1020() (uintptr, uintptr) { var buf [1020]byte; use(buf[:]); return Stackguard() }
+func stack1024() (uintptr, uintptr) { var buf [1024]byte; use(buf[:]); return Stackguard() }
+func stack1028() (uintptr, uintptr) { var buf [1028]byte; use(buf[:]); return Stackguard() }
+func stack1032() (uintptr, uintptr) { var buf [1032]byte; use(buf[:]); return Stackguard() }
+func stack1036() (uintptr, uintptr) { var buf [1036]byte; use(buf[:]); return Stackguard() }
+func stack1040() (uintptr, uintptr) { var buf [1040]byte; use(buf[:]); return Stackguard() }
+func stack1044() (uintptr, uintptr) { var buf [1044]byte; use(buf[:]); return Stackguard() }
+func stack1048() (uintptr, uintptr) { var buf [1048]byte; use(buf[:]); return Stackguard() }
+func stack1052() (uintptr, uintptr) { var buf [1052]byte; use(buf[:]); return Stackguard() }
+func stack1056() (uintptr, uintptr) { var buf [1056]byte; use(buf[:]); return Stackguard() }
+func stack1060() (uintptr, uintptr) { var buf [1060]byte; use(buf[:]); return Stackguard() }
+func stack1064() (uintptr, uintptr) { var buf [1064]byte; use(buf[:]); return Stackguard() }
+func stack1068() (uintptr, uintptr) { var buf [1068]byte; use(buf[:]); return Stackguard() }
+func stack1072() (uintptr, uintptr) { var buf [1072]byte; use(buf[:]); return Stackguard() }
+func stack1076() (uintptr, uintptr) { var buf [1076]byte; use(buf[:]); return Stackguard() }
+func stack1080() (uintptr, uintptr) { var buf [1080]byte; use(buf[:]); return Stackguard() }
+func stack1084() (uintptr, uintptr) { var buf [1084]byte; use(buf[:]); return Stackguard() }
+func stack1088() (uintptr, uintptr) { var buf [1088]byte; use(buf[:]); return Stackguard() }
+func stack1092() (uintptr, uintptr) { var buf [1092]byte; use(buf[:]); return Stackguard() }
+func stack1096() (uintptr, uintptr) { var buf [1096]byte; use(buf[:]); return Stackguard() }
+func stack1100() (uintptr, uintptr) { var buf [1100]byte; use(buf[:]); return Stackguard() }
+func stack1104() (uintptr, uintptr) { var buf [1104]byte; use(buf[:]); return Stackguard() }
+func stack1108() (uintptr, uintptr) { var buf [1108]byte; use(buf[:]); return Stackguard() }
+func stack1112() (uintptr, uintptr) { var buf [1112]byte; use(buf[:]); return Stackguard() }
+func stack1116() (uintptr, uintptr) { var buf [1116]byte; use(buf[:]); return Stackguard() }
+func stack1120() (uintptr, uintptr) { var buf [1120]byte; use(buf[:]); return Stackguard() }
+func stack1124() (uintptr, uintptr) { var buf [1124]byte; use(buf[:]); return Stackguard() }
+func stack1128() (uintptr, uintptr) { var buf [1128]byte; use(buf[:]); return Stackguard() }
+func stack1132() (uintptr, uintptr) { var buf [1132]byte; use(buf[:]); return Stackguard() }
+func stack1136() (uintptr, uintptr) { var buf [1136]byte; use(buf[:]); return Stackguard() }
+func stack1140() (uintptr, uintptr) { var buf [1140]byte; use(buf[:]); return Stackguard() }
+func stack1144() (uintptr, uintptr) { var buf [1144]byte; use(buf[:]); return Stackguard() }
+func stack1148() (uintptr, uintptr) { var buf [1148]byte; use(buf[:]); return Stackguard() }
+func stack1152() (uintptr, uintptr) { var buf [1152]byte; use(buf[:]); return Stackguard() }
+func stack1156() (uintptr, uintptr) { var buf [1156]byte; use(buf[:]); return Stackguard() }
+func stack1160() (uintptr, uintptr) { var buf [1160]byte; use(buf[:]); return Stackguard() }
+func stack1164() (uintptr, uintptr) { var buf [1164]byte; use(buf[:]); return Stackguard() }
+func stack1168() (uintptr, uintptr) { var buf [1168]byte; use(buf[:]); return Stackguard() }
+func stack1172() (uintptr, uintptr) { var buf [1172]byte; use(buf[:]); return Stackguard() }
+func stack1176() (uintptr, uintptr) { var buf [1176]byte; use(buf[:]); return Stackguard() }
+func stack1180() (uintptr, uintptr) { var buf [1180]byte; use(buf[:]); return Stackguard() }
+func stack1184() (uintptr, uintptr) { var buf [1184]byte; use(buf[:]); return Stackguard() }
+func stack1188() (uintptr, uintptr) { var buf [1188]byte; use(buf[:]); return Stackguard() }
+func stack1192() (uintptr, uintptr) { var buf [1192]byte; use(buf[:]); return Stackguard() }
+func stack1196() (uintptr, uintptr) { var buf [1196]byte; use(buf[:]); return Stackguard() }
+func stack1200() (uintptr, uintptr) { var buf [1200]byte; use(buf[:]); return Stackguard() }
+func stack1204() (uintptr, uintptr) { var buf [1204]byte; use(buf[:]); return Stackguard() }
+func stack1208() (uintptr, uintptr) { var buf [1208]byte; use(buf[:]); return Stackguard() }
+func stack1212() (uintptr, uintptr) { var buf [1212]byte; use(buf[:]); return Stackguard() }
+func stack1216() (uintptr, uintptr) { var buf [1216]byte; use(buf[:]); return Stackguard() }
+func stack1220() (uintptr, uintptr) { var buf [1220]byte; use(buf[:]); return Stackguard() }
+func stack1224() (uintptr, uintptr) { var buf [1224]byte; use(buf[:]); return Stackguard() }
+func stack1228() (uintptr, uintptr) { var buf [1228]byte; use(buf[:]); return Stackguard() }
+func stack1232() (uintptr, uintptr) { var buf [1232]byte; use(buf[:]); return Stackguard() }
+func stack1236() (uintptr, uintptr) { var buf [1236]byte; use(buf[:]); return Stackguard() }
+func stack1240() (uintptr, uintptr) { var buf [1240]byte; use(buf[:]); return Stackguard() }
+func stack1244() (uintptr, uintptr) { var buf [1244]byte; use(buf[:]); return Stackguard() }
+func stack1248() (uintptr, uintptr) { var buf [1248]byte; use(buf[:]); return Stackguard() }
+func stack1252() (uintptr, uintptr) { var buf [1252]byte; use(buf[:]); return Stackguard() }
+func stack1256() (uintptr, uintptr) { var buf [1256]byte; use(buf[:]); return Stackguard() }
+func stack1260() (uintptr, uintptr) { var buf [1260]byte; use(buf[:]); return Stackguard() }
+func stack1264() (uintptr, uintptr) { var buf [1264]byte; use(buf[:]); return Stackguard() }
+func stack1268() (uintptr, uintptr) { var buf [1268]byte; use(buf[:]); return Stackguard() }
+func stack1272() (uintptr, uintptr) { var buf [1272]byte; use(buf[:]); return Stackguard() }
+func stack1276() (uintptr, uintptr) { var buf [1276]byte; use(buf[:]); return Stackguard() }
+func stack1280() (uintptr, uintptr) { var buf [1280]byte; use(buf[:]); return Stackguard() }
+func stack1284() (uintptr, uintptr) { var buf [1284]byte; use(buf[:]); return Stackguard() }
+func stack1288() (uintptr, uintptr) { var buf [1288]byte; use(buf[:]); return Stackguard() }
+func stack1292() (uintptr, uintptr) { var buf [1292]byte; use(buf[:]); return Stackguard() }
+func stack1296() (uintptr, uintptr) { var buf [1296]byte; use(buf[:]); return Stackguard() }
+func stack1300() (uintptr, uintptr) { var buf [1300]byte; use(buf[:]); return Stackguard() }
+func stack1304() (uintptr, uintptr) { var buf [1304]byte; use(buf[:]); return Stackguard() }
+func stack1308() (uintptr, uintptr) { var buf [1308]byte; use(buf[:]); return Stackguard() }
+func stack1312() (uintptr, uintptr) { var buf [1312]byte; use(buf[:]); return Stackguard() }
+func stack1316() (uintptr, uintptr) { var buf [1316]byte; use(buf[:]); return Stackguard() }
+func stack1320() (uintptr, uintptr) { var buf [1320]byte; use(buf[:]); return Stackguard() }
+func stack1324() (uintptr, uintptr) { var buf [1324]byte; use(buf[:]); return Stackguard() }
+func stack1328() (uintptr, uintptr) { var buf [1328]byte; use(buf[:]); return Stackguard() }
+func stack1332() (uintptr, uintptr) { var buf [1332]byte; use(buf[:]); return Stackguard() }
+func stack1336() (uintptr, uintptr) { var buf [1336]byte; use(buf[:]); return Stackguard() }
+func stack1340() (uintptr, uintptr) { var buf [1340]byte; use(buf[:]); return Stackguard() }
+func stack1344() (uintptr, uintptr) { var buf [1344]byte; use(buf[:]); return Stackguard() }
+func stack1348() (uintptr, uintptr) { var buf [1348]byte; use(buf[:]); return Stackguard() }
+func stack1352() (uintptr, uintptr) { var buf [1352]byte; use(buf[:]); return Stackguard() }
+func stack1356() (uintptr, uintptr) { var buf [1356]byte; use(buf[:]); return Stackguard() }
+func stack1360() (uintptr, uintptr) { var buf [1360]byte; use(buf[:]); return Stackguard() }
+func stack1364() (uintptr, uintptr) { var buf [1364]byte; use(buf[:]); return Stackguard() }
+func stack1368() (uintptr, uintptr) { var buf [1368]byte; use(buf[:]); return Stackguard() }
+func stack1372() (uintptr, uintptr) { var buf [1372]byte; use(buf[:]); return Stackguard() }
+func stack1376() (uintptr, uintptr) { var buf [1376]byte; use(buf[:]); return Stackguard() }
+func stack1380() (uintptr, uintptr) { var buf [1380]byte; use(buf[:]); return Stackguard() }
+func stack1384() (uintptr, uintptr) { var buf [1384]byte; use(buf[:]); return Stackguard() }
+func stack1388() (uintptr, uintptr) { var buf [1388]byte; use(buf[:]); return Stackguard() }
+func stack1392() (uintptr, uintptr) { var buf [1392]byte; use(buf[:]); return Stackguard() }
+func stack1396() (uintptr, uintptr) { var buf [1396]byte; use(buf[:]); return Stackguard() }
+func stack1400() (uintptr, uintptr) { var buf [1400]byte; use(buf[:]); return Stackguard() }
+func stack1404() (uintptr, uintptr) { var buf [1404]byte; use(buf[:]); return Stackguard() }
+func stack1408() (uintptr, uintptr) { var buf [1408]byte; use(buf[:]); return Stackguard() }
+func stack1412() (uintptr, uintptr) { var buf [1412]byte; use(buf[:]); return Stackguard() }
+func stack1416() (uintptr, uintptr) { var buf [1416]byte; use(buf[:]); return Stackguard() }
+func stack1420() (uintptr, uintptr) { var buf [1420]byte; use(buf[:]); return Stackguard() }
+func stack1424() (uintptr, uintptr) { var buf [1424]byte; use(buf[:]); return Stackguard() }
+func stack1428() (uintptr, uintptr) { var buf [1428]byte; use(buf[:]); return Stackguard() }
+func stack1432() (uintptr, uintptr) { var buf [1432]byte; use(buf[:]); return Stackguard() }
+func stack1436() (uintptr, uintptr) { var buf [1436]byte; use(buf[:]); return Stackguard() }
+func stack1440() (uintptr, uintptr) { var buf [1440]byte; use(buf[:]); return Stackguard() }
+func stack1444() (uintptr, uintptr) { var buf [1444]byte; use(buf[:]); return Stackguard() }
+func stack1448() (uintptr, uintptr) { var buf [1448]byte; use(buf[:]); return Stackguard() }
+func stack1452() (uintptr, uintptr) { var buf [1452]byte; use(buf[:]); return Stackguard() }
+func stack1456() (uintptr, uintptr) { var buf [1456]byte; use(buf[:]); return Stackguard() }
+func stack1460() (uintptr, uintptr) { var buf [1460]byte; use(buf[:]); return Stackguard() }
+func stack1464() (uintptr, uintptr) { var buf [1464]byte; use(buf[:]); return Stackguard() }
+func stack1468() (uintptr, uintptr) { var buf [1468]byte; use(buf[:]); return Stackguard() }
+func stack1472() (uintptr, uintptr) { var buf [1472]byte; use(buf[:]); return Stackguard() }
+func stack1476() (uintptr, uintptr) { var buf [1476]byte; use(buf[:]); return Stackguard() }
+func stack1480() (uintptr, uintptr) { var buf [1480]byte; use(buf[:]); return Stackguard() }
+func stack1484() (uintptr, uintptr) { var buf [1484]byte; use(buf[:]); return Stackguard() }
+func stack1488() (uintptr, uintptr) { var buf [1488]byte; use(buf[:]); return Stackguard() }
+func stack1492() (uintptr, uintptr) { var buf [1492]byte; use(buf[:]); return Stackguard() }
+func stack1496() (uintptr, uintptr) { var buf [1496]byte; use(buf[:]); return Stackguard() }
+func stack1500() (uintptr, uintptr) { var buf [1500]byte; use(buf[:]); return Stackguard() }
+func stack1504() (uintptr, uintptr) { var buf [1504]byte; use(buf[:]); return Stackguard() }
+func stack1508() (uintptr, uintptr) { var buf [1508]byte; use(buf[:]); return Stackguard() }
+func stack1512() (uintptr, uintptr) { var buf [1512]byte; use(buf[:]); return Stackguard() }
+func stack1516() (uintptr, uintptr) { var buf [1516]byte; use(buf[:]); return Stackguard() }
+func stack1520() (uintptr, uintptr) { var buf [1520]byte; use(buf[:]); return Stackguard() }
+func stack1524() (uintptr, uintptr) { var buf [1524]byte; use(buf[:]); return Stackguard() }
+func stack1528() (uintptr, uintptr) { var buf [1528]byte; use(buf[:]); return Stackguard() }
+func stack1532() (uintptr, uintptr) { var buf [1532]byte; use(buf[:]); return Stackguard() }
+func stack1536() (uintptr, uintptr) { var buf [1536]byte; use(buf[:]); return Stackguard() }
+func stack1540() (uintptr, uintptr) { var buf [1540]byte; use(buf[:]); return Stackguard() }
+func stack1544() (uintptr, uintptr) { var buf [1544]byte; use(buf[:]); return Stackguard() }
+func stack1548() (uintptr, uintptr) { var buf [1548]byte; use(buf[:]); return Stackguard() }
+func stack1552() (uintptr, uintptr) { var buf [1552]byte; use(buf[:]); return Stackguard() }
+func stack1556() (uintptr, uintptr) { var buf [1556]byte; use(buf[:]); return Stackguard() }
+func stack1560() (uintptr, uintptr) { var buf [1560]byte; use(buf[:]); return Stackguard() }
+func stack1564() (uintptr, uintptr) { var buf [1564]byte; use(buf[:]); return Stackguard() }
+func stack1568() (uintptr, uintptr) { var buf [1568]byte; use(buf[:]); return Stackguard() }
+func stack1572() (uintptr, uintptr) { var buf [1572]byte; use(buf[:]); return Stackguard() }
+func stack1576() (uintptr, uintptr) { var buf [1576]byte; use(buf[:]); return Stackguard() }
+func stack1580() (uintptr, uintptr) { var buf [1580]byte; use(buf[:]); return Stackguard() }
+func stack1584() (uintptr, uintptr) { var buf [1584]byte; use(buf[:]); return Stackguard() }
+func stack1588() (uintptr, uintptr) { var buf [1588]byte; use(buf[:]); return Stackguard() }
+func stack1592() (uintptr, uintptr) { var buf [1592]byte; use(buf[:]); return Stackguard() }
+func stack1596() (uintptr, uintptr) { var buf [1596]byte; use(buf[:]); return Stackguard() }
+func stack1600() (uintptr, uintptr) { var buf [1600]byte; use(buf[:]); return Stackguard() }
+func stack1604() (uintptr, uintptr) { var buf [1604]byte; use(buf[:]); return Stackguard() }
+func stack1608() (uintptr, uintptr) { var buf [1608]byte; use(buf[:]); return Stackguard() }
+func stack1612() (uintptr, uintptr) { var buf [1612]byte; use(buf[:]); return Stackguard() }
+func stack1616() (uintptr, uintptr) { var buf [1616]byte; use(buf[:]); return Stackguard() }
+func stack1620() (uintptr, uintptr) { var buf [1620]byte; use(buf[:]); return Stackguard() }
+func stack1624() (uintptr, uintptr) { var buf [1624]byte; use(buf[:]); return Stackguard() }
+func stack1628() (uintptr, uintptr) { var buf [1628]byte; use(buf[:]); return Stackguard() }
+func stack1632() (uintptr, uintptr) { var buf [1632]byte; use(buf[:]); return Stackguard() }
+func stack1636() (uintptr, uintptr) { var buf [1636]byte; use(buf[:]); return Stackguard() }
+func stack1640() (uintptr, uintptr) { var buf [1640]byte; use(buf[:]); return Stackguard() }
+func stack1644() (uintptr, uintptr) { var buf [1644]byte; use(buf[:]); return Stackguard() }
+func stack1648() (uintptr, uintptr) { var buf [1648]byte; use(buf[:]); return Stackguard() }
+func stack1652() (uintptr, uintptr) { var buf [1652]byte; use(buf[:]); return Stackguard() }
+func stack1656() (uintptr, uintptr) { var buf [1656]byte; use(buf[:]); return Stackguard() }
+func stack1660() (uintptr, uintptr) { var buf [1660]byte; use(buf[:]); return Stackguard() }
+func stack1664() (uintptr, uintptr) { var buf [1664]byte; use(buf[:]); return Stackguard() }
+func stack1668() (uintptr, uintptr) { var buf [1668]byte; use(buf[:]); return Stackguard() }
+func stack1672() (uintptr, uintptr) { var buf [1672]byte; use(buf[:]); return Stackguard() }
+func stack1676() (uintptr, uintptr) { var buf [1676]byte; use(buf[:]); return Stackguard() }
+func stack1680() (uintptr, uintptr) { var buf [1680]byte; use(buf[:]); return Stackguard() }
+func stack1684() (uintptr, uintptr) { var buf [1684]byte; use(buf[:]); return Stackguard() }
+func stack1688() (uintptr, uintptr) { var buf [1688]byte; use(buf[:]); return Stackguard() }
+func stack1692() (uintptr, uintptr) { var buf [1692]byte; use(buf[:]); return Stackguard() }
+func stack1696() (uintptr, uintptr) { var buf [1696]byte; use(buf[:]); return Stackguard() }
+func stack1700() (uintptr, uintptr) { var buf [1700]byte; use(buf[:]); return Stackguard() }
+func stack1704() (uintptr, uintptr) { var buf [1704]byte; use(buf[:]); return Stackguard() }
+func stack1708() (uintptr, uintptr) { var buf [1708]byte; use(buf[:]); return Stackguard() }
+func stack1712() (uintptr, uintptr) { var buf [1712]byte; use(buf[:]); return Stackguard() }
+func stack1716() (uintptr, uintptr) { var buf [1716]byte; use(buf[:]); return Stackguard() }
+func stack1720() (uintptr, uintptr) { var buf [1720]byte; use(buf[:]); return Stackguard() }
+func stack1724() (uintptr, uintptr) { var buf [1724]byte; use(buf[:]); return Stackguard() }
+func stack1728() (uintptr, uintptr) { var buf [1728]byte; use(buf[:]); return Stackguard() }
+func stack1732() (uintptr, uintptr) { var buf [1732]byte; use(buf[:]); return Stackguard() }
+func stack1736() (uintptr, uintptr) { var buf [1736]byte; use(buf[:]); return Stackguard() }
+func stack1740() (uintptr, uintptr) { var buf [1740]byte; use(buf[:]); return Stackguard() }
+func stack1744() (uintptr, uintptr) { var buf [1744]byte; use(buf[:]); return Stackguard() }
+func stack1748() (uintptr, uintptr) { var buf [1748]byte; use(buf[:]); return Stackguard() }
+func stack1752() (uintptr, uintptr) { var buf [1752]byte; use(buf[:]); return Stackguard() }
+func stack1756() (uintptr, uintptr) { var buf [1756]byte; use(buf[:]); return Stackguard() }
+func stack1760() (uintptr, uintptr) { var buf [1760]byte; use(buf[:]); return Stackguard() }
+func stack1764() (uintptr, uintptr) { var buf [1764]byte; use(buf[:]); return Stackguard() }
+func stack1768() (uintptr, uintptr) { var buf [1768]byte; use(buf[:]); return Stackguard() }
+func stack1772() (uintptr, uintptr) { var buf [1772]byte; use(buf[:]); return Stackguard() }
+func stack1776() (uintptr, uintptr) { var buf [1776]byte; use(buf[:]); return Stackguard() }
+func stack1780() (uintptr, uintptr) { var buf [1780]byte; use(buf[:]); return Stackguard() }
+func stack1784() (uintptr, uintptr) { var buf [1784]byte; use(buf[:]); return Stackguard() }
+func stack1788() (uintptr, uintptr) { var buf [1788]byte; use(buf[:]); return Stackguard() }
+func stack1792() (uintptr, uintptr) { var buf [1792]byte; use(buf[:]); return Stackguard() }
+func stack1796() (uintptr, uintptr) { var buf [1796]byte; use(buf[:]); return Stackguard() }
+func stack1800() (uintptr, uintptr) { var buf [1800]byte; use(buf[:]); return Stackguard() }
+func stack1804() (uintptr, uintptr) { var buf [1804]byte; use(buf[:]); return Stackguard() }
+func stack1808() (uintptr, uintptr) { var buf [1808]byte; use(buf[:]); return Stackguard() }
+func stack1812() (uintptr, uintptr) { var buf [1812]byte; use(buf[:]); return Stackguard() }
+func stack1816() (uintptr, uintptr) { var buf [1816]byte; use(buf[:]); return Stackguard() }
+func stack1820() (uintptr, uintptr) { var buf [1820]byte; use(buf[:]); return Stackguard() }
+func stack1824() (uintptr, uintptr) { var buf [1824]byte; use(buf[:]); return Stackguard() }
+func stack1828() (uintptr, uintptr) { var buf [1828]byte; use(buf[:]); return Stackguard() }
+func stack1832() (uintptr, uintptr) { var buf [1832]byte; use(buf[:]); return Stackguard() }
+func stack1836() (uintptr, uintptr) { var buf [1836]byte; use(buf[:]); return Stackguard() }
+func stack1840() (uintptr, uintptr) { var buf [1840]byte; use(buf[:]); return Stackguard() }
+func stack1844() (uintptr, uintptr) { var buf [1844]byte; use(buf[:]); return Stackguard() }
+func stack1848() (uintptr, uintptr) { var buf [1848]byte; use(buf[:]); return Stackguard() }
+func stack1852() (uintptr, uintptr) { var buf [1852]byte; use(buf[:]); return Stackguard() }
+func stack1856() (uintptr, uintptr) { var buf [1856]byte; use(buf[:]); return Stackguard() }
+func stack1860() (uintptr, uintptr) { var buf [1860]byte; use(buf[:]); return Stackguard() }
+func stack1864() (uintptr, uintptr) { var buf [1864]byte; use(buf[:]); return Stackguard() }
+func stack1868() (uintptr, uintptr) { var buf [1868]byte; use(buf[:]); return Stackguard() }
+func stack1872() (uintptr, uintptr) { var buf [1872]byte; use(buf[:]); return Stackguard() }
+func stack1876() (uintptr, uintptr) { var buf [1876]byte; use(buf[:]); return Stackguard() }
+func stack1880() (uintptr, uintptr) { var buf [1880]byte; use(buf[:]); return Stackguard() }
+func stack1884() (uintptr, uintptr) { var buf [1884]byte; use(buf[:]); return Stackguard() }
+func stack1888() (uintptr, uintptr) { var buf [1888]byte; use(buf[:]); return Stackguard() }
+func stack1892() (uintptr, uintptr) { var buf [1892]byte; use(buf[:]); return Stackguard() }
+func stack1896() (uintptr, uintptr) { var buf [1896]byte; use(buf[:]); return Stackguard() }
+func stack1900() (uintptr, uintptr) { var buf [1900]byte; use(buf[:]); return Stackguard() }
+func stack1904() (uintptr, uintptr) { var buf [1904]byte; use(buf[:]); return Stackguard() }
+func stack1908() (uintptr, uintptr) { var buf [1908]byte; use(buf[:]); return Stackguard() }
+func stack1912() (uintptr, uintptr) { var buf [1912]byte; use(buf[:]); return Stackguard() }
+func stack1916() (uintptr, uintptr) { var buf [1916]byte; use(buf[:]); return Stackguard() }
+func stack1920() (uintptr, uintptr) { var buf [1920]byte; use(buf[:]); return Stackguard() }
+func stack1924() (uintptr, uintptr) { var buf [1924]byte; use(buf[:]); return Stackguard() }
+func stack1928() (uintptr, uintptr) { var buf [1928]byte; use(buf[:]); return Stackguard() }
+func stack1932() (uintptr, uintptr) { var buf [1932]byte; use(buf[:]); return Stackguard() }
+func stack1936() (uintptr, uintptr) { var buf [1936]byte; use(buf[:]); return Stackguard() }
+func stack1940() (uintptr, uintptr) { var buf [1940]byte; use(buf[:]); return Stackguard() }
+func stack1944() (uintptr, uintptr) { var buf [1944]byte; use(buf[:]); return Stackguard() }
+func stack1948() (uintptr, uintptr) { var buf [1948]byte; use(buf[:]); return Stackguard() }
+func stack1952() (uintptr, uintptr) { var buf [1952]byte; use(buf[:]); return Stackguard() }
+func stack1956() (uintptr, uintptr) { var buf [1956]byte; use(buf[:]); return Stackguard() }
+func stack1960() (uintptr, uintptr) { var buf [1960]byte; use(buf[:]); return Stackguard() }
+func stack1964() (uintptr, uintptr) { var buf [1964]byte; use(buf[:]); return Stackguard() }
+func stack1968() (uintptr, uintptr) { var buf [1968]byte; use(buf[:]); return Stackguard() }
+func stack1972() (uintptr, uintptr) { var buf [1972]byte; use(buf[:]); return Stackguard() }
+func stack1976() (uintptr, uintptr) { var buf [1976]byte; use(buf[:]); return Stackguard() }
+func stack1980() (uintptr, uintptr) { var buf [1980]byte; use(buf[:]); return Stackguard() }
+func stack1984() (uintptr, uintptr) { var buf [1984]byte; use(buf[:]); return Stackguard() }
+func stack1988() (uintptr, uintptr) { var buf [1988]byte; use(buf[:]); return Stackguard() }
+func stack1992() (uintptr, uintptr) { var buf [1992]byte; use(buf[:]); return Stackguard() }
+func stack1996() (uintptr, uintptr) { var buf [1996]byte; use(buf[:]); return Stackguard() }
+func stack2000() (uintptr, uintptr) { var buf [2000]byte; use(buf[:]); return Stackguard() }
+func stack2004() (uintptr, uintptr) { var buf [2004]byte; use(buf[:]); return Stackguard() }
+func stack2008() (uintptr, uintptr) { var buf [2008]byte; use(buf[:]); return Stackguard() }
+func stack2012() (uintptr, uintptr) { var buf [2012]byte; use(buf[:]); return Stackguard() }
+func stack2016() (uintptr, uintptr) { var buf [2016]byte; use(buf[:]); return Stackguard() }
+func stack2020() (uintptr, uintptr) { var buf [2020]byte; use(buf[:]); return Stackguard() }
+func stack2024() (uintptr, uintptr) { var buf [2024]byte; use(buf[:]); return Stackguard() }
+func stack2028() (uintptr, uintptr) { var buf [2028]byte; use(buf[:]); return Stackguard() }
+func stack2032() (uintptr, uintptr) { var buf [2032]byte; use(buf[:]); return Stackguard() }
+func stack2036() (uintptr, uintptr) { var buf [2036]byte; use(buf[:]); return Stackguard() }
+func stack2040() (uintptr, uintptr) { var buf [2040]byte; use(buf[:]); return Stackguard() }
+func stack2044() (uintptr, uintptr) { var buf [2044]byte; use(buf[:]); return Stackguard() }
+func stack2048() (uintptr, uintptr) { var buf [2048]byte; use(buf[:]); return Stackguard() }
+func stack2052() (uintptr, uintptr) { var buf [2052]byte; use(buf[:]); return Stackguard() }
+func stack2056() (uintptr, uintptr) { var buf [2056]byte; use(buf[:]); return Stackguard() }
+func stack2060() (uintptr, uintptr) { var buf [2060]byte; use(buf[:]); return Stackguard() }
+func stack2064() (uintptr, uintptr) { var buf [2064]byte; use(buf[:]); return Stackguard() }
+func stack2068() (uintptr, uintptr) { var buf [2068]byte; use(buf[:]); return Stackguard() }
+func stack2072() (uintptr, uintptr) { var buf [2072]byte; use(buf[:]); return Stackguard() }
+func stack2076() (uintptr, uintptr) { var buf [2076]byte; use(buf[:]); return Stackguard() }
+func stack2080() (uintptr, uintptr) { var buf [2080]byte; use(buf[:]); return Stackguard() }
+func stack2084() (uintptr, uintptr) { var buf [2084]byte; use(buf[:]); return Stackguard() }
+func stack2088() (uintptr, uintptr) { var buf [2088]byte; use(buf[:]); return Stackguard() }
+func stack2092() (uintptr, uintptr) { var buf [2092]byte; use(buf[:]); return Stackguard() }
+func stack2096() (uintptr, uintptr) { var buf [2096]byte; use(buf[:]); return Stackguard() }
+func stack2100() (uintptr, uintptr) { var buf [2100]byte; use(buf[:]); return Stackguard() }
+func stack2104() (uintptr, uintptr) { var buf [2104]byte; use(buf[:]); return Stackguard() }
+func stack2108() (uintptr, uintptr) { var buf [2108]byte; use(buf[:]); return Stackguard() }
+func stack2112() (uintptr, uintptr) { var buf [2112]byte; use(buf[:]); return Stackguard() }
+func stack2116() (uintptr, uintptr) { var buf [2116]byte; use(buf[:]); return Stackguard() }
+func stack2120() (uintptr, uintptr) { var buf [2120]byte; use(buf[:]); return Stackguard() }
+func stack2124() (uintptr, uintptr) { var buf [2124]byte; use(buf[:]); return Stackguard() }
+func stack2128() (uintptr, uintptr) { var buf [2128]byte; use(buf[:]); return Stackguard() }
+func stack2132() (uintptr, uintptr) { var buf [2132]byte; use(buf[:]); return Stackguard() }
+func stack2136() (uintptr, uintptr) { var buf [2136]byte; use(buf[:]); return Stackguard() }
+func stack2140() (uintptr, uintptr) { var buf [2140]byte; use(buf[:]); return Stackguard() }
+func stack2144() (uintptr, uintptr) { var buf [2144]byte; use(buf[:]); return Stackguard() }
+func stack2148() (uintptr, uintptr) { var buf [2148]byte; use(buf[:]); return Stackguard() }
+func stack2152() (uintptr, uintptr) { var buf [2152]byte; use(buf[:]); return Stackguard() }
+func stack2156() (uintptr, uintptr) { var buf [2156]byte; use(buf[:]); return Stackguard() }
+func stack2160() (uintptr, uintptr) { var buf [2160]byte; use(buf[:]); return Stackguard() }
+func stack2164() (uintptr, uintptr) { var buf [2164]byte; use(buf[:]); return Stackguard() }
+func stack2168() (uintptr, uintptr) { var buf [2168]byte; use(buf[:]); return Stackguard() }
+func stack2172() (uintptr, uintptr) { var buf [2172]byte; use(buf[:]); return Stackguard() }
+func stack2176() (uintptr, uintptr) { var buf [2176]byte; use(buf[:]); return Stackguard() }
+func stack2180() (uintptr, uintptr) { var buf [2180]byte; use(buf[:]); return Stackguard() }
+func stack2184() (uintptr, uintptr) { var buf [2184]byte; use(buf[:]); return Stackguard() }
+func stack2188() (uintptr, uintptr) { var buf [2188]byte; use(buf[:]); return Stackguard() }
+func stack2192() (uintptr, uintptr) { var buf [2192]byte; use(buf[:]); return Stackguard() }
+func stack2196() (uintptr, uintptr) { var buf [2196]byte; use(buf[:]); return Stackguard() }
+func stack2200() (uintptr, uintptr) { var buf [2200]byte; use(buf[:]); return Stackguard() }
+func stack2204() (uintptr, uintptr) { var buf [2204]byte; use(buf[:]); return Stackguard() }
+func stack2208() (uintptr, uintptr) { var buf [2208]byte; use(buf[:]); return Stackguard() }
+func stack2212() (uintptr, uintptr) { var buf [2212]byte; use(buf[:]); return Stackguard() }
+func stack2216() (uintptr, uintptr) { var buf [2216]byte; use(buf[:]); return Stackguard() }
+func stack2220() (uintptr, uintptr) { var buf [2220]byte; use(buf[:]); return Stackguard() }
+func stack2224() (uintptr, uintptr) { var buf [2224]byte; use(buf[:]); return Stackguard() }
+func stack2228() (uintptr, uintptr) { var buf [2228]byte; use(buf[:]); return Stackguard() }
+func stack2232() (uintptr, uintptr) { var buf [2232]byte; use(buf[:]); return Stackguard() }
+func stack2236() (uintptr, uintptr) { var buf [2236]byte; use(buf[:]); return Stackguard() }
+func stack2240() (uintptr, uintptr) { var buf [2240]byte; use(buf[:]); return Stackguard() }
+func stack2244() (uintptr, uintptr) { var buf [2244]byte; use(buf[:]); return Stackguard() }
+func stack2248() (uintptr, uintptr) { var buf [2248]byte; use(buf[:]); return Stackguard() }
+func stack2252() (uintptr, uintptr) { var buf [2252]byte; use(buf[:]); return Stackguard() }
+func stack2256() (uintptr, uintptr) { var buf [2256]byte; use(buf[:]); return Stackguard() }
+func stack2260() (uintptr, uintptr) { var buf [2260]byte; use(buf[:]); return Stackguard() }
+func stack2264() (uintptr, uintptr) { var buf [2264]byte; use(buf[:]); return Stackguard() }
+func stack2268() (uintptr, uintptr) { var buf [2268]byte; use(buf[:]); return Stackguard() }
+func stack2272() (uintptr, uintptr) { var buf [2272]byte; use(buf[:]); return Stackguard() }
+func stack2276() (uintptr, uintptr) { var buf [2276]byte; use(buf[:]); return Stackguard() }
+func stack2280() (uintptr, uintptr) { var buf [2280]byte; use(buf[:]); return Stackguard() }
+func stack2284() (uintptr, uintptr) { var buf [2284]byte; use(buf[:]); return Stackguard() }
+func stack2288() (uintptr, uintptr) { var buf [2288]byte; use(buf[:]); return Stackguard() }
+func stack2292() (uintptr, uintptr) { var buf [2292]byte; use(buf[:]); return Stackguard() }
+func stack2296() (uintptr, uintptr) { var buf [2296]byte; use(buf[:]); return Stackguard() }
+func stack2300() (uintptr, uintptr) { var buf [2300]byte; use(buf[:]); return Stackguard() }
+func stack2304() (uintptr, uintptr) { var buf [2304]byte; use(buf[:]); return Stackguard() }
+func stack2308() (uintptr, uintptr) { var buf [2308]byte; use(buf[:]); return Stackguard() }
+func stack2312() (uintptr, uintptr) { var buf [2312]byte; use(buf[:]); return Stackguard() }
+func stack2316() (uintptr, uintptr) { var buf [2316]byte; use(buf[:]); return Stackguard() }
+func stack2320() (uintptr, uintptr) { var buf [2320]byte; use(buf[:]); return Stackguard() }
+func stack2324() (uintptr, uintptr) { var buf [2324]byte; use(buf[:]); return Stackguard() }
+func stack2328() (uintptr, uintptr) { var buf [2328]byte; use(buf[:]); return Stackguard() }
+func stack2332() (uintptr, uintptr) { var buf [2332]byte; use(buf[:]); return Stackguard() }
+func stack2336() (uintptr, uintptr) { var buf [2336]byte; use(buf[:]); return Stackguard() }
+func stack2340() (uintptr, uintptr) { var buf [2340]byte; use(buf[:]); return Stackguard() }
+func stack2344() (uintptr, uintptr) { var buf [2344]byte; use(buf[:]); return Stackguard() }
+func stack2348() (uintptr, uintptr) { var buf [2348]byte; use(buf[:]); return Stackguard() }
+func stack2352() (uintptr, uintptr) { var buf [2352]byte; use(buf[:]); return Stackguard() }
+func stack2356() (uintptr, uintptr) { var buf [2356]byte; use(buf[:]); return Stackguard() }
+func stack2360() (uintptr, uintptr) { var buf [2360]byte; use(buf[:]); return Stackguard() }
+func stack2364() (uintptr, uintptr) { var buf [2364]byte; use(buf[:]); return Stackguard() }
+func stack2368() (uintptr, uintptr) { var buf [2368]byte; use(buf[:]); return Stackguard() }
+func stack2372() (uintptr, uintptr) { var buf [2372]byte; use(buf[:]); return Stackguard() }
+func stack2376() (uintptr, uintptr) { var buf [2376]byte; use(buf[:]); return Stackguard() }
+func stack2380() (uintptr, uintptr) { var buf [2380]byte; use(buf[:]); return Stackguard() }
+func stack2384() (uintptr, uintptr) { var buf [2384]byte; use(buf[:]); return Stackguard() }
+func stack2388() (uintptr, uintptr) { var buf [2388]byte; use(buf[:]); return Stackguard() }
+func stack2392() (uintptr, uintptr) { var buf [2392]byte; use(buf[:]); return Stackguard() }
+func stack2396() (uintptr, uintptr) { var buf [2396]byte; use(buf[:]); return Stackguard() }
+func stack2400() (uintptr, uintptr) { var buf [2400]byte; use(buf[:]); return Stackguard() }
+func stack2404() (uintptr, uintptr) { var buf [2404]byte; use(buf[:]); return Stackguard() }
+func stack2408() (uintptr, uintptr) { var buf [2408]byte; use(buf[:]); return Stackguard() }
+func stack2412() (uintptr, uintptr) { var buf [2412]byte; use(buf[:]); return Stackguard() }
+func stack2416() (uintptr, uintptr) { var buf [2416]byte; use(buf[:]); return Stackguard() }
+func stack2420() (uintptr, uintptr) { var buf [2420]byte; use(buf[:]); return Stackguard() }
+func stack2424() (uintptr, uintptr) { var buf [2424]byte; use(buf[:]); return Stackguard() }
+func stack2428() (uintptr, uintptr) { var buf [2428]byte; use(buf[:]); return Stackguard() }
+func stack2432() (uintptr, uintptr) { var buf [2432]byte; use(buf[:]); return Stackguard() }
+func stack2436() (uintptr, uintptr) { var buf [2436]byte; use(buf[:]); return Stackguard() }
+func stack2440() (uintptr, uintptr) { var buf [2440]byte; use(buf[:]); return Stackguard() }
+func stack2444() (uintptr, uintptr) { var buf [2444]byte; use(buf[:]); return Stackguard() }
+func stack2448() (uintptr, uintptr) { var buf [2448]byte; use(buf[:]); return Stackguard() }
+func stack2452() (uintptr, uintptr) { var buf [2452]byte; use(buf[:]); return Stackguard() }
+func stack2456() (uintptr, uintptr) { var buf [2456]byte; use(buf[:]); return Stackguard() }
+func stack2460() (uintptr, uintptr) { var buf [2460]byte; use(buf[:]); return Stackguard() }
+func stack2464() (uintptr, uintptr) { var buf [2464]byte; use(buf[:]); return Stackguard() }
+func stack2468() (uintptr, uintptr) { var buf [2468]byte; use(buf[:]); return Stackguard() }
+func stack2472() (uintptr, uintptr) { var buf [2472]byte; use(buf[:]); return Stackguard() }
+func stack2476() (uintptr, uintptr) { var buf [2476]byte; use(buf[:]); return Stackguard() }
+func stack2480() (uintptr, uintptr) { var buf [2480]byte; use(buf[:]); return Stackguard() }
+func stack2484() (uintptr, uintptr) { var buf [2484]byte; use(buf[:]); return Stackguard() }
+func stack2488() (uintptr, uintptr) { var buf [2488]byte; use(buf[:]); return Stackguard() }
+func stack2492() (uintptr, uintptr) { var buf [2492]byte; use(buf[:]); return Stackguard() }
+func stack2496() (uintptr, uintptr) { var buf [2496]byte; use(buf[:]); return Stackguard() }
+func stack2500() (uintptr, uintptr) { var buf [2500]byte; use(buf[:]); return Stackguard() }
+func stack2504() (uintptr, uintptr) { var buf [2504]byte; use(buf[:]); return Stackguard() }
+func stack2508() (uintptr, uintptr) { var buf [2508]byte; use(buf[:]); return Stackguard() }
+func stack2512() (uintptr, uintptr) { var buf [2512]byte; use(buf[:]); return Stackguard() }
+func stack2516() (uintptr, uintptr) { var buf [2516]byte; use(buf[:]); return Stackguard() }
+func stack2520() (uintptr, uintptr) { var buf [2520]byte; use(buf[:]); return Stackguard() }
+func stack2524() (uintptr, uintptr) { var buf [2524]byte; use(buf[:]); return Stackguard() }
+func stack2528() (uintptr, uintptr) { var buf [2528]byte; use(buf[:]); return Stackguard() }
+func stack2532() (uintptr, uintptr) { var buf [2532]byte; use(buf[:]); return Stackguard() }
+func stack2536() (uintptr, uintptr) { var buf [2536]byte; use(buf[:]); return Stackguard() }
+func stack2540() (uintptr, uintptr) { var buf [2540]byte; use(buf[:]); return Stackguard() }
+func stack2544() (uintptr, uintptr) { var buf [2544]byte; use(buf[:]); return Stackguard() }
+func stack2548() (uintptr, uintptr) { var buf [2548]byte; use(buf[:]); return Stackguard() }
+func stack2552() (uintptr, uintptr) { var buf [2552]byte; use(buf[:]); return Stackguard() }
+func stack2556() (uintptr, uintptr) { var buf [2556]byte; use(buf[:]); return Stackguard() }
+func stack2560() (uintptr, uintptr) { var buf [2560]byte; use(buf[:]); return Stackguard() }
+func stack2564() (uintptr, uintptr) { var buf [2564]byte; use(buf[:]); return Stackguard() }
+func stack2568() (uintptr, uintptr) { var buf [2568]byte; use(buf[:]); return Stackguard() }
+func stack2572() (uintptr, uintptr) { var buf [2572]byte; use(buf[:]); return Stackguard() }
+func stack2576() (uintptr, uintptr) { var buf [2576]byte; use(buf[:]); return Stackguard() }
+func stack2580() (uintptr, uintptr) { var buf [2580]byte; use(buf[:]); return Stackguard() }
+func stack2584() (uintptr, uintptr) { var buf [2584]byte; use(buf[:]); return Stackguard() }
+func stack2588() (uintptr, uintptr) { var buf [2588]byte; use(buf[:]); return Stackguard() }
+func stack2592() (uintptr, uintptr) { var buf [2592]byte; use(buf[:]); return Stackguard() }
+func stack2596() (uintptr, uintptr) { var buf [2596]byte; use(buf[:]); return Stackguard() }
+func stack2600() (uintptr, uintptr) { var buf [2600]byte; use(buf[:]); return Stackguard() }
+func stack2604() (uintptr, uintptr) { var buf [2604]byte; use(buf[:]); return Stackguard() }
+func stack2608() (uintptr, uintptr) { var buf [2608]byte; use(buf[:]); return Stackguard() }
+func stack2612() (uintptr, uintptr) { var buf [2612]byte; use(buf[:]); return Stackguard() }
+func stack2616() (uintptr, uintptr) { var buf [2616]byte; use(buf[:]); return Stackguard() }
+func stack2620() (uintptr, uintptr) { var buf [2620]byte; use(buf[:]); return Stackguard() }
+func stack2624() (uintptr, uintptr) { var buf [2624]byte; use(buf[:]); return Stackguard() }
+func stack2628() (uintptr, uintptr) { var buf [2628]byte; use(buf[:]); return Stackguard() }
+func stack2632() (uintptr, uintptr) { var buf [2632]byte; use(buf[:]); return Stackguard() }
+func stack2636() (uintptr, uintptr) { var buf [2636]byte; use(buf[:]); return Stackguard() }
+func stack2640() (uintptr, uintptr) { var buf [2640]byte; use(buf[:]); return Stackguard() }
+func stack2644() (uintptr, uintptr) { var buf [2644]byte; use(buf[:]); return Stackguard() }
+func stack2648() (uintptr, uintptr) { var buf [2648]byte; use(buf[:]); return Stackguard() }
+func stack2652() (uintptr, uintptr) { var buf [2652]byte; use(buf[:]); return Stackguard() }
+func stack2656() (uintptr, uintptr) { var buf [2656]byte; use(buf[:]); return Stackguard() }
+func stack2660() (uintptr, uintptr) { var buf [2660]byte; use(buf[:]); return Stackguard() }
+func stack2664() (uintptr, uintptr) { var buf [2664]byte; use(buf[:]); return Stackguard() }
+func stack2668() (uintptr, uintptr) { var buf [2668]byte; use(buf[:]); return Stackguard() }
+func stack2672() (uintptr, uintptr) { var buf [2672]byte; use(buf[:]); return Stackguard() }
+func stack2676() (uintptr, uintptr) { var buf [2676]byte; use(buf[:]); return Stackguard() }
+func stack2680() (uintptr, uintptr) { var buf [2680]byte; use(buf[:]); return Stackguard() }
+func stack2684() (uintptr, uintptr) { var buf [2684]byte; use(buf[:]); return Stackguard() }
+func stack2688() (uintptr, uintptr) { var buf [2688]byte; use(buf[:]); return Stackguard() }
+func stack2692() (uintptr, uintptr) { var buf [2692]byte; use(buf[:]); return Stackguard() }
+func stack2696() (uintptr, uintptr) { var buf [2696]byte; use(buf[:]); return Stackguard() }
+func stack2700() (uintptr, uintptr) { var buf [2700]byte; use(buf[:]); return Stackguard() }
+func stack2704() (uintptr, uintptr) { var buf [2704]byte; use(buf[:]); return Stackguard() }
+func stack2708() (uintptr, uintptr) { var buf [2708]byte; use(buf[:]); return Stackguard() }
+func stack2712() (uintptr, uintptr) { var buf [2712]byte; use(buf[:]); return Stackguard() }
+func stack2716() (uintptr, uintptr) { var buf [2716]byte; use(buf[:]); return Stackguard() }
+func stack2720() (uintptr, uintptr) { var buf [2720]byte; use(buf[:]); return Stackguard() }
+func stack2724() (uintptr, uintptr) { var buf [2724]byte; use(buf[:]); return Stackguard() }
+func stack2728() (uintptr, uintptr) { var buf [2728]byte; use(buf[:]); return Stackguard() }
+func stack2732() (uintptr, uintptr) { var buf [2732]byte; use(buf[:]); return Stackguard() }
+func stack2736() (uintptr, uintptr) { var buf [2736]byte; use(buf[:]); return Stackguard() }
+func stack2740() (uintptr, uintptr) { var buf [2740]byte; use(buf[:]); return Stackguard() }
+func stack2744() (uintptr, uintptr) { var buf [2744]byte; use(buf[:]); return Stackguard() }
+func stack2748() (uintptr, uintptr) { var buf [2748]byte; use(buf[:]); return Stackguard() }
+func stack2752() (uintptr, uintptr) { var buf [2752]byte; use(buf[:]); return Stackguard() }
+func stack2756() (uintptr, uintptr) { var buf [2756]byte; use(buf[:]); return Stackguard() }
+func stack2760() (uintptr, uintptr) { var buf [2760]byte; use(buf[:]); return Stackguard() }
+func stack2764() (uintptr, uintptr) { var buf [2764]byte; use(buf[:]); return Stackguard() }
+func stack2768() (uintptr, uintptr) { var buf [2768]byte; use(buf[:]); return Stackguard() }
+func stack2772() (uintptr, uintptr) { var buf [2772]byte; use(buf[:]); return Stackguard() }
+func stack2776() (uintptr, uintptr) { var buf [2776]byte; use(buf[:]); return Stackguard() }
+func stack2780() (uintptr, uintptr) { var buf [2780]byte; use(buf[:]); return Stackguard() }
+func stack2784() (uintptr, uintptr) { var buf [2784]byte; use(buf[:]); return Stackguard() }
+func stack2788() (uintptr, uintptr) { var buf [2788]byte; use(buf[:]); return Stackguard() }
+func stack2792() (uintptr, uintptr) { var buf [2792]byte; use(buf[:]); return Stackguard() }
+func stack2796() (uintptr, uintptr) { var buf [2796]byte; use(buf[:]); return Stackguard() }
+func stack2800() (uintptr, uintptr) { var buf [2800]byte; use(buf[:]); return Stackguard() }
+func stack2804() (uintptr, uintptr) { var buf [2804]byte; use(buf[:]); return Stackguard() }
+func stack2808() (uintptr, uintptr) { var buf [2808]byte; use(buf[:]); return Stackguard() }
+func stack2812() (uintptr, uintptr) { var buf [2812]byte; use(buf[:]); return Stackguard() }
+func stack2816() (uintptr, uintptr) { var buf [2816]byte; use(buf[:]); return Stackguard() }
+func stack2820() (uintptr, uintptr) { var buf [2820]byte; use(buf[:]); return Stackguard() }
+func stack2824() (uintptr, uintptr) { var buf [2824]byte; use(buf[:]); return Stackguard() }
+func stack2828() (uintptr, uintptr) { var buf [2828]byte; use(buf[:]); return Stackguard() }
+func stack2832() (uintptr, uintptr) { var buf [2832]byte; use(buf[:]); return Stackguard() }
+func stack2836() (uintptr, uintptr) { var buf [2836]byte; use(buf[:]); return Stackguard() }
+func stack2840() (uintptr, uintptr) { var buf [2840]byte; use(buf[:]); return Stackguard() }
+func stack2844() (uintptr, uintptr) { var buf [2844]byte; use(buf[:]); return Stackguard() }
+func stack2848() (uintptr, uintptr) { var buf [2848]byte; use(buf[:]); return Stackguard() }
+func stack2852() (uintptr, uintptr) { var buf [2852]byte; use(buf[:]); return Stackguard() }
+func stack2856() (uintptr, uintptr) { var buf [2856]byte; use(buf[:]); return Stackguard() }
+func stack2860() (uintptr, uintptr) { var buf [2860]byte; use(buf[:]); return Stackguard() }
+func stack2864() (uintptr, uintptr) { var buf [2864]byte; use(buf[:]); return Stackguard() }
+func stack2868() (uintptr, uintptr) { var buf [2868]byte; use(buf[:]); return Stackguard() }
+func stack2872() (uintptr, uintptr) { var buf [2872]byte; use(buf[:]); return Stackguard() }
+func stack2876() (uintptr, uintptr) { var buf [2876]byte; use(buf[:]); return Stackguard() }
+func stack2880() (uintptr, uintptr) { var buf [2880]byte; use(buf[:]); return Stackguard() }
+func stack2884() (uintptr, uintptr) { var buf [2884]byte; use(buf[:]); return Stackguard() }
+func stack2888() (uintptr, uintptr) { var buf [2888]byte; use(buf[:]); return Stackguard() }
+func stack2892() (uintptr, uintptr) { var buf [2892]byte; use(buf[:]); return Stackguard() }
+func stack2896() (uintptr, uintptr) { var buf [2896]byte; use(buf[:]); return Stackguard() }
+func stack2900() (uintptr, uintptr) { var buf [2900]byte; use(buf[:]); return Stackguard() }
+func stack2904() (uintptr, uintptr) { var buf [2904]byte; use(buf[:]); return Stackguard() }
+func stack2908() (uintptr, uintptr) { var buf [2908]byte; use(buf[:]); return Stackguard() }
+func stack2912() (uintptr, uintptr) { var buf [2912]byte; use(buf[:]); return Stackguard() }
+func stack2916() (uintptr, uintptr) { var buf [2916]byte; use(buf[:]); return Stackguard() }
+func stack2920() (uintptr, uintptr) { var buf [2920]byte; use(buf[:]); return Stackguard() }
+func stack2924() (uintptr, uintptr) { var buf [2924]byte; use(buf[:]); return Stackguard() }
+func stack2928() (uintptr, uintptr) { var buf [2928]byte; use(buf[:]); return Stackguard() }
+func stack2932() (uintptr, uintptr) { var buf [2932]byte; use(buf[:]); return Stackguard() }
+func stack2936() (uintptr, uintptr) { var buf [2936]byte; use(buf[:]); return Stackguard() }
+func stack2940() (uintptr, uintptr) { var buf [2940]byte; use(buf[:]); return Stackguard() }
+func stack2944() (uintptr, uintptr) { var buf [2944]byte; use(buf[:]); return Stackguard() }
+func stack2948() (uintptr, uintptr) { var buf [2948]byte; use(buf[:]); return Stackguard() }
+func stack2952() (uintptr, uintptr) { var buf [2952]byte; use(buf[:]); return Stackguard() }
+func stack2956() (uintptr, uintptr) { var buf [2956]byte; use(buf[:]); return Stackguard() }
+func stack2960() (uintptr, uintptr) { var buf [2960]byte; use(buf[:]); return Stackguard() }
+func stack2964() (uintptr, uintptr) { var buf [2964]byte; use(buf[:]); return Stackguard() }
+func stack2968() (uintptr, uintptr) { var buf [2968]byte; use(buf[:]); return Stackguard() }
+func stack2972() (uintptr, uintptr) { var buf [2972]byte; use(buf[:]); return Stackguard() }
+func stack2976() (uintptr, uintptr) { var buf [2976]byte; use(buf[:]); return Stackguard() }
+func stack2980() (uintptr, uintptr) { var buf [2980]byte; use(buf[:]); return Stackguard() }
+func stack2984() (uintptr, uintptr) { var buf [2984]byte; use(buf[:]); return Stackguard() }
+func stack2988() (uintptr, uintptr) { var buf [2988]byte; use(buf[:]); return Stackguard() }
+func stack2992() (uintptr, uintptr) { var buf [2992]byte; use(buf[:]); return Stackguard() }
+func stack2996() (uintptr, uintptr) { var buf [2996]byte; use(buf[:]); return Stackguard() }
+func stack3000() (uintptr, uintptr) { var buf [3000]byte; use(buf[:]); return Stackguard() }
+func stack3004() (uintptr, uintptr) { var buf [3004]byte; use(buf[:]); return Stackguard() }
+func stack3008() (uintptr, uintptr) { var buf [3008]byte; use(buf[:]); return Stackguard() }
+func stack3012() (uintptr, uintptr) { var buf [3012]byte; use(buf[:]); return Stackguard() }
+func stack3016() (uintptr, uintptr) { var buf [3016]byte; use(buf[:]); return Stackguard() }
+func stack3020() (uintptr, uintptr) { var buf [3020]byte; use(buf[:]); return Stackguard() }
+func stack3024() (uintptr, uintptr) { var buf [3024]byte; use(buf[:]); return Stackguard() }
+func stack3028() (uintptr, uintptr) { var buf [3028]byte; use(buf[:]); return Stackguard() }
+func stack3032() (uintptr, uintptr) { var buf [3032]byte; use(buf[:]); return Stackguard() }
+func stack3036() (uintptr, uintptr) { var buf [3036]byte; use(buf[:]); return Stackguard() }
+func stack3040() (uintptr, uintptr) { var buf [3040]byte; use(buf[:]); return Stackguard() }
+func stack3044() (uintptr, uintptr) { var buf [3044]byte; use(buf[:]); return Stackguard() }
+func stack3048() (uintptr, uintptr) { var buf [3048]byte; use(buf[:]); return Stackguard() }
+func stack3052() (uintptr, uintptr) { var buf [3052]byte; use(buf[:]); return Stackguard() }
+func stack3056() (uintptr, uintptr) { var buf [3056]byte; use(buf[:]); return Stackguard() }
+func stack3060() (uintptr, uintptr) { var buf [3060]byte; use(buf[:]); return Stackguard() }
+func stack3064() (uintptr, uintptr) { var buf [3064]byte; use(buf[:]); return Stackguard() }
+func stack3068() (uintptr, uintptr) { var buf [3068]byte; use(buf[:]); return Stackguard() }
+func stack3072() (uintptr, uintptr) { var buf [3072]byte; use(buf[:]); return Stackguard() }
+func stack3076() (uintptr, uintptr) { var buf [3076]byte; use(buf[:]); return Stackguard() }
+func stack3080() (uintptr, uintptr) { var buf [3080]byte; use(buf[:]); return Stackguard() }
+func stack3084() (uintptr, uintptr) { var buf [3084]byte; use(buf[:]); return Stackguard() }
+func stack3088() (uintptr, uintptr) { var buf [3088]byte; use(buf[:]); return Stackguard() }
+func stack3092() (uintptr, uintptr) { var buf [3092]byte; use(buf[:]); return Stackguard() }
+func stack3096() (uintptr, uintptr) { var buf [3096]byte; use(buf[:]); return Stackguard() }
+func stack3100() (uintptr, uintptr) { var buf [3100]byte; use(buf[:]); return Stackguard() }
+func stack3104() (uintptr, uintptr) { var buf [3104]byte; use(buf[:]); return Stackguard() }
+func stack3108() (uintptr, uintptr) { var buf [3108]byte; use(buf[:]); return Stackguard() }
+func stack3112() (uintptr, uintptr) { var buf [3112]byte; use(buf[:]); return Stackguard() }
+func stack3116() (uintptr, uintptr) { var buf [3116]byte; use(buf[:]); return Stackguard() }
+func stack3120() (uintptr, uintptr) { var buf [3120]byte; use(buf[:]); return Stackguard() }
+func stack3124() (uintptr, uintptr) { var buf [3124]byte; use(buf[:]); return Stackguard() }
+func stack3128() (uintptr, uintptr) { var buf [3128]byte; use(buf[:]); return Stackguard() }
+func stack3132() (uintptr, uintptr) { var buf [3132]byte; use(buf[:]); return Stackguard() }
+func stack3136() (uintptr, uintptr) { var buf [3136]byte; use(buf[:]); return Stackguard() }
+func stack3140() (uintptr, uintptr) { var buf [3140]byte; use(buf[:]); return Stackguard() }
+func stack3144() (uintptr, uintptr) { var buf [3144]byte; use(buf[:]); return Stackguard() }
+func stack3148() (uintptr, uintptr) { var buf [3148]byte; use(buf[:]); return Stackguard() }
+func stack3152() (uintptr, uintptr) { var buf [3152]byte; use(buf[:]); return Stackguard() }
+func stack3156() (uintptr, uintptr) { var buf [3156]byte; use(buf[:]); return Stackguard() }
+func stack3160() (uintptr, uintptr) { var buf [3160]byte; use(buf[:]); return Stackguard() }
+func stack3164() (uintptr, uintptr) { var buf [3164]byte; use(buf[:]); return Stackguard() }
+func stack3168() (uintptr, uintptr) { var buf [3168]byte; use(buf[:]); return Stackguard() }
+func stack3172() (uintptr, uintptr) { var buf [3172]byte; use(buf[:]); return Stackguard() }
+func stack3176() (uintptr, uintptr) { var buf [3176]byte; use(buf[:]); return Stackguard() }
+func stack3180() (uintptr, uintptr) { var buf [3180]byte; use(buf[:]); return Stackguard() }
+func stack3184() (uintptr, uintptr) { var buf [3184]byte; use(buf[:]); return Stackguard() }
+func stack3188() (uintptr, uintptr) { var buf [3188]byte; use(buf[:]); return Stackguard() }
+func stack3192() (uintptr, uintptr) { var buf [3192]byte; use(buf[:]); return Stackguard() }
+func stack3196() (uintptr, uintptr) { var buf [3196]byte; use(buf[:]); return Stackguard() }
+func stack3200() (uintptr, uintptr) { var buf [3200]byte; use(buf[:]); return Stackguard() }
+func stack3204() (uintptr, uintptr) { var buf [3204]byte; use(buf[:]); return Stackguard() }
+func stack3208() (uintptr, uintptr) { var buf [3208]byte; use(buf[:]); return Stackguard() }
+func stack3212() (uintptr, uintptr) { var buf [3212]byte; use(buf[:]); return Stackguard() }
+func stack3216() (uintptr, uintptr) { var buf [3216]byte; use(buf[:]); return Stackguard() }
+func stack3220() (uintptr, uintptr) { var buf [3220]byte; use(buf[:]); return Stackguard() }
+func stack3224() (uintptr, uintptr) { var buf [3224]byte; use(buf[:]); return Stackguard() }
+func stack3228() (uintptr, uintptr) { var buf [3228]byte; use(buf[:]); return Stackguard() }
+func stack3232() (uintptr, uintptr) { var buf [3232]byte; use(buf[:]); return Stackguard() }
+func stack3236() (uintptr, uintptr) { var buf [3236]byte; use(buf[:]); return Stackguard() }
+func stack3240() (uintptr, uintptr) { var buf [3240]byte; use(buf[:]); return Stackguard() }
+func stack3244() (uintptr, uintptr) { var buf [3244]byte; use(buf[:]); return Stackguard() }
+func stack3248() (uintptr, uintptr) { var buf [3248]byte; use(buf[:]); return Stackguard() }
+func stack3252() (uintptr, uintptr) { var buf [3252]byte; use(buf[:]); return Stackguard() }
+func stack3256() (uintptr, uintptr) { var buf [3256]byte; use(buf[:]); return Stackguard() }
+func stack3260() (uintptr, uintptr) { var buf [3260]byte; use(buf[:]); return Stackguard() }
+func stack3264() (uintptr, uintptr) { var buf [3264]byte; use(buf[:]); return Stackguard() }
+func stack3268() (uintptr, uintptr) { var buf [3268]byte; use(buf[:]); return Stackguard() }
+func stack3272() (uintptr, uintptr) { var buf [3272]byte; use(buf[:]); return Stackguard() }
+func stack3276() (uintptr, uintptr) { var buf [3276]byte; use(buf[:]); return Stackguard() }
+func stack3280() (uintptr, uintptr) { var buf [3280]byte; use(buf[:]); return Stackguard() }
+func stack3284() (uintptr, uintptr) { var buf [3284]byte; use(buf[:]); return Stackguard() }
+func stack3288() (uintptr, uintptr) { var buf [3288]byte; use(buf[:]); return Stackguard() }
+func stack3292() (uintptr, uintptr) { var buf [3292]byte; use(buf[:]); return Stackguard() }
+func stack3296() (uintptr, uintptr) { var buf [3296]byte; use(buf[:]); return Stackguard() }
+func stack3300() (uintptr, uintptr) { var buf [3300]byte; use(buf[:]); return Stackguard() }
+func stack3304() (uintptr, uintptr) { var buf [3304]byte; use(buf[:]); return Stackguard() }
+func stack3308() (uintptr, uintptr) { var buf [3308]byte; use(buf[:]); return Stackguard() }
+func stack3312() (uintptr, uintptr) { var buf [3312]byte; use(buf[:]); return Stackguard() }
+func stack3316() (uintptr, uintptr) { var buf [3316]byte; use(buf[:]); return Stackguard() }
+func stack3320() (uintptr, uintptr) { var buf [3320]byte; use(buf[:]); return Stackguard() }
+func stack3324() (uintptr, uintptr) { var buf [3324]byte; use(buf[:]); return Stackguard() }
+func stack3328() (uintptr, uintptr) { var buf [3328]byte; use(buf[:]); return Stackguard() }
+func stack3332() (uintptr, uintptr) { var buf [3332]byte; use(buf[:]); return Stackguard() }
+func stack3336() (uintptr, uintptr) { var buf [3336]byte; use(buf[:]); return Stackguard() }
+func stack3340() (uintptr, uintptr) { var buf [3340]byte; use(buf[:]); return Stackguard() }
+func stack3344() (uintptr, uintptr) { var buf [3344]byte; use(buf[:]); return Stackguard() }
+func stack3348() (uintptr, uintptr) { var buf [3348]byte; use(buf[:]); return Stackguard() }
+func stack3352() (uintptr, uintptr) { var buf [3352]byte; use(buf[:]); return Stackguard() }
+func stack3356() (uintptr, uintptr) { var buf [3356]byte; use(buf[:]); return Stackguard() }
+func stack3360() (uintptr, uintptr) { var buf [3360]byte; use(buf[:]); return Stackguard() }
+func stack3364() (uintptr, uintptr) { var buf [3364]byte; use(buf[:]); return Stackguard() }
+func stack3368() (uintptr, uintptr) { var buf [3368]byte; use(buf[:]); return Stackguard() }
+func stack3372() (uintptr, uintptr) { var buf [3372]byte; use(buf[:]); return Stackguard() }
+func stack3376() (uintptr, uintptr) { var buf [3376]byte; use(buf[:]); return Stackguard() }
+func stack3380() (uintptr, uintptr) { var buf [3380]byte; use(buf[:]); return Stackguard() }
+func stack3384() (uintptr, uintptr) { var buf [3384]byte; use(buf[:]); return Stackguard() }
+func stack3388() (uintptr, uintptr) { var buf [3388]byte; use(buf[:]); return Stackguard() }
+func stack3392() (uintptr, uintptr) { var buf [3392]byte; use(buf[:]); return Stackguard() }
+func stack3396() (uintptr, uintptr) { var buf [3396]byte; use(buf[:]); return Stackguard() }
+func stack3400() (uintptr, uintptr) { var buf [3400]byte; use(buf[:]); return Stackguard() }
+func stack3404() (uintptr, uintptr) { var buf [3404]byte; use(buf[:]); return Stackguard() }
+func stack3408() (uintptr, uintptr) { var buf [3408]byte; use(buf[:]); return Stackguard() }
+func stack3412() (uintptr, uintptr) { var buf [3412]byte; use(buf[:]); return Stackguard() }
+func stack3416() (uintptr, uintptr) { var buf [3416]byte; use(buf[:]); return Stackguard() }
+func stack3420() (uintptr, uintptr) { var buf [3420]byte; use(buf[:]); return Stackguard() }
+func stack3424() (uintptr, uintptr) { var buf [3424]byte; use(buf[:]); return Stackguard() }
+func stack3428() (uintptr, uintptr) { var buf [3428]byte; use(buf[:]); return Stackguard() }
+func stack3432() (uintptr, uintptr) { var buf [3432]byte; use(buf[:]); return Stackguard() }
+func stack3436() (uintptr, uintptr) { var buf [3436]byte; use(buf[:]); return Stackguard() }
+func stack3440() (uintptr, uintptr) { var buf [3440]byte; use(buf[:]); return Stackguard() }
+func stack3444() (uintptr, uintptr) { var buf [3444]byte; use(buf[:]); return Stackguard() }
+func stack3448() (uintptr, uintptr) { var buf [3448]byte; use(buf[:]); return Stackguard() }
+func stack3452() (uintptr, uintptr) { var buf [3452]byte; use(buf[:]); return Stackguard() }
+func stack3456() (uintptr, uintptr) { var buf [3456]byte; use(buf[:]); return Stackguard() }
+func stack3460() (uintptr, uintptr) { var buf [3460]byte; use(buf[:]); return Stackguard() }
+func stack3464() (uintptr, uintptr) { var buf [3464]byte; use(buf[:]); return Stackguard() }
+func stack3468() (uintptr, uintptr) { var buf [3468]byte; use(buf[:]); return Stackguard() }
+func stack3472() (uintptr, uintptr) { var buf [3472]byte; use(buf[:]); return Stackguard() }
+func stack3476() (uintptr, uintptr) { var buf [3476]byte; use(buf[:]); return Stackguard() }
+func stack3480() (uintptr, uintptr) { var buf [3480]byte; use(buf[:]); return Stackguard() }
+func stack3484() (uintptr, uintptr) { var buf [3484]byte; use(buf[:]); return Stackguard() }
+func stack3488() (uintptr, uintptr) { var buf [3488]byte; use(buf[:]); return Stackguard() }
+func stack3492() (uintptr, uintptr) { var buf [3492]byte; use(buf[:]); return Stackguard() }
+func stack3496() (uintptr, uintptr) { var buf [3496]byte; use(buf[:]); return Stackguard() }
+func stack3500() (uintptr, uintptr) { var buf [3500]byte; use(buf[:]); return Stackguard() }
+func stack3504() (uintptr, uintptr) { var buf [3504]byte; use(buf[:]); return Stackguard() }
+func stack3508() (uintptr, uintptr) { var buf [3508]byte; use(buf[:]); return Stackguard() }
+func stack3512() (uintptr, uintptr) { var buf [3512]byte; use(buf[:]); return Stackguard() }
+func stack3516() (uintptr, uintptr) { var buf [3516]byte; use(buf[:]); return Stackguard() }
+func stack3520() (uintptr, uintptr) { var buf [3520]byte; use(buf[:]); return Stackguard() }
+func stack3524() (uintptr, uintptr) { var buf [3524]byte; use(buf[:]); return Stackguard() }
+func stack3528() (uintptr, uintptr) { var buf [3528]byte; use(buf[:]); return Stackguard() }
+func stack3532() (uintptr, uintptr) { var buf [3532]byte; use(buf[:]); return Stackguard() }
+func stack3536() (uintptr, uintptr) { var buf [3536]byte; use(buf[:]); return Stackguard() }
+func stack3540() (uintptr, uintptr) { var buf [3540]byte; use(buf[:]); return Stackguard() }
+func stack3544() (uintptr, uintptr) { var buf [3544]byte; use(buf[:]); return Stackguard() }
+func stack3548() (uintptr, uintptr) { var buf [3548]byte; use(buf[:]); return Stackguard() }
+func stack3552() (uintptr, uintptr) { var buf [3552]byte; use(buf[:]); return Stackguard() }
+func stack3556() (uintptr, uintptr) { var buf [3556]byte; use(buf[:]); return Stackguard() }
+func stack3560() (uintptr, uintptr) { var buf [3560]byte; use(buf[:]); return Stackguard() }
+func stack3564() (uintptr, uintptr) { var buf [3564]byte; use(buf[:]); return Stackguard() }
+func stack3568() (uintptr, uintptr) { var buf [3568]byte; use(buf[:]); return Stackguard() }
+func stack3572() (uintptr, uintptr) { var buf [3572]byte; use(buf[:]); return Stackguard() }
+func stack3576() (uintptr, uintptr) { var buf [3576]byte; use(buf[:]); return Stackguard() }
+func stack3580() (uintptr, uintptr) { var buf [3580]byte; use(buf[:]); return Stackguard() }
+func stack3584() (uintptr, uintptr) { var buf [3584]byte; use(buf[:]); return Stackguard() }
+func stack3588() (uintptr, uintptr) { var buf [3588]byte; use(buf[:]); return Stackguard() }
+func stack3592() (uintptr, uintptr) { var buf [3592]byte; use(buf[:]); return Stackguard() }
+func stack3596() (uintptr, uintptr) { var buf [3596]byte; use(buf[:]); return Stackguard() }
+func stack3600() (uintptr, uintptr) { var buf [3600]byte; use(buf[:]); return Stackguard() }
+func stack3604() (uintptr, uintptr) { var buf [3604]byte; use(buf[:]); return Stackguard() }
+func stack3608() (uintptr, uintptr) { var buf [3608]byte; use(buf[:]); return Stackguard() }
+func stack3612() (uintptr, uintptr) { var buf [3612]byte; use(buf[:]); return Stackguard() }
+func stack3616() (uintptr, uintptr) { var buf [3616]byte; use(buf[:]); return Stackguard() }
+func stack3620() (uintptr, uintptr) { var buf [3620]byte; use(buf[:]); return Stackguard() }
+func stack3624() (uintptr, uintptr) { var buf [3624]byte; use(buf[:]); return Stackguard() }
+func stack3628() (uintptr, uintptr) { var buf [3628]byte; use(buf[:]); return Stackguard() }
+func stack3632() (uintptr, uintptr) { var buf [3632]byte; use(buf[:]); return Stackguard() }
+func stack3636() (uintptr, uintptr) { var buf [3636]byte; use(buf[:]); return Stackguard() }
+func stack3640() (uintptr, uintptr) { var buf [3640]byte; use(buf[:]); return Stackguard() }
+func stack3644() (uintptr, uintptr) { var buf [3644]byte; use(buf[:]); return Stackguard() }
+func stack3648() (uintptr, uintptr) { var buf [3648]byte; use(buf[:]); return Stackguard() }
+func stack3652() (uintptr, uintptr) { var buf [3652]byte; use(buf[:]); return Stackguard() }
+func stack3656() (uintptr, uintptr) { var buf [3656]byte; use(buf[:]); return Stackguard() }
+func stack3660() (uintptr, uintptr) { var buf [3660]byte; use(buf[:]); return Stackguard() }
+func stack3664() (uintptr, uintptr) { var buf [3664]byte; use(buf[:]); return Stackguard() }
+func stack3668() (uintptr, uintptr) { var buf [3668]byte; use(buf[:]); return Stackguard() }
+func stack3672() (uintptr, uintptr) { var buf [3672]byte; use(buf[:]); return Stackguard() }
+func stack3676() (uintptr, uintptr) { var buf [3676]byte; use(buf[:]); return Stackguard() }
+func stack3680() (uintptr, uintptr) { var buf [3680]byte; use(buf[:]); return Stackguard() }
+func stack3684() (uintptr, uintptr) { var buf [3684]byte; use(buf[:]); return Stackguard() }
+func stack3688() (uintptr, uintptr) { var buf [3688]byte; use(buf[:]); return Stackguard() }
+func stack3692() (uintptr, uintptr) { var buf [3692]byte; use(buf[:]); return Stackguard() }
+func stack3696() (uintptr, uintptr) { var buf [3696]byte; use(buf[:]); return Stackguard() }
+func stack3700() (uintptr, uintptr) { var buf [3700]byte; use(buf[:]); return Stackguard() }
+func stack3704() (uintptr, uintptr) { var buf [3704]byte; use(buf[:]); return Stackguard() }
+func stack3708() (uintptr, uintptr) { var buf [3708]byte; use(buf[:]); return Stackguard() }
+func stack3712() (uintptr, uintptr) { var buf [3712]byte; use(buf[:]); return Stackguard() }
+func stack3716() (uintptr, uintptr) { var buf [3716]byte; use(buf[:]); return Stackguard() }
+func stack3720() (uintptr, uintptr) { var buf [3720]byte; use(buf[:]); return Stackguard() }
+func stack3724() (uintptr, uintptr) { var buf [3724]byte; use(buf[:]); return Stackguard() }
+func stack3728() (uintptr, uintptr) { var buf [3728]byte; use(buf[:]); return Stackguard() }
+func stack3732() (uintptr, uintptr) { var buf [3732]byte; use(buf[:]); return Stackguard() }
+func stack3736() (uintptr, uintptr) { var buf [3736]byte; use(buf[:]); return Stackguard() }
+func stack3740() (uintptr, uintptr) { var buf [3740]byte; use(buf[:]); return Stackguard() }
+func stack3744() (uintptr, uintptr) { var buf [3744]byte; use(buf[:]); return Stackguard() }
+func stack3748() (uintptr, uintptr) { var buf [3748]byte; use(buf[:]); return Stackguard() }
+func stack3752() (uintptr, uintptr) { var buf [3752]byte; use(buf[:]); return Stackguard() }
+func stack3756() (uintptr, uintptr) { var buf [3756]byte; use(buf[:]); return Stackguard() }
+func stack3760() (uintptr, uintptr) { var buf [3760]byte; use(buf[:]); return Stackguard() }
+func stack3764() (uintptr, uintptr) { var buf [3764]byte; use(buf[:]); return Stackguard() }
+func stack3768() (uintptr, uintptr) { var buf [3768]byte; use(buf[:]); return Stackguard() }
+func stack3772() (uintptr, uintptr) { var buf [3772]byte; use(buf[:]); return Stackguard() }
+func stack3776() (uintptr, uintptr) { var buf [3776]byte; use(buf[:]); return Stackguard() }
+func stack3780() (uintptr, uintptr) { var buf [3780]byte; use(buf[:]); return Stackguard() }
+func stack3784() (uintptr, uintptr) { var buf [3784]byte; use(buf[:]); return Stackguard() }
+func stack3788() (uintptr, uintptr) { var buf [3788]byte; use(buf[:]); return Stackguard() }
+func stack3792() (uintptr, uintptr) { var buf [3792]byte; use(buf[:]); return Stackguard() }
+func stack3796() (uintptr, uintptr) { var buf [3796]byte; use(buf[:]); return Stackguard() }
+func stack3800() (uintptr, uintptr) { var buf [3800]byte; use(buf[:]); return Stackguard() }
+func stack3804() (uintptr, uintptr) { var buf [3804]byte; use(buf[:]); return Stackguard() }
+func stack3808() (uintptr, uintptr) { var buf [3808]byte; use(buf[:]); return Stackguard() }
+func stack3812() (uintptr, uintptr) { var buf [3812]byte; use(buf[:]); return Stackguard() }
+func stack3816() (uintptr, uintptr) { var buf [3816]byte; use(buf[:]); return Stackguard() }
+func stack3820() (uintptr, uintptr) { var buf [3820]byte; use(buf[:]); return Stackguard() }
+func stack3824() (uintptr, uintptr) { var buf [3824]byte; use(buf[:]); return Stackguard() }
+func stack3828() (uintptr, uintptr) { var buf [3828]byte; use(buf[:]); return Stackguard() }
+func stack3832() (uintptr, uintptr) { var buf [3832]byte; use(buf[:]); return Stackguard() }
+func stack3836() (uintptr, uintptr) { var buf [3836]byte; use(buf[:]); return Stackguard() }
+func stack3840() (uintptr, uintptr) { var buf [3840]byte; use(buf[:]); return Stackguard() }
+func stack3844() (uintptr, uintptr) { var buf [3844]byte; use(buf[:]); return Stackguard() }
+func stack3848() (uintptr, uintptr) { var buf [3848]byte; use(buf[:]); return Stackguard() }
+func stack3852() (uintptr, uintptr) { var buf [3852]byte; use(buf[:]); return Stackguard() }
+func stack3856() (uintptr, uintptr) { var buf [3856]byte; use(buf[:]); return Stackguard() }
+func stack3860() (uintptr, uintptr) { var buf [3860]byte; use(buf[:]); return Stackguard() }
+func stack3864() (uintptr, uintptr) { var buf [3864]byte; use(buf[:]); return Stackguard() }
+func stack3868() (uintptr, uintptr) { var buf [3868]byte; use(buf[:]); return Stackguard() }
+func stack3872() (uintptr, uintptr) { var buf [3872]byte; use(buf[:]); return Stackguard() }
+func stack3876() (uintptr, uintptr) { var buf [3876]byte; use(buf[:]); return Stackguard() }
+func stack3880() (uintptr, uintptr) { var buf [3880]byte; use(buf[:]); return Stackguard() }
+func stack3884() (uintptr, uintptr) { var buf [3884]byte; use(buf[:]); return Stackguard() }
+func stack3888() (uintptr, uintptr) { var buf [3888]byte; use(buf[:]); return Stackguard() }
+func stack3892() (uintptr, uintptr) { var buf [3892]byte; use(buf[:]); return Stackguard() }
+func stack3896() (uintptr, uintptr) { var buf [3896]byte; use(buf[:]); return Stackguard() }
+func stack3900() (uintptr, uintptr) { var buf [3900]byte; use(buf[:]); return Stackguard() }
+func stack3904() (uintptr, uintptr) { var buf [3904]byte; use(buf[:]); return Stackguard() }
+func stack3908() (uintptr, uintptr) { var buf [3908]byte; use(buf[:]); return Stackguard() }
+func stack3912() (uintptr, uintptr) { var buf [3912]byte; use(buf[:]); return Stackguard() }
+func stack3916() (uintptr, uintptr) { var buf [3916]byte; use(buf[:]); return Stackguard() }
+func stack3920() (uintptr, uintptr) { var buf [3920]byte; use(buf[:]); return Stackguard() }
+func stack3924() (uintptr, uintptr) { var buf [3924]byte; use(buf[:]); return Stackguard() }
+func stack3928() (uintptr, uintptr) { var buf [3928]byte; use(buf[:]); return Stackguard() }
+func stack3932() (uintptr, uintptr) { var buf [3932]byte; use(buf[:]); return Stackguard() }
+func stack3936() (uintptr, uintptr) { var buf [3936]byte; use(buf[:]); return Stackguard() }
+func stack3940() (uintptr, uintptr) { var buf [3940]byte; use(buf[:]); return Stackguard() }
+func stack3944() (uintptr, uintptr) { var buf [3944]byte; use(buf[:]); return Stackguard() }
+func stack3948() (uintptr, uintptr) { var buf [3948]byte; use(buf[:]); return Stackguard() }
+func stack3952() (uintptr, uintptr) { var buf [3952]byte; use(buf[:]); return Stackguard() }
+func stack3956() (uintptr, uintptr) { var buf [3956]byte; use(buf[:]); return Stackguard() }
+func stack3960() (uintptr, uintptr) { var buf [3960]byte; use(buf[:]); return Stackguard() }
+func stack3964() (uintptr, uintptr) { var buf [3964]byte; use(buf[:]); return Stackguard() }
+func stack3968() (uintptr, uintptr) { var buf [3968]byte; use(buf[:]); return Stackguard() }
+func stack3972() (uintptr, uintptr) { var buf [3972]byte; use(buf[:]); return Stackguard() }
+func stack3976() (uintptr, uintptr) { var buf [3976]byte; use(buf[:]); return Stackguard() }
+func stack3980() (uintptr, uintptr) { var buf [3980]byte; use(buf[:]); return Stackguard() }
+func stack3984() (uintptr, uintptr) { var buf [3984]byte; use(buf[:]); return Stackguard() }
+func stack3988() (uintptr, uintptr) { var buf [3988]byte; use(buf[:]); return Stackguard() }
+func stack3992() (uintptr, uintptr) { var buf [3992]byte; use(buf[:]); return Stackguard() }
+func stack3996() (uintptr, uintptr) { var buf [3996]byte; use(buf[:]); return Stackguard() }
+func stack4000() (uintptr, uintptr) { var buf [4000]byte; use(buf[:]); return Stackguard() }
+func stack4004() (uintptr, uintptr) { var buf [4004]byte; use(buf[:]); return Stackguard() }
+func stack4008() (uintptr, uintptr) { var buf [4008]byte; use(buf[:]); return Stackguard() }
+func stack4012() (uintptr, uintptr) { var buf [4012]byte; use(buf[:]); return Stackguard() }
+func stack4016() (uintptr, uintptr) { var buf [4016]byte; use(buf[:]); return Stackguard() }
+func stack4020() (uintptr, uintptr) { var buf [4020]byte; use(buf[:]); return Stackguard() }
+func stack4024() (uintptr, uintptr) { var buf [4024]byte; use(buf[:]); return Stackguard() }
+func stack4028() (uintptr, uintptr) { var buf [4028]byte; use(buf[:]); return Stackguard() }
+func stack4032() (uintptr, uintptr) { var buf [4032]byte; use(buf[:]); return Stackguard() }
+func stack4036() (uintptr, uintptr) { var buf [4036]byte; use(buf[:]); return Stackguard() }
+func stack4040() (uintptr, uintptr) { var buf [4040]byte; use(buf[:]); return Stackguard() }
+func stack4044() (uintptr, uintptr) { var buf [4044]byte; use(buf[:]); return Stackguard() }
+func stack4048() (uintptr, uintptr) { var buf [4048]byte; use(buf[:]); return Stackguard() }
+func stack4052() (uintptr, uintptr) { var buf [4052]byte; use(buf[:]); return Stackguard() }
+func stack4056() (uintptr, uintptr) { var buf [4056]byte; use(buf[:]); return Stackguard() }
+func stack4060() (uintptr, uintptr) { var buf [4060]byte; use(buf[:]); return Stackguard() }
+func stack4064() (uintptr, uintptr) { var buf [4064]byte; use(buf[:]); return Stackguard() }
+func stack4068() (uintptr, uintptr) { var buf [4068]byte; use(buf[:]); return Stackguard() }
+func stack4072() (uintptr, uintptr) { var buf [4072]byte; use(buf[:]); return Stackguard() }
+func stack4076() (uintptr, uintptr) { var buf [4076]byte; use(buf[:]); return Stackguard() }
+func stack4080() (uintptr, uintptr) { var buf [4080]byte; use(buf[:]); return Stackguard() }
+func stack4084() (uintptr, uintptr) { var buf [4084]byte; use(buf[:]); return Stackguard() }
+func stack4088() (uintptr, uintptr) { var buf [4088]byte; use(buf[:]); return Stackguard() }
+func stack4092() (uintptr, uintptr) { var buf [4092]byte; use(buf[:]); return Stackguard() }
+func stack4096() (uintptr, uintptr) { var buf [4096]byte; use(buf[:]); return Stackguard() }
+func stack4100() (uintptr, uintptr) { var buf [4100]byte; use(buf[:]); return Stackguard() }
+func stack4104() (uintptr, uintptr) { var buf [4104]byte; use(buf[:]); return Stackguard() }
+func stack4108() (uintptr, uintptr) { var buf [4108]byte; use(buf[:]); return Stackguard() }
+func stack4112() (uintptr, uintptr) { var buf [4112]byte; use(buf[:]); return Stackguard() }
+func stack4116() (uintptr, uintptr) { var buf [4116]byte; use(buf[:]); return Stackguard() }
+func stack4120() (uintptr, uintptr) { var buf [4120]byte; use(buf[:]); return Stackguard() }
+func stack4124() (uintptr, uintptr) { var buf [4124]byte; use(buf[:]); return Stackguard() }
+func stack4128() (uintptr, uintptr) { var buf [4128]byte; use(buf[:]); return Stackguard() }
+func stack4132() (uintptr, uintptr) { var buf [4132]byte; use(buf[:]); return Stackguard() }
+func stack4136() (uintptr, uintptr) { var buf [4136]byte; use(buf[:]); return Stackguard() }
+func stack4140() (uintptr, uintptr) { var buf [4140]byte; use(buf[:]); return Stackguard() }
+func stack4144() (uintptr, uintptr) { var buf [4144]byte; use(buf[:]); return Stackguard() }
+func stack4148() (uintptr, uintptr) { var buf [4148]byte; use(buf[:]); return Stackguard() }
+func stack4152() (uintptr, uintptr) { var buf [4152]byte; use(buf[:]); return Stackguard() }
+func stack4156() (uintptr, uintptr) { var buf [4156]byte; use(buf[:]); return Stackguard() }
+func stack4160() (uintptr, uintptr) { var buf [4160]byte; use(buf[:]); return Stackguard() }
+func stack4164() (uintptr, uintptr) { var buf [4164]byte; use(buf[:]); return Stackguard() }
+func stack4168() (uintptr, uintptr) { var buf [4168]byte; use(buf[:]); return Stackguard() }
+func stack4172() (uintptr, uintptr) { var buf [4172]byte; use(buf[:]); return Stackguard() }
+func stack4176() (uintptr, uintptr) { var buf [4176]byte; use(buf[:]); return Stackguard() }
+func stack4180() (uintptr, uintptr) { var buf [4180]byte; use(buf[:]); return Stackguard() }
+func stack4184() (uintptr, uintptr) { var buf [4184]byte; use(buf[:]); return Stackguard() }
+func stack4188() (uintptr, uintptr) { var buf [4188]byte; use(buf[:]); return Stackguard() }
+func stack4192() (uintptr, uintptr) { var buf [4192]byte; use(buf[:]); return Stackguard() }
+func stack4196() (uintptr, uintptr) { var buf [4196]byte; use(buf[:]); return Stackguard() }
+func stack4200() (uintptr, uintptr) { var buf [4200]byte; use(buf[:]); return Stackguard() }
+func stack4204() (uintptr, uintptr) { var buf [4204]byte; use(buf[:]); return Stackguard() }
+func stack4208() (uintptr, uintptr) { var buf [4208]byte; use(buf[:]); return Stackguard() }
+func stack4212() (uintptr, uintptr) { var buf [4212]byte; use(buf[:]); return Stackguard() }
+func stack4216() (uintptr, uintptr) { var buf [4216]byte; use(buf[:]); return Stackguard() }
+func stack4220() (uintptr, uintptr) { var buf [4220]byte; use(buf[:]); return Stackguard() }
+func stack4224() (uintptr, uintptr) { var buf [4224]byte; use(buf[:]); return Stackguard() }
+func stack4228() (uintptr, uintptr) { var buf [4228]byte; use(buf[:]); return Stackguard() }
+func stack4232() (uintptr, uintptr) { var buf [4232]byte; use(buf[:]); return Stackguard() }
+func stack4236() (uintptr, uintptr) { var buf [4236]byte; use(buf[:]); return Stackguard() }
+func stack4240() (uintptr, uintptr) { var buf [4240]byte; use(buf[:]); return Stackguard() }
+func stack4244() (uintptr, uintptr) { var buf [4244]byte; use(buf[:]); return Stackguard() }
+func stack4248() (uintptr, uintptr) { var buf [4248]byte; use(buf[:]); return Stackguard() }
+func stack4252() (uintptr, uintptr) { var buf [4252]byte; use(buf[:]); return Stackguard() }
+func stack4256() (uintptr, uintptr) { var buf [4256]byte; use(buf[:]); return Stackguard() }
+func stack4260() (uintptr, uintptr) { var buf [4260]byte; use(buf[:]); return Stackguard() }
+func stack4264() (uintptr, uintptr) { var buf [4264]byte; use(buf[:]); return Stackguard() }
+func stack4268() (uintptr, uintptr) { var buf [4268]byte; use(buf[:]); return Stackguard() }
+func stack4272() (uintptr, uintptr) { var buf [4272]byte; use(buf[:]); return Stackguard() }
+func stack4276() (uintptr, uintptr) { var buf [4276]byte; use(buf[:]); return Stackguard() }
+func stack4280() (uintptr, uintptr) { var buf [4280]byte; use(buf[:]); return Stackguard() }
+func stack4284() (uintptr, uintptr) { var buf [4284]byte; use(buf[:]); return Stackguard() }
+func stack4288() (uintptr, uintptr) { var buf [4288]byte; use(buf[:]); return Stackguard() }
+func stack4292() (uintptr, uintptr) { var buf [4292]byte; use(buf[:]); return Stackguard() }
+func stack4296() (uintptr, uintptr) { var buf [4296]byte; use(buf[:]); return Stackguard() }
+func stack4300() (uintptr, uintptr) { var buf [4300]byte; use(buf[:]); return Stackguard() }
+func stack4304() (uintptr, uintptr) { var buf [4304]byte; use(buf[:]); return Stackguard() }
+func stack4308() (uintptr, uintptr) { var buf [4308]byte; use(buf[:]); return Stackguard() }
+func stack4312() (uintptr, uintptr) { var buf [4312]byte; use(buf[:]); return Stackguard() }
+func stack4316() (uintptr, uintptr) { var buf [4316]byte; use(buf[:]); return Stackguard() }
+func stack4320() (uintptr, uintptr) { var buf [4320]byte; use(buf[:]); return Stackguard() }
+func stack4324() (uintptr, uintptr) { var buf [4324]byte; use(buf[:]); return Stackguard() }
+func stack4328() (uintptr, uintptr) { var buf [4328]byte; use(buf[:]); return Stackguard() }
+func stack4332() (uintptr, uintptr) { var buf [4332]byte; use(buf[:]); return Stackguard() }
+func stack4336() (uintptr, uintptr) { var buf [4336]byte; use(buf[:]); return Stackguard() }
+func stack4340() (uintptr, uintptr) { var buf [4340]byte; use(buf[:]); return Stackguard() }
+func stack4344() (uintptr, uintptr) { var buf [4344]byte; use(buf[:]); return Stackguard() }
+func stack4348() (uintptr, uintptr) { var buf [4348]byte; use(buf[:]); return Stackguard() }
+func stack4352() (uintptr, uintptr) { var buf [4352]byte; use(buf[:]); return Stackguard() }
+func stack4356() (uintptr, uintptr) { var buf [4356]byte; use(buf[:]); return Stackguard() }
+func stack4360() (uintptr, uintptr) { var buf [4360]byte; use(buf[:]); return Stackguard() }
+func stack4364() (uintptr, uintptr) { var buf [4364]byte; use(buf[:]); return Stackguard() }
+func stack4368() (uintptr, uintptr) { var buf [4368]byte; use(buf[:]); return Stackguard() }
+func stack4372() (uintptr, uintptr) { var buf [4372]byte; use(buf[:]); return Stackguard() }
+func stack4376() (uintptr, uintptr) { var buf [4376]byte; use(buf[:]); return Stackguard() }
+func stack4380() (uintptr, uintptr) { var buf [4380]byte; use(buf[:]); return Stackguard() }
+func stack4384() (uintptr, uintptr) { var buf [4384]byte; use(buf[:]); return Stackguard() }
+func stack4388() (uintptr, uintptr) { var buf [4388]byte; use(buf[:]); return Stackguard() }
+func stack4392() (uintptr, uintptr) { var buf [4392]byte; use(buf[:]); return Stackguard() }
+func stack4396() (uintptr, uintptr) { var buf [4396]byte; use(buf[:]); return Stackguard() }
+func stack4400() (uintptr, uintptr) { var buf [4400]byte; use(buf[:]); return Stackguard() }
+func stack4404() (uintptr, uintptr) { var buf [4404]byte; use(buf[:]); return Stackguard() }
+func stack4408() (uintptr, uintptr) { var buf [4408]byte; use(buf[:]); return Stackguard() }
+func stack4412() (uintptr, uintptr) { var buf [4412]byte; use(buf[:]); return Stackguard() }
+func stack4416() (uintptr, uintptr) { var buf [4416]byte; use(buf[:]); return Stackguard() }
+func stack4420() (uintptr, uintptr) { var buf [4420]byte; use(buf[:]); return Stackguard() }
+func stack4424() (uintptr, uintptr) { var buf [4424]byte; use(buf[:]); return Stackguard() }
+func stack4428() (uintptr, uintptr) { var buf [4428]byte; use(buf[:]); return Stackguard() }
+func stack4432() (uintptr, uintptr) { var buf [4432]byte; use(buf[:]); return Stackguard() }
+func stack4436() (uintptr, uintptr) { var buf [4436]byte; use(buf[:]); return Stackguard() }
+func stack4440() (uintptr, uintptr) { var buf [4440]byte; use(buf[:]); return Stackguard() }
+func stack4444() (uintptr, uintptr) { var buf [4444]byte; use(buf[:]); return Stackguard() }
+func stack4448() (uintptr, uintptr) { var buf [4448]byte; use(buf[:]); return Stackguard() }
+func stack4452() (uintptr, uintptr) { var buf [4452]byte; use(buf[:]); return Stackguard() }
+func stack4456() (uintptr, uintptr) { var buf [4456]byte; use(buf[:]); return Stackguard() }
+func stack4460() (uintptr, uintptr) { var buf [4460]byte; use(buf[:]); return Stackguard() }
+func stack4464() (uintptr, uintptr) { var buf [4464]byte; use(buf[:]); return Stackguard() }
+func stack4468() (uintptr, uintptr) { var buf [4468]byte; use(buf[:]); return Stackguard() }
+func stack4472() (uintptr, uintptr) { var buf [4472]byte; use(buf[:]); return Stackguard() }
+func stack4476() (uintptr, uintptr) { var buf [4476]byte; use(buf[:]); return Stackguard() }
+func stack4480() (uintptr, uintptr) { var buf [4480]byte; use(buf[:]); return Stackguard() }
+func stack4484() (uintptr, uintptr) { var buf [4484]byte; use(buf[:]); return Stackguard() }
+func stack4488() (uintptr, uintptr) { var buf [4488]byte; use(buf[:]); return Stackguard() }
+func stack4492() (uintptr, uintptr) { var buf [4492]byte; use(buf[:]); return Stackguard() }
+func stack4496() (uintptr, uintptr) { var buf [4496]byte; use(buf[:]); return Stackguard() }
+func stack4500() (uintptr, uintptr) { var buf [4500]byte; use(buf[:]); return Stackguard() }
+func stack4504() (uintptr, uintptr) { var buf [4504]byte; use(buf[:]); return Stackguard() }
+func stack4508() (uintptr, uintptr) { var buf [4508]byte; use(buf[:]); return Stackguard() }
+func stack4512() (uintptr, uintptr) { var buf [4512]byte; use(buf[:]); return Stackguard() }
+func stack4516() (uintptr, uintptr) { var buf [4516]byte; use(buf[:]); return Stackguard() }
+func stack4520() (uintptr, uintptr) { var buf [4520]byte; use(buf[:]); return Stackguard() }
+func stack4524() (uintptr, uintptr) { var buf [4524]byte; use(buf[:]); return Stackguard() }
+func stack4528() (uintptr, uintptr) { var buf [4528]byte; use(buf[:]); return Stackguard() }
+func stack4532() (uintptr, uintptr) { var buf [4532]byte; use(buf[:]); return Stackguard() }
+func stack4536() (uintptr, uintptr) { var buf [4536]byte; use(buf[:]); return Stackguard() }
+func stack4540() (uintptr, uintptr) { var buf [4540]byte; use(buf[:]); return Stackguard() }
+func stack4544() (uintptr, uintptr) { var buf [4544]byte; use(buf[:]); return Stackguard() }
+func stack4548() (uintptr, uintptr) { var buf [4548]byte; use(buf[:]); return Stackguard() }
+func stack4552() (uintptr, uintptr) { var buf [4552]byte; use(buf[:]); return Stackguard() }
+func stack4556() (uintptr, uintptr) { var buf [4556]byte; use(buf[:]); return Stackguard() }
+func stack4560() (uintptr, uintptr) { var buf [4560]byte; use(buf[:]); return Stackguard() }
+func stack4564() (uintptr, uintptr) { var buf [4564]byte; use(buf[:]); return Stackguard() }
+func stack4568() (uintptr, uintptr) { var buf [4568]byte; use(buf[:]); return Stackguard() }
+func stack4572() (uintptr, uintptr) { var buf [4572]byte; use(buf[:]); return Stackguard() }
+func stack4576() (uintptr, uintptr) { var buf [4576]byte; use(buf[:]); return Stackguard() }
+func stack4580() (uintptr, uintptr) { var buf [4580]byte; use(buf[:]); return Stackguard() }
+func stack4584() (uintptr, uintptr) { var buf [4584]byte; use(buf[:]); return Stackguard() }
+func stack4588() (uintptr, uintptr) { var buf [4588]byte; use(buf[:]); return Stackguard() }
+func stack4592() (uintptr, uintptr) { var buf [4592]byte; use(buf[:]); return Stackguard() }
+func stack4596() (uintptr, uintptr) { var buf [4596]byte; use(buf[:]); return Stackguard() }
+func stack4600() (uintptr, uintptr) { var buf [4600]byte; use(buf[:]); return Stackguard() }
+func stack4604() (uintptr, uintptr) { var buf [4604]byte; use(buf[:]); return Stackguard() }
+func stack4608() (uintptr, uintptr) { var buf [4608]byte; use(buf[:]); return Stackguard() }
+func stack4612() (uintptr, uintptr) { var buf [4612]byte; use(buf[:]); return Stackguard() }
+func stack4616() (uintptr, uintptr) { var buf [4616]byte; use(buf[:]); return Stackguard() }
+func stack4620() (uintptr, uintptr) { var buf [4620]byte; use(buf[:]); return Stackguard() }
+func stack4624() (uintptr, uintptr) { var buf [4624]byte; use(buf[:]); return Stackguard() }
+func stack4628() (uintptr, uintptr) { var buf [4628]byte; use(buf[:]); return Stackguard() }
+func stack4632() (uintptr, uintptr) { var buf [4632]byte; use(buf[:]); return Stackguard() }
+func stack4636() (uintptr, uintptr) { var buf [4636]byte; use(buf[:]); return Stackguard() }
+func stack4640() (uintptr, uintptr) { var buf [4640]byte; use(buf[:]); return Stackguard() }
+func stack4644() (uintptr, uintptr) { var buf [4644]byte; use(buf[:]); return Stackguard() }
+func stack4648() (uintptr, uintptr) { var buf [4648]byte; use(buf[:]); return Stackguard() }
+func stack4652() (uintptr, uintptr) { var buf [4652]byte; use(buf[:]); return Stackguard() }
+func stack4656() (uintptr, uintptr) { var buf [4656]byte; use(buf[:]); return Stackguard() }
+func stack4660() (uintptr, uintptr) { var buf [4660]byte; use(buf[:]); return Stackguard() }
+func stack4664() (uintptr, uintptr) { var buf [4664]byte; use(buf[:]); return Stackguard() }
+func stack4668() (uintptr, uintptr) { var buf [4668]byte; use(buf[:]); return Stackguard() }
+func stack4672() (uintptr, uintptr) { var buf [4672]byte; use(buf[:]); return Stackguard() }
+func stack4676() (uintptr, uintptr) { var buf [4676]byte; use(buf[:]); return Stackguard() }
+func stack4680() (uintptr, uintptr) { var buf [4680]byte; use(buf[:]); return Stackguard() }
+func stack4684() (uintptr, uintptr) { var buf [4684]byte; use(buf[:]); return Stackguard() }
+func stack4688() (uintptr, uintptr) { var buf [4688]byte; use(buf[:]); return Stackguard() }
+func stack4692() (uintptr, uintptr) { var buf [4692]byte; use(buf[:]); return Stackguard() }
+func stack4696() (uintptr, uintptr) { var buf [4696]byte; use(buf[:]); return Stackguard() }
+func stack4700() (uintptr, uintptr) { var buf [4700]byte; use(buf[:]); return Stackguard() }
+func stack4704() (uintptr, uintptr) { var buf [4704]byte; use(buf[:]); return Stackguard() }
+func stack4708() (uintptr, uintptr) { var buf [4708]byte; use(buf[:]); return Stackguard() }
+func stack4712() (uintptr, uintptr) { var buf [4712]byte; use(buf[:]); return Stackguard() }
+func stack4716() (uintptr, uintptr) { var buf [4716]byte; use(buf[:]); return Stackguard() }
+func stack4720() (uintptr, uintptr) { var buf [4720]byte; use(buf[:]); return Stackguard() }
+func stack4724() (uintptr, uintptr) { var buf [4724]byte; use(buf[:]); return Stackguard() }
+func stack4728() (uintptr, uintptr) { var buf [4728]byte; use(buf[:]); return Stackguard() }
+func stack4732() (uintptr, uintptr) { var buf [4732]byte; use(buf[:]); return Stackguard() }
+func stack4736() (uintptr, uintptr) { var buf [4736]byte; use(buf[:]); return Stackguard() }
+func stack4740() (uintptr, uintptr) { var buf [4740]byte; use(buf[:]); return Stackguard() }
+func stack4744() (uintptr, uintptr) { var buf [4744]byte; use(buf[:]); return Stackguard() }
+func stack4748() (uintptr, uintptr) { var buf [4748]byte; use(buf[:]); return Stackguard() }
+func stack4752() (uintptr, uintptr) { var buf [4752]byte; use(buf[:]); return Stackguard() }
+func stack4756() (uintptr, uintptr) { var buf [4756]byte; use(buf[:]); return Stackguard() }
+func stack4760() (uintptr, uintptr) { var buf [4760]byte; use(buf[:]); return Stackguard() }
+func stack4764() (uintptr, uintptr) { var buf [4764]byte; use(buf[:]); return Stackguard() }
+func stack4768() (uintptr, uintptr) { var buf [4768]byte; use(buf[:]); return Stackguard() }
+func stack4772() (uintptr, uintptr) { var buf [4772]byte; use(buf[:]); return Stackguard() }
+func stack4776() (uintptr, uintptr) { var buf [4776]byte; use(buf[:]); return Stackguard() }
+func stack4780() (uintptr, uintptr) { var buf [4780]byte; use(buf[:]); return Stackguard() }
+func stack4784() (uintptr, uintptr) { var buf [4784]byte; use(buf[:]); return Stackguard() }
+func stack4788() (uintptr, uintptr) { var buf [4788]byte; use(buf[:]); return Stackguard() }
+func stack4792() (uintptr, uintptr) { var buf [4792]byte; use(buf[:]); return Stackguard() }
+func stack4796() (uintptr, uintptr) { var buf [4796]byte; use(buf[:]); return Stackguard() }
+func stack4800() (uintptr, uintptr) { var buf [4800]byte; use(buf[:]); return Stackguard() }
+func stack4804() (uintptr, uintptr) { var buf [4804]byte; use(buf[:]); return Stackguard() }
+func stack4808() (uintptr, uintptr) { var buf [4808]byte; use(buf[:]); return Stackguard() }
+func stack4812() (uintptr, uintptr) { var buf [4812]byte; use(buf[:]); return Stackguard() }
+func stack4816() (uintptr, uintptr) { var buf [4816]byte; use(buf[:]); return Stackguard() }
+func stack4820() (uintptr, uintptr) { var buf [4820]byte; use(buf[:]); return Stackguard() }
+func stack4824() (uintptr, uintptr) { var buf [4824]byte; use(buf[:]); return Stackguard() }
+func stack4828() (uintptr, uintptr) { var buf [4828]byte; use(buf[:]); return Stackguard() }
+func stack4832() (uintptr, uintptr) { var buf [4832]byte; use(buf[:]); return Stackguard() }
+func stack4836() (uintptr, uintptr) { var buf [4836]byte; use(buf[:]); return Stackguard() }
+func stack4840() (uintptr, uintptr) { var buf [4840]byte; use(buf[:]); return Stackguard() }
+func stack4844() (uintptr, uintptr) { var buf [4844]byte; use(buf[:]); return Stackguard() }
+func stack4848() (uintptr, uintptr) { var buf [4848]byte; use(buf[:]); return Stackguard() }
+func stack4852() (uintptr, uintptr) { var buf [4852]byte; use(buf[:]); return Stackguard() }
+func stack4856() (uintptr, uintptr) { var buf [4856]byte; use(buf[:]); return Stackguard() }
+func stack4860() (uintptr, uintptr) { var buf [4860]byte; use(buf[:]); return Stackguard() }
+func stack4864() (uintptr, uintptr) { var buf [4864]byte; use(buf[:]); return Stackguard() }
+func stack4868() (uintptr, uintptr) { var buf [4868]byte; use(buf[:]); return Stackguard() }
+func stack4872() (uintptr, uintptr) { var buf [4872]byte; use(buf[:]); return Stackguard() }
+func stack4876() (uintptr, uintptr) { var buf [4876]byte; use(buf[:]); return Stackguard() }
+func stack4880() (uintptr, uintptr) { var buf [4880]byte; use(buf[:]); return Stackguard() }
+func stack4884() (uintptr, uintptr) { var buf [4884]byte; use(buf[:]); return Stackguard() }
+func stack4888() (uintptr, uintptr) { var buf [4888]byte; use(buf[:]); return Stackguard() }
+func stack4892() (uintptr, uintptr) { var buf [4892]byte; use(buf[:]); return Stackguard() }
+func stack4896() (uintptr, uintptr) { var buf [4896]byte; use(buf[:]); return Stackguard() }
+func stack4900() (uintptr, uintptr) { var buf [4900]byte; use(buf[:]); return Stackguard() }
+func stack4904() (uintptr, uintptr) { var buf [4904]byte; use(buf[:]); return Stackguard() }
+func stack4908() (uintptr, uintptr) { var buf [4908]byte; use(buf[:]); return Stackguard() }
+func stack4912() (uintptr, uintptr) { var buf [4912]byte; use(buf[:]); return Stackguard() }
+func stack4916() (uintptr, uintptr) { var buf [4916]byte; use(buf[:]); return Stackguard() }
+func stack4920() (uintptr, uintptr) { var buf [4920]byte; use(buf[:]); return Stackguard() }
+func stack4924() (uintptr, uintptr) { var buf [4924]byte; use(buf[:]); return Stackguard() }
+func stack4928() (uintptr, uintptr) { var buf [4928]byte; use(buf[:]); return Stackguard() }
+func stack4932() (uintptr, uintptr) { var buf [4932]byte; use(buf[:]); return Stackguard() }
+func stack4936() (uintptr, uintptr) { var buf [4936]byte; use(buf[:]); return Stackguard() }
+func stack4940() (uintptr, uintptr) { var buf [4940]byte; use(buf[:]); return Stackguard() }
+func stack4944() (uintptr, uintptr) { var buf [4944]byte; use(buf[:]); return Stackguard() }
+func stack4948() (uintptr, uintptr) { var buf [4948]byte; use(buf[:]); return Stackguard() }
+func stack4952() (uintptr, uintptr) { var buf [4952]byte; use(buf[:]); return Stackguard() }
+func stack4956() (uintptr, uintptr) { var buf [4956]byte; use(buf[:]); return Stackguard() }
+func stack4960() (uintptr, uintptr) { var buf [4960]byte; use(buf[:]); return Stackguard() }
+func stack4964() (uintptr, uintptr) { var buf [4964]byte; use(buf[:]); return Stackguard() }
+func stack4968() (uintptr, uintptr) { var buf [4968]byte; use(buf[:]); return Stackguard() }
+func stack4972() (uintptr, uintptr) { var buf [4972]byte; use(buf[:]); return Stackguard() }
+func stack4976() (uintptr, uintptr) { var buf [4976]byte; use(buf[:]); return Stackguard() }
+func stack4980() (uintptr, uintptr) { var buf [4980]byte; use(buf[:]); return Stackguard() }
+func stack4984() (uintptr, uintptr) { var buf [4984]byte; use(buf[:]); return Stackguard() }
+func stack4988() (uintptr, uintptr) { var buf [4988]byte; use(buf[:]); return Stackguard() }
+func stack4992() (uintptr, uintptr) { var buf [4992]byte; use(buf[:]); return Stackguard() }
+func stack4996() (uintptr, uintptr) { var buf [4996]byte; use(buf[:]); return Stackguard() }
+func stack5000() (uintptr, uintptr) { var buf [5000]byte; use(buf[:]); return Stackguard() }
diff --git a/src/runtime/stack_test.go b/src/runtime/stack_test.go
new file mode 100644
index 000000000..7b9412af4
--- /dev/null
+++ b/src/runtime/stack_test.go
@@ -0,0 +1,367 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+ "unsafe"
+)
+
+// See stack.h.
+const (
+ StackGuard = 256
+ StackSmall = 64
+ StackLimit = StackGuard - StackSmall
+)
+
+// Test stack split logic by calling functions of every frame size
+// from near 0 up to and beyond the default segment size (4k).
+// Each of those functions reports its SP + stack limit, and then
+// the test (the caller) checks that those make sense. By not
+// doing the actual checking and reporting from the suspect functions,
+// we minimize the possibility of crashes during the test itself.
+//
+// Exhaustive test for http://golang.org/issue/3310.
+// The linker used to get a few sizes near the segment size wrong:
+//
+// --- FAIL: TestStackSplit (0.01 seconds)
+// stack_test.go:22: after runtime_test.stack3812: sp=0x7f7818d5d078 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3816: sp=0x7f7818d5d078 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3820: sp=0x7f7818d5d070 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3824: sp=0x7f7818d5d070 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3828: sp=0x7f7818d5d068 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3832: sp=0x7f7818d5d068 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3836: sp=0x7f7818d5d060 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3840: sp=0x7f7818d5d060 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3844: sp=0x7f7818d5d058 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3848: sp=0x7f7818d5d058 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3852: sp=0x7f7818d5d050 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3856: sp=0x7f7818d5d050 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3860: sp=0x7f7818d5d048 < limit=0x7f7818d5d080
+// stack_test.go:22: after runtime_test.stack3864: sp=0x7f7818d5d048 < limit=0x7f7818d5d080
+// FAIL
+func TestStackSplit(t *testing.T) {
+ for _, f := range splitTests {
+ sp, guard := f()
+ bottom := guard - StackGuard
+ if sp < bottom+StackLimit {
+ fun := FuncForPC(**(**uintptr)(unsafe.Pointer(&f)))
+ t.Errorf("after %s: sp=%#x < limit=%#x (guard=%#x, bottom=%#x)",
+ fun.Name(), sp, bottom+StackLimit, guard, bottom)
+ }
+ }
+}
+
+var Used byte
+
+func use(buf []byte) {
+ for _, c := range buf {
+ Used += c
+ }
+}
+
+// TestStackMem measures per-thread stack segment cache behavior.
+// The test consumed up to 500MB in the past.
+func TestStackMem(t *testing.T) {
+ const (
+ BatchSize = 32
+ BatchCount = 256
+ ArraySize = 1024
+ RecursionDepth = 128
+ )
+ if testing.Short() {
+ return
+ }
+ defer GOMAXPROCS(GOMAXPROCS(BatchSize))
+ s0 := new(MemStats)
+ ReadMemStats(s0)
+ for b := 0; b < BatchCount; b++ {
+ c := make(chan bool, BatchSize)
+ for i := 0; i < BatchSize; i++ {
+ go func() {
+ var f func(k int, a [ArraySize]byte)
+ f = func(k int, a [ArraySize]byte) {
+ if k == 0 {
+ time.Sleep(time.Millisecond)
+ return
+ }
+ f(k-1, a)
+ }
+ f(RecursionDepth, [ArraySize]byte{})
+ c <- true
+ }()
+ }
+ for i := 0; i < BatchSize; i++ {
+ <-c
+ }
+
+ // The goroutines have signaled via c that they are ready to exit.
+ // Give them a chance to exit by sleeping. If we don't wait, we
+ // might not reuse them on the next batch.
+ time.Sleep(10 * time.Millisecond)
+ }
+ s1 := new(MemStats)
+ ReadMemStats(s1)
+ consumed := int64(s1.StackSys - s0.StackSys)
+ t.Logf("Consumed %vMB for stack mem", consumed>>20)
+ estimate := int64(8 * BatchSize * ArraySize * RecursionDepth) // 8 is to reduce flakiness.
+ if consumed > estimate {
+ t.Fatalf("Stack mem: want %v, got %v", estimate, consumed)
+ }
+ // Due to broken stack memory accounting (http://golang.org/issue/7468),
+ // StackInuse can decrease during function execution, so we cast the values to int64.
+ inuse := int64(s1.StackInuse) - int64(s0.StackInuse)
+ t.Logf("Inuse %vMB for stack mem", inuse>>20)
+ if inuse > 4<<20 {
+ t.Fatalf("Stack inuse: want %v, got %v", 4<<20, inuse)
+ }
+}
+
+// Test stack growing in different contexts.
+func TestStackGrowth(t *testing.T) {
+ switch GOARCH {
+ case "386", "arm":
+ t.Skipf("skipping test on %q; see issue 8083", GOARCH)
+ }
+ t.Parallel()
+ var wg sync.WaitGroup
+
+ // in a normal goroutine
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ growStack()
+ }()
+ wg.Wait()
+
+ // in locked goroutine
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ LockOSThread()
+ growStack()
+ UnlockOSThread()
+ }()
+ wg.Wait()
+
+ // in finalizer
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ done := make(chan bool)
+ go func() {
+ s := new(string)
+ SetFinalizer(s, func(ss *string) {
+ growStack()
+ done <- true
+ })
+ s = nil
+ done <- true
+ }()
+ <-done
+ GC()
+ select {
+ case <-done:
+ case <-time.After(20 * time.Second):
+ t.Fatal("finalizer did not run")
+ }
+ }()
+ wg.Wait()
+}
+
+// ... and in init
+//func init() {
+// growStack()
+//}
+
+func growStack() {
+ n := 1 << 10
+ if testing.Short() {
+ n = 1 << 8
+ }
+ for i := 0; i < n; i++ {
+ x := 0
+ growStackIter(&x, i)
+ if x != i+1 {
+ panic("stack is corrupted")
+ }
+ }
+ GC()
+}
+
+// This function is not an anonymous func, so that the compiler can do escape
+// analysis and place x on the stack (so that stack growth can then update the pointer).
+func growStackIter(p *int, n int) {
+ if n == 0 {
+ *p = n + 1
+ GC()
+ return
+ }
+ *p = n + 1
+ x := 0
+ growStackIter(&x, n-1)
+ if x != n {
+ panic("stack is corrupted")
+ }
+}
+
+func TestStackGrowthCallback(t *testing.T) {
+ t.Parallel()
+ var wg sync.WaitGroup
+
+ // test stack growth at chan op
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ c := make(chan int, 1)
+ growStackWithCallback(func() {
+ c <- 1
+ <-c
+ })
+ }()
+
+ // test stack growth at map op
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ m := make(map[int]int)
+ growStackWithCallback(func() {
+ _, _ = m[1]
+ m[1] = 1
+ })
+ }()
+
+ // test stack growth at goroutine creation
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ growStackWithCallback(func() {
+ done := make(chan bool)
+ go func() {
+ done <- true
+ }()
+ <-done
+ })
+ }()
+
+ wg.Wait()
+}
+
+func growStackWithCallback(cb func()) {
+ var f func(n int)
+ f = func(n int) {
+ if n == 0 {
+ cb()
+ return
+ }
+ f(n - 1)
+ }
+ for i := 0; i < 1<<10; i++ {
+ f(i)
+ }
+}
+
+// TestDeferPtrs tests the adjustment of Defer's argument pointers (p aka &y)
+// during a stack copy.
+func set(p *int, x int) {
+ *p = x
+}
+func TestDeferPtrs(t *testing.T) {
+ var y int
+
+ defer func() {
+ if y != 42 {
+ t.Errorf("defer's stack references were not adjusted appropriately")
+ }
+ }()
+ defer set(&y, 42)
+ growStack()
+}
+
+// use about n KB of stack
+func useStack(n int) {
+ if n == 0 {
+ return
+ }
+ var b [1024]byte // makes frame about 1KB
+ useStack(n - 1 + int(b[99]))
+}
+
+func growing(c chan int, done chan struct{}) {
+ for n := range c {
+ useStack(n)
+ done <- struct{}{}
+ }
+ done <- struct{}{}
+}
+
+func TestStackCache(t *testing.T) {
+ // Allocate a bunch of goroutines and grow their stacks.
+ // Repeat a few times to test the stack cache.
+ const (
+ R = 4
+ G = 200
+ S = 5
+ )
+ for i := 0; i < R; i++ {
+ var reqchans [G]chan int
+ done := make(chan struct{})
+ for j := 0; j < G; j++ {
+ reqchans[j] = make(chan int)
+ go growing(reqchans[j], done)
+ }
+ for s := 0; s < S; s++ {
+ for j := 0; j < G; j++ {
+ reqchans[j] <- 1 << uint(s)
+ }
+ for j := 0; j < G; j++ {
+ <-done
+ }
+ }
+ for j := 0; j < G; j++ {
+ close(reqchans[j])
+ }
+ for j := 0; j < G; j++ {
+ <-done
+ }
+ }
+}
+
+func TestStackOutput(t *testing.T) {
+ b := make([]byte, 1024)
+ stk := string(b[:Stack(b, false)])
+ if !strings.HasPrefix(stk, "goroutine ") {
+ t.Errorf("Stack (len %d):\n%s", len(stk), stk)
+ t.Errorf("Stack output should begin with \"goroutine \"")
+ }
+}
+
+func TestStackAllOutput(t *testing.T) {
+ b := make([]byte, 1024)
+ stk := string(b[:Stack(b, true)])
+ if !strings.HasPrefix(stk, "goroutine ") {
+ t.Errorf("Stack (len %d):\n%s", len(stk), stk)
+ t.Errorf("Stack output should begin with \"goroutine \"")
+ }
+}
+
+func TestStackPanic(t *testing.T) {
+ // Test that stack copying copies panics correctly. This is difficult
+ // to test because it is very unlikely that the stack will be copied
+ // in the middle of gopanic. But it can happen.
+ // To make this test effective, edit panic.go:gopanic and uncomment
+ // the GC() call just before freedefer(d).
+ defer func() {
+ if x := recover(); x == nil {
+ t.Errorf("recover failed")
+ }
+ }()
+ useStack(32)
+ panic("test panic")
+}
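
The splitTests helpers above (stack3812 through stack5000, in 4-byte steps) are plainly machine-generated. A hypothetical generator sketch, not part of this CL, that would print one such helper per frame size; the 4..5000 range and the output format are assumptions based on the helpers visible above:

package main

import "fmt"

// Prints split-test helpers like the stackNNNN functions above,
// one per 4-byte frame-size step up to just past the 4k segment size.
func main() {
	for n := 4; n <= 5000; n += 4 {
		fmt.Printf("func stack%d() (uintptr, uintptr) { var buf [%d]byte; use(buf[:]); return Stackguard() }\n", n, n)
	}
}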
diff --git a/src/runtime/string.c b/src/runtime/string.c
new file mode 100644
index 000000000..4f25adf8e
--- /dev/null
+++ b/src/runtime/string.c
@@ -0,0 +1,288 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "race.h"
+#include "textflag.h"
+
+String runtime·emptystring;
+
+#pragma textflag NOSPLIT
+intgo
+runtime·findnull(byte *s)
+{
+ intgo l;
+
+ if(s == nil)
+ return 0;
+ for(l=0; s[l]!=0; l++)
+ ;
+ return l;
+}
+
+intgo
+runtime·findnullw(uint16 *s)
+{
+ intgo l;
+
+ if(s == nil)
+ return 0;
+ for(l=0; s[l]!=0; l++)
+ ;
+ return l;
+}
+
+uintptr runtime·maxstring = 256; // a hint for print
+
+static String
+gostringsize(intgo l)
+{
+ String s;
+ uintptr ms;
+
+ if(l == 0)
+ return runtime·emptystring;
+ s.str = runtime·mallocgc(l, 0, FlagNoScan|FlagNoZero);
+ s.len = l;
+ for(;;) {
+ ms = runtime·maxstring;
+ if((uintptr)l <= ms || runtime·casp((void**)&runtime·maxstring, (void*)ms, (void*)l))
+ break;
+ }
+ return s;
+}
+
+String
+runtime·gostring(byte *str)
+{
+ intgo l;
+ String s;
+
+ l = runtime·findnull(str);
+ s = gostringsize(l);
+ runtime·memmove(s.str, str, l);
+ return s;
+}
+
+String
+runtime·gostringn(byte *str, intgo l)
+{
+ String s;
+
+ s = gostringsize(l);
+ runtime·memmove(s.str, str, l);
+ return s;
+}
+
+// used by cmd/cgo
+Slice
+runtime·gobytes(byte *p, intgo n)
+{
+ Slice sl;
+
+ sl.array = runtime·mallocgc(n, 0, FlagNoScan|FlagNoZero);
+ sl.len = n;
+ sl.cap = n;
+ runtime·memmove(sl.array, p, n);
+ return sl;
+}
+
+#pragma textflag NOSPLIT
+String
+runtime·gostringnocopy(byte *str)
+{
+ String s;
+
+ s.str = str;
+ s.len = runtime·findnull(str);
+ return s;
+}
+
+// TODO: move this elsewhere
+enum
+{
+ Bit1 = 7,
+ Bitx = 6,
+ Bit2 = 5,
+ Bit3 = 4,
+ Bit4 = 3,
+ Bit5 = 2,
+
+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+
+ Maskx = (1<<Bitx)-1, /* 0011 1111 */
+
+ Runeerror = 0xFFFD,
+
+ SurrogateMin = 0xD800,
+ SurrogateMax = 0xDFFF,
+
+ Runemax = 0x10FFFF, /* maximum rune value */
+};
+
+static int32
+runetochar(byte *str, int32 rune) /* note: in original, arg2 was pointer */
+{
+ /* Runes are signed, so convert to unsigned for range check. */
+ uint32 c;
+
+ /*
+ * one character sequence
+ * 00000-0007F => 00-7F
+ */
+ c = rune;
+ if(c <= Rune1) {
+ str[0] = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ if(c <= Rune2) {
+ str[0] = T2 | (c >> 1*Bitx);
+ str[1] = Tx | (c & Maskx);
+ return 2;
+ }
+
+ /*
+ * If the Rune is out of range or a surrogate half, convert it to the error rune.
+ * Do this test here because the error rune encodes to three bytes.
+ * Doing it earlier would duplicate work, since an out of range
+ * Rune wouldn't have fit in one or two bytes.
+ */
+ if (c > Runemax)
+ c = Runeerror;
+ if (SurrogateMin <= c && c <= SurrogateMax)
+ c = Runeerror;
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ if (c <= Rune3) {
+ str[0] = T3 | (c >> 2*Bitx);
+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[2] = Tx | (c & Maskx);
+ return 3;
+ }
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ str[0] = T4 | (c >> 3*Bitx);
+ str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+ str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[3] = Tx | (c & Maskx);
+ return 4;
+}
+
+String
+runtime·gostringw(uint16 *str)
+{
+ intgo n1, n2, i;
+ byte buf[8];
+ String s;
+
+ n1 = 0;
+ for(i=0; str[i]; i++)
+ n1 += runetochar(buf, str[i]);
+ s = gostringsize(n1+4);
+ n2 = 0;
+ for(i=0; str[i]; i++) {
+ // check for race
+ if(n2 >= n1)
+ break;
+ n2 += runetochar(s.str+n2, str[i]);
+ }
+ s.len = n2;
+ s.str[s.len] = 0;
+ return s;
+}
+
+String
+runtime·catstring(String s1, String s2)
+{
+ String s3;
+
+ if(s1.len == 0)
+ return s2;
+ if(s2.len == 0)
+ return s1;
+
+ s3 = gostringsize(s1.len + s2.len);
+ runtime·memmove(s3.str, s1.str, s1.len);
+ runtime·memmove(s3.str+s1.len, s2.str, s2.len);
+ return s3;
+}
+
+int32
+runtime·strcmp(byte *s1, byte *s2)
+{
+ uintptr i;
+ byte c1, c2;
+
+ for(i=0;; i++) {
+ c1 = s1[i];
+ c2 = s2[i];
+ if(c1 < c2)
+ return -1;
+ if(c1 > c2)
+ return +1;
+ if(c1 == 0)
+ return 0;
+ }
+}
+
+int32
+runtime·strncmp(byte *s1, byte *s2, uintptr n)
+{
+ uintptr i;
+ byte c1, c2;
+
+ for(i=0; i<n; i++) {
+ c1 = s1[i];
+ c2 = s2[i];
+ if(c1 < c2)
+ return -1;
+ if(c1 > c2)
+ return +1;
+ if(c1 == 0)
+ break;
+ }
+ return 0;
+}
+
+byte*
+runtime·strstr(byte *s1, byte *s2)
+{
+ byte *sp1, *sp2;
+
+ if(*s2 == 0)
+ return s1;
+ for(; *s1; s1++) {
+ if(*s1 != *s2)
+ continue;
+ sp1 = s1;
+ sp2 = s2;
+ for(;;) {
+ if(*sp2 == 0)
+ return s1;
+ if(*sp1++ != *sp2++)
+ break;
+ }
+ }
+ return nil;
+}
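
The boundaries in runetochar above (Rune1/Rune2/Rune3, the surrogate range, Runemax) are the standard UTF-8 encoding boundaries. A small Go sketch, for comparison only, showing the same clamping behaviour with the standard unicode/utf8 package:

package main

import (
	"fmt"
	"unicode/utf8"
)

// encode clamps invalid runes the way runetochar does: values above
// 0x10FFFF or in the surrogate range D800-DFFF become U+FFFD.
func encode(r rune) []byte {
	if r > utf8.MaxRune || (r >= 0xD800 && r <= 0xDFFF) {
		r = utf8.RuneError
	}
	buf := make([]byte, utf8.UTFMax)
	return buf[:utf8.EncodeRune(buf, r)]
}

func main() {
	// one-, two-, three-, and four-byte sequences, plus a clamped surrogate
	for _, r := range []rune{0x7F, 0x7FF, 0xFFFF, 0x10FFFF, 0xD800} {
		fmt.Printf("%#x -> % x\n", r, encode(r))
	}
}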
diff --git a/src/runtime/string.go b/src/runtime/string.go
new file mode 100644
index 000000000..da3160449
--- /dev/null
+++ b/src/runtime/string.go
@@ -0,0 +1,241 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+func concatstrings(a []string) string {
+ idx := 0
+ l := 0
+ count := 0
+ for i, x := range a {
+ n := len(x)
+ if n == 0 {
+ continue
+ }
+ if l+n < l {
+ gothrow("string concatenation too long")
+ }
+ l += n
+ count++
+ idx = i
+ }
+ if count == 0 {
+ return ""
+ }
+ if count == 1 {
+ return a[idx]
+ }
+ s, b := rawstring(l)
+ l = 0
+ for _, x := range a {
+ copy(b[l:], x)
+ l += len(x)
+ }
+ return s
+}
+
+//go:nosplit
+func concatstring2(a [2]string) string {
+ return concatstrings(a[:])
+}
+
+//go:nosplit
+func concatstring3(a [3]string) string {
+ return concatstrings(a[:])
+}
+
+//go:nosplit
+func concatstring4(a [4]string) string {
+ return concatstrings(a[:])
+}
+
+//go:nosplit
+func concatstring5(a [5]string) string {
+ return concatstrings(a[:])
+}
+
+func slicebytetostring(b []byte) string {
+ if raceenabled && len(b) > 0 {
+ racereadrangepc(unsafe.Pointer(&b[0]),
+ uintptr(len(b)),
+ getcallerpc(unsafe.Pointer(&b)),
+ funcPC(slicebytetostring))
+ }
+ s, c := rawstring(len(b))
+ copy(c, b)
+ return s
+}
+
+func slicebytetostringtmp(b []byte) string {
+ // Return a "string" referring to the actual []byte bytes.
+ // This is only for use by internal compiler optimizations
+ // that know that the string form will be discarded before
+ // the calling goroutine could possibly modify the original
+ // slice or synchronize with another goroutine.
+ // Today, the only such case is a m[string(k)] lookup where
+ // m is a string-keyed map and k is a []byte.
+
+ if raceenabled && len(b) > 0 {
+ racereadrangepc(unsafe.Pointer(&b[0]),
+ uintptr(len(b)),
+ getcallerpc(unsafe.Pointer(&b)),
+ funcPC(slicebytetostringtmp))
+ }
+ return *(*string)(unsafe.Pointer(&b))
+}
+
+func stringtoslicebyte(s string) []byte {
+ b := rawbyteslice(len(s))
+ copy(b, s)
+ return b
+}
+
+func stringtoslicerune(s string) []rune {
+ // two passes.
+ // unlike slicerunetostring, no race because strings are immutable.
+ n := 0
+ t := s
+ for len(s) > 0 {
+ _, k := charntorune(s)
+ s = s[k:]
+ n++
+ }
+ a := rawruneslice(n)
+ n = 0
+ for len(t) > 0 {
+ r, k := charntorune(t)
+ t = t[k:]
+ a[n] = r
+ n++
+ }
+ return a
+}
+
+func slicerunetostring(a []rune) string {
+ if raceenabled && len(a) > 0 {
+ racereadrangepc(unsafe.Pointer(&a[0]),
+ uintptr(len(a))*unsafe.Sizeof(a[0]),
+ getcallerpc(unsafe.Pointer(&a)),
+ funcPC(slicerunetostring))
+ }
+ var dum [4]byte
+ size1 := 0
+ for _, r := range a {
+ size1 += runetochar(dum[:], r)
+ }
+ s, b := rawstring(size1 + 3)
+ size2 := 0
+ for _, r := range a {
+ // check for race
+ if size2 >= size1 {
+ break
+ }
+ size2 += runetochar(b[size2:], r)
+ }
+ return s[:size2]
+}
+
+type stringStruct struct {
+ str unsafe.Pointer
+ len int
+}
+
+func intstring(v int64) string {
+ s, b := rawstring(4)
+ n := runetochar(b, rune(v))
+ return s[:n]
+}
+
+// stringiter returns the index of the next
+// rune after the rune that starts at s[k].
+func stringiter(s string, k int) int {
+ if k >= len(s) {
+ // 0 is end of iteration
+ return 0
+ }
+
+ c := s[k]
+ if c < runeself {
+ return k + 1
+ }
+
+ // multi-char rune
+ _, n := charntorune(s[k:])
+ return k + n
+}
+
+// stringiter2 returns the rune that starts at s[k]
+// and the index where the next rune starts.
+func stringiter2(s string, k int) (int, rune) {
+ if k >= len(s) {
+ // 0 is end of iteration
+ return 0, 0
+ }
+
+ c := s[k]
+ if c < runeself {
+ return k + 1, rune(c)
+ }
+
+ // multi-char rune
+ r, n := charntorune(s[k:])
+ return k + n, r
+}
+
+// rawstring allocates storage for a new string. The returned
+// string and byte slice both refer to the same storage.
+// The storage is not zeroed. Callers should use
+// b to set the string contents and then drop b.
+func rawstring(size int) (s string, b []byte) {
+ p := gomallocgc(uintptr(size), nil, flagNoScan|flagNoZero)
+
+ (*stringStruct)(unsafe.Pointer(&s)).str = p
+ (*stringStruct)(unsafe.Pointer(&s)).len = size
+
+ (*slice)(unsafe.Pointer(&b)).array = (*uint8)(p)
+ (*slice)(unsafe.Pointer(&b)).len = uint(size)
+ (*slice)(unsafe.Pointer(&b)).cap = uint(size)
+
+ for {
+ ms := maxstring
+ if uintptr(size) <= uintptr(ms) || casuintptr((*uintptr)(unsafe.Pointer(&maxstring)), uintptr(ms), uintptr(size)) {
+ return
+ }
+ }
+}
+
+// rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
+func rawbyteslice(size int) (b []byte) {
+ cap := goroundupsize(uintptr(size))
+ p := gomallocgc(cap, nil, flagNoScan|flagNoZero)
+ if cap != uintptr(size) {
+ memclr(add(p, uintptr(size)), cap-uintptr(size))
+ }
+
+ (*slice)(unsafe.Pointer(&b)).array = (*uint8)(p)
+ (*slice)(unsafe.Pointer(&b)).len = uint(size)
+ (*slice)(unsafe.Pointer(&b)).cap = uint(cap)
+ return
+}
+
+// rawruneslice allocates a new rune slice. The rune slice is not zeroed.
+func rawruneslice(size int) (b []rune) {
+ if uintptr(size) > maxMem/4 {
+ gothrow("out of memory")
+ }
+ mem := goroundupsize(uintptr(size) * 4)
+ p := gomallocgc(mem, nil, flagNoScan|flagNoZero)
+ if mem != uintptr(size)*4 {
+ memclr(add(p, uintptr(size)*4), mem-uintptr(size)*4)
+ }
+
+ (*slice)(unsafe.Pointer(&b)).array = (*uint8)(p)
+ (*slice)(unsafe.Pointer(&b)).len = uint(size)
+ (*slice)(unsafe.Pointer(&b)).cap = uint(mem / 4)
+ return
+}
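
stringtoslicerune above sizes the result in a first pass and decodes in a second. A rough user-level sketch of the same two-pass shape (not the runtime code itself), using unicode/utf8:

package main

import (
	"fmt"
	"unicode/utf8"
)

func toRunes(s string) []rune {
	// first pass: count runes so the slice is sized exactly once
	a := make([]rune, 0, utf8.RuneCountInString(s))
	// second pass: decode each rune in turn
	for len(s) > 0 {
		r, k := utf8.DecodeRuneInString(s)
		a = append(a, r)
		s = s[k:]
	}
	return a
}

func main() {
	fmt.Printf("%q\n", toRunes("a\u5566\u7788b"))
}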
diff --git a/src/runtime/string_test.go b/src/runtime/string_test.go
new file mode 100644
index 000000000..e7ac51a5f
--- /dev/null
+++ b/src/runtime/string_test.go
@@ -0,0 +1,147 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "runtime"
+ "strings"
+ "testing"
+)
+
+func BenchmarkCompareStringEqual(b *testing.B) {
+ bytes := []byte("Hello Gophers!")
+ s1, s2 := string(bytes), string(bytes)
+ for i := 0; i < b.N; i++ {
+ if s1 != s2 {
+ b.Fatal("s1 != s2")
+ }
+ }
+}
+
+func BenchmarkCompareStringIdentical(b *testing.B) {
+ s1 := "Hello Gophers!"
+ s2 := s1
+ for i := 0; i < b.N; i++ {
+ if s1 != s2 {
+ b.Fatal("s1 != s2")
+ }
+ }
+}
+
+func BenchmarkCompareStringSameLength(b *testing.B) {
+ s1 := "Hello Gophers!"
+ s2 := "Hello, Gophers"
+ for i := 0; i < b.N; i++ {
+ if s1 == s2 {
+ b.Fatal("s1 == s2")
+ }
+ }
+}
+
+func BenchmarkCompareStringDifferentLength(b *testing.B) {
+ s1 := "Hello Gophers!"
+ s2 := "Hello, Gophers!"
+ for i := 0; i < b.N; i++ {
+ if s1 == s2 {
+ b.Fatal("s1 == s2")
+ }
+ }
+}
+
+func BenchmarkCompareStringBigUnaligned(b *testing.B) {
+ bytes := make([]byte, 0, 1<<20)
+ for len(bytes) < 1<<20 {
+ bytes = append(bytes, "Hello Gophers!"...)
+ }
+ s1, s2 := string(bytes), "hello"+string(bytes)
+ for i := 0; i < b.N; i++ {
+ if s1 != s2[len("hello"):] {
+ b.Fatal("s1 != s2")
+ }
+ }
+ b.SetBytes(int64(len(s1)))
+}
+
+func BenchmarkCompareStringBig(b *testing.B) {
+ bytes := make([]byte, 0, 1<<20)
+ for len(bytes) < 1<<20 {
+ bytes = append(bytes, "Hello Gophers!"...)
+ }
+ s1, s2 := string(bytes), string(bytes)
+ for i := 0; i < b.N; i++ {
+ if s1 != s2 {
+ b.Fatal("s1 != s2")
+ }
+ }
+ b.SetBytes(int64(len(s1)))
+}
+
+func BenchmarkRuneIterate(b *testing.B) {
+ bytes := make([]byte, 100)
+ for i := range bytes {
+ bytes[i] = byte('A')
+ }
+ s := string(bytes)
+ for i := 0; i < b.N; i++ {
+ for range s {
+ }
+ }
+}
+
+func BenchmarkRuneIterate2(b *testing.B) {
+ bytes := make([]byte, 100)
+ for i := range bytes {
+ bytes[i] = byte('A')
+ }
+ s := string(bytes)
+ for i := 0; i < b.N; i++ {
+ for range s {
+ }
+ }
+}
+
+func TestStringW(t *testing.T) {
+ strings := []string{
+ "hello",
+ "a\u5566\u7788b",
+ }
+
+ for _, s := range strings {
+ var b []uint16
+ for _, c := range s {
+ b = append(b, uint16(c))
+ if c != rune(uint16(c)) {
+ t.Errorf("bad test: stringW can't handle >16 bit runes")
+ }
+ }
+ b = append(b, 0)
+ r := runtime.GostringW(b)
+ if r != s {
+ t.Errorf("gostringW(%v) = %s, want %s", b, r, s)
+ }
+ }
+}
+
+func TestLargeStringConcat(t *testing.T) {
+ output := executeTest(t, largeStringConcatSource, nil)
+ want := "panic: " + strings.Repeat("0", 1<<10) + strings.Repeat("1", 1<<10) +
+ strings.Repeat("2", 1<<10) + strings.Repeat("3", 1<<10)
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
+var largeStringConcatSource = `
+package main
+import "strings"
+func main() {
+ s0 := strings.Repeat("0", 1<<10)
+ s1 := strings.Repeat("1", 1<<10)
+ s2 := strings.Repeat("2", 1<<10)
+ s3 := strings.Repeat("3", 1<<10)
+ s := s0 + s1 + s2 + s3
+ panic(s)
+}
+`
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
new file mode 100644
index 000000000..03f618e15
--- /dev/null
+++ b/src/runtime/stubs.go
@@ -0,0 +1,240 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Declarations for runtime services implemented in C or assembly.
+// C implementations of these functions are in stubs.goc.
+// Assembly implementations are in various files, see comments with
+// each function.
+
+const ptrSize = 4 << (^uintptr(0) >> 63) // unsafe.Sizeof(uintptr(0)) but an ideal const
+const regSize = 4 << (^uintreg(0) >> 63) // unsafe.Sizeof(uintreg(0)) but an ideal const
+
+// Should be a built-in for unsafe.Pointer?
+//go:nosplit
+func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
+ return unsafe.Pointer(uintptr(p) + x)
+}
+
+// n must be a power of 2
+func roundup(p unsafe.Pointer, n uintptr) unsafe.Pointer {
+ delta := -uintptr(p) & (n - 1)
+ return unsafe.Pointer(uintptr(p) + delta)
+}
+
+// in stubs.goc
+func getg() *g
+func acquirem() *m
+func releasem(mp *m)
+func gomcache() *mcache
+
+// mcall switches from the g to the g0 stack and invokes fn(g),
+// where g is the goroutine that made the call.
+// mcall saves g's current PC/SP in g->sched so that it can be restored later.
+// It is up to fn to arrange for that later execution, typically by recording
+// g in a data structure, causing something to call ready(g) later.
+// mcall returns to the original goroutine g later, when g has been rescheduled.
+// fn must not return at all; typically it ends by calling schedule, to let the m
+// run other goroutines.
+//
+// mcall can only be called from g stacks (not g0, not gsignal).
+//go:noescape
+func mcall(fn func(*g))
+
+// onM switches from the g to the g0 stack and invokes fn().
+// When fn returns, onM switches back to the g and returns,
+// continuing execution on the g stack.
+// If arguments must be passed to fn, they can be written to
+// g->m->ptrarg (pointers) and g->m->scalararg (non-pointers)
+// before the call and then consulted during fn.
+// Similarly, fn can pass return values back in those locations.
+// If fn is written in Go, it can be a closure, which avoids the need for
+// ptrarg and scalararg entirely.
+// After reading values out of ptrarg and scalararg it is conventional
+// to zero them to avoid (memory or information) leaks.
+//
+// If onM is called from a g0 stack, it invokes fn and returns,
+// without any stack switches.
+//
+// If onM is called from a gsignal stack, it crashes the program.
+// The implication is that functions used in signal handlers must
+// not use onM.
+//
+// NOTE(rsc): We could introduce a separate onMsignal that is
+// like onM but if called from a gsignal stack would just run fn on
+// that stack. The caller of onMsignal would be required to save the
+// old values of ptrarg/scalararg and restore them when the call
+// was finished, in case the signal interrupted an onM sequence
+// in progress on the g or g0 stacks. Until there is a clear need for this,
+// we just reject onM in signal handling contexts entirely.
+//
+//go:noescape
+func onM(fn func())
+
+func badonm() {
+ gothrow("onM called from signal goroutine")
+}
+
+// C functions that run on the M stack.
+// Call using mcall.
+func gosched_m(*g)
+func park_m(*g)
+func recovery_m(*g)
+
+// More C functions that run on the M stack.
+// Call using onM.
+func mcacheRefill_m()
+func largeAlloc_m()
+func gc_m()
+func scavenge_m()
+func setFinalizer_m()
+func removeFinalizer_m()
+func markallocated_m()
+func unrollgcprog_m()
+func unrollgcproginplace_m()
+func setgcpercent_m()
+func setmaxthreads_m()
+func ready_m()
+func deferproc_m()
+func goexit_m()
+func startpanic_m()
+func dopanic_m()
+
+// memclr clears n bytes starting at ptr.
+// in memclr_*.s
+//go:noescape
+func memclr(ptr unsafe.Pointer, n uintptr)
+
+// memmove copies n bytes from "from" to "to".
+// in memmove_*.s
+//go:noescape
+func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr)
+
+const (
+ concurrentSweep = true
+)
+
+func gosched()
+func starttheworld()
+func stoptheworld()
+func newextram()
+func lockOSThread()
+func unlockOSThread()
+
+// exported value for testing
+var hashLoad = loadFactor
+
+// in asm_*.s
+//go:noescape
+func memeq(a, b unsafe.Pointer, size uintptr) bool
+
+// Code pointers for the nohash/noequal algorithms. Used for producing better error messages.
+var nohashcode uintptr
+var noequalcode uintptr
+
+// noescape hides a pointer from escape analysis. noescape is
+// the identity function but escape analysis doesn't think the
+// output depends on the input. noescape is inlined and currently
+// compiles down to a single xor instruction.
+// USE CAREFULLY!
+//go:nosplit
+func noescape(p unsafe.Pointer) unsafe.Pointer {
+ x := uintptr(p)
+ return unsafe.Pointer(x ^ 0)
+}
+
+func entersyscall()
+func entersyscallblock()
+func exitsyscall()
+
+func cgocallback(fn, frame unsafe.Pointer, framesize uintptr)
+func gogo(buf *gobuf)
+func gosave(buf *gobuf)
+func read(fd int32, p unsafe.Pointer, n int32) int32
+func close(fd int32) int32
+func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
+
+//go:noescape
+func jmpdefer(fv *funcval, argp uintptr)
+func exit1(code int32)
+func asminit()
+func setg(gg *g)
+func exit(code int32)
+func breakpoint()
+func nanotime() int64
+func usleep(usec uint32)
+func cputicks() int64
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+func munmap(addr unsafe.Pointer, n uintptr)
+func madvise(addr unsafe.Pointer, n uintptr, flags int32)
+func reflectcall(fn, arg unsafe.Pointer, n uint32, retoffset uint32)
+func osyield()
+func procyield(cycles uint32)
+func cgocallback_gofunc(fv *funcval, frame unsafe.Pointer, framesize uintptr)
+func readgogc() int32
+func purgecachedstats(c *mcache)
+func gostringnocopy(b *byte) string
+func goexit()
+
+//go:noescape
+func write(fd uintptr, p unsafe.Pointer, n int32) int32
+
+//go:noescape
+func cas(ptr *uint32, old, new uint32) bool
+
+//go:noescape
+func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+
+//go:noescape
+func casuintptr(ptr *uintptr, old, new uintptr) bool
+
+//go:noescape
+func atomicstoreuintptr(ptr *uintptr, new uintptr)
+
+//go:noescape
+func atomicloaduintptr(ptr *uintptr) uintptr
+
+//go:noescape
+func atomicloaduint(ptr *uint) uint
+
+//go:noescape
+func setcallerpc(argp unsafe.Pointer, pc uintptr)
+
+//go:noescape
+func getcallerpc(argp unsafe.Pointer) uintptr
+
+//go:noescape
+func getcallersp(argp unsafe.Pointer) uintptr
+
+//go:noescape
+func asmcgocall(fn, arg unsafe.Pointer)
+
+//go:noescape
+func asmcgocall_errno(fn, arg unsafe.Pointer) int32
+
+//go:noescape
+func open(name *byte, mode, perm int32) int32
+
+//go:noescape
+func gotraceback(*bool) int32
+
+const _NoArgs = ^uintptr(0)
+
+func newstack()
+func newproc()
+func lessstack()
+func morestack()
+func mstart()
+func rt0_go()
+func sigpanic()
+
+// return0 is a stub used to return 0 from deferproc.
+// It is called at the very end of deferproc to signal
+// the calling Go function that it should not jump
+// to deferreturn.
+// in asm_*.s
+func return0()
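
The ptrSize constant above, 4 << (^uintptr(0) >> 63), relies on ^uintptr(0) having bit 63 set only on 64-bit platforms: the shift yields 1 there (giving 8) and 0 on 32-bit (leaving 4). A quick sketch checking that against unsafe.Sizeof:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	const ptrSize = 4 << (^uintptr(0) >> 63) // 8 on 64-bit, 4 on 32-bit
	fmt.Println(ptrSize, ptrSize == unsafe.Sizeof(uintptr(0)))
}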
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
new file mode 100644
index 000000000..bd9e9924c
--- /dev/null
+++ b/src/runtime/symtab.go
@@ -0,0 +1,292 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// NOTE: Func does not expose the actual unexported fields, because we return *Func
+// values to users, and we want to keep them from being able to overwrite the data
+// with (say) *f = Func{}.
+// All code operating on a *Func must call raw to get the *_func instead.
+
+// A Func represents a Go function in the running binary.
+type Func struct {
+ opaque struct{} // unexported field to disallow conversions
+}
+
+func (f *Func) raw() *_func {
+ return (*_func)(unsafe.Pointer(f))
+}
+
+// funcdata.h
+const (
+ _PCDATA_ArgSize = 0
+ _PCDATA_StackMapIndex = 1
+ _FUNCDATA_ArgsPointerMaps = 0
+ _FUNCDATA_LocalsPointerMaps = 1
+ _FUNCDATA_DeadValueMaps = 2
+ _ArgsSizeUnknown = -0x80000000
+)
+
+var (
+ pclntable []byte
+ ftab []functab
+ filetab []uint32
+
+ pclntab, epclntab struct{} // linker symbols
+)
+
+type functab struct {
+ entry uintptr
+ funcoff uintptr
+}
+
+func symtabinit() {
+ // See golang.org/s/go12symtab for header: 0xfffffffb,
+ // two zero bytes, a byte giving the PC quantum,
+ // and a byte giving the pointer width in bytes.
+ pcln := (*[8]byte)(unsafe.Pointer(&pclntab))
+ pcln32 := (*[2]uint32)(unsafe.Pointer(&pclntab))
+ if pcln32[0] != 0xfffffffb || pcln[4] != 0 || pcln[5] != 0 || pcln[6] != _PCQuantum || pcln[7] != ptrSize {
+ println("runtime: function symbol table header:", hex(pcln32[0]), hex(pcln[4]), hex(pcln[5]), hex(pcln[6]), hex(pcln[7]))
+ gothrow("invalid function symbol table\n")
+ }
+
+ // pclntable is all bytes of pclntab symbol.
+ sp := (*sliceStruct)(unsafe.Pointer(&pclntable))
+ sp.array = unsafe.Pointer(&pclntab)
+ sp.len = int(uintptr(unsafe.Pointer(&epclntab)) - uintptr(unsafe.Pointer(&pclntab)))
+ sp.cap = sp.len
+
+ // ftab is lookup table for function by program counter.
+ nftab := int(*(*uintptr)(add(unsafe.Pointer(pcln), 8)))
+ p := add(unsafe.Pointer(pcln), 8+ptrSize)
+ sp = (*sliceStruct)(unsafe.Pointer(&ftab))
+ sp.array = p
+ sp.len = nftab + 1
+ sp.cap = sp.len
+ for i := 0; i < nftab; i++ {
+ // NOTE: ftab[nftab].entry is legal; it is the address beyond the final function.
+ if ftab[i].entry > ftab[i+1].entry {
+ f1 := (*_func)(unsafe.Pointer(&pclntable[ftab[i].funcoff]))
+ f2 := (*_func)(unsafe.Pointer(&pclntable[ftab[i+1].funcoff]))
+ f2name := "end"
+ if i+1 < nftab {
+ f2name = gofuncname(f2)
+ }
+ println("function symbol table not sorted by program counter:", hex(ftab[i].entry), gofuncname(f1), ">", hex(ftab[i+1].entry), f2name)
+ for j := 0; j <= i; j++ {
+ print("\t", hex(ftab[j].entry), " ", gofuncname((*_func)(unsafe.Pointer(&pclntable[ftab[j].funcoff]))))
+ }
+ gothrow("invalid runtime symbol table")
+ }
+ }
+
+ // file table follows ftab.
+ sp = (*sliceStruct)(unsafe.Pointer(&filetab))
+ p = unsafe.Pointer(add(unsafe.Pointer(pcln), ftab[nftab].funcoff))
+ sp.array = unsafe.Pointer(add(unsafe.Pointer(pcln), ftab[nftab].funcoff))
+ // length is in first element of array.
+ // set len to 1 so we can get first element.
+ sp.len = 1
+ sp.cap = 1
+ sp.len = int(filetab[0])
+ sp.cap = sp.len
+}
+
+// FuncForPC returns a *Func describing the function that contains the
+// given program counter address, or else nil.
+func FuncForPC(pc uintptr) *Func {
+ return (*Func)(unsafe.Pointer(findfunc(pc)))
+}
+
+// Name returns the name of the function.
+func (f *Func) Name() string {
+ return gofuncname(f.raw())
+}
+
+// Entry returns the entry address of the function.
+func (f *Func) Entry() uintptr {
+ return f.raw().entry
+}
+
+// FileLine returns the file name and line number of the
+// source code corresponding to the program counter pc.
+// The result will not be accurate if pc is not a program
+// counter within f.
+func (f *Func) FileLine(pc uintptr) (file string, line int) {
+ // Pass strict=false here, because anyone can call this function,
+ // and they might just be wrong about targetpc belonging to f.
+ line = int(funcline1(f.raw(), pc, &file, false))
+ return file, line
+}
+
+func findfunc(pc uintptr) *_func {
+ if len(ftab) == 0 {
+ return nil
+ }
+
+ if pc < ftab[0].entry || pc >= ftab[len(ftab)-1].entry {
+ return nil
+ }
+
+ // binary search to find func with entry <= pc.
+ lo := 0
+ nf := len(ftab) - 1 // last entry is sentinel
+ for nf > 0 {
+ n := nf / 2
+ f := &ftab[lo+n]
+ if f.entry <= pc && pc < ftab[lo+n+1].entry {
+ return (*_func)(unsafe.Pointer(&pclntable[f.funcoff]))
+ } else if pc < f.entry {
+ nf = n
+ } else {
+ lo += n + 1
+ nf -= n + 1
+ }
+ }
+
+ gothrow("findfunc: binary search failed")
+ return nil
+}
+
+func pcvalue(f *_func, off int32, targetpc uintptr, strict bool) int32 {
+ if off == 0 {
+ return -1
+ }
+ p := pclntable[off:]
+ pc := f.entry
+ val := int32(-1)
+ for {
+ var ok bool
+ p, ok = step(p, &pc, &val, pc == f.entry)
+ if !ok {
+ break
+ }
+ if targetpc < pc {
+ return val
+ }
+ }
+
+ // If there was a table, it should have covered all program counters.
+ // If not, something is wrong.
+ if panicking != 0 || !strict {
+ return -1
+ }
+
+ print("runtime: invalid pc-encoded table f=", gofuncname(f), " pc=", hex(pc), " targetpc=", hex(targetpc), " tab=", p, "\n")
+
+ p = pclntable[off:]
+ pc = f.entry
+ val = -1
+ for {
+ var ok bool
+ p, ok = step(p, &pc, &val, pc == f.entry)
+ if !ok {
+ break
+ }
+ print("\tvalue=", val, " until pc=", hex(pc), "\n")
+ }
+
+ gothrow("invalid runtime symbol table")
+ return -1
+}
+
+func funcname(f *_func) *byte {
+ if f == nil || f.nameoff == 0 {
+ return nil
+ }
+ return (*byte)(unsafe.Pointer(&pclntable[f.nameoff]))
+}
+
+func gofuncname(f *_func) string {
+ return gostringnocopy(funcname(f))
+}
+
+func funcline1(f *_func, targetpc uintptr, file *string, strict bool) int32 {
+ *file = "?"
+ fileno := int(pcvalue(f, f.pcfile, targetpc, strict))
+ line := pcvalue(f, f.pcln, targetpc, strict)
+ if fileno == -1 || line == -1 || fileno >= len(filetab) {
+ // print("looking for ", hex(targetpc), " in ", gofuncname(f), " got file=", fileno, " line=", lineno, "\n")
+ return 0
+ }
+ *file = gostringnocopy(&pclntable[filetab[fileno]])
+ return line
+}
+
+func funcline(f *_func, targetpc uintptr, file *string) int32 {
+ return funcline1(f, targetpc, file, true)
+}
+
+func funcspdelta(f *_func, targetpc uintptr) int32 {
+ x := pcvalue(f, f.pcsp, targetpc, true)
+ if x&(ptrSize-1) != 0 {
+ print("invalid spdelta ", f.pcsp, " ", x, "\n")
+ }
+ return x
+}
+
+func pcdatavalue(f *_func, table int32, targetpc uintptr) int32 {
+ if table < 0 || table >= f.npcdata {
+ return -1
+ }
+ off := *(*int32)(add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(table)*4))
+ return pcvalue(f, off, targetpc, true)
+}
+
+func funcarglen(f *_func, targetpc uintptr) int32 {
+ if targetpc == f.entry {
+ return 0
+ }
+ return pcdatavalue(f, _PCDATA_ArgSize, targetpc-_PCQuantum)
+}
+
+func funcdata(f *_func, i int32) unsafe.Pointer {
+ if i < 0 || i >= f.nfuncdata {
+ return nil
+ }
+ p := add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(f.npcdata)*4)
+ if ptrSize == 8 && uintptr(p)&4 != 0 {
+ if uintptr(unsafe.Pointer(f))&4 != 0 {
+ println("runtime: misaligned func", f)
+ }
+ p = add(p, 4)
+ }
+ return *(*unsafe.Pointer)(add(p, uintptr(i)*ptrSize))
+}
+
+// step advances to the next pc, value pair in the encoded table.
+func step(p []byte, pc *uintptr, val *int32, first bool) (newp []byte, ok bool) {
+ p, uvdelta := readvarint(p)
+ if uvdelta == 0 && !first {
+ return nil, false
+ }
+ if uvdelta&1 != 0 {
+ uvdelta = ^(uvdelta >> 1)
+ } else {
+ uvdelta >>= 1
+ }
+ vdelta := int32(uvdelta)
+ p, pcdelta := readvarint(p)
+ *pc += uintptr(pcdelta * _PCQuantum)
+ *val += vdelta
+ return p, true
+}
+
+// readvarint reads a varint from p.
+func readvarint(p []byte) (newp []byte, val uint32) {
+ var v, shift uint32
+ for {
+ b := p[0]
+ p = p[1:]
+ v |= (uint32(b) & 0x7F) << shift
+ if b&0x80 == 0 {
+ break
+ }
+ shift += 7
+ }
+ return p, v
+}
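
FuncForPC, Name, Entry, and FileLine above are the exported surface of this table. A short usage sketch that resolves the caller's own position through them:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	pc, _, _, ok := runtime.Caller(0) // pc is inside main
	if !ok {
		return
	}
	f := runtime.FuncForPC(pc)
	file, line := f.FileLine(pc)
	fmt.Printf("%s entry=%#x at %s:%d\n", f.Name(), f.Entry(), file, line)
}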
diff --git a/src/runtime/symtab_test.go b/src/runtime/symtab_test.go
new file mode 100644
index 000000000..bd9fe18c4
--- /dev/null
+++ b/src/runtime/symtab_test.go
@@ -0,0 +1,47 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "runtime"
+ "strings"
+ "testing"
+)
+
+func TestCaller(t *testing.T) {
+ procs := runtime.GOMAXPROCS(-1)
+ c := make(chan bool, procs)
+ for p := 0; p < procs; p++ {
+ go func() {
+ for i := 0; i < 1000; i++ {
+ testCallerFoo(t)
+ }
+ c <- true
+ }()
+ defer func() {
+ <-c
+ }()
+ }
+}
+
+func testCallerFoo(t *testing.T) {
+ testCallerBar(t)
+}
+
+func testCallerBar(t *testing.T) {
+ for i := 0; i < 2; i++ {
+ pc, file, line, ok := runtime.Caller(i)
+ f := runtime.FuncForPC(pc)
+ if !ok ||
+ !strings.HasSuffix(file, "symtab_test.go") ||
+ (i == 0 && !strings.HasSuffix(f.Name(), "testCallerBar")) ||
+ (i == 1 && !strings.HasSuffix(f.Name(), "testCallerFoo")) ||
+ line < 5 || line > 1000 ||
+ f.Entry() >= pc {
+ t.Errorf("incorrect symbol info %d: %t %d %d %s %s %d",
+ i, ok, f.Entry(), pc, f.Name(), file, line)
+ }
+ }
+}
diff --git a/src/runtime/sys_arm.c b/src/runtime/sys_arm.c
new file mode 100644
index 000000000..a65560e5b
--- /dev/null
+++ b/src/runtime/sys_arm.c
@@ -0,0 +1,35 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// adjust Gobuf as if it executed a call to fn with context ctxt
+// and then did an immediate Gosave.
+void
+runtime·gostartcall(Gobuf *gobuf, void (*fn)(void), void *ctxt)
+{
+ if(gobuf->lr != 0)
+ runtime·throw("invalid use of gostartcall");
+ gobuf->lr = gobuf->pc;
+ gobuf->pc = (uintptr)fn;
+ gobuf->ctxt = ctxt;
+}
+
+// Called to rewind context saved during morestack back to beginning of function.
+// To help us, the linker emits a jmp back to the beginning right after the
+// call to morestack. We just have to decode and apply that jump.
+void
+runtime·rewindmorestack(Gobuf *gobuf)
+{
+ uint32 inst;
+
+ inst = *(uint32*)gobuf->pc;
+ if((gobuf->pc&3) == 0 && (inst>>24) == 0x9a) {
+ //runtime·printf("runtime: rewind pc=%p to pc=%p\n", gobuf->pc, gobuf->pc + ((int32)(inst<<8)>>6) + 8);
+ gobuf->pc += ((int32)(inst<<8)>>6) + 8;
+ return;
+ }
+ runtime·printf("runtime: pc=%p %x\n", gobuf->pc, inst);
+ runtime·throw("runtime: misuse of rewindmorestack");
+}
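
The expression ((int32)(inst<<8)>>6) + 8 in rewindmorestack above sign-extends the 24-bit branch displacement, scales it by 4, and adds the 8-byte ARM pipeline offset in one step. A Go sketch checking that against the three steps written out separately (the sample instruction words are arbitrary):

package main

import "fmt"

// compact mirrors the C expression: ((int32)(inst<<8)>>6) + 8.
func compact(inst uint32) int32 {
	return (int32(inst<<8) >> 6) + 8
}

// spelledOut does the same thing step by step:
// sign-extend the low 24 bits, multiply by 4, add 8.
func spelledOut(inst uint32) int32 {
	imm := int32(inst&0xFFFFFF) << 8 >> 8
	return imm*4 + 8
}

func main() {
	for _, inst := range []uint32{0x9A000000, 0x9AFFFFFE, 0x9A000010} {
		fmt.Println(compact(inst) == spelledOut(inst))
	}
}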
diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s
new file mode 100644
index 000000000..a961c71a8
--- /dev/null
+++ b/src/runtime/sys_darwin_386.s
@@ -0,0 +1,541 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// System calls and other sys.stuff for 386, Darwin
+// See http://fxr.watson.org/fxr/source/bsd/kern/syscalls.c?v=xnu-1228
+// or /usr/include/sys/syscall.h (on a Mac) for system call numbers.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$0
+ MOVL $1, AX
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// Exit this OS thread (like pthread_exit, which eventually
+// calls __bsdthread_terminate).
+TEXT runtime·exit1(SB),NOSPLIT,$0
+ MOVL $361, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVL $5, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVL $6, AX
+ INT $0x80
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$0
+ MOVL $3, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$0
+ MOVL $4, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$16
+ MOVL $20, AX // getpid
+ INT $0x80
+ MOVL AX, 4(SP) // pid
+ MOVL sig+0(FP), AX
+ MOVL AX, 8(SP) // signal
+ MOVL $1, 12(SP) // posix
+ MOVL $37, AX // kill
+ INT $0x80
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVL $197, AX
+ INT $0x80
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVL $75, AX
+ INT $0x80
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVL $73, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$0
+ MOVL $83, AX
+ INT $0x80
+ RET
+
+// OS X comm page time offsets
+// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h
+#define cpu_capabilities 0x20
+#define nt_tsc_base 0x50
+#define nt_scale 0x58
+#define nt_shift 0x5c
+#define nt_ns_base 0x60
+#define nt_generation 0x68
+#define gtod_generation 0x6c
+#define gtod_ns_base 0x70
+#define gtod_sec_base 0x78
+
+// called from assembly
+// 64-bit unix nanoseconds returned in DX:AX.
+// I'd much rather write this in C but we need
+// assembly for the 96-bit multiply and RDTSC.
+TEXT runtime·now(SB),NOSPLIT,$40
+ MOVL $0xffff0000, BP /* comm page base */
+
+ // Test for slow CPU. If so, the math is completely
+ // different, and unimplemented here, so use the
+ // system call.
+ MOVL cpu_capabilities(BP), AX
+ TESTL $0x4000, AX
+ JNZ systime
+
+ // Loop trying to take a consistent snapshot
+ // of the time parameters.
+timeloop:
+ MOVL gtod_generation(BP), BX
+ TESTL BX, BX
+ JZ systime
+ MOVL nt_generation(BP), CX
+ TESTL CX, CX
+ JZ timeloop
+ RDTSC
+ MOVL nt_tsc_base(BP), SI
+ MOVL (nt_tsc_base+4)(BP), DI
+ MOVL SI, 0(SP)
+ MOVL DI, 4(SP)
+ MOVL nt_scale(BP), SI
+ MOVL SI, 8(SP)
+ MOVL nt_ns_base(BP), SI
+ MOVL (nt_ns_base+4)(BP), DI
+ MOVL SI, 12(SP)
+ MOVL DI, 16(SP)
+ CMPL nt_generation(BP), CX
+ JNE timeloop
+ MOVL gtod_ns_base(BP), SI
+ MOVL (gtod_ns_base+4)(BP), DI
+ MOVL SI, 20(SP)
+ MOVL DI, 24(SP)
+ MOVL gtod_sec_base(BP), SI
+ MOVL (gtod_sec_base+4)(BP), DI
+ MOVL SI, 28(SP)
+ MOVL DI, 32(SP)
+ CMPL gtod_generation(BP), BX
+ JNE timeloop
+
+ // Gathered all the data we need. Compute time.
+ // ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base - gtod_ns_base + gtod_sec_base*1e9
+ // The multiply and shift extracts the top 64 bits of the 96-bit product.
+ SUBL 0(SP), AX // DX:AX = (tsc - nt_tsc_base)
+ SBBL 4(SP), DX
+
+ // We have x = tsc - nt_tsc_base - DX:AX to be
+ // multiplied by y = nt_scale = 8(SP), keeping the top 64 bits of the 96-bit product.
+ // x*y = (x&0xffffffff)*y + (x&0xffffffff00000000)*y
+ // (x*y)>>32 = ((x&0xffffffff)*y)>>32 + (x>>32)*y
+ MOVL DX, CX // SI = (x&0xffffffff)*y >> 32
+ MOVL $0, DX
+ MULL 8(SP)
+ MOVL DX, SI
+
+ MOVL CX, AX // DX:AX = (x>>32)*y
+ MOVL $0, DX
+ MULL 8(SP)
+
+ ADDL SI, AX // DX:AX += (x&0xffffffff)*y >> 32
+ ADCL $0, DX
+
+ // DX:AX is now ((tsc - nt_tsc_base) * nt_scale) >> 32.
+ ADDL 12(SP), AX // DX:AX += nt_ns_base
+ ADCL 16(SP), DX
+ SUBL 20(SP), AX // DX:AX -= gtod_ns_base
+ SBBL 24(SP), DX
+ MOVL AX, SI // DI:SI = DX:AX
+ MOVL DX, DI
+ MOVL 28(SP), AX // DX:AX = gtod_sec_base*1e9
+ MOVL 32(SP), DX
+ MOVL $1000000000, CX
+ MULL CX
+ ADDL SI, AX // DX:AX += DI:SI
+ ADCL DI, DX
+ RET
+
+systime:
+ // Fall back to system call (usually first call in this thread)
+ LEAL 12(SP), AX // must be non-nil, unused
+ MOVL AX, 4(SP)
+ MOVL $0, 8(SP) // time zone pointer
+ MOVL $116, AX
+ INT $0x80
+ // sec is in AX, usec in DX
+ // convert to DX:AX nsec
+ MOVL DX, BX
+ MOVL $1000000000, CX
+ MULL CX
+ IMULL $1000, BX
+ ADDL BX, AX
+ ADCL $0, DX
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$0
+ CALL runtime·now(SB)
+ MOVL $1000000000, CX
+ DIVL CX
+ MOVL AX, sec+0(FP)
+ MOVL $0, sec+4(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB),NOSPLIT,$0
+ CALL runtime·now(SB)
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVL $329, AX // pthread_sigmask (on OS X, sigprocmask==entire process)
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$0
+ MOVL $46, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// Sigtramp's job is to call the actual signal handler.
+// It is called with the following arguments on the stack:
+// 0(FP) "return address" - ignored
+// 4(FP) actual handler
+// 8(FP) signal number
+// 12(FP) siginfo style
+// 16(FP) siginfo
+// 20(FP) context
+TEXT runtime·sigtramp(SB),NOSPLIT,$40
+ get_tls(CX)
+
+ // check that g exists
+ MOVL g(CX), DI
+ CMPL DI, $0
+ JNE 6(PC)
+ MOVL sig+8(FP), BX
+ MOVL BX, 0(SP)
+ MOVL $runtime·badsignal(SB), AX
+ CALL AX
+ JMP sigtramp_ret
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BP
+ MOVL m_gsignal(BP), BP
+ MOVL BP, g(CX)
+
+ // copy arguments to sighandler
+ MOVL sig+8(FP), BX
+ MOVL BX, 0(SP)
+ MOVL info+12(FP), BX
+ MOVL BX, 4(SP)
+ MOVL context+16(FP), BX
+ MOVL BX, 8(SP)
+ MOVL DI, 12(SP)
+
+ MOVL handler+0(FP), BX
+ CALL BX
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), DI
+ MOVL DI, g(CX)
+
+sigtramp_ret:
+ // call sigreturn
+ MOVL context+16(FP), CX
+ MOVL style+4(FP), BX
+ MOVL $0, 0(SP) // "caller PC" - ignored
+ MOVL CX, 4(SP)
+ MOVL BX, 8(SP)
+ MOVL $184, AX // sigreturn(ucontext, infostyle)
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$0
+ MOVL $53, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$32
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVL AX, 24(SP) // sec
+ MOVL DX, 28(SP) // usec
+
+ // select(0, 0, 0, 0, &tv)
+ MOVL $0, 0(SP) // "return PC" - ignored
+ MOVL $0, 4(SP)
+ MOVL $0, 8(SP)
+ MOVL $0, 12(SP)
+ MOVL $0, 16(SP)
+ LEAL 24(SP), AX
+ MOVL AX, 20(SP)
+ MOVL $93, AX
+ INT $0x80
+ RET
+
+// void bsdthread_create(void *stk, M *mp, G *gp, void (*fn)(void))
+// System call args are: func arg stack pthread flags.
+TEXT runtime·bsdthread_create(SB),NOSPLIT,$32
+ MOVL $360, AX
+ // 0(SP) is where the caller PC would be; kernel skips it
+ MOVL fn+12(FP), BX
+ MOVL BX, 4(SP) // func
+ MOVL mm+4(FP), BX
+ MOVL BX, 8(SP) // arg
+ MOVL stk+0(FP), BX
+ MOVL BX, 12(SP) // stack
+ MOVL gg+8(FP), BX
+ MOVL BX, 16(SP) // pthread
+ MOVL $0x1000000, 20(SP) // flags = PTHREAD_START_CUSTOM
+ INT $0x80
+ JAE 4(PC)
+ NEGL AX
+ MOVL AX, ret+16(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+16(FP)
+ RET
+
+// The thread that bsdthread_create creates starts executing here,
+// because we registered this function using bsdthread_register
+// at startup.
+// AX = "pthread" (= g)
+// BX = mach thread port
+// CX = "func" (= fn)
+// DX = "arg" (= m)
+// DI = stack top
+// SI = flags (= 0x1000000)
+// SP = stack - C_32_STK_ALIGN
+TEXT runtime·bsdthread_start(SB),NOSPLIT,$0
+ // set up ldt 7+id to point at m->tls.
+ // m->tls is at m+40. newosproc left
+ // the m->id in tls[0].
+ LEAL m_tls(DX), BP
+ MOVL 0(BP), DI
+ ADDL $7, DI // m0 is LDT#7. count up.
+ // setldt(tls#, &tls, sizeof tls)
+ PUSHAL // save registers
+ PUSHL $32 // sizeof tls
+ PUSHL BP // &tls
+ PUSHL DI // tls #
+ CALL runtime·setldt(SB)
+ POPL AX
+ POPL AX
+ POPL AX
+ POPAL
+
+ // Now segment is established. Initialize m, g.
+ get_tls(BP)
+ MOVL AX, g(BP)
+ MOVL DX, g_m(AX)
+ MOVL BX, m_procid(DX) // m->procid = thread port (for debuggers)
+ CALL runtime·stackcheck(SB) // smashes AX
+ CALL CX // fn()
+ CALL runtime·exit1(SB)
+ RET
+
+// void bsdthread_register(void)
+// registers callbacks for threadstart (see bsdthread_create above)
+// and wqthread and pthsize (not used). returns 0 on success.
+TEXT runtime·bsdthread_register(SB),NOSPLIT,$40
+ MOVL $366, AX
+ // 0(SP) is where kernel expects caller PC; ignored
+ MOVL $runtime·bsdthread_start(SB), 4(SP) // threadstart
+ MOVL $0, 8(SP) // wqthread, not used by us
+ MOVL $0, 12(SP) // pthsize, not used by us
+ MOVL $0, 16(SP) // dummy_value [sic]
+ MOVL $0, 20(SP) // targetconc_ptr
+ MOVL $0, 24(SP) // dispatchqueue_offset
+ INT $0x80
+ JAE 4(PC)
+ NEGL AX
+ MOVL AX, ret+0(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// Invoke Mach system call.
+// Assumes system call number in AX,
+// caller PC on stack, caller's caller PC next,
+// and then the system call arguments.
+//
+// Can be used for BSD too, but we don't,
+// because if you use this interface the BSD
+// system call numbers need an extra field
+// in the high 16 bits that seems to be the
+// argument count in bytes but is not always.
+// INT $0x80 works fine for those.
+TEXT runtime·sysenter(SB),NOSPLIT,$0
+ POPL DX
+ MOVL SP, CX
+ BYTE $0x0F; BYTE $0x34; // SYSENTER
+ // returns to DX with SP set to CX
+
+TEXT runtime·mach_msg_trap(SB),NOSPLIT,$0
+ MOVL $-31, AX
+ CALL runtime·sysenter(SB)
+ MOVL AX, ret+28(FP)
+ RET
+
+TEXT runtime·mach_reply_port(SB),NOSPLIT,$0
+ MOVL $-26, AX
+ CALL runtime·sysenter(SB)
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·mach_task_self(SB),NOSPLIT,$0
+ MOVL $-28, AX
+ CALL runtime·sysenter(SB)
+ MOVL AX, ret+0(FP)
+ RET
+
+// Mach provides trap versions of the semaphore ops,
+// instead of requiring the use of RPC.
+
+// uint32 mach_semaphore_wait(uint32)
+TEXT runtime·mach_semaphore_wait(SB),NOSPLIT,$0
+ MOVL $-36, AX
+ CALL runtime·sysenter(SB)
+ MOVL AX, ret+4(FP)
+ RET
+
+// uint32 mach_semaphore_timedwait(uint32, uint32, uint32)
+TEXT runtime·mach_semaphore_timedwait(SB),NOSPLIT,$0
+ MOVL $-38, AX
+ CALL runtime·sysenter(SB)
+ MOVL AX, ret+12(FP)
+ RET
+
+// uint32 mach_semaphore_signal(uint32)
+TEXT runtime·mach_semaphore_signal(SB),NOSPLIT,$0
+ MOVL $-33, AX
+ CALL runtime·sysenter(SB)
+ MOVL AX, ret+4(FP)
+ RET
+
+// uint32 mach_semaphore_signal_all(uint32)
+TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0
+ MOVL $-34, AX
+ CALL runtime·sysenter(SB)
+ MOVL AX, ret+4(FP)
+ RET
+
+// setldt(int entry, int address, int limit)
+// entry and limit are ignored.
+TEXT runtime·setldt(SB),NOSPLIT,$32
+ MOVL address+4(FP), BX // aka base
+
+ /*
+ * When linking against the system libraries,
+ * we use its pthread_create and let it set up %gs
+ * for us. When we do that, the private storage
+ * we get is not at 0(GS) but at 0x468(GS).
+ * 8l rewrites 0(TLS) into 0x468(GS) for us.
+ * To accommodate that rewrite, we translate the
+ * address and limit here so that 0x468(GS) maps to 0(address).
+ *
+ * See cgo/gcc_darwin_386.c:/468 for the derivation
+ * of the constant.
+ */
+ SUBL $0x468, BX
+
+ /*
+ * Must set up as USER_CTHREAD segment because
+ * Darwin forces that value into %gs for signal handlers,
+ * and if we don't set one up, we'll get a recursive
+ * fault trying to get into the signal handler.
+ * Since we have to set one up anyway, it might as
+ * well be the value we want. So don't bother with
+ * i386_set_ldt.
+ */
+ MOVL BX, 4(SP)
+ MOVL $3, AX // thread_fast_set_cthread_self - machdep call #3
+ INT $0x82 // sic: 0x82, not 0x80, for machdep call
+
+ XORL AX, AX
+ MOVW GS, AX
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVL $202, AX
+ INT $0x80
+ JAE 4(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVL $362, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL $363, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$32
+ MOVL $92, AX // fcntl
+ // 0(SP) is where the caller PC would be; kernel skips it
+ MOVL fd+0(FP), BX
+ MOVL BX, 4(SP) // fd
+ MOVL $2, 8(SP) // F_SETFD
+ MOVL $1, 12(SP) // FD_CLOEXEC
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ RET
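
The commented multiply in runtime·now above keeps only the top 64 bits of a 96-bit product using 32-bit MULs: (x*y)>>32 = ((x&0xffffffff)*y)>>32 + (x>>32)*y for 64-bit x and 32-bit y. A Go sketch checking that identity against the full product (math/bits is used here only as the reference):

package main

import (
	"fmt"
	"math/bits"
)

// split keeps the top 64 bits of the 96-bit product x*y using the same
// two partial products as the assembly above.
func split(x uint64, y uint32) uint64 {
	lo := (x & 0xffffffff) * uint64(y) >> 32 // ((x&0xffffffff)*y) >> 32
	hi := (x >> 32) * uint64(y)              // (x>>32)*y
	return lo + hi
}

// reference computes (x*y)>>32 from the full 128-bit product.
func reference(x uint64, y uint32) uint64 {
	hi, lo := bits.Mul64(x, uint64(y))
	return hi<<32 | lo>>32
}

func main() {
	x, y := uint64(0x123456789abcdef0), uint32(0xdeadbeef)
	fmt.Println(split(x, y), split(x, y) == reference(x, y))
}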
diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s
new file mode 100644
index 000000000..bd397d72a
--- /dev/null
+++ b/src/runtime/sys_darwin_amd64.s
@@ -0,0 +1,505 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// System calls and other sys.stuff for AMD64, Darwin
+// See http://fxr.watson.org/fxr/source/bsd/kern/syscalls.c?v=xnu-1228
+// or /usr/include/sys/syscall.h (on a Mac) for system call numbers.
+//
+// The low 24 bits are the system call number.
+// The high 8 bits specify the kind of system call: 1=Mach, 2=BSD, 3=Machine-Dependent.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$0
+ MOVL code+0(FP), DI // arg 1 exit status
+ MOVL $(0x2000000+1), AX // syscall entry
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// Exit this OS thread (like pthread_exit, which eventually
+// calls __bsdthread_terminate).
+TEXT runtime·exit1(SB),NOSPLIT,$0
+ MOVL code+0(FP), DI // arg 1 exit status
+ MOVL $(0x2000000+361), AX // syscall entry
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVQ name+0(FP), DI // arg 1 pathname
+ MOVL mode+8(FP), SI // arg 2 flags
+ MOVL perm+12(FP), DX // arg 3 mode
+ MOVL $(0x2000000+5), AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVL $(0x2000000+6), AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $(0x2000000+3), AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$0
+ MOVQ fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $(0x2000000+4), AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$24
+ MOVL $(0x2000000+20), AX // getpid
+ SYSCALL
+ MOVQ AX, DI // arg 1 - pid
+ MOVL sig+0(FP), SI // arg 2 - signal
+ MOVL $1, DX // arg 3 - posix
+ MOVL $(0x2000000+37), AX // kill
+ SYSCALL
+ RET
+
+TEXT runtime·setitimer(SB), NOSPLIT, $0
+ MOVL mode+0(FP), DI
+ MOVQ new+8(FP), SI
+ MOVQ old+16(FP), DX
+ MOVL $(0x2000000+83), AX // syscall entry
+ SYSCALL
+ RET
+
+TEXT runtime·madvise(SB), NOSPLIT, $0
+ MOVQ addr+0(FP), DI // arg 1 addr
+ MOVQ n+8(FP), SI // arg 2 len
+ MOVL flags+16(FP), DX // arg 3 advice
+ MOVL $(0x2000000+75), AX // syscall entry madvise
+ SYSCALL
+ // ignore failure - maybe pages are locked
+ RET
+
+// OS X comm page time offsets
+// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h
+#define nt_tsc_base 0x50
+#define nt_scale 0x58
+#define nt_shift 0x5c
+#define nt_ns_base 0x60
+#define nt_generation 0x68
+#define gtod_generation 0x6c
+#define gtod_ns_base 0x70
+#define gtod_sec_base 0x78
+
+TEXT nanotime<>(SB), NOSPLIT, $32
+ MOVQ $0x7fffffe00000, BP /* comm page base */
+ // Loop trying to take a consistent snapshot
+ // of the time parameters.
+timeloop:
+ MOVL gtod_generation(BP), R8
+ TESTL R8, R8
+ JZ systime
+ MOVL nt_generation(BP), R9
+ TESTL R9, R9
+ JZ timeloop
+ RDTSC
+ MOVQ nt_tsc_base(BP), R10
+ MOVL nt_scale(BP), R11
+ MOVQ nt_ns_base(BP), R12
+ CMPL nt_generation(BP), R9
+ JNE timeloop
+ MOVQ gtod_ns_base(BP), R13
+ MOVQ gtod_sec_base(BP), R14
+ CMPL gtod_generation(BP), R8
+ JNE timeloop
+
+ // Gathered all the data we need. Compute time.
+ // ((tsc - nt_tsc_base) * nt_scale) >> 32 + nt_ns_base - gtod_ns_base + gtod_sec_base*1e9
+ // The multiply and shift extracts the top 64 bits of the 96-bit product.
+ SHLQ $32, DX
+ ADDQ DX, AX
+ SUBQ R10, AX
+ MULQ R11
+ SHRQ $32, AX:DX
+ ADDQ R12, AX
+ SUBQ R13, AX
+ IMULQ $1000000000, R14
+ ADDQ R14, AX
+ RET
+
+systime:
+ // Fall back to system call (usually first call in this thread).
+ MOVQ SP, DI // must be non-nil, unused
+ MOVQ $0, SI
+ MOVL $(0x2000000+116), AX
+ SYSCALL
+ // sec is in AX, usec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ IMULQ $1000, DX
+ ADDQ DX, AX
+ RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+ CALL nanotime<>(SB)
+ MOVQ AX, ret+0(FP)
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$0-12
+ CALL nanotime<>(SB)
+
+ // generated code for
+ // func f(x uint64) (uint64, uint64) { return x/1000000000, x%1000000000 }
+ // adapted to reduce duplication
+ MOVQ AX, CX
+ MOVQ $1360296554856532783, AX
+ MULQ CX
+ ADDQ CX, DX
+ RCRQ $1, DX
+ SHRQ $29, DX
+ MOVQ DX, sec+0(FP)
+ IMULQ $1000000000, DX
+ SUBQ DX, CX
+ MOVL CX, nsec+8(FP)
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVL sig+0(FP), DI
+ MOVQ new+8(FP), SI
+ MOVQ old+16(FP), DX
+ MOVL $(0x2000000+329), AX // pthread_sigmask (on OS X, sigprocmask==entire process)
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$0
+ MOVL mode+0(FP), DI // arg 1 sig
+ MOVQ new+8(FP), SI // arg 2 act
+ MOVQ old+16(FP), DX // arg 3 oact
+ MOVQ old+16(FP), CX // arg 3 oact
+ MOVQ old+16(FP), R10 // arg 3 oact
+ MOVL $(0x2000000+46), AX // syscall entry
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ get_tls(BX)
+
+ MOVQ R8, 32(SP) // save ucontext
+ MOVQ SI, 40(SP) // save infostyle
+
+ // check that g exists
+ MOVQ g(BX), R10
+ CMPQ R10, $0
+ JNE 5(PC)
+ MOVL DX, 0(SP)
+ MOVQ $runtime·badsignal(SB), AX
+ CALL AX
+ JMP sigtramp_ret
+
+ // save g
+ MOVQ R10, 48(SP)
+
+ // g = m->gsignal
+ MOVQ g_m(R10), BP
+ MOVQ m_gsignal(BP), BP
+ MOVQ BP, g(BX)
+
+ MOVL DX, 0(SP)
+ MOVQ CX, 8(SP)
+ MOVQ R8, 16(SP)
+ MOVQ R10, 24(SP)
+
+ CALL DI
+
+ // restore g
+ get_tls(BX)
+ MOVQ 48(SP), R10
+ MOVQ R10, g(BX)
+
+sigtramp_ret:
+ // call sigreturn
+ MOVL $(0x2000000+184), AX // sigreturn(ucontext, infostyle)
+ MOVQ 32(SP), DI // saved ucontext
+ MOVQ 40(SP), SI // saved infostyle
+ SYSCALL
+ INT $3 // not reached
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 addr
+ MOVQ n+8(FP), SI // arg 2 len
+ MOVL prot+16(FP), DX // arg 3 prot
+ MOVL flags+20(FP), R10 // arg 4 flags
+ MOVL fd+24(FP), R8 // arg 5 fid
+ MOVL off+28(FP), R9 // arg 6 offset
+ MOVL $(0x2000000+197), AX // syscall entry
+ SYSCALL
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 addr
+ MOVQ n+8(FP), SI // arg 2 len
+ MOVL $(0x2000000+73), AX // syscall entry
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$0
+ MOVQ new+8(SP), DI
+ MOVQ old+16(SP), SI
+ MOVQ $(0x2000000+53), AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
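+	// DIVL leaves the quotient (whole seconds) in AX and the remainder (microseconds) in DX.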
+ MOVQ AX, 0(SP) // sec
+ MOVL DX, 8(SP) // usec
+
+ // select(0, 0, 0, 0, &tv)
+ MOVL $0, DI
+ MOVL $0, SI
+ MOVL $0, DX
+ MOVL $0, R10
+ MOVQ SP, R8
+ MOVL $(0x2000000+93), AX
+ SYSCALL
+ RET
+
+// void bsdthread_create(void *stk, M *mp, G *gp, void (*fn)(void))
+TEXT runtime·bsdthread_create(SB),NOSPLIT,$0
+ // Set up arguments to bsdthread_create system call.
+ // The ones in quotes pass through to the thread callback
+ // uninterpreted, so we can put whatever we want there.
+ MOVQ fn+32(SP), DI // "func"
+ MOVQ mm+16(SP), SI // "arg"
+ MOVQ stk+8(SP), DX // stack
+ MOVQ gg+24(SP), R10 // "pthread"
+ MOVQ $0x01000000, R8 // flags = PTHREAD_START_CUSTOM
+ MOVQ $0, R9 // paranoia
+ MOVQ $(0x2000000+360), AX // bsdthread_create
+ SYSCALL
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+32(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+32(FP)
+ RET
+
+// The thread that bsdthread_create creates starts executing here,
+// because we registered this function using bsdthread_register
+// at startup.
+// DI = "pthread"
+// SI = mach thread port
+// DX = "func" (= fn)
+// CX = "arg" (= m)
+// R8 = stack
+// R9 = flags (= 0)
+// SP = stack - C_64_REDZONE_LEN (= stack - 128)
+TEXT runtime·bsdthread_start(SB),NOSPLIT,$0
+ MOVQ R8, SP // empirically, SP is very wrong but R8 is right
+
+ PUSHQ DX
+ PUSHQ CX
+ PUSHQ SI
+
+ // set up thread local storage pointing at m->tls.
+ LEAQ m_tls(CX), DI
+ CALL runtime·settls(SB)
+
+ POPQ SI
+ POPQ CX
+ POPQ DX
+
+ get_tls(BX)
+ MOVQ SI, m_procid(CX) // thread port is m->procid
+ MOVQ m_g0(CX), AX
+ MOVQ AX, g(BX)
+ MOVQ CX, g_m(AX)
+ CALL runtime·stackcheck(SB) // smashes AX, CX
+ CALL DX // fn
+ CALL runtime·exit1(SB)
+ RET
+
+// void bsdthread_register(void)
+// registers callbacks for threadstart (see bsdthread_create above)
+// and wqthread and pthsize (not used). returns 0 on success.
+TEXT runtime·bsdthread_register(SB),NOSPLIT,$0
+ MOVQ $runtime·bsdthread_start(SB), DI // threadstart
+ MOVQ $0, SI // wqthread, not used by us
+ MOVQ $0, DX // pthsize, not used by us
+ MOVQ $0, R10 // dummy_value [sic]
+ MOVQ $0, R8 // targetconc_ptr
+ MOVQ $0, R9 // dispatchqueue_offset
+ MOVQ $(0x2000000+366), AX // bsdthread_register
+ SYSCALL
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+0(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// Mach system calls use 0x1000000 instead of the BSD's 0x2000000.
+
+// uint32 mach_msg_trap(void*, uint32, uint32, uint32, uint32, uint32, uint32)
+TEXT runtime·mach_msg_trap(SB),NOSPLIT,$0
+ MOVQ h+0(FP), DI
+ MOVL op+8(FP), SI
+ MOVL send_size+12(FP), DX
+ MOVL rcv_size+16(FP), R10
+ MOVL rcv_name+20(FP), R8
+ MOVL timeout+24(FP), R9
+ MOVL notify+28(FP), R11
+ PUSHQ R11 // seventh arg, on stack
+ MOVL $(0x1000000+31), AX // mach_msg_trap
+ SYSCALL
+ POPQ R11
+ MOVL AX, ret+32(FP)
+ RET
+
+TEXT runtime·mach_task_self(SB),NOSPLIT,$0
+ MOVL $(0x1000000+28), AX // task_self_trap
+ SYSCALL
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·mach_thread_self(SB),NOSPLIT,$0
+ MOVL $(0x1000000+27), AX // thread_self_trap
+ SYSCALL
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·mach_reply_port(SB),NOSPLIT,$0
+ MOVL $(0x1000000+26), AX // mach_reply_port
+ SYSCALL
+ MOVL AX, ret+0(FP)
+ RET
+
+// Mach provides trap versions of the semaphore ops,
+// instead of requiring the use of RPC.
+
+// uint32 mach_semaphore_wait(uint32)
+TEXT runtime·mach_semaphore_wait(SB),NOSPLIT,$0
+ MOVL sema+0(FP), DI
+ MOVL $(0x1000000+36), AX // semaphore_wait_trap
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+// uint32 mach_semaphore_timedwait(uint32, uint32, uint32)
+TEXT runtime·mach_semaphore_timedwait(SB),NOSPLIT,$0
+ MOVL sema+0(FP), DI
+ MOVL sec+4(FP), SI
+ MOVL nsec+8(FP), DX
+ MOVL $(0x1000000+38), AX // semaphore_timedwait_trap
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+// uint32 mach_semaphore_signal(uint32)
+TEXT runtime·mach_semaphore_signal(SB),NOSPLIT,$0
+ MOVL sema+0(FP), DI
+ MOVL $(0x1000000+33), AX // semaphore_signal_trap
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+// uint32 mach_semaphore_signal_all(uint32)
+TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0
+ MOVL sema+0(FP), DI
+ MOVL $(0x1000000+34), AX // semaphore_signal_all_trap
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+// set tls base to DI
+TEXT runtime·settls(SB),NOSPLIT,$32
+ /*
+ * Same as in sys_darwin_386.s:/ugliness, different constant.
+ * See cgo/gcc_darwin_amd64.c for the derivation
+ * of the constant.
+ */
+ SUBQ $0x8a0, DI
+
+ MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3
+ SYSCALL
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVQ mib+0(FP), DI
+ MOVL miblen+8(FP), SI
+ MOVQ out+16(FP), DX
+ MOVQ size+24(FP), R10
+ MOVQ dst+32(FP), R8
+ MOVQ ndst+40(FP), R9
+ MOVL $(0x2000000+202), AX // syscall entry
+ SYSCALL
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVQ $0, DI
+ MOVQ $0, SI
+ MOVQ $0, DX
+ MOVL $(0x2000000+362), AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI
+ MOVQ ev1+8(FP), SI
+ MOVL nev1+16(FP), DX
+ MOVQ ev2+24(FP), R10
+ MOVL nev2+32(FP), R8
+ MOVQ ts+40(FP), R9
+ MOVL $(0x2000000+363), AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // fd
+ MOVQ $2, SI // F_SETFD
+ MOVQ $1, DX // FD_CLOEXEC
+ MOVL $(0x2000000+92), AX // fcntl
+ SYSCALL
+ RET
diff --git a/src/runtime/sys_dragonfly_386.s b/src/runtime/sys_dragonfly_386.s
new file mode 100644
index 000000000..dd0e27e26
--- /dev/null
+++ b/src/runtime/sys_dragonfly_386.s
@@ -0,0 +1,381 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for 386, DragonFly
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+TEXT runtime·sys_umtx_sleep(SB),NOSPLIT,$-4
+ MOVL $469, AX // umtx_sleep
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·sys_umtx_wakeup(SB),NOSPLIT,$-4
+ MOVL $470, AX // umtx_wakeup
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·lwp_create(SB),NOSPLIT,$-4
+ MOVL $495, AX // lwp_create
+ INT $0x80
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·lwp_start(SB),NOSPLIT,$0
+
+ // Set GS to point at m->tls.
+ MOVL mm+0(FP), BX
+ MOVL m_g0(BX), DX
+ LEAL m_tls(BX), BP
+ PUSHAL
+ PUSHL BP
+ CALL runtime·settls(SB)
+ POPL AX
+ POPAL
+
+ // Now segment is established. Initialize m, g.
+ get_tls(CX)
+ MOVL BX, g_m(DX)
+ MOVL DX, g(CX)
+
+ CALL runtime·stackcheck(SB) // smashes AX, CX
+ MOVL 0(DX), DX // paranoia; check they are not nil
+ MOVL 0(BX), BX
+
+ // More paranoia; check that stack splitting code works.
+ PUSHAL
+ CALL runtime·emptyfunc(SB)
+ POPAL
+
+ CALL runtime·mstart(SB)
+
+ CALL runtime·exit1(SB)
+ MOVL $0x1234, 0x1005
+ RET
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-4
+ MOVL $1, AX
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$16
+ MOVL $0, 0(SP) // syscall gap
+ MOVL $0x10000, 4(SP) // arg 1 - how (EXTEXIT_LWP)
+ MOVL $0, 8(SP) // arg 2 - status
+ MOVL $0, 12(SP) // arg 3 - addr
+ MOVL $494, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-4
+ MOVL $5, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-4
+ MOVL $6, AX
+ INT $0x80
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-4
+ MOVL $3, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-4
+ MOVL $4, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$-4
+ MOVL $194, AX
+ INT $0x80
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$16
+ MOVL $496, AX // lwp_gettid
+ INT $0x80
+ MOVL $0, 0(SP)
+ MOVL $-1, 4(SP) // arg 1 - pid
+ MOVL AX, 8(SP) // arg 2 - tid
+ MOVL sig+0(FP), AX
+	MOVL	AX, 12(SP)		// arg 3 - signum
+ MOVL $497, AX // lwp_kill
+ INT $0x80
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$36
+ LEAL addr+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
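+	// Copy the Go arguments onto the outgoing syscall frame one 32-bit word at a
+	// time: MOVSL copies from (SI) to (DI), STOSL stores AX at (DI), both advancing.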
+ MOVSL // arg 1 - addr
+ MOVSL // arg 2 - len
+ MOVSL // arg 3 - prot
+ MOVSL // arg 4 - flags
+ MOVSL // arg 5 - fd
+ MOVL $0, AX
+ STOSL // arg 6 - pad
+ MOVSL // arg 7 - offset
+ MOVL $0, AX // top 32 bits of file offset
+ STOSL
+ MOVL $197, AX // sys_mmap
+ INT $0x80
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$-4
+ MOVL $73, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$-4
+ MOVL $75, AX // madvise
+ INT $0x80
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·setitimer(SB), NOSPLIT, $-4
+ MOVL $83, AX
+ INT $0x80
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVL $232, AX
+ LEAL 12(SP), BX
+ MOVL $0, 4(SP) // CLOCK_REALTIME
+ MOVL BX, 8(SP)
+ INT $0x80
+ MOVL 12(SP), AX // sec
+ MOVL 16(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ MOVL AX, sec+0(FP)
+ MOVL $0, sec+4(FP)
+ MOVL BX, nsec+8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB), NOSPLIT, $32
+ MOVL $232, AX
+ LEAL 12(SP), BX
+ MOVL $4, 4(SP) // CLOCK_MONOTONIC
+ MOVL BX, 8(SP)
+ INT $0x80
+ MOVL 12(SP), AX // sec
+ MOVL 16(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ // convert to DX:AX nsec
+ MOVL $1000000000, CX
+ MULL CX
+ ADDL BX, AX
+ ADCL $0, DX
+
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-4
+ MOVL $342, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$44
+ get_tls(CX)
+
+ // check that g exists
+ MOVL g(CX), DI
+ CMPL DI, $0
+ JNE 6(PC)
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL $runtime·badsignal(SB), AX
+ CALL AX
+ JMP sigtramp_ret
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BX
+ MOVL m_gsignal(BX), BX
+ MOVL BX, g(CX)
+
+ // copy arguments for call to sighandler
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL info+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL context+8(FP), BX
+ MOVL BX, 8(SP)
+ MOVL DI, 12(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), BX
+ MOVL BX, g(CX)
+
+sigtramp_ret:
+ // call sigreturn
+ MOVL context+8(FP), AX
+ MOVL $0, 0(SP) // syscall gap
+ MOVL AX, 4(SP)
+ MOVL $344, AX // sigreturn(ucontext)
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$0
+ MOVL $53, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$20
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVL AX, 12(SP) // tv_sec
+ MOVL $1000, AX
+ MULL DX
+ MOVL AX, 16(SP) // tv_nsec
+
+ MOVL $0, 0(SP)
+ LEAL 12(SP), AX
+ MOVL AX, 4(SP) // arg 1 - rqtp
+ MOVL $0, 8(SP) // arg 2 - rmtp
+ MOVL $240, AX // sys_nanosleep
+ INT $0x80
+ RET
+
+TEXT runtime·setldt(SB),NOSPLIT,$4
+ // Under DragonFly we set the GS base instead of messing with the LDT.
+ MOVL tls0+4(FP), AX
+ MOVL AX, 0(SP)
+ CALL runtime·settls(SB)
+ RET
+
+TEXT runtime·settls(SB),NOSPLIT,$24
+ // adjust for ELF: wants to use -8(GS) and -4(GS) for g and m
+ MOVL tlsbase+0(FP), CX
+ ADDL $8, CX
+
+ // Set up a struct tls_info - a size of -1 maps the whole address
+ // space and is required for direct-tls access of variable data
+ // via negative offsets.
+ LEAL 16(SP), BX
+ MOVL CX, 16(SP) // base
+ MOVL $-1, 20(SP) // size
+
+ // set_tls_area returns the descriptor that needs to be loaded into GS.
+ MOVL $0, 0(SP) // syscall gap
+ MOVL $0, 4(SP) // arg 1 - which
+ MOVL BX, 8(SP) // arg 2 - tls_info
+ MOVL $8, 12(SP) // arg 3 - infosize
+ MOVL $472, AX // set_tls_area
+ INT $0x80
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ MOVW AX, GS
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$28
+ LEAL mib+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL // arg 1 - name
+ MOVSL // arg 2 - namelen
+ MOVSL // arg 3 - oldp
+ MOVSL // arg 4 - oldlenp
+ MOVSL // arg 5 - newp
+ MOVSL // arg 6 - newlen
+ MOVL $202, AX // sys___sysctl
+ INT $0x80
+ JCC 4(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$-4
+ MOVL $331, AX // sys_sched_yield
+ INT $0x80
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$16
+ MOVL $0, 0(SP) // syscall gap
+ MOVL $3, 4(SP) // arg 1 - how (SIG_SETMASK)
+ MOVL new+0(FP), AX
+ MOVL AX, 8(SP) // arg 2 - set
+ MOVL old+4(FP), AX
+ MOVL AX, 12(SP) // arg 3 - oset
+ MOVL $340, AX // sys_sigprocmask
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVL $362, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL $363, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$32
+ MOVL $92, AX // fcntl
+ // 0(SP) is where the caller PC would be; kernel skips it
+ MOVL fd+0(FP), BX
+ MOVL BX, 4(SP) // fd
+ MOVL $2, 8(SP) // F_SETFD
+ MOVL $1, 12(SP) // FD_CLOEXEC
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ RET
+
+GLOBL runtime·tlsoffset(SB),$4
diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s
new file mode 100644
index 000000000..2c756018c
--- /dev/null
+++ b/src/runtime/sys_dragonfly_amd64.s
@@ -0,0 +1,344 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for AMD64, DragonFly
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+TEXT runtime·sys_umtx_sleep(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - ptr
+ MOVL val+8(FP), SI // arg 2 - value
+ MOVL timeout+12(FP), DX // arg 3 - timeout
+ MOVL $469, AX // umtx_sleep
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·sys_umtx_wakeup(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - ptr
+ MOVL val+8(FP), SI // arg 2 - count
+ MOVL $470, AX // umtx_wakeup
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·lwp_create(SB),NOSPLIT,$0
+ MOVQ param+0(FP), DI // arg 1 - params
+ MOVL $495, AX // lwp_create
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·lwp_start(SB),NOSPLIT,$0
+ MOVQ DI, R13 // m
+
+ // set up FS to point at m->tls
+ LEAQ m_tls(R13), DI
+ CALL runtime·settls(SB) // smashes DI
+
+ // set up m, g
+ get_tls(CX)
+ MOVQ m_g0(R13), DI
+ MOVQ R13, g_m(DI)
+ MOVQ DI, g(CX)
+
+ CALL runtime·stackcheck(SB)
+ CALL runtime·mstart(SB)
+
+ MOVQ 0, AX // crash (not reached)
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-8
+ MOVL code+0(FP), DI // arg 1 exit status
+ MOVL $1, AX
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-8
+ MOVL code+0(FP), DI // arg 1 exit status
+ MOVL $431, AX
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-8
+ MOVQ name+0(FP), DI // arg 1 pathname
+ MOVL mode+8(FP), SI // arg 2 flags
+ MOVL perm+12(FP), DX // arg 3 mode
+ MOVL $5, AX
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVL $6, AX
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $3, AX
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-8
+ MOVQ fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $4, AX
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$-8
+ MOVL kind+0(FP), DI
+ MOVQ limit+8(FP), SI
+ MOVL $194, AX
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$16
+ MOVL $496, AX // lwp_gettid
+ SYSCALL
+	MOVQ	$-1, DI		// arg 1 - pid
+	MOVQ	AX, SI		// arg 2 - tid (returned by lwp_gettid in AX)
+	MOVL	sig+0(FP), DX	// arg 3 - signum
+ MOVL $497, AX // lwp_kill
+ SYSCALL
+ RET
+
+TEXT runtime·setitimer(SB), NOSPLIT, $-8
+ MOVL mode+0(FP), DI
+ MOVQ new+8(FP), SI
+ MOVQ old+16(FP), DX
+ MOVL $83, AX
+ SYSCALL
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVL $232, AX
+ MOVQ $0, DI // CLOCK_REALTIME
+ LEAQ 8(SP), SI
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVQ 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ MOVQ AX, sec+0(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+
+TEXT runtime·nanotime(SB), NOSPLIT, $32
+ MOVL $232, AX
+ MOVQ $4, DI // CLOCK_MONOTONIC
+ LEAQ 8(SP), SI
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVQ 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-8
+ MOVL sig+0(FP), DI // arg 1 sig
+ MOVQ new+8(FP), SI // arg 2 act
+ MOVQ old+16(FP), DX // arg 3 oact
+ MOVL $342, AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ get_tls(BX)
+
+ // check that g exists
+ MOVQ g(BX), R10
+ CMPQ R10, $0
+ JNE 5(PC)
+ MOVQ DI, 0(SP)
+ MOVQ $runtime·badsignal(SB), AX
+ CALL AX
+ RET
+
+ // save g
+ MOVQ R10, 40(SP)
+
+	// g = m->gsignal
+ MOVQ g_m(R10), BP
+ MOVQ m_gsignal(BP), BP
+ MOVQ BP, g(BX)
+
+ MOVQ DI, 0(SP)
+ MOVQ SI, 8(SP)
+ MOVQ DX, 16(SP)
+ MOVQ R10, 24(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(BX)
+ MOVQ 40(SP), R10
+ MOVQ R10, g(BX)
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - addr
+ MOVQ n+8(FP), SI // arg 2 - len
+ MOVL prot+16(FP), DX // arg 3 - prot
+ MOVL flags+20(FP), R10 // arg 4 - flags
+ MOVL fd+24(FP), R8 // arg 5 - fd
+ MOVL off+28(FP), R9
+ SUBQ $16, SP
+ MOVQ R9, 8(SP) // arg 7 - offset (passed on stack)
+ MOVQ $0, R9 // arg 6 - pad
+ MOVL $197, AX
+ SYSCALL
+ ADDQ $16, SP
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 addr
+ MOVQ n+8(FP), SI // arg 2 len
+ MOVL $73, AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVL flags+16(FP), DX
+ MOVQ $75, AX // madvise
+ SYSCALL
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVQ new+8(SP), DI
+ MOVQ old+16(SP), SI
+ MOVQ $53, AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVQ AX, 0(SP) // tv_sec
+ MOVL $1000, AX
+ MULL DX
+ MOVQ AX, 8(SP) // tv_nsec
+
+ MOVQ SP, DI // arg 1 - rqtp
+ MOVQ $0, SI // arg 2 - rmtp
+ MOVL $240, AX // sys_nanosleep
+ SYSCALL
+ RET
+
+// set tls base to DI
+TEXT runtime·settls(SB),NOSPLIT,$16
+ ADDQ $16, DI // adjust for ELF: wants to use -16(FS) and -8(FS) for g and m
+ MOVQ DI, 0(SP)
+ MOVQ $16, 8(SP)
+ MOVQ $0, DI // arg 1 - which
+ MOVQ SP, SI // arg 2 - tls_info
+ MOVQ $16, DX // arg 3 - infosize
+ MOVQ $472, AX // set_tls_area
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVQ mib+0(FP), DI // arg 1 - name
+ MOVL miblen+8(FP), SI // arg 2 - namelen
+ MOVQ out+16(FP), DX // arg 3 - oldp
+ MOVQ size+24(FP), R10 // arg 4 - oldlenp
+ MOVQ dst+32(FP), R8 // arg 5 - newp
+ MOVQ ndst+40(FP), R9 // arg 6 - newlen
+ MOVQ $202, AX // sys___sysctl
+ SYSCALL
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+48(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$-4
+ MOVL $331, AX // sys_sched_yield
+ SYSCALL
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVL $3, DI // arg 1 - how (SIG_SETMASK)
+ MOVQ new+0(FP), SI // arg 2 - set
+ MOVQ old+8(FP), DX // arg 3 - oset
+ MOVL $340, AX // sys_sigprocmask
+ SYSCALL
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVQ $0, DI
+ MOVQ $0, SI
+ MOVQ $0, DX
+ MOVL $362, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI
+ MOVQ ev1+8(FP), SI
+ MOVL nev1+16(FP), DX
+ MOVQ ev2+24(FP), R10
+ MOVL nev2+32(FP), R8
+ MOVQ ts+40(FP), R9
+ MOVL $363, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // fd
+ MOVQ $2, SI // F_SETFD
+ MOVQ $1, DX // FD_CLOEXEC
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s
new file mode 100644
index 000000000..ffc28560e
--- /dev/null
+++ b/src/runtime/sys_freebsd_386.s
@@ -0,0 +1,391 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for 386, FreeBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+TEXT runtime·sys_umtx_op(SB),NOSPLIT,$-4
+ MOVL $454, AX
+ INT $0x80
+ MOVL AX, ret+20(FP)
+ RET
+
+TEXT runtime·thr_new(SB),NOSPLIT,$-4
+ MOVL $455, AX
+ INT $0x80
+ RET
+
+TEXT runtime·thr_start(SB),NOSPLIT,$0
+ MOVL mm+0(FP), AX
+ MOVL m_g0(AX), BX
+ LEAL m_tls(AX), BP
+ MOVL 0(BP), DI
+ ADDL $7, DI
+ PUSHAL
+ PUSHL $32
+ PUSHL BP
+ PUSHL DI
+ CALL runtime·setldt(SB)
+ POPL AX
+ POPL AX
+ POPL AX
+ POPAL
+ get_tls(CX)
+ MOVL BX, g(CX)
+
+ MOVL AX, g_m(BX)
+ CALL runtime·stackcheck(SB) // smashes AX
+ CALL runtime·mstart(SB)
+
+ MOVL 0, AX // crash (not reached)
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-4
+ MOVL $1, AX
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-4
+ MOVL $431, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-4
+ MOVL $5, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-4
+ MOVL $6, AX
+ INT $0x80
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-4
+ MOVL $3, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-4
+ MOVL $4, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$-4
+ MOVL $194, AX
+ INT $0x80
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$16
+ // thr_self(&8(SP))
+ LEAL 8(SP), AX
+ MOVL AX, 4(SP)
+ MOVL $432, AX
+ INT $0x80
+	// thr_kill(self, sig)
+ MOVL 8(SP), AX
+ MOVL AX, 4(SP)
+ MOVL sig+0(FP), AX
+ MOVL AX, 8(SP)
+ MOVL $433, AX
+ INT $0x80
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$32
+ LEAL addr+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL
+ MOVSL
+ MOVSL
+ MOVSL
+ MOVSL
+ MOVSL
+ MOVL $0, AX // top 32 bits of file offset
+ STOSL
+ MOVL $477, AX
+ INT $0x80
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$-4
+ MOVL $73, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$-4
+ MOVL $75, AX // madvise
+ INT $0x80
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·setitimer(SB), NOSPLIT, $-4
+ MOVL $83, AX
+ INT $0x80
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVL $232, AX
+ LEAL 12(SP), BX
+ MOVL $0, 4(SP) // CLOCK_REALTIME
+ MOVL BX, 8(SP)
+ INT $0x80
+ MOVL 12(SP), AX // sec
+ MOVL 16(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ MOVL AX, sec+0(FP)
+ MOVL $0, sec+4(FP)
+ MOVL BX, nsec+8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB), NOSPLIT, $32
+ MOVL $232, AX
+ LEAL 12(SP), BX
+ // We can use CLOCK_MONOTONIC_FAST here when we drop
+ // support for FreeBSD 8-STABLE.
+ MOVL $4, 4(SP) // CLOCK_MONOTONIC
+ MOVL BX, 8(SP)
+ INT $0x80
+ MOVL 12(SP), AX // sec
+ MOVL 16(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ // convert to DX:AX nsec
+ MOVL $1000000000, CX
+ MULL CX
+ ADDL BX, AX
+ ADCL $0, DX
+
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-4
+ MOVL $416, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$44
+ get_tls(CX)
+
+ // check that g exists
+ MOVL g(CX), DI
+ CMPL DI, $0
+ JNE 6(PC)
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL $runtime·badsignal(SB), AX
+ CALL AX
+ JMP sigtramp_ret
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BX
+ MOVL m_gsignal(BX), BX
+ MOVL BX, g(CX)
+
+ // copy arguments for call to sighandler
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL info+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL context+8(FP), BX
+ MOVL BX, 8(SP)
+ MOVL DI, 12(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), BX
+ MOVL BX, g(CX)
+
+sigtramp_ret:
+ // call sigreturn
+ MOVL context+8(FP), AX
+ MOVL $0, 0(SP) // syscall gap
+ MOVL AX, 4(SP)
+ MOVL $417, AX // sigreturn(ucontext)
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$0
+ MOVL $53, AX
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$20
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVL AX, 12(SP) // tv_sec
+ MOVL $1000, AX
+ MULL DX
+ MOVL AX, 16(SP) // tv_nsec
+
+ MOVL $0, 0(SP)
+ LEAL 12(SP), AX
+ MOVL AX, 4(SP) // arg 1 - rqtp
+ MOVL $0, 8(SP) // arg 2 - rmtp
+ MOVL $240, AX // sys_nanosleep
+ INT $0x80
+ RET
+
+/*
+descriptor entry format for system call
+is the native machine format, ugly as it is:
+
+ 2-byte limit
+ 3-byte base
+ 1-byte: 0x80=present, 0x60=dpl<<5, 0x1F=type
+ 1-byte: 0x80=limit is *4k, 0x40=32-bit operand size,
+ 0x0F=4 more bits of limit
+ 1 byte: 8 more bits of base
+
+int i386_get_ldt(int, union ldt_entry *, int);
+int i386_set_ldt(int, const union ldt_entry *, int);
+
+*/
+
+// setldt(int entry, int address, int limit)
+TEXT runtime·setldt(SB),NOSPLIT,$32
+ MOVL address+4(FP), BX // aka base
+ // see comment in sys_linux_386.s; freebsd is similar
+ ADDL $0x8, BX
+
+ // set up data_desc
+ LEAL 16(SP), AX // struct data_desc
+ MOVL $0, 0(AX)
+ MOVL $0, 4(AX)
+
+ MOVW BX, 2(AX)
+ SHRL $16, BX
+ MOVB BX, 4(AX)
+ SHRL $8, BX
+ MOVB BX, 7(AX)
+
+ MOVW $0xffff, 0(AX)
+ MOVB $0xCF, 6(AX) // 32-bit operand, 4k limit unit, 4 more bits of limit
+
+ MOVB $0xF2, 5(AX) // r/w data descriptor, dpl=3, present
+
+ // call i386_set_ldt(entry, desc, 1)
+ MOVL $0xffffffff, 0(SP) // auto-allocate entry and return in AX
+ MOVL AX, 4(SP)
+ MOVL $1, 8(SP)
+ CALL runtime·i386_set_ldt(SB)
+
+ // compute segment selector - (entry*8+7)
+ SHLL $3, AX
+ ADDL $7, AX
+ MOVW AX, GS
+ RET
+
+TEXT runtime·i386_set_ldt(SB),NOSPLIT,$16
+ LEAL args+0(FP), AX // 0(FP) == 4(SP) before SP got moved
+ MOVL $0, 0(SP) // syscall gap
+ MOVL $1, 4(SP)
+ MOVL AX, 8(SP)
+ MOVL $165, AX
+ INT $0x80
+ JAE 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$28
+ LEAL mib+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL // arg 1 - name
+ MOVSL // arg 2 - namelen
+ MOVSL // arg 3 - oldp
+ MOVSL // arg 4 - oldlenp
+ MOVSL // arg 5 - newp
+ MOVSL // arg 6 - newlen
+ MOVL $202, AX // sys___sysctl
+ INT $0x80
+ JAE 4(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$-4
+ MOVL $331, AX // sys_sched_yield
+ INT $0x80
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$16
+ MOVL $0, 0(SP) // syscall gap
+ MOVL $3, 4(SP) // arg 1 - how (SIG_SETMASK)
+ MOVL new+0(FP), AX
+ MOVL AX, 8(SP) // arg 2 - set
+ MOVL old+4(FP), AX
+ MOVL AX, 12(SP) // arg 3 - oset
+ MOVL $340, AX // sys_sigprocmask
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVL $362, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL $363, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$32
+ MOVL $92, AX // fcntl
+ // 0(SP) is where the caller PC would be; kernel skips it
+ MOVL fd+0(FP), BX
+ MOVL BX, 4(SP) // fd
+ MOVL $2, 8(SP) // F_SETFD
+ MOVL $1, 12(SP) // FD_CLOEXEC
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ RET
+
+GLOBL runtime·tlsoffset(SB),$4
diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s
new file mode 100644
index 000000000..65f8c1a6e
--- /dev/null
+++ b/src/runtime/sys_freebsd_amd64.s
@@ -0,0 +1,357 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for AMD64, FreeBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// FreeBSD 8, FreeBSD 9, and older versions that I have checked
+// do not restore R10 on exit from a "restarted" system call
+// if you use the SYSCALL instruction. This means that, for example,
+// if a signal arrives while the wait4 system call is executing,
+// the wait4 internally returns ERESTART, which makes the kernel
+// back up the PC to execute the SYSCALL instruction a second time.
+// However, since the kernel does not restore R10, the fourth
+// argument to the system call has been lost. (FreeBSD 9 also fails
+// to restore the fifth and sixth arguments, R8 and R9, although
+// some earlier versions did restore those correctly.)
+// The broken code is in fast_syscall in FreeBSD's amd64/amd64/exception.S.
+// It restores only DI, SI, DX, AX, and RFLAGS on system call return.
+// http://fxr.watson.org/fxr/source/amd64/amd64/exception.S?v=FREEBSD91#L399
+//
+// The INT $0x80 system call path (int0x80_syscall in FreeBSD's
+// amd64/ia32/ia32_exception.S) does not have this problem,
+// but it expects the third argument in R10. Instead of rewriting
+// all the assembly in this file, #define SYSCALL to a safe simulation
+// using INT $0x80.
+//
+// INT $0x80 is a little slower than SYSCALL, but correctness wins.
+//
+// See golang.org/issue/6372.
+#define SYSCALL MOVQ R10, CX; INT $0x80
+
+TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI
+ MOVL mode+8(FP), SI
+ MOVL val+12(FP), DX
+ MOVQ ptr2+16(FP), R10
+ MOVQ ts+24(FP), R8
+ MOVL $454, AX
+ SYSCALL
+ MOVL AX, ret+32(FP)
+ RET
+
+TEXT runtime·thr_new(SB),NOSPLIT,$0
+ MOVQ param+0(FP), DI
+ MOVL size+8(FP), SI
+ MOVL $455, AX
+ SYSCALL
+ RET
+
+TEXT runtime·thr_start(SB),NOSPLIT,$0
+ MOVQ DI, R13 // m
+
+ // set up FS to point at m->tls
+ LEAQ m_tls(R13), DI
+ CALL runtime·settls(SB) // smashes DI
+
+ // set up m, g
+ get_tls(CX)
+ MOVQ m_g0(R13), DI
+ MOVQ R13, g_m(DI)
+ MOVQ DI, g(CX)
+
+ CALL runtime·stackcheck(SB)
+ CALL runtime·mstart(SB)
+
+ MOVQ 0, AX // crash (not reached)
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-8
+ MOVL code+0(FP), DI // arg 1 exit status
+ MOVL $1, AX
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-8
+ MOVL code+0(FP), DI // arg 1 exit status
+ MOVL $431, AX
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-8
+ MOVQ name+0(FP), DI // arg 1 pathname
+ MOVL mode+8(FP), SI // arg 2 flags
+ MOVL perm+12(FP), DX // arg 3 mode
+ MOVL $5, AX
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVL $6, AX
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $3, AX
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-8
+ MOVQ fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $4, AX
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$-8
+ MOVL kind+0(FP), DI
+ MOVQ limit+8(FP), SI
+ MOVL $194, AX
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$16
+ // thr_self(&8(SP))
+ LEAQ 8(SP), DI // arg 1 &8(SP)
+ MOVL $432, AX
+ SYSCALL
+	// thr_kill(self, sig)
+ MOVQ 8(SP), DI // arg 1 id
+ MOVL sig+0(FP), SI // arg 2
+ MOVL $433, AX
+ SYSCALL
+ RET
+
+TEXT runtime·setitimer(SB), NOSPLIT, $-8
+ MOVL mode+0(FP), DI
+ MOVQ new+8(FP), SI
+ MOVQ old+16(FP), DX
+ MOVL $83, AX
+ SYSCALL
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVL $232, AX
+ MOVQ $0, DI // CLOCK_REALTIME
+ LEAQ 8(SP), SI
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVQ 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ MOVQ AX, sec+0(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+
+TEXT runtime·nanotime(SB), NOSPLIT, $32
+ MOVL $232, AX
+ // We can use CLOCK_MONOTONIC_FAST here when we drop
+ // support for FreeBSD 8-STABLE.
+ MOVQ $4, DI // CLOCK_MONOTONIC
+ LEAQ 8(SP), SI
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVQ 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-8
+ MOVL sig+0(FP), DI // arg 1 sig
+ MOVQ new+8(FP), SI // arg 2 act
+ MOVQ old+16(FP), DX // arg 3 oact
+ MOVL $416, AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ get_tls(BX)
+
+ // check that g exists
+ MOVQ g(BX), R10
+ CMPQ R10, $0
+ JNE 5(PC)
+ MOVQ DI, 0(SP)
+ MOVQ $runtime·badsignal(SB), AX
+ CALL AX
+ RET
+
+ // save g
+ MOVQ R10, 40(SP)
+
+	// g = m->gsignal
+ MOVQ g_m(R10), BP
+ MOVQ m_gsignal(BP), BP
+ MOVQ BP, g(BX)
+
+ MOVQ DI, 0(SP)
+ MOVQ SI, 8(SP)
+ MOVQ DX, 16(SP)
+ MOVQ R10, 24(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(BX)
+ MOVQ 40(SP), R10
+ MOVQ R10, g(BX)
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 addr
+ MOVQ n+8(FP), SI // arg 2 len
+ MOVL prot+16(FP), DX // arg 3 prot
+ MOVL flags+20(FP), R10 // arg 4 flags
+ MOVL fd+24(FP), R8 // arg 5 fid
+ MOVL off+28(FP), R9 // arg 6 offset
+ MOVL $477, AX
+ SYSCALL
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 addr
+ MOVQ n+8(FP), SI // arg 2 len
+ MOVL $73, AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVL flags+16(FP), DX
+ MOVQ $75, AX // madvise
+ SYSCALL
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVQ new+8(SP), DI
+ MOVQ old+16(SP), SI
+ MOVQ $53, AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVQ AX, 0(SP) // tv_sec
+ MOVL $1000, AX
+ MULL DX
+ MOVQ AX, 8(SP) // tv_nsec
+
+ MOVQ SP, DI // arg 1 - rqtp
+ MOVQ $0, SI // arg 2 - rmtp
+ MOVL $240, AX // sys_nanosleep
+ SYSCALL
+ RET
+
+// set tls base to DI
+TEXT runtime·settls(SB),NOSPLIT,$8
+ ADDQ $16, DI // adjust for ELF: wants to use -16(FS) and -8(FS) for g and m
+ MOVQ DI, 0(SP)
+ MOVQ SP, SI
+ MOVQ $129, DI // AMD64_SET_FSBASE
+ MOVQ $165, AX // sysarch
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVQ mib+0(FP), DI // arg 1 - name
+ MOVL miblen+8(FP), SI // arg 2 - namelen
+ MOVQ out+16(FP), DX // arg 3 - oldp
+ MOVQ size+24(FP), R10 // arg 4 - oldlenp
+ MOVQ dst+32(FP), R8 // arg 5 - newp
+ MOVQ ndst+40(FP), R9 // arg 6 - newlen
+ MOVQ $202, AX // sys___sysctl
+ SYSCALL
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+48(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$-4
+ MOVL $331, AX // sys_sched_yield
+ SYSCALL
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVL $3, DI // arg 1 - how (SIG_SETMASK)
+ MOVQ new+0(FP), SI // arg 2 - set
+ MOVQ old+8(FP), DX // arg 3 - oset
+ MOVL $340, AX // sys_sigprocmask
+ SYSCALL
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVQ $0, DI
+ MOVQ $0, SI
+ MOVQ $0, DX
+ MOVL $362, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI
+ MOVQ ev1+8(FP), SI
+ MOVL nev1+16(FP), DX
+ MOVQ ev2+24(FP), R10
+ MOVL nev2+32(FP), R8
+ MOVQ ts+40(FP), R9
+ MOVL $363, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // fd
+ MOVQ $2, SI // F_SETFD
+ MOVQ $1, DX // FD_CLOEXEC
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
diff --git a/src/runtime/sys_freebsd_arm.s b/src/runtime/sys_freebsd_arm.s
new file mode 100644
index 000000000..d875138b6
--- /dev/null
+++ b/src/runtime/sys_freebsd_arm.s
@@ -0,0 +1,382 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for ARM, FreeBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// for EABI, as we don't support OABI
+#define SYS_BASE 0x0
+
+#define SYS_exit (SYS_BASE + 1)
+#define SYS_read (SYS_BASE + 3)
+#define SYS_write (SYS_BASE + 4)
+#define SYS_open (SYS_BASE + 5)
+#define SYS_close (SYS_BASE + 6)
+#define SYS_sigaltstack (SYS_BASE + 53)
+#define SYS_munmap (SYS_BASE + 73)
+#define SYS_madvise (SYS_BASE + 75)
+#define SYS_setitimer (SYS_BASE + 83)
+#define SYS_fcntl (SYS_BASE + 92)
+#define SYS_getrlimit (SYS_BASE + 194)
+#define SYS___sysctl (SYS_BASE + 202)
+#define SYS_nanosleep (SYS_BASE + 240)
+#define SYS_clock_gettime (SYS_BASE + 232)
+#define SYS_sched_yield (SYS_BASE + 331)
+#define SYS_sigprocmask (SYS_BASE + 340)
+#define SYS_kqueue (SYS_BASE + 362)
+#define SYS_kevent (SYS_BASE + 363)
+#define SYS_sigaction (SYS_BASE + 416)
+#define SYS_thr_exit (SYS_BASE + 431)
+#define SYS_thr_self (SYS_BASE + 432)
+#define SYS_thr_kill (SYS_BASE + 433)
+#define SYS__umtx_op (SYS_BASE + 454)
+#define SYS_thr_new (SYS_BASE + 455)
+#define SYS_mmap (SYS_BASE + 477)
+
+TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW 12(FP), R3
+ ADD $20, R13 // arg 5 is passed on stack
+ MOVW $SYS__umtx_op, R7
+ SWI $0
+ SUB $20, R13
+ // BCS error
+ MOVW R0, ret+20(FP)
+ RET
+
+TEXT runtime·thr_new(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW $SYS_thr_new, R7
+ SWI $0
+ RET
+
+TEXT runtime·thr_start(SB),NOSPLIT,$0
+ // set up g
+ MOVW m_g0(R0), g
+ MOVW R0, g_m(g)
+ BL runtime·emptyfunc(SB) // fault if stack check is wrong
+ BL runtime·mstart(SB)
+
+ MOVW $2, R8 // crash (not reached)
+ MOVW R8, (R8)
+ RET
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0 // arg 1 exit status
+ MOVW $SYS_exit, R7
+ SWI $0
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0 // arg 1 exit status
+ MOVW $SYS_thr_exit, R7
+ SWI $0
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0 // arg 1 name
+ MOVW 4(FP), R1 // arg 2 mode
+ MOVW 8(FP), R2 // arg 3 perm
+ MOVW $SYS_open, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0 // arg 1 fd
+ MOVW 4(FP), R1 // arg 2 buf
+ MOVW 8(FP), R2 // arg 3 count
+ MOVW $SYS_read, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0 // arg 1 fd
+ MOVW 4(FP), R1 // arg 2 buf
+ MOVW 8(FP), R2 // arg 3 count
+ MOVW $SYS_write, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0 // arg 1 fd
+ MOVW $SYS_close, R7
+ SWI $0
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW $SYS_getrlimit, R7
+ SWI $0
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$8
+ // thr_self(&4(R13))
+ MOVW $4(R13), R0 // arg 1 &4(R13)
+ MOVW $SYS_thr_self, R7
+ SWI $0
+	// thr_kill(self, sig)
+ MOVW 4(R13), R0 // arg 1 id
+ MOVW sig+0(FP), R1 // arg 2 - signal
+ MOVW $SYS_thr_kill, R7
+ SWI $0
+ RET
+
+TEXT runtime·setitimer(SB), NOSPLIT, $-8
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_setitimer, R7
+ SWI $0
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVW $0, R0 // CLOCK_REALTIME
+ MOVW $8(R13), R1
+ MOVW $SYS_clock_gettime, R7
+ SWI $0
+
+ MOVW 8(R13), R0 // sec.low
+ MOVW 12(R13), R1 // sec.high
+ MOVW 16(R13), R2 // nsec
+
+ MOVW R0, 0(FP)
+ MOVW R1, 4(FP)
+ MOVW R2, 8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB), NOSPLIT, $32
+ // We can use CLOCK_MONOTONIC_FAST here when we drop
+ // support for FreeBSD 8-STABLE.
+ MOVW $4, R0 // CLOCK_MONOTONIC
+ MOVW $8(R13), R1
+ MOVW $SYS_clock_gettime, R7
+ SWI $0
+
+ MOVW 8(R13), R0 // sec.low
+ MOVW 12(R13), R4 // sec.high
+ MOVW 16(R13), R2 // nsec
+
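+	// 64-bit sec * 1e9 using 32-bit halves: MULLU produces the 64-bit product of
+	// the low word in (R1,R0), MUL folds in the high word, and the 32-bit nsec is
+	// then added with carry propagation.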
+ MOVW $1000000000, R3
+ MULLU R0, R3, (R1, R0)
+ MUL R3, R4
+ ADD.S R2, R0
+ ADC R4, R1
+
+ MOVW R0, ret_lo+0(FP)
+ MOVW R1, ret_hi+4(FP)
+ RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0 // arg 1 sig
+ MOVW 4(FP), R1 // arg 2 act
+ MOVW 8(FP), R2 // arg 3 oact
+ MOVW $SYS_sigaction, R7
+ SWI $0
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$24
+ // this might be called in external code context,
+ // where g is not set.
+ // first save R0, because runtime·load_g will clobber it
+ MOVW R0, 4(R13) // signum
+ MOVB runtime·iscgo(SB), R0
+ CMP $0, R0
+ BL.NE runtime·load_g(SB)
+
+ CMP $0, g
+ BNE 4(PC)
+ // signal number is already prepared in 4(R13)
+ MOVW $runtime·badsignal(SB), R11
+ BL (R11)
+ RET
+
+ // save g
+ MOVW g, R4
+ MOVW g, 20(R13)
+
+ // g = m->signal
+ MOVW g_m(g), R8
+ MOVW m_gsignal(R8), g
+
+ // R0 is already saved
+ MOVW R1, 8(R13) // info
+ MOVW R2, 12(R13) // context
+ MOVW R4, 16(R13) // oldg
+
+ BL runtime·sighandler(SB)
+
+ // restore g
+ MOVW 20(R13), g
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$16
+ MOVW 0(FP), R0 // arg 1 addr
+ MOVW 4(FP), R1 // arg 2 len
+ MOVW 8(FP), R2 // arg 3 prot
+ MOVW 12(FP), R3 // arg 4 flags
+ // arg 5 (fid) and arg6 (offset_lo, offset_hi) are passed on stack
+ // note the C runtime only passes the 32-bit offset_lo to us
+ MOVW 16(FP), R4 // arg 5
+ MOVW R4, 4(R13)
+ MOVW 20(FP), R5 // arg 6 lower 32-bit
+ // the word at 8(R13) is skipped due to 64-bit argument alignment.
+ MOVW R5, 12(R13)
+ MOVW $0, R6 // higher 32-bit for arg 6
+ MOVW R6, 16(R13)
+ ADD $4, R13
+ MOVW $SYS_mmap, R7
+ SWI $0
+ SUB $4, R13
+ // TODO(dfc) error checking ?
+ MOVW R0, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 addr
+ MOVW 4(FP), R1 // arg 2 len
+ MOVW $SYS_munmap, R7
+ SWI $0
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 addr
+ MOVW 4(FP), R1 // arg 2 len
+ MOVW 8(FP), R2 // arg 3 flags
+ MOVW $SYS_madvise, R7
+ SWI $0
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVW new+0(FP), R0
+ MOVW old+4(FP), R1
+ MOVW $SYS_sigaltstack, R7
+ SWI $0
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVW usec+0(FP), R0
+ MOVW R0, R2
+ MOVW $1000000, R1
+ DIV R1, R0
+ // 0(R13) is the saved LR, don't use it
+ MOVW R0, 4(R13) // tv_sec.low
+ MOVW $0, R0
+ MOVW R0, 8(R13) // tv_sec.high
+ MOD R1, R2
+ MOVW $1000, R1
+ MUL R1, R2
+ MOVW R2, 12(R13) // tv_nsec
+
+ MOVW $4(R13), R0 // arg 1 - rqtp
+ MOVW $0, R1 // arg 2 - rmtp
+ MOVW $SYS_nanosleep, R7
+ SWI $0
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 - name
+ MOVW 4(FP), R1 // arg 2 - namelen
+ MOVW 8(FP), R2 // arg 3 - old
+ MOVW 12(FP), R3 // arg 4 - oldlenp
+ // arg 5 (newp) and arg 6 (newlen) are passed on stack
+ ADD $20, R13
+ MOVW $SYS___sysctl, R7
+ SWI $0
+ SUB.CS $0, R0, R0
+ SUB $20, R13
+ MOVW R0, ret+24(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$-4
+ MOVW $SYS_sched_yield, R7
+ SWI $0
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVW $3, R0 // arg 1 - how (SIG_SETMASK)
+ MOVW 0(FP), R1 // arg 2 - set
+ MOVW 4(FP), R2 // arg 3 - oset
+ MOVW $SYS_sigprocmask, R7
+ SWI $0
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+// int32 runtime·kqueue(void)
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVW $SYS_kqueue, R7
+ SWI $0
+ RSB.CS $0, R0
+ MOVW R0, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // kq
+ MOVW 4(FP), R1 // changelist
+ MOVW 8(FP), R2 // nchanges
+ MOVW 12(FP), R3 // eventlist
+ ADD $20, R13 // pass arg 5 and 6 on stack
+ MOVW $SYS_kevent, R7
+ SWI $0
+ RSB.CS $0, R0
+ SUB $20, R13
+ MOVW R0, ret+24(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd)
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // fd
+ MOVW $2, R1 // F_SETFD
+ MOVW $1, R2 // FD_CLOEXEC
+ MOVW $SYS_fcntl, R7
+ SWI $0
+ RET
+
+TEXT runtime·casp(SB),NOSPLIT,$0
+ B runtime·cas(SB)
+
+// TODO(minux): this is only valid for ARMv6+
+// bool armcas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+TEXT runtime·cas(SB),NOSPLIT,$0
+ B runtime·armcas(SB)
+
+// TODO(minux): this only supports ARMv6K+.
+TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
+ WORD $0xee1d0f70 // mrc p15, 0, r0, c13, c0, 3
+ RET
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
new file mode 100644
index 000000000..0f6d4bbb5
--- /dev/null
+++ b/src/runtime/sys_linux_386.s
@@ -0,0 +1,489 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// System calls and other sys.stuff for 386, Linux
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+TEXT runtime·exit(SB),NOSPLIT,$0
+ MOVL $252, AX // syscall number
+ MOVL code+0(FP), BX
+ CALL *runtime·_vdso(SB)
+ INT $3 // not reached
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$0
+ MOVL $1, AX // exit - exit the current os thread
+ MOVL code+0(FP), BX
+ CALL *runtime·_vdso(SB)
+ INT $3 // not reached
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVL $5, AX // syscall - open
+ MOVL name+0(FP), BX
+ MOVL mode+4(FP), CX
+ MOVL perm+8(FP), DX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVL $6, AX // syscall - close
+ MOVL fd+0(FP), BX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$0
+ MOVL $4, AX // syscall - write
+ MOVL fd+0(FP), BX
+ MOVL p+4(FP), CX
+ MOVL n+8(FP), DX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$0
+ MOVL $3, AX // syscall - read
+ MOVL fd+0(FP), BX
+ MOVL p+4(FP), CX
+ MOVL n+8(FP), DX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$0
+ MOVL $191, AX // syscall - ugetrlimit
+ MOVL kind+0(FP), BX
+ MOVL limit+4(FP), CX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$8
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVL AX, 0(SP)
+ MOVL DX, 4(SP)
+
+ // select(0, 0, 0, 0, &tv)
+ MOVL $142, AX
+ MOVL $0, BX
+ MOVL $0, CX
+ MOVL $0, DX
+ MOVL $0, SI
+ LEAL 0(SP), DI
+ CALL *runtime·_vdso(SB)
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$12
+ MOVL $224, AX // syscall - gettid
+ CALL *runtime·_vdso(SB)
+ MOVL AX, BX // arg 1 tid
+ MOVL sig+0(FP), CX // arg 2 signal
+ MOVL $238, AX // syscall - tkill
+ CALL *runtime·_vdso(SB)
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$0-12
+ MOVL $104, AX // syscall - setitimer
+ MOVL mode+0(FP), BX
+ MOVL new+4(FP), CX
+ MOVL old+8(FP), DX
+ CALL *runtime·_vdso(SB)
+ RET
+
+TEXT runtime·mincore(SB),NOSPLIT,$0-16
+ MOVL $218, AX // syscall - mincore
+ MOVL addr+0(FP), BX
+ MOVL n+4(FP), CX
+ MOVL dst+8(FP), DX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+12(FP)
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVL $265, AX // syscall - clock_gettime
+ MOVL $0, BX // CLOCK_REALTIME
+ LEAL 8(SP), CX
+ MOVL $0, DX
+ CALL *runtime·_vdso(SB)
+ MOVL 8(SP), AX // sec
+ MOVL 12(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ MOVL AX, sec+0(FP)
+ MOVL $0, sec+4(FP)
+ MOVL BX, nsec+8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB), NOSPLIT, $32
+ MOVL $265, AX // syscall - clock_gettime
+ MOVL $1, BX // CLOCK_MONOTONIC
+ LEAL 8(SP), CX
+ MOVL $0, DX
+ CALL *runtime·_vdso(SB)
+ MOVL 8(SP), AX // sec
+ MOVL 12(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ // convert to DX:AX nsec
+ MOVL $1000000000, CX
+ MULL CX
+ ADDL BX, AX
+ ADCL $0, DX
+
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
+ MOVL $175, AX // syscall entry
+ MOVL sig+0(FP), BX
+ MOVL new+4(FP), CX
+ MOVL old+8(FP), DX
+ MOVL size+12(FP), SI
+ CALL *runtime·_vdso(SB)
+ CMPL AX, $0xfffff001
+ JLS 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
+ MOVL $174, AX // syscall - rt_sigaction
+ MOVL sig+0(FP), BX
+ MOVL new+4(FP), CX
+ MOVL old+8(FP), DX
+ MOVL size+12(FP), SI
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$44
+ get_tls(CX)
+
+ // check that g exists
+ MOVL g(CX), DI
+ CMPL DI, $0
+ JNE 6(PC)
+ MOVL sig+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL $runtime·badsignal(SB), AX
+ CALL AX
+ RET
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BX
+ MOVL m_gsignal(BX), BX
+ MOVL BX, g(CX)
+
+ // copy arguments for call to sighandler
+ MOVL sig+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL info+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL context+8(FP), BX
+ MOVL BX, 8(SP)
+ MOVL DI, 12(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), BX
+ MOVL BX, g(CX)
+
+ RET
+
+TEXT runtime·sigreturn(SB),NOSPLIT,$0
+ MOVL $173, AX // rt_sigreturn
+ // Sigreturn expects same SP as signal handler,
+	// so cannot CALL *runtime._vdso(SB) here.
+ INT $0x80
+ INT $3 // not reached
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVL $192, AX // mmap2
+ MOVL addr+0(FP), BX
+ MOVL n+4(FP), CX
+ MOVL prot+8(FP), DX
+ MOVL flags+12(FP), SI
+ MOVL fd+16(FP), DI
+ MOVL off+20(FP), BP
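+	// mmap2 takes the file offset in units of 4096-byte pages, hence the shift.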
+ SHRL $12, BP
+ CALL *runtime·_vdso(SB)
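+	// A result in the top page of the address space is an encoded -errno;
+	// the NOTL/INCL below negate it into a positive error value.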
+ CMPL AX, $0xfffff001
+ JLS 3(PC)
+ NOTL AX
+ INCL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVL $91, AX // munmap
+ MOVL addr+0(FP), BX
+ MOVL n+4(FP), CX
+ CALL *runtime·_vdso(SB)
+ CMPL AX, $0xfffff001
+ JLS 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVL $219, AX // madvise
+ MOVL addr+0(FP), BX
+ MOVL n+4(FP), CX
+ MOVL flags+8(FP), DX
+ CALL *runtime·_vdso(SB)
+ // ignore failure - maybe pages are locked
+ RET
+
+// int32 futex(int32 *uaddr, int32 op, int32 val,
+// struct timespec *timeout, int32 *uaddr2, int32 val2);
+TEXT runtime·futex(SB),NOSPLIT,$0
+ MOVL $240, AX // futex
+ MOVL addr+0(FP), BX
+ MOVL op+4(FP), CX
+ MOVL val+8(FP), DX
+ MOVL ts+12(FP), SI
+ MOVL addr2+16(FP), DI
+ MOVL val3+20(FP), BP
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
+TEXT runtime·clone(SB),NOSPLIT,$0
+ MOVL $120, AX // clone
+ MOVL flags+4(SP), BX
+ MOVL stack+8(SP), CX
+ MOVL $0, DX // parent tid ptr
+ MOVL $0, DI // child tid ptr
+
+ // Copy mp, gp, fn off parent stack for use by child.
+ SUBL $16, CX
+ MOVL mm+12(SP), SI
+ MOVL SI, 0(CX)
+ MOVL gg+16(SP), SI
+ MOVL SI, 4(CX)
+ MOVL fn+20(SP), SI
+ MOVL SI, 8(CX)
+ MOVL $1234, 12(CX)
+
+ // cannot use CALL *runtime·_vdso(SB) here, because
+ // the stack changes during the system call (after
+ // CALL *runtime·_vdso(SB), the child is still using
+ // the parent's stack when executing its RET instruction).
+ INT $0x80
+
+ // In parent, return.
+ CMPL AX, $0
+ JEQ 3(PC)
+ MOVL AX, ret+20(FP)
+ RET
+
+ // Paranoia: check that SP is as we expect.
+ MOVL mm+8(FP), BP
+ CMPL BP, $1234
+ JEQ 2(PC)
+ INT $3
+
+ // Initialize AX to Linux tid
+ MOVL $224, AX
+ CALL *runtime·_vdso(SB)
+
+ // In child on new stack. Reload registers (paranoia).
+ MOVL 0(SP), BX // m
+ MOVL flags+0(FP), DX // g
+ MOVL stk+4(FP), SI // fn
+
+ MOVL AX, m_procid(BX) // save tid as m->procid
+
+ // set up ldt 7+id to point at m->tls.
+ // newosproc left the id in tls[0].
+ LEAL m_tls(BX), BP
+ MOVL 0(BP), DI
+ ADDL $7, DI // m0 is LDT#7. count up.
+ // setldt(tls#, &tls, sizeof tls)
+ PUSHAL // save registers
+ PUSHL $32 // sizeof tls
+ PUSHL BP // &tls
+ PUSHL DI // tls #
+ CALL runtime·setldt(SB)
+ POPL AX
+ POPL AX
+ POPL AX
+ POPAL
+
+ // Now segment is established. Initialize m, g.
+ get_tls(AX)
+ MOVL DX, g(AX)
+ MOVL BX, g_m(DX)
+
+ CALL runtime·stackcheck(SB) // smashes AX, CX
+ MOVL 0(DX), DX // paranoia; check they are not nil
+ MOVL 0(BX), BX
+
+ // more paranoia; check that stack splitting code works
+ PUSHAL
+ CALL runtime·emptyfunc(SB)
+ POPAL
+
+ CALL SI // fn()
+ CALL runtime·exit1(SB)
+ MOVL $0x1234, 0x1005
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVL $186, AX // sigaltstack
+ MOVL new+4(SP), BX
+ MOVL old+8(SP), CX
+ CALL *runtime·_vdso(SB)
+ CMPL AX, $0xfffff001
+ JLS 2(PC)
+ INT $3
+ RET
+
+// <asm-i386/ldt.h>
+// struct user_desc {
+// unsigned int entry_number;
+// unsigned long base_addr;
+// unsigned int limit;
+// unsigned int seg_32bit:1;
+// unsigned int contents:2;
+// unsigned int read_exec_only:1;
+// unsigned int limit_in_pages:1;
+// unsigned int seg_not_present:1;
+// unsigned int useable:1;
+// };
+#define SEG_32BIT 0x01
+// contents are the 2 bits 0x02 and 0x04.
+#define CONTENTS_DATA 0x00
+#define CONTENTS_STACK 0x02
+#define CONTENTS_CODE 0x04
+#define READ_EXEC_ONLY 0x08
+#define LIMIT_IN_PAGES 0x10
+#define SEG_NOT_PRESENT 0x20
+#define USEABLE 0x40
+
+// setldt(int entry, int address, int limit)
+TEXT runtime·setldt(SB),NOSPLIT,$32
+ MOVL entry+0(FP), BX // entry
+ MOVL address+4(FP), CX // base address
+
+ /*
+ * When linking against the system libraries,
+ * we use its pthread_create and let it set up %gs
+ * for us. When we do that, the private storage
+ * we get is not at 0(GS), 4(GS), but -8(GS), -4(GS).
+ * To insulate the rest of the tool chain from this
+ * ugliness, 8l rewrites 0(TLS) into -8(GS) for us.
+ * To accommodate that rewrite, we translate
+ * the address here and bump the limit to 0xffffffff (no limit)
+ * so that -8(GS) maps to 0(address).
+ * Also, the final 0(GS) (current 8(CX)) has to point
+ * to itself, to mimic ELF.
+ */
+ ADDL $0x8, CX // address
+ MOVL CX, 0(CX)
+
+ // set up user_desc
+ LEAL 16(SP), AX // struct user_desc
+ MOVL BX, 0(AX)
+ MOVL CX, 4(AX)
+ MOVL $0xfffff, 8(AX)
+ MOVL $(SEG_32BIT|LIMIT_IN_PAGES|USEABLE|CONTENTS_DATA), 12(AX) // flag bits
+
+ // call modify_ldt
+ MOVL $1, BX // func = 1 (write)
+ MOVL AX, CX // user_desc
+ MOVL $16, DX // sizeof(user_desc)
+ MOVL $123, AX // syscall - modify_ldt
+ CALL *runtime·_vdso(SB)
+
+ // breakpoint on error
+ CMPL AX, $0xfffff001
+ JLS 2(PC)
+ INT $3
+
+ // compute segment selector - (entry*8+7)
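+	// (selector = index<<3 | TI | RPL: TI=4 selects the LDT, RPL=3 is user mode, hence the +7)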
+ MOVL entry+0(FP), AX
+ SHLL $3, AX
+ ADDL $7, AX
+ MOVW AX, GS
+
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ MOVL $158, AX
+ CALL *runtime·_vdso(SB)
+ RET
+
+TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
+ MOVL $242, AX // syscall - sched_getaffinity
+ MOVL pid+0(FP), BX
+ MOVL len+4(FP), CX
+ MOVL buf+8(FP), DX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+12(FP)
+ RET
+
+// int32 runtime·epollcreate(int32 size);
+TEXT runtime·epollcreate(SB),NOSPLIT,$0
+ MOVL $254, AX
+ MOVL size+0(FP), BX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+4(FP)
+ RET
+
+// int32 runtime·epollcreate1(int32 flags);
+TEXT runtime·epollcreate1(SB),NOSPLIT,$0
+ MOVL $329, AX
+ MOVL flags+0(FP), BX
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+4(FP)
+ RET
+
+// func epollctl(epfd, op, fd int32, ev *epollEvent) int
+TEXT runtime·epollctl(SB),NOSPLIT,$0
+ MOVL $255, AX
+ MOVL epfd+0(FP), BX
+ MOVL op+4(FP), CX
+ MOVL fd+8(FP), DX
+ MOVL ev+12(FP), SI
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
+TEXT runtime·epollwait(SB),NOSPLIT,$0
+ MOVL $256, AX
+ MOVL epfd+0(FP), BX
+ MOVL ev+4(FP), CX
+ MOVL nev+8(FP), DX
+ MOVL timeout+12(FP), SI
+ CALL *runtime·_vdso(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVL $55, AX // fcntl
+ MOVL fd+0(FP), BX // fd
+ MOVL $2, CX // F_SETFD
+ MOVL $1, DX // FD_CLOEXEC
+ CALL *runtime·_vdso(SB)
+ RET
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
new file mode 100644
index 000000000..33b91e872
--- /dev/null
+++ b/src/runtime/sys_linux_amd64.s
@@ -0,0 +1,410 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// System calls and other sys.stuff for AMD64, Linux
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+TEXT runtime·exit(SB),NOSPLIT,$0-4
+ MOVL code+0(FP), DI
+ MOVL $231, AX // exitgroup - force all os threads to exit
+ SYSCALL
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$0-4
+ MOVL code+0(FP), DI
+ MOVL $60, AX // exit - exit the current os thread
+ SYSCALL
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0-20
+ MOVQ name+0(FP), DI
+ MOVL mode+8(FP), SI
+ MOVL perm+12(FP), DX
+ MOVL $2, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0-12
+ MOVL fd+0(FP), DI
+ MOVL $3, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$0-28
+ MOVQ fd+0(FP), DI
+ MOVQ p+8(FP), SI
+ MOVL n+16(FP), DX
+ MOVL $1, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$0-28
+ MOVL fd+0(FP), DI
+ MOVQ p+8(FP), SI
+ MOVL n+16(FP), DX
+ MOVL $0, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$0-20
+ MOVL kind+0(FP), DI
+ MOVQ limit+8(FP), SI
+ MOVL $97, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
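+	// AX = usec/1e6 (tv_sec), DX = usec%1e6 (tv_usec)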
+ MOVQ AX, 0(SP)
+ MOVQ DX, 8(SP)
+
+ // select(0, 0, 0, 0, &tv)
+ MOVL $0, DI
+ MOVL $0, SI
+ MOVL $0, DX
+ MOVL $0, R10
+ MOVQ SP, R8
+ MOVL $23, AX
+ SYSCALL
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$0
+ MOVL $186, AX // syscall - gettid
+ SYSCALL
+ MOVL AX, DI // arg 1 tid
+ MOVL sig+0(FP), SI // arg 2
+ MOVL $200, AX // syscall - tkill
+ SYSCALL
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$0-24
+ MOVL mode+0(FP), DI
+ MOVQ new+8(FP), SI
+ MOVQ old+16(FP), DX
+ MOVL $38, AX // syscall entry
+ SYSCALL
+ RET
+
+TEXT runtime·mincore(SB),NOSPLIT,$0-28
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVQ dst+16(FP), DX
+ MOVL $27, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$16
+ // Be careful. We're calling a function with gcc calling convention here.
+ // We're guaranteed 128 bytes on entry, and we've taken 16, and the
+ // call uses another 8.
+ // That leaves 104 for the gettime code to use. Hope that's enough!
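+	// __vdso_clock_gettime has the C signature (clockid_t, struct timespec *); clock id 0 is CLOCK_REALTIME.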
+ MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
+ CMPQ AX, $0
+ JEQ fallback_gtod
+ MOVL $0, DI // CLOCK_REALTIME
+ LEAQ 0(SP), SI
+ CALL AX
+ MOVQ 0(SP), AX // sec
+ MOVQ 8(SP), DX // nsec
+ MOVQ AX, sec+0(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+fallback_gtod:
+ LEAQ 0(SP), DI
+ MOVQ $0, SI
+ MOVQ runtime·__vdso_gettimeofday_sym(SB), AX
+ CALL AX
+ MOVQ 0(SP), AX // sec
+ MOVL 8(SP), DX // usec
+ IMULQ $1000, DX
+ MOVQ AX, sec+0(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$16
+ // Duplicate time.now here to avoid using up precious stack space.
+ // See comment above in time.now.
+ MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
+ CMPQ AX, $0
+ JEQ fallback_gtod_nt
+ MOVL $1, DI // CLOCK_MONOTONIC
+ LEAQ 0(SP), SI
+ CALL AX
+ MOVQ 0(SP), AX // sec
+ MOVQ 8(SP), DX // nsec
+ // sec is in AX, nsec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+fallback_gtod_nt:
+ LEAQ 0(SP), DI
+ MOVQ $0, SI
+ MOVQ runtime·__vdso_gettimeofday_sym(SB), AX
+ CALL AX
+ MOVQ 0(SP), AX // sec
+ MOVL 8(SP), DX // usec
+ IMULQ $1000, DX
+ // sec is in AX, nsec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0-28
+ MOVL sig+0(FP), DI
+ MOVQ new+8(FP), SI
+ MOVQ old+16(FP), DX
+ MOVL size+24(FP), R10
+ MOVL $14, AX // syscall entry
+ SYSCALL
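+	// Linux returns -errno in AX on failure; the check below crashes if the result is in the error range.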
+ CMPQ AX, $0xfffffffffffff001
+ JLS 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·rt_sigaction(SB),NOSPLIT,$0-36
+ MOVQ sig+0(FP), DI
+ MOVQ new+8(FP), SI
+ MOVQ old+16(FP), DX
+ MOVQ size+24(FP), R10
+ MOVL $13, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+32(FP)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ get_tls(BX)
+
+ // check that g exists
+ MOVQ g(BX), R10
+ CMPQ R10, $0
+ JNE 5(PC)
+ MOVQ DI, 0(SP)
+ MOVQ $runtime·badsignal(SB), AX
+ CALL AX
+ RET
+
+ // save g
+ MOVQ R10, 40(SP)
+
+ // g = m->gsignal
+ MOVQ g_m(R10), BP
+ MOVQ m_gsignal(BP), BP
+ MOVQ BP, g(BX)
+
+ MOVQ DI, 0(SP)
+ MOVQ SI, 8(SP)
+ MOVQ DX, 16(SP)
+ MOVQ R10, 24(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(BX)
+ MOVQ 40(SP), R10
+ MOVQ R10, g(BX)
+ RET
+
+TEXT runtime·sigreturn(SB),NOSPLIT,$0
+ MOVL $15, AX // rt_sigreturn
+ SYSCALL
+ INT $3 // not reached
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVL prot+16(FP), DX
+ MOVL flags+20(FP), R10
+ MOVL fd+24(FP), R8
+ MOVL off+28(FP), R9
+
+ MOVL $9, AX // mmap
+ SYSCALL
+ CMPQ AX, $0xfffffffffffff001
+ JLS 3(PC)
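+	// error path: AX holds -errno; negate it (NOT+INC) so the caller gets a positive errno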
+ NOTQ AX
+ INCQ AX
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVQ $11, AX // munmap
+ SYSCALL
+ CMPQ AX, $0xfffffffffffff001
+ JLS 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI
+ MOVQ n+8(FP), SI
+ MOVL flags+16(FP), DX
+ MOVQ $28, AX // madvise
+ SYSCALL
+ // ignore failure - maybe pages are locked
+ RET
+
+// int64 futex(int32 *uaddr, int32 op, int32 val,
+// struct timespec *timeout, int32 *uaddr2, int32 val2);
+TEXT runtime·futex(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI
+ MOVL op+8(FP), SI
+ MOVL val+12(FP), DX
+ MOVQ ts+16(FP), R10
+ MOVQ addr2+24(FP), R8
+ MOVL val3+32(FP), R9
+ MOVL $202, AX
+ SYSCALL
+ MOVL AX, ret+40(FP)
+ RET
+
+// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
+TEXT runtime·clone(SB),NOSPLIT,$0
+ MOVL flags+8(SP), DI
+ MOVQ stack+16(SP), SI
+
+ // Copy mp, gp, fn off parent stack for use by child.
+ // Careful: Linux system call clobbers CX and R11.
+ MOVQ mm+24(SP), R8
+ MOVQ gg+32(SP), R9
+ MOVQ fn+40(SP), R12
+
+ MOVL $56, AX
+ SYSCALL
+
+ // In parent, return.
+ CMPQ AX, $0
+ JEQ 3(PC)
+ MOVL AX, ret+40(FP)
+ RET
+
+ // In child, on new stack.
+ MOVQ SI, SP
+
+ // Initialize m->procid to Linux tid
+ MOVL $186, AX // gettid
+ SYSCALL
+ MOVQ AX, m_procid(R8)
+
+ // Set FS to point at m->tls.
+ LEAQ m_tls(R8), DI
+ CALL runtime·settls(SB)
+
+ // In child, set up new stack
+ get_tls(CX)
+ MOVQ R8, g_m(R9)
+ MOVQ R9, g(CX)
+ CALL runtime·stackcheck(SB)
+
+ // Call fn
+ CALL R12
+
+ // It shouldn't return. If it does, exit
+ MOVL $111, DI
+ MOVL $60, AX
+ SYSCALL
+ JMP -3(PC) // keep exiting
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVQ new+8(SP), DI
+ MOVQ old+16(SP), SI
+ MOVQ $131, AX
+ SYSCALL
+ CMPQ AX, $0xfffffffffffff001
+ JLS 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// set tls base to DI
+TEXT runtime·settls(SB),NOSPLIT,$32
+ ADDQ $16, DI // ELF wants to use -16(FS), -8(FS)
+
+ MOVQ DI, SI
+ MOVQ $0x1002, DI // ARCH_SET_FS
+ MOVQ $158, AX // arch_prctl
+ SYSCALL
+ CMPQ AX, $0xfffffffffffff001
+ JLS 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ MOVL $24, AX
+ SYSCALL
+ RET
+
+TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
+ MOVQ pid+0(FP), DI
+ MOVQ len+8(FP), SI
+ MOVQ buf+16(FP), DX
+ MOVL $204, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·epollcreate(int32 size);
+TEXT runtime·epollcreate(SB),NOSPLIT,$0
+ MOVL size+0(FP), DI
+ MOVL $213, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+// int32 runtime·epollcreate1(int32 flags);
+TEXT runtime·epollcreate1(SB),NOSPLIT,$0
+ MOVL flags+0(FP), DI
+ MOVL $291, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+// func epollctl(epfd, op, fd int32, ev *epollEvent) int
+TEXT runtime·epollctl(SB),NOSPLIT,$0
+ MOVL epfd+0(FP), DI
+ MOVL op+4(FP), SI
+ MOVL fd+8(FP), DX
+ MOVQ ev+16(FP), R10
+ MOVL $233, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
+TEXT runtime·epollwait(SB),NOSPLIT,$0
+ MOVL epfd+0(FP), DI
+ MOVQ ev+8(FP), SI
+ MOVL nev+16(FP), DX
+ MOVL timeout+20(FP), R10
+ MOVL $232, AX // syscall entry
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // fd
+ MOVQ $2, SI // F_SETFD
+ MOVQ $1, DX // FD_CLOEXEC
+ MOVL $72, AX // fcntl
+ SYSCALL
+ RET
diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s
new file mode 100644
index 000000000..bd285f399
--- /dev/null
+++ b/src/runtime/sys_linux_arm.s
@@ -0,0 +1,461 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//
+// System calls and other sys.stuff for arm, Linux
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// for EABI, as we don't support OABI
+#define SYS_BASE 0x0
+
+#define SYS_exit (SYS_BASE + 1)
+#define SYS_read (SYS_BASE + 3)
+#define SYS_write (SYS_BASE + 4)
+#define SYS_open (SYS_BASE + 5)
+#define SYS_close (SYS_BASE + 6)
+#define SYS_gettimeofday (SYS_BASE + 78)
+#define SYS_clone (SYS_BASE + 120)
+#define SYS_rt_sigreturn (SYS_BASE + 173)
+#define SYS_rt_sigaction (SYS_BASE + 174)
+#define SYS_rt_sigprocmask (SYS_BASE + 175)
+#define SYS_sigaltstack (SYS_BASE + 186)
+#define SYS_mmap2 (SYS_BASE + 192)
+#define SYS_futex (SYS_BASE + 240)
+#define SYS_exit_group (SYS_BASE + 248)
+#define SYS_munmap (SYS_BASE + 91)
+#define SYS_madvise (SYS_BASE + 220)
+#define SYS_setitimer (SYS_BASE + 104)
+#define SYS_mincore (SYS_BASE + 219)
+#define SYS_gettid (SYS_BASE + 224)
+#define SYS_tkill (SYS_BASE + 238)
+#define SYS_sched_yield (SYS_BASE + 158)
+#define SYS_select (SYS_BASE + 142) // newselect
+#define SYS_ugetrlimit (SYS_BASE + 191)
+#define SYS_sched_getaffinity (SYS_BASE + 242)
+#define SYS_clock_gettime (SYS_BASE + 263)
+#define SYS_epoll_create (SYS_BASE + 250)
+#define SYS_epoll_ctl (SYS_BASE + 251)
+#define SYS_epoll_wait (SYS_BASE + 252)
+#define SYS_epoll_create1 (SYS_BASE + 357)
+#define SYS_fcntl (SYS_BASE + 55)
+
+#define ARM_BASE (SYS_BASE + 0x0f0000)
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_open, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW $SYS_close, R7
+ SWI $0
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_write, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_read, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·getrlimit(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW $SYS_ugetrlimit, R7
+ SWI $0
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT runtime·exit(SB),NOSPLIT,$-4
+ MOVW 0(FP), R0
+ MOVW $SYS_exit_group, R7
+ SWI $0
+ MOVW $1234, R0
+ MOVW $1002, R1
+ MOVW R0, (R1) // fail hard
+
+TEXT runtime·exit1(SB),NOSPLIT,$-4
+ MOVW 0(FP), R0
+ MOVW $SYS_exit, R7
+ SWI $0
+ MOVW $1234, R0
+ MOVW $1003, R1
+ MOVW R0, (R1) // fail hard
+
+TEXT runtime·raise(SB),NOSPLIT,$-4
+ MOVW $SYS_gettid, R7
+ SWI $0
+ // arg 1 tid already in R0 from gettid
+ MOVW sig+0(FP), R1 // arg 2 - signal
+ MOVW $SYS_tkill, R7
+ SWI $0
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW 12(FP), R3
+ MOVW 16(FP), R4
+ MOVW 20(FP), R5
+ MOVW $SYS_mmap2, R7
+ SWI $0
+ MOVW $0xfffff001, R6
+ CMP R6, R0
+ RSB.HI $0, R0
+ MOVW R0, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW $SYS_munmap, R7
+ SWI $0
+ MOVW $0xfffff001, R6
+ CMP R6, R0
+ MOVW.HI $0, R8 // crash on syscall failure
+ MOVW.HI R8, (R8)
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_madvise, R7
+ SWI $0
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_setitimer, R7
+ SWI $0
+ RET
+
+TEXT runtime·mincore(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_mincore, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT time·now(SB), NOSPLIT, $32
+ MOVW $0, R0 // CLOCK_REALTIME
+ MOVW $8(R13), R1 // timespec
+ MOVW $SYS_clock_gettime, R7
+ SWI $0
+
+ MOVW 8(R13), R0 // sec
+ MOVW 12(R13), R2 // nsec
+
+ MOVW R0, 0(FP)
+ MOVW $0, R1
+ MOVW R1, 4(FP)
+ MOVW R2, 8(FP)
+ RET
+
+// int64 nanotime(void)
+TEXT runtime·nanotime(SB),NOSPLIT,$32
+ MOVW $1, R0 // CLOCK_MONOTONIC
+ MOVW $8(R13), R1 // timespec
+ MOVW $SYS_clock_gettime, R7
+ SWI $0
+
+ MOVW 8(R13), R0 // sec
+ MOVW 12(R13), R2 // nsec
+
+ MOVW $1000000000, R3
+ MULLU R0, R3, (R1, R0)
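+	// R1:R0 = sec*1e9 (64-bit unsigned product); nsec is added in below with carry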
+ MOVW $0, R4
+ ADD.S R2, R0
+ ADC R4, R1
+
+ MOVW R0, ret_lo+0(FP)
+ MOVW R1, ret_hi+4(FP)
+ RET
+
+// int32 futex(int32 *uaddr, int32 op, int32 val,
+// struct timespec *timeout, int32 *uaddr2, int32 val2);
+TEXT runtime·futex(SB),NOSPLIT,$0
+ MOVW 4(SP), R0
+ MOVW 8(SP), R1
+ MOVW 12(SP), R2
+ MOVW 16(SP), R3
+ MOVW 20(SP), R4
+ MOVW 24(SP), R5
+ MOVW $SYS_futex, R7
+ SWI $0
+ MOVW R0, ret+24(FP)
+ RET
+
+
+// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
+TEXT runtime·clone(SB),NOSPLIT,$0
+ MOVW flags+0(FP), R0
+ MOVW stk+4(FP), R1
+ MOVW $0, R2 // parent tid ptr
+ MOVW $0, R3 // tls_val
+ MOVW $0, R4 // child tid ptr
+ MOVW $0, R5
+
+ // Copy mp, gp, fn off parent stack for use by child.
+ // TODO(kaib): figure out which registers are clobbered by clone and avoid stack copying
+ MOVW $-16(R1), R1
+ MOVW mm+8(FP), R6
+ MOVW R6, 0(R1)
+ MOVW gg+12(FP), R6
+ MOVW R6, 4(R1)
+ MOVW fn+16(FP), R6
+ MOVW R6, 8(R1)
+ MOVW $1234, R6
+ MOVW R6, 12(R1)
+
+ MOVW $SYS_clone, R7
+ SWI $0
+
+ // In parent, return.
+ CMP $0, R0
+ BEQ 3(PC)
+ MOVW R0, ret+20(FP)
+ RET
+
+ // Paranoia: check that SP is as we expect. Use R13 to avoid linker 'fixup'
+ MOVW 12(R13), R0
+ MOVW $1234, R1
+ CMP R0, R1
+ BEQ 2(PC)
+ BL runtime·abort(SB)
+
+ MOVW 4(R13), g
+ MOVW 0(R13), R8
+ MOVW R8, g_m(g)
+
+ // paranoia; check they are not nil
+ MOVW 0(R8), R0
+ MOVW 0(g), R0
+
+ BL runtime·emptyfunc(SB) // fault if stack check is wrong
+
+ // Initialize m->procid to Linux tid
+ MOVW $SYS_gettid, R7
+ SWI $0
+ MOVW g_m(g), R8
+ MOVW R0, m_procid(R8)
+
+ // Call fn
+ MOVW 8(R13), R0
+ MOVW $16(R13), R13
+ BL (R0)
+
+ MOVW $0, R0
+ MOVW R0, 4(R13)
+ BL runtime·exit1(SB)
+
+ // It shouldn't return
+ MOVW $1234, R0
+ MOVW $1005, R1
+ MOVW R0, (R1)
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW $SYS_sigaltstack, R7
+ SWI $0
+ MOVW $0xfffff001, R6
+ CMP R6, R0
+ MOVW.HI $0, R8 // crash on syscall failure
+ MOVW.HI R8, (R8)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$24
+ // this might be called in external code context,
+ // where g is not set.
+ // first save R0, because runtime·load_g will clobber it
+ MOVW R0, 4(R13)
+ MOVB runtime·iscgo(SB), R0
+ CMP $0, R0
+ BL.NE runtime·load_g(SB)
+
+ CMP $0, g
+ BNE 4(PC)
+ // signal number is already prepared in 4(R13)
+ MOVW $runtime·badsignal(SB), R11
+ BL (R11)
+ RET
+
+ // save g
+ MOVW g, R3
+ MOVW g, 20(R13)
+
+ // g = m->gsignal
+ MOVW g_m(g), R8
+ MOVW m_gsignal(R8), g
+
+ // copy arguments for call to sighandler
+ // R0 is already saved above
+ MOVW R1, 8(R13)
+ MOVW R2, 12(R13)
+ MOVW R3, 16(R13)
+
+ BL runtime·sighandler(SB)
+
+ // restore g
+ MOVW 20(R13), g
+
+ RET
+
+TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW 12(FP), R3
+ MOVW $SYS_rt_sigprocmask, R7
+ SWI $0
+ RET
+
+TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW 12(FP), R3
+ MOVW $SYS_rt_sigaction, R7
+ SWI $0
+ MOVW R0, ret+16(FP)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$12
+ MOVW usec+0(FP), R0
+ MOVW R0, R1
+ MOVW $1000000, R2
+ DIV R2, R0
+ MOD R2, R1
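+	// R0 = seconds, R1 = leftover microseconds (timeval for newselect)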
+ MOVW R0, 4(SP)
+ MOVW R1, 8(SP)
+ MOVW $0, R0
+ MOVW $0, R1
+ MOVW $0, R2
+ MOVW $0, R3
+ MOVW $4(SP), R4
+ MOVW $SYS_select, R7
+ SWI $0
+ RET
+
+// Use kernel version instead of native armcas in asm_arm.s.
+// See ../sync/atomic/asm_linux_arm.s for details.
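+// The fixed address 0xffff0fc0 is the kernel-provided __kuser_cmpxchg helper in the vector page.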
+TEXT cas<>(SB),NOSPLIT,$0
+ MOVW $0xffff0fc0, PC
+
+TEXT runtime·cas(SB),NOSPLIT,$0
+ MOVW ptr+0(FP), R2
+ MOVW old+4(FP), R0
+casagain:
+ MOVW new+8(FP), R1
+ BL cas<>(SB)
+ BCC cascheck
+ MOVW $1, R0
+ MOVB R0, ret+12(FP)
+ RET
+cascheck:
+ // Kernel lies; double-check.
+ MOVW ptr+0(FP), R2
+ MOVW old+4(FP), R0
+ MOVW 0(R2), R3
+ CMP R0, R3
+ BEQ casagain
+ MOVW $0, R0
+ MOVB R0, ret+12(FP)
+ RET
+
+TEXT runtime·casp(SB),NOSPLIT,$0
+ B runtime·cas(SB)
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ MOVW $SYS_sched_yield, R7
+ SWI $0
+ RET
+
+TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW $SYS_sched_getaffinity, R7
+ SWI $0
+ MOVW R0, ret+12(FP)
+ RET
+
+// int32 runtime·epollcreate(int32 size)
+TEXT runtime·epollcreate(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW $SYS_epoll_create, R7
+ SWI $0
+ MOVW R0, ret+4(FP)
+ RET
+
+// int32 runtime·epollcreate1(int32 flags)
+TEXT runtime·epollcreate1(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW $SYS_epoll_create1, R7
+ SWI $0
+ MOVW R0, ret+4(FP)
+ RET
+
+// func epollctl(epfd, op, fd int32, ev *epollEvent) int
+TEXT runtime·epollctl(SB),NOSPLIT,$0
+ MOVW epfd+0(FP), R0
+ MOVW op+4(FP), R1
+ MOVW fd+8(FP), R2
+ MOVW ev+12(FP), R3
+ MOVW $SYS_epoll_ctl, R7
+ SWI $0
+ MOVW R0, ret+16(FP)
+ RET
+
+// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout)
+TEXT runtime·epollwait(SB),NOSPLIT,$0
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW 12(FP), R3
+ MOVW $SYS_epoll_wait, R7
+ SWI $0
+ MOVW R0, ret+16(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd)
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // fd
+ MOVW $2, R1 // F_SETFD
+ MOVW $1, R2 // FD_CLOEXEC
+ MOVW $SYS_fcntl, R7
+ SWI $0
+ RET
+
+// b __kuser_get_tls @ 0xffff0fe0
+TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
+ MOVW $0xffff0fe0, R0
+ B (R0)
diff --git a/src/runtime/sys_nacl_386.s b/src/runtime/sys_nacl_386.s
new file mode 100644
index 000000000..47985f31f
--- /dev/null
+++ b/src/runtime/sys_nacl_386.s
@@ -0,0 +1,363 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+#include "syscall_nacl.h"
+
+#define NACL_SYSCALL(code) \
+ MOVL $(0x10000 + ((code)<<5)), AX; CALL AX
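+// Each NaCl system call is reached by calling a fixed trampoline at address 0x10000 + 32*code.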
+
+TEXT runtime·exit(SB),NOSPLIT,$4
+ MOVL code+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_exit)
+ JMP 0(PC)
+
+TEXT runtime·exit1(SB),NOSPLIT,$4
+ MOVL code+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_thread_exit)
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$12
+ MOVL name+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL mode+4(FP), AX
+ MOVL AX, 4(SP)
+ MOVL perm+8(FP), AX
+ MOVL AX, 8(SP)
+ NACL_SYSCALL(SYS_open)
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$4
+ MOVL fd+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_close)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$12
+ MOVL fd+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL p+4(FP), AX
+ MOVL AX, 4(SP)
+ MOVL n+8(FP), AX
+ MOVL AX, 8(SP)
+ NACL_SYSCALL(SYS_read)
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT syscall·naclWrite(SB), NOSPLIT, $16-16
+ MOVL arg1+0(FP), DI
+ MOVL arg2+4(FP), SI
+ MOVL arg3+8(FP), DX
+ MOVL DI, 0(SP)
+ MOVL SI, 4(SP)
+ MOVL DX, 8(SP)
+ CALL runtime·write(SB)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$12
+ MOVL fd+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL p+4(FP), AX
+ MOVL AX, 4(SP)
+ MOVL n+8(FP), AX
+ MOVL AX, 8(SP)
+ NACL_SYSCALL(SYS_write)
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$8
+ MOVL p+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL size+4(FP), AX
+ MOVL AX, 4(SP)
+ NACL_SYSCALL(SYS_exception_stack)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$8
+ MOVL fn+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL arg+4(FP), AX
+ MOVL AX, 4(SP)
+ NACL_SYSCALL(SYS_exception_handler)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_sem_create(SB),NOSPLIT,$4
+ MOVL flag+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_sem_create)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$4
+ MOVL sem+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_sem_wait)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_sem_post(SB),NOSPLIT,$4
+ MOVL sem+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_sem_post)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$4
+ MOVL flag+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_mutex_create)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$4
+ MOVL mutex+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_mutex_lock)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$4
+ MOVL mutex+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_mutex_trylock)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$4
+ MOVL mutex+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_mutex_unlock)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_create(SB),NOSPLIT,$4
+ MOVL flag+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_cond_create)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$8
+ MOVL cond+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL n+4(FP), AX
+ MOVL AX, 4(SP)
+ NACL_SYSCALL(SYS_cond_wait)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$4
+ MOVL cond+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_cond_signal)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$4
+ MOVL cond+0(FP), AX
+ MOVL AX, 0(SP)
+ NACL_SYSCALL(SYS_cond_broadcast)
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$12
+ MOVL cond+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL lock+4(FP), AX
+ MOVL AX, 4(SP)
+ MOVL ts+8(FP), AX
+ MOVL AX, 8(SP)
+ NACL_SYSCALL(SYS_cond_timed_wait_abs)
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·nacl_thread_create(SB),NOSPLIT,$16
+ MOVL fn+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL stk+4(FP), AX
+ MOVL AX, 4(SP)
+ MOVL tls+8(FP), AX
+ MOVL AX, 8(SP)
+ MOVL xx+12(FP), AX
+ MOVL AX, 12(SP)
+ NACL_SYSCALL(SYS_thread_create)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
+ JMP runtime·mstart(SB)
+
+TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$8
+ MOVL ts+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL extra+4(FP), AX
+ MOVL AX, 4(SP)
+ NACL_SYSCALL(SYS_nanosleep)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ NACL_SYSCALL(SYS_sched_yield)
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$32
+ MOVL addr+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL n+4(FP), AX
+ MOVL AX, 4(SP)
+ MOVL prot+8(FP), AX
+ MOVL AX, 8(SP)
+ MOVL flags+12(FP), AX
+ MOVL AX, 12(SP)
+ MOVL fd+16(FP), AX
+ MOVL AX, 16(SP)
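+	// The file offset is an int64 passed by pointer: build it at 24(SP)
+	// and store its address in the sixth argument slot at 20(SP).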
+ MOVL off+20(FP), AX
+ MOVL AX, 24(SP)
+ MOVL $0, 28(SP)
+ LEAL 24(SP), AX
+ MOVL AX, 20(SP)
+ NACL_SYSCALL(SYS_mmap)
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT time·now(SB),NOSPLIT,$20
+ MOVL $0, 0(SP) // real time clock
+ LEAL 8(SP), AX
+ MOVL AX, 4(SP) // timespec
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVL 8(SP), AX // low 32 sec
+ MOVL 12(SP), CX // high 32 sec
+ MOVL 16(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ MOVL AX, sec+0(FP)
+ MOVL CX, sec+4(FP)
+ MOVL BX, nsec+8(FP)
+ RET
+
+TEXT syscall·now(SB),NOSPLIT,$0
+ JMP time·now(SB)
+
+TEXT runtime·nacl_clock_gettime(SB),NOSPLIT,$8
+ MOVL arg1+0(FP), AX
+ MOVL AX, 0(SP)
+ MOVL arg2+4(FP), AX
+ MOVL AX, 4(SP)
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$20
+ MOVL $0, 0(SP) // real time clock
+ LEAL 8(SP), AX
+ MOVL AX, 4(SP) // timespec
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVL 8(SP), AX // low 32 sec
+ MOVL 16(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ // convert to DX:AX nsec
+ MOVL $1000000000, CX
+ MULL CX
+ ADDL BX, AX
+ ADCL $0, DX
+
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+TEXT runtime·setldt(SB),NOSPLIT,$8
+ MOVL addr+4(FP), BX // aka base
+ ADDL $0x8, BX
+ MOVL BX, 0(SP)
+ NACL_SYSCALL(SYS_tls_init)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ get_tls(CX)
+
+ // check that g exists
+ MOVL g(CX), DI
+ CMPL DI, $0
+ JNE 6(PC)
+ MOVL $11, BX
+ MOVL $0, 0(SP)
+ MOVL $runtime·badsignal(SB), AX
+ CALL AX
+ JMP sigtramp_ret
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BX
+ MOVL m_gsignal(BX), BX
+ MOVL BX, g(CX)
+
+ // copy arguments for sighandler
+ MOVL $11, 0(SP) // signal
+ MOVL $0, 4(SP) // siginfo
+ LEAL ctxt+4(FP), AX
+ MOVL AX, 8(SP) // context
+ MOVL DI, 12(SP) // g
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), BX
+ MOVL BX, g(CX)
+
+sigtramp_ret:
+ // Enable exceptions again.
+ NACL_SYSCALL(SYS_exception_clear_flag)
+
+	// NaCl has abdicated its traditional operating system responsibility
+ // and declined to implement 'sigreturn'. Instead the only way to return
+ // to the execution of our program is to restore the registers ourselves.
+ // Unfortunately, that is impossible to do with strict fidelity, because
+ // there is no way to do the final update of PC that ends the sequence
+	// without either (1) jumping to a register, in which case the register ends
+	// up holding the PC value instead of its intended value, or (2) storing the PC
+	// on the stack and using RET, which imposes the requirement that SP is
+	// valid and that it is okay to smash the word below it. The second would
+ // normally be the lesser of the two evils, except that on NaCl, the linker
+ // must rewrite RET into "POP reg; AND $~31, reg; JMP reg", so either way
+ // we are going to lose a register as a result of the incoming signal.
+ // Similarly, there is no way to restore EFLAGS; the usual way is to use
+ // POPFL, but NaCl rejects that instruction. We could inspect the bits and
+ // execute a sequence of instructions designed to recreate those flag
+ // settings, but that's a lot of work.
+ //
+ // Thankfully, Go's signal handlers never try to return directly to the
+ // executing code, so all the registers and EFLAGS are dead and can be
+	// smashed. The only registers that matter are the ones being set up
+	// for the simulated call that the signal handler has created.
+ // Today those registers are just PC and SP, but in case additional registers
+ // are relevant in the future (for example DX is the Go func context register)
+ // we restore as many registers as possible.
+ //
+ // We smash BP, because that's what the linker smashes during RET.
+ //
+ LEAL ctxt+4(FP), BP
+ ADDL $64, BP
+ MOVL 0(BP), AX
+ MOVL 4(BP), CX
+ MOVL 8(BP), DX
+ MOVL 12(BP), BX
+ MOVL 16(BP), SP
+ // 20(BP) is saved BP, never to be seen again
+ MOVL 24(BP), SI
+ MOVL 28(BP), DI
+ // 36(BP) is saved EFLAGS, never to be seen again
+ MOVL 32(BP), BP // saved PC
+ JMP BP
diff --git a/src/runtime/sys_nacl_amd64p32.s b/src/runtime/sys_nacl_amd64p32.s
new file mode 100644
index 000000000..c30c2a893
--- /dev/null
+++ b/src/runtime/sys_nacl_amd64p32.s
@@ -0,0 +1,459 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+#include "syscall_nacl.h"
+
+#define NACL_SYSCALL(code) \
+ MOVL $(0x10000 + ((code)<<5)), AX; CALL AX
+
+TEXT runtime·settls(SB),NOSPLIT,$0
+ MOVL DI, TLS // really BP
+ RET
+
+TEXT runtime·exit(SB),NOSPLIT,$0
+ MOVL code+0(FP), DI
+ NACL_SYSCALL(SYS_exit)
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$0
+ MOVL code+0(FP), DI
+ NACL_SYSCALL(SYS_thread_exit)
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVL name+0(FP), DI
+ MOVL mode+4(FP), SI
+ MOVL perm+8(FP), DX
+ NACL_SYSCALL(SYS_open)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI
+ NACL_SYSCALL(SYS_close)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI
+ MOVL p+4(FP), SI
+ MOVL n+8(FP), DX
+ NACL_SYSCALL(SYS_read)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT syscall·naclWrite(SB), NOSPLIT, $24-20
+ MOVL arg1+0(FP), DI
+ MOVL arg2+4(FP), SI
+ MOVL arg3+8(FP), DX
+ MOVL DI, 0(SP)
+ MOVL SI, 4(SP)
+ MOVL DX, 8(SP)
+ CALL runtime·write(SB)
+ MOVL 16(SP), AX
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$16-20
+ // If using fake time and writing to stdout or stderr,
+ // emit playback header before actual data.
+ MOVQ runtime·timens(SB), AX
+ CMPQ AX, $0
+ JEQ write
+ MOVL fd+0(FP), DI
+ CMPL DI, $1
+ JEQ playback
+ CMPL DI, $2
+ JEQ playback
+
+write:
+ // Ordinary write.
+ MOVL fd+0(FP), DI
+ MOVL p+4(FP), SI
+ MOVL n+8(FP), DX
+ NACL_SYSCALL(SYS_write)
+ MOVL AX, ret+16(FP)
+ RET
+
+ // Write with playback header.
+ // First, lock to avoid interleaving writes.
+playback:
+ MOVL $1, BX
+ XCHGL runtime·writelock(SB), BX
+ CMPL BX, $0
+ JNE playback
+
+ // Playback header: 0 0 P B <8-byte time> <4-byte data length>
+ MOVL $(('B'<<24) | ('P'<<16)), 0(SP)
+ BSWAPQ AX
+ MOVQ AX, 4(SP)
+ MOVL n+8(FP), DX
+ BSWAPL DX
+ MOVL DX, 12(SP)
+ MOVL $1, DI // standard output
+ MOVL SP, SI
+ MOVL $16, DX
+ NACL_SYSCALL(SYS_write)
+
+ // Write actual data.
+ MOVL $1, DI // standard output
+ MOVL p+4(FP), SI
+ MOVL n+8(FP), DX
+ NACL_SYSCALL(SYS_write)
+
+ // Unlock.
+ MOVL $0, runtime·writelock(SB)
+
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$0
+ MOVL p+0(FP), DI
+ MOVL size+4(FP), SI
+ NACL_SYSCALL(SYS_exception_stack)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$0
+ MOVL fn+0(FP), DI
+ MOVL arg+4(FP), SI
+ NACL_SYSCALL(SYS_exception_handler)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_sem_create(SB),NOSPLIT,$0
+ MOVL flag+0(FP), DI
+ NACL_SYSCALL(SYS_sem_create)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$0
+ MOVL sem+0(FP), DI
+ NACL_SYSCALL(SYS_sem_wait)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_sem_post(SB),NOSPLIT,$0
+ MOVL sem+0(FP), DI
+ NACL_SYSCALL(SYS_sem_post)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$0
+ MOVL flag+0(FP), DI
+ NACL_SYSCALL(SYS_mutex_create)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$0
+ MOVL mutex+0(FP), DI
+ NACL_SYSCALL(SYS_mutex_lock)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$0
+ MOVL mutex+0(FP), DI
+ NACL_SYSCALL(SYS_mutex_trylock)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$0
+ MOVL mutex+0(FP), DI
+ NACL_SYSCALL(SYS_mutex_unlock)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_cond_create(SB),NOSPLIT,$0
+ MOVL flag+0(FP), DI
+ NACL_SYSCALL(SYS_cond_create)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$0
+ MOVL cond+0(FP), DI
+ MOVL n+4(FP), SI
+ NACL_SYSCALL(SYS_cond_wait)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$0
+ MOVL cond+0(FP), DI
+ NACL_SYSCALL(SYS_cond_signal)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$0
+ MOVL cond+0(FP), DI
+ NACL_SYSCALL(SYS_cond_broadcast)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$0
+ MOVL cond+0(FP), DI
+ MOVL lock+4(FP), SI
+ MOVL ts+8(FP), DX
+ NACL_SYSCALL(SYS_cond_timed_wait_abs)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
+ MOVL fn+0(FP), DI
+ MOVL stk+4(FP), SI
+ MOVL tls+8(FP), DX
+ MOVL xx+12(FP), CX
+ NACL_SYSCALL(SYS_thread_create)
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
+ NACL_SYSCALL(SYS_tls_get)
+ SUBL $8, AX
+ MOVL AX, TLS
+ JMP runtime·mstart(SB)
+
+TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
+ MOVL ts+0(FP), DI
+ MOVL extra+4(FP), SI
+ NACL_SYSCALL(SYS_nanosleep)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ NACL_SYSCALL(SYS_sched_yield)
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$8
+ MOVL addr+0(FP), DI
+ MOVL n+4(FP), SI
+ MOVL prot+8(FP), DX
+ MOVL flags+12(FP), CX
+ MOVL fd+16(FP), R8
+ MOVL off+20(FP), AX
+ MOVQ AX, 0(SP)
+ MOVL SP, R9
+ NACL_SYSCALL(SYS_mmap)
+ CMPL AX, $-4095
+ JNA 2(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT time·now(SB),NOSPLIT,$16
+ MOVQ runtime·timens(SB), AX
+ CMPQ AX, $0
+ JEQ realtime
+ MOVQ $0, DX
+ MOVQ $1000000000, CX
+ DIVQ CX
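+	// fake time: AX = timens/1e9 (sec), DX = timens%1e9 (nsec)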
+ MOVQ AX, sec+0(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+realtime:
+ MOVL $0, DI // real time clock
+ LEAL 0(SP), AX
+ MOVL AX, SI // timespec
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVL 0(SP), AX // low 32 sec
+ MOVL 4(SP), CX // high 32 sec
+ MOVL 8(SP), BX // nsec
+
+ // sec is in AX, nsec in BX
+ MOVL AX, sec+0(FP)
+ MOVL CX, sec+4(FP)
+ MOVL BX, nsec+8(FP)
+ RET
+
+TEXT syscall·now(SB),NOSPLIT,$0
+ JMP time·now(SB)
+
+TEXT runtime·nacl_clock_gettime(SB),NOSPLIT,$0
+ MOVL arg1+0(FP), DI
+ MOVL arg2+4(FP), SI
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$16
+ MOVQ runtime·timens(SB), AX
+ CMPQ AX, $0
+ JEQ 3(PC)
+ MOVQ AX, ret+0(FP)
+ RET
+ MOVL $0, DI // real time clock
+ LEAL 0(SP), AX
+ MOVL AX, SI // timespec
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVQ 0(SP), AX // sec
+ MOVL 8(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$80
+ // restore TLS register at time of execution,
+ // in case it's been smashed.
+ // the TLS register is really BP, but for consistency
+ // with non-NaCl systems it is referred to here as TLS.
+ // NOTE: Cannot use SYS_tls_get here (like we do in mstart_nacl),
+ // because the main thread never calls tls_set.
+ LEAL ctxt+0(FP), AX
+ MOVL (16*4+5*8)(AX), AX
+ MOVL AX, TLS
+
+ // check that g exists
+ get_tls(CX)
+ MOVL g(CX), DI
+
+ CMPL DI, $0
+ JEQ nog
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BX
+ MOVL m_gsignal(BX), BX
+ MOVL BX, g(CX)
+
+//JMP debughandler
+
+ // copy arguments for sighandler
+ MOVL $11, 0(SP) // signal
+ MOVL $0, 4(SP) // siginfo
+ LEAL ctxt+0(FP), AX
+ MOVL AX, 8(SP) // context
+ MOVL DI, 12(SP) // g
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), BX
+ MOVL BX, g(CX)
+
+sigtramp_ret:
+ // Enable exceptions again.
+ NACL_SYSCALL(SYS_exception_clear_flag)
+
+ // Restore registers as best we can. Impossible to do perfectly.
+ // See comment in sys_nacl_386.s for extended rationale.
+ LEAL ctxt+0(FP), SI
+ ADDL $64, SI
+ MOVQ 0(SI), AX
+ MOVQ 8(SI), CX
+ MOVQ 16(SI), DX
+ MOVQ 24(SI), BX
+ MOVL 32(SI), SP // MOVL for SP sandboxing
+ // 40(SI) is saved BP aka TLS, already restored above
+ // 48(SI) is saved SI, never to be seen again
+ MOVQ 56(SI), DI
+ MOVQ 64(SI), R8
+ MOVQ 72(SI), R9
+ MOVQ 80(SI), R10
+ MOVQ 88(SI), R11
+ MOVQ 96(SI), R12
+ MOVQ 104(SI), R13
+ MOVQ 112(SI), R14
+ // 120(SI) is R15, which is owned by Native Client and must not be modified
+ MOVQ 128(SI), SI // saved PC
+ // 136(SI) is saved EFLAGS, never to be seen again
+ JMP SI
+
+debughandler:
+ // print basic information
+ LEAL ctxt+0(FP), DI
+ MOVL $runtime·sigtrampf(SB), AX
+ MOVL AX, 0(SP)
+ MOVQ (16*4+16*8)(DI), BX // rip
+ MOVQ BX, 8(SP)
+ MOVQ (16*4+0*8)(DI), BX // rax
+ MOVQ BX, 16(SP)
+ MOVQ (16*4+1*8)(DI), BX // rcx
+ MOVQ BX, 24(SP)
+ MOVQ (16*4+2*8)(DI), BX // rdx
+ MOVQ BX, 32(SP)
+ MOVQ (16*4+3*8)(DI), BX // rbx
+ MOVQ BX, 40(SP)
+ MOVQ (16*4+7*8)(DI), BX // rdi
+ MOVQ BX, 48(SP)
+ MOVQ (16*4+15*8)(DI), BX // r15
+ MOVQ BX, 56(SP)
+ MOVQ (16*4+4*8)(DI), BX // rsp
+ MOVQ 0(BX), BX
+ MOVQ BX, 64(SP)
+ CALL runtime·printf(SB)
+
+ LEAL ctxt+0(FP), DI
+ MOVQ (16*4+16*8)(DI), BX // rip
+ MOVL BX, 0(SP)
+ MOVQ (16*4+4*8)(DI), BX // rsp
+ MOVL BX, 4(SP)
+ MOVL $0, 8(SP) // lr
+ get_tls(CX)
+ MOVL g(CX), BX
+ MOVL BX, 12(SP) // gp
+ CALL runtime·traceback(SB)
+
+notls:
+ MOVL 0, AX
+ RET
+
+nog:
+ MOVL 0, AX
+ RET
+
+// cannot do real signal handling yet, because gsignal has not been allocated.
+MOVL $1, DI; NACL_SYSCALL(SYS_exit)
+
+TEXT runtime·nacl_sysinfo(SB),NOSPLIT,$16
+/*
+ MOVL di+0(FP), DI
+ LEAL 12(DI), BX
+ MOVL 8(DI), AX
+ ADDL 4(DI), AX
+ ADDL $2, AX
+ LEAL (BX)(AX*4), BX
+ MOVL BX, runtime·nacl_irt_query(SB)
+auxloop:
+ MOVL 0(BX), DX
+ CMPL DX, $0
+ JNE 2(PC)
+ RET
+ CMPL DX, $32
+ JEQ auxfound
+ ADDL $8, BX
+ JMP auxloop
+auxfound:
+ MOVL 4(BX), BX
+ MOVL BX, runtime·nacl_irt_query(SB)
+
+ LEAL runtime·nacl_irt_basic_v0_1_str(SB), DI
+ LEAL runtime·nacl_irt_basic_v0_1(SB), SI
+ MOVL runtime·nacl_irt_basic_v0_1_size(SB), DX
+ MOVL runtime·nacl_irt_query(SB), BX
+ CALL BX
+
+ LEAL runtime·nacl_irt_memory_v0_3_str(SB), DI
+ LEAL runtime·nacl_irt_memory_v0_3(SB), SI
+ MOVL runtime·nacl_irt_memory_v0_3_size(SB), DX
+ MOVL runtime·nacl_irt_query(SB), BX
+ CALL BX
+
+ LEAL runtime·nacl_irt_thread_v0_1_str(SB), DI
+ LEAL runtime·nacl_irt_thread_v0_1(SB), SI
+ MOVL runtime·nacl_irt_thread_v0_1_size(SB), DX
+ MOVL runtime·nacl_irt_query(SB), BX
+ CALL BX
+
+ // TODO: Once we have a NaCl SDK with futex syscall support,
+ // try switching to futex syscalls and here load the
+ // nacl-irt-futex-0.1 table.
+*/
+ RET
diff --git a/src/runtime/sys_nacl_arm.s b/src/runtime/sys_nacl_arm.s
new file mode 100644
index 000000000..d354ab483
--- /dev/null
+++ b/src/runtime/sys_nacl_arm.s
@@ -0,0 +1,320 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+#include "syscall_nacl.h"
+
+#define NACL_SYSCALL(code) \
+ MOVW $(0x10000 + ((code)<<5)), R8; BL (R8)
+
+TEXT runtime·exit(SB),NOSPLIT,$0
+ MOVW code+0(FP), R0
+ NACL_SYSCALL(SYS_exit)
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$0
+ MOVW code+0(FP), R0
+ NACL_SYSCALL(SYS_thread_exit)
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVW name+0(FP), R0
+	MOVW	mode+4(FP), R1
+	MOVW	perm+8(FP), R2
+ NACL_SYSCALL(SYS_open)
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVW fd+0(FP), R0
+ NACL_SYSCALL(SYS_close)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$0
+ MOVW fd+0(FP), R0
+ MOVW p+4(FP), R1
+ MOVW n+8(FP), R2
+ NACL_SYSCALL(SYS_read)
+ MOVW R0, ret+12(FP)
+ RET
+
+// func naclWrite(fd int, b []byte) int
+TEXT syscall·naclWrite(SB),NOSPLIT,$0
+ MOVW arg1+0(FP), R0
+ MOVW arg2+4(FP), R1
+ MOVW arg3+8(FP), R2
+ NACL_SYSCALL(SYS_write)
+ MOVW R0, ret+16(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$0
+ MOVW fd+0(FP), R0
+ MOVW p+4(FP), R1
+ MOVW n+8(FP), R2
+ NACL_SYSCALL(SYS_write)
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$0
+ MOVW p+0(FP), R0
+ MOVW size+4(FP), R1
+ NACL_SYSCALL(SYS_exception_stack)
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$0
+ MOVW fn+0(FP), R0
+ MOVW arg+4(FP), R1
+ NACL_SYSCALL(SYS_exception_handler)
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_sem_create(SB),NOSPLIT,$0
+ MOVW flag+0(FP), R0
+ NACL_SYSCALL(SYS_sem_create)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$0
+ MOVW sem+0(FP), R0
+ NACL_SYSCALL(SYS_sem_wait)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_sem_post(SB),NOSPLIT,$0
+ MOVW sem+0(FP), R0
+ NACL_SYSCALL(SYS_sem_post)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$0
+ MOVW flag+0(FP), R0
+ NACL_SYSCALL(SYS_mutex_create)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$0
+ MOVW mutex+0(FP), R0
+ NACL_SYSCALL(SYS_mutex_lock)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$0
+ MOVW mutex+0(FP), R0
+ NACL_SYSCALL(SYS_mutex_trylock)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$0
+ MOVW mutex+0(FP), R0
+ NACL_SYSCALL(SYS_mutex_unlock)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_create(SB),NOSPLIT,$0
+ MOVW flag+0(FP), R0
+ NACL_SYSCALL(SYS_cond_create)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$0
+ MOVW cond+0(FP), R0
+ MOVW n+4(FP), R1
+ NACL_SYSCALL(SYS_cond_wait)
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$0
+ MOVW cond+0(FP), R0
+ NACL_SYSCALL(SYS_cond_signal)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$0
+ MOVW cond+0(FP), R0
+ NACL_SYSCALL(SYS_cond_broadcast)
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$0
+ MOVW cond+0(FP), R0
+ MOVW lock+4(FP), R1
+ MOVW ts+8(FP), R2
+ NACL_SYSCALL(SYS_cond_timed_wait_abs)
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
+ MOVW fn+0(FP), R0
+ MOVW stk+4(FP), R1
+ MOVW tls+8(FP), R2
+ MOVW xx+12(FP), R3
+ NACL_SYSCALL(SYS_thread_create)
+ MOVW R0, ret+16(FP)
+ RET
+
+TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
+ MOVW 0(R9), R0 // TLS
+ MOVW -8(R0), R1 // g
+ MOVW -4(R0), R2 // m
+ MOVW R2, g_m(R1)
+ MOVW R1, g
+ B runtime·mstart(SB)
+
+TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
+ MOVW ts+0(FP), R0
+ MOVW extra+4(FP), R1
+ NACL_SYSCALL(SYS_nanosleep)
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ NACL_SYSCALL(SYS_sched_yield)
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$8
+ MOVW addr+0(FP), R0
+ MOVW n+4(FP), R1
+ MOVW prot+8(FP), R2
+ MOVW flags+12(FP), R3
+ MOVW fd+16(FP), R4
+ // arg6:offset should be passed as a pointer (to int64)
+ MOVW off+20(FP), R5
+ MOVW R5, 4(R13)
+ MOVW $0, R6
+ MOVW R6, 8(R13)
+ MOVW $4(R13), R5
+ MOVM.DB.W [R4,R5], (R13) // arg5 and arg6 are passed on stack
+ NACL_SYSCALL(SYS_mmap)
+ MOVM.IA.W (R13), [R4, R5]
+ CMP $-4095, R0
+ RSB.HI $0, R0
+ MOVW R0, ret+24(FP)
+ RET
+
+TEXT time·now(SB),NOSPLIT,$16
+ MOVW $0, R0 // real time clock
+ MOVW $4(R13), R1
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVW 4(R13), R0 // low 32-bit sec
+ MOVW 8(R13), R1 // high 32-bit sec
+ MOVW 12(R13), R2 // nsec
+ MOVW R0, sec+0(FP)
+ MOVW R1, sec+4(FP)
+	MOVW	R2, nsec+8(FP)
+ RET
+
+TEXT syscall·now(SB),NOSPLIT,$0
+ B time·now(SB)
+
+TEXT runtime·nacl_clock_gettime(SB),NOSPLIT,$0
+ MOVW arg1+0(FP), R0
+ MOVW arg2+4(FP), R1
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVW R0, ret+8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB),NOSPLIT,$16
+ MOVW $0, R0 // real time clock
+ MOVW $4(R13), R1
+ NACL_SYSCALL(SYS_clock_gettime)
+ MOVW 4(R13), R0 // low 32-bit sec
+ MOVW 8(R13), R1 // high 32-bit sec (ignored for now)
+ MOVW 12(R13), R2 // nsec
+ MOVW $1000000000, R3
+ MULLU R0, R3, (R1, R0)
+ MOVW $0, R4
+ ADD.S R2, R0
+ ADC R4, R1
+ MOVW R0, ret_lo+0(FP)
+ MOVW R1, ret_hi+4(FP)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$80
+ // load g from thread context
+ MOVW $ctxt+-4(FP), R0
+ MOVW (16*4+10*4)(R0), g
+
+ // check that g exists
+ CMP $0, g
+ BNE 4(PC)
+ MOVW $runtime·badsignal2(SB), R11
+ BL (R11)
+ RET
+
+ // save g
+ MOVW g, R3
+ MOVW g, 20(R13)
+
+ // g = m->gsignal
+ MOVW g_m(g), R8
+ MOVW m_gsignal(R8), g
+
+ // copy arguments for call to sighandler
+ MOVW $11, R0
+ MOVW R0, 4(R13) // signal
+ MOVW $0, R0
+ MOVW R0, 8(R13) // siginfo
+ MOVW $ctxt+-4(FP), R0
+ MOVW R0, 12(R13) // context
+ MOVW R3, 16(R13) // g
+
+ BL runtime·sighandler(SB)
+
+ // restore g
+ MOVW 20(R13), g
+
+sigtramp_ret:
+ // Enable exceptions again.
+ NACL_SYSCALL(SYS_exception_clear_flag)
+
+ // Restore registers as best we can. Impossible to do perfectly.
+ // See comment in sys_nacl_386.s for extended rationale.
+ MOVW $ctxt+-4(FP), R1
+ ADD $64, R1
+ MOVW (0*4)(R1), R0
+ MOVW (2*4)(R1), R2
+ MOVW (3*4)(R1), R3
+ MOVW (4*4)(R1), R4
+ MOVW (5*4)(R1), R5
+ MOVW (6*4)(R1), R6
+ MOVW (7*4)(R1), R7
+ MOVW (8*4)(R1), R8
+ // cannot write to R9
+ MOVW (10*4)(R1), g
+ MOVW (11*4)(R1), R11
+ MOVW (12*4)(R1), R12
+ MOVW (13*4)(R1), R13
+ MOVW (14*4)(R1), R14
+ MOVW (15*4)(R1), R1
+ B (R1)
+
+nog:
+ MOVW $0, R0
+ RET
+
+TEXT runtime·nacl_sysinfo(SB),NOSPLIT,$16
+ RET
+
+TEXT runtime·casp(SB),NOSPLIT,$0
+ B runtime·cas(SB)
+
+// This is only valid for ARMv6+; however, NaCl/ARM is only defined
+// for ARMv7A anyway.
+// bool armcas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+TEXT runtime·cas(SB),NOSPLIT,$0
+ B runtime·armcas(SB)
+
+TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
+ WORD $0xe7fedef0 // NACL_INSTR_ARM_ABORT_NOW (UDF #0xEDE0)
diff --git a/src/runtime/sys_netbsd_386.s b/src/runtime/sys_netbsd_386.s
new file mode 100644
index 000000000..83a76cb34
--- /dev/null
+++ b/src/runtime/sys_netbsd_386.s
@@ -0,0 +1,384 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for 386, NetBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-4
+ MOVL $1, AX
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-4
+ MOVL $310, AX // sys__lwp_exit
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-4
+ MOVL $5, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-4
+ MOVL $6, AX
+ INT $0x80
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-4
+ MOVL $3, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-4
+ MOVL $4, AX // sys_write
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$24
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVL AX, 12(SP) // tv_sec - l32
+ MOVL $0, 16(SP) // tv_sec - h32
+ MOVL $1000, AX
+ MULL DX
+ MOVL AX, 20(SP) // tv_nsec
+
+ MOVL $0, 0(SP)
+ LEAL 12(SP), AX
+ MOVL AX, 4(SP) // arg 1 - rqtp
+ MOVL $0, 8(SP) // arg 2 - rmtp
+ MOVL $430, AX // sys_nanosleep
+ INT $0x80
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$12
+ MOVL $311, AX // sys__lwp_self
+ INT $0x80
+ MOVL $0, 0(SP)
+ MOVL AX, 4(SP) // arg 1 - target
+ MOVL sig+0(FP), AX
+ MOVL AX, 8(SP) // arg 2 - signo
+ MOVL $318, AX // sys__lwp_kill
+ INT $0x80
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$36
+ LEAL addr+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL // arg 1 - addr
+ MOVSL // arg 2 - len
+ MOVSL // arg 3 - prot
+ MOVSL // arg 4 - flags
+ MOVSL // arg 5 - fd
+ MOVL $0, AX
+ STOSL // arg 6 - pad
+ MOVSL // arg 7 - offset
+ MOVL $0, AX // top 32 bits of file offset
+ STOSL
+ MOVL $197, AX // sys_mmap
+ INT $0x80
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$-4
+ MOVL $73, AX // sys_munmap
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$-4
+ MOVL $75, AX // sys_madvise
+ INT $0x80
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$-4
+ MOVL $425, AX // sys_setitimer
+ INT $0x80
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ LEAL 12(SP), BX
+ MOVL $0, 4(SP) // arg 1 - clock_id
+ MOVL BX, 8(SP) // arg 2 - tp
+ MOVL $427, AX // sys_clock_gettime
+ INT $0x80
+
+ MOVL 12(SP), AX // sec - l32
+ MOVL AX, sec+0(FP)
+ MOVL 16(SP), AX // sec - h32
+ MOVL AX, sec+4(FP)
+
+ MOVL 20(SP), BX // nsec
+ MOVL BX, nsec+8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB),NOSPLIT,$32
+ LEAL 12(SP), BX
+ MOVL $0, 4(SP) // arg 1 - clock_id
+ MOVL BX, 8(SP) // arg 2 - tp
+ MOVL $427, AX // sys_clock_gettime
+ INT $0x80
+
+ MOVL 16(SP), CX // sec - h32
+ IMULL $1000000000, CX
+
+ MOVL 12(SP), AX // sec - l32
+ MOVL $1000000000, BX
+ MULL BX // result in dx:ax
+
+ MOVL 20(SP), BX // nsec
+ ADDL BX, AX
+ ADCL CX, DX // add high bits with carry
+
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+TEXT runtime·getcontext(SB),NOSPLIT,$-4
+ MOVL $307, AX // sys_getcontext
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$-4
+ MOVL $293, AX // sys_sigprocmask
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigreturn_tramp(SB),NOSPLIT,$0
+ LEAL 140(SP), AX // Load address of ucontext
+ MOVL AX, 4(SP)
+ MOVL $308, AX // sys_setcontext
+ INT $0x80
+ MOVL $-1, 4(SP) // Something failed...
+ MOVL $1, AX // sys_exit
+ INT $0x80
+
+TEXT runtime·sigaction(SB),NOSPLIT,$24
+ LEAL sig+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL // arg 1 - sig
+ MOVSL // arg 2 - act
+ MOVSL // arg 3 - oact
+ LEAL runtime·sigreturn_tramp(SB), AX
+ STOSL // arg 4 - tramp
+ MOVL $2, AX
+ STOSL // arg 5 - vers
+ MOVL $340, AX // sys___sigaction_sigtramp
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$44
+ get_tls(CX)
+
+ // check that g exists
+ MOVL g(CX), DI
+ CMPL DI, $0
+ JNE 6(PC)
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL $runtime·badsignal(SB), AX
+ CALL AX
+ RET
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BX
+ MOVL m_gsignal(BX), BX
+ MOVL BX, g(CX)
+
+ // copy arguments for call to sighandler
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL info+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL context+8(FP), BX
+ MOVL BX, 8(SP)
+ MOVL DI, 12(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), BX
+ MOVL BX, g(CX)
+ RET
+
+// int32 lwp_create(void *context, uintptr flags, void *lwpid);
+TEXT runtime·lwp_create(SB),NOSPLIT,$16
+ MOVL $0, 0(SP)
+ MOVL ctxt+0(FP), AX
+ MOVL AX, 4(SP) // arg 1 - context
+ MOVL flags+4(FP), AX
+ MOVL AX, 8(SP) // arg 2 - flags
+ MOVL lwpid+8(FP), AX
+ MOVL AX, 12(SP) // arg 3 - lwpid
+ MOVL $309, AX // sys__lwp_create
+ INT $0x80
+ JCC 2(PC)
+ NEGL AX
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·lwp_tramp(SB),NOSPLIT,$0
+
+ // Set FS to point at m->tls
+ LEAL m_tls(BX), BP
+ PUSHAL // save registers
+ PUSHL BP
+ CALL runtime·settls(SB)
+ POPL AX
+ POPAL
+
+ // Now segment is established. Initialize m, g.
+ get_tls(AX)
+ MOVL DX, g(AX)
+ MOVL BX, g_m(DX)
+
+ CALL runtime·stackcheck(SB) // smashes AX, CX
+ MOVL 0(DX), DX // paranoia; check they are not nil
+ MOVL 0(BX), BX
+
+ // more paranoia; check that stack splitting code works
+ PUSHAL
+ CALL runtime·emptyfunc(SB)
+ POPAL
+
+ // Call fn
+ CALL SI
+
+ CALL runtime·exit1(SB)
+ MOVL $0x1234, 0x1005
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVL $281, AX // sys___sigaltstack14
+ MOVL new+4(SP), BX
+ MOVL old+8(SP), CX
+ INT $0x80
+ CMPL AX, $0xfffff001
+ JLS 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·setldt(SB),NOSPLIT,$8
+ // Under NetBSD we set the GS base instead of messing with the LDT.
+ MOVL 16(SP), AX // tls0
+ MOVL AX, 0(SP)
+ CALL runtime·settls(SB)
+ RET
+
+TEXT runtime·settls(SB),NOSPLIT,$16
+ // adjust for ELF: wants to use -8(GS) and -4(GS) for g and m
+ MOVL base+0(FP), CX
+ ADDL $8, CX
+ MOVL $0, 0(SP) // syscall gap
+ MOVL CX, 4(SP) // arg 1 - ptr
+ MOVL $317, AX // sys__lwp_setprivate
+ INT $0x80
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$-4
+ MOVL $350, AX // sys_sched_yield
+ INT $0x80
+ RET
+
+TEXT runtime·lwp_park(SB),NOSPLIT,$-4
+ MOVL $434, AX // sys__lwp_park
+ INT $0x80
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·lwp_unpark(SB),NOSPLIT,$-4
+ MOVL $321, AX // sys__lwp_unpark
+ INT $0x80
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·lwp_self(SB),NOSPLIT,$-4
+ MOVL $311, AX // sys__lwp_self
+ INT $0x80
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$28
+ LEAL mib+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL // arg 1 - name
+ MOVSL // arg 2 - namelen
+ MOVSL // arg 3 - oldp
+ MOVSL // arg 4 - oldlenp
+ MOVSL // arg 5 - newp
+ MOVSL // arg 6 - newlen
+ MOVL $202, AX // sys___sysctl
+ INT $0x80
+ JCC 3(PC)
+ NEGL AX
+ RET
+ MOVL $0, AX
+ RET
+
+GLOBL runtime·tlsoffset(SB),$4
+
+// int32 runtime·kqueue(void)
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVL $344, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL $435, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·closeonexec(int32 fd)
+TEXT runtime·closeonexec(SB),NOSPLIT,$32
+ MOVL $92, AX // fcntl
+ // 0(SP) is where the caller PC would be; kernel skips it
+ MOVL fd+0(FP), BX
+ MOVL BX, 4(SP) // fd
+ MOVL $2, 8(SP) // F_SETFD
+ MOVL $1, 12(SP) // FD_CLOEXEC
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ RET
diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s
new file mode 100644
index 000000000..eb9766d3f
--- /dev/null
+++ b/src/runtime/sys_netbsd_amd64.s
@@ -0,0 +1,358 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for AMD64, NetBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// int32 lwp_create(void *context, uintptr flags, void *lwpid)
+TEXT runtime·lwp_create(SB),NOSPLIT,$0
+ MOVQ ctxt+0(FP), DI
+ MOVQ flags+8(FP), SI
+ MOVQ lwpid+16(FP), DX
+ MOVL $309, AX // sys__lwp_create
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·lwp_tramp(SB),NOSPLIT,$0
+
+ // Set FS to point at m->tls.
+ LEAQ m_tls(R8), DI
+ CALL runtime·settls(SB)
+
+ // Set up new stack.
+ get_tls(CX)
+ MOVQ R8, g_m(R9)
+ MOVQ R9, g(CX)
+ CALL runtime·stackcheck(SB)
+
+ // Call fn
+ CALL R12
+
+ // It shouldn't return. If it does, exit.
+ MOVL $310, AX // sys__lwp_exit
+ SYSCALL
+ JMP -3(PC) // keep exiting
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ MOVL $350, AX // sys_sched_yield
+ SYSCALL
+ RET
+
+TEXT runtime·lwp_park(SB),NOSPLIT,$0
+ MOVQ abstime+0(FP), DI // arg 1 - abstime
+ MOVL unpark+8(FP), SI // arg 2 - unpark
+ MOVQ hint+16(FP), DX // arg 3 - hint
+ MOVQ unparkhint+24(FP), R10 // arg 4 - unparkhint
+ MOVL $434, AX // sys__lwp_park
+ SYSCALL
+ MOVL AX, ret+32(FP)
+ RET
+
+TEXT runtime·lwp_unpark(SB),NOSPLIT,$0
+ MOVL lwp+0(FP), DI // arg 1 - lwp
+ MOVQ hint+8(FP), SI // arg 2 - hint
+ MOVL $321, AX // sys__lwp_unpark
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·lwp_self(SB),NOSPLIT,$0
+ MOVL $311, AX // sys__lwp_self
+ SYSCALL
+ MOVL AX, ret+0(FP)
+ RET
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-8
+ MOVL code+0(FP), DI // arg 1 - exit status
+ MOVL $1, AX // sys_exit
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-8
+ MOVL $310, AX // sys__lwp_exit
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-8
+ MOVQ name+0(FP), DI // arg 1 pathname
+ MOVL mode+8(FP), SI // arg 2 flags
+ MOVL perm+12(FP), DX // arg 3 mode
+ MOVL $5, AX
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVL $6, AX
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $3, AX
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-8
+ MOVQ fd+0(FP), DI // arg 1 - fd
+ MOVQ p+8(FP), SI // arg 2 - buf
+ MOVL n+16(FP), DX // arg 3 - nbyte
+ MOVL $4, AX // sys_write
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVQ AX, 0(SP) // tv_sec
+ MOVL $1000, AX
+ MULL DX
+ MOVQ AX, 8(SP) // tv_nsec
+
+ MOVQ SP, DI // arg 1 - rqtp
+ MOVQ $0, SI // arg 2 - rmtp
+ MOVL $430, AX // sys_nanosleep
+ SYSCALL
+ RET
+
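The stub above splits the microsecond count into the timespec fields that sys_nanosleep expects: DIVL leaves whole seconds in AX and the microsecond remainder in DX, which MULL then scales to nanoseconds. A minimal Go sketch of the same conversion (illustrative names, not part of the runtime):

	// usecToTimespec mirrors the register arithmetic in runtime·usleep:
	// the quotient becomes tv_sec, the remainder scaled by 1000 becomes tv_nsec.
	func usecToTimespec(usec uint32) (sec int64, nsec int32) {
		sec = int64(usec / 1000000)
		nsec = int32(usec%1000000) * 1000
		return
	}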
+TEXT runtime·raise(SB),NOSPLIT,$16
+ MOVL $311, AX // sys__lwp_self
+ SYSCALL
+ MOVQ AX, DI // arg 1 - target
+ MOVL sig+0(FP), SI // arg 2 - signo
+ MOVL $318, AX // sys__lwp_kill
+ SYSCALL
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$-8
+ MOVL mode+0(FP), DI // arg 1 - which
+ MOVQ new+8(FP), SI // arg 2 - itv
+ MOVQ old+16(FP), DX // arg 3 - oitv
+ MOVL $425, AX // sys_setitimer
+ SYSCALL
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVQ $0, DI // arg 1 - clock_id
+ LEAQ 8(SP), SI // arg 2 - tp
+ MOVL $427, AX // sys_clock_gettime
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVL 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ MOVQ AX, sec+0(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$32
+ MOVQ $0, DI // arg 1 - clock_id
+ LEAQ 8(SP), SI // arg 2 - tp
+ MOVL $427, AX // sys_clock_gettime
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVL 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·getcontext(SB),NOSPLIT,$-8
+ MOVQ ctxt+0(FP), DI // arg 1 - context
+ MOVL $307, AX // sys_getcontext
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVL mode+0(FP), DI // arg 1 - how
+ MOVQ new+8(FP), SI // arg 2 - set
+ MOVQ old+16(FP), DX // arg 3 - oset
+ MOVL $293, AX // sys_sigprocmask
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigreturn_tramp(SB),NOSPLIT,$-8
+ MOVQ R15, DI // Load address of ucontext
+ MOVQ $308, AX // sys_setcontext
+ SYSCALL
+ MOVQ $-1, DI // Something failed...
+ MOVL $1, AX // sys_exit
+ SYSCALL
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-8
+ MOVL sig+0(FP), DI // arg 1 - signum
+ MOVQ new+8(FP), SI // arg 2 - nsa
+ MOVQ old+16(FP), DX // arg 3 - osa
+ // arg 4 - tramp
+ LEAQ runtime·sigreturn_tramp(SB), R10
+ MOVQ $2, R8 // arg 5 - vers
+ MOVL $340, AX // sys___sigaction_sigtramp
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ get_tls(BX)
+
+ // check that g exists
+ MOVQ g(BX), R10
+ CMPQ R10, $0
+ JNE 5(PC)
+ MOVQ DI, 0(SP)
+ MOVQ $runtime·badsignal(SB), AX
+ CALL AX
+ RET
+
+ // save g
+ MOVQ R10, 40(SP)
+
+	// g = m->gsignal
+ MOVQ g_m(R10), BP
+ MOVQ m_gsignal(BP), BP
+ MOVQ BP, g(BX)
+
+ MOVQ DI, 0(SP)
+ MOVQ SI, 8(SP)
+ MOVQ DX, 16(SP)
+ MOVQ R10, 24(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(BX)
+ MOVQ 40(SP), R10
+ MOVQ R10, g(BX)
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - addr
+ MOVQ n+8(FP), SI // arg 2 - len
+ MOVL prot+16(FP), DX // arg 3 - prot
+ MOVL flags+20(FP), R10 // arg 4 - flags
+ MOVL fd+24(FP), R8 // arg 5 - fd
+ MOVL off+28(FP), R9
+ SUBQ $16, SP
+ MOVQ R9, 8(SP) // arg 7 - offset (passed on stack)
+ MOVQ $0, R9 // arg 6 - pad
+ MOVL $197, AX // sys_mmap
+ SYSCALL
+ ADDQ $16, SP
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - addr
+ MOVQ n+8(FP), SI // arg 2 - len
+ MOVL $73, AX // sys_munmap
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - addr
+ MOVQ n+8(FP), SI // arg 2 - len
+ MOVL flags+16(FP), DX // arg 3 - behav
+ MOVQ $75, AX // sys_madvise
+ SYSCALL
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVQ new+8(SP), DI // arg 1 - nss
+ MOVQ old+16(SP), SI // arg 2 - oss
+ MOVQ $281, AX // sys___sigaltstack14
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// set tls base to DI
+TEXT runtime·settls(SB),NOSPLIT,$8
+ // adjust for ELF: wants to use -16(FS) and -8(FS) for g and m
+ ADDQ $16, DI // arg 1 - ptr
+ MOVQ $317, AX // sys__lwp_setprivate
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVQ mib+0(FP), DI // arg 1 - name
+ MOVL miblen+8(FP), SI // arg 2 - namelen
+ MOVQ out+16(FP), DX // arg 3 - oldp
+ MOVQ size+24(FP), R10 // arg 4 - oldlenp
+ MOVQ dst+32(FP), R8 // arg 5 - newp
+ MOVQ ndst+40(FP), R9 // arg 6 - newlen
+ MOVQ $202, AX // sys___sysctl
+ SYSCALL
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// int32 runtime·kqueue(void)
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVQ $0, DI
+ MOVL $344, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI
+ MOVQ ev1+8(FP), SI
+ MOVL nev1+16(FP), DX
+ MOVQ ev2+24(FP), R10
+ MOVL nev2+32(FP), R8
+ MOVQ ts+40(FP), R9
+ MOVL $435, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd)
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // fd
+ MOVQ $2, SI // F_SETFD
+ MOVQ $1, DX // FD_CLOEXEC
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
diff --git a/src/runtime/sys_netbsd_arm.s b/src/runtime/sys_netbsd_arm.s
new file mode 100644
index 000000000..039a0832e
--- /dev/null
+++ b/src/runtime/sys_netbsd_arm.s
@@ -0,0 +1,351 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for ARM, NetBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-4
+ MOVW 0(FP), R0 // arg 1 exit status
+ SWI $0xa00001
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-4
+ SWI $0xa00136 // sys__lwp_exit
+ MOVW $1, R8 // crash
+ MOVW R8, (R8)
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ SWI $0xa00005
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0
+ SWI $0xa00006
+ MOVW R0, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-8
+ MOVW 0(FP), R0
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ SWI $0xa00003
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-4
+ MOVW 0(FP), R0 // arg 1 - fd
+ MOVW 4(FP), R1 // arg 2 - buf
+ MOVW 8(FP), R2 // arg 3 - nbyte
+ SWI $0xa00004 // sys_write
+ MOVW R0, ret+12(FP)
+ RET
+
+// int32 lwp_create(void *context, uintptr flags, void *lwpid)
+TEXT runtime·lwp_create(SB),NOSPLIT,$0
+ MOVW ctxt+0(FP), R0
+ MOVW flags+4(FP), R1
+ MOVW lwpid+8(FP), R2
+ SWI $0xa00135 // sys__lwp_create
+ MOVW R0, ret+12(FP)
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ SWI $0xa0015e // sys_sched_yield
+ RET
+
+TEXT runtime·lwp_park(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 - abstime
+ MOVW 4(FP), R1 // arg 2 - unpark
+ MOVW 8(FP), R2 // arg 3 - hint
+ MOVW 12(FP), R3 // arg 4 - unparkhint
+ SWI $0xa001b2 // sys__lwp_park
+ MOVW R0, ret+16(FP)
+ RET
+
+TEXT runtime·lwp_unpark(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 - lwp
+ MOVW 4(FP), R1 // arg 2 - hint
+ SWI $0xa00141 // sys__lwp_unpark
+ MOVW R0, ret+8(FP)
+ RET
+
+TEXT runtime·lwp_self(SB),NOSPLIT,$0
+ SWI $0xa00137 // sys__lwp_self
+ MOVW R0, ret+0(FP)
+ RET
+
+TEXT runtime·lwp_tramp(SB),NOSPLIT,$0
+ MOVW R0, g_m(R1)
+ MOVW R1, g
+
+ BL runtime·emptyfunc(SB) // fault if stack check is wrong
+ BL (R2)
+ MOVW $2, R8 // crash (not reached)
+ MOVW R8, (R8)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVW usec+0(FP), R0
+ MOVW R0, R2
+ MOVW $1000000, R1
+ DIV R1, R0
+ // 0(R13) is the saved LR, don't use it
+ MOVW R0, 4(R13) // tv_sec.low
+ MOVW $0, R0
+ MOVW R0, 8(R13) // tv_sec.high
+ MOD R1, R2
+ MOVW $1000, R1
+ MUL R1, R2
+ MOVW R2, 12(R13) // tv_nsec
+
+ MOVW $4(R13), R0 // arg 1 - rqtp
+ MOVW $0, R1 // arg 2 - rmtp
+ SWI $0xa001ae // sys_nanosleep
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$16
+ SWI $0xa00137 // sys__lwp_self, the returned R0 is arg 1
+ MOVW sig+0(FP), R1 // arg 2 - signal
+ SWI $0xa0013e // sys__lwp_kill
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$-4
+ MOVW 0(FP), R0 // arg 1 - which
+ MOVW 4(FP), R1 // arg 2 - itv
+ MOVW 8(FP), R2 // arg 3 - oitv
+ SWI $0xa001a9 // sys_setitimer
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVW $0, R0 // CLOCK_REALTIME
+ MOVW $8(R13), R1
+ SWI $0xa001ab // clock_gettime
+
+ MOVW 8(R13), R0 // sec.low
+ MOVW 12(R13), R1 // sec.high
+ MOVW 16(R13), R2 // nsec
+
+ MOVW R0, 0(FP)
+ MOVW R1, 4(FP)
+ MOVW R2, 8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB), NOSPLIT, $32
+ MOVW $0, R0 // CLOCK_REALTIME
+ MOVW $8(R13), R1
+ SWI $0xa001ab // clock_gettime
+
+ MOVW 8(R13), R0 // sec.low
+ MOVW 12(R13), R4 // sec.high
+ MOVW 16(R13), R2 // nsec
+
+ MOVW $1000000000, R3
+ MULLU R0, R3, (R1, R0)
+ MUL R3, R4
+ ADD.S R2, R0
+ ADC R4, R1
+
+ MOVW R0, ret_lo+0(FP)
+ MOVW R1, ret_hi+4(FP)
+ RET
+
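nanotime above assembles the 64-bit result sec*1e9+nsec from 32-bit halves: MULLU gives the 64-bit product of the low word of sec with 1e9, MUL folds in the low 32 bits of sec.high*1e9, and ADD.S/ADC propagate the carry when nsec is added. A rough Go equivalent of that arithmetic (a sketch, not runtime code):

	// nanos combines a 64-bit second count and a 32-bit nanosecond count the way
	// the ARM code above does, using only 32x32->64 multiplies and add-with-carry.
	func nanos(secLo, secHi, nsec uint32) uint64 {
		lo := uint64(secLo) * 1000000000        // MULLU: 64-bit product in (R1, R0)
		hi := uint32(lo>>32) + secHi*1000000000 // MUL: low 32 bits of sec.high*1e9
		lo32 := uint32(lo) + nsec               // ADD.S: add nsec into the low word
		var carry uint32
		if lo32 < uint32(lo) { // carry out of the low word
			carry = 1
		}
		return uint64(hi+carry)<<32 | uint64(lo32) // ADC folds the carry into the high word
	}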
+TEXT runtime·getcontext(SB),NOSPLIT,$-4
+ MOVW 0(FP), R0 // arg 1 - context
+ SWI $0xa00133 // sys_getcontext
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 - how
+ MOVW 4(FP), R1 // arg 2 - set
+ MOVW 8(FP), R2 // arg 3 - oset
+ SWI $0xa00125 // sys_sigprocmask
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·sigreturn_tramp(SB),NOSPLIT,$-4
+ // on entry, SP points to siginfo, we add sizeof(ucontext)
+ // to SP to get a pointer to ucontext.
+ ADD $0x80, R13, R0 // 0x80 == sizeof(UcontextT)
+ SWI $0xa00134 // sys_setcontext
+ // something failed, we have to exit
+ MOVW $0x4242, R0 // magic return number
+ SWI $0xa00001 // sys_exit
+ B -2(PC) // continue exit
+
+TEXT runtime·sigaction(SB),NOSPLIT,$4
+ MOVW 0(FP), R0 // arg 1 - signum
+ MOVW 4(FP), R1 // arg 2 - nsa
+ MOVW 8(FP), R2 // arg 3 - osa
+ MOVW $runtime·sigreturn_tramp(SB), R3 // arg 4 - tramp
+ MOVW $2, R4 // arg 5 - vers
+ MOVW R4, 4(R13)
+ ADD $4, R13 // pass arg 5 on stack
+ SWI $0xa00154 // sys___sigaction_sigtramp
+ SUB $4, R13
+ MOVW.CS $3, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$24
+ // this might be called in external code context,
+ // where g is not set.
+ // first save R0, because runtime·load_g will clobber it
+ MOVW R0, 4(R13) // signum
+ MOVB runtime·iscgo(SB), R0
+ CMP $0, R0
+ BL.NE runtime·load_g(SB)
+
+ CMP $0, g
+ BNE 4(PC)
+ // signal number is already prepared in 4(R13)
+ MOVW $runtime·badsignal(SB), R11
+ BL (R11)
+ RET
+
+ // save g
+ MOVW g, R4
+ MOVW g, 20(R13)
+
+ // g = m->signal
+ MOVW g_m(g), R8
+ MOVW m_gsignal(R8), g
+
+ // R0 is already saved
+ MOVW R1, 8(R13) // info
+ MOVW R2, 12(R13) // context
+ MOVW R4, 16(R13) // gp
+
+ BL runtime·sighandler(SB)
+
+ // restore g
+ MOVW 20(R13), g
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$12
+ MOVW 0(FP), R0 // arg 1 - addr
+ MOVW 4(FP), R1 // arg 2 - len
+ MOVW 8(FP), R2 // arg 3 - prot
+ MOVW 12(FP), R3 // arg 4 - flags
+ // arg 5 (fid) and arg6 (offset_lo, offset_hi) are passed on stack
+ // note the C runtime only passes the 32-bit offset_lo to us
+ MOVW 16(FP), R4 // arg 5
+ MOVW R4, 4(R13)
+ MOVW 20(FP), R5 // arg 6 lower 32-bit
+ MOVW R5, 8(R13)
+ MOVW $0, R6 // higher 32-bit for arg 6
+ MOVW R6, 12(R13)
+ ADD $4, R13 // pass arg 5 and arg 6 on stack
+ SWI $0xa000c5 // sys_mmap
+ SUB $4, R13
+ MOVW R0, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 - addr
+ MOVW 4(FP), R1 // arg 2 - len
+ SWI $0xa00049 // sys_munmap
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // arg 1 - addr
+ MOVW 4(FP), R1 // arg 2 - len
+ MOVW 8(FP), R2 // arg 3 - behav
+ SWI $0xa0004b // sys_madvise
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-4
+ MOVW 0(FP), R0 // arg 1 - nss
+ MOVW 4(FP), R1 // arg 2 - oss
+ SWI $0xa00119 // sys___sigaltstack14
+ MOVW.CS $0, R8 // crash on syscall failure
+ MOVW.CS R8, (R8)
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$8
+ MOVW 0(FP), R0 // arg 1 - name
+ MOVW 4(FP), R1 // arg 2 - namelen
+ MOVW 8(FP), R2 // arg 3 - oldp
+ MOVW 12(FP), R3 // arg 4 - oldlenp
+ MOVW 16(FP), R4 // arg 5 - newp
+ MOVW R4, 4(R13)
+ MOVW 20(FP), R4 // arg 6 - newlen
+ MOVW R4, 8(R13)
+ ADD $4, R13 // pass arg 5 and 6 on stack
+ SWI $0xa000ca // sys___sysctl
+ SUB $4, R13
+ MOVW R0, ret+24(FP)
+ RET
+
+// int32 runtime·kqueue(void)
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ SWI $0xa00158 // sys_kqueue
+ RSB.CS $0, R0
+ MOVW R0, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
+TEXT runtime·kevent(SB),NOSPLIT,$8
+ MOVW 0(FP), R0 // kq
+ MOVW 4(FP), R1 // changelist
+ MOVW 8(FP), R2 // nchanges
+ MOVW 12(FP), R3 // eventlist
+ MOVW 16(FP), R4 // nevents
+ MOVW R4, 4(R13)
+ MOVW 20(FP), R4 // timeout
+ MOVW R4, 8(R13)
+ ADD $4, R13 // pass arg 5 and 6 on stack
+ SWI $0xa001b3 // sys___kevent50
+ RSB.CS $0, R0
+ SUB $4, R13
+ MOVW R0, ret+24(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd)
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVW 0(FP), R0 // fd
+ MOVW $2, R1 // F_SETFD
+ MOVW $1, R2 // FD_CLOEXEC
+ SWI $0xa0005c // sys_fcntl
+ RET
+
+TEXT runtime·casp(SB),NOSPLIT,$0
+ B runtime·cas(SB)
+
+// TODO(minux): this is only valid for ARMv6+
+// bool armcas(int32 *val, int32 old, int32 new)
+// Atomically:
+// if(*val == old){
+// *val = new;
+// return 1;
+// }else
+// return 0;
+TEXT runtime·cas(SB),NOSPLIT,$0
+ B runtime·armcas(SB)
+
+TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
+ MOVM.WP [R1, R2, R3, R12], (R13)
+ SWI $0x00a0013c // _lwp_getprivate
+ MOVM.IAW (R13), [R1, R2, R3, R12]
+ RET
diff --git a/src/runtime/sys_openbsd_386.s b/src/runtime/sys_openbsd_386.s
new file mode 100644
index 000000000..12d9c5c6b
--- /dev/null
+++ b/src/runtime/sys_openbsd_386.s
@@ -0,0 +1,398 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for 386, OpenBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+#define CLOCK_MONOTONIC $3
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-4
+ MOVL $1, AX
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$8
+ MOVL $0, 0(SP)
+ MOVL $0, 4(SP) // arg 1 - notdead
+ MOVL $302, AX // sys___threxit
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-4
+ MOVL $5, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-4
+ MOVL $6, AX
+ INT $0x80
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-4
+ MOVL $3, AX
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-4
+ MOVL $4, AX // sys_write
+ INT $0x80
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$24
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVL AX, 12(SP) // tv_sec - l32
+ MOVL $0, 16(SP) // tv_sec - h32
+ MOVL $1000, AX
+ MULL DX
+ MOVL AX, 20(SP) // tv_nsec
+
+ MOVL $0, 0(SP)
+ LEAL 12(SP), AX
+ MOVL AX, 4(SP) // arg 1 - rqtp
+ MOVL $0, 8(SP) // arg 2 - rmtp
+ MOVL $91, AX // sys_nanosleep
+ INT $0x80
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$12
+ MOVL $299, AX // sys_getthrid
+ INT $0x80
+ MOVL $0, 0(SP)
+ MOVL AX, 4(SP) // arg 1 - pid
+ MOVL sig+0(FP), AX
+ MOVL AX, 8(SP) // arg 2 - signum
+ MOVL $37, AX // sys_kill
+ INT $0x80
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$36
+ LEAL addr+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL // arg 1 - addr
+ MOVSL // arg 2 - len
+ MOVSL // arg 3 - prot
+ MOVSL // arg 4 - flags
+ MOVSL // arg 5 - fd
+ MOVL $0, AX
+ STOSL // arg 6 - pad
+ MOVSL // arg 7 - offset
+ MOVL $0, AX // top 32 bits of file offset
+ STOSL
+ MOVL $197, AX // sys_mmap
+ INT $0x80
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$-4
+ MOVL $73, AX // sys_munmap
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$-4
+ MOVL $75, AX // sys_madvise
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$-4
+ MOVL $69, AX
+ INT $0x80
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ LEAL 12(SP), BX
+ MOVL $0, 4(SP) // arg 1 - clock_id
+ MOVL BX, 8(SP) // arg 2 - tp
+ MOVL $87, AX // sys_clock_gettime
+ INT $0x80
+
+ MOVL 12(SP), AX // sec - l32
+ MOVL AX, sec+0(FP)
+ MOVL 16(SP), AX // sec - h32
+ MOVL AX, sec+4(FP)
+
+ MOVL 20(SP), BX // nsec
+ MOVL BX, nsec+8(FP)
+ RET
+
+// int64 nanotime(void) so really
+// void nanotime(int64 *nsec)
+TEXT runtime·nanotime(SB),NOSPLIT,$32
+ LEAL 12(SP), BX
+ MOVL CLOCK_MONOTONIC, 4(SP) // arg 1 - clock_id
+ MOVL BX, 8(SP) // arg 2 - tp
+ MOVL $87, AX // sys_clock_gettime
+ INT $0x80
+
+ MOVL 16(SP), CX // sec - h32
+ IMULL $1000000000, CX
+
+ MOVL 12(SP), AX // sec - l32
+ MOVL $1000000000, BX
+ MULL BX // result in dx:ax
+
+ MOVL 20(SP), BX // nsec
+ ADDL BX, AX
+ ADCL CX, DX // add high bits with carry
+
+ MOVL AX, ret_lo+0(FP)
+ MOVL DX, ret_hi+4(FP)
+ RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-4
+ MOVL $46, AX // sys_sigaction
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$-4
+ MOVL $48, AX // sys_sigprocmask
+ INT $0x80
+ JAE 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$44
+ get_tls(CX)
+
+ // check that g exists
+ MOVL g(CX), DI
+ CMPL DI, $0
+ JNE 6(PC)
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL $runtime·badsignal(SB), AX
+ CALL AX
+ JMP sigtramp_ret
+
+ // save g
+ MOVL DI, 20(SP)
+
+ // g = m->gsignal
+ MOVL g_m(DI), BX
+ MOVL m_gsignal(BX), BX
+ MOVL BX, g(CX)
+
+ // copy arguments for call to sighandler
+ MOVL signo+0(FP), BX
+ MOVL BX, 0(SP)
+ MOVL info+4(FP), BX
+ MOVL BX, 4(SP)
+ MOVL context+8(FP), BX
+ MOVL BX, 8(SP)
+ MOVL DI, 12(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(CX)
+ MOVL 20(SP), BX
+ MOVL BX, g(CX)
+
+sigtramp_ret:
+ // call sigreturn
+ MOVL context+8(FP), AX
+ MOVL $0, 0(SP) // syscall gap
+ MOVL AX, 4(SP) // arg 1 - sigcontext
+ MOVL $103, AX // sys_sigreturn
+ INT $0x80
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// int32 tfork(void *param, uintptr psize, M *mp, G *gp, void (*fn)(void));
+TEXT runtime·tfork(SB),NOSPLIT,$12
+
+ // Copy mp, gp and fn from the parent stack onto the child stack.
+ MOVL param+0(FP), AX
+ MOVL 8(AX), CX // tf_stack
+ SUBL $16, CX
+ MOVL CX, 8(AX)
+ MOVL mm+8(FP), SI
+ MOVL SI, 0(CX)
+ MOVL gg+12(FP), SI
+ MOVL SI, 4(CX)
+ MOVL fn+16(FP), SI
+ MOVL SI, 8(CX)
+ MOVL $1234, 12(CX)
+
+ MOVL $0, 0(SP) // syscall gap
+ MOVL param+0(FP), AX
+ MOVL AX, 4(SP) // arg 1 - param
+ MOVL psize+4(FP), AX
+ MOVL AX, 8(SP) // arg 2 - psize
+ MOVL $8, AX // sys___tfork
+ INT $0x80
+
+ // Return if tfork syscall failed.
+ JCC 4(PC)
+ NEGL AX
+ MOVL AX, ret+20(FP)
+ RET
+
+ // In parent, return.
+ CMPL AX, $0
+ JEQ 3(PC)
+ MOVL AX, ret+20(FP)
+ RET
+
+ // Paranoia: check that SP is as we expect.
+ MOVL 12(SP), BP
+ CMPL BP, $1234
+ JEQ 2(PC)
+ INT $3
+
+ // Reload registers.
+ MOVL 0(SP), BX // m
+ MOVL 4(SP), DX // g
+ MOVL 8(SP), SI // fn
+
+ // Set FS to point at m->tls.
+ LEAL m_tls(BX), BP
+ PUSHAL // save registers
+ PUSHL BP
+ CALL runtime·settls(SB)
+ POPL AX
+ POPAL
+
+ // Now segment is established. Initialize m, g.
+ get_tls(AX)
+ MOVL DX, g(AX)
+ MOVL BX, g_m(DX)
+
+ CALL runtime·stackcheck(SB) // smashes AX, CX
+ MOVL 0(DX), DX // paranoia; check they are not nil
+ MOVL 0(BX), BX
+
+ // More paranoia; check that stack splitting code works.
+ PUSHAL
+ CALL runtime·emptyfunc(SB)
+ POPAL
+
+ // Call fn.
+ CALL SI
+
+ CALL runtime·exit1(SB)
+ MOVL $0x1234, 0x1005
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVL $288, AX // sys_sigaltstack
+ MOVL new+4(SP), BX
+ MOVL old+8(SP), CX
+ INT $0x80
+ CMPL AX, $0xfffff001
+ JLS 2(PC)
+ INT $3
+ RET
+
+TEXT runtime·setldt(SB),NOSPLIT,$4
+ // Under OpenBSD we set the GS base instead of messing with the LDT.
+ MOVL tls0+4(FP), AX
+ MOVL AX, 0(SP)
+ CALL runtime·settls(SB)
+ RET
+
+TEXT runtime·settls(SB),NOSPLIT,$8
+ // adjust for ELF: wants to use -8(GS) and -4(GS) for g and m
+ MOVL tlsbase+0(FP), CX
+ ADDL $8, CX
+ MOVL $0, 0(SP) // syscall gap
+ MOVL CX, 4(SP) // arg 1 - tcb
+ MOVL $329, AX // sys___set_tcb
+ INT $0x80
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·osyield(SB),NOSPLIT,$-4
+ MOVL $298, AX // sys_sched_yield
+ INT $0x80
+ RET
+
+TEXT runtime·thrsleep(SB),NOSPLIT,$-4
+ MOVL $94, AX // sys___thrsleep
+ INT $0x80
+ MOVL AX, ret+20(FP)
+ RET
+
+TEXT runtime·thrwakeup(SB),NOSPLIT,$-4
+ MOVL $301, AX // sys___thrwakeup
+ INT $0x80
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$28
+ LEAL mib+0(FP), SI
+ LEAL 4(SP), DI
+ CLD
+ MOVSL // arg 1 - name
+ MOVSL // arg 2 - namelen
+ MOVSL // arg 3 - oldp
+ MOVSL // arg 4 - oldlenp
+ MOVSL // arg 5 - newp
+ MOVSL // arg 6 - newlen
+ MOVL $202, AX // sys___sysctl
+ INT $0x80
+ JCC 4(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVL $269, AX
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL $72, AX // sys_kevent
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$32
+ MOVL $92, AX // sys_fcntl
+ // 0(SP) is where the caller PC would be; kernel skips it
+ MOVL fd+0(FP), BX
+ MOVL BX, 4(SP) // fd
+ MOVL $2, 8(SP) // F_SETFD
+ MOVL $1, 12(SP) // FD_CLOEXEC
+ INT $0x80
+ JAE 2(PC)
+ NEGL AX
+ RET
+
+GLOBL runtime·tlsoffset(SB),$4
diff --git a/src/runtime/sys_openbsd_amd64.s b/src/runtime/sys_openbsd_amd64.s
new file mode 100644
index 000000000..4e9db2390
--- /dev/null
+++ b/src/runtime/sys_openbsd_amd64.s
@@ -0,0 +1,350 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for AMD64, OpenBSD
+// /usr/src/sys/kern/syscalls.master for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+#define CLOCK_MONOTONIC $3
+
+// int32 tfork(void *param, uintptr psize, M *mp, G *gp, void (*fn)(void));
+TEXT runtime·tfork(SB),NOSPLIT,$32
+
+ // Copy mp, gp and fn off parent stack for use by child.
+ MOVQ mm+16(FP), R8
+ MOVQ gg+24(FP), R9
+ MOVQ fn+32(FP), R12
+
+ MOVQ param+0(FP), DI
+ MOVQ psize+8(FP), SI
+ MOVL $8, AX // sys___tfork
+ SYSCALL
+
+ // Return if tfork syscall failed.
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+40(FP)
+ RET
+
+ // In parent, return.
+ CMPL AX, $0
+ JEQ 3(PC)
+ MOVL AX, ret+40(FP)
+ RET
+
+ // Set FS to point at m->tls.
+ LEAQ m_tls(R8), DI
+ CALL runtime·settls(SB)
+
+ // In child, set up new stack.
+ get_tls(CX)
+ MOVQ R8, g_m(R9)
+ MOVQ R9, g(CX)
+ CALL runtime·stackcheck(SB)
+
+ // Call fn
+ CALL R12
+
+ // It shouldn't return. If it does, exit
+ MOVQ $0, DI // arg 1 - notdead
+ MOVL $302, AX // sys___threxit
+ SYSCALL
+ JMP -3(PC) // keep exiting
+
+TEXT runtime·osyield(SB),NOSPLIT,$0
+ MOVL $298, AX // sys_sched_yield
+ SYSCALL
+ RET
+
+TEXT runtime·thrsleep(SB),NOSPLIT,$0
+ MOVQ ident+0(FP), DI // arg 1 - ident
+ MOVL clock_id+8(FP), SI // arg 2 - clock_id
+ MOVQ tsp+16(FP), DX // arg 3 - tp
+ MOVQ lock+24(FP), R10 // arg 4 - lock
+ MOVQ abort+32(FP), R8 // arg 5 - abort
+ MOVL $94, AX // sys___thrsleep
+ SYSCALL
+ MOVL AX, ret+40(FP)
+ RET
+
+TEXT runtime·thrwakeup(SB),NOSPLIT,$0
+ MOVQ ident+0(FP), DI // arg 1 - ident
+ MOVL n+8(FP), SI // arg 2 - n
+ MOVL $301, AX // sys___thrwakeup
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+// Exit the entire program (like C exit)
+TEXT runtime·exit(SB),NOSPLIT,$-8
+ MOVL code+0(FP), DI // arg 1 - exit status
+ MOVL $1, AX // sys_exit
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·exit1(SB),NOSPLIT,$-8
+ MOVQ $0, DI // arg 1 - notdead
+ MOVL $302, AX // sys___threxit
+ SYSCALL
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$-8
+ MOVQ name+0(FP), DI // arg 1 pathname
+ MOVL mode+8(FP), SI // arg 2 flags
+ MOVL perm+12(FP), DX // arg 3 mode
+ MOVL $5, AX
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVL $6, AX
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·read(SB),NOSPLIT,$-8
+ MOVL fd+0(FP), DI // arg 1 fd
+ MOVQ p+8(FP), SI // arg 2 buf
+ MOVL n+16(FP), DX // arg 3 count
+ MOVL $3, AX
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·write(SB),NOSPLIT,$-8
+ MOVQ fd+0(FP), DI // arg 1 - fd
+ MOVQ p+8(FP), SI // arg 2 - buf
+ MOVL n+16(FP), DX // arg 3 - nbyte
+ MOVL $4, AX // sys_write
+ SYSCALL
+ MOVL AX, ret+24(FP)
+ RET
+
+TEXT runtime·usleep(SB),NOSPLIT,$16
+ MOVL $0, DX
+ MOVL usec+0(FP), AX
+ MOVL $1000000, CX
+ DIVL CX
+ MOVQ AX, 0(SP) // tv_sec
+ MOVL $1000, AX
+ MULL DX
+ MOVQ AX, 8(SP) // tv_nsec
+
+ MOVQ SP, DI // arg 1 - rqtp
+ MOVQ $0, SI // arg 2 - rmtp
+ MOVL $91, AX // sys_nanosleep
+ SYSCALL
+ RET
+
+TEXT runtime·raise(SB),NOSPLIT,$16
+ MOVL $299, AX // sys_getthrid
+ SYSCALL
+ MOVQ AX, DI // arg 1 - pid
+ MOVL sig+0(FP), SI // arg 2 - signum
+ MOVL $37, AX // sys_kill
+ SYSCALL
+ RET
+
+TEXT runtime·setitimer(SB),NOSPLIT,$-8
+ MOVL mode+0(FP), DI // arg 1 - which
+ MOVQ new+8(FP), SI // arg 2 - itv
+ MOVQ old+16(FP), DX // arg 3 - oitv
+ MOVL $69, AX // sys_setitimer
+ SYSCALL
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB), NOSPLIT, $32
+ MOVQ $0, DI // arg 1 - clock_id
+ LEAQ 8(SP), SI // arg 2 - tp
+ MOVL $87, AX // sys_clock_gettime
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVQ 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ MOVQ AX, sec+0(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+
+TEXT runtime·nanotime(SB),NOSPLIT,$24
+ MOVQ CLOCK_MONOTONIC, DI // arg 1 - clock_id
+ LEAQ 8(SP), SI // arg 2 - tp
+ MOVL $87, AX // sys_clock_gettime
+ SYSCALL
+ MOVQ 8(SP), AX // sec
+ MOVQ 16(SP), DX // nsec
+
+ // sec is in AX, nsec in DX
+ // return nsec in AX
+ IMULQ $1000000000, AX
+ ADDQ DX, AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT runtime·sigaction(SB),NOSPLIT,$-8
+ MOVL sig+0(FP), DI // arg 1 - signum
+ MOVQ new+8(FP), SI // arg 2 - nsa
+ MOVQ old+16(FP), DX // arg 3 - osa
+ MOVL $46, AX
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sigprocmask(SB),NOSPLIT,$0
+ MOVL mode+0(FP), DI // arg 1 - how
+ MOVL new+4(FP), SI // arg 2 - set
+ MOVL $48, AX // sys_sigprocmask
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·sigtramp(SB),NOSPLIT,$64
+ get_tls(BX)
+
+ // check that g exists
+ MOVQ g(BX), R10
+ CMPQ R10, $0
+ JNE 5(PC)
+ MOVQ DI, 0(SP)
+ MOVQ $runtime·badsignal(SB), AX
+ CALL AX
+ RET
+
+ // save g
+ MOVQ R10, 40(SP)
+
+	// g = m->gsignal
+ MOVQ g_m(R10), BP
+ MOVQ m_gsignal(BP), BP
+ MOVQ BP, g(BX)
+
+ MOVQ DI, 0(SP)
+ MOVQ SI, 8(SP)
+ MOVQ DX, 16(SP)
+ MOVQ R10, 24(SP)
+
+ CALL runtime·sighandler(SB)
+
+ // restore g
+ get_tls(BX)
+ MOVQ 40(SP), R10
+ MOVQ R10, g(BX)
+ RET
+
+TEXT runtime·mmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - addr
+ MOVQ n+8(FP), SI // arg 2 - len
+ MOVL prot+16(FP), DX // arg 3 - prot
+ MOVL flags+20(FP), R10 // arg 4 - flags
+ MOVL fd+24(FP), R8 // arg 5 - fd
+ MOVL off+28(FP), R9
+ SUBQ $16, SP
+ MOVQ R9, 8(SP) // arg 7 - offset (passed on stack)
+ MOVQ $0, R9 // arg 6 - pad
+ MOVL $197, AX
+ SYSCALL
+ ADDQ $16, SP
+ MOVQ AX, ret+32(FP)
+ RET
+
+TEXT runtime·munmap(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - addr
+ MOVQ n+8(FP), SI // arg 2 - len
+ MOVL $73, AX // sys_munmap
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·madvise(SB),NOSPLIT,$0
+ MOVQ addr+0(FP), DI // arg 1 - addr
+ MOVQ n+8(FP), SI // arg 2 - len
+ MOVL flags+16(FP), DX // arg 3 - behav
+ MOVQ $75, AX // sys_madvise
+ SYSCALL
+ // ignore failure - maybe pages are locked
+ RET
+
+TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+ MOVQ new+8(SP), DI // arg 1 - nss
+ MOVQ old+16(SP), SI // arg 2 - oss
+ MOVQ $288, AX // sys_sigaltstack
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+// set tls base to DI
+TEXT runtime·settls(SB),NOSPLIT,$0
+ // adjust for ELF: wants to use -16(FS) and -8(FS) for g and m
+ ADDQ $16, DI
+	MOVQ	$329, AX		// sys___set_tcb
+ SYSCALL
+ JCC 2(PC)
+ MOVL $0xf1, 0xf1 // crash
+ RET
+
+TEXT runtime·sysctl(SB),NOSPLIT,$0
+ MOVQ mib+0(FP), DI // arg 1 - name
+ MOVL miblen+8(FP), SI // arg 2 - namelen
+ MOVQ out+16(FP), DX // arg 3 - oldp
+ MOVQ size+24(FP), R10 // arg 4 - oldlenp
+ MOVQ dst+32(FP), R8 // arg 5 - newp
+ MOVQ ndst+40(FP), R9 // arg 6 - newlen
+ MOVQ $202, AX // sys___sysctl
+ SYSCALL
+ JCC 4(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+ MOVL $0, AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// int32 runtime·kqueue(void);
+TEXT runtime·kqueue(SB),NOSPLIT,$0
+ MOVQ $0, DI
+ MOVQ $0, SI
+ MOVQ $0, DX
+ MOVL $269, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+0(FP)
+ RET
+
+// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
+TEXT runtime·kevent(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI
+ MOVQ ev1+8(FP), SI
+ MOVL nev1+16(FP), DX
+ MOVQ ev2+24(FP), R10
+ MOVL nev2+32(FP), R8
+ MOVQ ts+40(FP), R9
+ MOVL $72, AX
+ SYSCALL
+ JCC 2(PC)
+ NEGQ AX
+ MOVL AX, ret+48(FP)
+ RET
+
+// void runtime·closeonexec(int32 fd);
+TEXT runtime·closeonexec(SB),NOSPLIT,$0
+ MOVL fd+0(FP), DI // fd
+ MOVQ $2, SI // F_SETFD
+ MOVQ $1, DX // FD_CLOEXEC
+ MOVL $92, AX // fcntl
+ SYSCALL
+ RET
diff --git a/src/runtime/sys_plan9_386.s b/src/runtime/sys_plan9_386.s
new file mode 100644
index 000000000..743298181
--- /dev/null
+++ b/src/runtime/sys_plan9_386.s
@@ -0,0 +1,258 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// setldt(int entry, int address, int limit)
+TEXT runtime·setldt(SB),NOSPLIT,$0
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVL $14, AX
+ INT $64
+ MOVL AX, ret+12(FP)
+ RET
+
+TEXT runtime·pread(SB),NOSPLIT,$0
+ MOVL $50, AX
+ INT $64
+ MOVL AX, ret+20(FP)
+ RET
+
+TEXT runtime·pwrite(SB),NOSPLIT,$0
+ MOVL $51, AX
+ INT $64
+ MOVL AX, ret+20(FP)
+ RET
+
+// int32 _seek(int64*, int32, int64, int32)
+TEXT _seek<>(SB),NOSPLIT,$0
+ MOVL $39, AX
+ INT $64
+ RET
+
+TEXT runtime·seek(SB),NOSPLIT,$24
+ LEAL ret+16(FP), AX
+ MOVL fd+0(FP), BX
+ MOVL offset_lo+4(FP), CX
+ MOVL offset_hi+8(FP), DX
+ MOVL whence+12(FP), SI
+ MOVL AX, 0(SP)
+ MOVL BX, 4(SP)
+ MOVL CX, 8(SP)
+ MOVL DX, 12(SP)
+ MOVL SI, 16(SP)
+ CALL _seek<>(SB)
+ CMPL AX, $0
+ JGE 3(PC)
+ MOVL $-1, ret_lo+16(FP)
+ MOVL $-1, ret_hi+20(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVL $4, AX
+ INT $64
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·exits(SB),NOSPLIT,$0
+ MOVL $8, AX
+ INT $64
+ RET
+
+TEXT runtime·brk_(SB),NOSPLIT,$0
+ MOVL $24, AX
+ INT $64
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·sleep(SB),NOSPLIT,$0
+ MOVL $17, AX
+ INT $64
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·plan9_semacquire(SB),NOSPLIT,$0
+ MOVL $37, AX
+ INT $64
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·plan9_tsemacquire(SB),NOSPLIT,$0
+ MOVL $52, AX
+ INT $64
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT nsec<>(SB),NOSPLIT,$0
+ MOVL $53, AX
+ INT $64
+ RET
+
+TEXT runtime·nsec(SB),NOSPLIT,$8
+ LEAL ret+4(FP), AX
+ MOVL AX, 0(SP)
+ CALL nsec<>(SB)
+ CMPL AX, $0
+ JGE 3(PC)
+ MOVL $-1, ret_lo+4(FP)
+ MOVL $-1, ret_hi+8(FP)
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$8-12
+ CALL runtime·nanotime(SB)
+ MOVL 0(SP), AX
+ MOVL 4(SP), DX
+
+ MOVL $1000000000, CX
+ DIVL CX
+ MOVL AX, sec+0(FP)
+ MOVL $0, sec+4(FP)
+ MOVL DX, nsec+8(FP)
+ RET
+
+TEXT runtime·notify(SB),NOSPLIT,$0
+ MOVL $28, AX
+ INT $64
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·noted(SB),NOSPLIT,$0
+ MOVL $29, AX
+ INT $64
+ MOVL AX, ret+4(FP)
+ RET
+
+TEXT runtime·plan9_semrelease(SB),NOSPLIT,$0
+ MOVL $38, AX
+ INT $64
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·rfork(SB),NOSPLIT,$0
+ MOVL $19, AX // rfork
+ MOVL stack+8(SP), CX
+ MOVL mm+12(SP), BX // m
+ MOVL gg+16(SP), DX // g
+ MOVL fn+20(SP), SI // fn
+ INT $64
+
+ // In parent, return.
+ CMPL AX, $0
+ JEQ 3(PC)
+ MOVL AX, ret+20(FP)
+ RET
+
+ // set SP to be on the new child stack
+ MOVL CX, SP
+
+ // Initialize m, g.
+ get_tls(AX)
+ MOVL DX, g(AX)
+ MOVL BX, g_m(DX)
+
+ // Initialize procid from TOS struct.
+ MOVL _tos(SB), AX
+ MOVL 48(AX), AX // procid
+ MOVL AX, m_procid(BX) // save pid as m->procid
+
+ CALL runtime·stackcheck(SB) // smashes AX, CX
+
+ MOVL 0(DX), DX // paranoia; check they are not nil
+ MOVL 0(BX), BX
+
+ // more paranoia; check that stack splitting code works
+ PUSHL SI
+ CALL runtime·emptyfunc(SB)
+ POPL SI
+
+ CALL SI // fn()
+ CALL runtime·exit(SB)
+ MOVL AX, ret+20(FP)
+ RET
+
+// void sigtramp(void *ureg, int8 *note)
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ get_tls(AX)
+
+ // check that g exists
+ MOVL g(AX), BX
+ CMPL BX, $0
+ JNE 3(PC)
+ CALL runtime·badsignal2(SB) // will exit
+ RET
+
+ // save args
+ MOVL ureg+4(SP), CX
+ MOVL note+8(SP), DX
+
+ // change stack
+ MOVL g_m(BX), BX
+ MOVL m_gsignal(BX), BP
+ MOVL g_stackbase(BP), BP
+ MOVL BP, SP
+
+ // make room for args and g
+ SUBL $24, SP
+
+ // save g
+ MOVL g(AX), BP
+ MOVL BP, 20(SP)
+
+ // g = m->gsignal
+ MOVL m_gsignal(BX), DI
+ MOVL DI, g(AX)
+
+ // load args and call sighandler
+ MOVL CX, 0(SP)
+ MOVL DX, 4(SP)
+ MOVL BP, 8(SP)
+
+ CALL runtime·sighandler(SB)
+ MOVL 12(SP), AX
+
+ // restore g
+ get_tls(BX)
+ MOVL 20(SP), BP
+ MOVL BP, g(BX)
+
+ // call noted(AX)
+ MOVL AX, 0(SP)
+ CALL runtime·noted(SB)
+ RET
+
+// Only used by the 64-bit runtime.
+TEXT runtime·setfpmasks(SB),NOSPLIT,$0
+ RET
+
+#define ERRMAX 128 /* from os_plan9.h */
+
+// void errstr(int8 *buf, int32 len)
+TEXT errstr<>(SB),NOSPLIT,$0
+ MOVL $41, AX
+ INT $64
+ RET
+
+// func errstr() string
+// Only used by package syscall.
+// Grab error string due to a syscall made
+// in entersyscall mode, without going
+// through the allocator (issue 4994).
+// See ../syscall/asm_plan9_386.s:/·Syscall/
+TEXT runtime·errstr(SB),NOSPLIT,$8-8
+ get_tls(AX)
+ MOVL g(AX), BX
+ MOVL g_m(BX), BX
+ MOVL m_errstr(BX), CX
+ MOVL CX, 0(SP)
+ MOVL $ERRMAX, 4(SP)
+ CALL errstr<>(SB)
+ CALL runtime·findnull(SB)
+ MOVL 4(SP), AX
+ MOVL AX, ret_len+4(FP)
+ MOVL 0(SP), AX
+ MOVL AX, ret_base+0(FP)
+ RET
diff --git a/src/runtime/sys_plan9_amd64.s b/src/runtime/sys_plan9_amd64.s
new file mode 100644
index 000000000..954c0c27b
--- /dev/null
+++ b/src/runtime/sys_plan9_amd64.s
@@ -0,0 +1,260 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// setldt(int entry, int address, int limit)
+TEXT runtime·setldt(SB),NOSPLIT,$0
+ RET
+
+TEXT runtime·open(SB),NOSPLIT,$0
+ MOVQ $14, BP
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·pread(SB),NOSPLIT,$0
+ MOVQ $50, BP
+ SYSCALL
+ MOVL AX, ret+32(FP)
+ RET
+
+TEXT runtime·pwrite(SB),NOSPLIT,$0
+ MOVQ $51, BP
+ SYSCALL
+ MOVL AX, ret+32(FP)
+ RET
+
+// int32 _seek(int64*, int32, int64, int32)
+TEXT _seek<>(SB),NOSPLIT,$0
+ MOVQ $39, BP
+ SYSCALL
+ RET
+
+// int64 seek(int32, int64, int32)
+// Convenience wrapper around _seek, the actual system call.
+TEXT runtime·seek(SB),NOSPLIT,$32
+ LEAQ ret+24(FP), AX
+ MOVL fd+0(FP), BX
+ MOVQ offset+8(FP), CX
+ MOVL whence+16(FP), DX
+ MOVQ AX, 0(SP)
+ MOVL BX, 8(SP)
+ MOVQ CX, 16(SP)
+ MOVL DX, 24(SP)
+ CALL _seek<>(SB)
+ CMPL AX, $0
+ JGE 2(PC)
+ MOVQ $-1, ret+24(FP)
+ RET
+
+TEXT runtime·close(SB),NOSPLIT,$0
+ MOVQ $4, BP
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·exits(SB),NOSPLIT,$0
+ MOVQ $8, BP
+ SYSCALL
+ RET
+
+TEXT runtime·brk_(SB),NOSPLIT,$0
+ MOVQ $24, BP
+ SYSCALL
+ MOVQ AX, ret+8(FP)
+ RET
+
+TEXT runtime·sleep(SB),NOSPLIT,$0
+ MOVQ $17, BP
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·plan9_semacquire(SB),NOSPLIT,$0
+ MOVQ $37, BP
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·plan9_tsemacquire(SB),NOSPLIT,$0
+ MOVQ $52, BP
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·nsec(SB),NOSPLIT,$0
+ MOVQ $53, BP
+ SYSCALL
+ MOVQ AX, ret+8(FP)
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$8-12
+ CALL runtime·nanotime(SB)
+ MOVQ 0(SP), AX
+
+ // generated code for
+	// func f(x uint64) (uint64, uint64) { return x/1000000000, x%1000000000 }
+ // adapted to reduce duplication
+ MOVQ AX, CX
+ MOVQ $1360296554856532783, AX
+ MULQ CX
+ ADDQ CX, DX
+ RCRQ $1, DX
+ SHRQ $29, DX
+ MOVQ DX, sec+0(FP)
+ IMULQ $1000000000, DX
+ SUBQ DX, CX
+ MOVL CX, nsec+8(FP)
+ RET
+
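The "generated code" referred to above divides by 1e9 with a fixed-point reciprocal instead of DIVQ: it computes sec = ((M*x >> 64) + x) >> 30 (RCRQ preserves the carry bit of the addition), where M = 1360296554856532783 is chosen so that 2^64 + M = ceil(2^94 / 1e9), making the result floor(x/1e9) for the values nanotime produces. A small Go check of that constant (illustrative, not part of the tree):

	package main

	import (
		"fmt"
		"math/big"
	)

	func main() {
		// Verify that 1360296554856532783 == ceil(2^94 / 1e9) - 2^64,
		// i.e. the reciprocal constant the assembly uses for x / 1e9.
		two94 := new(big.Int).Lsh(big.NewInt(1), 94)
		billion := big.NewInt(1000000000)
		q, r := new(big.Int).QuoRem(two94, billion, new(big.Int))
		if r.Sign() != 0 {
			q.Add(q, big.NewInt(1)) // round up
		}
		q.Sub(q, new(big.Int).Lsh(big.NewInt(1), 64))
		fmt.Println(q) // prints 1360296554856532783
	}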
+TEXT runtime·notify(SB),NOSPLIT,$0
+ MOVQ $28, BP
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·noted(SB),NOSPLIT,$0
+ MOVQ $29, BP
+ SYSCALL
+ MOVL AX, ret+8(FP)
+ RET
+
+TEXT runtime·plan9_semrelease(SB),NOSPLIT,$0
+ MOVQ $38, BP
+ SYSCALL
+ MOVL AX, ret+16(FP)
+ RET
+
+TEXT runtime·rfork(SB),NOSPLIT,$0
+ MOVQ $19, BP // rfork
+ SYSCALL
+
+ // In parent, return.
+ CMPQ AX, $0
+ JEQ 3(PC)
+ MOVL AX, ret+40(FP)
+ RET
+
+ // In child on forked stack.
+ MOVQ mm+24(SP), BX // m
+ MOVQ gg+32(SP), DX // g
+ MOVQ fn+40(SP), SI // fn
+
+ // set SP to be on the new child stack
+ MOVQ stack+16(SP), CX
+ MOVQ CX, SP
+
+ // Initialize m, g.
+ get_tls(AX)
+ MOVQ DX, g(AX)
+ MOVQ BX, g_m(DX)
+
+ // Initialize procid from TOS struct.
+ MOVQ _tos(SB), AX
+ MOVQ 64(AX), AX
+ MOVQ AX, m_procid(BX) // save pid as m->procid
+
+ CALL runtime·stackcheck(SB) // smashes AX, CX
+
+ MOVQ 0(DX), DX // paranoia; check they are not nil
+ MOVQ 0(BX), BX
+
+ CALL SI // fn()
+ CALL runtime·exit(SB)
+ MOVL AX, ret+40(FP)
+ RET
+
+// This is needed by asm_amd64.s
+TEXT runtime·settls(SB),NOSPLIT,$0
+ RET
+
+// void sigtramp(void *ureg, int8 *note)
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ get_tls(AX)
+
+ // check that g exists
+ MOVQ g(AX), BX
+ CMPQ BX, $0
+ JNE 3(PC)
+ CALL runtime·badsignal2(SB) // will exit
+ RET
+
+ // save args
+ MOVQ ureg+8(SP), CX
+ MOVQ note+16(SP), DX
+
+ // change stack
+ MOVQ g_m(BX), BX
+ MOVQ m_gsignal(BX), R10
+ MOVQ g_stackbase(R10), BP
+ MOVQ BP, SP
+
+ // make room for args and g
+ SUBQ $40, SP
+
+ // save g
+ MOVQ g(AX), BP
+ MOVQ BP, 32(SP)
+
+ // g = m->gsignal
+ MOVQ R10, g(AX)
+
+ // load args and call sighandler
+ MOVQ CX, 0(SP)
+ MOVQ DX, 8(SP)
+ MOVQ BP, 16(SP)
+
+ CALL runtime·sighandler(SB)
+ MOVL 24(SP), AX
+
+ // restore g
+ get_tls(BX)
+ MOVQ 32(SP), R10
+ MOVQ R10, g(BX)
+
+ // call noted(AX)
+ MOVQ AX, 0(SP)
+ CALL runtime·noted(SB)
+ RET
+
+TEXT runtime·setfpmasks(SB),NOSPLIT,$8
+ STMXCSR 0(SP)
+ MOVL 0(SP), AX
+ ANDL $~0x3F, AX
+ ORL $(0x3F<<7), AX
+ MOVL AX, 0(SP)
+ LDMXCSR 0(SP)
+ RET
+
+#define ERRMAX 128 /* from os_plan9.h */
+
+// void errstr(int8 *buf, int32 len)
+TEXT errstr<>(SB),NOSPLIT,$0
+ MOVQ $41, BP
+ SYSCALL
+ RET
+
+// func errstr() string
+// Only used by package syscall.
+// Grab error string due to a syscall made
+// in entersyscall mode, without going
+// through the allocator (issue 4994).
+// See ../syscall/asm_plan9_amd64.s:/·Syscall/
+TEXT runtime·errstr(SB),NOSPLIT,$16-16
+ get_tls(AX)
+ MOVQ g(AX), BX
+ MOVQ g_m(BX), BX
+ MOVQ m_errstr(BX), CX
+ MOVQ CX, 0(SP)
+ MOVQ $ERRMAX, 8(SP)
+ CALL errstr<>(SB)
+ CALL runtime·findnull(SB)
+ MOVQ 8(SP), AX
+ MOVQ AX, ret_len+8(FP)
+ MOVQ 0(SP), AX
+ MOVQ AX, ret_base+0(FP)
+ RET
diff --git a/src/runtime/sys_solaris_amd64.s b/src/runtime/sys_solaris_amd64.s
new file mode 100644
index 000000000..093315c4a
--- /dev/null
+++ b/src/runtime/sys_solaris_amd64.s
@@ -0,0 +1,349 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// System calls and other sys.stuff for AMD64, SunOS
+// /usr/include/sys/syscall.h for syscall numbers.
+//
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// This is needed by asm_amd64.s
+TEXT runtime·settls(SB),NOSPLIT,$8
+ RET
+
+// void libc·miniterrno(void *(*___errno)(void));
+//
+// Set the TLS errno pointer in M.
+//
+// Called using runtime·asmcgocall from os_solaris.c:/minit.
+// NOT USING GO CALLING CONVENTION.
+TEXT runtime·miniterrno(SB),NOSPLIT,$0
+ // asmcgocall will put first argument into DI.
+ CALL DI // SysV ABI so returns in AX
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVQ AX, m_perrno(BX)
+ RET
+
+// int64 runtime·nanotime1(void);
+//
+// clock_gettime(3c) wrapper because Timespec is too large for
+// runtime·nanotime stack.
+//
+// Called using runtime·sysvicall6 from os_solaris.c:/nanotime.
+// NOT USING GO CALLING CONVENTION.
+TEXT runtime·nanotime1(SB),NOSPLIT,$0
+ // need space for the timespec argument.
+ SUBQ $64, SP // 16 bytes will do, but who knows in the future?
+ MOVQ $3, DI // CLOCK_REALTIME from <sys/time_impl.h>
+ MOVQ SP, SI
+ MOVQ libc·clock_gettime(SB), AX
+ CALL AX
+ MOVQ (SP), AX // tv_sec from struct timespec
+ IMULQ $1000000000, AX // multiply into nanoseconds
+ ADDQ 8(SP), AX // tv_nsec, offset should be stable.
+ ADDQ $64, SP
+ RET
+
+// pipe(3c) wrapper that returns fds in AX, DX.
+// NOT USING GO CALLING CONVENTION.
+TEXT runtime·pipe1(SB),NOSPLIT,$0
+	SUBQ	$16, SP // 8 bytes will do, but stack has to be 16-byte aligned
+ MOVQ SP, DI
+ MOVQ libc·pipe(SB), AX
+ CALL AX
+ MOVL 0(SP), AX
+ MOVL 4(SP), DX
+ ADDQ $16, SP
+ RET
+
+// Call a library function with SysV calling conventions.
+// The called function can take a maximum of 6 INTEGER class arguments,
+// see
+// Michael Matz, Jan Hubicka, Andreas Jaeger, and Mark Mitchell
+// System V Application Binary Interface
+// AMD64 Architecture Processor Supplement
+// section 3.2.3.
+//
+// Called by runtime·asmcgocall or runtime·cgocall.
+// NOT USING GO CALLING CONVENTION.
+TEXT runtime·asmsysvicall6(SB),NOSPLIT,$0
+ // asmcgocall will put first argument into DI.
+ PUSHQ DI // save for later
+ MOVQ libcall_fn(DI), AX
+ MOVQ libcall_args(DI), R11
+ MOVQ libcall_n(DI), R10
+
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVQ m_perrno(BX), DX
+ CMPQ DX, $0
+ JEQ skiperrno1
+ MOVL $0, 0(DX)
+
+skiperrno1:
+ CMPQ R11, $0
+ JEQ skipargs
+	// Load 6 args into corresponding registers.
+ MOVQ 0(R11), DI
+ MOVQ 8(R11), SI
+ MOVQ 16(R11), DX
+ MOVQ 24(R11), CX
+ MOVQ 32(R11), R8
+ MOVQ 40(R11), R9
+skipargs:
+
+ // Call SysV function
+ CALL AX
+
+ // Return result
+ POPQ DI
+ MOVQ AX, libcall_r1(DI)
+ MOVQ DX, libcall_r2(DI)
+
+ get_tls(CX)
+ MOVQ g(CX), BX
+ MOVQ g_m(BX), BX
+ MOVQ m_perrno(BX), AX
+ CMPQ AX, $0
+ JEQ skiperrno2
+ MOVL 0(AX), AX
+ MOVQ AX, libcall_err(DI)
+
+skiperrno2:
+ RET
+
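asmsysvicall6 reaches its call descriptor through the libcall_* offsets: a function pointer, an argument count, a pointer to at most six uintptr arguments, and slots for the two result registers and errno. A hedged Go sketch of the layout those offsets imply (field names follow the asm references; the runtime's real definition may differ):

	// libcall mirrors the fields asmsysvicall6 touches via libcall_fn,
	// libcall_n, libcall_args, libcall_r1, libcall_r2 and libcall_err.
	type libcall struct {
		fn   uintptr // address of the C function to call
		n    uintptr // number of integer arguments, at most 6
		args uintptr // pointer to an array of n uintptr arguments
		r1   uintptr // RAX after the call
		r2   uintptr // RDX after the call
		err  uintptr // *m.perrno after the call, if perrno is set
	}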
+// uint32 tstart_sysvicall(M *newm);
+TEXT runtime·tstart_sysvicall(SB),NOSPLIT,$0
+ // DI contains first arg newm
+ MOVQ m_g0(DI), DX // g
+
+ // Make TLS entries point at g and m.
+ get_tls(BX)
+ MOVQ DX, g(BX)
+ MOVQ DI, g_m(DX)
+
+ // Layout new m scheduler stack on os stack.
+ MOVQ SP, AX
+ MOVQ AX, g_stackbase(DX)
+ SUBQ $(0x100000), AX // stack size
+ MOVQ AX, g_stackguard(DX)
+ MOVQ AX, g_stackguard0(DX)
+
+ // Someday the convention will be D is always cleared.
+ CLD
+
+ CALL runtime·stackcheck(SB) // clobbers AX,CX
+ CALL runtime·mstart(SB)
+
+ XORL AX, AX // return 0 == success
+ MOVL AX, ret+8(FP)
+ RET
+
+// Careful, this is called by __sighndlr, a libc function. We must preserve
+// registers as per AMD 64 ABI.
+TEXT runtime·sigtramp(SB),NOSPLIT,$0
+ // Note that we are executing on altsigstack here, so we have
+ // more stack available than NOSPLIT would have us believe.
+ // To defeat the linker, we make our own stack frame with
+ // more space:
+ SUBQ $184, SP
+
+ // save registers
+ MOVQ BX, 32(SP)
+ MOVQ BP, 40(SP)
+ MOVQ R12, 48(SP)
+ MOVQ R13, 56(SP)
+ MOVQ R14, 64(SP)
+ MOVQ R15, 72(SP)
+
+ get_tls(BX)
+ // check that g exists
+ MOVQ g(BX), R10
+ CMPQ R10, $0
+ JNE allgood
+ MOVQ DI, 0(SP)
+ MOVQ $runtime·badsignal(SB), AX
+ CALL AX
+ JMP exit
+
+allgood:
+ // save g
+ MOVQ R10, 80(SP)
+
+ // Save m->libcall and m->scratch. We need to do this because we
+ // might get interrupted by a signal in runtime·asmcgocall.
+
+ // save m->libcall
+ MOVQ g_m(R10), BP
+ LEAQ m_libcall(BP), R11
+ MOVQ libcall_fn(R11), R10
+ MOVQ R10, 88(SP)
+ MOVQ libcall_args(R11), R10
+ MOVQ R10, 96(SP)
+ MOVQ libcall_n(R11), R10
+ MOVQ R10, 104(SP)
+ MOVQ libcall_r1(R11), R10
+ MOVQ R10, 168(SP)
+ MOVQ libcall_r2(R11), R10
+ MOVQ R10, 176(SP)
+
+ // save m->scratch
+ LEAQ m_scratch(BP), R11
+ MOVQ 0(R11), R10
+ MOVQ R10, 112(SP)
+ MOVQ 8(R11), R10
+ MOVQ R10, 120(SP)
+ MOVQ 16(R11), R10
+ MOVQ R10, 128(SP)
+ MOVQ 24(R11), R10
+ MOVQ R10, 136(SP)
+ MOVQ 32(R11), R10
+ MOVQ R10, 144(SP)
+ MOVQ 40(R11), R10
+ MOVQ R10, 152(SP)
+
+ // save errno, it might be EINTR; stuff we do here might reset it.
+ MOVQ m_perrno(BP), R10
+ MOVL 0(R10), R10
+ MOVQ R10, 160(SP)
+
+ MOVQ g(BX), R10
+ // g = m->gsignal
+ MOVQ m_gsignal(BP), BP
+ MOVQ BP, g(BX)
+
+ // prepare call
+ MOVQ DI, 0(SP)
+ MOVQ SI, 8(SP)
+ MOVQ DX, 16(SP)
+ MOVQ R10, 24(SP)
+ CALL runtime·sighandler(SB)
+
+ get_tls(BX)
+ MOVQ g(BX), BP
+ MOVQ g_m(BP), BP
+ // restore libcall
+ LEAQ m_libcall(BP), R11
+ MOVQ 88(SP), R10
+ MOVQ R10, libcall_fn(R11)
+ MOVQ 96(SP), R10
+ MOVQ R10, libcall_args(R11)
+ MOVQ 104(SP), R10
+ MOVQ R10, libcall_n(R11)
+ MOVQ 168(SP), R10
+ MOVQ R10, libcall_r1(R11)
+ MOVQ 176(SP), R10
+ MOVQ R10, libcall_r2(R11)
+
+ // restore scratch
+ LEAQ m_scratch(BP), R11
+ MOVQ 112(SP), R10
+ MOVQ R10, 0(R11)
+ MOVQ 120(SP), R10
+ MOVQ R10, 8(R11)
+ MOVQ 128(SP), R10
+ MOVQ R10, 16(R11)
+ MOVQ 136(SP), R10
+ MOVQ R10, 24(R11)
+ MOVQ 144(SP), R10
+ MOVQ R10, 32(R11)
+ MOVQ 152(SP), R10
+ MOVQ R10, 40(R11)
+
+ // restore errno
+ MOVQ m_perrno(BP), R11
+ MOVQ 160(SP), R10
+ MOVL R10, 0(R11)
+
+ // restore g
+ MOVQ 80(SP), R10
+ MOVQ R10, g(BX)
+
+exit:
+ // restore registers
+ MOVQ 32(SP), BX
+ MOVQ 40(SP), BP
+ MOVQ 48(SP), R12
+ MOVQ 56(SP), R13
+ MOVQ 64(SP), R14
+ MOVQ 72(SP), R15
+
+ ADDQ $184, SP
+ RET
+
+// Called from runtime·usleep (Go). Can be called on the Go stack or the OS
+// stack, and also on the cgo callback path without a g->m.
+TEXT runtime·usleep1(SB),NOSPLIT,$0
+ MOVL usec+0(FP), DI
+ MOVQ $runtime·usleep2(SB), AX // to hide from 6l
+
+ // Execute call on m->g0.
+ get_tls(R15)
+ CMPQ R15, $0
+ JE usleep1_noswitch
+
+ MOVQ g(R15), R13
+ CMPQ R13, $0
+ JE usleep1_noswitch
+ MOVQ g_m(R13), R13
+ CMPQ R13, $0
+ JE usleep1_noswitch
+ // TODO(aram): do something about the cpu profiler here.
+
+ MOVQ m_g0(R13), R14
+ CMPQ g(R15), R14
+ JNE usleep1_switch
+ // executing on m->g0 already
+ CALL AX
+ RET
+
+usleep1_switch:
+ // Switch to m->g0 stack and back.
+ MOVQ (g_sched+gobuf_sp)(R14), R14
+ MOVQ SP, -8(R14)
+ LEAQ -8(R14), SP
+ CALL AX
+ MOVQ 0(SP), SP
+ RET
+
+usleep1_noswitch:
+ // Not a Go-managed thread. Do not switch stack.
+ CALL AX
+ RET
+
+// Runs on OS stack. duration (in µs units) is in DI.
+TEXT runtime·usleep2(SB),NOSPLIT,$0
+ MOVQ libc·usleep(SB), AX
+ CALL AX
+ RET
+
+// Runs on OS stack, called from runtime·osyield.
+TEXT runtime·osyield1(SB),NOSPLIT,$0
+ MOVQ libc·sched_yield(SB), AX
+ CALL AX
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$8-12
+ CALL runtime·nanotime(SB)
+ MOVQ 0(SP), AX
+
+ // generated code for
+	// func f(x uint64) (uint64, uint64) { return x/1000000000, x%1000000000 }
+ // adapted to reduce duplication
+ MOVQ AX, CX
+ MOVQ $1360296554856532783, AX
+ MULQ CX
+ ADDQ CX, DX
+ RCRQ $1, DX
+ SHRQ $29, DX
+ MOVQ DX, sec+0(FP)
+ IMULQ $1000000000, DX
+ SUBQ DX, CX
+ MOVL CX, nsec+8(FP)
+ RET
diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s
new file mode 100644
index 000000000..a9e096f01
--- /dev/null
+++ b/src/runtime/sys_windows_386.s
@@ -0,0 +1,380 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// void runtime·asmstdcall(void *c);
+TEXT runtime·asmstdcall(SB),NOSPLIT,$0
+ MOVL fn+0(FP), BX
+
+ // SetLastError(0).
+ MOVL $0, 0x34(FS)
+
+ // Copy args to the stack.
+ MOVL SP, BP
+ MOVL libcall_n(BX), CX // words
+ MOVL CX, AX
+ SALL $2, AX
+ SUBL AX, SP // room for args
+ MOVL SP, DI
+ MOVL libcall_args(BX), SI
+ CLD
+ REP; MOVSL
+
+ // Call stdcall or cdecl function.
+ // DI SI BP BX are preserved, SP is not
+ CALL libcall_fn(BX)
+ MOVL BP, SP
+
+ // Return result.
+ MOVL fn+0(FP), BX
+ MOVL AX, libcall_r1(BX)
+ MOVL DX, libcall_r2(BX)
+
+ // GetLastError().
+ MOVL 0x34(FS), AX
+ MOVL AX, libcall_err(BX)
+
+ RET
+
+TEXT runtime·badsignal2(SB),NOSPLIT,$24
+ // stderr
+ MOVL $-12, 0(SP)
+ MOVL SP, BP
+ CALL *runtime·GetStdHandle(SB)
+ MOVL BP, SP
+
+ MOVL AX, 0(SP) // handle
+ MOVL $runtime·badsignalmsg(SB), DX // pointer
+ MOVL DX, 4(SP)
+ MOVL runtime·badsignallen(SB), DX // count
+ MOVL DX, 8(SP)
+ LEAL 20(SP), DX // written count
+ MOVL $0, 0(DX)
+ MOVL DX, 12(SP)
+ MOVL $0, 16(SP) // overlapped
+ CALL *runtime·WriteFile(SB)
+ MOVL BP, SI
+ RET
+
+// faster get/set last error
+TEXT runtime·getlasterror(SB),NOSPLIT,$0
+ MOVL 0x34(FS), AX
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·setlasterror(SB),NOSPLIT,$0
+ MOVL err+0(FP), AX
+ MOVL AX, 0x34(FS)
+ RET
+
+// Called by Windows as a Vectored Exception Handler (VEH).
+// First argument is pointer to struct containing
+// exception record and context pointers.
+// Return 0 for 'not handled', -1 for handled.
+TEXT runtime·sigtramp(SB),NOSPLIT,$0-0
+ MOVL ptrs+0(FP), CX
+ SUBL $32, SP
+
+ // save callee-saved registers
+ MOVL BX, 28(SP)
+ MOVL BP, 16(SP)
+ MOVL SI, 20(SP)
+ MOVL DI, 24(SP)
+
+ MOVL 0(CX), BX // ExceptionRecord*
+ MOVL 4(CX), CX // Context*
+
+ // fetch g
+ get_tls(DX)
+ CMPL DX, $0
+ JNE 3(PC)
+ MOVL $0, AX // continue
+ JMP done
+ MOVL g(DX), DX
+ CMPL DX, $0
+ JNE 2(PC)
+ CALL runtime·badsignal2(SB)
+ // call sighandler(ExceptionRecord*, Context*, G*)
+ MOVL BX, 0(SP)
+ MOVL CX, 4(SP)
+ MOVL DX, 8(SP)
+ CALL runtime·sighandler(SB)
+ // AX is set to report result back to Windows
+ MOVL 12(SP), AX
+
+done:
+ // restore callee-saved registers
+ MOVL 24(SP), DI
+ MOVL 20(SP), SI
+ MOVL 16(SP), BP
+ MOVL 28(SP), BX
+
+ ADDL $32, SP
+ // RET 4 (return and pop 4 bytes parameters)
+ BYTE $0xC2; WORD $4
+ RET // unreached; make assembler happy
+
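The handler receives a single pointer argument; the two MOVL loads at 0(CX) and 4(CX) above treat it as a pair of 32-bit pointers, the exception record followed by the thread context. A hedged Go sketch of that assumed layout (illustrative names):

	// exceptionPointers is the shape sigtramp assumes for its argument on 386:
	// two pointer-sized fields, record first, context second.
	type exceptionPointers struct {
		exceptionRecord uintptr // read with MOVL 0(CX), BX
		contextRecord   uintptr // read with MOVL 4(CX), CX
	}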
+TEXT runtime·ctrlhandler(SB),NOSPLIT,$0
+ PUSHL $runtime·ctrlhandler1(SB)
+ CALL runtime·externalthreadhandler(SB)
+ MOVL 4(SP), CX
+ ADDL $12, SP
+ JMP CX
+
+TEXT runtime·profileloop(SB),NOSPLIT,$0
+ PUSHL $runtime·profileloop1(SB)
+ CALL runtime·externalthreadhandler(SB)
+ MOVL 4(SP), CX
+ ADDL $12, SP
+ JMP CX
+
+TEXT runtime·externalthreadhandler(SB),NOSPLIT,$0
+ PUSHL BP
+ MOVL SP, BP
+ PUSHL BX
+ PUSHL SI
+ PUSHL DI
+ PUSHL 0x14(FS)
+ MOVL SP, DX
+
+ // setup dummy m, g
+ SUBL $m_end, SP // space for M
+ MOVL SP, 0(SP)
+ MOVL $m_end, 4(SP)
+ CALL runtime·memclr(SB) // smashes AX,BX,CX
+
+ LEAL m_tls(SP), CX
+ MOVL CX, 0x14(FS)
+ MOVL SP, BX
+ SUBL $g_end, SP // space for G
+ MOVL SP, g(CX)
+ MOVL SP, m_g0(BX)
+
+ MOVL SP, 0(SP)
+ MOVL $g_end, 4(SP)
+ CALL runtime·memclr(SB) // smashes AX,BX,CX
+ LEAL g_end(SP), BX
+ MOVL BX, g_m(SP)
+ LEAL -4096(SP), CX
+ MOVL CX, g_stackguard(SP)
+ MOVL DX, g_stackbase(SP)
+
+ PUSHL 16(BP) // arg for handler
+ CALL 8(BP)
+ POPL CX
+
+ get_tls(CX)
+ MOVL g(CX), CX
+ MOVL g_stackbase(CX), SP
+ POPL 0x14(FS)
+ POPL DI
+ POPL SI
+ POPL BX
+ POPL BP
+ RET
+
+GLOBL runtime·cbctxts(SB), $4
+
+TEXT runtime·callbackasm1+0(SB),NOSPLIT,$0
+ MOVL 0(SP), AX // will use to find our callback context
+
+ // remove return address from stack, we are not returning there
+ ADDL $4, SP
+
+ // address to callback parameters into CX
+ LEAL 4(SP), CX
+
+ // save registers as required for windows callback
+ PUSHL DI
+ PUSHL SI
+ PUSHL BP
+ PUSHL BX
+
+ // determine index into runtime·cbctxts table
+ SUBL $runtime·callbackasm(SB), AX
+ MOVL $0, DX
+ MOVL $5, BX // divide by 5 because each call instruction in runtime·callbacks is 5 bytes long
+ DIVL BX,
+
+	// find the corresponding runtime·cbctxts table entry
+ MOVL runtime·cbctxts(SB), BX
+ MOVL -4(BX)(AX*4), BX
+
+ // extract callback context
+ MOVL cbctxt_gobody(BX), AX
+ MOVL cbctxt_argsize(BX), DX
+
+ // preserve whatever's at the memory location that
+ // the callback will use to store the return value
+ PUSHL 0(CX)(DX*1)
+
+ // extend argsize by size of return value
+ ADDL $4, DX
+
+ // remember how to restore stack on return
+ MOVL cbctxt_restorestack(BX), BX
+ PUSHL BX
+
+ // call target Go function
+ PUSHL DX // argsize (including return value)
+ PUSHL CX // callback parameters
+ PUSHL AX // address of target Go function
+ CLD
+ CALL runtime·cgocallback_gofunc(SB)
+ POPL AX
+ POPL CX
+ POPL DX
+
+ // how to restore stack on return
+ POPL BX
+
+ // return value into AX (as per Windows spec)
+ // and restore previously preserved value
+ MOVL -4(CX)(DX*1), AX
+ POPL -4(CX)(DX*1)
+
+ MOVL BX, CX // cannot use BX anymore
+
+ // restore registers as required for windows callback
+ POPL BX
+ POPL BP
+ POPL SI
+ POPL DI
+
+ // remove callback parameters before return (as per Windows spec)
+ POPL DX
+ ADDL CX, SP
+ PUSHL DX
+
+ CLD
+
+ RET
+
+// void tstart(M *newm);
+TEXT runtime·tstart(SB),NOSPLIT,$0
+ MOVL newm+4(SP), CX // m
+ MOVL m_g0(CX), DX // g
+
+ // Layout new m scheduler stack on os stack.
+ MOVL SP, AX
+ MOVL AX, g_stackbase(DX)
+ SUBL $(64*1024), AX // stack size
+ MOVL AX, g_stackguard(DX)
+
+ // Set up tls.
+ LEAL m_tls(CX), SI
+ MOVL SI, 0x14(FS)
+ MOVL CX, g_m(DX)
+ MOVL DX, g(SI)
+
+ // Someday the convention will be D is always cleared.
+ CLD
+
+ CALL runtime·stackcheck(SB) // clobbers AX,CX
+ CALL runtime·mstart(SB)
+
+ RET
+
+// uint32 tstart_stdcall(M *newm);
+TEXT runtime·tstart_stdcall(SB),NOSPLIT,$0
+ MOVL newm+4(SP), BX
+
+ PUSHL BX
+ CALL runtime·tstart(SB)
+ POPL BX
+
+ // Adjust stack for stdcall to return properly.
+ MOVL (SP), AX // save return address
+ ADDL $4, SP // remove single parameter
+ MOVL AX, (SP) // restore return address
+
+ XORL AX, AX // return 0 == success
+
+ RET
+
+// setldt(int entry, int address, int limit)
+TEXT runtime·setldt(SB),NOSPLIT,$0
+ MOVL address+4(FP), CX
+ MOVL CX, 0x14(FS)
+ RET
+
+// Sleep duration is in 100ns units.
+TEXT runtime·usleep1(SB),NOSPLIT,$0
+ MOVL usec+0(FP), BX
+ MOVL $runtime·usleep2(SB), AX // to hide from 8l
+
+ // Execute call on m->g0 stack, in case we are not actually
+ // calling a system call wrapper, like when running under WINE.
+ get_tls(CX)
+ CMPL CX, $0
+ JNE 3(PC)
+ // Not a Go-managed thread. Do not switch stack.
+ CALL AX
+ RET
+
+ MOVL g(CX), BP
+ MOVL g_m(BP), BP
+
+ // leave pc/sp for cpu profiler
+ MOVL (SP), SI
+ MOVL SI, m_libcallpc(BP)
+ MOVL g(CX), SI
+ MOVL SI, m_libcallg(BP)
+	// sp must be written last: once the async cpu profiler finds
+	// all three values non-zero, it will use them
+ LEAL usec+0(FP), SI
+ MOVL SI, m_libcallsp(BP)
+
+ MOVL m_g0(BP), SI
+ CMPL g(CX), SI
+ JNE usleep1_switch
+ // executing on m->g0 already
+ CALL AX
+ JMP usleep1_ret
+
+usleep1_switch:
+ // Switch to m->g0 stack and back.
+ MOVL (g_sched+gobuf_sp)(SI), SI
+ MOVL SP, -4(SI)
+ LEAL -4(SI), SP
+ CALL AX
+ MOVL 0(SP), SP
+
+usleep1_ret:
+ get_tls(CX)
+ MOVL g(CX), BP
+ MOVL g_m(BP), BP
+ MOVL $0, m_libcallsp(BP)
+ RET
+
+// Runs on OS stack. duration (in 100ns units) is in BX.
+TEXT runtime·usleep2(SB),NOSPLIT,$20
+ // Want negative 100ns units.
+ NEGL BX
+ MOVL $-1, hi-4(SP)
+ MOVL BX, lo-8(SP)
+ LEAL lo-8(SP), BX
+ MOVL BX, ptime-12(SP)
+ MOVL $0, alertable-16(SP)
+ MOVL $-1, handle-20(SP)
+ MOVL SP, BP
+ MOVL runtime·NtWaitForSingleObject(SB), AX
+ CALL AX
+ MOVL BP, SP
+ RET
+
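NtWaitForSingleObject interprets a negative 64-bit timeout as a relative wait in 100ns units, so usleep2 negates the count and stores -1 as the high word to sign-extend it. The same split in Go (a sketch, not runtime code):

	// ntRelativeTimeout builds the two 32-bit halves the assembly above stores:
	// a negative 64-bit count of 100ns intervals means "relative wait".
	func ntRelativeTimeout(units100ns int32) (lo, hi uint32) {
		t := -int64(units100ns)
		return uint32(t), uint32(t >> 32)
	}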
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$8-12
+ CALL runtime·unixnano(SB)
+ MOVL 0(SP), AX
+ MOVL 4(SP), DX
+
+ MOVL $1000000000, CX
+ DIVL CX
+ MOVL AX, sec+0(FP)
+ MOVL $0, sec+4(FP)
+ MOVL DX, nsec+8(FP)
+ RET
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
new file mode 100644
index 000000000..21f73daf0
--- /dev/null
+++ b/src/runtime/sys_windows_amd64.s
@@ -0,0 +1,407 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+// maxargs should be divisible by 2, as the Windows stack
+// must be kept 16-byte aligned on syscall entry.
+#define maxargs 16
+
+// void runtime·asmstdcall(void *c);
+TEXT runtime·asmstdcall(SB),NOSPLIT,$0
+ // asmcgocall will put first argument into CX.
+ PUSHQ CX // save for later
+ MOVQ libcall_fn(CX), AX
+ MOVQ libcall_args(CX), SI
+ MOVQ libcall_n(CX), CX
+
+ // SetLastError(0).
+ MOVQ 0x30(GS), DI
+ MOVL $0, 0x68(DI)
+
+ SUBQ $(maxargs*8), SP // room for args
+
+ // Fast version, do not store args on the stack.
+ CMPL CX, $4
+ JLE loadregs
+
+ // Check we have enough room for args.
+ CMPL CX, $maxargs
+ JLE 2(PC)
+ INT $3 // not enough room -> crash
+
+ // Copy args to the stack.
+ MOVQ SP, DI
+ CLD
+ REP; MOVSQ
+ MOVQ SP, SI
+
+loadregs:
+	// Load first 4 args into corresponding registers.
+ MOVQ 0(SI), CX
+ MOVQ 8(SI), DX
+ MOVQ 16(SI), R8
+ MOVQ 24(SI), R9
+
+ // Call stdcall function.
+ CALL AX
+
+ ADDQ $(maxargs*8), SP
+
+ // Return result.
+ POPQ CX
+ MOVQ AX, libcall_r1(CX)
+
+ // GetLastError().
+ MOVQ 0x30(GS), DI
+ MOVL 0x68(DI), AX
+ MOVQ AX, libcall_err(CX)
+
+ RET
+
+TEXT runtime·badsignal2(SB),NOSPLIT,$48
+ // stderr
+ MOVQ $-12, CX // stderr
+ MOVQ CX, 0(SP)
+ MOVQ runtime·GetStdHandle(SB), AX
+ CALL AX
+
+ MOVQ AX, CX // handle
+ MOVQ CX, 0(SP)
+ MOVQ $runtime·badsignalmsg(SB), DX // pointer
+ MOVQ DX, 8(SP)
+ MOVL $runtime·badsignallen(SB), R8 // count
+ MOVQ R8, 16(SP)
+ LEAQ 40(SP), R9 // written count
+ MOVQ $0, 0(R9)
+ MOVQ R9, 24(SP)
+ MOVQ $0, 32(SP) // overlapped
+ MOVQ runtime·WriteFile(SB), AX
+ CALL AX
+
+ RET
+
+// faster get/set last error
+TEXT runtime·getlasterror(SB),NOSPLIT,$0
+ MOVQ 0x30(GS), AX
+ MOVL 0x68(AX), AX
+ MOVL AX, ret+0(FP)
+ RET
+
+TEXT runtime·setlasterror(SB),NOSPLIT,$0
+ MOVL err+0(FP), AX
+ MOVQ 0x30(GS), CX
+ MOVL AX, 0x68(CX)
+ RET
+
+// Called by Windows as a Vectored Exception Handler (VEH).
+// First argument is pointer to struct containing
+// exception record and context pointers.
+// Return 0 for 'not handled', -1 for handled.
+TEXT runtime·sigtramp(SB),NOSPLIT,$0-0
+ // CX: PEXCEPTION_POINTERS ExceptionInfo
+
+ // DI SI BP BX R12 R13 R14 R15 registers and DF flag are preserved
+ // as required by windows callback convention.
+ PUSHFQ
+ SUBQ $96, SP
+ MOVQ DI, 80(SP)
+ MOVQ SI, 72(SP)
+ MOVQ BP, 64(SP)
+ MOVQ BX, 56(SP)
+ MOVQ R12, 48(SP)
+ MOVQ R13, 40(SP)
+ MOVQ R14, 32(SP)
+ MOVQ R15, 88(SP)
+
+ MOVQ 0(CX), BX // ExceptionRecord*
+ MOVQ 8(CX), CX // Context*
+
+ // fetch g
+ get_tls(DX)
+ CMPQ DX, $0
+ JNE 3(PC)
+ MOVQ $0, AX // continue
+ JMP done
+ MOVQ g(DX), DX
+ CMPQ DX, $0
+ JNE 2(PC)
+ CALL runtime·badsignal2(SB)
+ // call sighandler(ExceptionRecord*, Context*, G*)
+ MOVQ BX, 0(SP)
+ MOVQ CX, 8(SP)
+ MOVQ DX, 16(SP)
+ CALL runtime·sighandler(SB)
+ // AX is set to report result back to Windows
+ MOVL 24(SP), AX
+
+done:
+ // restore registers as required for windows callback
+ MOVQ 88(SP), R15
+ MOVQ 32(SP), R14
+ MOVQ 40(SP), R13
+ MOVQ 48(SP), R12
+ MOVQ 56(SP), BX
+ MOVQ 64(SP), BP
+ MOVQ 72(SP), SI
+ MOVQ 80(SP), DI
+ ADDQ $96, SP
+ POPFQ
+
+ RET
+
+TEXT runtime·ctrlhandler(SB),NOSPLIT,$8
+ MOVQ CX, 16(SP) // spill
+ MOVQ $runtime·ctrlhandler1(SB), CX
+ MOVQ CX, 0(SP)
+ CALL runtime·externalthreadhandler(SB)
+ RET
+
+TEXT runtime·profileloop(SB),NOSPLIT,$8
+ MOVQ $runtime·profileloop1(SB), CX
+ MOVQ CX, 0(SP)
+ CALL runtime·externalthreadhandler(SB)
+ RET
+
+TEXT runtime·externalthreadhandler(SB),NOSPLIT,$0
+ PUSHQ BP
+ MOVQ SP, BP
+ PUSHQ BX
+ PUSHQ SI
+ PUSHQ DI
+ PUSHQ 0x28(GS)
+ MOVQ SP, DX
+
+ // setup dummy m, g
+ SUBQ $m_end, SP // space for M
+ MOVQ SP, 0(SP)
+ MOVQ $m_end, 8(SP)
+ CALL runtime·memclr(SB) // smashes AX,BX,CX
+
+ LEAQ m_tls(SP), CX
+ MOVQ CX, 0x28(GS)
+ MOVQ SP, BX
+ SUBQ $g_end, SP // space for G
+ MOVQ SP, g(CX)
+ MOVQ SP, m_g0(BX)
+
+ MOVQ SP, 0(SP)
+ MOVQ $g_end, 8(SP)
+ CALL runtime·memclr(SB) // smashes AX,BX,CX
+ LEAQ g_end(SP), BX
+ MOVQ BX, g_m(SP)
+
+ LEAQ -8192(SP), CX
+ MOVQ CX, g_stackguard(SP)
+ MOVQ DX, g_stackbase(SP)
+
+ PUSHQ 32(BP) // arg for handler
+ CALL 16(BP)
+ POPQ CX
+
+ get_tls(CX)
+ MOVQ g(CX), CX
+ MOVQ g_stackbase(CX), SP
+ POPQ 0x28(GS)
+ POPQ DI
+ POPQ SI
+ POPQ BX
+ POPQ BP
+ RET
+
+GLOBL runtime·cbctxts(SB), $8
+
+TEXT runtime·callbackasm1(SB),NOSPLIT,$0
+ // Construct args vector for cgocallback().
+	// By the windows/amd64 calling convention the first 4 args are in CX, DX, R8, R9;
+	// args from the 5th on are on the stack.
+	// In any case, even if the function has 0, 1, 2, 3 or 4 args, there is reserved
+	// but uninitialized "shadow space" for the first 4 args.
+	// The values are in registers.
+ MOVQ CX, (16+0)(SP)
+ MOVQ DX, (16+8)(SP)
+ MOVQ R8, (16+16)(SP)
+ MOVQ R9, (16+24)(SP)
+
+ // remove return address from stack, we are not returning there
+ MOVQ 0(SP), AX
+ ADDQ $8, SP
+
+ // determine index into runtime·cbctxts table
+ MOVQ $runtime·callbackasm(SB), DX
+ SUBQ DX, AX
+ MOVQ $0, DX
+ MOVQ $5, CX // divide by 5 because each call instruction in runtime·callbacks is 5 bytes long
+	DIVL	CX
+
+	// find corresponding runtime·cbctxts table entry
+ MOVQ runtime·cbctxts(SB), CX
+ MOVQ -8(CX)(AX*8), AX
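+
+	// The quotient is a 1-based index: the return address pushed by the
+	// i-th CALL (counting from 1) in runtime·callbackasm is callbackasm+5*i,
+	// so (retaddr-callbackasm)/5 == i; the -8 displacement above converts
+	// that 1-based index into the 0-based cbctxts slot.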
+
+ // extract callback context
+ MOVQ cbctxt_argsize(AX), DX
+ MOVQ cbctxt_gobody(AX), AX
+
+ // preserve whatever's at the memory location that
+ // the callback will use to store the return value
+ LEAQ 8(SP), CX // args vector, skip return address
+ PUSHQ 0(CX)(DX*1) // store 8 bytes from just after the args array
+ ADDQ $8, DX // extend argsize by size of return value
+
+ // DI SI BP BX R12 R13 R14 R15 registers and DF flag are preserved
+ // as required by windows callback convention.
+ PUSHFQ
+ SUBQ $64, SP
+ MOVQ DI, 56(SP)
+ MOVQ SI, 48(SP)
+ MOVQ BP, 40(SP)
+ MOVQ BX, 32(SP)
+ MOVQ R12, 24(SP)
+ MOVQ R13, 16(SP)
+ MOVQ R14, 8(SP)
+ MOVQ R15, 0(SP)
+
+ // prepare call stack. use SUBQ to hide from stack frame checks
+ // cgocallback(Go func, void *frame, uintptr framesize)
+ SUBQ $24, SP
+ MOVQ DX, 16(SP) // argsize (including return value)
+ MOVQ CX, 8(SP) // callback parameters
+ MOVQ AX, 0(SP) // address of target Go function
+ CLD
+ CALL runtime·cgocallback_gofunc(SB)
+ MOVQ 0(SP), AX
+ MOVQ 8(SP), CX
+ MOVQ 16(SP), DX
+ ADDQ $24, SP
+
+ // restore registers as required for windows callback
+ MOVQ 0(SP), R15
+ MOVQ 8(SP), R14
+ MOVQ 16(SP), R13
+ MOVQ 24(SP), R12
+ MOVQ 32(SP), BX
+ MOVQ 40(SP), BP
+ MOVQ 48(SP), SI
+ MOVQ 56(SP), DI
+ ADDQ $64, SP
+ POPFQ
+
+ MOVL -8(CX)(DX*1), AX // return value
+ POPQ -8(CX)(DX*1) // restore bytes just after the args
+ RET
+
+// uint32 tstart_stdcall(M *newm);
+TEXT runtime·tstart_stdcall(SB),NOSPLIT,$0
+ // CX contains first arg newm
+ MOVQ m_g0(CX), DX // g
+
+ // Layout new m scheduler stack on os stack.
+ MOVQ SP, AX
+ MOVQ AX, g_stackbase(DX)
+ SUBQ $(64*1024), AX // stack size
+ MOVQ AX, g_stackguard(DX)
+
+ // Set up tls.
+ LEAQ m_tls(CX), SI
+ MOVQ SI, 0x28(GS)
+ MOVQ CX, g_m(DX)
+ MOVQ DX, g(SI)
+
+	// Someday the convention will be that D is always cleared.
+ CLD
+
+ CALL runtime·stackcheck(SB) // clobbers AX,CX
+ CALL runtime·mstart(SB)
+
+ XORL AX, AX // return 0 == success
+ RET
+
+// set tls base to DI
+TEXT runtime·settls(SB),NOSPLIT,$0
+ MOVQ DI, 0x28(GS)
+ RET
+
+// Sleep duration is in 100ns units.
+TEXT runtime·usleep1(SB),NOSPLIT,$0
+ MOVL usec+0(FP), BX
+ MOVQ $runtime·usleep2(SB), AX // to hide from 6l
+
+ // Execute call on m->g0 stack, in case we are not actually
+ // calling a system call wrapper, like when running under WINE.
+ get_tls(R15)
+ CMPQ R15, $0
+ JNE 3(PC)
+ // Not a Go-managed thread. Do not switch stack.
+ CALL AX
+ RET
+
+ MOVQ g(R15), R13
+ MOVQ g_m(R13), R13
+
+ // leave pc/sp for cpu profiler
+ MOVQ (SP), R12
+ MOVQ R12, m_libcallpc(R13)
+ MOVQ g(R15), R12
+ MOVQ R12, m_libcallg(R13)
+	// sp must be stored last, because once the async cpu profiler finds
+	// all three values to be non-zero, it will use them
+ LEAQ usec+0(FP), R12
+ MOVQ R12, m_libcallsp(R13)
+
+ MOVQ m_g0(R13), R14
+ CMPQ g(R15), R14
+ JNE usleep1_switch
+ // executing on m->g0 already
+ CALL AX
+ JMP usleep1_ret
+
+usleep1_switch:
+ // Switch to m->g0 stack and back.
+ MOVQ (g_sched+gobuf_sp)(R14), R14
+ MOVQ SP, -8(R14)
+ LEAQ -8(R14), SP
+ CALL AX
+ MOVQ 0(SP), SP
+
+usleep1_ret:
+ MOVQ $0, m_libcallsp(R13)
+ RET
+
+// Runs on OS stack. duration (in 100ns units) is in BX.
+TEXT runtime·usleep2(SB),NOSPLIT,$16
+ MOVQ SP, AX
+ ANDQ $~15, SP // alignment as per Windows requirement
+ MOVQ AX, 8(SP)
+ // Want negative 100ns units.
+ NEGQ BX
+ MOVQ SP, R8 // ptime
+ MOVQ BX, (R8)
+ MOVQ $-1, CX // handle
+ MOVQ $0, DX // alertable
+ MOVQ runtime·NtWaitForSingleObject(SB), AX
+ CALL AX
+ MOVQ 8(SP), SP
+ RET
+
+// func now() (sec int64, nsec int32)
+TEXT time·now(SB),NOSPLIT,$8-12
+ CALL runtime·unixnano(SB)
+ MOVQ 0(SP), AX
+
+ // generated code for
+	// func f(x uint64) (uint64, uint64) { return x/1000000000, x%1000000000 }
+ // adapted to reduce duplication
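+	// The magic constant encodes division by 10^9:
+	// 1360296554856532783 + 2^64 == ceil(2^94 / 10^9), so the
+	// ADDQ/RCRQ/SHRQ sequence below computes x*(c+2^64)>>94,
+	// i.e. the number of whole seconds in x nanoseconds.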
+ MOVQ AX, CX
+ MOVQ $1360296554856532783, AX
+ MULQ CX
+ ADDQ CX, DX
+ RCRQ $1, DX
+ SHRQ $29, DX
+ MOVQ DX, sec+0(FP)
+ IMULQ $1000000000, DX
+ SUBQ DX, CX
+ MOVL CX, nsec+8(FP)
+ RET
+
diff --git a/src/runtime/sys_x86.c b/src/runtime/sys_x86.c
new file mode 100644
index 000000000..a450b3e58
--- /dev/null
+++ b/src/runtime/sys_x86.c
@@ -0,0 +1,57 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32 386
+
+#include "runtime.h"
+
+// adjust Gobuf as if it executed a call to fn with context ctxt
+// and then did an immediate gosave.
+void
+runtime·gostartcall(Gobuf *gobuf, void (*fn)(void), void *ctxt)
+{
+ uintptr *sp;
+
+ sp = (uintptr*)gobuf->sp;
+ if(sizeof(uintreg) > sizeof(uintptr))
+ *--sp = 0;
+ *--sp = (uintptr)gobuf->pc;
+ gobuf->sp = (uintptr)sp;
+ gobuf->pc = (uintptr)fn;
+ gobuf->ctxt = ctxt;
+}
+
+// Called to rewind context saved during morestack back to beginning of function.
+// To help us, the linker emits a jmp back to the beginning right after the
+// call to morestack. We just have to decode and apply that jump.
+void
+runtime·rewindmorestack(Gobuf *gobuf)
+{
+ byte *pc;
+
+ pc = (byte*)gobuf->pc;
+ if(pc[0] == 0xe9) { // jmp 4-byte offset
+ gobuf->pc = gobuf->pc + 5 + *(int32*)(pc+1);
+ return;
+ }
+ if(pc[0] == 0xeb) { // jmp 1-byte offset
+ gobuf->pc = gobuf->pc + 2 + *(int8*)(pc+1);
+ return;
+ }
+ if(pc[0] == 0xcc) {
+ // This is a breakpoint inserted by gdb. We could use
+ // runtime·findfunc to find the function. But if we
+ // do that, then we will continue execution at the
+ // function entry point, and we will not hit the gdb
+ // breakpoint. So for this case we don't change
+ // gobuf->pc, so that when we return we will execute
+ // the jump instruction and carry on. This means that
+ // stack unwinding may not work entirely correctly
+ // (http://golang.org/issue/5723) but the user is
+ // running under gdb anyhow.
+ return;
+ }
+ runtime·printf("runtime: pc=%p %x %x %x %x %x\n", pc, pc[0], pc[1], pc[2], pc[3], pc[4]);
+ runtime·throw("runtime: misuse of rewindmorestack");
+}
diff --git a/src/runtime/syscall_nacl.h b/src/runtime/syscall_nacl.h
new file mode 100644
index 000000000..b33852ec8
--- /dev/null
+++ b/src/runtime/syscall_nacl.h
@@ -0,0 +1,71 @@
+// generated by mknacl.sh - do not edit
+#define SYS_null 1
+#define SYS_nameservice 2
+#define SYS_dup 8
+#define SYS_dup2 9
+#define SYS_open 10
+#define SYS_close 11
+#define SYS_read 12
+#define SYS_write 13
+#define SYS_lseek 14
+#define SYS_ioctl 15
+#define SYS_stat 16
+#define SYS_fstat 17
+#define SYS_chmod 18
+#define SYS_brk 20
+#define SYS_mmap 21
+#define SYS_munmap 22
+#define SYS_getdents 23
+#define SYS_mprotect 24
+#define SYS_list_mappings 25
+#define SYS_exit 30
+#define SYS_getpid 31
+#define SYS_sched_yield 32
+#define SYS_sysconf 33
+#define SYS_gettimeofday 40
+#define SYS_clock 41
+#define SYS_nanosleep 42
+#define SYS_clock_getres 43
+#define SYS_clock_gettime 44
+#define SYS_mkdir 45
+#define SYS_rmdir 46
+#define SYS_chdir 47
+#define SYS_getcwd 48
+#define SYS_unlink 49
+#define SYS_imc_makeboundsock 60
+#define SYS_imc_accept 61
+#define SYS_imc_connect 62
+#define SYS_imc_sendmsg 63
+#define SYS_imc_recvmsg 64
+#define SYS_imc_mem_obj_create 65
+#define SYS_imc_socketpair 66
+#define SYS_mutex_create 70
+#define SYS_mutex_lock 71
+#define SYS_mutex_trylock 72
+#define SYS_mutex_unlock 73
+#define SYS_cond_create 74
+#define SYS_cond_wait 75
+#define SYS_cond_signal 76
+#define SYS_cond_broadcast 77
+#define SYS_cond_timed_wait_abs 79
+#define SYS_thread_create 80
+#define SYS_thread_exit 81
+#define SYS_tls_init 82
+#define SYS_thread_nice 83
+#define SYS_tls_get 84
+#define SYS_second_tls_set 85
+#define SYS_second_tls_get 86
+#define SYS_exception_handler 87
+#define SYS_exception_stack 88
+#define SYS_exception_clear_flag 89
+#define SYS_sem_create 100
+#define SYS_sem_wait 101
+#define SYS_sem_post 102
+#define SYS_sem_get_value 103
+#define SYS_dyncode_create 104
+#define SYS_dyncode_modify 105
+#define SYS_dyncode_delete 106
+#define SYS_test_infoleak 109
+#define SYS_test_crash 110
+#define SYS_test_syscall_1 111
+#define SYS_test_syscall_2 112
diff --git a/src/runtime/syscall_solaris.c b/src/runtime/syscall_solaris.c
new file mode 100644
index 000000000..13ac31bde
--- /dev/null
+++ b/src/runtime/syscall_solaris.c
@@ -0,0 +1,23 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#pragma dynimport libc·chdir chdir "libc.so"
+#pragma dynimport libc·chroot chroot "libc.so"
+#pragma dynimport libc·close close "libc.so"
+#pragma dynimport libc·dlclose dlclose "libc.so"
+#pragma dynimport libc·dlopen dlopen "libc.so"
+#pragma dynimport libc·dlsym dlsym "libc.so"
+#pragma dynimport libc·execve execve "libc.so"
+#pragma dynimport libc·fcntl fcntl "libc.so"
+#pragma dynimport libc·gethostname gethostname "libc.so"
+#pragma dynimport libc·ioctl ioctl "libc.so"
+#pragma dynimport libc·pipe pipe "libc.so"
+#pragma dynimport libc·setgid setgid "libc.so"
+#pragma dynimport libc·setgroups setgroups "libc.so"
+#pragma dynimport libc·setsid setsid "libc.so"
+#pragma dynimport libc·setuid setuid "libc.so"
+#pragma dynimport libc·setpgid setpgid "libc.so"
+#pragma dynimport libc·syscall syscall "libc.so"
+#pragma dynimport libc·forkx forkx "libc.so"
+#pragma dynimport libc·wait4 wait4 "libc.so"
diff --git a/src/runtime/syscall_solaris.go b/src/runtime/syscall_solaris.go
new file mode 100644
index 000000000..d0a3fc8dd
--- /dev/null
+++ b/src/runtime/syscall_solaris.go
@@ -0,0 +1,322 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+var (
+ libc_chdir,
+ libc_chroot,
+ libc_close,
+ libc_dlopen,
+ libc_dlclose,
+ libc_dlsym,
+ libc_execve,
+ libc_exit,
+ libc_fcntl,
+ libc_forkx,
+ libc_gethostname,
+ libc_ioctl,
+ libc_pipe,
+ libc_setgid,
+ libc_setgroups,
+ libc_setsid,
+ libc_setuid,
+ libc_setpgid,
+ libc_syscall,
+ libc_wait4,
+ libc_write,
+ pipe1 libcFunc
+)
+
+//go:nosplit
+func syscall_sysvicall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(fn),
+ n: nargs,
+ args: unsafe.Pointer(&a1),
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ return call.r1, call.r2, call.err
+}
+
+//go:nosplit
+func syscall_rawsysvicall6(fn, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(fn),
+ n: nargs,
+ args: unsafe.Pointer(&a1),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.r1, call.r2, call.err
+}
+
+// TODO(aram): Once we remove all instances of C calling sysvicallN, make
+// sysvicallN return errors and replace the body of the following functions
+// with calls to sysvicallN.
+
+//go:nosplit
+func syscall_chdir(path uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_chdir),
+ n: 1,
+ args: unsafe.Pointer(&path),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+//go:nosplit
+func syscall_chroot(path uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_chroot),
+ n: 1,
+ args: unsafe.Pointer(&path),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+// like close, but must not split stack, for forkx.
+//go:nosplit
+func syscall_close(fd int32) int32 {
+ return int32(sysvicall1(&libc_close, uintptr(fd)))
+}
+
+func syscall_dlopen(name *byte, mode uintptr) (handle uintptr, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_dlopen),
+ n: 2,
+ args: unsafe.Pointer(&name),
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ if call.r1 == 0 {
+ return call.r1, call.err
+ }
+ return call.r1, 0
+}
+
+func syscall_dlclose(handle uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_dlclose),
+ n: 1,
+ args: unsafe.Pointer(&handle),
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ return call.r1
+}
+
+func syscall_dlsym(handle uintptr, name *byte) (proc uintptr, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_dlsym),
+ n: 2,
+ args: unsafe.Pointer(&handle),
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ if call.r1 == 0 {
+ return call.r1, call.err
+ }
+ return call.r1, 0
+}
+
+//go:nosplit
+func syscall_execve(path, argv, envp uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_execve),
+ n: 3,
+ args: unsafe.Pointer(&path),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+// like exit, but must not split stack, for forkx.
+//go:nosplit
+func syscall_exit(code uintptr) {
+ sysvicall1(&libc_exit, code)
+}
+
+//go:nosplit
+func syscall_fcntl(fd, cmd, arg uintptr) (val, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_fcntl),
+ n: 3,
+ args: unsafe.Pointer(&fd),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.r1, call.err
+}
+
+//go:nosplit
+func syscall_forkx(flags uintptr) (pid uintptr, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_forkx),
+ n: 1,
+ args: unsafe.Pointer(&flags),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.r1, call.err
+}
+
+func syscall_gethostname() (name string, err uintptr) {
+ cname := new([_MAXHOSTNAMELEN]byte)
+ var args = [2]uintptr{uintptr(unsafe.Pointer(&cname[0])), _MAXHOSTNAMELEN}
+ call := libcall{
+ fn: unsafe.Pointer(&libc_gethostname),
+ n: 2,
+ args: unsafe.Pointer(&args[0]),
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ if call.r1 != 0 {
+ return "", call.err
+ }
+ cname[_MAXHOSTNAMELEN-1] = 0
+ return gostringnocopy(&cname[0]), 0
+}
+
+//go:nosplit
+func syscall_ioctl(fd, req, arg uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_ioctl),
+ n: 3,
+ args: unsafe.Pointer(&fd),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+func syscall_pipe() (r, w, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&pipe1),
+ n: 0,
+ args: unsafe.Pointer(&pipe1), // it's unused but must be non-nil, otherwise crashes
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ return call.r1, call.r2, call.err
+}
+
+// This is syscall.RawSyscall; it exists to satisfy some build dependency,
+// but it doesn't work correctly.
+//
+// DO NOT USE!
+//
+// TODO(aram): make this panic once we stop calling fcntl(2) in net using it.
+func syscall_rawsyscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_syscall),
+ n: 4,
+ args: unsafe.Pointer(&trap),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.r1, call.r2, call.err
+}
+
+//go:nosplit
+func syscall_setgid(gid uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_setgid),
+ n: 1,
+ args: unsafe.Pointer(&gid),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+//go:nosplit
+func syscall_setgroups(ngid, gid uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_setgroups),
+ n: 2,
+ args: unsafe.Pointer(&ngid),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+//go:nosplit
+func syscall_setsid() (pid, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_setsid),
+ n: 0,
+ args: unsafe.Pointer(&libc_setsid), // it's unused but must be non-nil, otherwise crashes
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.r1, call.err
+}
+
+//go:nosplit
+func syscall_setuid(uid uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_setuid),
+ n: 1,
+ args: unsafe.Pointer(&uid),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+//go:nosplit
+func syscall_setpgid(pid, pgid uintptr) (err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_setpgid),
+ n: 2,
+ args: unsafe.Pointer(&pid),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.err
+}
+
+// This is syscall.Syscall; it exists to satisfy some build dependency,
+// but it doesn't work correctly.
+//
+// DO NOT USE!
+//
+// TODO(aram): make this panic once we stop calling fcntl(2) in net using it.
+func syscall_syscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_syscall),
+ n: 4,
+ args: unsafe.Pointer(&trap),
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ return call.r1, call.r2, call.err
+}
+
+func syscall_wait4(pid uintptr, wstatus *uint32, options uintptr, rusage unsafe.Pointer) (wpid int, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_wait4),
+ n: 4,
+ args: unsafe.Pointer(&pid),
+ }
+ entersyscallblock()
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ exitsyscall()
+ return int(call.r1), call.err
+}
+
+//go:nosplit
+func syscall_write(fd, buf, nbyte uintptr) (n, err uintptr) {
+ call := libcall{
+ fn: unsafe.Pointer(&libc_write),
+ n: 3,
+ args: unsafe.Pointer(&fd),
+ }
+ asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
+ return call.r1, call.err
+}
diff --git a/src/runtime/syscall_windows.c b/src/runtime/syscall_windows.c
new file mode 100644
index 000000000..e7903b517
--- /dev/null
+++ b/src/runtime/syscall_windows.c
@@ -0,0 +1,166 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "os_GOOS.h"
+#include "cgocall.h"
+#include "textflag.h"
+
+typedef struct HandleErr HandleErr;
+typedef struct SyscallErr SyscallErr;
+
+struct HandleErr {
+ uintptr handle;
+ uintptr err;
+};
+
+struct SyscallErr {
+ uintptr r1;
+ uintptr r2;
+ uintptr err;
+};
+
+#pragma textflag NOSPLIT
+HandleErr
+syscall·loadlibrary(uint16 *filename)
+{
+ LibCall c;
+ HandleErr r;
+
+ c.fn = runtime·LoadLibrary;
+ c.n = 1;
+ c.args = &filename;
+ runtime·cgocall_errno(runtime·asmstdcall, &c);
+ r.handle = c.r1;
+ if(r.handle == 0)
+ r.err = c.err;
+ else
+ r.err = 0;
+ return r;
+}
+
+#pragma textflag NOSPLIT
+HandleErr
+syscall·getprocaddress(uintptr handle, int8 *procname)
+{
+ LibCall c;
+ HandleErr r;
+
+ USED(procname);
+ c.fn = runtime·GetProcAddress;
+ c.n = 2;
+ c.args = &handle;
+ runtime·cgocall_errno(runtime·asmstdcall, &c);
+ r.handle = c.r1;
+ if(r.handle == 0)
+ r.err = c.err;
+ else
+ r.err = 0;
+ return r;
+}
+
+#pragma textflag NOSPLIT
+SyscallErr
+syscall·Syscall(uintptr fn, uintptr nargs, uintptr a1, uintptr a2, uintptr a3)
+{
+ LibCall c;
+
+ USED(a2);
+ USED(a3);
+ c.fn = (void*)fn;
+ c.n = nargs;
+ c.args = &a1;
+ runtime·cgocall_errno(runtime·asmstdcall, &c);
+ return (SyscallErr){c.r1, c.r2, c.err};
+}
+
+#pragma textflag NOSPLIT
+SyscallErr
+syscall·Syscall6(uintptr fn, uintptr nargs, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6)
+{
+ LibCall c;
+
+ USED(a2);
+ USED(a3);
+ USED(a4);
+ USED(a5);
+ USED(a6);
+ c.fn = (void*)fn;
+ c.n = nargs;
+ c.args = &a1;
+ runtime·cgocall_errno(runtime·asmstdcall, &c);
+ return (SyscallErr){c.r1, c.r2, c.err};
+}
+
+#pragma textflag NOSPLIT
+SyscallErr
+syscall·Syscall9(uintptr fn, uintptr nargs, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6, uintptr a7, uintptr a8, uintptr a9)
+{
+ LibCall c;
+
+ USED(a2);
+ USED(a3);
+ USED(a4);
+ USED(a5);
+ USED(a6);
+ USED(a7);
+ USED(a8);
+ USED(a9);
+ c.fn = (void*)fn;
+ c.n = nargs;
+ c.args = &a1;
+ runtime·cgocall_errno(runtime·asmstdcall, &c);
+ return (SyscallErr){c.r1, c.r2, c.err};
+}
+
+#pragma textflag NOSPLIT
+SyscallErr
+syscall·Syscall12(uintptr fn, uintptr nargs, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6, uintptr a7, uintptr a8, uintptr a9, uintptr a10, uintptr a11, uintptr a12)
+{
+ LibCall c;
+
+ USED(a2);
+ USED(a3);
+ USED(a4);
+ USED(a5);
+ USED(a6);
+ USED(a7);
+ USED(a8);
+ USED(a9);
+ USED(a10);
+ USED(a11);
+ USED(a12);
+ c.fn = (void*)fn;
+ c.n = nargs;
+ c.args = &a1;
+ runtime·cgocall_errno(runtime·asmstdcall, &c);
+ return (SyscallErr){c.r1, c.r2, c.err};
+}
+
+#pragma textflag NOSPLIT
+SyscallErr
+syscall·Syscall15(uintptr fn, uintptr nargs, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6, uintptr a7, uintptr a8, uintptr a9, uintptr a10, uintptr a11, uintptr a12, uintptr a13, uintptr a14, uintptr a15)
+{
+ LibCall c;
+
+ USED(a2);
+ USED(a3);
+ USED(a4);
+ USED(a5);
+ USED(a6);
+ USED(a7);
+ USED(a8);
+ USED(a9);
+ USED(a10);
+ USED(a11);
+ USED(a12);
+ USED(a13);
+ USED(a14);
+ USED(a15);
+ c.fn = (void*)fn;
+ c.n = nargs;
+ c.args = &a1;
+ runtime·cgocall_errno(runtime·asmstdcall, &c);
+ return (SyscallErr){c.r1, c.r2, c.err};
+}
diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go
new file mode 100644
index 000000000..0592c57e1
--- /dev/null
+++ b/src/runtime/syscall_windows.go
@@ -0,0 +1,88 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+type callbacks struct {
+ lock mutex
+ ctxt [cb_max]*wincallbackcontext
+ n int
+}
+
+func (c *wincallbackcontext) isCleanstack() bool {
+ return c.cleanstack
+}
+
+func (c *wincallbackcontext) setCleanstack(cleanstack bool) {
+ c.cleanstack = cleanstack
+}
+
+var (
+ cbs callbacks
+ cbctxts **wincallbackcontext = &cbs.ctxt[0] // to simplify access to cbs.ctxt in sys_windows_*.s
+
+ callbackasm byte // type isn't really byte, it's code in runtime
+)
+
+// callbackasmAddr returns the address of the runtime.callbackasm
+// function adjusted by i.
+// runtime.callbackasm is just a series of CALL instructions
+// (each is 5 bytes long), and we want the callback to arrive at
+// the corresponding call instruction instead of the start of
+// runtime.callbackasm.
+func callbackasmAddr(i int) uintptr {
+ return uintptr(add(unsafe.Pointer(&callbackasm), uintptr(i*5)))
+}
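+
+// For example, callbackasmAddr(2) is the address of the third CALL
+// instruction, 10 bytes past the start of runtime.callbackasm.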
+
+func compileCallback(fn eface, cleanstack bool) (code uintptr) {
+ if fn._type == nil || (fn._type.kind&kindMask) != kindFunc {
+ panic("compilecallback: not a function")
+ }
+ ft := (*functype)(unsafe.Pointer(fn._type))
+ if len(ft.out) != 1 {
+ panic("compilecallback: function must have one output parameter")
+ }
+ uintptrSize := unsafe.Sizeof(uintptr(0))
+ if t := (**_type)(unsafe.Pointer(&ft.out[0])); (*t).size != uintptrSize {
+ panic("compilecallback: output parameter size is wrong")
+ }
+ argsize := uintptr(0)
+ for _, t := range (*[1024](*_type))(unsafe.Pointer(&ft.in[0]))[:len(ft.in)] {
+ if (*t).size != uintptrSize {
+ panic("compilecallback: input parameter size is wrong")
+ }
+ argsize += uintptrSize
+ }
+
+ lock(&cbs.lock)
+ defer unlock(&cbs.lock)
+
+ n := cbs.n
+ for i := 0; i < n; i++ {
+ if cbs.ctxt[i].gobody == fn.data && cbs.ctxt[i].isCleanstack() == cleanstack {
+ return callbackasmAddr(i)
+ }
+ }
+ if n >= cb_max {
+ gothrow("too many callback functions")
+ }
+
+ c := new(wincallbackcontext)
+ c.gobody = fn.data
+ c.argsize = argsize
+ c.setCleanstack(cleanstack)
+ if cleanstack && argsize != 0 {
+ c.restorestack = argsize
+ } else {
+ c.restorestack = 0
+ }
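+	// restorestack tells the assembly callback stub how many argument bytes
+	// to pop before returning when the callee is expected to clean the stack
+	// (the Windows stdcall convention); cdecl-style callbacks leave it 0.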
+ cbs.ctxt[n] = c
+ cbs.n++
+
+ return callbackasmAddr(n)
+}
diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go
new file mode 100644
index 000000000..fabf935d8
--- /dev/null
+++ b/src/runtime/syscall_windows_test.go
@@ -0,0 +1,451 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "syscall"
+ "testing"
+ "unsafe"
+)
+
+type DLL struct {
+ *syscall.DLL
+ t *testing.T
+}
+
+func GetDLL(t *testing.T, name string) *DLL {
+ d, e := syscall.LoadDLL(name)
+ if e != nil {
+ t.Fatal(e)
+ }
+ return &DLL{DLL: d, t: t}
+}
+
+func (d *DLL) Proc(name string) *syscall.Proc {
+ p, e := d.FindProc(name)
+ if e != nil {
+ d.t.Fatal(e)
+ }
+ return p
+}
+
+func TestStdCall(t *testing.T) {
+ type Rect struct {
+ left, top, right, bottom int32
+ }
+ res := Rect{}
+ expected := Rect{1, 1, 40, 60}
+ a, _, _ := GetDLL(t, "user32.dll").Proc("UnionRect").Call(
+ uintptr(unsafe.Pointer(&res)),
+ uintptr(unsafe.Pointer(&Rect{10, 1, 14, 60})),
+ uintptr(unsafe.Pointer(&Rect{1, 2, 40, 50})))
+ if a != 1 || res.left != expected.left ||
+ res.top != expected.top ||
+ res.right != expected.right ||
+ res.bottom != expected.bottom {
+ t.Error("stdcall USER32.UnionRect returns", a, "res=", res)
+ }
+}
+
+func Test64BitReturnStdCall(t *testing.T) {
+
+ const (
+ VER_BUILDNUMBER = 0x0000004
+ VER_MAJORVERSION = 0x0000002
+ VER_MINORVERSION = 0x0000001
+ VER_PLATFORMID = 0x0000008
+ VER_PRODUCT_TYPE = 0x0000080
+ VER_SERVICEPACKMAJOR = 0x0000020
+ VER_SERVICEPACKMINOR = 0x0000010
+ VER_SUITENAME = 0x0000040
+
+ VER_EQUAL = 1
+ VER_GREATER = 2
+ VER_GREATER_EQUAL = 3
+ VER_LESS = 4
+ VER_LESS_EQUAL = 5
+
+ ERROR_OLD_WIN_VERSION syscall.Errno = 1150
+ )
+
+ type OSVersionInfoEx struct {
+ OSVersionInfoSize uint32
+ MajorVersion uint32
+ MinorVersion uint32
+ BuildNumber uint32
+ PlatformId uint32
+ CSDVersion [128]uint16
+ ServicePackMajor uint16
+ ServicePackMinor uint16
+ SuiteMask uint16
+ ProductType byte
+ Reserve byte
+ }
+
+ d := GetDLL(t, "kernel32.dll")
+
+ var m1, m2 uintptr
+ VerSetConditionMask := d.Proc("VerSetConditionMask")
+ m1, m2, _ = VerSetConditionMask.Call(m1, m2, VER_MAJORVERSION, VER_GREATER_EQUAL)
+ m1, m2, _ = VerSetConditionMask.Call(m1, m2, VER_MINORVERSION, VER_GREATER_EQUAL)
+ m1, m2, _ = VerSetConditionMask.Call(m1, m2, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL)
+ m1, m2, _ = VerSetConditionMask.Call(m1, m2, VER_SERVICEPACKMINOR, VER_GREATER_EQUAL)
+
+ vi := OSVersionInfoEx{
+ MajorVersion: 5,
+ MinorVersion: 1,
+ ServicePackMajor: 2,
+ ServicePackMinor: 0,
+ }
+ vi.OSVersionInfoSize = uint32(unsafe.Sizeof(vi))
+ r, _, e2 := d.Proc("VerifyVersionInfoW").Call(
+ uintptr(unsafe.Pointer(&vi)),
+ VER_MAJORVERSION|VER_MINORVERSION|VER_SERVICEPACKMAJOR|VER_SERVICEPACKMINOR,
+ m1, m2)
+ if r == 0 && e2 != ERROR_OLD_WIN_VERSION {
+ t.Errorf("VerifyVersionInfo failed: %s", e2)
+ }
+}
+
+func TestCDecl(t *testing.T) {
+ var buf [50]byte
+ fmtp, _ := syscall.BytePtrFromString("%d %d %d")
+ a, _, _ := GetDLL(t, "user32.dll").Proc("wsprintfA").Call(
+ uintptr(unsafe.Pointer(&buf[0])),
+ uintptr(unsafe.Pointer(fmtp)),
+ 1000, 2000, 3000)
+ if string(buf[:a]) != "1000 2000 3000" {
+ t.Error("cdecl USER32.wsprintfA returns", a, "buf=", buf[:a])
+ }
+}
+
+func TestEnumWindows(t *testing.T) {
+ d := GetDLL(t, "user32.dll")
+ isWindows := d.Proc("IsWindow")
+ counter := 0
+ cb := syscall.NewCallback(func(hwnd syscall.Handle, lparam uintptr) uintptr {
+ if lparam != 888 {
+ t.Error("lparam was not passed to callback")
+ }
+ b, _, _ := isWindows.Call(uintptr(hwnd))
+ if b == 0 {
+ t.Error("USER32.IsWindow returns FALSE")
+ }
+ counter++
+ return 1 // continue enumeration
+ })
+ a, _, _ := d.Proc("EnumWindows").Call(cb, 888)
+ if a == 0 {
+ t.Error("USER32.EnumWindows returns FALSE")
+ }
+ if counter == 0 {
+ t.Error("Callback has been never called or your have no windows")
+ }
+}
+
+func callback(hwnd syscall.Handle, lparam uintptr) uintptr {
+ (*(*func())(unsafe.Pointer(&lparam)))()
+ return 0 // stop enumeration
+}
+
+// nestedCall calls into Windows, back into Go, and finally to f.
+func nestedCall(t *testing.T, f func()) {
+ c := syscall.NewCallback(callback)
+ d := GetDLL(t, "user32.dll")
+ defer d.Release()
+ d.Proc("EnumWindows").Call(c, uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&f))))
+}
+
+func TestCallback(t *testing.T) {
+ var x = false
+ nestedCall(t, func() { x = true })
+ if !x {
+ t.Fatal("nestedCall did not call func")
+ }
+}
+
+func TestCallbackGC(t *testing.T) {
+ nestedCall(t, runtime.GC)
+}
+
+func TestCallbackPanicLocked(t *testing.T) {
+ runtime.LockOSThread()
+ defer runtime.UnlockOSThread()
+
+ if !runtime.LockedOSThread() {
+ t.Fatal("runtime.LockOSThread didn't")
+ }
+ defer func() {
+ s := recover()
+ if s == nil {
+ t.Fatal("did not panic")
+ }
+ if s.(string) != "callback panic" {
+ t.Fatal("wrong panic:", s)
+ }
+ if !runtime.LockedOSThread() {
+ t.Fatal("lost lock on OS thread after panic")
+ }
+ }()
+ nestedCall(t, func() { panic("callback panic") })
+ panic("nestedCall returned")
+}
+
+func TestCallbackPanic(t *testing.T) {
+ // Make sure panic during callback unwinds properly.
+ if runtime.LockedOSThread() {
+ t.Fatal("locked OS thread on entry to TestCallbackPanic")
+ }
+ defer func() {
+ s := recover()
+ if s == nil {
+ t.Fatal("did not panic")
+ }
+ if s.(string) != "callback panic" {
+ t.Fatal("wrong panic:", s)
+ }
+ if runtime.LockedOSThread() {
+ t.Fatal("locked OS thread on exit from TestCallbackPanic")
+ }
+ }()
+ nestedCall(t, func() { panic("callback panic") })
+ panic("nestedCall returned")
+}
+
+func TestCallbackPanicLoop(t *testing.T) {
+ // Make sure we don't blow out m->g0 stack.
+ for i := 0; i < 100000; i++ {
+ TestCallbackPanic(t)
+ }
+}
+
+func TestBlockingCallback(t *testing.T) {
+ c := make(chan int)
+ go func() {
+ for i := 0; i < 10; i++ {
+ c <- <-c
+ }
+ }()
+ nestedCall(t, func() {
+ for i := 0; i < 10; i++ {
+ c <- i
+ if j := <-c; j != i {
+ t.Errorf("out of sync %d != %d", j, i)
+ }
+ }
+ })
+}
+
+func TestCallbackInAnotherThread(t *testing.T) {
+ // TODO: test a function which calls back in another thread: QueueUserAPC() or CreateThread()
+}
+
+type cbDLLFunc int // int determines number of callback parameters
+
+func (f cbDLLFunc) stdcallName() string {
+ return fmt.Sprintf("stdcall%d", f)
+}
+
+func (f cbDLLFunc) cdeclName() string {
+ return fmt.Sprintf("cdecl%d", f)
+}
+
+func (f cbDLLFunc) buildOne(stdcall bool) string {
+ var funcname, attr string
+ if stdcall {
+ funcname = f.stdcallName()
+ attr = "__stdcall"
+ } else {
+ funcname = f.cdeclName()
+ attr = "__cdecl"
+ }
+ typename := "t" + funcname
+ p := make([]string, f)
+ for i := range p {
+ p[i] = "void*"
+ }
+ params := strings.Join(p, ",")
+ for i := range p {
+ p[i] = fmt.Sprintf("%d", i+1)
+ }
+ args := strings.Join(p, ",")
+ return fmt.Sprintf(`
+typedef void %s (*%s)(%s);
+void %s(%s f, void *n) {
+ int i;
+ for(i=0;i<(int)n;i++){
+ f(%s);
+ }
+}
+ `, attr, typename, params, funcname, typename, args)
+}
+
+func (f cbDLLFunc) build() string {
+ return f.buildOne(false) + f.buildOne(true)
+}
+
+var cbFuncs = [...]interface{}{
+ 2: func(i1, i2 uintptr) uintptr {
+ if i1+i2 != 3 {
+ panic("bad input")
+ }
+ return 0
+ },
+ 3: func(i1, i2, i3 uintptr) uintptr {
+ if i1+i2+i3 != 6 {
+ panic("bad input")
+ }
+ return 0
+ },
+ 4: func(i1, i2, i3, i4 uintptr) uintptr {
+ if i1+i2+i3+i4 != 10 {
+ panic("bad input")
+ }
+ return 0
+ },
+ 5: func(i1, i2, i3, i4, i5 uintptr) uintptr {
+ if i1+i2+i3+i4+i5 != 15 {
+ panic("bad input")
+ }
+ return 0
+ },
+ 6: func(i1, i2, i3, i4, i5, i6 uintptr) uintptr {
+ if i1+i2+i3+i4+i5+i6 != 21 {
+ panic("bad input")
+ }
+ return 0
+ },
+ 7: func(i1, i2, i3, i4, i5, i6, i7 uintptr) uintptr {
+ if i1+i2+i3+i4+i5+i6+i7 != 28 {
+ panic("bad input")
+ }
+ return 0
+ },
+ 8: func(i1, i2, i3, i4, i5, i6, i7, i8 uintptr) uintptr {
+ if i1+i2+i3+i4+i5+i6+i7+i8 != 36 {
+ panic("bad input")
+ }
+ return 0
+ },
+ 9: func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uintptr) uintptr {
+ if i1+i2+i3+i4+i5+i6+i7+i8+i9 != 45 {
+ panic("bad input")
+ }
+ return 0
+ },
+}
+
+type cbDLL struct {
+ name string
+ buildArgs func(out, src string) []string
+}
+
+func (d *cbDLL) buildSrc(t *testing.T, path string) {
+ f, err := os.Create(path)
+ if err != nil {
+ t.Fatalf("failed to create source file: %v", err)
+ }
+ defer f.Close()
+
+ for i := 2; i < 10; i++ {
+ fmt.Fprint(f, cbDLLFunc(i).build())
+ }
+}
+
+func (d *cbDLL) build(t *testing.T, dir string) string {
+ srcname := d.name + ".c"
+ d.buildSrc(t, filepath.Join(dir, srcname))
+ outname := d.name + ".dll"
+ args := d.buildArgs(outname, srcname)
+ cmd := exec.Command(args[0], args[1:]...)
+ cmd.Dir = dir
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build dll: %v - %v", err, string(out))
+ }
+ return filepath.Join(dir, outname)
+}
+
+var cbDLLs = []cbDLL{
+ {
+ "test",
+ func(out, src string) []string {
+ return []string{"gcc", "-shared", "-s", "-o", out, src}
+ },
+ },
+ {
+ "testO2",
+ func(out, src string) []string {
+ return []string{"gcc", "-shared", "-s", "-o", out, "-O2", src}
+ },
+ },
+}
+
+type cbTest struct {
+ n int // number of callback parameters
+ param uintptr // dll function parameter
+}
+
+func (test *cbTest) run(t *testing.T, dllpath string) {
+ dll := syscall.MustLoadDLL(dllpath)
+ defer dll.Release()
+ cb := cbFuncs[test.n]
+ stdcall := syscall.NewCallback(cb)
+ f := cbDLLFunc(test.n)
+ test.runOne(t, dll, f.stdcallName(), stdcall)
+ cdecl := syscall.NewCallbackCDecl(cb)
+ test.runOne(t, dll, f.cdeclName(), cdecl)
+}
+
+func (test *cbTest) runOne(t *testing.T, dll *syscall.DLL, proc string, cb uintptr) {
+ defer func() {
+ if r := recover(); r != nil {
+ t.Errorf("dll call %v(..., %d) failed: %v", proc, test.param, r)
+ }
+ }()
+ dll.MustFindProc(proc).Call(cb, test.param)
+}
+
+var cbTests = []cbTest{
+ {2, 1},
+ {2, 10000},
+ {3, 3},
+ {4, 5},
+ {4, 6},
+ {5, 2},
+ {6, 7},
+ {6, 8},
+ {7, 6},
+ {8, 1},
+ {9, 8},
+ {9, 10000},
+ {3, 4},
+ {5, 3},
+ {7, 7},
+ {8, 2},
+ {9, 9},
+}
+
+func TestStdcallAndCDeclCallbacks(t *testing.T) {
+ tmp, err := ioutil.TempDir("", "TestCDeclCallback")
+ if err != nil {
+ t.Fatal("TempDir failed: ", err)
+ }
+ defer os.RemoveAll(tmp)
+
+ for _, dll := range cbDLLs {
+ dllPath := dll.build(t, tmp)
+ for _, test := range cbTests {
+ test.run(t, dllPath)
+ }
+ }
+}
diff --git a/src/runtime/thunk.s b/src/runtime/thunk.s
new file mode 100644
index 000000000..babc92768
--- /dev/null
+++ b/src/runtime/thunk.s
@@ -0,0 +1,150 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file exposes various internal runtime functions to other packages in the standard library.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+#ifdef GOARCH_arm
+#define JMP B
+#endif
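+
+// Each thunk below declares a symbol in the target package (the middle dot in
+// a name like time·runtimeNano separates the package from the symbol) and
+// tail-jumps to the runtime implementation, so for example time.Sleep simply
+// runs runtime·timeSleep.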
+
+TEXT net·runtimeNano(SB),NOSPLIT,$0-0
+ JMP runtime·nanotime(SB)
+
+TEXT time·runtimeNano(SB),NOSPLIT,$0-0
+ JMP runtime·nanotime(SB)
+
+TEXT time·Sleep(SB),NOSPLIT,$0-0
+ JMP runtime·timeSleep(SB)
+
+TEXT time·startTimer(SB),NOSPLIT,$0-0
+ JMP runtime·startTimer(SB)
+
+TEXT time·stopTimer(SB),NOSPLIT,$0-0
+ JMP runtime·stopTimer(SB)
+
+TEXT sync·runtime_Syncsemacquire(SB),NOSPLIT,$0-0
+ JMP runtime·syncsemacquire(SB)
+
+TEXT sync·runtime_Syncsemrelease(SB),NOSPLIT,$0-0
+ JMP runtime·syncsemrelease(SB)
+
+TEXT sync·runtime_Syncsemcheck(SB),NOSPLIT,$0-0
+ JMP runtime·syncsemcheck(SB)
+
+TEXT sync·runtime_Semacquire(SB),NOSPLIT,$0-0
+ JMP runtime·asyncsemacquire(SB)
+
+TEXT sync·runtime_Semrelease(SB),NOSPLIT,$0-0
+ JMP runtime·asyncsemrelease(SB)
+
+TEXT sync·runtime_registerPoolCleanup(SB),NOSPLIT,$0-0
+ JMP runtime·registerPoolCleanup(SB)
+
+TEXT net·runtime_Semacquire(SB),NOSPLIT,$0-0
+ JMP runtime·asyncsemacquire(SB)
+
+TEXT net·runtime_Semrelease(SB),NOSPLIT,$0-0
+ JMP runtime·asyncsemrelease(SB)
+
+TEXT runtime∕pprof·runtime_cyclesPerSecond(SB),NOSPLIT,$0-0
+ JMP runtime·tickspersecond(SB)
+
+TEXT bytes·Compare(SB),NOSPLIT,$0-0
+ JMP runtime·cmpbytes(SB)
+
+TEXT runtime·reflectcall(SB), NOSPLIT, $0-0
+ JMP reflect·call(SB)
+
+TEXT reflect·chanclose(SB), NOSPLIT, $0-0
+ JMP runtime·closechan(SB)
+
+TEXT reflect·chanlen(SB), NOSPLIT, $0-0
+ JMP runtime·reflect_chanlen(SB)
+
+TEXT reflect·chancap(SB), NOSPLIT, $0-0
+ JMP runtime·reflect_chancap(SB)
+
+TEXT reflect·chansend(SB), NOSPLIT, $0-0
+ JMP runtime·reflect_chansend(SB)
+
+TEXT reflect·chanrecv(SB), NOSPLIT, $0-0
+ JMP runtime·reflect_chanrecv(SB)
+
+TEXT runtime∕debug·freeOSMemory(SB), NOSPLIT, $0-0
+ JMP runtime·freeOSMemory(SB)
+
+TEXT net·runtime_pollServerInit(SB),NOSPLIT,$0-0
+ JMP runtime·netpollServerInit(SB)
+
+TEXT net·runtime_pollOpen(SB),NOSPLIT,$0-0
+ JMP runtime·netpollOpen(SB)
+
+TEXT net·runtime_pollClose(SB),NOSPLIT,$0-0
+ JMP runtime·netpollClose(SB)
+
+TEXT net·runtime_pollReset(SB),NOSPLIT,$0-0
+ JMP runtime·netpollReset(SB)
+
+TEXT net·runtime_pollWait(SB),NOSPLIT,$0-0
+ JMP runtime·netpollWait(SB)
+
+TEXT net·runtime_pollWaitCanceled(SB),NOSPLIT,$0-0
+ JMP runtime·netpollWaitCanceled(SB)
+
+TEXT net·runtime_pollSetDeadline(SB),NOSPLIT,$0-0
+ JMP runtime·netpollSetDeadline(SB)
+
+TEXT net·runtime_pollUnblock(SB),NOSPLIT,$0-0
+ JMP runtime·netpollUnblock(SB)
+
+TEXT syscall·setenv_c(SB), NOSPLIT, $0-0
+ JMP runtime·syscall_setenv_c(SB)
+
+TEXT reflect·makemap(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_makemap(SB)
+
+TEXT reflect·mapaccess(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_mapaccess(SB)
+
+TEXT reflect·mapassign(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_mapassign(SB)
+
+TEXT reflect·mapdelete(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_mapdelete(SB)
+
+TEXT reflect·mapiterinit(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_mapiterinit(SB)
+
+TEXT reflect·mapiterkey(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_mapiterkey(SB)
+
+TEXT reflect·mapiternext(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_mapiternext(SB)
+
+TEXT reflect·maplen(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_maplen(SB)
+
+TEXT reflect·ismapkey(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_ismapkey(SB)
+
+TEXT reflect·ifaceE2I(SB),NOSPLIT,$0-0
+ JMP runtime·reflect_ifaceE2I(SB)
+
+TEXT reflect·unsafe_New(SB),NOSPLIT,$0-0
+ JMP runtime·newobject(SB)
+
+TEXT reflect·unsafe_NewArray(SB),NOSPLIT,$0-0
+ JMP runtime·newarray(SB)
+
+TEXT reflect·makechan(SB),NOSPLIT,$0-0
+ JMP runtime·makechan(SB)
+
+TEXT reflect·rselect(SB), NOSPLIT, $0-0
+ JMP runtime·reflect_rselect(SB)
+
+TEXT os·sigpipe(SB), NOSPLIT, $0-0
+ JMP runtime·os_sigpipe(SB)
diff --git a/src/runtime/thunk_solaris_amd64.s b/src/runtime/thunk_solaris_amd64.s
new file mode 100644
index 000000000..352011e04
--- /dev/null
+++ b/src/runtime/thunk_solaris_amd64.s
@@ -0,0 +1,88 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file exposes various external library functions to Go code in the runtime.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "../../cmd/ld/textflag.h"
+
+TEXT runtime·libc_chdir(SB),NOSPLIT,$0
+ MOVQ libc·chdir(SB), AX
+ JMP AX
+
+TEXT runtime·libc_chroot(SB),NOSPLIT,$0
+ MOVQ libc·chroot(SB), AX
+ JMP AX
+
+TEXT runtime·libc_close(SB),NOSPLIT,$0
+ MOVQ libc·close(SB), AX
+ JMP AX
+
+TEXT runtime·libc_dlopen(SB),NOSPLIT,$0
+ MOVQ libc·dlopen(SB), AX
+ JMP AX
+
+TEXT runtime·libc_dlclose(SB),NOSPLIT,$0
+ MOVQ libc·dlclose(SB), AX
+ JMP AX
+
+TEXT runtime·libc_dlsym(SB),NOSPLIT,$0
+ MOVQ libc·dlsym(SB), AX
+ JMP AX
+
+TEXT runtime·libc_execve(SB),NOSPLIT,$0
+ MOVQ libc·execve(SB), AX
+ JMP AX
+
+TEXT runtime·libc_exit(SB),NOSPLIT,$0
+ MOVQ libc·exit(SB), AX
+ JMP AX
+
+TEXT runtime·libc_fcntl(SB),NOSPLIT,$0
+ MOVQ libc·fcntl(SB), AX
+ JMP AX
+
+TEXT runtime·libc_forkx(SB),NOSPLIT,$0
+ MOVQ libc·forkx(SB), AX
+ JMP AX
+
+TEXT runtime·libc_gethostname(SB),NOSPLIT,$0
+ MOVQ libc·gethostname(SB), AX
+ JMP AX
+
+TEXT runtime·libc_ioctl(SB),NOSPLIT,$0
+ MOVQ libc·ioctl(SB), AX
+ JMP AX
+
+TEXT runtime·libc_setgid(SB),NOSPLIT,$0
+ MOVQ libc·setgid(SB), AX
+ JMP AX
+
+TEXT runtime·libc_setgroups(SB),NOSPLIT,$0
+ MOVQ libc·setgroups(SB), AX
+ JMP AX
+
+TEXT runtime·libc_setsid(SB),NOSPLIT,$0
+ MOVQ libc·setsid(SB), AX
+ JMP AX
+
+TEXT runtime·libc_setuid(SB),NOSPLIT,$0
+ MOVQ libc·setuid(SB), AX
+ JMP AX
+
+TEXT runtime·libc_setpgid(SB),NOSPLIT,$0
+ MOVQ libc·setpgid(SB), AX
+ JMP AX
+
+TEXT runtime·libc_syscall(SB),NOSPLIT,$0
+ MOVQ libc·syscall(SB), AX
+ JMP AX
+
+TEXT runtime·libc_wait4(SB),NOSPLIT,$0
+ MOVQ libc·wait4(SB), AX
+ JMP AX
+
+TEXT runtime·libc_write(SB),NOSPLIT,$0
+ MOVQ libc·write(SB), AX
+ JMP AX
diff --git a/src/runtime/time.go b/src/runtime/time.go
new file mode 100644
index 000000000..8cf9eecf8
--- /dev/null
+++ b/src/runtime/time.go
@@ -0,0 +1,266 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Time-related runtime support and pieces of package time.
+
+package runtime
+
+import "unsafe"
+
+// Package time knows the layout of this structure.
+// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
+// For GOOS=nacl, package syscall knows the layout of this structure.
+// If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer.
+type timer struct {
+ i int // heap index
+
+ // Timer wakes up at when, and then at when+period, ... (period > 0 only)
+ // each time calling f(now, arg) in the timer goroutine, so f must be
+ // a well-behaved function and not block.
+ when int64
+ period int64
+ f func(interface{}, uintptr)
+ arg interface{}
+ seq uintptr
+}
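+
+// For example, a timer with when = nanotime()+1e9 and period = 1e9 will have
+// f(arg, seq) called by the timer goroutine roughly once per second until
+// deltimer removes it.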
+
+var timers struct {
+ lock mutex
+ gp *g
+ created bool
+ sleeping bool
+ rescheduling bool
+ waitnote note
+ t []*timer
+}
+
+// nacl fake time support.
+var timens int64
+
+// Package time APIs.
+// Godoc uses the comments in package time, not these.
+
+// time.now is implemented in assembly.
+
+// Sleep puts the current goroutine to sleep for at least ns nanoseconds.
+func timeSleep(ns int64) {
+ if ns <= 0 {
+ return
+ }
+
+ t := new(timer)
+ t.when = nanotime() + ns
+ t.f = goroutineReady
+ t.arg = getg()
+ lock(&timers.lock)
+ addtimerLocked(t)
+ goparkunlock(&timers.lock, "sleep")
+}
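+
+// (Package time reaches timeSleep through the time·Sleep thunk in thunk.s.)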
+
+// startTimer adds t to the timer heap.
+func startTimer(t *timer) {
+ if raceenabled {
+ racerelease(unsafe.Pointer(t))
+ }
+ addtimer(t)
+}
+
+// stopTimer removes t from the timer heap if it is there.
+// It returns true if t was removed, false if t wasn't even there.
+func stopTimer(t *timer) bool {
+ return deltimer(t)
+}
+
+// Go runtime.
+
+// Ready the goroutine arg.
+func goroutineReady(arg interface{}, seq uintptr) {
+ goready(arg.(*g))
+}
+
+func addtimer(t *timer) {
+ lock(&timers.lock)
+ addtimerLocked(t)
+ unlock(&timers.lock)
+}
+
+// Add a timer to the heap and start or kick timerproc if the new timer
+// is earlier than any of the others.
+// Timers are locked.
+func addtimerLocked(t *timer) {
+ // when must never be negative; otherwise timerproc will overflow
+ // during its delta calculation and never expire other runtime·timers.
+ if t.when < 0 {
+ t.when = 1<<63 - 1
+ }
+ t.i = len(timers.t)
+ timers.t = append(timers.t, t)
+ siftupTimer(t.i)
+ if t.i == 0 {
+ // siftup moved to top: new earliest deadline.
+ if timers.sleeping {
+ timers.sleeping = false
+ notewakeup(&timers.waitnote)
+ }
+ if timers.rescheduling {
+ timers.rescheduling = false
+ goready(timers.gp)
+ }
+ }
+ if !timers.created {
+ timers.created = true
+ go timerproc()
+ }
+}
+
+// Delete timer t from the heap.
+// Do not need to update the timerproc: if it wakes up early, no big deal.
+func deltimer(t *timer) bool {
+ // Dereference t so that any panic happens before the lock is held.
+ // Discard result, because t might be moving in the heap.
+ _ = t.i
+
+ lock(&timers.lock)
+ // t may not be registered anymore and may have
+ // a bogus i (typically 0, if generated by Go).
+ // Verify it before proceeding.
+ i := t.i
+ last := len(timers.t) - 1
+ if i < 0 || i > last || timers.t[i] != t {
+ unlock(&timers.lock)
+ return false
+ }
+ if i != last {
+ timers.t[i] = timers.t[last]
+ timers.t[i].i = i
+ }
+ timers.t[last] = nil
+ timers.t = timers.t[:last]
+ if i != last {
+ siftupTimer(i)
+ siftdownTimer(i)
+ }
+ unlock(&timers.lock)
+ return true
+}
+
+// Timerproc runs the time-driven events.
+// It sleeps until the next event in the timers heap.
+// If addtimer inserts a new earlier event, addtimerLocked wakes timerproc early.
+func timerproc() {
+ timers.gp = getg()
+ timers.gp.issystem = true
+ for {
+ lock(&timers.lock)
+ timers.sleeping = false
+ now := nanotime()
+ delta := int64(-1)
+ for {
+ if len(timers.t) == 0 {
+ delta = -1
+ break
+ }
+ t := timers.t[0]
+ delta = t.when - now
+ if delta > 0 {
+ break
+ }
+ if t.period > 0 {
+ // leave in heap but adjust next time to fire
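+				// delta is <= 0 here, so this advances when past now by a
+				// whole number of periods (any missed firings are skipped).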
+ t.when += t.period * (1 + -delta/t.period)
+ siftdownTimer(0)
+ } else {
+ // remove from heap
+ last := len(timers.t) - 1
+ if last > 0 {
+ timers.t[0] = timers.t[last]
+ timers.t[0].i = 0
+ }
+ timers.t[last] = nil
+ timers.t = timers.t[:last]
+ if last > 0 {
+ siftdownTimer(0)
+ }
+ t.i = -1 // mark as removed
+ }
+ f := t.f
+ arg := t.arg
+ seq := t.seq
+ unlock(&timers.lock)
+ if raceenabled {
+ raceacquire(unsafe.Pointer(t))
+ }
+ f(arg, seq)
+ lock(&timers.lock)
+ }
+ if delta < 0 {
+ // No timers left - put goroutine to sleep.
+ timers.rescheduling = true
+ goparkunlock(&timers.lock, "timer goroutine (idle)")
+ continue
+ }
+ // At least one timer pending. Sleep until then.
+ timers.sleeping = true
+ noteclear(&timers.waitnote)
+ unlock(&timers.lock)
+ notetsleepg(&timers.waitnote, delta)
+ }
+}
+
+// Heap maintenance algorithms.
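+// The timers.t slice is kept as a quaternary (4-ary) heap: the children of
+// element i are at indexes 4*i+1 through 4*i+4 and its parent is at (i-1)/4,
+// which keeps the heap shallow and the sift loops below short.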
+
+func siftupTimer(i int) {
+ t := timers.t
+ when := t[i].when
+ tmp := t[i]
+ for i > 0 {
+ p := (i - 1) / 4 // parent
+ if when >= t[p].when {
+ break
+ }
+ t[i] = t[p]
+ t[i].i = i
+ t[p] = tmp
+ t[p].i = p
+ i = p
+ }
+}
+
+func siftdownTimer(i int) {
+ t := timers.t
+ n := len(t)
+ when := t[i].when
+ tmp := t[i]
+ for {
+ c := i*4 + 1 // left child
+ c3 := c + 2 // mid child
+ if c >= n {
+ break
+ }
+ w := t[c].when
+ if c+1 < n && t[c+1].when < w {
+ w = t[c+1].when
+ c++
+ }
+ if c3 < n {
+ w3 := t[c3].when
+ if c3+1 < n && t[c3+1].when < w3 {
+ w3 = t[c3+1].when
+ c3++
+ }
+ if w3 < w {
+ w = w3
+ c = c3
+ }
+ }
+ if w >= when {
+ break
+ }
+ t[i] = t[c]
+ t[i].i = i
+ t[c] = tmp
+ t[c].i = c
+ i = c
+ }
+}
diff --git a/src/runtime/tls_arm.s b/src/runtime/tls_arm.s
new file mode 100644
index 000000000..7a247ab19
--- /dev/null
+++ b/src/runtime/tls_arm.s
@@ -0,0 +1,65 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "funcdata.h"
+#include "textflag.h"
+
+// We have to resort to a TLS variable to save g (R10).
+// One reason is that external code might trigger
+// SIGSEGV, and our runtime.sigtramp doesn't even know we
+// are in external code, and will continue to use R10,
+// which may well result in another SIGSEGV.
+// Note: both functions will clobber R0 and R11 and
+// can be called from 5c ABI code.
+
+// On android, runtime.tlsg is a normal variable.
+// TLS offset is computed in x_cgo_inittls.
+
+// save_g saves the g register into pthread-provided
+// thread-local memory, so that we can call externally compiled
+// ARM code that will overwrite those registers.
+// NOTE: runtime.gogo assumes that R1 is preserved by this function.
+// runtime.mcall assumes this function only clobbers R0 and R11.
+// Returns with g in R0.
+TEXT runtime·save_g(SB),NOSPLIT,$-4
+#ifdef GOOS_nacl
+ // nothing to do as nacl/arm does not use TLS at all.
+ MOVW g, R0 // preserve R0 across call to setg<>
+ RET
+#endif
+ MRC 15, 0, R0, C13, C0, 3 // fetch TLS base pointer
+ // $runtime.tlsg(SB) is a special linker symbol.
+ // It is the offset from the TLS base pointer to our
+ // thread-local storage for g.
+#ifdef GOOS_android
+ MOVW runtime·tlsg(SB), R11
+#else
+ MOVW $runtime·tlsg(SB), R11
+#endif
+ ADD R11, R0
+ MOVW g, 0(R0)
+ MOVW g, R0 // preserve R0 across call to setg<>
+ RET
+
+// load_g loads the g register from pthread-provided
+// thread-local memory, for use after calling externally compiled
+// ARM code that overwrote those registers.
+TEXT runtime·load_g(SB),NOSPLIT,$0
+#ifdef GOOS_nacl
+ // nothing to do as nacl/arm does not use TLS at all.
+ RET
+#endif
+ MRC 15, 0, R0, C13, C0, 3 // fetch TLS base pointer
+ // $runtime.tlsg(SB) is a special linker symbol.
+ // It is the offset from the TLS base pointer to our
+ // thread-local storage for g.
+#ifdef GOOS_android
+ MOVW runtime·tlsg(SB), R11
+#else
+ MOVW $runtime·tlsg(SB), R11
+#endif
+ ADD R11, R0
+ MOVW 0(R0), g
+ RET
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
new file mode 100644
index 000000000..ec7be28dc
--- /dev/null
+++ b/src/runtime/traceback.go
@@ -0,0 +1,639 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// The code in this file implements stack trace walking for all architectures.
+// The most important fact about a given architecture is whether it uses a link register.
+// On systems with link registers, the prologue for a non-leaf function stores the
+// incoming value of LR at the bottom of the newly allocated stack frame.
+// On systems without link registers, the architecture pushes a return PC during
+// the call instruction, so the return PC ends up above the stack frame.
+// In this file, the return PC is always called LR, no matter how it was found.
+//
+// To date, the opposite of a link register architecture is an x86 architecture.
+// This code may need to change if some other kind of non-link-register
+// architecture comes along.
+//
+// The other important fact is the size of a pointer: on 32-bit systems the LR
+// takes up only 4 bytes on the stack, while on 64-bit systems it takes up 8 bytes.
+// Typically this is ptrSize.
+//
+// As an exception, amd64p32 has ptrSize == 4 but the CALL instruction still
+// stores an 8-byte return PC onto the stack. To accommodate this, we use regSize
+// as the size of the architecture-pushed return PC.
+//
+// usesLR is defined below. ptrSize and regSize are defined in stubs.go.
+
+const usesLR = GOARCH != "amd64" && GOARCH != "amd64p32" && GOARCH != "386"
+
+var (
+ deferprocPC = funcPC(deferproc)
+ goexitPC = funcPC(goexit)
+ jmpdeferPC = funcPC(jmpdefer)
+ lessstackPC = funcPC(lessstack)
+ mcallPC = funcPC(mcall)
+ morestackPC = funcPC(morestack)
+ mstartPC = funcPC(mstart)
+ newprocPC = funcPC(newproc)
+ newstackPC = funcPC(newstack)
+ rt0_goPC = funcPC(rt0_go)
+ sigpanicPC = funcPC(sigpanic)
+
+ externalthreadhandlerp uintptr // initialized elsewhere
+)
+
+// System-specific hook. See traceback_windows.go
+var systraceback func(*_func, *stkframe, *g, bool, func(*stkframe, unsafe.Pointer) bool, unsafe.Pointer) (changed, aborted bool)
+
+// Generic traceback. Handles runtime stack prints (pcbuf == nil),
+// the runtime.Callers function (pcbuf != nil), as well as the garbage
+// collector (callback != nil). A little clunky to merge these, but avoids
+// duplicating the code and all its subtlety.
+func gentraceback(pc0 uintptr, sp0 uintptr, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max int, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer, printall bool) int {
+ g := getg()
+ gotraceback := gotraceback(nil)
+ if pc0 == ^uintptr(0) && sp0 == ^uintptr(0) { // Signal to fetch saved values from gp.
+ if gp.syscallstack != 0 {
+ pc0 = gp.syscallpc
+ sp0 = gp.syscallsp
+ if usesLR {
+ lr0 = 0
+ }
+ } else {
+ pc0 = gp.sched.pc
+ sp0 = gp.sched.sp
+ if usesLR {
+ lr0 = gp.sched.lr
+ }
+ }
+ }
+
+ nprint := 0
+ var frame stkframe
+ frame.pc = pc0
+ frame.sp = sp0
+ if usesLR {
+ frame.lr = lr0
+ }
+ waspanic := false
+ wasnewproc := false
+ printing := pcbuf == nil && callback == nil
+ panic := gp._panic
+ _defer := gp._defer
+
+ for _defer != nil && uintptr(_defer.argp) == _NoArgs {
+ _defer = _defer.link
+ }
+ for panic != nil && panic._defer == nil {
+ panic = panic.link
+ }
+
+ // If the PC is zero, it's likely a nil function call.
+ // Start in the caller's frame.
+ if frame.pc == 0 {
+ if usesLR {
+ frame.pc = *(*uintptr)(unsafe.Pointer(frame.sp))
+ frame.lr = 0
+ } else {
+ frame.pc = uintptr(*(*uintreg)(unsafe.Pointer(frame.sp)))
+ frame.sp += regSize
+ }
+ }
+
+ f := findfunc(frame.pc)
+ if f == nil {
+ if callback != nil {
+ print("runtime: unknown pc ", hex(frame.pc), "\n")
+ gothrow("unknown pc")
+ }
+ return 0
+ }
+ frame.fn = f
+
+ n := 0
+ stk := (*stktop)(unsafe.Pointer(gp.stackbase))
+ for n < max {
+ // Typically:
+ // pc is the PC of the running function.
+ // sp is the stack pointer at that program counter.
+ // fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown.
+ // stk is the stack containing sp.
+ // The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp.
+ if frame.pc == lessstackPC {
+ // Hit top of stack segment. Unwind to next segment.
+ frame.pc = stk.gobuf.pc
+ frame.sp = stk.gobuf.sp
+ frame.lr = 0
+ frame.fp = 0
+ if printing && showframe(nil, gp) {
+ print("----- stack segment boundary -----\n")
+ }
+ stk = (*stktop)(unsafe.Pointer(stk.stackbase))
+ f = findfunc(frame.pc)
+ if f == nil {
+ print("runtime: unknown pc ", hex(frame.pc), " after stack split\n")
+ if callback != nil {
+ gothrow("unknown pc")
+ }
+ }
+ frame.fn = f
+ continue
+ }
+ f = frame.fn
+
+ // Hook for handling Windows exception handlers. See traceback_windows.go.
+ if systraceback != nil {
+ changed, aborted := systraceback(f, (*stkframe)(noescape(unsafe.Pointer(&frame))), gp, printing, callback, v)
+ if aborted {
+ return n
+ }
+ if changed {
+ continue
+ }
+ }
+
+ // Found an actual function.
+ // Derive frame pointer and link register.
+ if frame.fp == 0 {
+ frame.fp = frame.sp + uintptr(funcspdelta(f, frame.pc))
+ if !usesLR {
+ // On x86, call instruction pushes return PC before entering new function.
+ frame.fp += regSize
+ }
+ }
+ var flr *_func
+ if topofstack(f) {
+ frame.lr = 0
+ flr = nil
+ } else if usesLR && f.entry == jmpdeferPC {
+ // jmpdefer modifies SP/LR/PC non-atomically.
+ // If a profiling interrupt arrives during jmpdefer,
+ // the stack unwind may see a mismatched register set
+ // and get confused. Stop if we see PC within jmpdefer
+ // to avoid that confusion.
+ // See golang.org/issue/8153.
+ if callback != nil {
+ gothrow("traceback_arm: found jmpdefer when tracing with callback")
+ }
+ frame.lr = 0
+ } else {
+ if usesLR {
+ if n == 0 && frame.sp < frame.fp || frame.lr == 0 {
+ frame.lr = *(*uintptr)(unsafe.Pointer(frame.sp))
+ }
+ } else {
+ if frame.lr == 0 {
+ frame.lr = uintptr(*(*uintreg)(unsafe.Pointer(frame.fp - regSize)))
+ }
+ }
+ flr = findfunc(frame.lr)
+ if flr == nil {
+ // This happens if you get a profiling interrupt at just the wrong time.
+ // In that context it is okay to stop early.
+ // But if callback is set, we're doing a garbage collection and must
+ // get everything, so crash loudly.
+ if callback != nil {
+ print("runtime: unexpected return pc for ", gofuncname(f), " called from ", hex(frame.lr), "\n")
+ gothrow("unknown caller pc")
+ }
+ }
+ }
+
+ frame.varp = frame.fp
+ if !usesLR {
+ // On x86, call instruction pushes return PC before entering new function.
+ frame.varp -= regSize
+ }
+
+ // Derive size of arguments.
+ // Most functions have a fixed-size argument block,
+ // so we can use metadata about the function f.
+ // Not all, though: there are some variadic functions
+ // in package runtime and reflect, and for those we use call-specific
+ // metadata recorded by f's caller.
+ if callback != nil || printing {
+ frame.argp = frame.fp
+ if usesLR {
+ frame.argp += ptrSize
+ }
+ if f.args != _ArgsSizeUnknown {
+ frame.arglen = uintptr(f.args)
+ } else if flr == nil {
+ frame.arglen = 0
+ } else if frame.lr == lessstackPC {
+ frame.arglen = uintptr(stk.argsize)
+ } else {
+ i := funcarglen(flr, frame.lr)
+ if i >= 0 {
+ frame.arglen = uintptr(i)
+ } else {
+ var tmp string
+ if flr != nil {
+ tmp = gofuncname(flr)
+ } else {
+ tmp = "?"
+ }
+ print("runtime: unknown argument frame size for ", gofuncname(f), " called from ", hex(frame.lr), " [", tmp, "]\n")
+ if callback != nil {
+ gothrow("invalid stack")
+ }
+ frame.arglen = 0
+ }
+ }
+ }
+
+ // Determine function SP where deferproc would find its arguments.
+ var sparg uintptr
+ if usesLR {
+ // On link register architectures, that's the standard bottom-of-stack plus 1 word
+ // for the saved LR. If the previous frame was a direct call to newproc/deferproc,
+ // however, the SP is three words lower than normal.
+ // If the function has no frame at all - perhaps it just started, or perhaps
+ // it is a leaf with no local variables - then we cannot possibly find its
+ // SP in a defer, and we might confuse its SP for its caller's SP, so
+ // leave sparg=0 in that case.
+ if frame.fp != frame.sp {
+ sparg = frame.sp + regSize
+ if wasnewproc {
+ sparg += 3 * regSize
+ }
+ }
+ } else {
+ // On x86 that's the standard bottom-of-stack, so SP exactly.
+ // If the previous frame was a direct call to newproc/deferproc, however,
+ // the SP is two words lower than normal.
+ sparg = frame.sp
+ if wasnewproc {
+ sparg += 2 * ptrSize
+ }
+ }
+
+ // Determine frame's 'continuation PC', where it can continue.
+ // Normally this is the return address on the stack, but if sigpanic
+ // is immediately below this function on the stack, then the frame
+ // stopped executing due to a trap, and frame.pc is probably not
+ // a safe point for looking up liveness information. In this panicking case,
+ // the function either doesn't return at all (if it has no defers or if the
+ // defers do not recover) or it returns from one of the calls to
+ // deferproc a second time (if the corresponding deferred func recovers).
+ // It suffices to assume that the most recent deferproc is the one that
+ // returns; everything live at earlier deferprocs is still live at that one.
+ frame.continpc = frame.pc
+ if waspanic {
+ if panic != nil && panic._defer.argp == sparg {
+ frame.continpc = panic._defer.pc
+ } else if _defer != nil && _defer.argp == sparg {
+ frame.continpc = _defer.pc
+ } else {
+ frame.continpc = 0
+ }
+ }
+
+ // Unwind our local panic & defer stacks past this frame.
+ for panic != nil && (panic._defer == nil || panic._defer.argp == sparg || panic._defer.argp == _NoArgs) {
+ panic = panic.link
+ }
+ for _defer != nil && (_defer.argp == sparg || _defer.argp == _NoArgs) {
+ _defer = _defer.link
+ }
+
+ if skip > 0 {
+ skip--
+ goto skipped
+ }
+
+ if pcbuf != nil {
+ (*[1 << 20]uintptr)(unsafe.Pointer(pcbuf))[n] = frame.pc
+ }
+ if callback != nil {
+ if !callback((*stkframe)(noescape(unsafe.Pointer(&frame))), v) {
+ return n
+ }
+ }
+ if printing {
+ if printall || showframe(f, gp) {
+ // Print during crash.
+ // main(0x1, 0x2, 0x3)
+ // /home/rsc/go/src/runtime/x.go:23 +0xf
+ //
+ tracepc := frame.pc // back up to CALL instruction for funcline.
+ if n > 0 && frame.pc > f.entry && !waspanic {
+ tracepc--
+ }
+ print(gofuncname(f), "(")
+ argp := (*[100]uintptr)(unsafe.Pointer(frame.argp))
+ for i := uintptr(0); i < frame.arglen/ptrSize; i++ {
+ if i >= 10 {
+ print(", ...")
+ break
+ }
+ if i != 0 {
+ print(", ")
+ }
+ print(hex(argp[i]))
+ }
+ print(")\n")
+ var file string
+ line := funcline(f, tracepc, &file)
+ print("\t", file, ":", line)
+ if frame.pc > f.entry {
+ print(" +", hex(frame.pc-f.entry))
+ }
+ if g.m.throwing > 0 && gp == g.m.curg || gotraceback >= 2 {
+ print(" fp=", hex(frame.fp), " sp=", hex(frame.sp))
+ }
+ print("\n")
+ nprint++
+ }
+ }
+ n++
+
+ skipped:
+ waspanic = f.entry == sigpanicPC
+ wasnewproc = f.entry == newprocPC || f.entry == deferprocPC
+
+ // Do not unwind past the bottom of the stack.
+ if flr == nil {
+ break
+ }
+
+ // Unwind to next frame.
+ frame.fn = flr
+ frame.pc = frame.lr
+ frame.lr = 0
+ frame.sp = frame.fp
+ frame.fp = 0
+
+ // On link register architectures, sighandler saves the LR on stack
+ // before faking a call to sigpanic.
+ if usesLR && waspanic {
+ x := *(*uintptr)(unsafe.Pointer(frame.sp))
+ frame.sp += ptrSize
+ f = findfunc(frame.pc)
+ frame.fn = f
+ if f == nil {
+ frame.pc = x
+ } else if f.frame == 0 {
+ frame.lr = x
+ }
+ }
+ }
+
+ if pcbuf == nil && callback == nil {
+ n = nprint
+ }
+
+ // If callback != nil, we're being called to gather stack information during
+ // garbage collection or stack growth. In that context, require that we used
+ // up the entire defer stack. If not, then there is a bug somewhere and the
+ // garbage collection or stack growth may not have seen the correct picture
+ // of the stack. Crash now instead of silently executing the garbage collection
+ // or stack copy incorrectly and setting up for a mysterious crash later.
+ //
+ // Note that panic != nil is okay here: there can be leftover panics,
+ // because the defers on the panic stack do not nest in frame order as
+ // they do on the defer stack. If you have:
+ //
+ // frame 1 defers d1
+ // frame 2 defers d2
+ // frame 3 defers d3
+ // frame 4 panics
+ // frame 4's panic starts running defers
+ // frame 5, running d3, defers d4
+ // frame 5 panics
+ // frame 5's panic starts running defers
+ // frame 6, running d4, garbage collects
+ // frame 6, running d2, garbage collects
+ //
+ // During the execution of d4, the panic stack is d4 -> d3, which
+ // is nested properly, and we'll treat frame 3 as resumable, because we
+ // can find d3. (And in fact frame 3 is resumable. If d4 recovers
+ // and frame 5 continues running, d3, d3 can recover and we'll
+ // resume execution in (returning from) frame 3.)
+ //
+ // During the execution of d2, however, the panic stack is d2 -> d3,
+ // which is inverted. The scan will match d2 to frame 2 but having
+ // d2 on the stack until then means it will not match d3 to frame 3.
+ // This is okay: if we're running d2, then all the defers after d2 have
+ // completed and their corresponding frames are dead. Not finding d3
+ // for frame 3 means we'll set frame 3's continpc == 0, which is correct
+ // (frame 3 is dead). At the end of the walk the panic stack can thus
+ // contain defers (d3 in this case) for dead frames. The inversion here
+ // always indicates a dead frame, and the effect of the inversion on the
+ // scan is to hide those dead frames, so the scan is still okay:
+ // what's left on the panic stack are exactly (and only) the dead frames.
+ //
+ // We require callback != nil here because only when callback != nil
+ // do we know that gentraceback is being called in a "must be correct"
+ // context as opposed to a "best effort" context. The tracebacks with
+ // callbacks only happen when everything is stopped nicely.
+ // At other times, such as when gathering a stack for a profiling signal
+ // or when printing a traceback during a crash, everything may not be
+ // stopped nicely, and the stack walk may not be able to complete.
+ // It's okay in those situations not to use up the entire defer stack:
+ // incomplete information then is still better than nothing.
+ if callback != nil && n < max && _defer != nil {
+ if _defer != nil {
+ print("runtime: g", gp.goid, ": leftover defer argp=", hex(_defer.argp), " pc=", hex(_defer.pc), "\n")
+ }
+ if panic != nil {
+ print("runtime: g", gp.goid, ": leftover panic argp=", hex(panic._defer.argp), " pc=", hex(panic._defer.pc), "\n")
+ }
+ for _defer = gp._defer; _defer != nil; _defer = _defer.link {
+ print("\tdefer ", _defer, " argp=", hex(_defer.argp), " pc=", hex(_defer.pc), "\n")
+ }
+ for panic = gp._panic; panic != nil; panic = panic.link {
+ print("\tpanic ", panic, " defer ", panic._defer)
+ if panic._defer != nil {
+ print(" argp=", hex(panic._defer.argp), " pc=", hex(panic._defer.pc))
+ }
+ print("\n")
+ }
+ gothrow("traceback has leftover defers or panics")
+ }
+
+ return n
+}
+
+func printcreatedby(gp *g) {
+ // Show what created the goroutine, except for the main goroutine (goid 1).
+ pc := gp.gopc
+ f := findfunc(pc)
+ if f != nil && showframe(f, gp) && gp.goid != 1 {
+ print("created by ", gofuncname(f), "\n")
+ tracepc := pc // back up to CALL instruction for funcline.
+ if pc > f.entry {
+ tracepc -= _PCQuantum
+ }
+ var file string
+ line := funcline(f, tracepc, &file)
+ print("\t", file, ":", line)
+ if pc > f.entry {
+ print(" +", hex(pc-f.entry))
+ }
+ print("\n")
+ }
+}
+
+func traceback(pc uintptr, sp uintptr, lr uintptr, gp *g) {
+ var n int
+ if readgstatus(gp)&^_Gscan == _Gsyscall {
+ // Override signal registers if blocked in system call.
+ pc = gp.syscallpc
+ sp = gp.syscallsp
+ }
+ // Print traceback. By default, omits runtime frames.
+ // If that means we print nothing at all, repeat forcing all frames printed.
+ n = gentraceback(pc, sp, 0, gp, 0, nil, _TracebackMaxFrames, nil, nil, false)
+ if n == 0 {
+ n = gentraceback(pc, sp, 0, gp, 0, nil, _TracebackMaxFrames, nil, nil, true)
+ }
+ if n == _TracebackMaxFrames {
+ print("...additional frames elided...\n")
+ }
+ printcreatedby(gp)
+}
+
+func callers(skip int, pcbuf *uintptr, m int) int {
+ sp := getcallersp(unsafe.Pointer(&skip))
+ pc := uintptr(getcallerpc(unsafe.Pointer(&skip)))
+ return gentraceback(pc, sp, 0, getg(), skip, pcbuf, m, nil, nil, false)
+}
+
+func gcallers(gp *g, skip int, pcbuf *uintptr, m int) int {
+ return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, pcbuf, m, nil, nil, false)
+}
+
+func showframe(f *_func, gp *g) bool {
+ g := getg()
+ if g.m.throwing > 0 && gp != nil && (gp == g.m.curg || gp == g.m.caughtsig) {
+ return true
+ }
+ traceback := gotraceback(nil)
+ name := gostringnocopy(funcname(f))
+
+ // Special case: always show runtime.panic frame, so that we can
+ // see where a panic started in the middle of a stack trace.
+ // See golang.org/issue/5832.
+ if name == "runtime.panic" {
+ return true
+ }
+
+ return traceback > 1 || f != nil && contains(name, ".") && !hasprefix(name, "runtime.")
+}
+
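+// contains and hasprefix are local versions of strings.Contains and
+// strings.HasPrefix; package runtime cannot import package strings.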
+func contains(s, t string) bool {
+ if len(t) == 0 {
+ return true
+ }
+ for i := 0; i < len(s); i++ {
+ if s[i] == t[0] && hasprefix(s[i:], t) {
+ return true
+ }
+ }
+ return false
+}
+
+func hasprefix(s, t string) bool {
+ return len(s) >= len(t) && s[:len(t)] == t
+}
+
+var gStatusStrings = [...]string{
+ _Gidle: "idle",
+ _Grunnable: "runnable",
+ _Grunning: "running",
+ _Gsyscall: "syscall",
+ _Gwaiting: "waiting",
+ _Gdead: "dead",
+ _Genqueue: "enqueue",
+ _Gcopystack: "copystack",
+}
+
+var gScanStatusStrings = [...]string{
+ 0: "scan",
+ _Grunnable: "scanrunnable",
+ _Grunning: "scanrunning",
+ _Gsyscall: "scansyscall",
+ _Gwaiting: "scanwaiting",
+ _Gdead: "scandead",
+ _Genqueue: "scanenqueue",
+}
+
+func goroutineheader(gp *g) {
+ gpstatus := readgstatus(gp)
+
+ // Basic string status
+ var status string
+ if 0 <= gpstatus && gpstatus < uint32(len(gStatusStrings)) {
+ status = gStatusStrings[gpstatus]
+ } else if gpstatus&_Gscan != 0 && 0 <= gpstatus&^_Gscan && gpstatus&^_Gscan < uint32(len(gStatusStrings)) {
+ status = gStatusStrings[gpstatus&^_Gscan]
+ } else {
+ status = "???"
+ }
+
+ // Override.
+ if (gpstatus == _Gwaiting || gpstatus == _Gscanwaiting) && gp.waitreason != "" {
+ status = gp.waitreason
+ }
+
+ // approx time the G is blocked, in minutes
+ var waitfor int64
+ gpstatus &^= _Gscan // drop the scan bit
+ if (gpstatus == _Gwaiting || gpstatus == _Gsyscall) && gp.waitsince != 0 {
+ waitfor = (nanotime() - gp.waitsince) / 60e9
+ }
+ print("goroutine ", gp.goid, " [", status)
+ if waitfor >= 1 {
+ print(", ", waitfor, " minutes")
+ }
+ if gp.lockedm != nil {
+ print(", locked to thread")
+ }
+ print("]:\n")
+}
+
+func tracebackothers(me *g) {
+ level := gotraceback(nil)
+
+ // Show the current goroutine first, if we haven't already.
+ g := getg()
+ gp := g.m.curg
+ if gp != nil && gp != me {
+ print("\n")
+ goroutineheader(gp)
+ traceback(^uintptr(0), ^uintptr(0), 0, gp)
+ }
+
+ lock(&allglock)
+ for _, gp := range allgs {
+ if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || gp.issystem && level < 2 {
+ continue
+ }
+ print("\n")
+ goroutineheader(gp)
+ if readgstatus(gp)&^_Gscan == _Grunning {
+ print("\tgoroutine running on other thread; stack unavailable\n")
+ printcreatedby(gp)
+ } else {
+ traceback(^uintptr(0), ^uintptr(0), 0, gp)
+ }
+ }
+ unlock(&allglock)
+}
+
+// Does f mark the top of a goroutine stack?
+func topofstack(f *_func) bool {
+ pc := f.entry
+ return pc == goexitPC ||
+ pc == mstartPC ||
+ pc == mcallPC ||
+ pc == morestackPC ||
+ pc == lessstackPC ||
+ pc == rt0_goPC ||
+ externalthreadhandlerp != 0 && pc == externalthreadhandlerp
+}
diff --git a/src/runtime/traceback_windows.go b/src/runtime/traceback_windows.go
new file mode 100644
index 000000000..89dc1336e
--- /dev/null
+++ b/src/runtime/traceback_windows.go
@@ -0,0 +1,63 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// sigtrampPC is the PC at the beginning of the sigtramp assembly function.
+// The traceback routine needs to recognize it so that it can unwind past
+// the Windows exception handler frame (see traceback_windows below).
+var sigtrampPC uintptr
+
+func sigtramp()
+
+func init() {
+ sigtrampPC = funcPC(sigtramp)
+ systraceback = traceback_windows
+}
+
+func traceback_windows(f *_func, frame *stkframe, gp *g, printing bool, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer) (changed, aborted bool) {
+ // The main traceback thinks it has found a function. Check this.
+
+ // Windows exception handlers run on the actual g stack (there is room
+ // dedicated to this below the usual "bottom of stack"), not on a separate
+ // stack. As a result, we have to be able to unwind past the exception
+ // handler when called to unwind during stack growth inside the handler.
+ // Recognize the frame at the call to sighandler in sigtramp and unwind
+ // using the context argument passed to the call. This is awful.
+ if f != nil && f.entry == sigtrampPC && frame.pc > f.entry {
+ var r *context
+ // Invoke callback so that stack copier sees an uncopyable frame.
+ if callback != nil {
+ frame.continpc = frame.pc
+ frame.argp = 0
+ frame.arglen = 0
+ if !callback(frame, v) {
+ aborted = true
+ return
+ }
+ }
+ r = (*context)(unsafe.Pointer(frame.sp + ptrSize))
+ frame.pc = contextPC(r)
+ frame.sp = contextSP(r)
+ frame.lr = 0
+ frame.fp = 0
+ frame.fn = nil
+ if printing && showframe(nil, gp) {
+ print("----- exception handler -----\n")
+ }
+ f = findfunc(frame.pc)
+ if f == nil {
+ print("runtime: unknown pc ", hex(frame.pc), " after exception handler\n")
+ if callback != nil {
+ gothrow("unknown pc")
+ }
+ }
+ frame.fn = f
+ changed = true
+ return
+ }
+
+ return
+}
diff --git a/src/runtime/type.h b/src/runtime/type.h
new file mode 100644
index 000000000..de82e886f
--- /dev/null
+++ b/src/runtime/type.h
@@ -0,0 +1,113 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Runtime type representation.
+
+typedef struct Type Type;
+typedef struct UncommonType UncommonType;
+typedef struct InterfaceType InterfaceType;
+typedef struct Method Method;
+typedef struct IMethod IMethod;
+typedef struct SliceType SliceType;
+typedef struct FuncType FuncType;
+
+// Needs to be in sync with ../../cmd/ld/decodesym.c:/^commonsize and pkg/reflect/type.go:/type.
+struct Type
+{
+ uintptr size;
+ uint32 hash;
+ uint8 _unused;
+ uint8 align;
+ uint8 fieldAlign;
+ uint8 kind;
+ void* alg;
+ // gc stores type info required for garbage collector.
+ // If (kind&KindGCProg)==0, then gc directly contains sparse GC bitmap
+ // (no indirection), 4 bits per word.
+ // If (kind&KindGCProg)!=0, then gc[1] points to a compiler-generated
+ // read-only GC program; and gc[0] points to BSS space for sparse GC bitmap.
+ // For huge types (>MaxGCMask), runtime unrolls the program directly into
+ // GC bitmap and gc[0] is not used. For moderately-sized types, runtime
+ // unrolls the program into gc[0] space on first use. The first byte of gc[0]
+ // (gc[0][0]) contains 'unroll' flag saying whether the program is already
+ // unrolled into gc[0] or not.
+ uintptr gc[2];
+ String *string;
+ UncommonType *x;
+ Type *ptrto;
+ byte *zero; // ptr to the zero value for this type
+};
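+
+// A usage sketch of the gc encoding described above (illustrative only):
+//
+//	if((t->kind&KindGCProg) == 0)
+//		bitmap = (byte*)&t->gc[0];	// sparse bitmap stored inline in gc
+//	else
+//		prog = (byte*)t->gc[1];		// compiler-generated GC program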
+
+struct Method
+{
+ String *name;
+ String *pkgPath;
+ Type *mtyp;
+ Type *typ;
+ void (*ifn)(void);
+ void (*tfn)(void);
+};
+
+struct UncommonType
+{
+ String *name;
+ String *pkgPath;
+ Slice mhdr;
+ Method m[];
+};
+
+struct IMethod
+{
+ String *name;
+ String *pkgPath;
+ Type *type;
+};
+
+struct InterfaceType
+{
+ Type typ;
+ Slice mhdr;
+ IMethod m[];
+};
+
+struct MapType
+{
+ Type typ;
+ Type *key;
+ Type *elem;
+ Type *bucket; // internal type representing a hash bucket
+ Type *hmap; // internal type representing a Hmap
+ uint8 keysize; // size of key slot
+ bool indirectkey; // store ptr to key instead of key itself
+ uint8 valuesize; // size of value slot
+ bool indirectvalue; // store ptr to value instead of value itself
+ uint16 bucketsize; // size of bucket
+};
+
+struct ChanType
+{
+ Type typ;
+ Type *elem;
+ uintptr dir;
+};
+
+struct SliceType
+{
+ Type typ;
+ Type *elem;
+};
+
+struct FuncType
+{
+ Type typ;
+ bool dotdotdot;
+ Slice in;
+ Slice out;
+};
+
+struct PtrType
+{
+ Type typ;
+ Type *elem;
+};
diff --git a/src/runtime/typekind.go b/src/runtime/typekind.go
new file mode 100644
index 000000000..598553628
--- /dev/null
+++ b/src/runtime/typekind.go
@@ -0,0 +1,44 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+ kindBool = 1 + iota
+ kindInt
+ kindInt8
+ kindInt16
+ kindInt32
+ kindInt64
+ kindUint
+ kindUint8
+ kindUint16
+ kindUint32
+ kindUint64
+ kindUintptr
+ kindFloat32
+ kindFloat64
+ kindComplex64
+ kindComplex128
+ kindArray
+ kindChan
+ kindFunc
+ kindInterface
+ kindMap
+ kindPtr
+ kindSlice
+ kindString
+ kindStruct
+ kindUnsafePointer
+
+ kindDirectIface = 1 << 5
+ kindGCProg = 1 << 6 // Type.gc points to GC program
+ kindNoPointers = 1 << 7
+ kindMask = (1 << 5) - 1
+)
+
+// isDirectIface reports whether t is stored directly in an interface value.
+func isDirectIface(t *_type) bool {
+ return t.kind&kindDirectIface != 0
+}
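+
+// The low five bits select the kind and the high bits are flags, so callers
+// test them separately. A minimal usage sketch (hypothetical helpers, not
+// defined in this file):
+//
+//	func isPointerKind(t *_type) bool {
+//		return t.kind&kindMask == kindPtr
+//	}
+//
+//	func hasPointers(t *_type) bool {
+//		return t.kind&kindNoPointers == 0
+//	}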
diff --git a/src/runtime/typekind.h b/src/runtime/typekind.h
new file mode 100644
index 000000000..7c611e8ba
--- /dev/null
+++ b/src/runtime/typekind.h
@@ -0,0 +1,41 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// PtrSize vs sizeof(void*): This file is also included from src/cmd/ld/...
+// which defines PtrSize to be different from sizeof(void*) when crosscompiling.
+
+enum {
+ KindBool = 1,
+ KindInt,
+ KindInt8,
+ KindInt16,
+ KindInt32,
+ KindInt64,
+ KindUint,
+ KindUint8,
+ KindUint16,
+ KindUint32,
+ KindUint64,
+ KindUintptr,
+ KindFloat32,
+ KindFloat64,
+ KindComplex64,
+ KindComplex128,
+ KindArray,
+ KindChan,
+ KindFunc,
+ KindInterface,
+ KindMap,
+ KindPtr,
+ KindSlice,
+ KindString,
+ KindStruct,
+ KindUnsafePointer,
+
+ KindDirectIface = 1<<5,
+ KindGCProg = 1<<6, // Type.gc points to GC program
+ KindNoPointers = 1<<7,
+ KindMask = (1<<5)-1,
+};
+
diff --git a/src/runtime/vdso_linux_amd64.c b/src/runtime/vdso_linux_amd64.c
new file mode 100644
index 000000000..38e115243
--- /dev/null
+++ b/src/runtime/vdso_linux_amd64.c
@@ -0,0 +1,366 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// Look up symbols in the Linux vDSO.
+
+// This code was originally based on the sample Linux vDSO parser at
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/vDSO/parse_vdso.c
+
+// This implements the ELF dynamic linking spec at
+// http://sco.com/developers/gabi/latest/ch5.dynamic.html
+
+// The version section is documented at
+// http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html
+
+#define AT_RANDOM 25
+#define AT_SYSINFO_EHDR 33
+#define AT_NULL 0 /* End of vector */
+#define PT_LOAD 1 /* Loadable program segment */
+#define PT_DYNAMIC 2 /* Dynamic linking information */
+#define DT_NULL 0 /* Marks end of dynamic section */
+#define DT_HASH 4 /* Dynamic symbol hash table */
+#define DT_STRTAB 5 /* Address of string table */
+#define DT_SYMTAB 6 /* Address of symbol table */
+#define DT_VERSYM 0x6ffffff0
+#define DT_VERDEF 0x6ffffffc
+
+#define VER_FLG_BASE 0x1 /* Version definition of file itself */
+#define SHN_UNDEF 0 /* Undefined section */
+#define SHT_DYNSYM 11 /* Dynamic linker symbol table */
+#define STT_FUNC 2 /* Symbol is a code object */
+#define STB_GLOBAL 1 /* Global symbol */
+#define STB_WEAK 2 /* Weak symbol */
+
+/* How to extract and insert information held in the st_info field. */
+#define ELF64_ST_BIND(val) (((byte) (val)) >> 4)
+#define ELF64_ST_TYPE(val) ((val) & 0xf)
+
+#define EI_NIDENT (16)
+
+typedef uint16 Elf64_Half;
+typedef uint32 Elf64_Word;
+typedef int32 Elf64_Sword;
+typedef uint64 Elf64_Xword;
+typedef int64 Elf64_Sxword;
+typedef uint64 Elf64_Addr;
+typedef uint64 Elf64_Off;
+typedef uint16 Elf64_Section;
+typedef Elf64_Half Elf64_Versym;
+
+
+typedef struct
+{
+ Elf64_Word st_name;
+ byte st_info;
+ byte st_other;
+ Elf64_Section st_shndx;
+ Elf64_Addr st_value;
+ Elf64_Xword st_size;
+} Elf64_Sym;
+
+typedef struct
+{
+ Elf64_Half vd_version; /* Version revision */
+ Elf64_Half vd_flags; /* Version information */
+ Elf64_Half vd_ndx; /* Version Index */
+ Elf64_Half vd_cnt; /* Number of associated aux entries */
+ Elf64_Word vd_hash; /* Version name hash value */
+ Elf64_Word vd_aux; /* Offset in bytes to verdaux array */
+ Elf64_Word vd_next; /* Offset in bytes to next verdef entry */
+} Elf64_Verdef;
+
+typedef struct
+{
+ byte e_ident[EI_NIDENT]; /* Magic number and other info */
+ Elf64_Half e_type; /* Object file type */
+ Elf64_Half e_machine; /* Architecture */
+ Elf64_Word e_version; /* Object file version */
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags; /* Processor-specific flags */
+ Elf64_Half e_ehsize; /* ELF header size in bytes */
+ Elf64_Half e_phentsize; /* Program header table entry size */
+ Elf64_Half e_phnum; /* Program header table entry count */
+ Elf64_Half e_shentsize; /* Section header table entry size */
+ Elf64_Half e_shnum; /* Section header table entry count */
+ Elf64_Half e_shstrndx; /* Section header string table index */
+} Elf64_Ehdr;
+
+typedef struct
+{
+ Elf64_Word p_type; /* Segment type */
+ Elf64_Word p_flags; /* Segment flags */
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment */
+} Elf64_Phdr;
+
+typedef struct
+{
+ Elf64_Word sh_name; /* Section name (string tbl index) */
+ Elf64_Word sh_type; /* Section type */
+ Elf64_Xword sh_flags; /* Section flags */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Section size in bytes */
+ Elf64_Word sh_link; /* Link to another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
+
+typedef struct
+{
+ Elf64_Sxword d_tag; /* Dynamic entry type */
+ union
+ {
+ Elf64_Xword d_val; /* Integer value */
+ Elf64_Addr d_ptr; /* Address value */
+ } d_un;
+} Elf64_Dyn;
+
+typedef struct
+{
+ Elf64_Word vda_name; /* Version or dependency names */
+ Elf64_Word vda_next; /* Offset in bytes to next verdaux entry */
+} Elf64_Verdaux;
+
+typedef struct
+{
+ uint64 a_type; /* Entry type */
+ union
+ {
+ uint64 a_val; /* Integer value */
+ } a_un;
+} Elf64_auxv_t;
+
+
+typedef struct {
+ byte* name;
+ int32 sym_hash;
+ void** var_ptr;
+} symbol_key;
+
+typedef struct {
+ byte* version;
+ int32 ver_hash;
+} version_key;
+
+struct vdso_info {
+ bool valid;
+
+ /* Load information */
+ uintptr load_addr;
+ uintptr load_offset; /* load_addr - recorded vaddr */
+
+ /* Symbol table */
+ Elf64_Sym *symtab;
+ const byte *symstrings;
+ Elf64_Word *bucket, *chain;
+ Elf64_Word nbucket, nchain;
+
+ /* Version table */
+ Elf64_Versym *versym;
+ Elf64_Verdef *verdef;
+};
+
+static version_key linux26 = { (byte*)"LINUX_2.6", 0x3ae75f6 };
+
+// initialize with vsyscall fallbacks
+void* runtime·__vdso_time_sym = (void*)0xffffffffff600400ULL;
+void* runtime·__vdso_gettimeofday_sym = (void*)0xffffffffff600000ULL;
+void* runtime·__vdso_clock_gettime_sym = (void*)0;
+
+#define SYM_KEYS_COUNT 3
+static symbol_key sym_keys[] = {
+ { (byte*)"__vdso_time", 0xa33c485, &runtime·__vdso_time_sym },
+ { (byte*)"__vdso_gettimeofday", 0x315ca59, &runtime·__vdso_gettimeofday_sym },
+ { (byte*)"__vdso_clock_gettime", 0xd35ec75, &runtime·__vdso_clock_gettime_sym },
+};
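+
+// The sym_hash and ver_hash values above are precomputed with the standard
+// ELF symbol hash from the dynamic linking spec cited at the top of this
+// file; the DT_HASH lookup below assumes that hash. A reference sketch:
+//
+//	static uint32
+//	elf_hash(const byte *name)
+//	{
+//		uint32 h = 0, g;
+//
+//		while(*name != 0) {
+//			h = (h<<4) + *name++;
+//			g = h & 0xf0000000;
+//			if(g != 0)
+//				h ^= g>>24;
+//			h &= ~g;
+//		}
+//		return h;
+//	}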
+
+static void
+vdso_init_from_sysinfo_ehdr(struct vdso_info *vdso_info, Elf64_Ehdr* hdr)
+{
+ uint64 i;
+ bool found_vaddr = false;
+ Elf64_Phdr *pt;
+ Elf64_Dyn *dyn;
+ Elf64_Word *hash;
+
+ vdso_info->valid = false;
+ vdso_info->load_addr = (uintptr) hdr;
+
+ pt = (Elf64_Phdr*)(vdso_info->load_addr + hdr->e_phoff);
+ dyn = nil;
+
+ // We need two things from the segment table: the load offset
+ // and the dynamic table.
+ for(i=0; i<hdr->e_phnum; i++) {
+ if(pt[i].p_type == PT_LOAD && found_vaddr == false) {
+ found_vaddr = true;
+ vdso_info->load_offset = (uintptr)hdr
+ + (uintptr)pt[i].p_offset
+ - (uintptr)pt[i].p_vaddr;
+ } else if(pt[i].p_type == PT_DYNAMIC) {
+ dyn = (Elf64_Dyn*)((uintptr)hdr + pt[i].p_offset);
+ }
+ }
+
+ if(found_vaddr == false || dyn == nil)
+ return; // Failed
+
+ // Fish out the useful bits of the dynamic table.
+ hash = nil;
+ vdso_info->symstrings = nil;
+ vdso_info->symtab = nil;
+ vdso_info->versym = nil;
+ vdso_info->verdef = nil;
+ for(i=0; dyn[i].d_tag!=DT_NULL; i++) {
+ switch(dyn[i].d_tag) {
+ case DT_STRTAB:
+ vdso_info->symstrings = (const byte *)
+ ((uintptr)dyn[i].d_un.d_ptr
+ + vdso_info->load_offset);
+ break;
+ case DT_SYMTAB:
+ vdso_info->symtab = (Elf64_Sym *)
+ ((uintptr)dyn[i].d_un.d_ptr
+ + vdso_info->load_offset);
+ break;
+ case DT_HASH:
+ hash = (Elf64_Word *)
+ ((uintptr)dyn[i].d_un.d_ptr
+ + vdso_info->load_offset);
+ break;
+ case DT_VERSYM:
+ vdso_info->versym = (Elf64_Versym *)
+ ((uintptr)dyn[i].d_un.d_ptr
+ + vdso_info->load_offset);
+ break;
+ case DT_VERDEF:
+ vdso_info->verdef = (Elf64_Verdef *)
+ ((uintptr)dyn[i].d_un.d_ptr
+ + vdso_info->load_offset);
+ break;
+ }
+ }
+ if(vdso_info->symstrings == nil || vdso_info->symtab == nil || hash == nil)
+ return; // Failed
+
+ if(vdso_info->verdef == nil)
+ vdso_info->versym = 0;
+
+ // Parse the hash table header.
+ vdso_info->nbucket = hash[0];
+ vdso_info->nchain = hash[1];
+ vdso_info->bucket = &hash[2];
+ vdso_info->chain = &hash[vdso_info->nbucket + 2];
+
+ // That's all we need.
+ vdso_info->valid = true;
+}
+
+static int32
+vdso_find_version(struct vdso_info *vdso_info, version_key* ver)
+{
+ if(vdso_info->valid == false) {
+ return 0;
+ }
+ Elf64_Verdef *def = vdso_info->verdef;
+ while(true) {
+ if((def->vd_flags & VER_FLG_BASE) == 0) {
+ Elf64_Verdaux *aux = (Elf64_Verdaux*)((byte *)def + def->vd_aux);
+ if(def->vd_hash == ver->ver_hash &&
+ runtime·strcmp(ver->version, vdso_info->symstrings + aux->vda_name) == 0) {
+ return def->vd_ndx & 0x7fff;
+ }
+ }
+
+ if(def->vd_next == 0) {
+ break;
+ }
+ def = (Elf64_Verdef *)((byte *)def + def->vd_next);
+ }
+ return -1; // can not match any version
+}
+
+static void
+vdso_parse_symbols(struct vdso_info *vdso_info, int32 version)
+{
+ int32 i;
+ Elf64_Word chain;
+ Elf64_Sym *sym;
+
+ if(vdso_info->valid == false)
+ return;
+
+ for(i=0; i<SYM_KEYS_COUNT; i++) {
+ for(chain = vdso_info->bucket[sym_keys[i].sym_hash % vdso_info->nbucket];
+ chain != 0; chain = vdso_info->chain[chain]) {
+
+ sym = &vdso_info->symtab[chain];
+ if(ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ continue;
+ if(ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+ continue;
+ if(sym->st_shndx == SHN_UNDEF)
+ continue;
+ if(runtime·strcmp(sym_keys[i].name, vdso_info->symstrings + sym->st_name) != 0)
+ continue;
+
+ // Check symbol version.
+ if(vdso_info->versym != nil && version != 0
+ && (vdso_info->versym[chain] & 0x7fff) != version)
+ continue;
+
+ *sym_keys[i].var_ptr = (void *)(vdso_info->load_offset + sym->st_value);
+ break;
+ }
+ }
+}
+
+static void
+runtime·linux_setup_vdso(int32 argc, uint8** argv)
+{
+ struct vdso_info vdso_info;
+
+ // skip over argv (argc entries plus the trailing NULL)
+ byte **p = argv;
+ p = &p[argc+1];
+
+ // skip envp to get to ELF auxiliary vector.
+ for(; *p!=0; p++) {}
+
+ // skip NULL separator
+ p++;
+
+ // now, p points to auxv
+ Elf64_auxv_t *elf_auxv = (Elf64_auxv_t*) p;
+
+ for(int32 i=0; elf_auxv[i].a_type!=AT_NULL; i++) {
+ if(elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
+ if(elf_auxv[i].a_un.a_val == 0) {
+ // Something went wrong
+ continue;
+ }
+ vdso_init_from_sysinfo_ehdr(&vdso_info, (Elf64_Ehdr*)elf_auxv[i].a_un.a_val);
+ vdso_parse_symbols(&vdso_info, vdso_find_version(&vdso_info, &linux26));
+ continue;
+ }
+ if(elf_auxv[i].a_type == AT_RANDOM) {
+ runtime·startup_random_data = (byte*)elf_auxv[i].a_un.a_val;
+ runtime·startup_random_data_len = 16;
+ continue;
+ }
+ }
+}
+
+void (*runtime·sysargs)(int32, uint8**) = runtime·linux_setup_vdso;
diff --git a/src/runtime/vlop_386.s b/src/runtime/vlop_386.s
new file mode 100644
index 000000000..ce8e7d064
--- /dev/null
+++ b/src/runtime/vlop_386.s
@@ -0,0 +1,56 @@
+// Inferno's libkern/vlop-386.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/vlop-386.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "textflag.h"
+
+/*
+ * C runtime for 64-bit divide.
+ */
+
+// runtime·_mul64by32(r *uint64, a uint64, b uint32) uint32
+// sets *r = low 64 bits of 96-bit product a*b; returns high 32 bits.
+TEXT runtime·_mul64by32(SB), NOSPLIT, $0
+ MOVL r+0(FP), CX
+ MOVL a+4(FP), AX
+ MULL b+12(FP)
+ MOVL AX, 0(CX)
+ MOVL DX, BX
+ MOVL a+8(FP), AX
+ MULL b+12(FP)
+ ADDL AX, BX
+ ADCL $0, DX
+ MOVL BX, 4(CX)
+ MOVL DX, AX
+ MOVL AX, ret+16(FP)
+ RET
+
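+// runtime·_div64by32(a uint64, b uint32, r *uint32) uint32
+// returns a/b and sets *r = a%b.
+// The caller must ensure the quotient fits in 32 bits
+// (see dodiv in vlrt.c); otherwise DIVL faults.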
+TEXT runtime·_div64by32(SB), NOSPLIT, $0
+ MOVL r+12(FP), CX
+ MOVL a+0(FP), AX
+ MOVL a+4(FP), DX
+ DIVL b+8(FP)
+ MOVL DX, 0(CX)
+ MOVL AX, ret+16(FP)
+ RET
diff --git a/src/runtime/vlop_arm.s b/src/runtime/vlop_arm.s
new file mode 100644
index 000000000..b4b905bb7
--- /dev/null
+++ b/src/runtime/vlop_arm.s
@@ -0,0 +1,317 @@
+// Inferno's libkern/vlop-arm.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "zasm_GOOS_GOARCH.h"
+#include "textflag.h"
+
+arg=0
+
+/* replaced use of R10 by R11 because the former can be the data segment base register */
+
+TEXT _mulv(SB), NOSPLIT, $0
+ MOVW l0+0(FP), R2 /* l0 */
+ MOVW h0+4(FP), R11 /* h0 */
+ MOVW l1+8(FP), R4 /* l1 */
+ MOVW h1+12(FP), R5 /* h1 */
+ MULLU R4, R2, (R7,R6)
+ MUL R11, R4, R8
+ ADD R8, R7
+ MUL R2, R5, R8
+ ADD R8, R7
+ MOVW R6, ret_lo+16(FP)
+ MOVW R7, ret_hi+20(FP)
+ RET
+
+// trampoline for _sfloat2. passes LR as arg0 and
+// saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
+// be changed by _sfloat2.
+TEXT _sfloat(SB), NOSPLIT, $68-0 // 4 arg + 14*4 saved regs + cpsr + return value
+ MOVW R14, 4(R13)
+ MOVW R0, 8(R13)
+ MOVW $12(R13), R0
+ MOVM.IA.W [R1-R12], (R0)
+ MOVW $72(R13), R1 // correct for frame size
+ MOVW R1, 60(R13)
+ WORD $0xe10f1000 // mrs r1, cpsr
+ MOVW R1, 64(R13)
+ // Disable preemption of this goroutine during _sfloat2 by
+ // m->locks++ and m->locks-- around the call.
+ // Rescheduling this goroutine may cause the loss of the
+ // contents of the software floating point registers in
+ // m->freghi, m->freglo, m->fflag, if the goroutine is moved
+ // to a different m or another goroutine runs on this m.
+ // Rescheduling at ordinary function calls is okay because
+ // all registers are caller save, but _sfloat2 and the things
+ // that it runs are simulating the execution of individual
+ // program instructions, and those instructions do not expect
+ // the floating point registers to be lost.
+ // An alternative would be to move the software floating point
+ // registers into G, but they do not need to be kept at the
+ // usual places a goroutine reschedules (at function calls),
+ // so it would be a waste of 132 bytes per G.
+ MOVW g_m(g), R8
+ MOVW m_locks(R8), R1
+ ADD $1, R1
+ MOVW R1, m_locks(R8)
+ MOVW $1, R1
+ MOVW R1, m_softfloat(R8)
+ BL runtime·_sfloat2(SB)
+ MOVW 68(R13), R0
+ MOVW g_m(g), R8
+ MOVW m_locks(R8), R1
+ SUB $1, R1
+ MOVW R1, m_locks(R8)
+ MOVW $0, R1
+ MOVW R1, m_softfloat(R8)
+ MOVW R0, 0(R13)
+ MOVW 64(R13), R1
+ WORD $0xe128f001 // msr cpsr_f, r1
+ MOVW $12(R13), R0
+ // Restore R1-R12, R0.
+ MOVM.IA.W (R0), [R1-R12]
+ MOVW 8(R13), R0
+ RET
+
+// trampoline for _sfloat2 panic.
+// _sfloat2 instructs _sfloat to return here.
+// We need to push a fake saved LR onto the stack,
+// load the signal fault address into LR, and jump
+// to the real sigpanic.
+// This simulates what sighandler does for a memory fault.
+TEXT _sfloatpanic(SB),NOSPLIT,$-4
+ MOVW $0, R0
+ MOVW.W R0, -4(R13)
+ MOVW g_sigpc(g), LR
+ B runtime·sigpanic(SB)
+
+// func udiv(n, d uint32) (q, r uint32)
+// Reference:
+// Sloss, Andrew et. al; ARM System Developer's Guide: Designing and Optimizing System Software
+// Morgan Kaufmann; 1 edition (April 8, 2004), ISBN 978-1558608740
+q = 0 // input d, output q
+r = 1 // input n, output r
+s = 2 // three temporary variables
+M = 3
+a = 11
+// Be careful: R(a) == R11 will be used by the linker for synthesized instructions.
+TEXT udiv<>(SB),NOSPLIT,$-4
+ CLZ R(q), R(s) // find normalizing shift
+ MOVW.S R(q)<<R(s), R(a)
+ MOVW $fast_udiv_tab<>-64(SB), R(M)
+ ADD.NE R(a)>>25, R(M), R(a) // index by most significant 7 bits of divisor
+ MOVBU.NE (R(a)), R(a)
+
+ SUB.S $7, R(s)
+ RSB $0, R(q), R(M) // M = -q
+ MOVW.PL R(a)<<R(s), R(q)
+
+ // 1st Newton iteration
+ MUL.PL R(M), R(q), R(a) // a = -q*d
+ BMI udiv_by_large_d
+ MULAWT R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
+ TEQ R(M)->1, R(M) // check for d=0 or d=1
+
+ // 2nd Newton iteration
+ MUL.NE R(M), R(q), R(a)
+ MOVW.NE $0, R(s)
+ MULAL.NE R(q), R(a), (R(q),R(s))
+ BEQ udiv_by_0_or_1
+
+ // q now accurate enough for a remainder r, 0<=r<3*d
+ MULLU R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32
+ ADD R(M), R(r), R(r) // r = n - d
+ MULA R(M), R(q), R(r), R(r) // r = n - (q+1)*d
+
+ // since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
+ CMN R(M), R(r) // t = r-d
+ SUB.CS R(M), R(r), R(r) // if (t<-d || t>=0) r=r+d
+ ADD.CC $1, R(q)
+ ADD.PL R(M)<<1, R(r)
+ ADD.PL $2, R(q)
+ RET
+
+udiv_by_large_d:
+ // at this point we know d>=2^(31-6)=2^25
+ SUB $4, R(a), R(a)
+ RSB $0, R(s), R(s)
+ MOVW R(a)>>R(s), R(q)
+ MULLU R(q), R(r), (R(q),R(s))
+ MULA R(M), R(q), R(r), R(r)
+
+ // q now accurate enough for a remainder r, 0<=r<4*d
+ CMN R(r)>>1, R(M) // if(r/2 >= d)
+ ADD.CS R(M)<<1, R(r)
+ ADD.CS $2, R(q)
+ CMN R(r), R(M)
+ ADD.CS R(M), R(r)
+ ADD.CS $1, R(q)
+ RET
+
+udiv_by_0_or_1:
+ // carry set if d==1, carry clear if d==0
+ BCC udiv_by_0
+ MOVW R(r), R(q)
+ MOVW $0, R(r)
+ RET
+
+udiv_by_0:
+ // The ARM toolchain expects it can emit references to DIV and MOD
+ // instructions. The linker rewrites each pseudo-instruction into
+ // a sequence that pushes two values onto the stack and then calls
+ // _divu, _modu, _div, or _mod (below), all of which have a 16-byte
+ // frame plus the saved LR. The traceback routine knows the expanded
+ // stack frame size at the pseudo-instruction call site, but it
+ // doesn't know that the frame has a non-standard layout. In particular,
+ // it expects to find a saved LR in the bottom word of the frame.
+ // Unwind the stack back to the pseudo-instruction call site, copy the
+ // saved LR where the traceback routine will look for it, and make it
+ // appear that panicdivide was called from that PC.
+ MOVW 0(R13), LR
+ ADD $20, R13
+ MOVW 8(R13), R1 // actual saved LR
+ MOVW R1, 0(R13) // expected here for traceback
+ B runtime·panicdivide(SB)
+
+// var tab [64]byte
+// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
+// laid out here as little-endian uint32s
+DATA fast_udiv_tab<>+0x00(SB)/4, $0xf4f8fcff
+DATA fast_udiv_tab<>+0x04(SB)/4, $0xe6eaedf0
+DATA fast_udiv_tab<>+0x08(SB)/4, $0xdadde0e3
+DATA fast_udiv_tab<>+0x0c(SB)/4, $0xcfd2d4d7
+DATA fast_udiv_tab<>+0x10(SB)/4, $0xc5c7cacc
+DATA fast_udiv_tab<>+0x14(SB)/4, $0xbcbec0c3
+DATA fast_udiv_tab<>+0x18(SB)/4, $0xb4b6b8ba
+DATA fast_udiv_tab<>+0x1c(SB)/4, $0xacaeb0b2
+DATA fast_udiv_tab<>+0x20(SB)/4, $0xa5a7a8aa
+DATA fast_udiv_tab<>+0x24(SB)/4, $0x9fa0a2a3
+DATA fast_udiv_tab<>+0x28(SB)/4, $0x999a9c9d
+DATA fast_udiv_tab<>+0x2c(SB)/4, $0x93949697
+DATA fast_udiv_tab<>+0x30(SB)/4, $0x8e8f9092
+DATA fast_udiv_tab<>+0x34(SB)/4, $0x898a8c8d
+DATA fast_udiv_tab<>+0x38(SB)/4, $0x85868788
+DATA fast_udiv_tab<>+0x3c(SB)/4, $0x81828384
+GLOBL fast_udiv_tab<>(SB), RODATA, $64
+
+// The linker will pass numerator in R(TMP), and it also
+// expects the result in R(TMP)
+TMP = 11
+
+TEXT _divu(SB), NOSPLIT, $16
+ MOVW R(q), 4(R13)
+ MOVW R(r), 8(R13)
+ MOVW R(s), 12(R13)
+ MOVW R(M), 16(R13)
+
+ MOVW R(TMP), R(r) /* numerator */
+ MOVW 0(FP), R(q) /* denominator */
+ BL udiv<>(SB)
+ MOVW R(q), R(TMP)
+ MOVW 4(R13), R(q)
+ MOVW 8(R13), R(r)
+ MOVW 12(R13), R(s)
+ MOVW 16(R13), R(M)
+ RET
+
+TEXT _modu(SB), NOSPLIT, $16
+ MOVW R(q), 4(R13)
+ MOVW R(r), 8(R13)
+ MOVW R(s), 12(R13)
+ MOVW R(M), 16(R13)
+
+ MOVW R(TMP), R(r) /* numerator */
+ MOVW 0(FP), R(q) /* denominator */
+ BL udiv<>(SB)
+ MOVW R(r), R(TMP)
+ MOVW 4(R13), R(q)
+ MOVW 8(R13), R(r)
+ MOVW 12(R13), R(s)
+ MOVW 16(R13), R(M)
+ RET
+
+TEXT _div(SB),NOSPLIT,$16
+ MOVW R(q), 4(R13)
+ MOVW R(r), 8(R13)
+ MOVW R(s), 12(R13)
+ MOVW R(M), 16(R13)
+ MOVW R(TMP), R(r) /* numerator */
+ MOVW 0(FP), R(q) /* denominator */
+ CMP $0, R(r)
+ BGE d1
+ RSB $0, R(r), R(r)
+ CMP $0, R(q)
+ BGE d2
+ RSB $0, R(q), R(q)
+d0:
+ BL udiv<>(SB) /* none/both neg */
+ MOVW R(q), R(TMP)
+ B out1
+d1:
+ CMP $0, R(q)
+ BGE d0
+ RSB $0, R(q), R(q)
+d2:
+ BL udiv<>(SB) /* one neg */
+ RSB $0, R(q), R(TMP)
+out1:
+ MOVW 4(R13), R(q)
+ MOVW 8(R13), R(r)
+ MOVW 12(R13), R(s)
+ MOVW 16(R13), R(M)
+ RET
+
+TEXT _mod(SB),NOSPLIT,$16
+ MOVW R(q), 4(R13)
+ MOVW R(r), 8(R13)
+ MOVW R(s), 12(R13)
+ MOVW R(M), 16(R13)
+ MOVW R(TMP), R(r) /* numerator */
+ MOVW 0(FP), R(q) /* denominator */
+ CMP $0, R(q)
+ RSB.LT $0, R(q), R(q)
+ CMP $0, R(r)
+ BGE m1
+ RSB $0, R(r), R(r)
+ BL udiv<>(SB) /* neg numerator */
+ RSB $0, R(r), R(TMP)
+ B out
+m1:
+ BL udiv<>(SB) /* pos numerator */
+ MOVW R(r), R(TMP)
+out:
+ MOVW 4(R13), R(q)
+ MOVW 8(R13), R(r)
+ MOVW 12(R13), R(s)
+ MOVW 16(R13), R(M)
+ RET
+
+// _mul64by32 and _div64by32 not implemented on arm
+TEXT runtime·_mul64by32(SB), NOSPLIT, $0
+ MOVW $0, R0
+ MOVW (R0), R1 // crash
+
+TEXT runtime·_div64by32(SB), NOSPLIT, $0
+ MOVW $0, R0
+ MOVW (R0), R1 // crash
diff --git a/src/runtime/vlop_arm_test.go b/src/runtime/vlop_arm_test.go
new file mode 100644
index 000000000..cd28419ad
--- /dev/null
+++ b/src/runtime/vlop_arm_test.go
@@ -0,0 +1,70 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import "testing"
+
+// arm soft division benchmarks adapted from
+// http://ridiculousfish.com/files/division_benchmarks.tar.gz
+
+const numeratorsSize = 1 << 21
+
+var numerators = randomNumerators()
+
+type randstate struct {
+ hi, lo uint32
+}
+
+func (r *randstate) rand() uint32 {
+ r.hi = r.hi<<16 + r.hi>>16
+ r.hi += r.lo
+ r.lo += r.hi
+ return r.hi
+}
+
+func randomNumerators() []uint32 {
+ numerators := make([]uint32, numeratorsSize)
+ random := &randstate{2147483563, 2147483563 ^ 0x49616E42}
+ for i := range numerators {
+ numerators[i] = random.rand()
+ }
+ return numerators
+}
+
+func bmUint32Div(divisor uint32, b *testing.B) {
+ var sum uint32
+ for i := 0; i < b.N; i++ {
+ sum += numerators[i&(numeratorsSize-1)] / divisor
+ }
+}
+
+func BenchmarkUint32Div7(b *testing.B) { bmUint32Div(7, b) }
+func BenchmarkUint32Div37(b *testing.B) { bmUint32Div(37, b) }
+func BenchmarkUint32Div123(b *testing.B) { bmUint32Div(123, b) }
+func BenchmarkUint32Div763(b *testing.B) { bmUint32Div(763, b) }
+func BenchmarkUint32Div1247(b *testing.B) { bmUint32Div(1247, b) }
+func BenchmarkUint32Div9305(b *testing.B) { bmUint32Div(9305, b) }
+func BenchmarkUint32Div13307(b *testing.B) { bmUint32Div(13307, b) }
+func BenchmarkUint32Div52513(b *testing.B) { bmUint32Div(52513, b) }
+func BenchmarkUint32Div60978747(b *testing.B) { bmUint32Div(60978747, b) }
+func BenchmarkUint32Div106956295(b *testing.B) { bmUint32Div(106956295, b) }
+
+func bmUint32Mod(divisor uint32, b *testing.B) {
+ var sum uint32
+ for i := 0; i < b.N; i++ {
+ sum += numerators[i&(numeratorsSize-1)] % divisor
+ }
+}
+
+func BenchmarkUint32Mod7(b *testing.B) { bmUint32Mod(7, b) }
+func BenchmarkUint32Mod37(b *testing.B) { bmUint32Mod(37, b) }
+func BenchmarkUint32Mod123(b *testing.B) { bmUint32Mod(123, b) }
+func BenchmarkUint32Mod763(b *testing.B) { bmUint32Mod(763, b) }
+func BenchmarkUint32Mod1247(b *testing.B) { bmUint32Mod(1247, b) }
+func BenchmarkUint32Mod9305(b *testing.B) { bmUint32Mod(9305, b) }
+func BenchmarkUint32Mod13307(b *testing.B) { bmUint32Mod(13307, b) }
+func BenchmarkUint32Mod52513(b *testing.B) { bmUint32Mod(52513, b) }
+func BenchmarkUint32Mod60978747(b *testing.B) { bmUint32Mod(60978747, b) }
+func BenchmarkUint32Mod106956295(b *testing.B) { bmUint32Mod(106956295, b) }
diff --git a/src/runtime/vlrt.c b/src/runtime/vlrt.c
new file mode 100644
index 000000000..cb0d14796
--- /dev/null
+++ b/src/runtime/vlrt.c
@@ -0,0 +1,914 @@
+// Inferno's libkern/vlrt-386.c
+// http://code.google.com/p/inferno-os/source/browse/libkern/vlrt-386.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// +build arm 386
+
+#include "textflag.h"
+
+/*
+ * C runtime for 64-bit divide, others.
+ *
+ * TODO(rsc): The simple functions are dregs--8c knows how
+ * to generate the code directly now. Find and remove.
+ */
+
+void runtime·panicdivide(void);
+
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+typedef signed char schar;
+
+#define SIGN(n) (1UL<<(n-1))
+
+typedef struct Vlong Vlong;
+struct Vlong
+{
+ ulong lo;
+ ulong hi;
+};
+
+typedef union Vlong64 Vlong64;
+union Vlong64
+{
+ long long v;
+ Vlong v2;
+};
+
+void runtime·abort(void);
+
+#pragma textflag NOSPLIT
+Vlong
+_addv(Vlong a, Vlong b)
+{
+ Vlong r;
+
+ r.lo = a.lo + b.lo;
+ r.hi = a.hi + b.hi;
+ if(r.lo < a.lo)
+ r.hi++;
+ return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_subv(Vlong a, Vlong b)
+{
+ Vlong r;
+
+ r.lo = a.lo - b.lo;
+ r.hi = a.hi - b.hi;
+ if(r.lo > a.lo)
+ r.hi--;
+ return r;
+}
+
+Vlong
+_d2v(double d)
+{
+ union { double d; Vlong vl; } x;
+ ulong xhi, xlo, ylo, yhi;
+ int sh;
+ Vlong y;
+
+ x.d = d;
+
+ xhi = (x.vl.hi & 0xfffff) | 0x100000;
+ xlo = x.vl.lo;
+ sh = 1075 - ((x.vl.hi >> 20) & 0x7ff);
+
+ ylo = 0;
+ yhi = 0;
+ if(sh >= 0) {
+ /* v = (hi||lo) >> sh */
+ if(sh < 32) {
+ if(sh == 0) {
+ ylo = xlo;
+ yhi = xhi;
+ } else {
+ ylo = (xlo >> sh) | (xhi << (32-sh));
+ yhi = xhi >> sh;
+ }
+ } else {
+ if(sh == 32) {
+ ylo = xhi;
+ } else
+ if(sh < 64) {
+ ylo = xhi >> (sh-32);
+ }
+ }
+ } else {
+ /* v = (hi||lo) << -sh */
+ sh = -sh;
+ if(sh <= 10) { /* NOTE: sh <= 11 on ARM??? */
+ ylo = xlo << sh;
+ yhi = (xhi << sh) | (xlo >> (32-sh));
+ } else {
+ /* overflow */
+ yhi = d; /* causes something awful */
+ }
+ }
+ if(x.vl.hi & SIGN(32)) {
+ if(ylo != 0) {
+ ylo = -ylo;
+ yhi = ~yhi;
+ } else
+ yhi = -yhi;
+ }
+
+ y.hi = yhi;
+ y.lo = ylo;
+ return y;
+}
+
+Vlong
+_f2v(float f)
+{
+ return _d2v(f);
+}
+
+double
+_ul2d(ulong u)
+{
+ // compensate for bug in c
+ if(u & SIGN(32)) {
+ u ^= SIGN(32);
+ return 2147483648. + u;
+ }
+ return u;
+}
+
+double
+_v2d(Vlong x)
+{
+ if(x.hi & SIGN(32)) {
+ if(x.lo) {
+ x.lo = -x.lo;
+ x.hi = ~x.hi;
+ } else
+ x.hi = -x.hi;
+ return -(_ul2d(x.hi)*4294967296. + _ul2d(x.lo));
+ }
+ return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+ return _v2d(x);
+}
+
+ulong runtime·_div64by32(Vlong, ulong, ulong*);
+int runtime·_mul64by32(Vlong*, Vlong, ulong);
+
+static void
+slowdodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
+{
+ ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
+ int i;
+
+ numhi = num.hi;
+ numlo = num.lo;
+ denhi = den.hi;
+ denlo = den.lo;
+
+ /*
+ * get a divide by zero
+ */
+ if(denlo==0 && denhi==0) {
+ runtime·panicdivide();
+ }
+
+ /*
+ * set up the divisor and find the number of iterations needed
+ */
+ if(numhi >= SIGN(32)) {
+ quohi = SIGN(32);
+ quolo = 0;
+ } else {
+ quohi = numhi;
+ quolo = numlo;
+ }
+ i = 0;
+ while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
+ denhi = (denhi<<1) | (denlo>>31);
+ denlo <<= 1;
+ i++;
+ }
+
+ quohi = 0;
+ quolo = 0;
+ for(; i >= 0; i--) {
+ quohi = (quohi<<1) | (quolo>>31);
+ quolo <<= 1;
+ if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
+ t = numlo;
+ numlo -= denlo;
+ if(numlo > t)
+ numhi--;
+ numhi -= denhi;
+ quolo |= 1;
+ }
+ denlo = (denlo>>1) | (denhi<<31);
+ denhi >>= 1;
+ }
+
+ if(q) {
+ q->lo = quolo;
+ q->hi = quohi;
+ }
+ if(r) {
+ r->lo = numlo;
+ r->hi = numhi;
+ }
+}
+
+#ifdef GOARCH_arm
+static void
+dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
+{
+ slowdodiv(num, den, qp, rp);
+}
+#endif
+
+#ifdef GOARCH_386
+static void
+dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
+{
+ ulong n;
+ Vlong x, q, r;
+
+ if(den.hi > num.hi || (den.hi == num.hi && den.lo > num.lo)){
+ if(qp) {
+ qp->hi = 0;
+ qp->lo = 0;
+ }
+ if(rp) {
+ rp->hi = num.hi;
+ rp->lo = num.lo;
+ }
+ return;
+ }
+
+ if(den.hi != 0){
+ q.hi = 0;
+ n = num.hi/den.hi;
+ if(runtime·_mul64by32(&x, den, n) || x.hi > num.hi || (x.hi == num.hi && x.lo > num.lo))
+ slowdodiv(num, den, &q, &r);
+ else {
+ q.lo = n;
+ *(long long*)&r = *(long long*)&num - *(long long*)&x;
+ }
+ } else {
+ if(num.hi >= den.lo){
+ if(den.lo == 0)
+ runtime·panicdivide();
+ q.hi = n = num.hi/den.lo;
+ num.hi -= den.lo*n;
+ } else {
+ q.hi = 0;
+ }
+ q.lo = runtime·_div64by32(num, den.lo, &r.lo);
+ r.hi = 0;
+ }
+ if(qp) {
+ qp->lo = q.lo;
+ qp->hi = q.hi;
+ }
+ if(rp) {
+ rp->lo = r.lo;
+ rp->hi = r.hi;
+ }
+}
+#endif
+
+Vlong
+_divvu(Vlong n, Vlong d)
+{
+ Vlong q;
+
+ if(n.hi == 0 && d.hi == 0) {
+ if(d.lo == 0)
+ runtime·panicdivide();
+ q.hi = 0;
+ q.lo = n.lo / d.lo;
+ return q;
+ }
+ dodiv(n, d, &q, 0);
+ return q;
+}
+
+Vlong
+_modvu(Vlong n, Vlong d)
+{
+ Vlong r;
+
+ if(n.hi == 0 && d.hi == 0) {
+ if(d.lo == 0)
+ runtime·panicdivide();
+ r.hi = 0;
+ r.lo = n.lo % d.lo;
+ return r;
+ }
+ dodiv(n, d, 0, &r);
+ return r;
+}
+
+static void
+vneg(Vlong *v)
+{
+
+ if(v->lo == 0) {
+ v->hi = -v->hi;
+ return;
+ }
+ v->lo = -v->lo;
+ v->hi = ~v->hi;
+}
+
+Vlong
+_divv(Vlong n, Vlong d)
+{
+ long nneg, dneg;
+ Vlong q;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
+ // special case: 32-bit -0x80000000 / -1 causes divide error,
+ // but it's okay in this 64-bit context.
+ q.lo = 0x80000000;
+ q.hi = 0;
+ return q;
+ }
+ if(d.lo == 0)
+ runtime·panicdivide();
+ q.lo = (long)n.lo / (long)d.lo;
+ q.hi = ((long)q.lo) >> 31;
+ return q;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ dodiv(n, d, &q, 0);
+ if(nneg != dneg)
+ vneg(&q);
+ return q;
+}
+
+Vlong
+_modv(Vlong n, Vlong d)
+{
+ long nneg, dneg;
+ Vlong r;
+
+ if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+ if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
+ // special case: 32-bit -0x80000000 % -1 causes divide error,
+ // but it's okay in this 64-bit context.
+ r.lo = 0;
+ r.hi = 0;
+ return r;
+ }
+ if(d.lo == 0)
+ runtime·panicdivide();
+ r.lo = (long)n.lo % (long)d.lo;
+ r.hi = ((long)r.lo) >> 31;
+ return r;
+ }
+ nneg = n.hi >> 31;
+ if(nneg)
+ vneg(&n);
+ dneg = d.hi >> 31;
+ if(dneg)
+ vneg(&d);
+ dodiv(n, d, 0, &r);
+ if(nneg)
+ vneg(&r);
+ return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_rshav(Vlong a, int b)
+{
+ long t;
+ Vlong r;
+
+ t = a.hi;
+ if(b >= 32) {
+ r.hi = t>>31;
+ if(b >= 64) {
+			/* a shift of 64 or more bits is undefined per the C standard; sign-fill the result anyway */
+ r.lo = t>>31;
+ return r;
+ }
+ r.lo = t >> (b-32);
+ return r;
+ }
+ if(b <= 0) {
+ r.hi = t;
+ r.lo = a.lo;
+ return r;
+ }
+ r.hi = t >> b;
+ r.lo = (t << (32-b)) | (a.lo >> b);
+ return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_rshlv(Vlong a, int b)
+{
+ ulong t;
+ Vlong r;
+
+ t = a.hi;
+ if(b >= 32) {
+ r.hi = 0;
+ if(b >= 64) {
+			/* a shift of 64 or more bits is undefined per the C standard; return 0 anyway */
+ r.lo = 0;
+ return r;
+ }
+ r.lo = t >> (b-32);
+ return r;
+ }
+ if(b <= 0) {
+ r.hi = t;
+ r.lo = a.lo;
+ return r;
+ }
+ r.hi = t >> b;
+ r.lo = (t << (32-b)) | (a.lo >> b);
+ return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_lshv(Vlong a, int b)
+{
+ ulong t;
+
+ t = a.lo;
+ if(b >= 32) {
+ if(b >= 64) {
+			/* a shift of 64 or more bits is undefined per the C standard; return 0 anyway */
+ return (Vlong){0, 0};
+ }
+ return (Vlong){0, t<<(b-32)};
+ }
+ if(b <= 0) {
+ return (Vlong){t, a.hi};
+ }
+ return (Vlong){t<<b, (t >> (32-b)) | (a.hi << b)};
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_andv(Vlong a, Vlong b)
+{
+ Vlong r;
+
+ r.hi = a.hi & b.hi;
+ r.lo = a.lo & b.lo;
+ return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_orv(Vlong a, Vlong b)
+{
+ Vlong r;
+
+ r.hi = a.hi | b.hi;
+ r.lo = a.lo | b.lo;
+ return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_xorv(Vlong a, Vlong b)
+{
+ Vlong r;
+
+ r.hi = a.hi ^ b.hi;
+ r.lo = a.lo ^ b.lo;
+ return r;
+}
+
+Vlong
+_vpp(Vlong *r)
+{
+ Vlong l;
+
+ l = *r;
+ r->lo++;
+ if(r->lo == 0)
+ r->hi++;
+ return l;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_vmm(Vlong *r)
+{
+ Vlong l;
+
+ l = *r;
+ if(r->lo == 0)
+ r->hi--;
+ r->lo--;
+ return l;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_ppv(Vlong *r)
+{
+
+ r->lo++;
+ if(r->lo == 0)
+ r->hi++;
+ return *r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_mmv(Vlong *r)
+{
+
+ if(r->lo == 0)
+ r->hi--;
+ r->lo--;
+ return *r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_vasop(void *lv, Vlong fn(Vlong, Vlong), int type, Vlong rv)
+{
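+	// Implements assignment operators (op=) on narrow integer lvalues:
+	// widen the lvalue to a Vlong, apply fn, store the result back
+	// truncated to the lvalue's type, and return the full 64-bit value.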
+ Vlong t, u;
+
+ u.lo = 0;
+ u.hi = 0;
+ switch(type) {
+ default:
+ runtime·abort();
+ break;
+
+ case 1: /* schar */
+ t.lo = *(schar*)lv;
+ t.hi = t.lo >> 31;
+ u = fn(t, rv);
+ *(schar*)lv = u.lo;
+ break;
+
+ case 2: /* uchar */
+ t.lo = *(uchar*)lv;
+ t.hi = 0;
+ u = fn(t, rv);
+ *(uchar*)lv = u.lo;
+ break;
+
+ case 3: /* short */
+ t.lo = *(short*)lv;
+ t.hi = t.lo >> 31;
+ u = fn(t, rv);
+ *(short*)lv = u.lo;
+ break;
+
+ case 4: /* ushort */
+ t.lo = *(ushort*)lv;
+ t.hi = 0;
+ u = fn(t, rv);
+ *(ushort*)lv = u.lo;
+ break;
+
+ case 9: /* int */
+ t.lo = *(int*)lv;
+ t.hi = t.lo >> 31;
+ u = fn(t, rv);
+ *(int*)lv = u.lo;
+ break;
+
+ case 10: /* uint */
+ t.lo = *(uint*)lv;
+ t.hi = 0;
+ u = fn(t, rv);
+ *(uint*)lv = u.lo;
+ break;
+
+ case 5: /* long */
+ t.lo = *(long*)lv;
+ t.hi = t.lo >> 31;
+ u = fn(t, rv);
+ *(long*)lv = u.lo;
+ break;
+
+ case 6: /* ulong */
+ t.lo = *(ulong*)lv;
+ t.hi = 0;
+ u = fn(t, rv);
+ *(ulong*)lv = u.lo;
+ break;
+
+ case 7: /* vlong */
+ case 8: /* uvlong */
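+		// The shift helpers take an int count as their second argument
+		// rather than a Vlong, so the callback must be reinterpreted
+		// before it is applied.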
+ if((void*)fn == _lshv || (void*)fn == _rshav || (void*)fn == _rshlv)
+ u = ((Vlong(*)(Vlong,int))fn)(*(Vlong*)lv, *(int*)&rv);
+ else
+ u = fn(*(Vlong*)lv, rv);
+ *(Vlong*)lv = u;
+ break;
+ }
+ return u;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_p2v(void *p)
+{
+ long t;
+ Vlong ret;
+
+ t = (ulong)p;
+ ret.lo = t;
+ ret.hi = 0;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_sl2v(long sl)
+{
+ long t;
+ Vlong ret;
+
+ t = sl;
+ ret.lo = t;
+ ret.hi = t >> 31;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_ul2v(ulong ul)
+{
+ long t;
+ Vlong ret;
+
+ t = ul;
+ ret.lo = t;
+ ret.hi = 0;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_si2v(int si)
+{
+ return (Vlong){si, si>>31};
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_ui2v(uint ui)
+{
+ long t;
+ Vlong ret;
+
+ t = ui;
+ ret.lo = t;
+ ret.hi = 0;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_sh2v(long sh)
+{
+ long t;
+ Vlong ret;
+
+ t = (sh << 16) >> 16;
+ ret.lo = t;
+ ret.hi = t >> 31;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_uh2v(ulong ul)
+{
+ long t;
+ Vlong ret;
+
+ t = ul & 0xffff;
+ ret.lo = t;
+ ret.hi = 0;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_sc2v(long uc)
+{
+ long t;
+ Vlong ret;
+
+ t = (uc << 24) >> 24;
+ ret.lo = t;
+ ret.hi = t >> 31;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_uc2v(ulong ul)
+{
+ long t;
+ Vlong ret;
+
+ t = ul & 0xff;
+ ret.lo = t;
+ ret.hi = 0;
+ return ret;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2sc(Vlong rv)
+{
+ long t;
+
+ t = rv.lo & 0xff;
+ return (t << 24) >> 24;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2uc(Vlong rv)
+{
+
+ return rv.lo & 0xff;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2sh(Vlong rv)
+{
+ long t;
+
+ t = rv.lo & 0xffff;
+ return (t << 16) >> 16;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2uh(Vlong rv)
+{
+
+ return rv.lo & 0xffff;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2sl(Vlong rv)
+{
+
+ return rv.lo;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2ul(Vlong rv)
+{
+
+ return rv.lo;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2si(Vlong rv)
+{
+ return rv.lo;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2ui(Vlong rv)
+{
+
+ return rv.lo;
+}
+
+#pragma textflag NOSPLIT
+int
+_testv(Vlong rv)
+{
+ return rv.lo || rv.hi;
+}
+
+#pragma textflag NOSPLIT
+int
+_eqv(Vlong lv, Vlong rv)
+{
+ return lv.lo == rv.lo && lv.hi == rv.hi;
+}
+
+#pragma textflag NOSPLIT
+int
+_nev(Vlong lv, Vlong rv)
+{
+ return lv.lo != rv.lo || lv.hi != rv.hi;
+}
+
+#pragma textflag NOSPLIT
+int
+_ltv(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi < (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+#pragma textflag NOSPLIT
+int
+_lev(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi < (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+#pragma textflag NOSPLIT
+int
+_gtv(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi > (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+#pragma textflag NOSPLIT
+int
+_gev(Vlong lv, Vlong rv)
+{
+ return (long)lv.hi > (long)rv.hi ||
+ (lv.hi == rv.hi && lv.lo >= rv.lo);
+}
+
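+/*
+ * The remaining comparisons treat the operands as unsigned ("lower" and
+ * "higher"): the hi words are compared without the (long) casts used by
+ * the signed comparisons above.
+ */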
+#pragma textflag NOSPLIT
+int
+_lov(Vlong lv, Vlong rv)
+{
+ return lv.hi < rv.hi ||
+ (lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+#pragma textflag NOSPLIT
+int
+_lsv(Vlong lv, Vlong rv)
+{
+ return lv.hi < rv.hi ||
+ (lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+#pragma textflag NOSPLIT
+int
+_hiv(Vlong lv, Vlong rv)
+{
+ return lv.hi > rv.hi ||
+ (lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+#pragma textflag NOSPLIT
+int
+_hsv(Vlong lv, Vlong rv)
+{
+ return lv.hi > rv.hi ||
+ (lv.hi == rv.hi && lv.lo >= rv.lo);
+}
diff --git a/src/runtime/vlrt.go b/src/runtime/vlrt.go
new file mode 100644
index 000000000..6370732ca
--- /dev/null
+++ b/src/runtime/vlrt.go
@@ -0,0 +1,258 @@
+// Inferno's libkern/vlrt-arm.c
+// http://code.google.com/p/inferno-os/source/browse/libkern/vlrt-arm.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// +build arm 386
+
+package runtime
+
+import "unsafe"
+
+const (
+ sign32 = 1 << (32 - 1)
+ sign64 = 1 << (64 - 1)
+)
+
+func float64toint64(d float64) (y uint64) {
+ _d2v(&y, d)
+ return
+}
+
+func float64touint64(d float64) (y uint64) {
+ _d2v(&y, d)
+ return
+}
+
+func int64tofloat64(y int64) float64 {
+ if y < 0 {
+ return -uint64tofloat64(-uint64(y))
+ }
+ return uint64tofloat64(uint64(y))
+}
+
+func uint64tofloat64(y uint64) float64 {
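+	// Convert each 32-bit half separately: the halves and the scale by
+	// 2**32 are exact in float64, so only the final addition can round.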
+ hi := float64(uint32(y >> 32))
+ lo := float64(uint32(y))
+ d := hi*(1<<32) + lo
+ return d
+}
+
+func _d2v(y *uint64, d float64) {
+ x := *(*uint64)(unsafe.Pointer(&d))
+
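+	// IEEE 754 double: value = (1<<52 | fraction) * 2**(exp-1075), since
+	// the exponent bias is 1023 and there are 52 fraction bits.  xhi
+	// restores the implicit leading bit (0x100000 is bit 52 as seen from
+	// the high word) and sh is the right shift that recovers the integer
+	// part of the value.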
+ xhi := uint32(x>>32)&0xfffff | 0x100000
+ xlo := uint32(x)
+ sh := 1075 - int32(uint32(x>>52)&0x7ff)
+
+ var ylo, yhi uint32
+ if sh >= 0 {
+ sh := uint32(sh)
+ /* v = (hi||lo) >> sh */
+ if sh < 32 {
+ if sh == 0 {
+ ylo = xlo
+ yhi = xhi
+ } else {
+ ylo = xlo>>sh | xhi<<(32-sh)
+ yhi = xhi >> sh
+ }
+ } else {
+ if sh == 32 {
+ ylo = xhi
+ } else if sh < 64 {
+ ylo = xhi >> (sh - 32)
+ }
+ }
+ } else {
+ /* v = (hi||lo) << -sh */
+ sh := uint32(-sh)
+ if sh <= 11 {
+ ylo = xlo << sh
+ yhi = xhi<<sh | xlo>>(32-sh)
+ } else {
+ /* overflow */
+ yhi = uint32(d) /* causes something awful */
+ }
+ }
+ if x&sign64 != 0 {
+ if ylo != 0 {
+ ylo = -ylo
+ yhi = ^yhi
+ } else {
+ yhi = -yhi
+ }
+ }
+
+ *y = uint64(yhi)<<32 | uint64(ylo)
+}
+
+func uint64div(n, d uint64) uint64 {
+ // Check for 32 bit operands
+ if uint32(n>>32) == 0 && uint32(d>>32) == 0 {
+ if uint32(d) == 0 {
+ panicdivide()
+ }
+ return uint64(uint32(n) / uint32(d))
+ }
+ q, _ := dodiv(n, d)
+ return q
+}
+
+func uint64mod(n, d uint64) uint64 {
+ // Check for 32 bit operands
+ if uint32(n>>32) == 0 && uint32(d>>32) == 0 {
+ if uint32(d) == 0 {
+ panicdivide()
+ }
+ return uint64(uint32(n) % uint32(d))
+ }
+ _, r := dodiv(n, d)
+ return r
+}
+
+func int64div(n, d int64) int64 {
+ // Check for 32 bit operands
+ if int64(int32(n)) == n && int64(int32(d)) == d {
+ if int32(n) == -0x80000000 && int32(d) == -1 {
+ // special case: 32-bit -0x80000000 / -1 = -0x80000000,
+ // but 64-bit -0x80000000 / -1 = 0x80000000.
+ return 0x80000000
+ }
+ if int32(d) == 0 {
+ panicdivide()
+ }
+ return int64(int32(n) / int32(d))
+ }
+
+ nneg := n < 0
+ dneg := d < 0
+ if nneg {
+ n = -n
+ }
+ if dneg {
+ d = -d
+ }
+ uq, _ := dodiv(uint64(n), uint64(d))
+ q := int64(uq)
+ if nneg != dneg {
+ q = -q
+ }
+ return q
+}
+
+func int64mod(n, d int64) int64 {
+ // Check for 32 bit operands
+ if int64(int32(n)) == n && int64(int32(d)) == d {
+ if int32(d) == 0 {
+ panicdivide()
+ }
+ return int64(int32(n) % int32(d))
+ }
+
+ nneg := n < 0
+ if nneg {
+ n = -n
+ }
+ if d < 0 {
+ d = -d
+ }
+ _, ur := dodiv(uint64(n), uint64(d))
+ r := int64(ur)
+ if nneg {
+ r = -r
+ }
+ return r
+}
+
+//go:noescape
+func _mul64by32(lo64 *uint64, a uint64, b uint32) (hi32 uint32)
+
+//go:noescape
+func _div64by32(a uint64, b uint32, r *uint32) (q uint32)
+
+func dodiv(n, d uint64) (q, r uint64) {
+ if GOARCH == "arm" {
+ // arm doesn't have a division instruction, so
+ // slowdodiv is the best that we can do.
+ // TODO: revisit for arm64.
+ return slowdodiv(n, d)
+ }
+
+ if d > n {
+ return 0, n
+ }
+
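+	// When d does not fit in 32 bits, estimate the quotient from the
+	// high words.  The estimate can only be too large, so multiply it
+	// back out with _mul64by32 and fall back to slowdodiv if the product
+	// overflows or exceeds n.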
+ if uint32(d>>32) != 0 {
+ t := uint32(n>>32) / uint32(d>>32)
+ var lo64 uint64
+ hi32 := _mul64by32(&lo64, d, t)
+ if hi32 != 0 || lo64 > n {
+ return slowdodiv(n, d)
+ }
+ return uint64(t), n - lo64
+ }
+
+ // d is 32 bit
+ var qhi uint32
+ if uint32(n>>32) >= uint32(d) {
+ if uint32(d) == 0 {
+ panicdivide()
+ }
+ qhi = uint32(n>>32) / uint32(d)
+ n -= uint64(uint32(d)*qhi) << 32
+ } else {
+ qhi = 0
+ }
+
+ var rlo uint32
+ qlo := _div64by32(n, uint32(d), &rlo)
+ return uint64(qhi)<<32 + uint64(qlo), uint64(rlo)
+}
+
+func slowdodiv(n, d uint64) (q, r uint64) {
+ if d == 0 {
+ panicdivide()
+ }
+
+ // Set up the divisor and find the number of iterations needed.
+ capn := n
+ if n >= sign64 {
+ capn = sign64
+ }
+ i := 0
+ for d < capn {
+ d <<= 1
+ i++
+ }
+
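+	// d has been shifted left until it is at least capn, with i counting
+	// the shifts; the loop below produces one quotient bit per iteration,
+	// most significant first.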
+ for ; i >= 0; i-- {
+ q <<= 1
+ if n >= d {
+ n -= d
+ q |= 1
+ }
+ d >>= 1
+ }
+ return q, n
+}