summaryrefslogtreecommitdiff
path: root/src/liblink
diff options
context:
space:
mode:
author Keith Randall <khr@golang.org> 2014-04-01 12:51:02 -0700
committer Keith Randall <khr@golang.org> 2014-04-01 12:51:02 -0700
commit 8595a9617da50fc99ca953101e332952378a3b6e (patch)
tree 203c4f89df716609c8743e8bfb93249bdd353282 /src/liblink
parent cf0e6253c7847a5d5bda488fb5dbadd58f17c86b (diff)
download go-8595a9617da50fc99ca953101e332952378a3b6e.tar.gz
runtime: get rid of most uses of REP for copying/zeroing.
REP MOVSQ and REP STOSQ have a really high startup overhead.
Use a Duff's device to do the repetition instead.

benchmark                 old ns/op   new ns/op   delta
BenchmarkClearFat32       7.20        1.60        -77.78%
BenchmarkCopyFat32        6.88        2.38        -65.41%
BenchmarkClearFat64       7.15        3.20        -55.24%
BenchmarkCopyFat64        6.88        3.44        -50.00%
BenchmarkClearFat128      9.53        5.34        -43.97%
BenchmarkCopyFat128       9.27        5.56        -40.02%
BenchmarkClearFat256      13.8        9.53        -30.94%
BenchmarkCopyFat256       13.5        10.3        -23.70%
BenchmarkClearFat512      22.3        18.0        -19.28%
BenchmarkCopyFat512       22.0        19.7        -10.45%
BenchmarkCopyFat1024      36.5        38.4        +5.21%
BenchmarkClearFat1024     35.1        35.0        -0.28%

TODO: use for stack frame zeroing
TODO: REP prefixes are still used for "reverse" copying when src/dst
regions overlap. Might be worth fixing.

LGTM=rsc
R=golang-codereviews, rsc
CC=golang-codereviews, r
https://codereview.appspot.com/81370046
Diffstat (limited to 'src/liblink')
-rw-r--r-- src/liblink/asm6.c 9
-rw-r--r-- src/liblink/asm8.c 9
2 files changed, 18 insertions, 0 deletions
diff --git a/src/liblink/asm6.c b/src/liblink/asm6.c
index b2690bf0e..040366521 100644
--- a/src/liblink/asm6.c
+++ b/src/liblink/asm6.c
@@ -507,6 +507,11 @@ static uchar ycall[] =
Ynone, Ybr, Zcall, 1,
0
};
+static uchar yduff[] =
+{
+ Ynone, Yi32, Zcall, 1,
+ 0
+};
static uchar yjmp[] =
{
Ynone, Yml, Zo_m64, 2,
@@ -1519,6 +1524,9 @@ Optab optab[] =
{ APCDATA, ypcdata, Px, 0,0 },
{ ACHECKNIL },
{ AVARDEF },
+ { AVARKILL },
+ { ADUFFCOPY, yduff, Px, 0xe8 },
+ { ADUFFZERO, yduff, Px, 0xe8 },
{ AEND },
0
@@ -3030,6 +3038,7 @@ found:
r = addrel(ctxt->cursym);
r->off = p->pc + ctxt->andptr - ctxt->and;
r->sym = p->to.sym;
+ r->add = p->to.offset;
r->type = D_PCREL;
r->siz = 4;
put4(ctxt, 0);
diff --git a/src/liblink/asm8.c b/src/liblink/asm8.c
index 15d9c038c..2e4bc709e 100644
--- a/src/liblink/asm8.c
+++ b/src/liblink/asm8.c
@@ -420,6 +420,11 @@ static uchar ycall[] =
Ynone, Yi32, Zcallcon, 1,
0
};
+static uchar yduff[] =
+{
+ Ynone, Yi32, Zcall, 1,
+ 0
+};
static uchar yjmp[] =
{
Ynone, Yml, Zo_m, 2,
@@ -1147,6 +1152,9 @@ static Optab optab[] =
{ APCDATA, ypcdata, Px, 0,0 },
{ ACHECKNIL },
{ AVARDEF },
+ { AVARKILL },
+ { ADUFFCOPY, yduff, Px, 0xe8 },
+ { ADUFFZERO, yduff, Px, 0xe8 },
0
};
@@ -2377,6 +2385,7 @@ found:
r->type = D_PCREL;
r->siz = 4;
r->sym = p->to.sym;
+ r->add = p->to.offset;
put4(ctxt, 0);
break;