summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaisuke Nojiri <dnojiri@google.com>2014-02-11 17:06:50 -0800
committerchrome-internal-fetch <chrome-internal-fetch@google.com>2014-02-13 20:37:05 +0000
commite7e0cf2cae2794437c4c71884b54c96cb6caf9b5 (patch)
tree3a0d51c3342587e90b7bfa6cf9d7e11cb42dd801
parent7aa3258ae71f114e2ec328851bd201b2c9799ad4 (diff)
downloadchrome-ec-e7e0cf2cae2794437c4c71884b54c96cb6caf9b5.tar.gz
Optimize memmove
This speeds up memmove by copying a word at a time. Ran the unit test on Peppy: > runtest ... Running test_memmove... (speed gain: 2156 -> 592 us) OK ... Ran make buildall: ... Running test_memmove... (speed gain: 143918 -> 32367 us) OK ... TEST=Described above. BUG=chrome-os-partner:23720 BRANCH=none Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org> Tested-by: Daisuke Nojiri <dnojiri@google.com> Change-Id: I6a3ac6aed27a404c3bef227b6c886a59414b51d7 Reviewed-on: https://chromium-review.googlesource.com/186020 Reviewed-by: Vic Yang <victoryang@chromium.org> Reviewed-by: Randall Spangler <rspangler@chromium.org>
-rw-r--r--common/util.c44
-rw-r--r--test/utils.c43
2 files changed, 71 insertions, 16 deletions
diff --git a/common/util.c b/common/util.c
index 628aef9e82..5158d8aa06 100644
--- a/common/util.c
+++ b/common/util.c
@@ -238,17 +238,45 @@ void *memmove(void *dest, const void *src, int len)
* memcpy(). */
return memcpy(dest, src, len);
} else {
- /* Copy from end, so we don't overwrite the source */
+ /* Need to copy from tail because there is overlap. */
char *d = (char *)dest + len;
const char *s = (const char *)src + len;
- /*
- * TODO(crosbug.com/p/23720): if src/dest are aligned, copy a
- * word at a time instead.
- */
- while (len > 0) {
- *(--d) = *(--s);
- len--;
+ uint32_t *dw;
+ const uint32_t *sw;
+ char *head;
+ char * const tail = (char *)dest;
+ /* Set 'body' to the last word boundary */
+ uint32_t * const body = (uint32_t *)(((uintptr_t)tail+3) & ~3);
+
+ if (((uintptr_t)dest & 3) != ((uintptr_t)src & 3)) {
+ /* Misaligned. no body, no tail. */
+ head = tail;
+ } else {
+ /* Aligned */
+ if ((uintptr_t)tail > ((uintptr_t)d & ~3))
+ /* Shorter than the first word boundary */
+ head = tail;
+ else
+ /* Set 'head' to the first word boundary */
+ head = (char *)((uintptr_t)d & ~3);
}
+
+ /* Copy head */
+ while (d > head)
+ *(--d) = *(--s);
+
+ /* Copy body */
+ dw = (uint32_t *)d;
+ sw = (uint32_t *)s;
+ while (dw > body)
+ *(--dw) = *(--sw);
+
+ /* Copy tail */
+ d = (char *)dw;
+ s = (const char *)sw;
+ while (d > tail)
+ *(--d) = *(--s);
+
return dest;
}
}
diff --git a/test/utils.c b/test/utils.c
index 1e58184977..9e1138074f 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -68,18 +68,45 @@ static int test_parse_bool(void)
static int test_memmove(void)
{
- char buf[100];
int i;
+ timestamp_t t0, t1, t2, t3;
+ char *buf;
+ const int buf_size = 1000;
+ const int len = 400;
+ const int iteration = 1000;
+
+ TEST_ASSERT(shared_mem_acquire(buf_size, &buf) == EC_SUCCESS);
- for (i = 0; i < 100; ++i)
+ for (i = 0; i < len; ++i)
+ buf[i] = i & 0x7f;
+ for (i = len; i < buf_size; ++i)
buf[i] = 0;
- for (i = 0; i < 30; ++i)
- buf[i] = i;
- memmove(buf + 60, buf, 30);
- TEST_ASSERT_ARRAY_EQ(buf, buf + 60, 30);
- memmove(buf + 10, buf, 30);
- TEST_ASSERT_ARRAY_EQ(buf + 10, buf + 60, 30);
+ t0 = get_time();
+ for (i = 0; i < iteration; ++i)
+ memmove(buf + 101, buf, len); /* unaligned */
+ t1 = get_time();
+ TEST_ASSERT_ARRAY_EQ(buf + 101, buf, len);
+ ccprintf(" (speed gain: %d ->", t1.val-t0.val);
+
+ t2 = get_time();
+ for (i = 0; i < iteration; ++i)
+ memmove(buf + 100, buf, len); /* aligned */
+ t3 = get_time();
+ ccprintf(" %d us) ", t3.val-t2.val);
+ TEST_ASSERT_ARRAY_EQ(buf + 100, buf, len);
+
+ /* Expected about 4x speed gain. Use 3x because it fluctuates */
+ TEST_ASSERT((t1.val-t0.val) > (t3.val-t2.val) * 3);
+
+ /* Test small moves */
+ memmove(buf + 1, buf, 1);
+ TEST_ASSERT_ARRAY_EQ(buf + 1, buf, 1);
+ memmove(buf + 5, buf, 4);
+ memmove(buf + 1, buf, 4);
+ TEST_ASSERT_ARRAY_EQ(buf + 1, buf + 5, 4);
+
+ shared_mem_release(buf);
return EC_SUCCESS;
}