diff options
author | Daisuke Nojiri <dnojiri@google.com> | 2014-02-08 17:03:15 -0800 |
---|---|---|
committer | chrome-internal-fetch <chrome-internal-fetch@google.com> | 2014-02-12 19:40:52 +0000 |
commit | d3facbd92fe4e3f9815a9c4896bf2d5b31e51899 (patch) | |
tree | 30211686ce73766e052cc6dfac9a167eec441039 /test | |
parent | a78c59e4acbbd5b85c221b477b2db43f5e5d679b (diff) | |
download | chrome-ec-d3facbd92fe4e3f9815a9c4896bf2d5b31e51899.tar.gz |
Optimize memcpy
This speeds up memcpy by copying a word at a time if the source and destination
addresses are congruent mod 4. That is, if n and m are positive integers:
4n -> 4m: aligned, 4x speed.
4n -> 4m+1: misaligned.
4n+1 -> 4m+1: aligned in mod 4, 4x speed.
Ran the unit test on Peppy:
> runtest
...
Running test_memcpy... (speed gain: 120300 -> 38103 us) OK
...
Ran make buildall -j:
...
Running test_memcpy... (speed gain: 2084 -> 549 us) OK
...
Note that the misaligned case is also optimized. The unit test runs in 298 us
on Peppy, while it takes about 475 us with the original memcpy.
TEST=Described above.
BUG=chrome-os-partner:23720
BRANCH=none
Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org>
Change-Id: Ic12260451c5efd0896d6353017cd45d29cb672db
Tested-by: Daisuke Nojiri <dnojiri@google.com>
Reviewed-on: https://chromium-review.googlesource.com/185618
Reviewed-by: Randall Spangler <rspangler@chromium.org>
Reviewed-by: Vincent Palatin <vpalatin@chromium.org>
Commit-Queue: Daisuke Nojiri <dnojiri@google.com>
Diffstat (limited to 'test')
-rw-r--r-- | test/utils.c | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/test/utils.c b/test/utils.c
index 60d2c83a0f..1e58184977 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -83,6 +83,57 @@ static int test_memmove(void)
 	return EC_SUCCESS;
 }
 
+static int test_memcpy(void)
+{
+	int i;
+	timestamp_t t0, t1, t2, t3;
+	char *buf;
+	const int buf_size = 1000;
+	const int len = 400;
+	const int dest_offset = 500;
+	const int iteration = 1000;
+
+	TEST_ASSERT(shared_mem_acquire(buf_size, &buf) == EC_SUCCESS);
+
+	for (i = 0; i < len; ++i)
+		buf[i] = i & 0x7f;
+	for (i = len; i < buf_size; ++i)
+		buf[i] = 0;
+
+	t0 = get_time();
+	for (i = 0; i < iteration; ++i)
+		memcpy(buf + dest_offset + 1, buf, len); /* unaligned */
+	t1 = get_time();
+	TEST_ASSERT_ARRAY_EQ(buf + dest_offset + 1, buf, len);
+	ccprintf(" (speed gain: %d ->", t1.val-t0.val);
+
+	t2 = get_time();
+	for (i = 0; i < iteration; ++i)
+		memcpy(buf + dest_offset, buf, len); /* aligned */
+	t3 = get_time();
+	ccprintf(" %d us) ", t3.val-t2.val);
+	TEST_ASSERT_ARRAY_EQ(buf + dest_offset, buf, len);
+
+	/* Expected about 4x speed gain. Use 3x because it fluctuates */
+	TEST_ASSERT((t1.val-t0.val) > (t3.val-t2.val) * 3);
+
+	memcpy(buf + dest_offset + 1, buf + 1, len - 1);
+	TEST_ASSERT_ARRAY_EQ(buf + dest_offset + 1, buf + 1, len - 1);
+
+	/* Test small copies */
+	memcpy(buf + dest_offset, buf, 1);
+	TEST_ASSERT_ARRAY_EQ(buf + dest_offset, buf, 1);
+	memcpy(buf + dest_offset, buf, 4);
+	TEST_ASSERT_ARRAY_EQ(buf + dest_offset, buf, 4);
+	memcpy(buf + dest_offset + 1, buf, 1);
+	TEST_ASSERT_ARRAY_EQ(buf + dest_offset + 1, buf, 1);
+	memcpy(buf + dest_offset + 1, buf, 4);
+	TEST_ASSERT_ARRAY_EQ(buf + dest_offset + 1, buf, 4);
+
+	shared_mem_release(buf);
+	return EC_SUCCESS;
+}
+
 static int test_strzcpy(void)
 {
 	char dest[10];
@@ -305,6 +356,7 @@ void run_test(void)
 	RUN_TEST(test_strtoi);
 	RUN_TEST(test_parse_bool);
 	RUN_TEST(test_memmove);
+	RUN_TEST(test_memcpy);
 	RUN_TEST(test_strzcpy);
 	RUN_TEST(test_strlen);
 	RUN_TEST(test_strcasecmp);