summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dmitry Stogov <dmitry@zend.com> 2015-02-20 16:08:57 +0300
committer: Dmitry Stogov <dmitry@zend.com> 2015-02-20 16:08:57 +0300
commit: 623810eff40d82e6d7c9f17aeaacec43d741d4c6 (patch)
tree: 8f13cb587d34aef72f950a527e868ee90f0195da
parent: 5100afb7dd06b2515a3c524b4be02c8d3a80a091 (diff)
download: php-git-623810eff40d82e6d7c9f17aeaacec43d741d4c6.tar.gz
Use fast SSE2 memcpy() for copying block of data from SHM to process memory
-rw-r--r-- ext/opcache/ZendAccelerator.c 6
-rw-r--r-- ext/opcache/zend_accelerator_util_funcs.c 35
-rw-r--r-- ext/opcache/zend_persist.c 5
-rw-r--r-- ext/opcache/zend_persist_calc.c 10
4 files changed, 56 insertions, 0 deletions
diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c
index 62e62e1821..cdcbfeda56 100644
--- a/ext/opcache/ZendAccelerator.c
+++ b/ext/opcache/ZendAccelerator.c
@@ -1163,7 +1163,13 @@ static zend_persistent_script *cache_script_in_shared_memory(zend_persistent_scr
memory_used = zend_accel_script_persist_calc(new_persistent_script, key, key_length);
/* Allocate shared memory */
+#ifdef __SSE2__
+ /* Align to 64-byte boundary */
+ ZCG(mem) = zend_shared_alloc(memory_used + 64);
+ ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L);
+#else
ZCG(mem) = zend_shared_alloc(memory_used);
+#endif
if (!ZCG(mem)) {
zend_accel_schedule_restart_if_necessary(ACCEL_RESTART_OOM);
zend_shared_alloc_unlock();
diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c
index 8a52a53103..2e7f0c8da5 100644
--- a/ext/opcache/zend_accelerator_util_funcs.c
+++ b/ext/opcache/zend_accelerator_util_funcs.c
@@ -771,6 +771,34 @@ failure:
zend_error(E_ERROR, "Cannot redeclare class %s", ce1->name->val);
}
+#ifdef __SSE2__
+#include <mmintrin.h>
+#include <emmintrin.h>
+/* Copies `size` bytes from `src` to `dest`, 64 bytes (four __m128i values) per loop pass; `size` must be a non-zero multiple of 64 (see the loop exit test). */
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size)
+{
+ __m128i *dqdest = (__m128i*)dest;
+ const __m128i *dqsrc = (const __m128i*)src;
+ const __m128i *end = (const __m128i*)((const char*)src + size); /* one past the last source byte */
+ /* do/while with a `!=` exit test: the body always runs at least once and stops only when dqsrc lands exactly on end. */
+ do {
+ _mm_prefetch(dqsrc + 4, _MM_HINT_NTA); /* prefetch hint for source data 64 bytes ahead of dqsrc */
+ _mm_prefetch(dqsrc + 6, _MM_HINT_NTA); /* ... and 96 bytes ahead */
+ /* All four loads are issued before any store. NOTE(review): _mm_load_si128/_mm_stream_si128 presumably need 16-byte-aligned src/dest - confirm against Intel docs and callers. */
+ __m128i xmm0 = _mm_load_si128(dqsrc + 0);
+ __m128i xmm1 = _mm_load_si128(dqsrc + 1);
+ __m128i xmm2 = _mm_load_si128(dqsrc + 2);
+ __m128i xmm3 = _mm_load_si128(dqsrc + 3);
+ dqsrc += 4; /* advance the source by 4 * sizeof(__m128i) */
+ _mm_stream_si128(dqdest + 0, xmm0);
+ _mm_stream_si128(dqdest + 1, xmm1);
+ _mm_stream_si128(dqdest + 2, xmm2);
+ _mm_stream_si128(dqdest + 3, xmm3);
+ dqdest += 4; /* keep the destination in step with the source */
+ } while (dqsrc != end); /* equality test only: a size that is 0 or not a multiple of 64 steps past end */
+}
+#endif
+
zend_op_array* zend_accel_load_script(zend_persistent_script *persistent_script, int from_shared_memory)
{
zend_op_array *op_array;
@@ -784,8 +812,15 @@ zend_op_array* zend_accel_load_script(zend_persistent_script *persistent_script,
ZCG(current_persistent_script) = persistent_script;
ZCG(arena_mem) = NULL;
if (EXPECTED(persistent_script->arena_size)) {
+#ifdef __SSE2__
+ /* Target address must be aligned to 64-byte boundary */
+ ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size + 64);
+ ZCG(arena_mem) = (void*)(((zend_uintptr_t)ZCG(arena_mem) + 63L) & ~63L);
+ fast_memcpy(ZCG(arena_mem), persistent_script->arena_mem, persistent_script->arena_size);
+#else
ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size);
memcpy(ZCG(arena_mem), persistent_script->arena_mem, persistent_script->arena_size);
+#endif
}
/* Copy all the necessary stuff from shared memory to regular memory, and protect the shared script */
diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c
index 1c440eceae..eb442e9742 100644
--- a/ext/opcache/zend_persist.c
+++ b/ext/opcache/zend_persist.c
@@ -734,6 +734,11 @@ zend_persistent_script *zend_accel_script_persist(zend_persistent_script *script
*key = zend_accel_memdup(*key, key_length + 1);
zend_accel_store_string(script->full_path);
+#ifdef __SSE2__
+ /* Align to 64-byte boundary */
+ ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L);
+#endif
+
script->arena_mem = ZCG(arena_mem) = ZCG(mem);
ZCG(mem) = (void*)((char*)ZCG(mem) + script->arena_size);
diff --git a/ext/opcache/zend_persist_calc.c b/ext/opcache/zend_persist_calc.c
index 2f211e43cb..7d77d62b8d 100644
--- a/ext/opcache/zend_persist_calc.c
+++ b/ext/opcache/zend_persist_calc.c
@@ -361,10 +361,20 @@ uint zend_accel_script_persist_calc(zend_persistent_script *new_persistent_scrip
ADD_DUP_SIZE(key, key_length + 1);
ADD_STRING(new_persistent_script->full_path);
+#ifdef __SSE2__
+ /* Align size to 64-byte boundary */
+ new_persistent_script->size = (new_persistent_script->size + 63) & ~63;
+#endif
+
zend_accel_persist_class_table_calc(&new_persistent_script->class_table);
zend_hash_persist_calc(&new_persistent_script->function_table, zend_persist_op_array_calc);
zend_persist_op_array_calc_ex(&new_persistent_script->main_op_array);
+#ifdef __SSE2__
+ /* Align size to 64-byte boundary */
+ new_persistent_script->arena_size = (new_persistent_script->arena_size + 63) & ~63;
+#endif
+
new_persistent_script->size += new_persistent_script->arena_size;
ZCG(current_persistent_script) = NULL;