summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Anurag Thakur <anurag105csec21@bpitindia.edu.in> 2022-09-16 00:45:17 +0530
committer Anurag Thakur <anurag105csec21@bpitindia.edu.in> 2022-10-04 03:16:11 +0530
commit 0b682c54171d2923b6f3a6c0f4cae05b707ee711 (patch)
tree 834250b655892347c4d5cdd9fd70cb31a2716296
parent 3d0e33d2aaff957a30a9c49985485d2930341de1 (diff)
download freetype2-0b682c54171d2923b6f3a6c0f4cae05b707ee711.tar.gz
Add SIMD
-rw-r--r-- .vscode/settings.json 5
-rw-r--r-- src/dense/ftdense.c 47
-rw-r--r-- src/dense/rules.mk 5
3 files changed, 39 insertions, 18 deletions
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 1a2f8af47..7f56b4bc5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,6 +2,7 @@
"files.associations": {
"ftoutln.h": "c",
"svprop.h": "c",
- "ftdebug.h": "c"
+ "ftdebug.h": "c",
+ "tmmintrin.h": "c"
}
-}
\ No newline at end of file
+}
diff --git a/src/dense/ftdense.c b/src/dense/ftdense.c
index dfd0f7417..4b0a20b28 100644
--- a/src/dense/ftdense.c
+++ b/src/dense/ftdense.c
@@ -11,6 +11,7 @@
#include "ftdense.h"
#include <math.h>
+#include <tmmintrin.h>
#include "ftdenseerrs.h"
#define PIXEL_BITS 8
@@ -372,22 +373,40 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
unsigned char* dest = target->buffer;
unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
- float value = 0.0f;
- while ( dest < dest_end )
- {
- value += *source++;
- if ( value > 0.0f )
- {
- int n = (int)( fabs( value ) * 255.0f + 0.5f );
- if ( n > 255 )
- n = 255;
- *dest = (unsigned char)n;
- }
- else
- *dest = 0;
- dest++;
+
+ __m128 offset = _mm_setzero_ps();
+ __m128i mask = _mm_set1_epi32(0x0c080400);
+ __m128 sign_mask = _mm_set1_ps(-0.f);
+ for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
+ __m128 x = _mm_load_ps(&source[i]);
+ x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
+ x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
+ x = _mm_add_ps(x, offset);
+ __m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x)
+ y = _mm_min_ps(y, _mm_set1_ps(1.0f));
+ y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
+ __m128i z = _mm_cvtps_epi32(y);
+ z = _mm_shuffle_epi8(z, mask);
+ _mm_store_ss((float *)&dest[i], (__m128)z);
+ offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
}
+ // float value = 0.0f;
+ // while ( dest < dest_end )
+ // {
+ // value += *source++;
+ // if ( value > 0.0f )
+ // {
+ // int n = (int)( fabs( value ) * 255.0f + 0.5f );
+ // if ( n > 255 )
+ // n = 255;
+ // *dest = (unsigned char)n;
+ // }
+ // else
+ // *dest = 0;
+ // dest++;
+ // }
+
free(worker->m_a);
return error;
}
diff --git a/src/dense/rules.mk b/src/dense/rules.mk
index 005116873..38874f28e 100644
--- a/src/dense/rules.mk
+++ b/src/dense/rules.mk
@@ -22,8 +22,9 @@ DENSE_DIR := $(SRC_DIR)/dense
#
DENSE_COMPILE := $(CC) $(ANSIFLAGS) \
$I$(subst /,$(COMPILER_SEP),$(DENSE_DIR)) \
- $(INCLUDE_FLAGS) \
- $(FT_CFLAGS)
+ $(INCLUDE_FLAGS) \
+ $(FT_CFLAGS) \
+ "-msse4.1"
# DENSE driver sources (i.e., C files)