summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteinar H. Gunderson <sgunderson@bigfoot.com>2019-03-18 23:52:11 +0100
committerXiang, Haihao <haihao.xiang@intel.com>2019-04-29 10:25:31 +0800
commitf6573cb75a1c6d57c35f22a8b5d8ce134a9d3d86 (patch)
tree13d6355f46c2857e6a1a4d491b2b7e7aa8540a04
parent881e67a49b14631cb24a3880a28af9e5e5bacb96 (diff)
downloadlibva-intel-driver-f6573cb75a1c6d57c35f22a8b5d8ce134a9d3d86.tar.gz
i965_encoder: Speed up i965_MapBuffer for JPEG encoding somewhat.
Searching for the EOI marker byte-by-byte turned out to be notable in profiles when encoding large amounts of JPEGs (~15% of a core at 480 fps of 1080p images). Using memmem() will typically give us an AVX-optimized version of at least finding the 0xFF character, which is much more efficient. It seems to speed up this part by about 3–4x in practice, taking it largely off the profiles. Signed-off-by: Steinar H. Gunderson <steinar+nageru@gunderson.no>
-rw-r--r--src/i965_drv_video.c12
1 files changed, 6 insertions, 6 deletions
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index e1b688ae..4aa8da7b 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -3116,12 +3116,12 @@ i965_MapBuffer(VADriverContextP ctx,
}
if (coded_buffer_segment->codec == CODEC_JPEG) {
- for (i = 0; i < obj_buffer->size_element - header_offset - 1 - 0x1000; i++) {
- if ((buffer[i] == 0xFF) && (buffer[i + 1] == 0xD9)) {
- break;
- }
- }
- coded_buffer_segment->base.size = i + 2;
+ int len = obj_buffer->size_element - header_offset - 1 - 0x1000;
+ unsigned char *end_of_file_marker = memmem(buffer, len, "\xff\xd9", 2);
+ if (end_of_file_marker == NULL)
+ coded_buffer_segment->base.size = len + 2;
+ else
+ coded_buffer_segment->base.size = (end_of_file_marker - buffer) + 2;
} else if (coded_buffer_segment->codec != CODEC_VP8) {
/* vp8 coded buffer size can be told by vp8 internal statistics buffer,
so it don't need to traversal the coded buffer */