From bf58d1725c2f6b12e0403d584ab973c569056a71 Mon Sep 17 00:00:00 2001 From: hkuang Date: Wed, 2 Jul 2014 13:08:29 -0700 Subject: Revert "Revert "Revert "Revert 3 patches from Hangyu to get Chrome to build:""" This reverts commit 749e0c7b2883139afa14b4886bbd6a940d021f4f. Change-Id: I0c63a152baf94d38496dd925a40040366153bf4f --- test/user_priv_test.cc | 2 +- vp9/decoder/vp9_decoder.c | 9 +- vp9/decoder/vp9_dthread.h | 17 +++ vp9/vp9_dx_iface.c | 292 +++++++++++++++++++++++++++++++++------------- 4 files changed, 237 insertions(+), 83 deletions(-) diff --git a/test/user_priv_test.cc b/test/user_priv_test.cc index 22fce857c..f9aef33da 100644 --- a/test/user_priv_test.cc +++ b/test/user_priv_test.cc @@ -78,7 +78,7 @@ string DecodeFile(const string &filename) { ref.idx = rnd.Rand8() % 3; decoder.Control(VP9_GET_REFERENCE, &ref); - CheckUserPrivateData(ref.img.user_priv, NULL); + CheckUserPrivateData(ref.img.user_priv, &frame_num); } md5.Add(img); } diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index d154e9d81..13f9d71d7 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -210,7 +210,10 @@ static void swap_frame_buffers(VP9Decoder *pbi) { } cm->frame_to_show = get_frame_new_buffer(cm); - cm->frame_bufs[cm->new_fb_idx].ref_count--; + + if (!pbi->frame_parallel_decode || !cm->show_frame) { + --cm->frame_bufs[cm->new_fb_idx].ref_count; + } // Invalidate these references until the next frame starts. for (ref_index = 0; ref_index < 3; ref_index++) @@ -239,7 +242,9 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, } // Check if the previous frame was a frame without any references to it. - if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0) + // Release frame buffer if not decoding in frame parallel mode. 
+ if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 + && cm->frame_bufs[cm->new_fb_idx].ref_count == 0) cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer); cm->new_fb_idx = get_free_fb(cm); diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index a727e2aef..01c07f1a0 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -40,6 +40,23 @@ typedef struct VP9LfSyncData { int sync_range; } VP9LfSync; +// WorkerData for the FrameWorker thread. It contains all the information of +// the worker and decode structures for decoding a frame. +typedef struct FrameWorkerData { + struct VP9Decoder *pbi; + const uint8_t *data; + const uint8_t *data_end; + size_t data_size; + void *user_priv; + int result; + int worker_id; + + // scratch_buffer is used in frame parallel mode only. + // It is used to make a copy of the compressed data. + uint8_t *scratch_buffer; + size_t scratch_buffer_size; +} FrameWorkerData; + // Allocate memory for loopfilter row synchronization. void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync, int rows, int width); diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index c3ca7ee8f..95897ae63 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -32,15 +32,19 @@ struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; - struct VP9Decoder *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; vpx_decrypt_cb decrypt_cb; void *decrypt_state; vpx_image_t img; - int img_avail; int invert_tile_order; int frame_parallel_decode; // frame-based threading. + int last_show_frame; // Index of last output frame. + + VP9Worker *frame_workers; + int num_frame_workers; + int next_submit_thread_id; + int next_output_thread_id; // External frame buffer info to save for VP9 common. void *ext_priv; // Private data associated with the external frame buffers. 
@@ -85,11 +89,17 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, } static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { - if (ctx->pbi) { - vp9_decoder_remove(ctx->pbi); - ctx->pbi = NULL; + if (ctx->frame_workers != NULL) { + int i; + for (i = 0; i < ctx->num_frame_workers; ++i) { + VP9Worker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + vp9_decoder_remove(worker_data->pbi); + vpx_free(worker_data); + } } + vpx_free(ctx->frame_workers); vpx_free(ctx); return VPX_CODEC_OK; @@ -188,32 +198,42 @@ static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static void set_error_detail(vpx_codec_alg_priv_t *ctx, + const char *const error) { + ctx->base.err_detail = error; +} + static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { if (error->error_code) - ctx->base.err_detail = error->has_detail ? error->detail : NULL; + set_error_detail(ctx, error->has_detail ? 
error->detail : NULL); return error->error_code; } static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { - VP9_COMMON *const cm = &ctx->pbi->common; - - cm->new_fb_idx = -1; - - if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - cm->get_fb_cb = ctx->get_ext_fb_cb; - cm->release_fb_cb = ctx->release_ext_fb_cb; - cm->cb_priv = ctx->ext_priv; - } else { - cm->get_fb_cb = vp9_get_frame_buffer; - cm->release_fb_cb = vp9_release_frame_buffer; + int i; + + for (i = 0; i < ctx->num_frame_workers; ++i) { + VP9Worker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + VP9_COMMON *const cm = &worker_data->pbi->common; + + cm->new_fb_idx = -1; + if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { + cm->get_fb_cb = ctx->get_ext_fb_cb; + cm->release_fb_cb = ctx->release_ext_fb_cb; + cm->cb_priv = ctx->ext_priv; + } else { + cm->get_fb_cb = vp9_get_frame_buffer; + cm->release_fb_cb = vp9_release_frame_buffer; - if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to initialize internal frame buffers"); + if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to initialize internal frame buffers"); - cm->cb_priv = &cm->int_frame_buffers; + cm->cb_priv = &cm->int_frame_buffers; + } } } @@ -232,14 +252,59 @@ static void set_ppflags(const vpx_codec_alg_priv_t *ctx, flags->noise_level = ctx->postproc_cfg.noise_level; } -static void init_decoder(vpx_codec_alg_priv_t *ctx) { - ctx->pbi = vp9_decoder_create(); - if (ctx->pbi == NULL) - return; +static int frame_worker_hook(void *arg1, void *arg2) { + FrameWorkerData *const worker_data = (FrameWorkerData *)arg1; + const uint8_t *data = worker_data->data; + (void)arg2; + worker_data->result = vp9_receive_compressed_data(worker_data->pbi, + worker_data->data_size, + &data); + 
worker_data->data_end = data; + return !worker_data->result; +} + +static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { + int i; + const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); + + ctx->last_show_frame = -1; + ctx->next_submit_thread_id = 0; + ctx->next_output_thread_id = 0; + ctx->num_frame_workers = + (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads: 1; + + ctx->frame_workers = (VP9Worker *) + vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); + if (ctx->frame_workers == NULL) { + set_error_detail(ctx, "Failed to allocate frame_workers"); + return VPX_CODEC_MEM_ERROR; + } - ctx->pbi->max_threads = ctx->cfg.threads; - ctx->pbi->inv_tile_order = ctx->invert_tile_order; - ctx->pbi->frame_parallel_decode = ctx->frame_parallel_decode; + for (i = 0; i < ctx->num_frame_workers; ++i) { + VP9Worker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *worker_data = NULL; + winterface->init(worker); + worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData)); + if (worker->data1 == NULL) { + set_error_detail(ctx, "Failed to allocate worker_data"); + return VPX_CODEC_MEM_ERROR; + } + worker_data = (FrameWorkerData *)worker->data1; + worker_data->pbi = vp9_decoder_create(); + if (worker_data->pbi == NULL) { + set_error_detail(ctx, "Failed to allocate worker_data"); + return VPX_CODEC_MEM_ERROR; + } + + // If decoding in serial mode, FrameWorker thread could create tile worker + // thread or loopfilter thread. + worker_data->pbi->max_threads = + (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0; + + worker_data->pbi->inv_tile_order = ctx->invert_tile_order; + worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; + worker->hook = (VP9WorkerHook)frame_worker_hook; + } // If postprocessing was enabled by the application and a // configuration has not been provided, default it. 
@@ -248,20 +313,17 @@ static void init_decoder(vpx_codec_alg_priv_t *ctx) { set_default_ppflags(&ctx->postproc_cfg); init_buffer_callbacks(ctx); + + return VPX_CODEC_OK; } static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, const uint8_t **data, unsigned int data_sz, void *user_priv, int64_t deadline) { - YV12_BUFFER_CONFIG sd; - vp9_ppflags_t flags = {0, 0, 0}; - VP9_COMMON *cm = NULL; - + vp9_ppflags_t flags = {0}; + const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); (void)deadline; - vp9_zero(sd); - ctx->img_avail = 0; - // Determine the stream parameters. Note that we rely on peek_si to // validate that we have a buffer that does not wrap around the top // of the heap. @@ -276,32 +338,40 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_ERROR; } - // Initialize the decoder instance on the first frame - if (ctx->pbi == NULL) { - init_decoder(ctx); - if (ctx->pbi == NULL) - return VPX_CODEC_ERROR; + // Initialize the decoder workers on the first frame + if (ctx->frame_workers == NULL) { + const vpx_codec_err_t res = init_decoder(ctx); + if (res != VPX_CODEC_OK) + return res; } - // Set these even if already initialized. The caller may have changed the - // decrypt config between frames. - ctx->pbi->decrypt_cb = ctx->decrypt_cb; - ctx->pbi->decrypt_state = ctx->decrypt_state; + if (!ctx->frame_parallel_decode) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + worker_data->data = *data; + worker_data->data_size = data_sz; + worker_data->user_priv = user_priv; - cm = &ctx->pbi->common; + // Set these even if already initialized. The caller may have changed the + // decrypt config between frames. 
+ worker_data->pbi->decrypt_cb = ctx->decrypt_cb; + worker_data->pbi->decrypt_state = ctx->decrypt_state; - if (vp9_receive_compressed_data(ctx->pbi, data_sz, data)) - return update_error_state(ctx, &cm->error); + worker->had_error = 0; + winterface->execute(worker); - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - set_ppflags(ctx, &flags); + // Update data pointer after decode. + *data = worker_data->data_end; - if (vp9_get_raw_frame(ctx->pbi, &sd, &flags)) - return update_error_state(ctx, &cm->error); + if (worker->had_error) + return update_error_state(ctx, &worker_data->pbi->common.error); + } else { + // TODO(hkuang): Implement frame parallel decode. + return VPX_CODEC_INCAPABLE; + } - yuvconfig2image(&ctx->img, &sd, user_priv); - ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - ctx->img_avail = 1; + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + set_ppflags(ctx, &flags); return VPX_CODEC_OK; } @@ -412,7 +482,7 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, vpx_codec_err_t res; if (data_start < data || frame_size > (uint32_t) (data_end - data_start)) { - ctx->base.err_detail = "Invalid frame size in index"; + set_error_detail(ctx, "Invalid frame size in index"); return VPX_CODEC_CORRUPT_FRAME; } @@ -430,7 +500,7 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, // Extra data detected after the frame. 
if (data_start < data_end - 1) { - ctx->base.err_detail = "Fail to decode frame in parallel mode"; + set_error_detail(ctx, "Fail to decode frame in parallel mode"); return VPX_CODEC_INCAPABLE; } } @@ -445,7 +515,7 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, vpx_codec_err_t res; if (data_start < data || frame_size > (uint32_t) (data_end - data_start)) { - ctx->base.err_detail = "Invalid frame size in index"; + set_error_detail(ctx, "Invalid frame size in index"); return VPX_CODEC_CORRUPT_FRAME; } @@ -483,15 +553,31 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; - if (ctx->img_avail) { - // iter acts as a flip flop, so an image is only returned on the first - // call to get_frame. - if (!(*iter)) { + // iter acts as a flip flop, so an image is only returned on the first + // call to get_frame. + if (*iter == NULL && ctx->frame_workers != NULL) { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + + VP9Worker *const worker = &ctx->frame_workers[ctx->next_output_thread_id]; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + if (vp9_get_raw_frame(worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &worker_data->pbi->common; + yuvconfig2image(&ctx->img, &sd, worker_data->user_priv); + ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; img = &ctx->img; *iter = img; + // Decrease reference count of last output frame in frame parallel mode. 
+ if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { + --cm->frame_bufs[ctx->last_show_frame].ref_count; + if (cm->frame_bufs[ctx->last_show_frame].ref_count == 0) { + cm->release_fb_cb(cm->cb_priv, + &cm->frame_bufs[ctx->last_show_frame].raw_frame_buffer); + } + } + ctx->last_show_frame = worker_data->pbi->common.new_fb_idx; } } - ctx->img_avail = 0; return img; } @@ -502,7 +588,7 @@ static vpx_codec_err_t decoder_set_fb_fn( vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { if (cb_get == NULL || cb_release == NULL) { return VPX_CODEC_INVALID_PARAM; - } else if (ctx->pbi == NULL) { + } else if (ctx->frame_workers == NULL) { // If the decoder has already been initialized, do not accept changes to // the frame buffer functions. ctx->get_ext_fb_cb = cb_get; @@ -518,12 +604,19 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (data) { vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; - + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(&ctx->pbi->common, + return vp9_set_reference_dec(&worker_data->pbi->common, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; @@ -534,13 +627,19 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); + // Only support this function in serial decode. 
+ if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (data) { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; + vpx_ref_frame_t *frame = (vpx_ref_frame_t *) data; YV12_BUFFER_CONFIG sd; - + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; image2yuvconfig(&frame->img, &sd); - - return vp9_copy_reference_dec(ctx->pbi, + return vp9_copy_reference_dec(worker_data->pbi, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; @@ -551,11 +650,18 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (data) { YV12_BUFFER_CONFIG* fb; - - vp9_get_reference_dec(ctx->pbi, data->idx, &fb); - yuvconfig2image(&data->img, fb, NULL); + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + vp9_get_reference_dec(worker_data->pbi, data->idx, &fb); + yuvconfig2image(&data->img, fb, worker_data->user_priv); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -592,11 +698,20 @@ static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, va_list args) { int *const update_info = va_arg(args, int *); + // Only support this function in serial decode. 
+ if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (update_info) { - if (ctx->pbi) - *update_info = ctx->pbi->refresh_frame_flags; - else + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + *update_info = worker_data->pbi->refresh_frame_flags; + } else { return VPX_CODEC_ERROR; + } return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -608,11 +723,20 @@ static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, va_list args) { int *corrupted = va_arg(args, int *); + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (corrupted) { - if (ctx->pbi) - *corrupted = ctx->pbi->common.frame_to_show->corrupted; - else + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + *corrupted = worker_data->pbi->common.frame_to_show->corrupted; + } else { return VPX_CODEC_ERROR; + } return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -623,9 +747,17 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, va_list args) { int *const display_size = va_arg(args, int *); + // Only support this function in serial decode. 
+ if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + if (display_size) { - if (ctx->pbi) { - const VP9_COMMON *const cm = &ctx->pbi->common; + if (ctx->frame_workers) { + VP9Worker *const worker = ctx->frame_workers; + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &worker_data->pbi->common; display_size[0] = cm->display_width; display_size[1] = cm->display_height; } else { -- cgit v1.2.1 From 28a794f680ebfbb50a1618b0ceaff0cb6cf1b972 Mon Sep 17 00:00:00 2001 From: hkuang Date: Fri, 20 Jun 2014 15:02:52 -0700 Subject: Seperate the frame buffers from VP9 encoder/decoder structure. Prepare for frame parallel decoding, the frame buffers must be separated from the encoder and decoder structure, while the encoder and decoder will hold the pointer of the BufferPool. Change-Id: I172c78f876e41fb5aea11be5f632adadf2a6f466 --- vp9/common/vp9_alloccommon.c | 23 ++++++++++++---------- vp9/common/vp9_onyxc_int.h | 42 ++++++++++++++++++++++++++------------- vp9/decoder/vp9_decodeframe.c | 12 ++++++----- vp9/decoder/vp9_decoder.c | 41 ++++++++++++++++++++++---------------- vp9/decoder/vp9_decoder.h | 2 +- vp9/encoder/vp9_encoder.c | 46 ++++++++++++++++++++++--------------------- vp9/encoder/vp9_encoder.h | 8 +++++--- vp9/encoder/vp9_rdopt.c | 3 ++- vp9/vp9_cx_iface.c | 10 +++++++++- vp9/vp9_dx_iface.c | 36 +++++++++++++++++++++------------ 10 files changed, 136 insertions(+), 87 deletions(-) diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index ccbf3f64a..f847e1276 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -97,14 +97,15 @@ static void free_mi(VP9_COMMON *cm) { void vp9_free_frame_buffers(VP9_COMMON *cm) { int i; + BufferPool *const pool = cm->buffer_pool; for (i = 0; i < FRAME_BUFFERS; ++i) { - vp9_free_frame_buffer(&cm->frame_bufs[i].buf); + 
vp9_free_frame_buffer(&pool->frame_bufs[i].buf); - if (cm->frame_bufs[i].ref_count > 0 && - cm->frame_bufs[i].raw_frame_buffer.data != NULL) { - cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer); - cm->frame_bufs[i].ref_count = 0; + if (pool->frame_bufs[i].ref_count > 0 && + pool->frame_bufs[i].raw_frame_buffer.data != NULL) { + pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer); + pool->frame_bufs[i].ref_count = 0; } } @@ -176,13 +177,14 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { static void init_frame_bufs(VP9_COMMON *cm) { int i; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; cm->new_fb_idx = FRAME_BUFFERS - 1; - cm->frame_bufs[cm->new_fb_idx].ref_count = 1; + frame_bufs[cm->new_fb_idx].ref_count = 1; for (i = 0; i < REF_FRAMES; ++i) { cm->ref_frame_map[i] = i; - cm->frame_bufs[i].ref_count = 1; + frame_bufs[i].ref_count = 1; } } @@ -190,12 +192,13 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { int i; const int ss_x = cm->subsampling_x; const int ss_y = cm->subsampling_y; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; vp9_free_frame_buffers(cm); for (i = 0; i < FRAME_BUFFERS; ++i) { - cm->frame_bufs[i].ref_count = 0; - if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height, + frame_bufs[i].ref_count = 0; + if (vp9_alloc_frame_buffer(&frame_bufs[i].buf, width, height, ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0) goto fail; } @@ -256,7 +259,7 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { void vp9_remove_common(VP9_COMMON *cm) { vp9_free_frame_buffers(cm); vp9_free_context_buffers(cm); - vp9_free_internal_frame_buffers(&cm->int_frame_buffers); + vp9_free_internal_frame_buffers(&cm->buffer_pool->int_frame_buffers); } void vp9_update_frame_size(VP9_COMMON *cm) { diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index e1753a11b..640e953e7 100644 --- a/vp9/common/vp9_onyxc_int.h +++ 
b/vp9/common/vp9_onyxc_int.h @@ -11,6 +11,7 @@ #ifndef VP9_COMMON_VP9_ONYXC_INT_H_ #define VP9_COMMON_VP9_ONYXC_INT_H_ +#include <pthread.h> #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vp9_rtcd.h" @@ -63,6 +64,26 @@ typedef struct { YV12_BUFFER_CONFIG buf; } RefCntBuffer; +typedef struct { + // Protect BufferPool from being accessed by several FrameWorkers at + // the same time during frame parallel decode. + // TODO(hkuang): Try to use atomic variable instead of locking the whole pool. +#if CONFIG_MULTITHREAD + pthread_mutex_t pool_mutex; +#endif + + // Private data associated with the frame buffer callbacks. + void *cb_priv; + + vpx_get_frame_buffer_cb_fn_t get_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_fb_cb; + + RefCntBuffer frame_bufs[FRAME_BUFFERS]; + + // Frame buffers allocated internally by the codec. + InternalFrameBufferList int_frame_buffers; +} BufferPool; + typedef struct VP9Common { struct vpx_internal_error_info error; @@ -89,8 +110,6 @@ typedef struct VP9Common { YV12_BUFFER_CONFIG *frame_to_show; - RefCntBuffer frame_bufs[FRAME_BUFFERS]; - int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and @@ -202,30 +221,26 @@ typedef struct VP9Common { int log2_tile_cols, log2_tile_rows; - // Private data associated with the frame buffer callbacks. - void *cb_priv; - vpx_get_frame_buffer_cb_fn_t get_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_fb_cb; - - // Handles memory for the codec. - InternalFrameBufferList int_frame_buffers; + // External BufferPool passed from outside.
+ BufferPool *buffer_pool; PARTITION_CONTEXT *above_seg_context; ENTROPY_CONTEXT *above_context; } VP9_COMMON; static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { - return &cm->frame_bufs[cm->new_fb_idx].buf; + return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; } static INLINE int get_free_fb(VP9_COMMON *cm) { + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; int i; - for (i = 0; i < FRAME_BUFFERS; i++) - if (cm->frame_bufs[i].ref_count == 0) + for (i = 0; i < FRAME_BUFFERS; ++i) + if (frame_bufs[i].ref_count == 0) break; assert(i < FRAME_BUFFERS); - cm->frame_bufs[i].ref_count = 1; + frame_bufs[i].ref_count = 1; return i; } @@ -310,7 +325,6 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - // num_4x4_blocks_wide_lookup[bsize] / 2 const int bs = num_8x8_blocks_wide_lookup[bsize]; // update the partition context at the end notes. set partition bits diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 8b96abb9d..5b892bace 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -621,6 +621,7 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { } static void apply_frame_size(VP9_COMMON *cm, int width, int height) { + BufferPool *const pool = cm->buffer_pool; if (cm->width != width || cm->height != height) { // Change in frame size. // TODO(agrange) Don't test width/height, check overall size. 
@@ -640,8 +641,8 @@ static void apply_frame_size(VP9_COMMON *cm, int width, int height) { if (vp9_realloc_frame_buffer( get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9_DEC_BORDER_IN_PIXELS, - &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb, - cm->cb_priv)) { + &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, + pool->cb_priv)) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); } @@ -1076,6 +1077,7 @@ static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; size_t sz; int i; @@ -1095,12 +1097,12 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, // Show an existing frame directly. const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)]; - if (frame_to_show < 0 || cm->frame_bufs[frame_to_show].ref_count < 1) + if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Buffer %d does not contain a decoded frame", frame_to_show); - ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show); + ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); pbi->refresh_frame_flags = 0; cm->lf.filter_level = 0; cm->show_frame = 1; @@ -1161,7 +1163,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, const int idx = cm->ref_frame_map[ref]; RefBuffer *const ref_frame = &cm->frame_refs[i]; ref_frame->idx = idx; - ref_frame->buf = &cm->frame_bufs[idx].buf; + ref_frame->buf = &frame_bufs[idx].buf; cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); } diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 13f9d71d7..75283ab93 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -41,7 +41,7 @@ static void initialize_dec() { 
} } -VP9Decoder *vp9_decoder_create() { +VP9Decoder *vp9_decoder_create(BufferPool *const pool) { VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? &pbi->common : NULL; @@ -66,6 +66,7 @@ VP9Decoder *vp9_decoder_create() { cm->current_video_frame = 0; pbi->ready_for_new_data = 1; + pbi->common.buffer_pool = pool; // vp9_init_dequantizer() is first called here. Add check in // frame_init_dequantizer() to avoid unnecessary calling of @@ -124,7 +125,7 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, */ if (ref_frame_flag == VP9_LAST_FLAG) { const YV12_BUFFER_CONFIG *const cfg = - &cm->frame_bufs[cm->ref_frame_map[0]].buf; + &cm->buffer_pool->frame_bufs[cm->ref_frame_map[0]].buf; if (!equal_dimensions(cfg, sd)) vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); @@ -143,6 +144,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { RefBuffer *ref_buf = NULL; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the // encoder is using the frame buffers for. This is just a stub to keep the @@ -170,11 +172,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, const int free_fb = get_free_fb(cm); // Decrease ref_count since it will be increased again in // ref_cnt_fb() below. - cm->frame_bufs[free_fb].ref_count--; + --frame_bufs[free_fb].ref_count; // Manage the reference counters and copy image. 
- ref_cnt_fb(cm->frame_bufs, ref_fb_ptr, free_fb); - ref_buf->buf = &cm->frame_bufs[*ref_fb_ptr].buf; + ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb); + ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf; vp8_yv12_copy_frame(sd, ref_buf->buf); } @@ -184,27 +186,30 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; if (index < 0 || index >= REF_FRAMES) return -1; - *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf; + *fb = &frame_bufs[cm->ref_frame_map[index]].buf; return 0; } /* If any buffer updating is signaled it should be done here. */ static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; - VP9_COMMON *const cm = &pbi->common; + VP9_COMMON * const cm = &pbi->common; + BufferPool * const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { if (mask & 1) { const int old_idx = cm->ref_frame_map[ref_index]; - ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[ref_index], + ref_cnt_fb(frame_bufs, &cm->ref_frame_map[ref_index], cm->new_fb_idx); - if (old_idx >= 0 && cm->frame_bufs[old_idx].ref_count == 0) - cm->release_fb_cb(cm->cb_priv, - &cm->frame_bufs[old_idx].raw_frame_buffer); + if (old_idx >= 0 && frame_bufs[old_idx].ref_count == 0) + pool->release_fb_cb(pool->cb_priv, + &frame_bufs[old_idx].raw_frame_buffer); } ++ref_index; } @@ -212,7 +217,7 @@ static void swap_frame_buffers(VP9Decoder *pbi) { cm->frame_to_show = get_frame_new_buffer(cm); if (!pbi->frame_parallel_decode || !cm->show_frame) { - --cm->frame_bufs[cm->new_fb_idx].ref_count; + --frame_bufs[cm->new_fb_idx].ref_count; } // Invalidate these references until the next frame starts. 
@@ -223,6 +228,8 @@ static void swap_frame_buffers(VP9Decoder *pbi) { int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource) { VP9_COMMON *const cm = &pbi->common; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; const uint8_t *source = *psource; int retcode = 0; @@ -244,9 +251,9 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, // Check if the previous frame was a frame without any references to it. // Release frame buffer if not decoding in frame parallel mode. if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 - && cm->frame_bufs[cm->new_fb_idx].ref_count == 0) - cm->release_fb_cb(cm->cb_priv, - &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer); + && frame_bufs[cm->new_fb_idx].ref_count == 0) + pool->release_fb_cb(pool->cb_priv, + &frame_bufs[cm->new_fb_idx].raw_frame_buffer); cm->new_fb_idx = get_free_fb(cm); if (setjmp(cm->error.jmp)) { @@ -262,8 +269,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, if (cm->frame_refs[0].idx != INT_MAX && cm->frame_refs[0].buf != NULL) cm->frame_refs[0].buf->corrupted = 1; - if (cm->new_fb_idx > 0 && cm->frame_bufs[cm->new_fb_idx].ref_count > 0) - cm->frame_bufs[cm->new_fb_idx].ref_count--; + if (frame_bufs[cm->new_fb_idx].ref_count > 0) + --frame_bufs[cm->new_fb_idx].ref_count; return -1; } diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index ab4f9a2c3..29b18ca8f 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -78,7 +78,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); -struct VP9Decoder *vp9_decoder_create(); +struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); void vp9_decoder_remove(struct VP9Decoder *pbi); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 4f17a15a9..a1c6ca07d 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ 
-713,7 +713,8 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } -VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { +VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, + BufferPool *const pool) { unsigned int i, j; VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL; @@ -734,6 +735,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { vp9_rtcd(); cpi->use_svc = 0; + cpi->common.buffer_pool = pool; init_config(cpi, oxcf); vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc); @@ -1273,7 +1275,7 @@ int vp9_get_reference_enc(VP9_COMP *cpi, int index, YV12_BUFFER_CONFIG **fb) { if (index < 0 || index >= REF_FRAMES) return -1; - *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf; + *fb = &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; return 0; } @@ -1542,14 +1544,13 @@ static int recode_loop_test(const VP9_COMP *cpi, void vp9_update_reference_frames(VP9_COMP *cpi) { VP9_COMMON * const cm = &cpi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; // At this point the new frame has been encoded. // If any buffer copy / swapping is signaled it should be done here. 
if (cm->frame_type == KEY_FRAME) { - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); + ref_cnt_fb(frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); + ref_cnt_fb(frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); } else if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref && !cpi->use_svc) { /* Preserve the previously existing golden frame and update the frame in @@ -1563,8 +1564,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { */ int tmp; - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); + ref_cnt_fb(frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; @@ -1577,19 +1577,17 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { arf_idx = gf_group->arf_update_idx[gf_group->index]; } - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[arf_idx], cm->new_fb_idx); + ref_cnt_fb(frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); } if (cpi->refresh_golden_frame) { - ref_cnt_fb(cm->frame_bufs, + ref_cnt_fb(frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); } } if (cpi->refresh_last_frame) { - ref_cnt_fb(cm->frame_bufs, - &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); + ref_cnt_fb(frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); } #if CONFIG_DENOISING vp9_denoiser_update_frame_info(&cpi->denoiser, @@ -1630,34 +1628,36 @@ void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG}; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - const YV12_BUFFER_CONFIG *const ref = 
&cm->frame_bufs[idx].buf; + const YV12_BUFFER_CONFIG *const ref = &frame_bufs[idx].buf; // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1). if ((cpi->ref_frame_flags & ref_mask[ref_frame - 1]) && (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)) { const int new_fb = get_free_fb(cm); - vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf, + vp9_realloc_frame_buffer(&frame_bufs[new_fb].buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL); - scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf); + scale_and_extend_frame(ref, &frame_bufs[new_fb].buf); cpi->scaled_ref_idx[ref_frame - 1] = new_fb; } else { cpi->scaled_ref_idx[ref_frame - 1] = idx; - cm->frame_bufs[idx].ref_count++; + ++frame_bufs[idx].ref_count; } } } static void release_scaled_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; int i; - for (i = 0; i < 3; i++) - cm->frame_bufs[cpi->scaled_ref_idx[i]].ref_count--; + for (i = 0; i < 3; ++i) + --frame_bufs[cpi->scaled_ref_idx[i]].ref_count; } static void full_to_model_count(unsigned int *model_count, @@ -2520,6 +2520,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, const int is_spatial_svc = cpi->use_svc && (cpi->svc.number_temporal_layers == 1) && (cpi->svc.number_spatial_layers > 1); + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; if (!cpi) return -1; @@ -2602,7 +2603,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cm->show_frame = 1; cm->intra_only = 0; - // Check to see if the frame should be encoded as an arf overlay. + // Check to see if the frame to be encoded is an overlay for a previous + // arf frame and if so configure it as such. 
check_src_altref(cpi); } } @@ -2656,7 +2658,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, /* find a free buffer for the new frame, releasing the reference previously * held. */ - cm->frame_bufs[cm->new_fb_idx].ref_count--; + --frame_bufs[cm->new_fb_idx].ref_count; cm->new_fb_idx = get_free_fb(cm); if (!cpi->use_svc && cpi->multi_arf_allowed) { @@ -2690,7 +2692,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf; + YV12_BUFFER_CONFIG *const buf = &frame_bufs[idx].buf; RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1]; ref_buf->buf = buf; ref_buf->idx = idx; diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index b38f9c246..072c1322a 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -436,7 +436,8 @@ typedef struct VP9_COMP { void vp9_initialize_enc(); -struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf); +struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, + BufferPool *const pool); void vp9_remove_compressor(VP9_COMP *cpi); void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf); @@ -494,8 +495,9 @@ static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON * const cm = &cpi->common; - return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] + VP9_COMMON *const cm = &cpi->common; + BufferPool *const pool = cm->buffer_pool; + return &pool->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] .buf; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e1a03a62f..e577017e6 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2271,9 +2271,10 @@ void 
vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, int ref_frame) { const VP9_COMMON *const cm = &cpi->common; + const RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; - return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; + return (scaled_idx != ref_idx) ? &frame_bufs[scaled_idx].buf : NULL; } int vp9_get_switchable_rate(const VP9_COMP *cpi) { diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index b1501619e..09949f24b 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -90,6 +90,8 @@ struct vpx_codec_alg_priv { vp8_postproc_cfg_t preview_ppcfg; vpx_codec_pkt_list_decl(128) pkt_list; unsigned int fixed_kf_cntr; + // BufferPool that holds all reference frames. + BufferPool *buffer_pool; }; static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { @@ -630,6 +632,10 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, ctx->priv->alg_priv = priv; ctx->priv->init_flags = ctx->init_flags; ctx->priv->enc.total_encoders = 1; + ctx->priv->alg_priv->buffer_pool = + (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); + if (ctx->priv->alg_priv->buffer_pool == NULL) + return VPX_CODEC_MEM_ERROR; if (ctx->config.enc) { // Update the reference to the config structure to an internal copy. 
@@ -667,7 +673,8 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, set_encoder_config(&ctx->priv->alg_priv->oxcf, &ctx->priv->alg_priv->cfg, &ctx->priv->alg_priv->extra_cfg); - cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); + cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf, + ctx->priv->alg_priv->buffer_pool); if (cpi == NULL) res = VPX_CODEC_MEM_ERROR; else @@ -681,6 +688,7 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) { free(ctx->cx_data); vp9_remove_compressor(ctx->cpi); + vpx_free(ctx->buffer_pool); free(ctx); return VPX_CODEC_OK; } diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 95897ae63..3a73eeaa1 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -46,6 +46,9 @@ struct vpx_codec_alg_priv { int next_submit_thread_id; int next_output_thread_id; + // BufferPool that holds all reference frames. Shared by all the FrameWorkers. + BufferPool *buffer_pool; + // External frame buffer info to save for VP9 common. void *ext_priv; // Private data associated with the external frame buffers. 
vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; @@ -100,6 +103,7 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { } vpx_free(ctx->frame_workers); + vpx_free(ctx->buffer_pool); vpx_free(ctx); return VPX_CODEC_OK; @@ -218,21 +222,22 @@ static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { VP9Worker *const worker = &ctx->frame_workers[i]; FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; VP9_COMMON *const cm = &worker_data->pbi->common; + BufferPool *const pool = cm->buffer_pool; cm->new_fb_idx = -1; if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - cm->get_fb_cb = ctx->get_ext_fb_cb; - cm->release_fb_cb = ctx->release_ext_fb_cb; - cm->cb_priv = ctx->ext_priv; + pool->get_fb_cb = ctx->get_ext_fb_cb; + pool->release_fb_cb = ctx->release_ext_fb_cb; + pool->cb_priv = ctx->ext_priv; } else { - cm->get_fb_cb = vp9_get_frame_buffer; - cm->release_fb_cb = vp9_release_frame_buffer; + pool->get_fb_cb = vp9_get_frame_buffer; + pool->release_fb_cb = vp9_release_frame_buffer; - if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) + if (vp9_alloc_internal_frame_buffers(&pool->int_frame_buffers)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to initialize internal frame buffers"); - cm->cb_priv = &cm->int_frame_buffers; + pool->cb_priv = &pool->int_frame_buffers; } } } @@ -272,6 +277,9 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { ctx->next_output_thread_id = 0; ctx->num_frame_workers = (ctx->frame_parallel_decode == 1) ? 
ctx->cfg.threads: 1; + ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); + if (ctx->buffer_pool == NULL) + return VPX_CODEC_MEM_ERROR; ctx->frame_workers = (VP9Worker *) vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); @@ -290,7 +298,7 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { return VPX_CODEC_MEM_ERROR; } worker_data = (FrameWorkerData *)worker->data1; - worker_data->pbi = vp9_decoder_create(); + worker_data->pbi = vp9_decoder_create(ctx->buffer_pool); if (worker_data->pbi == NULL) { set_error_detail(ctx, "Failed to allocate worker_data"); return VPX_CODEC_MEM_ERROR; @@ -563,16 +571,18 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; if (vp9_get_raw_frame(worker_data->pbi, &sd, &flags) == 0) { VP9_COMMON *const cm = &worker_data->pbi->common; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; yuvconfig2image(&ctx->img, &sd, worker_data->user_priv); - ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; img = &ctx->img; *iter = img; // Decrease reference count of last output frame in frame parallel mode. 
if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { - --cm->frame_bufs[ctx->last_show_frame].ref_count; - if (cm->frame_bufs[ctx->last_show_frame].ref_count == 0) { - cm->release_fb_cb(cm->cb_priv, - &cm->frame_bufs[ctx->last_show_frame].raw_frame_buffer); + --frame_bufs[ctx->last_show_frame].ref_count; + if (frame_bufs[ctx->last_show_frame].ref_count == 0) { + pool->release_fb_cb(pool->cb_priv, + &frame_bufs[ctx->last_show_frame].raw_frame_buffer); } } ctx->last_show_frame = worker_data->pbi->common.new_fb_idx; -- cgit v1.2.1 From 10aa23f751069fe464fbc3c0e35dcf9294f055f7 Mon Sep 17 00:00:00 2001 From: hkuang Date: Wed, 2 Jul 2014 17:20:45 -0700 Subject: ctrl_get_reference does not need user_priv. The relationship of the user private data at runtime is not preserved from decode() to this call which may occur at an unknown point in the future Change-Id: Ia7eb25365c805147614574c3af87aedbe0305fc6 --- test/user_priv_test.cc | 2 +- vp9/vp9_dx_iface.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/user_priv_test.cc b/test/user_priv_test.cc index f9aef33da..22fce857c 100644 --- a/test/user_priv_test.cc +++ b/test/user_priv_test.cc @@ -78,7 +78,7 @@ string DecodeFile(const string &filename) { ref.idx = rnd.Rand8() % 3; decoder.Control(VP9_GET_REFERENCE, &ref); - CheckUserPrivateData(ref.img.user_priv, &frame_num); + CheckUserPrivateData(ref.img.user_priv, NULL); } md5.Add(img); } diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 3a73eeaa1..746fab006 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -671,7 +671,7 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, VP9Worker *const worker = ctx->frame_workers; FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; vp9_get_reference_dec(worker_data->pbi, data->idx, &fb); - yuvconfig2image(&data->img, fb, worker_data->user_priv); + yuvconfig2image(&data->img, fb, NULL); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; -- 
cgit v1.2.1 From 3cffa0c74ee6689e22a4a416b219b50676ed669e Mon Sep 17 00:00:00 2001 From: hkuang Date: Mon, 7 Jul 2014 14:52:19 -0700 Subject: Move vp9_thread.* to common. Prepare for frame parallel decoding, the reference count buffers need to be protected by mutex. Move vp9_thread.* to common folder so that those buffers could use cross-platform mutex from vp9_thread.*. (cherry picked from commit 337e8015c9deaf8ab7e8d0c3c132160a77dd1590) Change-Id: I0587a08447925f4554d7788686a31483c2ae3f37 --- test/vp9_thread_test.cc | 2 +- vp9/common/vp9_thread.c | 183 +++++++++++++++++++++++++++++++++++ vp9/common/vp9_thread.h | 219 ++++++++++++++++++++++++++++++++++++++++++ vp9/decoder/vp9_decodeframe.c | 2 +- vp9/decoder/vp9_decoder.h | 2 +- vp9/decoder/vp9_dthread.h | 2 +- vp9/decoder/vp9_thread.c | 183 ----------------------------------- vp9/decoder/vp9_thread.h | 219 ------------------------------------------ vp9/vp9_common.mk | 2 + vp9/vp9dx.mk | 2 - 10 files changed, 408 insertions(+), 408 deletions(-) create mode 100644 vp9/common/vp9_thread.c create mode 100644 vp9/common/vp9_thread.h delete mode 100644 vp9/decoder/vp9_thread.c delete mode 100644 vp9/decoder/vp9_thread.h diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc index 72719a698..fa51835a5 100644 --- a/test/vp9_thread_test.cc +++ b/test/vp9_thread_test.cc @@ -18,7 +18,7 @@ #if CONFIG_WEBM_IO #include "test/webm_video_source.h" #endif -#include "vp9/decoder/vp9_thread.h" +#include "vp9/common/vp9_thread.h" namespace { diff --git a/vp9/common/vp9_thread.c b/vp9/common/vp9_thread.c new file mode 100644 index 000000000..348bdf6db --- /dev/null +++ b/vp9/common/vp9_thread.c @@ -0,0 +1,183 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. 
All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob 08ad4e1fecba302bf1247645e84a7d2779956bc3 src/utils/thread.c + +#include <assert.h> +#include <string.h> // for memset() +#include "./vp9_thread.h" +#include "vpx_mem/vpx_mem.h" + +#if CONFIG_MULTITHREAD + +struct VP9WorkerImpl { + pthread_mutex_t mutex_; + pthread_cond_t condition_; + pthread_t thread_; +}; + +//------------------------------------------------------------------------------ + +static void execute(VP9Worker *const worker); // Forward declaration. + +static THREADFN thread_loop(void *ptr) { + VP9Worker *const worker = (VP9Worker*)ptr; + int done = 0; + while (!done) { + pthread_mutex_lock(&worker->impl_->mutex_); + while (worker->status_ == OK) { // wait in idling mode + pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); + } + if (worker->status_ == WORK) { + execute(worker); + worker->status_ = OK; + } else if (worker->status_ == NOT_OK) { // finish the worker + done = 1; + } + // signal to the main thread that we're done (for sync()) + pthread_cond_signal(&worker->impl_->condition_); + pthread_mutex_unlock(&worker->impl_->mutex_); + } + return THREAD_RETURN(NULL); // Thread is finished +} + +// main thread state control +static void change_state(VP9Worker *const worker, + VP9WorkerStatus new_status) { + // No-op when attempting to change state on a thread that didn't come up. + // Checking status_ without acquiring the lock first would result in a data + // race.
+ if (worker->impl_ == NULL) return; + + pthread_mutex_lock(&worker->impl_->mutex_); + if (worker->status_ >= OK) { + // wait for the worker to finish + while (worker->status_ != OK) { + pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); + } + // assign new status and release the working thread if needed + if (new_status != OK) { + worker->status_ = new_status; + pthread_cond_signal(&worker->impl_->condition_); + } + } + pthread_mutex_unlock(&worker->impl_->mutex_); +} + +#endif // CONFIG_MULTITHREAD + +//------------------------------------------------------------------------------ + +static void init(VP9Worker *const worker) { + memset(worker, 0, sizeof(*worker)); + worker->status_ = NOT_OK; +} + +static int sync(VP9Worker *const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, OK); +#endif + assert(worker->status_ <= OK); + return !worker->had_error; +} + +static int reset(VP9Worker *const worker) { + int ok = 1; + worker->had_error = 0; + if (worker->status_ < OK) { +#if CONFIG_MULTITHREAD + worker->impl_ = (VP9WorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_)); + if (worker->impl_ == NULL) { + return 0; + } + if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { + goto Error; + } + if (pthread_cond_init(&worker->impl_->condition_, NULL)) { + pthread_mutex_destroy(&worker->impl_->mutex_); + goto Error; + } + pthread_mutex_lock(&worker->impl_->mutex_); + ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker); + if (ok) worker->status_ = OK; + pthread_mutex_unlock(&worker->impl_->mutex_); + if (!ok) { + pthread_mutex_destroy(&worker->impl_->mutex_); + pthread_cond_destroy(&worker->impl_->condition_); + Error: + vpx_free(worker->impl_); + worker->impl_ = NULL; + return 0; + } +#else + worker->status_ = OK; +#endif + } else if (worker->status_ > OK) { + ok = sync(worker); + } + assert(!ok || (worker->status_ == OK)); + return ok; +} + +static void execute(VP9Worker *const worker) { + if (worker->hook != NULL) { + 
worker->had_error |= !worker->hook(worker->data1, worker->data2); + } +} + +static void launch(VP9Worker *const worker) { +#if CONFIG_MULTITHREAD + change_state(worker, WORK); +#else + execute(worker); +#endif +} + +static void end(VP9Worker *const worker) { + if (worker->status_ >= OK) { +#if CONFIG_MULTITHREAD + change_state(worker, NOT_OK); + pthread_join(worker->impl_->thread_, NULL); + pthread_mutex_destroy(&worker->impl_->mutex_); + pthread_cond_destroy(&worker->impl_->condition_); +#else + worker->status_ = NOT_OK; +#endif + } + vpx_free(worker->impl_); + worker->impl_ = NULL; + assert(worker->status_ == NOT_OK); +} + +//------------------------------------------------------------------------------ + +static VP9WorkerInterface g_worker_interface = { + init, reset, sync, launch, execute, end +}; + +int vp9_set_worker_interface(const VP9WorkerInterface* const winterface) { + if (winterface == NULL || + winterface->init == NULL || winterface->reset == NULL || + winterface->sync == NULL || winterface->launch == NULL || + winterface->execute == NULL || winterface->end == NULL) { + return 0; + } + g_worker_interface = *winterface; + return 1; +} + +const VP9WorkerInterface *vp9_get_worker_interface(void) { + return &g_worker_interface; +} + +//------------------------------------------------------------------------------ diff --git a/vp9/common/vp9_thread.h b/vp9/common/vp9_thread.h new file mode 100644 index 000000000..864579c03 --- /dev/null +++ b/vp9/common/vp9_thread.h @@ -0,0 +1,219 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+// ----------------------------------------------------------------------------- +// +// Multi-threaded worker +// +// Original source: +// http://git.chromium.org/webm/libwebp.git +// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h + +#ifndef VP9_DECODER_VP9_THREAD_H_ +#define VP9_DECODER_VP9_THREAD_H_ + +#include "./vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if CONFIG_MULTITHREAD + +#if defined(_WIN32) +#include <errno.h> // NOLINT +#include <process.h> // NOLINT +#include <windows.h> // NOLINT +typedef HANDLE pthread_t; +typedef CRITICAL_SECTION pthread_mutex_t; +typedef struct { + HANDLE waiting_sem_; + HANDLE received_sem_; + HANDLE signal_event_; +} pthread_cond_t; + +//------------------------------------------------------------------------------ +// simplistic pthread emulation layer + +// _beginthreadex requires __stdcall +#define THREADFN unsigned int __stdcall +#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) + +static INLINE int pthread_create(pthread_t* const thread, const void* attr, + unsigned int (__stdcall *start)(void*), + void* arg) { + (void)attr; + *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ + 0, /* unsigned stack_size */ + start, + arg, + 0, /* unsigned initflag */ + NULL); /* unsigned *thrdaddr */ + if (*thread == NULL) return 1; + SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); + return 0; +} + +static INLINE int pthread_join(pthread_t thread, void** value_ptr) { + (void)value_ptr; + return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || + CloseHandle(thread) == 0); +} + +// Mutex +static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, + void* mutexattr) { + (void)mutexattr; + InitializeCriticalSection(mutex); + return 0; +} + +static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { + return TryEnterCriticalSection(mutex) ?
0 : EBUSY; +} + +static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { + EnterCriticalSection(mutex); + return 0; +} + +static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { + LeaveCriticalSection(mutex); + return 0; +} + +static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { + DeleteCriticalSection(mutex); + return 0; +} + +// Condition +static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { + int ok = 1; + ok &= (CloseHandle(condition->waiting_sem_) != 0); + ok &= (CloseHandle(condition->received_sem_) != 0); + ok &= (CloseHandle(condition->signal_event_) != 0); + return !ok; +} + +static INLINE int pthread_cond_init(pthread_cond_t *const condition, + void* cond_attr) { + (void)cond_attr; + condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL); + condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL); + condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); + if (condition->waiting_sem_ == NULL || + condition->received_sem_ == NULL || + condition->signal_event_ == NULL) { + pthread_cond_destroy(condition); + return 1; + } + return 0; +} + +static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { + int ok = 1; + if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { + // a thread is waiting in pthread_cond_wait: allow it to be notified + ok = SetEvent(condition->signal_event_); + // wait until the event is consumed so the signaler cannot consume + // the event via its own pthread_cond_wait. 
+ ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != + WAIT_OBJECT_0); + } + return !ok; +} + +static INLINE int pthread_cond_wait(pthread_cond_t *const condition, + pthread_mutex_t *const mutex) { + int ok; + // note that there is a consumer available so the signal isn't dropped in + // pthread_cond_signal + if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) + return 1; + // now unlock the mutex so pthread_cond_signal may be issued + pthread_mutex_unlock(mutex); + ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == + WAIT_OBJECT_0); + ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); + pthread_mutex_lock(mutex); + return !ok; +} +#else // _WIN32 +#include <pthread.h> // NOLINT +# define THREADFN void* +# define THREAD_RETURN(val) val +#endif + +#endif // CONFIG_MULTITHREAD + +// State of the worker thread object +typedef enum { + NOT_OK = 0, // object is unusable + OK, // ready to work + WORK // busy finishing the current task +} VP9WorkerStatus; + +// Function to be called by the worker thread. Takes two opaque pointers as +// arguments (data1 and data2), and should return false in case of error. +typedef int (*VP9WorkerHook)(void*, void*); + +// Platform-dependent implementation details for the worker. +typedef struct VP9WorkerImpl VP9WorkerImpl; + +// Synchronization object used to launch job in the worker thread +typedef struct { + VP9WorkerImpl *impl_; + VP9WorkerStatus status_; + VP9WorkerHook hook; // hook to call + void *data1; // first argument passed to 'hook' + void *data2; // second argument passed to 'hook' + int had_error; // return value of the last call to 'hook' +} VP9Worker; + +// The interface for all thread-worker related functions. All these functions +// must be implemented. +typedef struct { + // Must be called first, before any other method. + void (*init)(VP9Worker *const worker); + // Must be called to initialize the object and spawn the thread. Re-entrant. + // Will potentially launch the thread.
Returns false in case of error. + int (*reset)(VP9Worker *const worker); + // Makes sure the previous work is finished. Returns true if worker->had_error + // was not set and no error condition was triggered by the working thread. + int (*sync)(VP9Worker *const worker); + // Triggers the thread to call hook() with data1 and data2 arguments. These + // hook/data1/data2 values can be changed at any time before calling this + // function, but not be changed afterward until the next call to Sync(). + void (*launch)(VP9Worker *const worker); + // This function is similar to launch() except that it calls the + // hook directly instead of using a thread. Convenient to bypass the thread + // mechanism while still using the VP9Worker structs. sync() must + // still be called afterward (for error reporting). + void (*execute)(VP9Worker *const worker); + // Kill the thread and terminate the object. To use the object again, one + // must call reset() again. + void (*end)(VP9Worker *const worker); +} VP9WorkerInterface; + +// Install a new set of threading functions, overriding the defaults. This +// should be done before any workers are started, i.e., before any encoding or +// decoding takes place. The contents of the interface struct are copied, it +// is safe to free the corresponding memory after this call. This function is +// not thread-safe. Return false in case of invalid pointer or methods. +int vp9_set_worker_interface(const VP9WorkerInterface *const winterface); + +// Retrieve the currently set thread worker interface. 
+const VP9WorkerInterface *vp9_get_worker_interface(void); + +//------------------------------------------------------------------------------ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_THREAD_H_ diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 5b892bace..628257c87 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -28,6 +28,7 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_thread.h" #include "vp9/common/vp9_tile_common.h" #include "vp9/decoder/vp9_decodeframe.h" @@ -38,7 +39,6 @@ #include "vp9/decoder/vp9_dthread.h" #include "vp9/decoder/vp9_read_bit_buffer.h" #include "vp9/decoder/vp9_reader.h" -#include "vp9/decoder/vp9_thread.h" #define MAX_VP9_HEADER_SIZE 80 diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 29b18ca8f..758d49006 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -18,10 +18,10 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_ppflags.h" +#include "vp9/common/vp9_thread.h" #include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_thread.h" #ifdef __cplusplus extern "C" { diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index 01c07f1a0..75b652518 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -12,8 +12,8 @@ #define VP9_DECODER_VP9_DTHREAD_H_ #include "./vpx_config.h" +#include "vp9/common/vp9_thread.h" #include "vp9/decoder/vp9_reader.h" -#include "vp9/decoder/vp9_thread.h" struct VP9Common; struct VP9Decoder; diff --git a/vp9/decoder/vp9_thread.c b/vp9/decoder/vp9_thread.c deleted file mode 100644 index 348bdf6db..000000000 --- a/vp9/decoder/vp9_thread.c +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. 
-// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 08ad4e1fecba302bf1247645e84a7d2779956bc3 src/utils/thread.c - -#include <assert.h> -#include <string.h> // for memset() -#include "./vp9_thread.h" -#include "vpx_mem/vpx_mem.h" - -#if CONFIG_MULTITHREAD - -struct VP9WorkerImpl { - pthread_mutex_t mutex_; - pthread_cond_t condition_; - pthread_t thread_; -}; - -//------------------------------------------------------------------------------ - -static void execute(VP9Worker *const worker); // Forward declaration. - -static THREADFN thread_loop(void *ptr) { - VP9Worker *const worker = (VP9Worker*)ptr; - int done = 0; - while (!done) { - pthread_mutex_lock(&worker->impl_->mutex_); - while (worker->status_ == OK) { // wait in idling mode - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - if (worker->status_ == WORK) { - execute(worker); - worker->status_ = OK; - } else if (worker->status_ == NOT_OK) { // finish the worker - done = 1; - } - // signal to the main thread that we're done (for sync()) - pthread_cond_signal(&worker->impl_->condition_); - pthread_mutex_unlock(&worker->impl_->mutex_); - } - return THREAD_RETURN(NULL); // Thread is finished -} - -// main thread state control -static void change_state(VP9Worker *const worker, - VP9WorkerStatus new_status) { - // No-op when attempting to change state on a thread that didn't come up. - // Checking status_ without acquiring the lock first would result in a data - // race.
- if (worker->impl_ == NULL) return; - - pthread_mutex_lock(&worker->impl_->mutex_); - if (worker->status_ >= OK) { - // wait for the worker to finish - while (worker->status_ != OK) { - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - // assign new status and release the working thread if needed - if (new_status != OK) { - worker->status_ = new_status; - pthread_cond_signal(&worker->impl_->condition_); - } - } - pthread_mutex_unlock(&worker->impl_->mutex_); -} - -#endif // CONFIG_MULTITHREAD - -//------------------------------------------------------------------------------ - -static void init(VP9Worker *const worker) { - memset(worker, 0, sizeof(*worker)); - worker->status_ = NOT_OK; -} - -static int sync(VP9Worker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, OK); -#endif - assert(worker->status_ <= OK); - return !worker->had_error; -} - -static int reset(VP9Worker *const worker) { - int ok = 1; - worker->had_error = 0; - if (worker->status_ < OK) { -#if CONFIG_MULTITHREAD - worker->impl_ = (VP9WorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_)); - if (worker->impl_ == NULL) { - return 0; - } - if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { - goto Error; - } - if (pthread_cond_init(&worker->impl_->condition_, NULL)) { - pthread_mutex_destroy(&worker->impl_->mutex_); - goto Error; - } - pthread_mutex_lock(&worker->impl_->mutex_); - ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker); - if (ok) worker->status_ = OK; - pthread_mutex_unlock(&worker->impl_->mutex_); - if (!ok) { - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); - Error: - vpx_free(worker->impl_); - worker->impl_ = NULL; - return 0; - } -#else - worker->status_ = OK; -#endif - } else if (worker->status_ > OK) { - ok = sync(worker); - } - assert(!ok || (worker->status_ == OK)); - return ok; -} - -static void execute(VP9Worker *const worker) { - if (worker->hook != NULL) { - 
worker->had_error |= !worker->hook(worker->data1, worker->data2); - } -} - -static void launch(VP9Worker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, WORK); -#else - execute(worker); -#endif -} - -static void end(VP9Worker *const worker) { - if (worker->status_ >= OK) { -#if CONFIG_MULTITHREAD - change_state(worker, NOT_OK); - pthread_join(worker->impl_->thread_, NULL); - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); -#else - worker->status_ = NOT_OK; -#endif - } - vpx_free(worker->impl_); - worker->impl_ = NULL; - assert(worker->status_ == NOT_OK); -} - -//------------------------------------------------------------------------------ - -static VP9WorkerInterface g_worker_interface = { - init, reset, sync, launch, execute, end -}; - -int vp9_set_worker_interface(const VP9WorkerInterface* const winterface) { - if (winterface == NULL || - winterface->init == NULL || winterface->reset == NULL || - winterface->sync == NULL || winterface->launch == NULL || - winterface->execute == NULL || winterface->end == NULL) { - return 0; - } - g_worker_interface = *winterface; - return 1; -} - -const VP9WorkerInterface *vp9_get_worker_interface(void) { - return &g_worker_interface; -} - -//------------------------------------------------------------------------------ diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h deleted file mode 100644 index 864579c03..000000000 --- a/vp9/decoder/vp9_thread.h +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
-// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h - -#ifndef VP9_DECODER_VP9_THREAD_H_ -#define VP9_DECODER_VP9_THREAD_H_ - -#include "./vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if CONFIG_MULTITHREAD - -#if defined(_WIN32) -#include <errno.h> // NOLINT -#include <process.h> // NOLINT -#include <windows.h> // NOLINT -typedef HANDLE pthread_t; -typedef CRITICAL_SECTION pthread_mutex_t; -typedef struct { - HANDLE waiting_sem_; - HANDLE received_sem_; - HANDLE signal_event_; -} pthread_cond_t; - -//------------------------------------------------------------------------------ -// simplistic pthread emulation layer - -// _beginthreadex requires __stdcall -#define THREADFN unsigned int __stdcall -#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) - -static INLINE int pthread_create(pthread_t* const thread, const void* attr, - unsigned int (__stdcall *start)(void*), - void* arg) { - (void)attr; - *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ - 0, /* unsigned stack_size */ - start, - arg, - 0, /* unsigned initflag */ - NULL); /* unsigned *thrdaddr */ - if (*thread == NULL) return 1; - SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); - return 0; -} - -static INLINE int pthread_join(pthread_t thread, void** value_ptr) { - (void)value_ptr; - return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || - CloseHandle(thread) == 0); -} - -// Mutex -static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, - void* mutexattr) { - (void)mutexattr; - InitializeCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { - return TryEnterCriticalSection(mutex) ?
0 : EBUSY; -} - -static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { - EnterCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { - LeaveCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { - DeleteCriticalSection(mutex); - return 0; -} - -// Condition -static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { - int ok = 1; - ok &= (CloseHandle(condition->waiting_sem_) != 0); - ok &= (CloseHandle(condition->received_sem_) != 0); - ok &= (CloseHandle(condition->signal_event_) != 0); - return !ok; -} - -static INLINE int pthread_cond_init(pthread_cond_t *const condition, - void* cond_attr) { - (void)cond_attr; - condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL); - condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL); - condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); - if (condition->waiting_sem_ == NULL || - condition->received_sem_ == NULL || - condition->signal_event_ == NULL) { - pthread_cond_destroy(condition); - return 1; - } - return 0; -} - -static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { - int ok = 1; - if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { - // a thread is waiting in pthread_cond_wait: allow it to be notified - ok = SetEvent(condition->signal_event_); - // wait until the event is consumed so the signaler cannot consume - // the event via its own pthread_cond_wait. 
- ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != - WAIT_OBJECT_0); - } - return !ok; -} - -static INLINE int pthread_cond_wait(pthread_cond_t *const condition, - pthread_mutex_t *const mutex) { - int ok; - // note that there is a consumer available so the signal isn't dropped in - // pthread_cond_signal - if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) - return 1; - // now unlock the mutex so pthread_cond_signal may be issued - pthread_mutex_unlock(mutex); - ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == - WAIT_OBJECT_0); - ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); - pthread_mutex_lock(mutex); - return !ok; -} -#else // _WIN32 -#include <pthread.h> // NOLINT -# define THREADFN void* -# define THREAD_RETURN(val) val -#endif - -#endif // CONFIG_MULTITHREAD - -// State of the worker thread object -typedef enum { - NOT_OK = 0, // object is unusable - OK, // ready to work - WORK // busy finishing the current task -} VP9WorkerStatus; - -// Function to be called by the worker thread. Takes two opaque pointers as -// arguments (data1 and data2), and should return false in case of error. -typedef int (*VP9WorkerHook)(void*, void*); - -// Platform-dependent implementation details for the worker. -typedef struct VP9WorkerImpl VP9WorkerImpl; - -// Synchronization object used to launch job in the worker thread -typedef struct { - VP9WorkerImpl *impl_; - VP9WorkerStatus status_; - VP9WorkerHook hook; // hook to call - void *data1; // first argument passed to 'hook' - void *data2; // second argument passed to 'hook' - int had_error; // return value of the last call to 'hook' -} VP9Worker; - -// The interface for all thread-worker related functions. All these functions -// must be implemented. -typedef struct { - // Must be called first, before any other method. - void (*init)(VP9Worker *const worker); - // Must be called to initialize the object and spawn the thread. Re-entrant. - // Will potentially launch the thread.
Returns false in case of error. - int (*reset)(VP9Worker *const worker); - // Makes sure the previous work is finished. Returns true if worker->had_error - // was not set and no error condition was triggered by the working thread. - int (*sync)(VP9Worker *const worker); - // Triggers the thread to call hook() with data1 and data2 arguments. These - // hook/data1/data2 values can be changed at any time before calling this - // function, but not be changed afterward until the next call to Sync(). - void (*launch)(VP9Worker *const worker); - // This function is similar to launch() except that it calls the - // hook directly instead of using a thread. Convenient to bypass the thread - // mechanism while still using the VP9Worker structs. sync() must - // still be called afterward (for error reporting). - void (*execute)(VP9Worker *const worker); - // Kill the thread and terminate the object. To use the object again, one - // must call reset() again. - void (*end)(VP9Worker *const worker); -} VP9WorkerInterface; - -// Install a new set of threading functions, overriding the defaults. This -// should be done before any workers are started, i.e., before any encoding or -// decoding takes place. The contents of the interface struct are copied, it -// is safe to free the corresponding memory after this call. This function is -// not thread-safe. Return false in case of invalid pointer or methods. -int vp9_set_worker_interface(const VP9WorkerInterface *const winterface); - -// Retrieve the currently set thread worker interface. 
-const VP9WorkerInterface *vp9_get_worker_interface(void); - -//------------------------------------------------------------------------------ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_THREAD_H_ diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 8c1f34567..81fe6a620 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -50,6 +50,8 @@ VP9_COMMON_SRCS-yes += common/vp9_seg_common.h VP9_COMMON_SRCS-yes += common/vp9_seg_common.c VP9_COMMON_SRCS-yes += common/vp9_systemdependent.h VP9_COMMON_SRCS-yes += common/vp9_textblit.h +VP9_COMMON_SRCS-yes += common/vp9_thread.h +VP9_COMMON_SRCS-yes += common/vp9_thread.c VP9_COMMON_SRCS-yes += common/vp9_tile_common.h VP9_COMMON_SRCS-yes += common/vp9_tile_common.c VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk index 92ec6fd16..1fcb36f66 100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@ -31,8 +31,6 @@ VP9_DX_SRCS-yes += decoder/vp9_decodemv.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.h VP9_DX_SRCS-yes += decoder/vp9_decoder.c VP9_DX_SRCS-yes += decoder/vp9_decoder.h -VP9_DX_SRCS-yes += decoder/vp9_thread.c -VP9_DX_SRCS-yes += decoder/vp9_thread.h VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h -- cgit v1.2.1 From 294b849796d7c359c61aacc0390cf7da16565f64 Mon Sep 17 00:00:00 2001 From: hkuang Date: Fri, 11 Jul 2014 15:28:20 -0700 Subject: Include the right header for VP9 worker thread. pthread.h is not supported in windows. vp9_thread.h includes the emulation layer for pthread in windows. 
Change-Id: I2b1c8ec299928472faca7ebeea998170c9f4d744 --- vp9/common/vp9_onyxc_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 640e953e7..f31e137b0 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -11,7 +11,6 @@ #ifndef VP9_COMMON_VP9_ONYXC_INT_H_ #define VP9_COMMON_VP9_ONYXC_INT_H_ -#include <pthread.h> #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vp9_rtcd.h" @@ -21,6 +20,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_frame_buffers.h" #include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_thread.h" #include "vp9/common/vp9_tile_common.h" #if CONFIG_VP9_POSTPROC -- cgit v1.2.1 From 7eca086707f3e0f631a5be8952f594f4c387ca74 Mon Sep 17 00:00:00 2001 From: hkuang Date: Thu, 17 Jul 2014 14:25:41 -0700 Subject: Add segmentation map array for current and last frame segmentation. The original implementation only allocates one segmentation map and this works fine for serial decode. But for frame parallel decode, each thread needs to have its own segmentation map and the last frame segmentation map should be provided by the last frame's decoding thread. After finishing decoding a frame, a thread needs to serve the old segmentation map that is associated with the previously decoded frame. The thread also needs to use another segmentation map for decoding the current frame.
Change-Id: I442ddff36b5de9cb8a7eb59e225744c78f4492d8 --- vp9/common/vp9_alloccommon.c | 57 +++++++++++++++++++++++++++++++++++++------- vp9/common/vp9_alloccommon.h | 2 ++ vp9/common/vp9_entropymode.c | 3 +++ vp9/common/vp9_onyxc_int.h | 13 +++++++--- vp9/decoder/vp9_decodemv.c | 6 +++-- vp9/decoder/vp9_decoder.c | 2 ++ 6 files changed, 69 insertions(+), 14 deletions(-) diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index f847e1276..04081f107 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -55,7 +55,7 @@ static void setup_mi(VP9_COMMON *cm) { static int alloc_mi(VP9_COMMON *cm, int mi_size) { int i; - for (i = 0; i < 2; ++i) { + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { cm->mip_array[i] = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->mip)); if (cm->mip_array[i] == NULL) @@ -82,7 +82,7 @@ static int alloc_mi(VP9_COMMON *cm, int mi_size) { static void free_mi(VP9_COMMON *cm) { int i; - for (i = 0; i < 2; ++i) { + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { vpx_free(cm->mip_array[i]); cm->mip_array[i] = NULL; vpx_free(cm->mi_grid_base_array[i]); @@ -95,6 +95,37 @@ static void free_mi(VP9_COMMON *cm) { cm->prev_mi_grid_base = NULL; } +static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { + int i; + + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { + cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1); + if (cm->seg_map_array[i] == NULL) + return 1; + } + + // Init the index. 
+ cm->seg_map_idx = 0; + cm->prev_seg_map_idx = 1; + + cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; + cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; + + return 0; +} + +static void free_seg_map(VP9_COMMON *cm) { + int i; + + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { + vpx_free(cm->seg_map_array[i]); + cm->seg_map_array[i] = NULL; + } + + cm->current_frame_seg_map = NULL; + cm->last_frame_seg_map = NULL; +} + void vp9_free_frame_buffers(VP9_COMMON *cm) { int i; BufferPool *const pool = cm->buffer_pool; @@ -115,8 +146,7 @@ void vp9_free_frame_buffers(VP9_COMMON *cm) { void vp9_free_context_buffers(VP9_COMMON *cm) { free_mi(cm); - vpx_free(cm->last_frame_seg_map); - cm->last_frame_seg_map = NULL; + free_seg_map(cm); vpx_free(cm->above_context); cm->above_context = NULL; @@ -147,9 +177,8 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { setup_mi(cm); // Create the segmentation map structure and set to 0. - vpx_free(cm->last_frame_seg_map); - cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1); - if (!cm->last_frame_seg_map) + free_seg_map(cm); + if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail; vpx_free(cm->above_context); @@ -270,8 +299,8 @@ void vp9_update_frame_size(VP9_COMMON *cm) { setup_mi(cm); // Initialize the previous frame segment map to 0. - if (cm->last_frame_seg_map) - vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); + if (cm->current_frame_seg_map) + vpx_memset(cm->current_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); } void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { @@ -292,3 +321,13 @@ void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; } + +void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) { + // Swap indices. 
+ const int tmp = cm->seg_map_idx; + cm->seg_map_idx = cm->prev_seg_map_idx; + cm->prev_seg_map_idx = tmp; + + cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; + cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; +} diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h index c4b1b8d2d..1207d6e24 100644 --- a/vp9/common/vp9_alloccommon.h +++ b/vp9/common/vp9_alloccommon.h @@ -34,6 +34,8 @@ void vp9_update_frame_size(struct VP9Common *cm); void vp9_swap_mi_and_prev_mi(struct VP9Common *cm); +void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 5b00b0082..79c8b9bc5 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -442,6 +442,9 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { if (cm->last_frame_seg_map) vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + if (cm->current_frame_seg_map) + vpx_memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + // Reset the mode ref deltas for loop filter vp9_zero(lf->last_ref_deltas); vp9_zero(lf->last_mode_deltas); diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index f31e137b0..13c500147 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -44,6 +44,8 @@ extern "C" { #define FRAME_CONTEXTS_LOG2 2 #define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) +#define NUM_PING_PONG_BUFFERS 2 + extern const struct { PARTITION_CONTEXT above; PARTITION_CONTEXT left; @@ -163,8 +165,8 @@ typedef struct VP9Common { int mi_idx; int prev_mi_idx; - MODE_INFO *mip_array[2]; - MODE_INFO **mi_grid_base_array[2]; + MODE_INFO *mip_array[NUM_PING_PONG_BUFFERS]; + MODE_INFO **mi_grid_base_array[NUM_PING_PONG_BUFFERS]; MODE_INFO *mip; /* Base of allocated array */ MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ @@ -177,7 +179,12 @@ typedef struct VP9Common { 
MODE_INFO **prev_mi_grid_visible; // Persistent mb segment id map used in prediction. - unsigned char *last_frame_seg_map; + int seg_map_idx; + int prev_seg_map_idx; + + uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; + uint8_t *last_frame_seg_map; + uint8_t *current_frame_seg_map; INTERP_FILTER interp_filter; diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 1afaee1e3..187ff1307 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -96,7 +96,7 @@ static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize, for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) - cm->last_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; + cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; } static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, @@ -129,8 +129,10 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, predicted_segment_id = vp9_get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col); - if (!seg->update_map) + if (!seg->update_map) { + set_segment_id(cm, bsize, mi_row, mi_col, predicted_segment_id); return predicted_segment_id; + } if (seg->temporal_update) { const vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 75283ab93..07fe2899d 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -295,6 +295,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, cm->current_video_frame++; } + vp9_swap_current_and_last_seg_map(cm); + pbi->ready_for_new_data = 0; cm->error.setjmp = 0; -- cgit v1.2.1 From 44395a21da75a8fa6ca71a39bee1c9fe55a1358e Mon Sep 17 00:00:00 2001 From: hkuang Date: Mon, 28 Jul 2014 11:06:24 -0700 Subject: Move vp9_dec_build_inter_predictors_* to decoder folder. 
Change-Id: Ibe9fa28440cc79ba9f3504d78c7dca7bb01a23e1 --- vp9/common/vp9_reconinter.c | 212 +----------------------------------------- vp9/common/vp9_reconinter.h | 23 ++++- vp9/decoder/vp9_decodeframe.c | 205 ++++++++++++++++++++++++++++++++++++++++ vp9/decoder/vp9_decodeframe.h | 2 + 4 files changed, 230 insertions(+), 212 deletions(-) diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index d4fcb62f8..b6b9c873f 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -20,50 +20,7 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" -static void build_mc_border(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, int w, int h) { - // Get a pointer to the start of the real data for this row. - const uint8_t *ref_row = src - x - y * src_stride; - - if (y >= h) - ref_row += (h - 1) * src_stride; - else if (y > 0) - ref_row += y * src_stride; - - do { - int right = 0, copy; - int left = x < 0 ? 
-x : 0; - - if (left > b_w) - left = b_w; - - if (x + b_w > w) - right = x + b_w - w; - - if (right > b_w) - right = b_w; - - copy = b_w - left - right; - - if (left) - memset(dst, ref_row[0], left); - - if (copy) - memcpy(dst + left, ref_row + x + left, copy); - - if (right) - memset(dst + left + copy, ref_row[w - 1], right); - - dst += dst_stride; - ++y; - - if (y > 0 && y < h) - ref_row += src_stride; - } while (--b_h); -} - -static void inter_predictor(const uint8_t *src, int src_stride, +void inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int subpel_x, const int subpel_y, @@ -151,7 +108,7 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, return clamped_mv; } -static MV average_split_mvs(const struct macroblockd_plane *pd, int plane, +MV average_split_mvs(const struct macroblockd_plane *pd, int plane, const MODE_INFO *mi, int ref, int block) { const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0); MV res = {0, 0}; @@ -174,7 +131,7 @@ static MV average_split_mvs(const struct macroblockd_plane *pd, int plane, return res; } -static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, +void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y) { @@ -270,169 +227,6 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, MAX_MB_PLANE - 1); } -// TODO(jingning): This function serves as a placeholder for decoder prediction -// using on demand border extension. It should be moved to /decoder/ directory. 
-static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi[0]; - const int is_compound = has_second_ref(&mi->mbmi); - const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); - int ref; - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const struct scale_factors *const sf = &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = &pd->pre[ref]; - struct buf_2d *const dst_buf = &pd->dst; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - const MV mv = mi->mbmi.sb_type < BLOCK_8X8 - ? average_split_mvs(pd, plane, mi, ref, block) - : mi->mbmi.mv[ref].as_mv; - - - // TODO(jkoleszar): This clamping is done in the incorrect place for the - // scaling case. It needs to be done on the scaled MV, not the pre-scaling - // MV. Note however that it performs the subsampling aware scaling so - // that the result is always q4. - // mv_precision precision is MV_PRECISION_Q4. - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, - pd->subsampling_x, - pd->subsampling_y); - - MV32 scaled_mv; - int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, - subpel_x, subpel_y; - uint8_t *ref_frame, *buf_ptr; - const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf; - - // Get reference frame pointer, width and height. - if (plane == 0) { - frame_width = ref_buf->y_crop_width; - frame_height = ref_buf->y_crop_height; - ref_frame = ref_buf->y_buffer; - } else { - frame_width = ref_buf->uv_crop_width; - frame_height = ref_buf->uv_crop_height; - ref_frame = plane == 1 ? ref_buf->u_buffer : ref_buf->v_buffer; - } - - if (vp9_is_scaled(sf)) { - // Co-ordinate of containing block to pixel precision. 
- int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); - int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = (x_start + x) << SUBPEL_BITS; - y0_16 = (y_start + y) << SUBPEL_BITS; - - // Co-ordinate of current block in reference frame - // to 1/16th pixel precision. - x0_16 = sf->scale_value_x(x0_16, sf); - y0_16 = sf->scale_value_y(y0_16, sf); - - // Map the top left corner of the block into the reference frame. - x0 = sf->scale_value_x(x_start + x, sf); - y0 = sf->scale_value_y(y_start + y, sf); - - // Scale the MV and incorporate the sub-pixel offset of the block - // in the reference frame. - scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); - xs = sf->x_step_q4; - ys = sf->y_step_q4; - } else { - // Co-ordinate of containing block to pixel precision. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = x0 << SUBPEL_BITS; - y0_16 = y0 << SUBPEL_BITS; - - scaled_mv.row = mv_q4.row; - scaled_mv.col = mv_q4.col; - xs = ys = 16; - } - subpel_x = scaled_mv.col & SUBPEL_MASK; - subpel_y = scaled_mv.row & SUBPEL_MASK; - - // Calculate the top left corner of the best matching block in the reference frame. - x0 += scaled_mv.col >> SUBPEL_BITS; - y0 += scaled_mv.row >> SUBPEL_BITS; - x0_16 += scaled_mv.col; - y0_16 += scaled_mv.row; - - // Get reference block pointer. - buf_ptr = ref_frame + y0 * pre_buf->stride + x0; - buf_stride = pre_buf->stride; - - // Do border extension if there is motion or the - // width/height is not a multiple of 8 pixels. - if (scaled_mv.col || scaled_mv.row || - (frame_width & 0x7) || (frame_height & 0x7)) { - // Get reference block bottom right coordinate. 
- int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; - int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; - int x_pad = 0, y_pad = 0; - - if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) { - x0 -= VP9_INTERP_EXTEND - 1; - x1 += VP9_INTERP_EXTEND; - x_pad = 1; - } - - if (subpel_y || (sf->y_step_q4 & SUBPEL_MASK)) { - y0 -= VP9_INTERP_EXTEND - 1; - y1 += VP9_INTERP_EXTEND; - y_pad = 1; - } - - // Skip border extension if block is inside the frame. - if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width || - y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { - uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0; - // Extend the border. - build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0 + 1, - x0, y0, x1 - x0 + 1, y1 - y0 + 1, frame_width, - frame_height); - buf_stride = x1 - x0 + 1; - buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; - } - } - - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); - } -} - -void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, - &xd->plane[plane]); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const int bw = 4 * num_4x4_w; - const int bh = 4 * num_4x4_h; - - if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { - int i = 0, x, y; - assert(bsize == BLOCK_8X8); - for (y = 0; y < num_4x4_h; ++y) - for (x = 0; x < num_4x4_w; ++x) - dec_build_inter_predictors(xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); - } else { - dec_build_inter_predictors(xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); - } - } -} - void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], const 
YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 58c596ee8..5686c111f 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -18,6 +18,26 @@ extern "C" { #endif +void inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys); + +MV average_split_mvs(const struct macroblockd_plane *pd, int plane, + const MODE_INFO *mi, int ref, int block); + +MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, + int bw, int bh, int ss_x, int ss_y); + +void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y); + void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); @@ -27,9 +47,6 @@ void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); -void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *mv_q3, diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 628257c87..fae4255da 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1410,3 +1410,208 @@ void vp9_decode_frame(VP9Decoder *pbi, if (cm->refresh_frame_context) cm->frame_contexts[cm->frame_context_idx] = cm->fc; } + +static void build_mc_border(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, int w, int h) { + // Get a pointer to the start of the real data for this row. 
+ const uint8_t *ref_row = src - x - y * src_stride; + + if (y >= h) + ref_row += (h - 1) * src_stride; + else if (y > 0) + ref_row += y * src_stride; + + do { + int right = 0, copy; + int left = x < 0 ? -x : 0; + + if (left > b_w) + left = b_w; + + if (x + b_w > w) + right = x + b_w - w; + + if (right > b_w) + right = b_w; + + copy = b_w - left - right; + + if (left) + memset(dst, ref_row[0], left); + + if (copy) + memcpy(dst + left, ref_row + x + left, copy); + + if (right) + memset(dst + left + copy, ref_row[w - 1], right); + + dst += dst_stride; + ++y; + + if (y > 0 && y < h) + ref_row += src_stride; + } while (--b_h); +} + +void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const MODE_INFO *mi = xd->mi[0]; + const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); + int ref; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + struct buf_2d *const dst_buf = &pd->dst; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + const MV mv = mi->mbmi.sb_type < BLOCK_8X8 + ? average_split_mvs(pd, plane, mi, ref, block) + : mi->mbmi.mv[ref].as_mv; + + + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. + // mv_precision precision is MV_PRECISION_Q4. 
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + + MV32 scaled_mv; + int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, + subpel_x, subpel_y; + uint8_t *ref_frame, *buf_ptr; + const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf; + + // Get reference frame pointer, width and height. + if (plane == 0) { + frame_width = ref_buf->y_crop_width; + frame_height = ref_buf->y_crop_height; + ref_frame = ref_buf->y_buffer; + } else { + frame_width = ref_buf->uv_crop_width; + frame_height = ref_buf->uv_crop_height; + ref_frame = plane == 1 ? ref_buf->u_buffer : ref_buf->v_buffer; + } + + if (vp9_is_scaled(sf)) { + // Co-ordinate of containing block to pixel precision. + int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = (x_start + x) << SUBPEL_BITS; + y0_16 = (y_start + y) << SUBPEL_BITS; + + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf->scale_value_x(x0_16, sf); + y0_16 = sf->scale_value_y(y0_16, sf); + + // Map the top left corner of the block into the reference frame. + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. + scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. 
+ x0_16 = x0 << SUBPEL_BITS; + y0_16 = y0 << SUBPEL_BITS; + + scaled_mv.row = mv_q4.row; + scaled_mv.col = mv_q4.col; + xs = ys = 16; + } + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + + // Calculate the top left corner of the best matching block in the + // reference frame. + x0 += scaled_mv.col >> SUBPEL_BITS; + y0 += scaled_mv.row >> SUBPEL_BITS; + x0_16 += scaled_mv.col; + y0_16 += scaled_mv.row; + + // Get reference block pointer. + buf_ptr = ref_frame + y0 * pre_buf->stride + x0; + buf_stride = pre_buf->stride; + + // Do border extension if there is motion or the + // width/height is not a multiple of 8 pixels. + if (scaled_mv.col || scaled_mv.row || + (frame_width & 0x7) || (frame_height & 0x7)) { + // Get reference block bottom right coordinate. + int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; + int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; + int x_pad = 0, y_pad = 0; + + if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) { + x0 -= VP9_INTERP_EXTEND - 1; + x1 += VP9_INTERP_EXTEND; + x_pad = 1; + } + + if (subpel_y || (sf->y_step_q4 & SUBPEL_MASK)) { + y0 -= VP9_INTERP_EXTEND - 1; + y1 += VP9_INTERP_EXTEND; + y_pad = 1; + } + + // Skip border extension if block is inside the frame. + if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width || + y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { + uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0; + // Extend the border. 
+ build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0 + 1, + x0, y0, x1 - x0 + 1, y1 - y0 + 1, frame_width, + frame_height); + buf_stride = x1 - x0 + 1; + buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; + } + } + + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +} + +void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + dec_build_inter_predictors(xd, plane, i++, bw, bh, + 4 * x, 4 * y, 4, 4, mi_x, mi_y); + } else { + dec_build_inter_predictors(xd, plane, 0, bw, bh, + 0, 0, bw, bh, mi_x, mi_y); + } + } +} diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h index fb15645a9..6fbd50c8b 100644 --- a/vp9/decoder/vp9_decodeframe.h +++ b/vp9/decoder/vp9_decodeframe.h @@ -25,6 +25,8 @@ void vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end); +void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); #ifdef __cplusplus } // extern "C" #endif -- cgit v1.2.1 From 48c5d470e78436a6abde1c14c183f1a9d30f04de Mon Sep 17 00:00:00 2001 From: hkuang Date: Thu, 7 Aug 2014 14:29:41 -0700 Subject: Manually pick "Make the api behavior conform to api spec." from master branch. 
Change-Id: I7323ec4cf8b8b7841e37f2bf90548cefa9de9795 --- test/decode_test_driver.cc | 45 +++++++++++++++++++++++++++------------------ vp8/vp8_dx_iface.c | 9 +++++++++ vp9/vp9_dx_iface.c | 11 +++++++++-- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc index 8bea4ccf9..161dbb262 100644 --- a/test/decode_test_driver.cc +++ b/test/decode_test_driver.cc @@ -45,36 +45,45 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) { ASSERT_TRUE(decoder != NULL); const char *codec_name = decoder->GetDecoderName(); const bool is_vp8 = strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0; + bool end_of_file = false; // Decode frames. - for (video->Begin(); !::testing::Test::HasFailure() && video->cxdata(); + for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file; video->Next()) { PreDecodeFrameHook(*video, decoder); vpx_codec_stream_info_t stream_info; stream_info.sz = sizeof(stream_info); - const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(), - video->frame_size(), - &stream_info); - if (is_vp8) { - /* Vp8's implementation of PeekStream returns an error if the frame you - * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first - * frame, which must be a keyframe. */ - if (video->frame_number() == 0) + + if (video->cxdata() != NULL) { + const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(), + video->frame_size(), + &stream_info); + if (is_vp8) { + /* Vp8's implementation of PeekStream returns an error if the frame you + * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the + * first frame, which must be a keyframe. */ + if (video->frame_number() == 0) + ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " + << vpx_codec_err_to_string(res_peek); + } else { + /* The Vp9 implementation of PeekStream returns an error only if the + * data passed to it isn't a valid Vp9 chunk. 
*/ ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " << vpx_codec_err_to_string(res_peek); + } + + vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(), + video->frame_size()); + if (!HandleDecodeResult(res_dec, *video, decoder)) + break; } else { - /* The Vp9 implementation of PeekStream returns an error only if the - * data passed to it isn't a valid Vp9 chunk. */ - ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " - << vpx_codec_err_to_string(res_peek); + // Signal end of the file to the decoder. + const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); + end_of_file = true; } - vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(), - video->frame_size()); - if (!HandleDecodeResult(res_dec, *video, decoder)) - break; - DxDataIterator dec_iter = decoder->GetDxData(); const vpx_image_t *img = NULL; diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index 56394fb1c..d5e319e8d 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -60,6 +60,7 @@ struct vpx_codec_alg_priv vpx_decrypt_cb decrypt_cb; void *decrypt_state; vpx_image_t img; + int flushed; int img_setup; struct frame_buffers yv12_frame_buffers; void *user_priv; @@ -89,6 +90,7 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx) ctx->priv->alg_priv->decrypt_cb = NULL; ctx->priv->alg_priv->decrypt_state = NULL; ctx->priv->init_flags = ctx->init_flags; + ctx->priv->alg_priv->flushed = 0; if (ctx->config.dec) { @@ -327,6 +329,13 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, unsigned int resolution_change = 0; unsigned int w, h; + if (data == NULL && data_sz == 0) { + ctx->flushed = 1; + return VPX_CODEC_OK; + } + + /* Reset flushed when receiving a valid frame */ + ctx->flushed = 0; /* Update the input fragment data */ if(update_fragments(ctx, data, data_sz, &res) <= 0) diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 746fab006..3bfdea6ad 100644 --- a/vp9/vp9_dx_iface.c 
+++ b/vp9/vp9_dx_iface.c @@ -37,6 +37,7 @@ struct vpx_codec_alg_priv { vpx_decrypt_cb decrypt_cb; void *decrypt_state; vpx_image_t img; + int flushed; int invert_tile_order; int frame_parallel_decode; // frame-based threading. int last_show_frame; // Index of last output frame. @@ -75,6 +76,7 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, ctx->priv->alg_priv = alg_priv; ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); ctx->priv->init_flags = ctx->init_flags; + ctx->priv->alg_priv->flushed = 0; ctx->priv->alg_priv->frame_parallel_decode = (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING); @@ -468,8 +470,13 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, uint32_t frame_sizes[8]; int frame_count; - if (data == NULL || data_sz == 0) - return VPX_CODEC_INVALID_PARAM; + if (data == NULL && data_sz == 0) { + ctx->flushed = 1; + return VPX_CODEC_OK; + } + + // Reset flushed when receiving a valid frame. + ctx->flushed = 0; res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count, ctx->decrypt_cb, ctx->decrypt_state); -- cgit v1.2.1 From a3ef7d5a5086b2aa2832ff92b23fa6b6b0664c98 Mon Sep 17 00:00:00 2001 From: hkuang Date: Fri, 8 Aug 2014 14:49:55 -0700 Subject: Add VP9 frame-parallel unit test. Make sure VP9 frame-parallel decode passes all the standard test vectors. Only test running with 2,3,4 threads now. Also refactor the video decode test driver to support passing in decode flags which is used to enable frame-parallel decode. 
Change-Id: I6a712464232c2e13681634951c7e176312522e1e --- test/codec_factory.h | 28 +++++++++++++- test/decode_test_driver.cc | 11 +++++- test/decode_test_driver.h | 20 ++++++++-- test/test_vector_test.cc | 91 ++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 131 insertions(+), 19 deletions(-) diff --git a/test/codec_factory.h b/test/codec_factory.h index 7f9398cc8..286c6aa57 100644 --- a/test/codec_factory.h +++ b/test/codec_factory.h @@ -35,6 +35,10 @@ class CodecFactory { virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) const = 0; + virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, + const vpx_codec_flags_t flags, + unsigned long deadline) const = 0; // NOLINT + virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, const unsigned long init_flags, @@ -72,6 +76,10 @@ class VP8Decoder : public Decoder { VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) : Decoder(cfg, deadline) {} + VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag, + unsigned long deadline) // NOLINT + : Decoder(cfg, flag, deadline) {} + protected: virtual vpx_codec_iface_t* CodecInterface() const { #if CONFIG_VP8_DECODER @@ -104,8 +112,14 @@ class VP8CodecFactory : public CodecFactory { virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) const { + return CreateDecoder(cfg, 0, deadline); + } + + virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, + const vpx_codec_flags_t flags, + unsigned long deadline) const { // NOLINT #if CONFIG_VP8_DECODER - return new VP8Decoder(cfg, deadline); + return new VP8Decoder(cfg, flags, deadline); #else return NULL; #endif @@ -154,6 +168,10 @@ class VP9Decoder : public Decoder { VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) : Decoder(cfg, deadline) {} + VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag, + unsigned long deadline) // NOLINT + : Decoder(cfg, flag, deadline) {} + protected: virtual 
vpx_codec_iface_t* CodecInterface() const { #if CONFIG_VP9_DECODER @@ -186,8 +204,14 @@ class VP9CodecFactory : public CodecFactory { virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) const { + return CreateDecoder(cfg, 0, deadline); + } + + virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg, + const vpx_codec_flags_t flags, + unsigned long deadline) const { // NOLINT #if CONFIG_VP9_DECODER - return new VP9Decoder(cfg, deadline); + return new VP9Decoder(cfg, flags, deadline); #else return NULL; #endif diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc index 8bea4ccf9..a10dcf5fb 100644 --- a/test/decode_test_driver.cc +++ b/test/decode_test_driver.cc @@ -40,8 +40,7 @@ vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size, } void DecoderTest::RunLoop(CompressedVideoSource *video) { - vpx_codec_dec_cfg_t dec_cfg = {0}; - Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0); + Decoder* const decoder = codec_->CreateDecoder(cfg_, flags_); ASSERT_TRUE(decoder != NULL); const char *codec_name = decoder->GetDecoderName(); const bool is_vp8 = strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0; @@ -85,4 +84,12 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) { delete decoder; } + +void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) { + memcpy(&cfg_, &dec_cfg, sizeof(cfg_)); +} + +void DecoderTest::set_flags(const vpx_codec_flags_t flags) { + flags_ = flags; +} } // namespace libvpx_test diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h index dd3593e1e..dc9745eab 100644 --- a/test/decode_test_driver.h +++ b/test/decode_test_driver.h @@ -41,7 +41,13 @@ class DxDataIterator { class Decoder { public: Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline) - : cfg_(cfg), deadline_(deadline), init_done_(false) { + : cfg_(cfg), flags_(0), deadline_(deadline), init_done_(false) { + memset(&decoder_, 0, sizeof(decoder_)); + } + + Decoder(vpx_codec_dec_cfg_t cfg, 
const vpx_codec_flags_t flag, + unsigned long deadline) // NOLINT + : cfg_(cfg), flags_(flag), deadline_(deadline), init_done_(false) { memset(&decoder_, 0, sizeof(decoder_)); } @@ -102,7 +108,7 @@ class Decoder { if (!init_done_) { const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_, CodecInterface(), - &cfg_, 0); + &cfg_, flags_); ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError(); init_done_ = true; } @@ -110,6 +116,7 @@ class Decoder { vpx_codec_ctx_t decoder_; vpx_codec_dec_cfg_t cfg_; + vpx_codec_flags_t flags_; unsigned int deadline_; bool init_done_; }; @@ -120,6 +127,9 @@ class DecoderTest { // Main decoding loop virtual void RunLoop(CompressedVideoSource *video); + virtual void set_cfg(const vpx_codec_dec_cfg_t &dec_cfg); + virtual void set_flags(const vpx_codec_flags_t flags); + // Hook to be called before decompressing every frame. virtual void PreDecodeFrameHook(const CompressedVideoSource& video, Decoder *decoder) {} @@ -137,11 +147,15 @@ class DecoderTest { const unsigned int frame_number) {} protected: - explicit DecoderTest(const CodecFactory *codec) : codec_(codec) {} + explicit DecoderTest(const CodecFactory *codec) : codec_(codec), flags_(0) { + memset(&cfg_, 0, sizeof(cfg_)); + } virtual ~DecoderTest() {} const CodecFactory *codec_; + vpx_codec_dec_cfg_t cfg_; + vpx_codec_flags_t flags_; }; } // namespace libvpx_test diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc index 1f294f20b..b2f9d590a 100644 --- a/test/test_vector_test.cc +++ b/test/test_vector_test.cc @@ -12,6 +12,7 @@ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" +#include "../tools_common.h" #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" @@ -26,10 +27,24 @@ namespace { +enum DecodeMode { + kSerialMode, + kFrameParallMode +}; + +const int kDecodeMode = 0; +const int kThreads = 1; +const int kFileName = 2; + +typedef std::tr1::tuple DecodeParam; + class TestVectorTest : public 
::libvpx_test::DecoderTest, - public ::libvpx_test::CodecTestWithParam { + public ::libvpx_test::CodecTestWithParam { protected: - TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {} + TestVectorTest() + : DecoderTest(GET_PARAM(0)), + md5_file_(NULL) { + } virtual ~TestVectorTest() { if (md5_file_) @@ -71,8 +86,25 @@ class TestVectorTest : public ::libvpx_test::DecoderTest, // checksums match the correct md5 data, then the test is passed. Otherwise, // the test failed. TEST_P(TestVectorTest, MD5Match) { - const std::string filename = GET_PARAM(1); + const DecodeParam input = GET_PARAM(1); + const std::string filename = std::tr1::get(input); + const int threads = std::tr1::get(input); + const int mode = std::tr1::get(input); libvpx_test::CompressedVideoSource *video = NULL; + vpx_codec_flags_t flags = 0; + vpx_codec_dec_cfg_t cfg = {0}; + char str[256]; + + if (mode == kFrameParallMode) { + flags |= VPX_CODEC_USE_FRAME_THREADING; + } + + cfg.threads = threads; + + snprintf(str, sizeof(str) / sizeof(str[0]) - 1, + "file: %s mode: %s threads: %d", + filename.c_str(), mode == 0 ? "Serial" : "Parallel", threads); + SCOPED_TRACE(str); // Open compressed video file. if (filename.substr(filename.length() - 3, 3) == "ivf") { @@ -92,18 +124,53 @@ TEST_P(TestVectorTest, MD5Match) { const std::string md5_filename = filename + ".md5"; OpenMD5File(md5_filename); + // Set decode config and flags. + set_cfg(cfg); + set_flags(flags); + // Decode frame, and check the md5 matching. ASSERT_NO_FATAL_FAILURE(RunLoop(video)); delete video; } -VP8_INSTANTIATE_TEST_CASE(TestVectorTest, - ::testing::ValuesIn(libvpx_test::kVP8TestVectors, - libvpx_test::kVP8TestVectors + - libvpx_test::kNumVP8TestVectors)); -VP9_INSTANTIATE_TEST_CASE(TestVectorTest, - ::testing::ValuesIn(libvpx_test::kVP9TestVectors, - libvpx_test::kVP9TestVectors + - libvpx_test::kNumVP9TestVectors)); - +// Test VP8 decode in serial mode with single thread. +// NOTE: VP8 only support serial mode. 
+INSTANTIATE_TEST_CASE_P( + VP8, TestVectorTest, + ::testing::Combine( + ::testing::Values( + static_cast(&libvpx_test::kVP8)), + ::testing::Combine( + ::testing::Values(0), // Serial Mode. + ::testing::Values(1), // Single thread. + ::testing::ValuesIn(libvpx_test::kVP8TestVectors, + libvpx_test::kVP8TestVectors + + libvpx_test::kNumVP8TestVectors)))); + +// Test VP9 decode in serial mode with single thread. +INSTANTIATE_TEST_CASE_P( + VP9, TestVectorTest, + ::testing::Combine( + ::testing::Values( + static_cast(&libvpx_test::kVP9)), + ::testing::Combine( + ::testing::Values(0), // Serial Mode. + ::testing::Values(1), // Single thread. + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)))); + + +// Test VP9 decode in frame parallel mode with different number of threads. +INSTANTIATE_TEST_CASE_P( + VP9MultiThreadedFrameParallel, TestVectorTest, + ::testing::Combine( + ::testing::Values( + static_cast(&libvpx_test::kVP9)), + ::testing::Combine( + ::testing::Values(1), // Frame Parallel mode. + ::testing::Values(2, 3, 4), // With 2, 3, 4 threads. + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)))); } // namespace -- cgit v1.2.1 From 5106e4dfa882a9a3edc172434ddd32c5b52a103c Mon Sep 17 00:00:00 2001 From: hkuang Date: Mon, 18 Aug 2014 11:16:15 -0700 Subject: Fix a bug in adding frame parallel unit test. There are two CreateDecoder functions and decode_test_driver is not calling the right function now. This bug is discovered during really enable the frame parallel flag inside libvpx. This bug does not affect any existing unit test though. 
Change-Id: Icd9633c4b66d50e422a09c4310ff791082878936 --- test/decode_test_driver.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc index 89f2b28cd..4147039d9 100644 --- a/test/decode_test_driver.cc +++ b/test/decode_test_driver.cc @@ -40,7 +40,7 @@ vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size, } void DecoderTest::RunLoop(CompressedVideoSource *video) { - Decoder* const decoder = codec_->CreateDecoder(cfg_, flags_); + Decoder* const decoder = codec_->CreateDecoder(cfg_, flags_, 0); ASSERT_TRUE(decoder != NULL); const char *codec_name = decoder->GetDecoderName(); const bool is_vp8 = strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0; -- cgit v1.2.1 From 4d0d78424b6aff08a8c11046a9001184425d8485 Mon Sep 17 00:00:00 2001 From: hkuang Date: Fri, 5 Sep 2014 14:26:16 -0700 Subject: Increase the thread test range to cover 5, 6, 7, 8 threads. Change-Id: Id25c294720551bb5153987d8758668befaa57929 --- test/test_vector_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc index b2f9d590a..a509e831b 100644 --- a/test/test_vector_test.cc +++ b/test/test_vector_test.cc @@ -169,7 +169,7 @@ INSTANTIATE_TEST_CASE_P( static_cast(&libvpx_test::kVP9)), ::testing::Combine( ::testing::Values(1), // Frame Parallel mode. - ::testing::Values(2, 3, 4), // With 2, 3, 4 threads. + ::testing::Range(2, 9), // With 2 ~ 8 threads. ::testing::ValuesIn(libvpx_test::kVP9TestVectors, libvpx_test::kVP9TestVectors + libvpx_test::kNumVP9TestVectors)))); -- cgit v1.2.1 From 9ce3a7d76c5ef702337b96b9aa2c944da1b31869 Mon Sep 17 00:00:00 2001 From: Hangyu Kuang Date: Wed, 30 Jul 2014 20:43:40 -0700 Subject: Implement frame parallel decode for VP9. 
Using 4 threads, frame parallel decode is ~3x faster than single thread decode and around 30% faster than tile parallel decode for frame parallel encoded video on both Android and desktop with 4 threads. Decode speed is scalable to threads too which means decode could be even faster with more threads. Change-Id: Ia0a549aaa3e83b5a17b31d8299aa496ea4f21e3e --- vp9/common/vp9_alloccommon.c | 104 +++++++++--- vp9/common/vp9_entropymode.c | 5 +- vp9/common/vp9_mvref_common.c | 31 ++-- vp9/common/vp9_mvref_common.h | 4 +- vp9/common/vp9_onyxc_int.h | 38 ++++- vp9/decoder/vp9_decodeframe.c | 172 ++++++++++++++----- vp9/decoder/vp9_decodeframe.h | 3 +- vp9/decoder/vp9_decodemv.c | 23 ++- vp9/decoder/vp9_decodemv.h | 3 +- vp9/decoder/vp9_decoder.c | 93 +++++++++-- vp9/decoder/vp9_decoder.h | 6 + vp9/decoder/vp9_dthread.c | 165 +++++++++++++++++++ vp9/decoder/vp9_dthread.h | 29 +++- vp9/encoder/vp9_pickmode.c | 2 +- vp9/encoder/vp9_rdopt.c | 3 +- vp9/vp9_dx_iface.c | 372 ++++++++++++++++++++++++++++++------------ vpx/vpx_frame_buffer.h | 5 +- 17 files changed, 842 insertions(+), 216 deletions(-) diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 04081f107..4ea62398f 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -12,11 +12,37 @@ #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_systemdependent.h" +// TODO(hkuang): Don't need to lock the whole pool after implementing atomic +// frame reference count. 
+void lock_buffer_pool(BufferPool *const pool) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&pool->pool_mutex); +#else + (void)pool; +#endif +} + +void unlock_buffer_pool(BufferPool *const pool) { +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&pool->pool_mutex); +#else + (void)pool; +#endif +} + +static INLINE void alloc_mi_array(VP9_COMMON *cm, int mi_size, int idx) { + CHECK_MEM_ERROR(cm, cm->mip_array[idx], + vpx_calloc(mi_size, sizeof(*cm->mip_array[0]))); + CHECK_MEM_ERROR(cm, cm->mi_grid_base_array[idx], + vpx_calloc(mi_size, sizeof(*cm->mi_grid_base_array[0]))); +} + static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) { int i; @@ -49,40 +75,47 @@ static void setup_mi(VP9_COMMON *cm) { vpx_memset(cm->mi_grid_base, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); - clear_mi_border(cm, cm->prev_mip); + // Only clear mi border in non frame-parallel decode. In frame-parallel + // decode, prev_mip is managed by previous decoding thread. While in + // non frame-parallel decode, prev_mip and mip are both managed by + // current decoding thread. + if (!cm->frame_parallel_decode) + clear_mi_border(cm, cm->prev_mip); } static int alloc_mi(VP9_COMMON *cm, int mi_size) { int i; for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { - cm->mip_array[i] = - (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->mip)); - if (cm->mip_array[i] == NULL) - return 1; - - cm->mi_grid_base_array[i] = - (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base)); - if (cm->mi_grid_base_array[i] == NULL) - return 1; + // Delay reallocation as another thread is accessing prev_mi. + if (cm->frame_parallel_decode && i == cm->prev_mi_idx) { + cm->update_prev_mi = 1; + continue; + } + alloc_mi_array(cm, mi_size, i); } - // Init the index. 
- cm->mi_idx = 0; - cm->prev_mi_idx = 1; - cm->mip = cm->mip_array[cm->mi_idx]; - cm->prev_mip = cm->mip_array[cm->prev_mi_idx]; cm->mi_grid_base = cm->mi_grid_base_array[cm->mi_idx]; - cm->prev_mi_grid_base = cm->mi_grid_base_array[cm->prev_mi_idx]; + + if (!cm->frame_parallel_decode) { + cm->mi_idx = 0; + cm->prev_mi_idx = 1; + // In frame-parallel decode, prev_mip comes from another thread, + // so current decoding thread should not touch it. + cm->prev_mip = cm->mip_array[cm->prev_mi_idx]; + cm->prev_mi_grid_base = cm->mi_grid_base_array[cm->prev_mi_idx]; + } return 0; } -static void free_mi(VP9_COMMON *cm) { +static void free_mi(VP9_COMMON *cm, int decode_done) { int i; for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { + if (cm->frame_parallel_decode && i == cm->prev_mi_idx && !decode_done) + continue; vpx_free(cm->mip_array[i]); cm->mip_array[i] = NULL; vpx_free(cm->mi_grid_base_array[i]); @@ -90,9 +123,12 @@ static void free_mi(VP9_COMMON *cm) { } cm->mip = NULL; - cm->prev_mip = NULL; cm->mi_grid_base = NULL; - cm->prev_mi_grid_base = NULL; + + if (!cm->frame_parallel_decode) { + cm->prev_mip = NULL; + cm->prev_mi_grid_base = NULL; + } } static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { @@ -109,7 +145,10 @@ static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { cm->prev_seg_map_idx = 1; cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; - cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; + + if (!cm->frame_parallel_decode) { + cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; + } return 0; } @@ -123,7 +162,10 @@ static void free_seg_map(VP9_COMMON *cm) { } cm->current_frame_seg_map = NULL; - cm->last_frame_seg_map = NULL; + + if (!cm->frame_parallel_decode) { + cm->last_frame_seg_map = NULL; + } } void vp9_free_frame_buffers(VP9_COMMON *cm) { @@ -144,8 +186,7 @@ void vp9_free_frame_buffers(VP9_COMMON *cm) { } void vp9_free_context_buffers(VP9_COMMON *cm) { - free_mi(cm); - + free_mi(cm, 1); 
free_seg_map(cm); vpx_free(cm->above_context); @@ -170,7 +211,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { set_mb_mi(cm, aligned_width, aligned_height); - free_mi(cm); + free_mi(cm, 0); if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) goto fail; @@ -288,7 +329,6 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { void vp9_remove_common(VP9_COMMON *cm) { vp9_free_frame_buffers(cm); vp9_free_context_buffers(cm); - vp9_free_internal_frame_buffers(&cm->buffer_pool->int_frame_buffers); } void vp9_update_frame_size(VP9_COMMON *cm) { @@ -306,6 +346,20 @@ void vp9_update_frame_size(VP9_COMMON *cm) { void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { // Swap indices. const int tmp = cm->mi_idx; + + // Only used in frame parallel decode: Update the prev_mi buffer if + // needed. The worker that was accessing it must already finish decoding. + // So it can be resized safely now. + if (cm->update_prev_mi) { + const int mi_size = cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE); + vpx_free(cm->mip_array[cm->prev_mi_idx]); + vpx_free(cm->mi_grid_base_array[cm->prev_mi_idx]); + cm->mip_array[cm->prev_mi_idx] = NULL; + cm->mi_grid_base_array[cm->prev_mi_idx] = NULL; + alloc_mi_array(cm, mi_size, cm->prev_mi_idx); + cm->update_prev_mi = 0; + } + cm->mi_idx = cm->prev_mi_idx; cm->prev_mi_idx = tmp; diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 79c8b9bc5..4d98cf9cc 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -439,7 +439,8 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { int i; vp9_clearall_segfeatures(&cm->seg); cm->seg.abs_delta = SEGMENT_DELTADATA; - if (cm->last_frame_seg_map) + + if (cm->last_frame_seg_map && !cm->frame_parallel_decode) vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); if (cm->current_frame_seg_map) @@ -467,7 +468,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { cm->frame_contexts[cm->frame_context_idx] = 
cm->fc; } - if (frame_is_intra_only(cm)) + if (frame_is_intra_only(cm) && !cm->frame_parallel_decode) vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 0fe58c5c8..5913d356f 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -17,14 +17,12 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, - int block, int mi_row, int mi_col) { + int block, int mi_row, int mi_col, + find_mv_refs_sync sync, void *const data) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi - ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] - : NULL; - const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL; - + MODE_INFO *prev_mi = NULL; + MB_MODE_INFO *prev_mbmi = NULL; const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; @@ -71,6 +69,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, } } + // Synchronize here for frame parallel decode if sync function is provided. + if (sync != NULL) { + sync(data, mi_row); + } + prev_mi = cm->coding_use_prev_mi && cm->prev_mi ? + cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] : NULL; + prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL; + // Check the last frame's mode and mv info. 
if (prev_mbmi) { if (prev_mbmi->ref_frame[0] == ref_frame) @@ -109,12 +115,13 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, } void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - const TileInfo *const tile, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int mi_row, int mi_col) { + const TileInfo *const tile, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int mi_row, int mi_col, + find_mv_refs_sync sync, void *const data) { find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col); + mi_row, mi_col, sync, data); } static void lower_mv_precision(MV *mv, int allow_hp) { @@ -152,7 +159,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, assert(MAX_MV_REF_CANDIDATES == 2); find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block, - mi_row, mi_col); + mi_row, mi_col, NULL, NULL); near->as_int = 0; switch (block) { diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h index 7bce3fa37..14defed9c 100644 --- a/vp9/common/vp9_mvref_common.h +++ b/vp9/common/vp9_mvref_common.h @@ -204,10 +204,12 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } +typedef void (*find_mv_refs_sync)(void *const data, int mi_row); void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, int mi_row, int mi_col); + int_mv *mv_ref_list, int mi_row, int mi_col, + find_mv_refs_sync sync, void *const data); // check a list of motion vectors by sad score using a number rows of pixels // above and a number cols of pixels in the left to select the one with best diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 13c500147..7425abdd7 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -36,10 +36,13 @@ extern "C" { 
#define REF_FRAMES_LOG2 3 #define REF_FRAMES (1 << REF_FRAMES_LOG2) -// 1 scratch frame for the new frame, 3 for scaled references on the encoder +// 4 scratch frames for the new frames to support a maximum of 4 cores decoding +// in parallel, 3 for scaled references on the encoder. +// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number +// of framebuffers. // TODO(jkoleszar): These 3 extra references could probably come from the // normal reference pool. -#define FRAME_BUFFERS (REF_FRAMES + 4) +#define FRAME_BUFFERS (REF_FRAMES + 7) #define FRAME_CONTEXTS_LOG2 2 #define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) @@ -64,6 +67,18 @@ typedef struct { int ref_count; vpx_codec_frame_buffer_t raw_frame_buffer; YV12_BUFFER_CONFIG buf; + + // The Following variables will only be used in frame parallel decode. + + // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means + // that no FrameWorker owns, or is decoding, this buffer. + VP9Worker *frame_worker_owner; + + // row and col indicate which position frame has been decoded to in real + // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX + // when the frame is fully decoded. + int row; + int col; } RefCntBuffer; typedef struct { @@ -114,6 +129,10 @@ typedef struct VP9Common { int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ + // Prepare ref_frame_map for the next frame. + // Only used in frame parallel decode. + int next_ref_frame_map[REF_FRAMES]; + // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and // roll new_fb_idx into it. @@ -178,6 +197,9 @@ typedef struct VP9Common { MODE_INFO **prev_mi_grid_base; MODE_INFO **prev_mi_grid_visible; + // Used in frame parallel decode for delay resizing prev_mi. + int update_prev_mi; + // Persistent mb segment id map used in prediction. 
int seg_map_idx; int prev_seg_map_idx; @@ -197,6 +219,10 @@ typedef struct VP9Common { struct loopfilter lf; struct segmentation seg; + // TODO(hkuang): Remove this as it is the same as frame_parallel_decode + // in pbi. + int frame_parallel_decode; // frame-based threading. + // Context probabilities for reference frame prediction int allow_comp_inter_inter; MV_REFERENCE_FRAME comp_fixed_ref; @@ -235,6 +261,11 @@ typedef struct VP9Common { ENTROPY_CONTEXT *above_context; } VP9_COMMON; +// TODO(hkuang): Don't need to lock the whole pool after implementing atomic +// frame reference count. +void lock_buffer_pool(BufferPool *const pool); +void unlock_buffer_pool(BufferPool *const pool); + static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; } @@ -242,12 +273,15 @@ static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { static INLINE int get_free_fb(VP9_COMMON *cm) { RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; int i; + + lock_buffer_pool(cm->buffer_pool); for (i = 0; i < FRAME_BUFFERS; ++i) if (frame_bufs[i].ref_count == 0) break; assert(i < FRAME_BUFFERS); frame_bufs[i].ref_count = 1; + unlock_buffer_pool(cm->buffer_pool); return i; } diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index fae4255da..da973c3c8 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -327,21 +327,24 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; xd->block_refs[idx] = ref_buffer; + if (!vp9_is_valid_scale(&ref_buffer->sf)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid scale factors"); vp9_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, &ref_buffer->sf); - xd->corrupted |= ref_buffer->buf->corrupted; + if (!cm->frame_parallel_decode) + xd->corrupted |= 
ref_buffer->buf->corrupted; } -static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, +static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &pbi->common; const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); - vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); + vp9_read_mode_info(pbi, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; @@ -365,7 +368,7 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, set_ref(cm, xd, 1, mi_row, mi_col); // Prediction - vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + vp9_dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize); // Reconstruction if (!mbmi->skip) { @@ -404,10 +407,11 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, return p; } -static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, +static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader* r, BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &pbi->common; const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; BLOCK_SIZE subsize, uv_subsize; @@ -422,27 +426,27 @@ static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); if (subsize < BLOCK_8X8) { - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); } else { switch (partition) { case PARTITION_NONE: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); break; case PARTITION_HORZ: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); 
if (mi_row + hbs < cm->mi_rows) - decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize); break; case PARTITION_VERT: - decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize); if (mi_col + hbs < cm->mi_cols) - decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_block(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize); break; case PARTITION_SPLIT: - decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize); - decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); - decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); - decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); + decode_partition(pbi, xd, tile, mi_row, mi_col, r, subsize); + decode_partition(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_partition(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_partition(pbi, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); break; default: assert(0 && "Invalid partition type"); @@ -638,6 +642,7 @@ static void apply_frame_size(VP9_COMMON *cm, int width, int height) { vp9_update_frame_size(cm); } + lock_buffer_pool(pool); if (vp9_realloc_frame_buffer( get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9_DEC_BORDER_IN_PIXELS, @@ -646,6 +651,7 @@ static void apply_frame_size(VP9_COMMON *cm, int width, int height) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); } + unlock_buffer_pool(pool); } static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { @@ -778,7 +784,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const int tile_rows = 1 << cm->log2_tile_rows; TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; - int mi_row, mi_col; + int mi_row = 0, mi_col = 0; TileData *tile_data = NULL; if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) { @@ 
-798,7 +804,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vp9_copy(lf_data->planes, pbi->mb.plane); lf_data->stop = 0; lf_data->y_only = 0; - vp9_loop_filter_frame_init(cm, cm->lf.filter_level); } assert(tile_rows <= 4); @@ -856,7 +861,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col, + decode_partition(pbi, &tile_data->xd, &tile, mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } } @@ -880,6 +885,12 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, winterface->execute(&pbi->lf_worker); } } + // After loopfiltering, the last 7 row pixels in each superblock row may + // still be changed by the longest loopfilter of the next superblock + // row. + if (pbi->frame_parallel_decode) + vp9_frameworker_broadcast(pbi->cur_buf, + mi_row << MI_BLOCK_SIZE_LOG2); } } @@ -895,6 +906,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, // Get last tile data. 
tile_data = pbi->tile_data + tile_cols * tile_rows - 1; + if (pbi->frame_parallel_decode) + vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX); return vp9_reader_find_end(&tile_data->bit_reader); } @@ -909,7 +922,7 @@ static int tile_worker_hook(void *arg1, void *arg2) { vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data->cm, &tile_data->xd, tile, + decode_partition(tile_data->pbi, &tile_data->xd, tile, mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } } @@ -1015,10 +1028,10 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, TileInfo *const tile = (TileInfo*)worker->data2; TileBuffer *const buf = &tile_buffers[0][n]; - tile_data->cm = cm; + tile_data->pbi = pbi; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; - vp9_tile_init(tile, tile_data->cm, 0, buf->col); + vp9_tile_init(tile, &pbi->common, 0, buf->col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); @@ -1078,8 +1091,9 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + BufferPool *const pool = pbi->common.buffer_pool; + int i, mask, ref_index = 0; size_t sz; - int i; cm->last_frame_type = cm->frame_type; @@ -1096,16 +1110,22 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, if (cm->show_existing_frame) { // Show an existing frame directly. 
const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)]; - + lock_buffer_pool(pool); if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Buffer %d does not contain a decoded frame", frame_to_show); ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); + unlock_buffer_pool(pool); pbi->refresh_frame_flags = 0; cm->lf.filter_level = 0; cm->show_frame = 1; + + if (pbi->frame_parallel_decode) { + for (i = 0; i < REF_FRAMES; ++i) + cm->next_ref_frame_map[i] = cm->ref_frame_map[i]; + } return 0; } @@ -1166,7 +1186,6 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, ref_frame->buf = &frame_bufs[idx].buf; cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); } - setup_frame_size_with_refs(cm, rb); cm->allow_high_precision_mv = vp9_rb_read_bit(rb); @@ -1198,6 +1217,29 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, // below, forcing the use of context 0 for those frame types. cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2); + // Generate next_ref_frame_map. + lock_buffer_pool(pool); + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + if (mask & 1) { + cm->next_ref_frame_map[ref_index] = cm->new_fb_idx; + ++frame_bufs[cm->new_fb_idx].ref_count; + } else { + cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; + } + // Current thread holds the reference frame. + if (cm->ref_frame_map[ref_index] >= 0) + ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; + ++ref_index; + } + + for (; ref_index < REF_FRAMES; ++ref_index) { + cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; + // Current thread holds the reference frame. 
+ if (cm->ref_frame_map[ref_index] >= 0) + ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; + } + unlock_buffer_pool(pool); + if (frame_is_intra_only(cm) || cm->error_resilient_mode) vp9_setup_past_independence(cm); @@ -1343,6 +1385,7 @@ void vp9_decode_frame(VP9Decoder *pbi, VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0}; + int context_updated = 0; uint8_t clear_data[MAX_VP9_HEADER_SIZE]; const size_t first_partition_size = read_uncompressed_header(pbi, @@ -1380,6 +1423,28 @@ void vp9_decode_frame(VP9Decoder *pbi, xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); + if (cm->lf.filter_level) { + vp9_loop_filter_frame_init(cm, cm->lf.filter_level); + } + + // If encoded in frame parallel mode, frame context is ready after decoding + // the frame header. + if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) { + VP9Worker *const worker = pbi->frame_worker_owner; + FrameWorkerData *const frame_worker_data = worker->data1; + if (cm->refresh_frame_context) { + context_updated = 1; + cm->frame_contexts[cm->frame_context_idx] = cm->fc; + } + vp9_frameworker_lock_stats(worker); + pbi->cur_buf->row = -1; + pbi->cur_buf->col = -1; + frame_worker_data->frame_context_ready = 1; + // Signal the main thread that context is ready. + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + } + // TODO(jzern): remove frame_parallel_decoding_mode restriction for // single-frame tile decoding. if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 && @@ -1407,7 +1472,8 @@ void vp9_decode_frame(VP9Decoder *pbi, } } - if (cm->refresh_frame_context) + // Non frame parallel update frame context here. 
+ if (cm->refresh_frame_context && !context_updated) cm->frame_contexts[cm->frame_context_idx] = cm->fc; } @@ -1454,10 +1520,9 @@ static void build_mc_border(const uint8_t *src, int src_stride, } while (--b_h); } -void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y) { +void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, + int plane, int block, int bw, int bh, int x, + int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); @@ -1484,20 +1549,23 @@ void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, pd->subsampling_y); MV32 scaled_mv; - int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, - subpel_x, subpel_y; + int xs, ys, x0, y0, x0_16, y0_16, y1, frame_width, frame_height, + buf_stride, subpel_x, subpel_y; uint8_t *ref_frame, *buf_ptr; - const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf; + const int idx = xd->block_refs[ref]->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; // Get reference frame pointer, width and height. if (plane == 0) { - frame_width = ref_buf->y_crop_width; - frame_height = ref_buf->y_crop_height; - ref_frame = ref_buf->y_buffer; + frame_width = ref_frame_buf->buf.y_crop_width; + frame_height = ref_frame_buf->buf.y_crop_height; + ref_frame = ref_frame_buf->buf.y_buffer; } else { - frame_width = ref_buf->uv_crop_width; - frame_height = ref_buf->uv_crop_height; - ref_frame = plane == 1 ? ref_buf->u_buffer : ref_buf->v_buffer; + frame_width = ref_frame_buf->buf.uv_crop_width; + frame_height = ref_frame_buf->buf.uv_crop_height; + ref_frame = plane == 1 ? 
ref_frame_buf->buf.u_buffer + : ref_frame_buf->buf.v_buffer; } if (vp9_is_scaled(sf)) { @@ -1550,15 +1618,18 @@ void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, buf_ptr = ref_frame + y0 * pre_buf->stride + x0; buf_stride = pre_buf->stride; + // Get reference block bottom right vertical coordinate. + y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; + // Do border extension if there is motion or the // width/height is not a multiple of 8 pixels. if (scaled_mv.col || scaled_mv.row || (frame_width & 0x7) || (frame_height & 0x7)) { - // Get reference block bottom right coordinate. - int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; - int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; int x_pad = 0, y_pad = 0; + // Get reference block bottom right horizontal coordinate. + int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; + if (subpel_x || (sf->x_step_q4 & SUBPEL_MASK)) { x0 -= VP9_INTERP_EXTEND - 1; x1 += VP9_INTERP_EXTEND; @@ -1571,6 +1642,12 @@ void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, y_pad = 1; } + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (pbi->frame_parallel_decode) + vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, + (y1 + 7) << (plane == 0 ? 0 : 1)); + // Skip border extension if block is inside the frame. if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width || y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { @@ -1582,6 +1659,12 @@ void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, buf_stride = x1 - x0 + 1; buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; } + } else { + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. 
+ if (pbi->frame_parallel_decode) + vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, + (y1 + 7) << (plane == 0 ? 0 : 1)); } inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, @@ -1589,7 +1672,8 @@ void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } } -void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, +void vp9_dec_build_inter_predictors_sb(VP9Decoder *const pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, BLOCK_SIZE bsize) { int plane; const int mi_x = mi_col * MI_SIZE; @@ -1607,10 +1691,10 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - dec_build_inter_predictors(xd, plane, i++, bw, bh, + dec_build_inter_predictors(pbi, xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y); } else { - dec_build_inter_predictors(xd, plane, 0, bw, bh, + dec_build_inter_predictors(pbi, xd, plane, 0, bw, bh, 0, 0, bw, bh, mi_x, mi_y); } } diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h index 6fbd50c8b..901607ea1 100644 --- a/vp9/decoder/vp9_decodeframe.h +++ b/vp9/decoder/vp9_decodeframe.h @@ -25,7 +25,8 @@ void vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end); -void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, +void vp9_dec_build_inter_predictors_sb(struct VP9Decoder *const pbi, + MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); #ifdef __cplusplus } // extern "C" diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 187ff1307..bd9046187 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -420,11 +420,18 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, } } -static void read_inter_block_mode_info(VP9_COMMON *const cm, +static void fpm_sync(void *const data, int mi_row) { + 
VP9Decoder *const pbi = (VP9Decoder *)data; + vp9_frameworker_wait(pbi->frame_worker_owner, pbi->prev_buf, + mi_row << MI_BLOCK_SIZE_LOG2); +} + +static void read_inter_block_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, const TileInfo *const tile, MODE_INFO *const mi, int mi_row, int mi_col, vp9_reader *r) { + VP9_COMMON *const cm = &pbi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; const int allow_hp = cm->allow_high_precision_mv; @@ -438,7 +445,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, for (ref = 0; ref < 1 + is_compound; ++ref) { const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame], - mi_row, mi_col); + mi_row, mi_col, fpm_sync, (void *)pbi); } inter_mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]]; @@ -512,10 +519,13 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, } } -static void read_inter_frame_mode_info(VP9_COMMON *const cm, +// TODO(hkuang): Pass cm instead of pbi. This requires change in +// vp9_frameworker_wait. 
+static void read_inter_frame_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { + VP9_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; @@ -529,16 +539,17 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, !mbmi->skip || !inter_block, r); if (inter_block) - read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r); + read_inter_block_mode_info(pbi, xd, tile, mi, mi_row, mi_col, r); else read_intra_block_mode_info(cm, mi, r); } -void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { + VP9_COMMON *const cm = &pbi->common; if (frame_is_intra_only(cm)) read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); else - read_inter_frame_mode_info(cm, xd, tile, mi_row, mi_col, r); + read_inter_frame_mode_info(pbi, xd, tile, mi_row, mi_col, r); } diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h index 7394b62b4..dd97d8da0 100644 --- a/vp9/decoder/vp9_decodemv.h +++ b/vp9/decoder/vp9_decodemv.h @@ -11,6 +11,7 @@ #ifndef VP9_DECODER_VP9_DECODEMV_H_ #define VP9_DECODER_VP9_DECODEMV_H_ +#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_reader.h" #ifdef __cplusplus @@ -19,7 +20,7 @@ extern "C" { struct TileInfo; -void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, const struct TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 07fe2899d..0a5ed0c3a 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -26,6 +26,7 @@ #endif #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_systemdependent.h" +#include "vp9/common/vp9_thread.h" #include "vp9/decoder/vp9_decodeframe.h" 
#include "vp9/decoder/vp9_decoder.h" @@ -63,6 +64,7 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) { // Initialize the references to not point to any frame buffers. vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + vpx_memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); cm->current_video_frame = 0; pbi->ready_for_new_data = 1; @@ -195,29 +197,51 @@ int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { return 0; } +static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, + BufferPool *const pool) { + if (idx >= 0) { + --frame_bufs[idx].ref_count; + if (frame_bufs[idx].ref_count == 0) { + pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); + } + } +} + /* If any buffer updating is signaled it should be done here. */ static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; - VP9_COMMON * const cm = &pbi->common; - BufferPool * const pool = cm->buffer_pool; + VP9_COMMON *const cm = &pbi->common; + BufferPool *const pool = cm->buffer_pool; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + lock_buffer_pool(pool); for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - if (mask & 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - ref_cnt_fb(frame_bufs, &cm->ref_frame_map[ref_index], - cm->new_fb_idx); - if (old_idx >= 0 && frame_bufs[old_idx].ref_count == 0) - pool->release_fb_cb(pool->cb_priv, - &frame_bufs[old_idx].raw_frame_buffer); + const int old_idx = cm->ref_frame_map[ref_index]; + // Current thread releases the holding of reference frame. + decrease_ref_count(old_idx, frame_bufs, pool); + + // Release the reference frame in reference map. + if ((mask & 1) && old_idx >= 0) { + decrease_ref_count(old_idx, frame_bufs, pool); } + cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; ++ref_index; } + // Current thread releases the holding of reference frame. 
+ for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { + const int old_idx = cm->ref_frame_map[ref_index]; + decrease_ref_count(old_idx, frame_bufs, pool); + cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; + } + unlock_buffer_pool(pool); + cm->frame_to_show = get_frame_new_buffer(cm); if (!pbi->frame_parallel_decode || !cm->show_frame) { + lock_buffer_pool(pool); --frame_bufs[cm->new_fb_idx].ref_count; + unlock_buffer_pool(pool); } // Invalidate these references until the next frame starts. @@ -256,6 +280,20 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, &frame_bufs[cm->new_fb_idx].raw_frame_buffer); cm->new_fb_idx = get_free_fb(cm); + + if (pbi->frame_parallel_decode) { + VP9Worker *const worker = pbi->frame_worker_owner; + vp9_frameworker_lock_stats(worker); + frame_bufs[cm->new_fb_idx].frame_worker_owner = worker; + // Reset decoding progress. + pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; + pbi->cur_buf->row = -1; + pbi->cur_buf->col = -1; + vp9_frameworker_unlock_stats(worker); + } else { + pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; + } + if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; @@ -283,19 +321,38 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, vp9_clear_system_state(); - cm->last_width = cm->width; - cm->last_height = cm->height; - if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame; - if (cm->show_frame) { - if (!cm->show_existing_frame) - vp9_swap_mi_and_prev_mi(cm); - cm->current_video_frame++; - } + // Update progress in frame parallel decode. + if (pbi->frame_parallel_decode) { + // Need to lock the mutex here as another thread may + // be accessing this buffer. 
+ VP9Worker *const worker = pbi->frame_worker_owner; + FrameWorkerData *const frame_worker_data = worker->data1; + vp9_frameworker_lock_stats(worker); + + if (cm->show_frame) { + if (!cm->show_existing_frame) + vp9_swap_mi_and_prev_mi(cm); + cm->current_video_frame++; + } + vp9_swap_current_and_last_seg_map(cm); + frame_worker_data->frame_decoded = 1; + frame_worker_data->frame_context_ready = 1; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + } else { + cm->last_width = cm->width; + cm->last_height = cm->height; + if (cm->show_frame) { + if (!cm->show_existing_frame) + vp9_swap_mi_and_prev_mi(cm); + cm->current_video_frame++; + } - vp9_swap_current_and_last_seg_map(cm); + vp9_swap_current_and_last_seg_map(cm); + } pbi->ready_for_new_data = 0; diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 758d49006..9844e2031 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -45,6 +45,12 @@ typedef struct VP9Decoder { int frame_parallel_decode; // frame-based threading. + // TODO(hkuang): Combine this with cur_buf in macroblockd as they are + // the same. + RefCntBuffer *cur_buf; // Current decoding frame buffer. + RefCntBuffer *prev_buf; // Previous decoding frame buffer. + + VP9Worker *frame_worker_owner; // frame_worker that owns this pbi. VP9Worker lf_worker; VP9Worker *tile_workers; int num_tile_workers; diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c index 5dda49a0f..f599c2a8b 100644 --- a/vp9/decoder/vp9_dthread.c +++ b/vp9/decoder/vp9_dthread.c @@ -17,6 +17,8 @@ #include "vp9/decoder/vp9_dthread.h" #include "vp9/decoder/vp9_decoder.h" +// #define DEBUG_THREAD + #if CONFIG_MULTITHREAD static INLINE void mutex_lock(pthread_mutex_t *const mutex) { const int kMaxTryLocks = 4000; @@ -279,3 +281,166 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) { vp9_zero(*lf_sync); } } + +// TODO(hkuang): Clean up all the #ifdef in this file. 
+void vp9_frameworker_lock_stats(VP9Worker *const worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const worker_data = worker->data1; + pthread_mutex_lock(&worker_data->stats_mutex); +#else + (void)worker; +#endif +} + +void vp9_frameworker_unlock_stats(VP9Worker *const worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const worker_data = worker->data1; + pthread_mutex_unlock(&worker_data->stats_mutex); +#else + (void)worker; +#endif +} + +void vp9_frameworker_signal_stats(VP9Worker *const worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const worker_data = worker->data1; + // TODO(hkuang): Investigate using broadcast or signal. + pthread_cond_signal(&worker_data->stats_cond); +#else + (void)worker; +#endif +} + +// TODO(hkuang): Remove worker parameter as it is only used in debug code. +void vp9_frameworker_wait(VP9Worker *const worker, RefCntBuffer *const ref_buf, + int row) { +#if CONFIG_MULTITHREAD + if (!ref_buf) + return; + + // Enabling the following line of code will get harmless tsan error but + // will get best performance. + // if (ref_buf->row >= row) return; + + { + // Find the worker thread that owns the reference frame. If the reference + // frame has been fully decoded, it may not have owner. 
+ VP9Worker *const ref_worker = ref_buf->frame_worker_owner; + FrameWorkerData *const ref_worker_data = + (FrameWorkerData *)ref_worker->data1; + const VP9Decoder *const pbi = ref_worker_data->pbi; + +#ifdef DEBUG_THREAD + { + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n", + worker_data->worker_id, worker, ref_worker_data->worker_id, + ref_buf->frame_worker_owner, row, ref_buf->row); + } +#endif + + vp9_frameworker_lock_stats(ref_worker); + while (ref_buf->row < row && pbi->cur_buf == ref_buf) { + pthread_cond_wait(&ref_worker_data->stats_cond, + &ref_worker_data->stats_mutex); + } + vp9_frameworker_unlock_stats(ref_worker); + } +#else + (void)ref_buf; + (void)row; + (void)ref_buf; +#endif // CONFIG_MULTITHREAD +} + +void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) { +#if CONFIG_MULTITHREAD + VP9Worker *worker = buf->frame_worker_owner; + +#ifdef DEBUG_THREAD + printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id, + buf->frame_worker_owner, row); +#endif + + vp9_frameworker_lock_stats(worker); + buf->row = row; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); +#else + (void)buf; + (void)row; +#endif // CONFIG_MULTITHREAD +} + +void vp9_frameworker_copy_context(VP9Worker *const dst_worker, + VP9Worker *const src_worker) { +#if CONFIG_MULTITHREAD + FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1; + FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1; + VP9_COMMON *const src_cm = &src_worker_data->pbi->common; + VP9_COMMON *const dst_cm = &dst_worker_data->pbi->common; + int i; + + // Wait until source frame's context is ready. 
+ vp9_frameworker_lock_stats(src_worker); + while (!src_worker_data->frame_context_ready) { + pthread_cond_wait(&src_worker_data->stats_cond, + &src_worker_data->stats_mutex); + } + + // src worker may have already finished decoding a frame and swapped the mi. + // TODO(hkuang): Remove following code after implementing no ModeInfo decoding. + if (src_worker_data->frame_decoded) { + dst_cm->prev_mip = src_cm->prev_mip; + dst_cm->prev_mi = src_cm->prev_mi; + dst_cm->prev_mi_grid_base = src_cm->prev_mi_grid_base; + dst_cm->prev_mi_grid_visible = src_cm->prev_mi_grid_visible; + dst_cm->last_frame_seg_map = src_cm->last_frame_seg_map; + } else { + dst_cm->prev_mip = src_cm->mip; + dst_cm->prev_mi = src_cm->mi; + dst_cm->prev_mi_grid_base = src_cm->mi_grid_base; + dst_cm->prev_mi_grid_visible = src_cm->mi_grid_visible; + dst_cm->last_frame_seg_map = src_cm->current_frame_seg_map; + } + + vp9_frameworker_unlock_stats(src_worker); + + dst_worker_data->pbi->prev_buf = + src_worker_data->pbi->common.show_existing_frame ? + NULL : src_worker_data->pbi->cur_buf; + + dst_cm->last_width = !src_cm->show_existing_frame ? + src_cm->width : src_cm->last_width; + dst_cm->last_height = !src_cm->show_existing_frame ? + src_cm->height : src_cm->last_height; + dst_cm->display_width = src_cm->display_width; + dst_cm->display_height = src_cm->display_height; + dst_cm->subsampling_x = src_cm->subsampling_x; + dst_cm->subsampling_y = src_cm->subsampling_y; + dst_cm->last_show_frame = !src_cm->show_existing_frame ? 
+ src_cm->show_frame : src_cm->last_show_frame; + dst_cm->last_frame_type = src_cm->last_frame_type; + dst_cm->frame_type = src_cm->frame_type; + dst_cm->y_dc_delta_q = src_cm->y_dc_delta_q; + dst_cm->uv_dc_delta_q = src_cm->uv_dc_delta_q; + dst_cm->uv_ac_delta_q = src_cm->uv_ac_delta_q; + dst_cm->base_qindex = src_cm->base_qindex; + + for (i = 0; i < REF_FRAMES; ++i) + dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i]; + + memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr, + (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh)); + dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level; + dst_cm->lf.filter_level = src_cm->lf.filter_level; + memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, MAX_REF_LF_DELTAS); + memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); + dst_cm->seg = src_cm->seg; + memcpy(dst_cm->frame_contexts, src_cm->frame_contexts, + FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0])); +#else + (void) dst_worker; + (void) src_worker; +#endif // CONFIG_MULTITHREAD +} diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index 75b652518..52c3233e4 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -19,7 +19,7 @@ struct VP9Common; struct VP9Decoder; typedef struct TileWorkerData { - struct VP9Common *cm; + struct VP9Decoder *pbi; vp9_reader bit_reader; DECLARE_ALIGNED(16, struct macroblockd, xd); @@ -55,6 +55,14 @@ typedef struct FrameWorkerData { // It is used to make a copy of the compressed data. uint8_t *scratch_buffer; size_t scratch_buffer_size; + +#if CONFIG_MULTITHREAD + pthread_mutex_t stats_mutex; + pthread_cond_t stats_cond; +#endif + + int frame_context_ready; // Current frame's context is ready to read. + int frame_decoded; // Finished decoding current frame. } FrameWorkerData; // Allocate memory for loopfilter row synchronization. 
@@ -71,4 +79,23 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, int frame_filter_level, int y_only); +void vp9_frameworker_lock_stats(VP9Worker *const worker); +void vp9_frameworker_unlock_stats(VP9Worker *const worker); +void vp9_frameworker_signal_stats(VP9Worker *const worker); + +// Wait until ref_buf has been decoded to row in real pixel unit. +// Note: worker may already finish decoding ref_buf and release it in order to +// start decoding next frame. So need to check whether worker is still decoding +// ref_buf. +void vp9_frameworker_wait(VP9Worker *const worker, RefCntBuffer *const ref_buf, + int row); + +// FrameWorker broadcasts its decoding progress so other workers that are +// waiting on it can resume decoding. +void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row); + +// Copy necessary decoding context from src worker to dst worker. +void vp9_frameworker_copy_context(VP9Worker *const dst_worker, + VP9Worker *const src_worker); + #endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index d9edeae3e..c7ab2209c 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -484,7 +484,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cm->coding_use_prev_mi) vp9_find_mv_refs(cm, xd, tile, xd->mi[0], ref_frame, - candidates, mi_row, mi_col); + candidates, mi_row, mi_col, NULL, NULL); else const_motion[ref_frame] = mv_refs_rt(cm, xd, tile, xd->mi[0], ref_frame, candidates, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e577017e6..b817fac98 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2253,7 +2253,8 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them - vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col); + vp9_find_mv_refs(cm, 
xd, tile, mi, ref_frame, candidates, mi_row, mi_col, + NULL, NULL); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 3bfdea6ad..d56ee0076 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -18,6 +18,7 @@ #include "vpx/vpx_decoder.h" #include "vp9/common/vp9_frame_buffers.h" +#include "vp9/common/vp9_thread.h" #include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_read_bit_buffer.h" @@ -28,6 +29,15 @@ typedef vpx_codec_stream_info_t vp9_stream_info_t; +// This limit is due to framebuffer numbers. +// TODO(hkuang): Remove this limit after implementing ondemand framebuffers. +#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames. + +typedef struct cache_frame { + int fb_idx; + vpx_image_t img; +} cache_frame; + struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_dec_cfg_t cfg; @@ -35,17 +45,24 @@ struct vpx_codec_alg_priv { int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; vpx_decrypt_cb decrypt_cb; - void *decrypt_state; + void *decrypt_state; vpx_image_t img; int flushed; int invert_tile_order; - int frame_parallel_decode; // frame-based threading. int last_show_frame; // Index of last output frame. + // Frame parallel related. + int frame_parallel_decode; // frame-based threading. VP9Worker *frame_workers; int num_frame_workers; - int next_submit_thread_id; - int next_output_thread_id; + int next_submit_worker_id; + int last_submit_worker_id; + int next_output_worker_id; + int available_threads; + cache_frame frame_cache[FRAME_CACHE_SIZE]; + int frame_cache_write; + int frame_cache_read; + int num_cache_frames; // BufferPool that holds all reference frames. Shared by all the FrameWorkers. 
BufferPool *buffer_pool; @@ -77,11 +94,10 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); ctx->priv->init_flags = ctx->init_flags; ctx->priv->alg_priv->flushed = 0; + // Only do frame parallel decode when threads > 1. ctx->priv->alg_priv->frame_parallel_decode = - (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING); - - // Disable frame parallel decoding for now. - ctx->priv->alg_priv->frame_parallel_decode = 0; + ((ctx->config.dec->threads > 1) && + (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING)) ? 1 : 0; if (ctx->config.dec) { // Update the reference to the config structure to an internal copy. @@ -98,10 +114,21 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { int i; for (i = 0; i < ctx->num_frame_workers; ++i) { VP9Worker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - vp9_decoder_remove(worker_data->pbi); - vpx_free(worker_data); + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + vp9_get_worker_interface()->end(worker); + vp9_decoder_remove(frame_worker_data->pbi); + vpx_free(frame_worker_data->scratch_buffer); +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&frame_worker_data->stats_mutex); + pthread_cond_destroy(&frame_worker_data->stats_cond); +#endif + vpx_free(frame_worker_data); } +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex); +#endif + vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); } vpx_free(ctx->frame_workers); @@ -222,8 +249,8 @@ static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { for (i = 0; i < ctx->num_frame_workers; ++i) { VP9Worker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - VP9_COMMON *const cm = &worker_data->pbi->common; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + VP9_COMMON 
*const cm = &frame_worker_data->pbi->common; BufferPool *const pool = cm->buffer_pool; cm->new_fb_idx = -1; @@ -260,14 +287,15 @@ static void set_ppflags(const vpx_codec_alg_priv_t *ctx, } static int frame_worker_hook(void *arg1, void *arg2) { - FrameWorkerData *const worker_data = (FrameWorkerData *)arg1; - const uint8_t *data = worker_data->data; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; + const uint8_t *data = frame_worker_data->data; (void)arg2; - worker_data->result = vp9_receive_compressed_data(worker_data->pbi, - worker_data->data_size, - &data); - worker_data->data_end = data; - return !worker_data->result; + frame_worker_data->result = + vp9_receive_compressed_data(frame_worker_data->pbi, + frame_worker_data->data_size, + &data); + frame_worker_data->data_end = data; + return !frame_worker_data->result; } static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { @@ -275,14 +303,28 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); ctx->last_show_frame = -1; - ctx->next_submit_thread_id = 0; - ctx->next_output_thread_id = 0; + ctx->next_submit_worker_id = 0; + ctx->last_submit_worker_id = 0; + ctx->next_output_worker_id = 0; + ctx->frame_cache_read = 0; + ctx->frame_cache_write = 0; + ctx->num_cache_frames = 0; ctx->num_frame_workers = (ctx->frame_parallel_decode == 1) ? 
ctx->cfg.threads: 1; + ctx->available_threads = ctx->num_frame_workers; + ctx->flushed = 0; + ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); if (ctx->buffer_pool == NULL) return VPX_CODEC_MEM_ERROR; +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) { + set_error_detail(ctx, "Failed to allocate buffer pool mutex"); + return VPX_CODEC_MEM_ERROR; + } +#endif + ctx->frame_workers = (VP9Worker *) vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); if (ctx->frame_workers == NULL) { @@ -292,28 +334,51 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { for (i = 0; i < ctx->num_frame_workers; ++i) { VP9Worker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *worker_data = NULL; + FrameWorkerData *frame_worker_data = NULL; winterface->init(worker); worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData)); if (worker->data1 == NULL) { - set_error_detail(ctx, "Failed to allocate worker_data"); + set_error_detail(ctx, "Failed to allocate frame_worker_data"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool); + if (frame_worker_data->pbi == NULL) { + set_error_detail(ctx, "Failed to allocate frame_worker_data"); return VPX_CODEC_MEM_ERROR; } - worker_data = (FrameWorkerData *)worker->data1; - worker_data->pbi = vp9_decoder_create(ctx->buffer_pool); - if (worker_data->pbi == NULL) { - set_error_detail(ctx, "Failed to allocate worker_data"); + frame_worker_data->pbi->frame_worker_owner = worker; + frame_worker_data->pbi->common.mi_idx = 0; + frame_worker_data->pbi->common.prev_mi_idx = 1; + frame_worker_data->worker_id = i; + frame_worker_data->scratch_buffer = NULL; + frame_worker_data->scratch_buffer_size = 0; + frame_worker_data->frame_context_ready = 0; +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) { + set_error_detail(ctx, "Failed to 
allocate frame_worker_data mutex"); return VPX_CODEC_MEM_ERROR; } + if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) { + set_error_detail(ctx, "Failed to allocate frame_worker_data cond"); + return VPX_CODEC_MEM_ERROR; + } +#endif // If decoding in serial mode, FrameWorker thread could create tile worker // thread or loopfilter thread. - worker_data->pbi->max_threads = + frame_worker_data->pbi->max_threads = (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0; - worker_data->pbi->inv_tile_order = ctx->invert_tile_order; - worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; + frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order; + frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; + frame_worker_data->pbi->common.frame_parallel_decode = + ctx->frame_parallel_decode; worker->hook = (VP9WorkerHook)frame_worker_hook; + if (!winterface->reset(worker)) { + set_error_detail(ctx, "Frame Worker thread creation failed"); + return VPX_CODEC_MEM_ERROR; + } } // If postprocessing was enabled by the application and a @@ -348,36 +413,66 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_ERROR; } - // Initialize the decoder workers on the first frame - if (ctx->frame_workers == NULL) { - const vpx_codec_err_t res = init_decoder(ctx); - if (res != VPX_CODEC_OK) - return res; - } - if (!ctx->frame_parallel_decode) { VP9Worker *const worker = ctx->frame_workers; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - worker_data->data = *data; - worker_data->data_size = data_sz; - worker_data->user_priv = user_priv; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->data = *data; + frame_worker_data->data_size = data_sz; + frame_worker_data->user_priv = user_priv; // Set these even if already initialized. The caller may have changed the // decrypt config between frames. 
- worker_data->pbi->decrypt_cb = ctx->decrypt_cb; - worker_data->pbi->decrypt_state = ctx->decrypt_state; + frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb; + frame_worker_data->pbi->decrypt_state = ctx->decrypt_state; worker->had_error = 0; winterface->execute(worker); // Update data pointer after decode. - *data = worker_data->data_end; + *data = frame_worker_data->data_end; if (worker->had_error) - return update_error_state(ctx, &worker_data->pbi->common.error); + return update_error_state(ctx, &frame_worker_data->pbi->common.error); } else { - // TODO(hkuang): Implement frame parallel decode. - return VPX_CODEC_INCAPABLE; + const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); + VP9Worker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id]; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + // Copy context from last worker thread to next worker thread. + if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) + vp9_frameworker_copy_context( + &ctx->frame_workers[ctx->next_submit_worker_id], + &ctx->frame_workers[ctx->last_submit_worker_id]); + + // Copy the compressed data into worker's internal buffer. + // TODO(hkuang): Will all the workers allocate the same size + // as the size of the first intra frame be better? This will + // avoid too many deallocate and allocate. 
+ if (frame_worker_data->scratch_buffer_size < data_sz) { + frame_worker_data->scratch_buffer = + (uint8_t *)vpx_realloc(frame_worker_data->scratch_buffer, data_sz); + if (frame_worker_data->scratch_buffer == NULL) { + set_error_detail(ctx, "Failed to reallocate scratch buffer"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data->scratch_buffer_size = data_sz; + } + frame_worker_data->data_size = data_sz; + vpx_memcpy(frame_worker_data->scratch_buffer, *data, data_sz); + + frame_worker_data->frame_decoded = 0; + frame_worker_data->frame_context_ready = 0; + frame_worker_data->data = frame_worker_data->scratch_buffer; + frame_worker_data->user_priv = user_priv; + + if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) + ctx->last_submit_worker_id = + (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers; + + ctx->next_submit_worker_id = + (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers; + + --ctx->available_threads; + winterface->launch(worker); } if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) @@ -461,6 +556,30 @@ static vpx_codec_err_t parse_superframe_index(const uint8_t *data, return VPX_CODEC_OK; } +static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); + VP9Worker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + ctx->next_output_worker_id = + (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; + winterface->sync(worker); + ++ctx->available_threads; + if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx; + yuvconfig2image(&ctx->frame_cache[ctx->frame_cache_write].img, &sd, + 
frame_worker_data->user_priv); + ctx->frame_cache[ctx->frame_cache_write].img.fb_priv = + frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + ctx->frame_cache_write = + (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE; + ++ctx->num_cache_frames; + } +} + static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { @@ -478,6 +597,13 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, // Reset flushed when receiving a valid frame. ctx->flushed = 0; + // Initialize the decoder workers on the first frame. + if (ctx->frame_workers == NULL) { + const vpx_codec_err_t res = init_decoder(ctx); + if (res != VPX_CODEC_OK) + return res; + } + res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count, ctx->decrypt_cb, ctx->decrypt_state); if (res != VPX_CODEC_OK) @@ -494,30 +620,46 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, for (i = 0; i < frame_count; ++i) { const uint8_t *data_start_copy = data_start; const uint32_t frame_size = frame_sizes[i]; - vpx_codec_err_t res; if (data_start < data || frame_size > (uint32_t) (data_end - data_start)) { set_error_detail(ctx, "Invalid frame size in index"); return VPX_CODEC_CORRUPT_FRAME; } + if (ctx->available_threads == 0) { + // No more threads for decoding. Wait until the next output worker + // finishes decoding. Then copy the decoded frame into cache. + if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { + wait_worker_and_cache_frame(ctx); + } else { + // TODO(hkuang): Add unit test to test this path. + set_error_detail(ctx, "Frame output cache is full."); + return VPX_CODEC_ERROR; + } + } + res = decode_one(ctx, &data_start_copy, frame_size, user_priv, deadline); if (res != VPX_CODEC_OK) return res; - data_start += frame_size; } } else { - res = decode_one(ctx, &data_start, data_sz, user_priv, deadline); + if (ctx->available_threads == 0) { + // No more threads for decoding. 
Wait until the next output worker + // finishes decoding. Then copy the decoded frame into cache. + if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { + wait_worker_and_cache_frame(ctx); + } else { + // TODO(hkuang): Add unit test to test this path. + set_error_detail(ctx, "Frame output cache is full."); + return VPX_CODEC_ERROR; + } + } + + res = decode_one(ctx, &data, data_sz, user_priv, deadline); if (res != VPX_CODEC_OK) return res; - - // Extra data detected after the frame. - if (data_start < data_end - 1) { - set_error_detail(ctx, "Fail to decode frame in parallel mode"); - return VPX_CODEC_INCAPABLE; - } } } else { // Decode in serial mode. @@ -561,41 +703,73 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, } } - return VPX_CODEC_OK; + return res; +} + +static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) { + RefCntBuffer *const frame_bufs = ctx->buffer_pool->frame_bufs; + // Decrease reference count of last output frame in frame parallel mode. + if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { + BufferPool *const pool = ctx->buffer_pool; + lock_buffer_pool(pool); + --frame_bufs[ctx->last_show_frame].ref_count; + if (frame_bufs[ctx->last_show_frame].ref_count == 0) { + pool->release_fb_cb(pool->cb_priv, + &frame_bufs[ctx->last_show_frame].raw_frame_buffer); + } + unlock_buffer_pool(pool); + } } static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; + // Only return frame when all the cpu are busy or + // application flushed the decoder in frame parallel decode. + if (ctx->frame_parallel_decode && ctx->available_threads > 0 && + !ctx->flushed) { + return img; + } + + // Output the frames in the cache first. 
+ if (ctx->num_cache_frames > 0) { + release_last_output_frame(ctx); + ctx->last_show_frame = ctx->frame_cache[ctx->frame_cache_read].fb_idx; + img = &ctx->frame_cache[ctx->frame_cache_read].img; + ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE; + --ctx->num_cache_frames; + return img; + } + // iter acts as a flip flop, so an image is only returned on the first // call to get_frame. if (*iter == NULL && ctx->frame_workers != NULL) { - YV12_BUFFER_CONFIG sd; - vp9_ppflags_t flags = {0, 0, 0}; - - VP9Worker *const worker = &ctx->frame_workers[ctx->next_output_thread_id]; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - if (vp9_get_raw_frame(worker_data->pbi, &sd, &flags) == 0) { - VP9_COMMON *const cm = &worker_data->pbi->common; - BufferPool *const pool = cm->buffer_pool; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - yuvconfig2image(&ctx->img, &sd, worker_data->user_priv); - ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - img = &ctx->img; - *iter = img; - // Decrease reference count of last output frame in frame parallel mode. - if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { - --frame_bufs[ctx->last_show_frame].ref_count; - if (frame_bufs[ctx->last_show_frame].ref_count == 0) { - pool->release_fb_cb(pool->cb_priv, - &frame_bufs[ctx->last_show_frame].raw_frame_buffer); - } + do { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); + VP9Worker *const worker = + &ctx->frame_workers[ctx->next_output_worker_id]; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + ctx->next_output_worker_id = + (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; + // Wait for the frame from worker thread. 
+ winterface->sync(worker); + ++ctx->available_threads; + if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + release_last_output_frame(ctx); + ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx; + yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv); + ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + img = &ctx->img; + return img; } - ctx->last_show_frame = worker_data->pbi->common.new_fb_idx; - } + } while (ctx->next_output_worker_id != ctx->next_submit_worker_id); } - return img; } @@ -631,9 +805,9 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; VP9Worker *const worker = ctx->frame_workers; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(&worker_data->pbi->common, + return vp9_set_reference_dec(&frame_worker_data->pbi->common, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; @@ -654,9 +828,9 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, vpx_ref_frame_t *frame = (vpx_ref_frame_t *) data; YV12_BUFFER_CONFIG sd; VP9Worker *const worker = ctx->frame_workers; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; image2yuvconfig(&frame->img, &sd); - return vp9_copy_reference_dec(worker_data->pbi, + return vp9_copy_reference_dec(frame_worker_data->pbi, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; @@ -676,8 +850,8 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, if (data) { YV12_BUFFER_CONFIG* fb; VP9Worker 
*const worker = ctx->frame_workers; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - vp9_get_reference_dec(worker_data->pbi, data->idx, &fb); + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + vp9_get_reference_dec(frame_worker_data->pbi, data->idx, &fb); yuvconfig2image(&data->img, fb, NULL); return VPX_CODEC_OK; } else { @@ -724,8 +898,9 @@ static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, if (update_info) { if (ctx->frame_workers) { VP9Worker *const worker = ctx->frame_workers; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - *update_info = worker_data->pbi->refresh_frame_flags; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + *update_info = frame_worker_data->pbi->refresh_frame_flags; } else { return VPX_CODEC_ERROR; } @@ -735,22 +910,18 @@ static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, } } - static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, va_list args) { int *corrupted = va_arg(args, int *); - // Only support this function in serial decode. 
- if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - if (corrupted) { if (ctx->frame_workers) { VP9Worker *const worker = ctx->frame_workers; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - *corrupted = worker_data->pbi->common.frame_to_show->corrupted; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + RefCntBuffer *const frame_bufs = + frame_worker_data->pbi->common.buffer_pool->frame_bufs; + *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; } else { return VPX_CODEC_ERROR; } @@ -773,8 +944,9 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, if (display_size) { if (ctx->frame_workers) { VP9Worker *const worker = ctx->frame_workers; - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - const VP9_COMMON *const cm = &worker_data->pbi->common; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &frame_worker_data->pbi->common; display_size[0] = cm->display_width; display_size[1] = cm->display_height; } else { diff --git a/vpx/vpx_frame_buffer.h b/vpx/vpx_frame_buffer.h index e69df4bc8..0741e6e71 100644 --- a/vpx/vpx_frame_buffer.h +++ b/vpx/vpx_frame_buffer.h @@ -22,8 +22,11 @@ extern "C" { #include "./vpx_integer.h" /*!\brief The maximum number of work buffers used by libvpx. + * Support maximum 4 threads to decode video in parallel. + * Each thread will use one work buffer. + * TODO(hkuang): Add support to set number of worker threads dynamically. */ -#define VPX_MAXIMUM_WORK_BUFFERS 1 +#define VPX_MAXIMUM_WORK_BUFFERS 4 /*!\brief The maximum number of reference buffers that a VP9 encoder may use. */ -- cgit v1.2.1 From ad693e1ff514f56928fc62790315906f9564395e Mon Sep 17 00:00:00 2001 From: hkuang Date: Fri, 31 Oct 2014 12:10:46 -0700 Subject: Add key frame seeking to webmdec and webm_video_source. 
This is for the frame parallel's pause/seek/resume unit test. Change-Id: Ie235f86ca5f3d525896222766f6d610e6682fd76 --- test/webm_video_source.h | 12 ++++++++++++ webmdec.cc | 2 ++ webmdec.h | 1 + 3 files changed, 15 insertions(+) diff --git a/test/webm_video_source.h b/test/webm_video_source.h index 11d3d234d..650bc52dc 100644 --- a/test/webm_video_source.h +++ b/test/webm_video_source.h @@ -69,6 +69,18 @@ class WebMVideoSource : public CompressedVideoSource { } } + void SeekToNextKeyFrame() { + ASSERT_TRUE(vpx_ctx_->file != NULL); + do { + const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_); + ASSERT_GE(status, 0) << "webm_read_frame failed"; + ++frame_; + if (status == 1) { + end_of_file_ = true; + } + } while (!webm_ctx_->is_key_frame && !end_of_file_); + } + virtual const uint8_t *cxdata() const { return end_of_file_ ? NULL : buf_; } diff --git a/webmdec.cc b/webmdec.cc index 4383e8efd..d591f3e3d 100644 --- a/webmdec.cc +++ b/webmdec.cc @@ -41,6 +41,7 @@ void reset(struct WebmInputContext *const webm_ctx) { webm_ctx->block_frame_index = 0; webm_ctx->video_track_index = 0; webm_ctx->timestamp_ns = 0; + webm_ctx->is_key_frame = false; } void get_first_cluster(struct WebmInputContext *const webm_ctx) { @@ -182,6 +183,7 @@ int webm_read_frame(struct WebmInputContext *webm_ctx, } *bytes_in_buffer = frame.len; webm_ctx->timestamp_ns = block->GetTime(cluster); + webm_ctx->is_key_frame = block->IsKey(); mkvparser::MkvReader *const reader = reinterpret_cast(webm_ctx->reader); diff --git a/webmdec.h b/webmdec.h index 29b815da1..1cd35d41a 100644 --- a/webmdec.h +++ b/webmdec.h @@ -28,6 +28,7 @@ struct WebmInputContext { int block_frame_index; int video_track_index; uint64_t timestamp_ns; + int is_key_frame; }; // Checks if the input is a WebM file. 
If so, initializes WebMInputContext so -- cgit v1.2.1 From f2fe530e157f3d6261c03be0f14456cf1f79c69e Mon Sep 17 00:00:00 2001 From: hkuang Date: Tue, 4 Nov 2014 14:54:31 -0800 Subject: Add two test vectors to test frame parallel decode. The added vectors are mainly used to test the output cache mechanism in frame parallel decode. Change-Id: I3d413d060daa5abf72358f6350bd1d16d71adc5a --- test/test-data.sha1 | 4 ++++ test/test.mk | 4 ++++ test/test_vectors.cc | 1 + 3 files changed, 9 insertions(+) diff --git a/test/test-data.sha1 b/test/test-data.sha1 index f9c09c641..c1a56833c 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -661,3 +661,7 @@ d3964f9dad9f60363c81b688324d95b4ec7c8038 invalid-vp90-2-00-quantizer-00.webm.iv 456d1493e52d32a5c30edf44a27debc1fa6b253a invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res c123d1f9f02fb4143abb5e271916e3a3080de8f6 invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf 456d1493e52d32a5c30edf44a27debc1fa6b253a invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res +f97088c7359fc8d3d5aa5eafe57bc7308b3ee124 vp90-2-20-big_superframe-01.webm +47d7d409785afa33b123376de0c907336e6c7bd7 vp90-2-20-big_superframe-01.webm.md5 +65ade6d2786209582c50d34cfe22b3cdb033abaf vp90-2-20-big_superframe-02.webm +7c0ed8d04c4d06c5411dd2e5de2411d37f092db5 vp90-2-20-big_superframe-02.webm.md5 diff --git a/test/test.mk b/test/test.mk index 85212d96e..166ee6272 100644 --- a/test/test.mk +++ b/test/test.mk @@ -773,6 +773,10 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-20-big_superframe-02.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm.md5 # Invalid files for testing libvpx error checking. LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm diff --git a/test/test_vectors.cc b/test/test_vectors.cc index 41c9e2657..dda56c652 100644 --- a/test/test_vectors.cc +++ b/test/test_vectors.cc @@ -181,6 +181,7 @@ const char *const kVP9TestVectors[] = { "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm", "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm", "vp91-2-04-yuv444.webm", + "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm", }; const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER -- cgit v1.2.1 From a9a20a104018589de1efb58c61aeb84b327b54b7 Mon Sep 17 00:00:00 2001 From: hkuang Date: Fri, 7 Nov 2014 16:42:27 -0800 Subject: Fix a bug in frame parallel decode and add a unit test for that. A flush bug is discovered during putting frame parallel decoder into Android. This test will expose that bug. 
Change-Id: Ia047f27972f4da0471649f79f1f91e7695297473 --- test/test-data.sha1 | 2 + test/test.mk | 3 + test/test_vectors.cc | 3 +- test/vp9_frame_parallel_test.cc | 122 ++++++++++++++++++++++++++++++++++++++++ vp9/decoder/vp9_decoder.c | 9 +-- vp9/vp9_dx_iface.c | 4 +- 6 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 test/vp9_frame_parallel_test.cc diff --git a/test/test-data.sha1 b/test/test-data.sha1 index c1a56833c..b1380652d 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -665,3 +665,5 @@ f97088c7359fc8d3d5aa5eafe57bc7308b3ee124 vp90-2-20-big_superframe-01.webm 47d7d409785afa33b123376de0c907336e6c7bd7 vp90-2-20-big_superframe-01.webm.md5 65ade6d2786209582c50d34cfe22b3cdb033abaf vp90-2-20-big_superframe-02.webm 7c0ed8d04c4d06c5411dd2e5de2411d37f092db5 vp90-2-20-big_superframe-02.webm.md5 +667ec8718c982aef6be07eb94f083c2efb9d2d16 vp90-2-07-frame_parallel-1.webm +bfc82bf848e9c05020d61e3ffc1e62f25df81d19 vp90-2-07-frame_parallel-1.webm.md5 diff --git a/test/test.mk b/test/test.mk index 166ee6272..ece2b37aa 100644 --- a/test/test.mk +++ b/test/test.mk @@ -31,6 +31,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc @@ -683,6 +684,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-07-frame_parallel-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm diff --git a/test/test_vectors.cc b/test/test_vectors.cc index dda56c652..98d032d7b 100644 --- a/test/test_vectors.cc +++ b/test/test_vectors.cc @@ -154,7 +154,8 @@ const char *const kVP9TestVectors[] = { "vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm", "vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm", "vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm", - "vp90-2-07-frame_parallel.webm", "vp90-2-08-tile_1x2_frame_parallel.webm", + "vp90-2-07-frame_parallel.webm", "vp90-2-07-frame_parallel-1.webm", + "vp90-2-08-tile_1x2_frame_parallel.webm", "vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm", "vp90-2-08-tile_1x4.webm", "vp90-2-08-tile_1x8_frame_parallel.webm", "vp90-2-08-tile_1x8.webm", "vp90-2-08-tile-4x4.webm", diff --git a/test/vp9_frame_parallel_test.cc b/test/vp9_frame_parallel_test.cc new file mode 100644 index 000000000..d159c2709 --- /dev/null +++ b/test/vp9_frame_parallel_test.cc @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "./vpx_config.h" +#include "test/codec_factory.h" +#include "test/decode_test_driver.h" +#include "test/ivf_video_source.h" +#include "test/md5_helper.h" +#include "test/util.h" +#if CONFIG_WEBM_IO +#include "test/webm_video_source.h" +#endif +#include "vpx_mem/vpx_mem.h" + +namespace { + +using std::string; + +#if CONFIG_WEBM_IO + +struct FileList { + const char *name; + // md5 sum for decoded frames which does not include skipped frames. + const char *expected_md5; + const int pause_frame_num; +}; + +// Decodes |filename| with |num_threads|. Pause at the specified frame_num, +// seek to next key frame and then continue decoding until the end. Return +// the md5 of the decoded frames which does not include skipped frames. +string DecodeFile(const string &filename, int num_threads, int pause_num) { + libvpx_test::WebMVideoSource video(filename); + video.Init(); + int in_frames = 0; + int out_frames = 0; + + vpx_codec_dec_cfg_t cfg = {0}; + cfg.threads = num_threads; + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_FRAME_THREADING; + libvpx_test::VP9Decoder decoder(cfg, flags, 0); + + libvpx_test::MD5 md5; + video.Begin(); + + do { + ++in_frames; + const vpx_codec_err_t res = + decoder.DecodeFrame(video.cxdata(), video.frame_size()); + if (res != VPX_CODEC_OK) { + EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); + break; + } + + // Pause at specified frame number. + if (in_frames == pause_num) { + // Flush the decoder and then seek to next key frame. + decoder.DecodeFrame(NULL, 0); + video.SeekToNextKeyFrame(); + } else { + video.Next(); + } + + // Flush the decoder at the end of the video. 
+ if (!video.cxdata()) + decoder.DecodeFrame(NULL, 0); + + libvpx_test::DxDataIterator dec_iter = decoder.GetDxData(); + const vpx_image_t *img; + + // Get decompressed data + while ((img = dec_iter.Next())) { + ++out_frames; + md5.Add(img); + } + } while (video.cxdata() != NULL); + + EXPECT_EQ(in_frames, out_frames) << + "Input frame count does not match output frame count"; + + return string(md5.Get()); +} + +void DecodeFiles(const FileList files[]) { + for (const FileList *iter = files; iter->name != NULL; ++iter) { + SCOPED_TRACE(iter->name); + for (int t = 2; t <= 8; ++t) { + EXPECT_EQ(iter->expected_md5, + DecodeFile(iter->name, t, iter->pause_frame_num)) + << "threads = " << t; + } + } +} + +TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) { + // vp90-2-07-frame_parallel-1.webm is a 40 frame video file with + // one key frame for every ten frames. + static const FileList files[] = { + { "vp90-2-07-frame_parallel-1.webm", + "6ea7c3875d67252e7caf2bc6e75b36b1", 6}, + { "vp90-2-07-frame_parallel-1.webm", + "4bb634160c7356a8d7d4299b6dc83a45", 12}, + { "vp90-2-07-frame_parallel-1.webm", + "89772591e6ef461f9fa754f916c78ed8", 26}, + { NULL, NULL, 0}, + }; + DecodeFiles(files); +} + +#endif // CONFIG_WEBM_IO +} // namespace diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 0a5ed0c3a..b5f3b1448 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -272,6 +272,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, cm->frame_refs[0].buf->corrupted = 1; } + pbi->ready_for_new_data = 0; + // Check if the previous frame was a frame without any references to it. // Release frame buffer if not decoding in frame parallel mode. 
if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 @@ -296,6 +298,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; + pbi->ready_for_new_data = 1; // We do not know if the missing frame(s) was supposed to update // any of the reference buffers, but we act conservative and @@ -354,8 +357,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, vp9_swap_current_and_last_seg_map(cm); } - pbi->ready_for_new_data = 0; - cm->error.setjmp = 0; return retcode; } @@ -370,12 +371,12 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, if (pbi->ready_for_new_data == 1) return ret; + pbi->ready_for_new_data = 1; + /* no raw frame to show!!! */ if (pbi->common.show_frame == 0) return ret; - pbi->ready_for_new_data = 1; - #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(&pbi->common, sd, flags); #else diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index d56ee0076..1d919daba 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -443,6 +443,8 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, &ctx->frame_workers[ctx->next_submit_worker_id], &ctx->frame_workers[ctx->last_submit_worker_id]); + frame_worker_data->pbi->ready_for_new_data = 0; + // Copy the compressed data into worker's internal buffer. // TODO(hkuang): Will all the workers allocate the same size // as the size of the first intra frame be better? This will @@ -757,10 +759,10 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; // Wait for the frame from worker thread. 
winterface->sync(worker); - ++ctx->available_threads; if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { VP9_COMMON *const cm = &frame_worker_data->pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + ++ctx->available_threads; release_last_output_frame(ctx); ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx; yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv); -- cgit v1.2.1 From d05cf10fe718ebb09394d9c183ed046d05a8e6a2 Mon Sep 17 00:00:00 2001 From: hkuang Date: Tue, 18 Nov 2014 16:18:20 -0800 Subject: Add error handling for frame parallel decode and unit test for that. Change-Id: I6e309e11f1641618d2424b7a2c0fe744b8974dec --- test/invalid_file_test.cc | 2 +- test/test-data.sha1 | 7 +++- test/test.mk | 7 +++- test/vp9_frame_parallel_test.cc | 86 +++++++++++++++++++++++++++++++++++++++++ test/vp9_thread_test.cc | 2 +- vp9/decoder/vp9_decodeframe.c | 23 +++++++++-- vp9/decoder/vp9_decoder.c | 57 +++++++++++++++------------ vp9/decoder/vp9_decoder.h | 17 ++++++++ vp9/decoder/vp9_dthread.c | 22 ++++++++--- vp9/vp9_dx_iface.c | 42 ++++++++++++++++---- vpx/vpx_frame_buffer.h | 2 +- 11 files changed, 219 insertions(+), 48 deletions(-) diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc index 8c830340e..e91b815ca 100644 --- a/test/invalid_file_test.cc +++ b/test/invalid_file_test.cc @@ -97,7 +97,7 @@ const char *const kVP9InvalidFileTests[] = { "invalid-vp90-01.webm", "invalid-vp90-02.webm", "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf", - "invalid-vp90-03.webm", + "invalid-vp90-03-v3.webm", "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf", "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf", }; diff --git a/test/test-data.sha1 b/test/test-data.sha1 index b1380652d..41207ae65 100644 --- a/test/test-data.sha1 +++ b/test/test-data.sha1 @@ -4,8 +4,8 @@ fe346136b9b8c1e6f6084cc106485706915795e4 invalid-vp90-01.webm 25751f5d3b05ff03f0719ad42cd625348eb8961e 
invalid-vp90-01.webm.res d78e2fceba5ac942246503ec8366f879c4775ca5 invalid-vp90-02.webm 2dadee5306245fa5eeb0f99652d0e17afbcba96d invalid-vp90-02.webm.res -df1a1453feb3c00d7d89746c7003b4163523bff3 invalid-vp90-03.webm -8fe6fd82bf537340f586f97a7ae31fb37ccda302 invalid-vp90-03.webm.res +df1a1453feb3c00d7d89746c7003b4163523bff3 invalid-vp90-03-v3.webm +4935c62becc68c13642a03db1e6d3e2331c1c612 invalid-vp90-03-v3.webm.res a432f96ff0a787268e2f94a8092ab161a18d1b06 park_joy_90p_10_420.y4m 0b194cc312c3a2e84d156a221b0a5eb615dfddc5 park_joy_90p_10_422.y4m ff0e0a21dc2adc95b8c1b37902713700655ced17 park_joy_90p_10_444.y4m @@ -667,3 +667,6 @@ f97088c7359fc8d3d5aa5eafe57bc7308b3ee124 vp90-2-20-big_superframe-01.webm 7c0ed8d04c4d06c5411dd2e5de2411d37f092db5 vp90-2-20-big_superframe-02.webm.md5 667ec8718c982aef6be07eb94f083c2efb9d2d16 vp90-2-07-frame_parallel-1.webm bfc82bf848e9c05020d61e3ffc1e62f25df81d19 vp90-2-07-frame_parallel-1.webm.md5 +efd5a51d175cfdacd169ed23477729dc558030dc invalid-vp90-2-07-frame_parallel-1.webm +9f912712ec418be69adb910e2ca886a63c4cec08 invalid-vp90-2-07-frame_parallel-2.webm +445f5a53ca9555341852997ccdd480a51540bd14 invalid-vp90-2-07-frame_parallel-3.webm diff --git a/test/test.mk b/test/test.mk index ece2b37aa..75111f5af 100644 --- a/test/test.mk +++ b/test/test.mk @@ -786,14 +786,17 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm.res -LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03.webm -LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03.webm.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # BBB VP9 streams diff --git a/test/vp9_frame_parallel_test.cc b/test/vp9_frame_parallel_test.cc index d159c2709..6c44a285f 100644 --- a/test/vp9_frame_parallel_test.cc +++ b/test/vp9_frame_parallel_test.cc @@ -118,5 +118,91 @@ TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) { DecodeFiles(files); } +struct InvalidFileList { + const char *name; + // md5 sum for decoded frames which does not include corrupted frames. + const char *expected_md5; + // Expected number of decoded frames which does not include corrupted frames. + const int expected_frame_count; +}; + +// Decodes |filename| with |num_threads|. Return the md5 of the decoded +// frames which does not include corrupted frames. 
+string DecodeInvalidFile(const string &filename, int num_threads, + int expected_frame_count) { + libvpx_test::WebMVideoSource video(filename); + video.Init(); + + vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); + cfg.threads = num_threads; + const vpx_codec_flags_t flags = VPX_CODEC_USE_FRAME_THREADING; + libvpx_test::VP9Decoder decoder(cfg, flags, 0); + + libvpx_test::MD5 md5; + video.Begin(); + + int out_frames = 0; + do { + const vpx_codec_err_t res = + decoder.DecodeFrame(video.cxdata(), video.frame_size()); + // TODO(hkuang): frame parallel mode should return an error on corruption. + if (res != VPX_CODEC_OK) { + EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); + break; + } + + video.Next(); + + // Flush the decoder at the end of the video. + if (!video.cxdata()) + decoder.DecodeFrame(NULL, 0); + + libvpx_test::DxDataIterator dec_iter = decoder.GetDxData(); + const vpx_image_t *img; + + // Get decompressed data + while ((img = dec_iter.Next())) { + ++out_frames; + md5.Add(img); + } + } while (video.cxdata() != NULL); + + EXPECT_EQ(expected_frame_count, out_frames) << + "Input frame count does not match expected output frame count"; + + return string(md5.Get()); +} + +void DecodeInvalidFiles(const InvalidFileList files[]) { + for (const InvalidFileList *iter = files; iter->name != NULL; ++iter) { + SCOPED_TRACE(iter->name); + for (int t = 2; t <= 8; ++t) { + EXPECT_EQ(iter->expected_md5, + DecodeInvalidFile(iter->name, t, iter->expected_frame_count)) + << "threads = " << t; + } + } +} + +TEST(VP9MultiThreadedFrameParallel, InvalidFileTest) { + static const InvalidFileList files[] = { + // invalid-vp90-2-07-frame_parallel-1.webm is a 40 frame video file with + // one key frame for every ten frames. The 11th frame has corrupted data. + { "invalid-vp90-2-07-frame_parallel-1.webm", + "0549d0f45f60deaef8eb708e6c0eb6cb", 30}, + // invalid-vp90-2-07-frame_parallel-2.webm is a 40 frame video file with + // one key frame for every ten frames. 
The 1st and 31st frames have + // corrupted data. + { "invalid-vp90-2-07-frame_parallel-2.webm", + "6a1f3cf6f9e7a364212fadb9580d525e", 20}, + // invalid-vp90-2-07-frame_parallel-3.webm is a 40 frame video file with + // one key frame for every ten frames. The 13th frame has corrupted data. + { "invalid-vp90-2-07-frame_parallel-3.webm", + "a567c8259d27ad32b1b7f58db5ac89dd", 32}, + { NULL, NULL, 0}, + }; + DecodeInvalidFiles(files); +} + #endif // CONFIG_WEBM_IO } // namespace diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc index fa51835a5..485a4bf6b 100644 --- a/test/vp9_thread_test.cc +++ b/test/vp9_thread_test.cc @@ -175,7 +175,7 @@ int Reset(VP9Worker *const /*worker*/) { return 1; } int Sync(VP9Worker *const worker) { return !worker->had_error; } void Execute(VP9Worker *const worker) { - worker->had_error |= worker->hook(worker->data1, worker->data2); + worker->had_error |= !worker->hook(worker->data1, worker->data2); } void Launch(VP9Worker *const worker) { Execute(worker); } diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index da973c3c8..b93eed8f4 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -648,6 +648,7 @@ static void apply_frame_size(VP9_COMMON *cm, int width, int height) { cm->subsampling_x, cm->subsampling_y, VP9_DEC_BORDER_IN_PIXELS, &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, pool->cb_priv)) { + unlock_buffer_pool(pool); vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); } @@ -1165,6 +1166,10 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, } setup_frame_size(cm, rb); + if (pbi->need_resync) { + vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + pbi->need_resync = 0; + } } else { cm->intra_only = cm->show_frame ? 
0 : vp9_rb_read_bit(rb); @@ -1176,6 +1181,10 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); setup_frame_size(cm, rb); + if (pbi->need_resync) { + vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + pbi->need_resync = 0; + } } else { pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); for (i = 0; i < REFS_PER_FRAME; ++i) { @@ -1203,6 +1212,12 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, } } + if (pbi->need_resync) { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Keyframe / intra-only frame required to reset decoder" + " state"); + } + if (!cm->error_resilient_mode) { cm->coding_use_prev_mi = 1; cm->refresh_frame_context = vp9_rb_read_bit(rb); @@ -1239,6 +1254,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; } unlock_buffer_pool(pool); + pbi->hold_ref_buf = 1; if (frame_is_intra_only(cm) || cm->error_resilient_mode) vp9_setup_past_independence(cm); @@ -1457,9 +1473,7 @@ void vp9_decode_frame(VP9Decoder *pbi, *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); } - new_fb->corrupted |= xd->corrupted; - - if (!new_fb->corrupted) { + if (!xd->corrupted) { if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { vp9_adapt_coef_probs(cm); @@ -1470,6 +1484,9 @@ void vp9_decode_frame(VP9Decoder *pbi, } else { debug_check_frame_counts(cm); } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); } // Non frame parallel update frame context here. 
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index b5f3b1448..85d454203 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -58,6 +58,7 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) { } cm->error.setjmp = 1; + pbi->need_resync = 1; initialize_dec(); vp9_rtcd(); @@ -197,16 +198,6 @@ int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { return 0; } -static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, - BufferPool *const pool) { - if (idx >= 0) { - --frame_bufs[idx].ref_count; - if (frame_bufs[idx].ref_count == 0) { - pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); - } - } -} - /* If any buffer updating is signaled it should be done here. */ static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; @@ -235,7 +226,7 @@ static void swap_frame_buffers(VP9Decoder *pbi) { cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; } unlock_buffer_pool(pool); - + pbi->hold_ref_buf = 0; cm->frame_to_show = get_frame_new_buffer(cm); if (!pbi->frame_parallel_decode || !cm->show_frame) { @@ -256,7 +247,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; const uint8_t *source = *psource; int retcode = 0; - cm->error.error_code = VPX_CODEC_OK; if (size == 0) { @@ -282,7 +272,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, &frame_bufs[cm->new_fb_idx].raw_frame_buffer); cm->new_fb_idx = get_free_fb(cm); - + pbi->hold_ref_buf = 0; if (pbi->frame_parallel_decode) { VP9Worker *const worker = pbi->frame_worker_owner; vp9_frameworker_lock_stats(worker); @@ -300,18 +290,35 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, cm->error.setjmp = 0; pbi->ready_for_new_data = 1; - // We do not know if the missing frame(s) was supposed to update - // any of the reference buffers, but we act conservative and - // mark only the last buffer as corrupted. 
- // - // TODO(jkoleszar): Error concealment is undefined and non-normative - // at this point, but if it becomes so, [0] may not always be the correct - // thing to do here. - if (cm->frame_refs[0].idx != INT_MAX && cm->frame_refs[0].buf != NULL) - cm->frame_refs[0].buf->corrupted = 1; - - if (frame_bufs[cm->new_fb_idx].ref_count > 0) - --frame_bufs[cm->new_fb_idx].ref_count; + lock_buffer_pool(pool); + // Release all the reference buffers if worker thread is holding them. + if (pbi->hold_ref_buf == 1) { + int ref_index = 0, mask; + VP9_COMMON *const cm = &pbi->common; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + const int old_idx = cm->ref_frame_map[ref_index]; + // Current thread releases the holding of reference frame. + decrease_ref_count(old_idx, frame_bufs, pool); + + // Release the reference frame in reference map. + if ((mask & 1) && old_idx >= 0) { + decrease_ref_count(old_idx, frame_bufs, pool); + } + ++ref_index; + } + + // Current thread releases the holding of reference frame. + for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { + const int old_idx = cm->ref_frame_map[ref_index]; + decrease_ref_count(old_idx, frame_bufs, pool); + } + pbi->hold_ref_buf = 0; + } + // Release current frame. + decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); + unlock_buffer_pool(pool); return -1; } diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 9844e2031..aa4217103 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -65,6 +65,8 @@ typedef struct VP9Decoder { int max_threads; int inv_tile_order; + int need_resync; // wait for key/intra-only frame. + int hold_ref_buf; // hold the reference buffer. 
} VP9Decoder; int vp9_receive_compressed_data(struct VP9Decoder *pbi, @@ -88,6 +90,21 @@ struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); void vp9_decoder_remove(struct VP9Decoder *pbi); +static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, + BufferPool *const pool) { + if (idx >= 0) { + --frame_bufs[idx].ref_count; + // A worker may only get a free framebuffer index when calling get_free_fb. + // But the private buffer is not set up until finish decoding header. + // So any error happens during decoding header, the frame_bufs will not + // have valid priv buffer. + if (frame_bufs[idx].ref_count == 0 && + frame_bufs[idx].raw_frame_buffer.priv) { + pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); + } + } +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c index f599c2a8b..d0be882df 100644 --- a/vp9/decoder/vp9_dthread.c +++ b/vp9/decoder/vp9_dthread.c @@ -320,7 +320,7 @@ void vp9_frameworker_wait(VP9Worker *const worker, RefCntBuffer *const ref_buf, // Enabling the following line of code will get harmless tsan error but // will get best performance. - // if (ref_buf->row >= row) return; + // if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return; { // Find the worker thread that owns the reference frame. 
If the reference @@ -340,10 +340,19 @@ void vp9_frameworker_wait(VP9Worker *const worker, RefCntBuffer *const ref_buf, #endif vp9_frameworker_lock_stats(ref_worker); - while (ref_buf->row < row && pbi->cur_buf == ref_buf) { + while (ref_buf->row < row && pbi->cur_buf == ref_buf && + ref_buf->buf.corrupted != 1) { pthread_cond_wait(&ref_worker_data->stats_cond, &ref_worker_data->stats_mutex); } + + if (ref_buf->buf.corrupted == 1) { + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + vp9_frameworker_unlock_stats(ref_worker); + vpx_internal_error(&worker_data->pbi->common.error, + VPX_CODEC_CORRUPT_FRAME, + "Worker %p failed to decode frame", worker); + } vp9_frameworker_unlock_stats(ref_worker); } #else @@ -358,8 +367,11 @@ void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) { VP9Worker *worker = buf->frame_worker_owner; #ifdef DEBUG_THREAD - printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id, - buf->frame_worker_owner, row); + { + FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; + printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id, + buf->frame_worker_owner, row); + } #endif vp9_frameworker_lock_stats(worker); @@ -403,7 +415,7 @@ void vp9_frameworker_copy_context(VP9Worker *const dst_worker, dst_cm->prev_mi_grid_visible = src_cm->mi_grid_visible; dst_cm->last_frame_seg_map = src_cm->current_frame_seg_map; } - + dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync; vp9_frameworker_unlock_stats(src_worker); dst_worker_data->pbi->prev_buf = diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 1d919daba..dbdfb4640 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -290,11 +290,37 @@ static int frame_worker_hook(void *arg1, void *arg2) { FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; const uint8_t *data = frame_worker_data->data; (void)arg2; + frame_worker_data->result = vp9_receive_compressed_data(frame_worker_data->pbi, 
frame_worker_data->data_size, &data); frame_worker_data->data_end = data; + + if (frame_worker_data->pbi->frame_parallel_decode) { + // In frame parallel decoding, a worker thread must successfully decode all + // the compressed data. + if (frame_worker_data->result != 0 || + frame_worker_data->data + frame_worker_data->data_size - 1 > data) { + VP9Worker *const worker = frame_worker_data->pbi->frame_worker_owner; + BufferPool *const pool = frame_worker_data->pbi->common.buffer_pool; + // Signal all the other threads that are waiting for this frame. + vp9_frameworker_lock_stats(worker); + frame_worker_data->frame_context_ready = 1; + lock_buffer_pool(pool); + frame_worker_data->pbi->cur_buf->buf.corrupted = 1; + unlock_buffer_pool(pool); + frame_worker_data->pbi->need_resync = 1; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + return 0; + } + } else if (frame_worker_data->result != 0) { + // Check decode result in serial decode. + frame_worker_data->pbi->cur_buf->buf.corrupted = 1; + frame_worker_data->pbi->need_resync = 1; + } + return !frame_worker_data->result; } @@ -444,7 +470,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, &ctx->frame_workers[ctx->last_submit_worker_id]); frame_worker_data->pbi->ready_for_new_data = 0; - // Copy the compressed data into worker's internal buffer. // TODO(hkuang): Will all the workers allocate the same size // as the size of the first intra frame be better? 
This will @@ -474,6 +499,7 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers; --ctx->available_threads; + worker->had_error = 0; winterface->launch(worker); } @@ -714,11 +740,7 @@ static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) { if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { BufferPool *const pool = ctx->buffer_pool; lock_buffer_pool(pool); - --frame_bufs[ctx->last_show_frame].ref_count; - if (frame_bufs[ctx->last_show_frame].ref_count == 0) { - pool->release_fb_cb(pool->cb_priv, - &frame_bufs[ctx->last_show_frame].raw_frame_buffer); - } + decrease_ref_count(ctx->last_show_frame, frame_bufs, pool); unlock_buffer_pool(pool); } } @@ -758,8 +780,12 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, ctx->next_output_worker_id = (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; // Wait for the frame from worker thread. - winterface->sync(worker); - if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + if (!winterface->sync(worker)) { + // Decoding failed. Release the worker thread. + ++ctx->available_threads; + if (ctx->flushed != 1) + return img; + } else if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { VP9_COMMON *const cm = &frame_worker_data->pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; ++ctx->available_threads; diff --git a/vpx/vpx_frame_buffer.h b/vpx/vpx_frame_buffer.h index 0741e6e71..a9a7499be 100644 --- a/vpx/vpx_frame_buffer.h +++ b/vpx/vpx_frame_buffer.h @@ -26,7 +26,7 @@ extern "C" { * Each thread will use one work buffer. * TODO(hkuang): Add support to set number of worker threads dynamically. */ -#define VPX_MAXIMUM_WORK_BUFFERS 4 +#define VPX_MAXIMUM_WORK_BUFFERS 8 /*!\brief The maximum number of reference buffers that a VP9 encoder may use. */ -- cgit v1.2.1