diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2014-01-07 13:46:06 -0500 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2014-01-07 13:46:06 -0500 |
commit | c25836205da1f2163064c7d819b3ac1bf48c40a0 (patch) | |
tree | 3e9276d8f70070e0b44ce745458e9365462d3d39 /silk | |
parent | 5f807c176fd9bc395f5e1cd030dfca3f0b14ba9f (diff) | |
download | opus-c25836205da1f2163064c7d819b3ac1bf48c40a0.tar.gz |
Delaying allocation of the SILK temporary output buffer to reduce peak stack
Diffstat (limited to 'silk')
-rw-r--r-- | silk/dec_API.c | 35 |
1 files changed, 28 insertions, 7 deletions
diff --git a/silk/dec_API.c b/silk/dec_API.c index 660f93db..1087c672 100644 --- a/silk/dec_API.c +++ b/silk/dec_API.c @@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "API.h" #include "main.h" #include "stack_alloc.h" +#include "os_support.h" /************************/ /* Decoder Super Struct */ @@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; opus_int32 nSamplesOutDec, LBRR_symbol; opus_int16 *samplesOut1_tmp[ 2 ]; - VARDECL( opus_int16, samplesOut1_tmp_storage ); + VARDECL( opus_int16, samplesOut1_tmp_storage1 ); + VARDECL( opus_int16, samplesOut1_tmp_storage2 ); VARDECL( opus_int16, samplesOut2_tmp ); opus_int32 MS_pred_Q13[ 2 ] = { 0 }; opus_int16 *resample_out_ptr; @@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co silk_decoder_state *channel_state = psDec->channel_state; opus_int has_side; opus_int stereo_to_mono; + int delay_stack_alloc; SAVE_STACK; silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); @@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co psDec->channel_state[ 1 ].first_frame_after_reset = 1; } - ALLOC( samplesOut1_tmp_storage, - decControl->nChannelsInternal*( - channel_state[ 0 ].frame_length + 2 ), + /* Check if the temp buffer fits into the output PCM buffer. If it fits, + we can delay allocating the temp buffer until after the SILK peak stack + usage. We need to use a < and not a <= because of the two extra samples. */ + delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal + < decControl->API_sampleRate*decControl->nChannelsAPI; + ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE + : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), opus_int16 ); - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage - + channel_state[ 0 ].frame_length + 2; + if ( delay_stack_alloc ) + { + samplesOut1_tmp[ 0 ] = samplesOut; + samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; + } else { + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; + } if( lostFlag == FLAG_DECODE_NORMAL ) { has_side = !decode_only_middle; @@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co resample_out_ptr = samplesOut; } + ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc + ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) + : ALLOC_NONE, + opus_int16 ); + if ( delay_stack_alloc ) { + OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; + } for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { /* Resample decoded signal to API_sampleRate */ |