summaryrefslogtreecommitdiff
path: root/silk
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2014-01-07 13:46:06 -0500
committerJean-Marc Valin <jmvalin@jmvalin.ca>2014-01-07 13:46:06 -0500
commitc25836205da1f2163064c7d819b3ac1bf48c40a0 (patch)
tree3e9276d8f70070e0b44ce745458e9365462d3d39 /silk
parent5f807c176fd9bc395f5e1cd030dfca3f0b14ba9f (diff)
downloadopus-c25836205da1f2163064c7d819b3ac1bf48c40a0.tar.gz
Delaying allocation of the SILK temporary output buffer to reduce peak stack
Diffstat (limited to 'silk')
-rw-r--r--silk/dec_API.c35
1 files changed, 28 insertions, 7 deletions
diff --git a/silk/dec_API.c b/silk/dec_API.c
index 660f93db..1087c672 100644
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "API.h"
#include "main.h"
#include "stack_alloc.h"
+#include "os_support.h"
/************************/
/* Decoder Super Struct */
@@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 *samplesOut1_tmp[ 2 ];
- VARDECL( opus_int16, samplesOut1_tmp_storage );
+ VARDECL( opus_int16, samplesOut1_tmp_storage1 );
+ VARDECL( opus_int16, samplesOut1_tmp_storage2 );
VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
@@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
+ int delay_stack_alloc;
SAVE_STACK;
silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
- ALLOC( samplesOut1_tmp_storage,
- decControl->nChannelsInternal*(
- channel_state[ 0 ].frame_length + 2 ),
+ /* Check if the temp buffer fits into the output PCM buffer. If it fits,
+ we can delay allocating the temp buffer until after the SILK peak stack
+ usage. We need to use a < and not a <= because of the two extra samples. */
+ delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
+ < decControl->API_sampleRate*decControl->nChannelsAPI;
+ ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
+ : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
opus_int16 );
- samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
- samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
- + channel_state[ 0 ].frame_length + 2;
+ if ( delay_stack_alloc )
+ {
+ samplesOut1_tmp[ 0 ] = samplesOut;
+ samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
+ } else {
+ samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
+ samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
+ }
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
@@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co
resample_out_ptr = samplesOut;
}
+ ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
+ ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
+ : ALLOC_NONE,
+ opus_int16 );
+ if ( delay_stack_alloc ) {
+ OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
+ samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
+ samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
+ }
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
/* Resample decoded signal to API_sampleRate */