diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2013-12-08 23:31:30 -0500 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2013-12-09 15:26:52 -0500 |
commit | ff072009fe5bdd3540ac6ac331e9961e83da722a (patch) | |
tree | 765e4d4ff0b9835a355be8de4c796b178b966d42 | |
parent | 0f869cba0f271f3d9aee1289d490c44ce1e12975 (diff) | |
download | opus-ff072009fe5bdd3540ac6ac331e9961e83da722a.tar.gz |
Replaces inline copies and initialization with OPUS_*() macros.
This is a bit faster at -O2 because memcpy()/memmove()/memset() are
vectorized. The code is also cleaner.
-rw-r--r-- | celt/bands.c | 19 | ||||
-rw-r--r-- | celt/celt_encoder.c | 36 | ||||
-rw-r--r-- | src/opus_encoder.c | 23 |
3 files changed, 30 insertions, 48 deletions
diff --git a/celt/bands.c b/celt/bands.c
index cce56e2f..cd7d88d1 100644
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -249,8 +249,7 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
          } while (++j<band_end);
       }
       celt_assert(start <= end);
-      for (i=M*eBands[end];i<N;i++)
-         *f++ = 0;
+      OPUS_CLEAR(&freq[c*N+M*eBands[end]], N-M*eBands[end]);
    } while (++c<C);
 }
 
@@ -409,8 +408,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
    Er = MULT16_16(mid2, mid2) + side + 2*xp;
    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
    {
-      for (j=0;j<N;j++)
-         Y[j] = X[j];
+      OPUS_COPY(Y, X, N);
       return;
    }
 
@@ -567,8 +565,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard
          for (j=0;j<N0;j++)
             tmp[i*N0+j] = X[j*stride+i];
    }
-   for (j=0;j<N;j++)
-      X[j] = tmp[j];
+   OPUS_COPY(X, tmp, N);
    RESTORE_STACK;
 }
 
@@ -591,8 +588,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
          for (j=0;j<N0;j++)
             tmp[j*stride+i] = X[i*N0+j];
    }
-   for (j=0;j<N;j++)
-      X[j] = tmp[j];
+   OPUS_COPY(X, tmp, N);
    RESTORE_STACK;
 }
 
@@ -1021,8 +1017,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
             fill &= cm_mask;
             if (!fill)
             {
-               for (j=0;j<N;j++)
-                  X[j] = 0;
+               OPUS_CLEAR(X, N);
             } else {
                if (lowband == NULL)
                {
@@ -1098,9 +1093,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
 
    if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
    {
-      int j;
-      for (j=0;j<N;j++)
-         lowband_scratch[j] = lowband[j];
+      OPUS_COPY(lowband_scratch, lowband, N);
       lowband = lowband_scratch;
    }
 
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 917f9195..db183430 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -276,8 +276,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
       }
       /*printf("\n");*/
       /* First few samples are bad because we don't propagate the memory */
-      for (i=0;i<12;i++)
-         tmp[i] = 0;
+      OPUS_CLEAR(tmp, 12);
 
 #ifdef FIXED_POINT
       /* Normalize tmp to max range */
@@ -453,8 +452,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
          int bound = B*N/upsample;
          for (i=0;i<bound;i++)
             out[c*B*N+i] *= upsample;
-         for (;i<B*N;i++)
-            out[c*B*N+i] = 0;
+         OPUS_CLEAR(&out[c*B*N+bound], B*N-bound);
       } while (++c<C);
    }
 }
@@ -489,8 +487,7 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
    Nu = N/upsample;
    if (upsample!=1)
    {
-      for (i=0;i<N;i++)
-         inp[i] = 0;
+      OPUS_CLEAR(inp, N);
    }
    for (i=0;i<Nu;i++)
       inp[i*upsample] = SCALEIN(pcmp[CC*i]);
@@ -586,8 +583,7 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
       N = (m->eBands[i+1]-m->eBands[i])<<LM;
       /* band is too narrow to be split down to LM=-1 */
       narrow = (m->eBands[i+1]-m->eBands[i])==1;
-      for (j=0;j<N;j++)
-         tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
+      OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N);
       /* Just add the right channel if we're in stereo */
       /*if (C==2)
          for (j=0;j<N;j++)
@@ -597,8 +593,7 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
       /* Check the -1 case for transients */
       if (isTransient && !narrow)
       {
-         for (j=0;j<N;j++)
-            tmp_1[j] = tmp[j];
+         OPUS_COPY(tmp_1, tmp, N);
          haar1(tmp_1, N>>LM, 1<<LM);
          L1 = l1_metric(tmp_1, N, LM+1, bias);
          if (L1<best_L1)
@@ -903,8 +898,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
    SAVE_STACK;
    ALLOC(follower, C*nbEBands, opus_val16);
    ALLOC(noise_floor, C*nbEBands, opus_val16);
-   for (i=0;i<nbEBands;i++)
-      offsets[i] = 0;
+   OPUS_CLEAR(offsets, nbEBands);
    /* Dynamic allocation code */
    maxDepth=-QCONST16(31.9f, DB_SHIFT);
    for (i=0;i<end;i++)
@@ -1566,8 +1560,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
 
    ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
-   for(i=0;i<st->end;i++)
-      surround_dynalloc[i] = 0;
+   OPUS_CLEAR(surround_dynalloc, st->end);
    /* This computes how much masking takes place between surround channels */
    if (st->start==0&&st->energy_mask&&!st->lfe)
    {
@@ -1629,8 +1622,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
             disabling masking. */
          mask_avg = 0;
          diff = 0;
-         for(i=0;i<mask_end;i++)
-            surround_dynalloc[i] = 0;
+         OPUS_CLEAR(surround_dynalloc, mask_end);
       } else {
          for(i=0;i<mask_end;i++)
             surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
@@ -1666,8 +1658,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
    if (!secondMdct)
    {
-      for (i=0;i<C*nbEBands;i++)
-         bandLogE2[i] = bandLogE[i];
+      OPUS_COPY(bandLogE2, bandLogE, C*nbEBands);
    }
 
    /* Last chance to catch any transient we might have missed in the
@@ -2059,16 +2050,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 #endif
 
    if (CC==2&&C==1) {
-      for (i=0;i<nbEBands;i++)
-         oldBandE[nbEBands+i]=oldBandE[i];
+      OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
    }
 
    if (!isTransient)
    {
-      for (i=0;i<CC*nbEBands;i++)
-         oldLogE2[i] = oldLogE[i];
-      for (i=0;i<CC*nbEBands;i++)
-         oldLogE[i] = oldBandE[i];
+      OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands);
+      OPUS_COPY(oldLogE, oldBandE, CC*nbEBands);
    } else {
       for (i=0;i<CC*nbEBands;i++)
          oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index dc73a46b..dd4999b3 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1425,8 +1425,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     ec_enc_init(&enc, data, max_data_bytes-1);
 
     ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
-    for (i=0;i<total_buffer*st->channels;i++)
-       pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
+    OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels);
 
     if (st->mode == MODE_CELT_ONLY)
        hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
@@ -1611,8 +1610,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
             prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
             gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
                   0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
-            for(i=0;i<prefill_offset;i++)
-               st->delay_buffer[i]=0;
+            OPUS_CLEAR(st->delay_buffer, prefill_offset);
 #ifdef FIXED_POINT
             pcm_silk = st->delay_buffer;
 #else
@@ -1739,15 +1737,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
     if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
     {
-       for (i=0;i<st->channels*st->Fs/400;i++)
-          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+       OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400);
     }
 
-    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
-       st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
-    for (;i<st->encoder_buffer*st->channels;i++)
-       st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
-
+    if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0)
+    {
+       OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer));
+       OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)],
+             &pcm_buf[0],
+             (frame_size+total_buffer)*st->channels);
+    } else {
+       OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels);
+    }
     /* gain_fade() and stereo_fade() need to be after the buffer copying
        because we don't want any of this to affect the SILK part */
     if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {