summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2016-11-18 16:56:35 -0500
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2016-11-23 13:30:22 -0500
commit f02ad480f044d544ddc728db27ef69b109c1c920 (patch)
tree 0686c2c44ed4d5aeaa531a26c5a2207286ef0d72
parent f82f2e472366a0d24643f4a30492204442dd29b3 (diff)
download opus-f02ad480f044d544ddc728db27ef69b109c1c920.tar.gz
Fixing bandwidth detection for 24 kHz analysis
-rw-r--r-- src/analysis.c 24
-rw-r--r-- src/analysis.h 5
-rw-r--r-- src/opus_encoder.c 25
-rw-r--r-- src/opus_private.h 6
4 files changed, 42 insertions, 18 deletions
diff --git a/src/analysis.c b/src/analysis.c
index 7eb229e1..1c12aa24 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -104,7 +104,7 @@ static const int tbands[NB_TBANDS+1] = {
};
static const int extra_bands[NB_TOT_BANDS+1] = {
- 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
+ 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
};
/*static const float tweight[NB_TBANDS+1] = {
@@ -225,6 +225,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
float noise_floor;
int remaining;
AnalysisInfo *info;
+ float hp_ener;
SAVE_STACK;
tonal->last_transition++;
@@ -241,7 +242,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
kfft = celt_mode->mdct.kfft[0];
if (tonal->count==0)
tonal->mem_fill = 240;
- downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
+ tonal->hp_ener_accum += downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
{
tonal->mem_fill += len;
@@ -249,6 +250,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
RESTORE_STACK;
return;
}
+ hp_ener = tonal->hp_ener_accum;
info = &tonal->info[tonal->write_pos++];
if (tonal->write_pos>=DETECT_SIZE)
tonal->write_pos-=DETECT_SIZE;
@@ -267,7 +269,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
}
OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
- downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
+ tonal->hp_ener_accum = downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
tonal->mem_fill = 240 + remaining;
opus_fft(kfft, in, out, tonal->arch);
#ifndef FIXED_POINT
@@ -417,8 +419,8 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
float E=0;
int band_start, band_end;
/* Keep a margin of 300 Hz for aliasing */
- band_start = extra_bands[b];
- band_end = extra_bands[b+1];
+ band_start = 2*extra_bands[b];
+ band_end = 2*extra_bands[b+1];
for (i=band_start;i<band_end;i++)
{
float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
@@ -439,6 +441,18 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
bandwidth = b;
}
+ /* Special case for the last two bands, for which we don't have spectrum but only
+ the energy above 12 kHz. */
+ {
+ float E = hp_ener*(1./(240*240));
+ maxE = MAX32(maxE, E);
+ tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
+ E = MAX32(E, tonal->meanE[b]);
+ /* Use a simple follower with 13 dB/Bark slope for spreading function */
+ bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
+ if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*160)
+ bandwidth = 20;
+ }
if (tonal->count<=2)
bandwidth = 20;
frame_loudness = 20*(float)log10(frame_loudness);
diff --git a/src/analysis.h b/src/analysis.h
index 86bd6340..5ed791b1 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -33,7 +33,7 @@
#define NB_FRAMES 8
#define NB_TBANDS 18
-#define NB_TOT_BANDS 21
+#define NB_TOT_BANDS 19
#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
#define DETECT_SIZE 200
@@ -51,7 +51,7 @@ typedef struct {
float E[NB_FRAMES][NB_TBANDS];
float lowE[NB_TBANDS];
float highE[NB_TBANDS];
- float meanE[NB_TOT_BANDS];
+ float meanE[NB_TOT_BANDS+1];
float mem[32];
float cmean[8];
float std[9];
@@ -76,6 +76,7 @@ typedef struct {
int write_pos;
int read_pos;
int read_subframe;
+ float hp_ener_accum;
AnalysisInfo info[DETECT_SIZE];
} TonalityAnalysisState;
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index f64bd346..3bbd5700 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -579,7 +579,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
#endif
#ifndef FIXED_POINT
-void silk_resampler_down2_float(
+float silk_resampler_down2_float(
opus_val32 *S, /* I/O State vector [ 2 ] */
opus_val16 *out, /* O Output signal [ floor(len/2) ] */
const opus_val16 *in, /* I Input signal [ len ] */
@@ -587,8 +587,8 @@ void silk_resampler_down2_float(
)
{
int k, len2 = inLen/2;
- opus_val32 in32, out32, Y, X;
-
+ opus_val32 in32, out32, out32_hp, Y, X;
+ float hp_ener = 0;
/* Internal variables and state are in Q10 format */
for( k = 0; k < len2; k++ ) {
/* Convert to Q10 */
@@ -599,7 +599,7 @@ void silk_resampler_down2_float(
X = 0.6074371f*Y;
out32 = ADD32( S[ 0 ], X );
S[ 0 ] = ADD32( in32, X );
-
+ out32_hp = out32;
/* Convert to Q10 */
in32 = in[ 2 * k + 1 ];
@@ -610,13 +610,21 @@ void silk_resampler_down2_float(
out32 = ADD32( out32, X );
S[ 1 ] = ADD32( in32, X );
+ Y = SUB32( -in32, S[ 2 ] );
+ X = 0.15063f*Y;
+ out32_hp = ADD32( out32_hp, S[ 2 ] );
+ out32_hp = ADD32( out32_hp, X );
+ S[ 2 ] = ADD32( -in32, X );
+
+ hp_ener += out32_hp*out32_hp;
/* Add, convert back to int16 and store to output */
out[ k ] = .5*out32;
}
+ return hp_ener;
}
#endif
-void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+opus_val32 downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
{
const float *x;
opus_val32 scale;
@@ -648,11 +656,12 @@ void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, in
scale /= 2;
for (j=0;j<subframe;j++)
sub[j] *= scale;
+ return 0;
}
#endif
-float S[2];
-void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+float S[3];
+opus_val32 downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
{
VARDECL(opus_val16, tmp);
const opus_int16 *x;
@@ -691,7 +700,7 @@ void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int
scale /= 2;
for (j=0;j<subframe;j++)
tmp[j] *= scale;
- silk_resampler_down2_float(S, sub, tmp, subframe);
+ return silk_resampler_down2_float(S, sub, tmp, subframe);
}
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
diff --git a/src/opus_private.h b/src/opus_private.h
index a731cc55..82b206f6 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -84,9 +84,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
#define OPUS_SET_FORCE_MODE_REQUEST 11002
#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
-typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
-void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
-void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+typedef opus_val32 (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
+opus_val32 downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+opus_val32 downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
int encode_size(int size, unsigned char *data);