summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2016-07-29 16:30:34 -0400
committerJean-Marc Valin <jmvalin@jmvalin.ca>2016-08-05 12:56:52 -0400
commit48ee78d2ee498abd98b6a70d41707b087a79121c (patch)
treeea008539098ede19c19497c87db78f3a4443a667
parent80e80c2f0540eeaaae3c3cfcf62ebd41b5a6014c (diff)
downloadopus-exp_stereo5.tar.gz
Partially collapsing the stereo image when channels have different energyexp_stereo5
We can lower the distortion of the most important channel, at the expense of the other channel.
-rw-r--r--celt/bands.c65
-rw-r--r--doc/stereo.lyx545
2 files changed, 602 insertions, 8 deletions
diff --git a/celt/bands.c b/celt/bands.c
index d53f0280..a089d31e 100644
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -409,9 +409,55 @@ static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, con
}
}
-static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
+static void stereo_split_collapse(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y,
+ int N, opus_val16 w[2], int utheta, int itheta)
{
int j;
+#ifdef FIXED_POINT
+ (void)utheta;
+ (void)itheta;
+#else
+ /* When we care more about one of the channels, adjust x and y to minimize
+ the weighted distortion. See doc/stereo.lyx for more details. */
+ if (w[0] != w[1])
+ {
+ float phi;
+ float S;
+ float tan_dx, tan_dy;
+ float cos_dx, cos_dy;
+ float cos_phi, sin_phi;
+ float gxx, gxy, gyx, gyy;
+ float gx, gy;
+ phi = utheta*M_PI/16384;
+ cos_phi = cos(phi);
+ sin_phi = sin(phi);
+ /* S=dx+dy is the error due to quantizing phi. */
+ S = (utheta-itheta)*M_PI/16384.f;
+ /* Angular displacement for x and y. */
+ tan_dx = w[1]*sin(S)/(w[0] + w[1]*cos(S));
+ tan_dy = w[0]*sin(S)/(w[1] + w[0]*cos(S));
+ /* Identity: cos(x) = 1/sqrt(1 + tan(x)^2) */
+ cos_dx = 1.f/sqrt(1.f + tan_dx*tan_dx);
+ cos_dy = 1.f/sqrt(1.f + tan_dy*tan_dy);
+ /* Normalization factors such that x and y end up with unit norm. */
+ gx = cos_dx/(EPSILON+sin_phi);
+ gy = cos_dy/(EPSILON+sin_phi);
+ /* Linear combinations of x and y that produce the desired rotations
+ by dx and dy. */
+ gxy = gx*tan_dx;
+ gxx = gx*(sin_phi - cos_phi*tan_dx);
+ gyx = gy*tan_dy;
+ gyy = gy*(sin_phi - cos_phi*tan_dy);
+ for (j=0;j<N;j++)
+ {
+ float x, y;
+ x = X[j];
+ y = Y[j];
+ X[j] = gxx*x + gxy*y;
+ Y[j] = gyx*x + gyy*y;
+ }
+ }
+#endif
for (j=0;j<N;j++)
{
opus_val32 r, l;
@@ -683,6 +729,7 @@ struct band_ctx {
opus_uint32 seed;
int arch;
int theta_round;
+ opus_val16 w[2];
};
struct split_ctx {
@@ -700,7 +747,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
int stereo, int *fill)
{
int qn;
- int itheta=0;
+ int itheta=0, utheta=0;
int delta;
int imid, iside;
int qalloc;
@@ -734,7 +781,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
side and mid. With just that parameter, we can re-scale both
mid and side because we know that 1) they have unit norm and
2) they are orthogonal. */
- itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
+ utheta = itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
}
tell = ec_tell_frac(ec);
if (qn!=1)
@@ -825,7 +872,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
if (itheta==0)
intensity_stereo(m, X, Y, bandE, i, N);
else
- stereo_split(X, Y, N);
+ stereo_split_collapse(X, Y, N, ctx->w, utheta, itheta);
}
/* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
Let's do that at higher complexity */
@@ -1435,6 +1482,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ctx.arch = arch;
ctx.resynth = resynth;
ctx.theta_round = 0;
+ ctx.w[0] = ctx.w[1] = Q15ONE;
for (i=start;i<end;i++)
{
opus_int32 tell;
@@ -1541,8 +1589,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
int nstart_bytes, nend_bytes, save_bytes;
unsigned char *bytes_buf;
unsigned char bytes_save[1275];
- opus_val16 w[2];
- compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
+ compute_channel_weights(bandE[i], bandE[i+m->nbEBands], ctx.w);
/* Make a copy. */
cm = x_cm|y_cm;
ec_save = *ec;
@@ -1554,7 +1601,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
- dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+ dist0 = MULT16_32_Q15(ctx.w[0], celt_inner_prod(X_save, X, N, arch))
+ + MULT16_32_Q15(ctx.w[1], celt_inner_prod(Y_save, Y, N, arch));
/* Save first result. */
cm2 = x_cm;
@@ -1580,7 +1628,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
- dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+ dist1 = MULT16_32_Q15(ctx.w[0], celt_inner_prod(X_save, X, N, arch))
+ + MULT16_32_Q15(ctx.w[1], celt_inner_prod(Y_save, Y, N, arch));
if (dist0 >= dist1) {
x_cm = cm2;
*ec = ec_save2;
diff --git a/doc/stereo.lyx b/doc/stereo.lyx
new file mode 100644
index 00000000..202af5f0
--- /dev/null
+++ b/doc/stereo.lyx
@@ -0,0 +1,545 @@
+#LyX 2.2 created this file. For more info see http://www.lyx.org/
+\lyxformat 508
+\begin_document
+\begin_header
+\save_transient_properties true
+\origin unavailable
+\textclass article
+\use_default_options true
+\maintain_unincluded_children false
+\language english
+\language_package default
+\inputencoding auto
+\fontencoding global
+\font_roman "default" "default"
+\font_sans "default" "default"
+\font_typewriter "default" "default"
+\font_math "auto" "auto"
+\font_default_family default
+\use_non_tex_fonts false
+\font_sc false
+\font_osf false
+\font_sf_scale 100 100
+\font_tt_scale 100 100
+\graphics default
+\default_output_format default
+\output_sync 0
+\bibtex_command default
+\index_command default
+\paperfontsize default
+\spacing single
+\use_hyperref false
+\papersize default
+\use_geometry true
+\use_package amsmath 1
+\use_package amssymb 1
+\use_package cancel 1
+\use_package esint 1
+\use_package mathdots 1
+\use_package mathtools 1
+\use_package mhchem 1
+\use_package stackrel 1
+\use_package stmaryrd 1
+\use_package undertilde 1
+\cite_engine basic
+\cite_engine_type default
+\biblio_style plain
+\use_bibtopic false
+\use_indices false
+\paperorientation portrait
+\suppress_date false
+\justification true
+\use_refstyle 1
+\index Index
+\shortcut idx
+\color #008000
+\end_index
+\leftmargin 2cm
+\topmargin 2cm
+\rightmargin 2cm
+\bottommargin 2cm
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation indent
+\paragraph_indentation default
+\quotes_language english
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+\tracking_changes false
+\output_changes false
+\html_math_output 0
+\html_css_as_file 0
+\html_be_strict false
+\end_header
+
+\begin_body
+
+\begin_layout Title
+Stereo Quantization Improvements in Opus/CELT
+\end_layout
+
+\begin_layout Author
+Jean-Marc Valin
+\end_layout
+
+\begin_layout Section
+Modifying stereo input vectors
+\end_layout
+
+\begin_layout Standard
+Let
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ denote the normalized vector for a band of the left channel and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ denote the corresponding vector for the right channel.
+ When quantizing stereo, the first step is to quantize the angle derived
+ from the ratio of the magnitude of the side to the magnitude of the mid
+\begin_inset Formula
+\[
+\theta=\arctan\frac{\left\Vert \mathbf{S}\right\Vert }{\left\Vert \mathbf{M}\right\Vert }\,,
+\]
+
+\end_inset
+
+where
+\begin_inset Formula $\mathbf{M}=\mathbf{x}+\mathbf{y}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{S}=\mathbf{x}-\mathbf{y}$
+\end_inset
+
+.
+
+\end_layout
+
+\begin_layout Standard
+It can be shown that the angle
+\begin_inset Formula $\theta$
+\end_inset
+
+ is related to the angle
+\begin_inset Formula $\phi$
+\end_inset
+
+ between
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ by
+\begin_inset Formula $\phi=2\theta$
+\end_inset
+
+, where
+\begin_inset Formula
+\[
+\cos\phi=\mathbf{x}^{T}\mathbf{y}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+When
+\begin_inset Formula $\theta$
+\end_inset
+
+ is quantized to
+\begin_inset Formula $\hat{\theta}$
+\end_inset
+
+, it causes distortion to both channels.
+ The distortion (sum of squared errors) for each channel is given by the
+ law of cosines to be
+\begin_inset Formula
+\[
+D=2-2\cos\delta\,,
+\]
+
+\end_inset
+
+where
+\begin_inset Formula $\delta$
+\end_inset
+
+ is the angle by which each of the vectors was
+\emph on
+moved
+\emph default
+ by the quantization.
+ Since both channels are affected by the same amount,
+\begin_inset Formula $\delta=\frac{\hat{\phi}-\phi}{2}=\hat{\theta}-\theta$
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+However, we may want to change that behaviour when the two channels differ
+ in loudness.
+ Let
+\begin_inset Formula $w_{x}$
+\end_inset
+
+ and
+\begin_inset Formula $w_{y}$
+\end_inset
+
+ be the weight we assign to each of the channels.
+ The total weighted distortion then becomes
+\end_layout
+
+\begin_layout Standard
+\begin_inset Formula
+\[
+D=w_{x}\left(2-2\cos\delta_{x}\right)+w_{y}\left(2-2\cos\delta_{y}\right)\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Let
+\begin_inset Formula $S=\delta_{x}+\delta_{y}=\hat{\phi}-\phi$
+\end_inset
+
+ be a known value (from the quantization process).
+ We can minimize the weighted distortion by substituting
+\begin_inset Formula $\delta_{y}=S-\delta_{x}$
+\end_inset
+
+ and solving:
+\begin_inset Formula
+\begin{align*}
+\frac{\partial D}{\partial\delta_{x}}=2w_{x}\sin\delta_{x}-2w_{y}\sin\left(S-\delta_{x}\right) & =0\\
+2w_{x}\sin\delta_{x}-2w_{y}\left(\sin S\cos\delta_{x}-\cos S\sin\delta_{x}\right) & =0\\
+w_{x}\sin\delta_{x}+w_{y}\cos S\sin\delta_{x} & =w_{y}\sin S\cos\delta_{x}\\
+\sin\delta_{x}\cdot\left(w_{x}+w_{y}\cos S\right) & =w_{y}\sin S\cos\delta_{x}\\
+\tan\delta_{x} & =\frac{w_{y}\sin S}{w_{x}+w_{y}\cos S}\,.
+\end{align*}
+
+\end_inset
+
+Using a similar derivation, we can find
+\begin_inset Formula
+\[
+\tan\delta_{y}=\frac{w_{x}\sin S}{w_{y}+w_{x}\cos S}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Given these values, we want to compute
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ that will be quantized instead of
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+.
+ Since quantizing
+\begin_inset Formula $\theta$
+\end_inset
+
+ keeps
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ in the same plane, we also want
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ to lie on the same plane as
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+.
+ We express them as linear combinations of
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ such that the angle between
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ is
+\begin_inset Formula $\delta_{x}$
+\end_inset
+
+ and the angle between
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ is
+\begin_inset Formula $\delta_{y}$
+\end_inset
+
+.
+ To make the calculation easier, we are not yet concerned about the norm
+ of
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+.
+ Let us consider
+\begin_inset Formula $\tilde{\mathbf{x}}=\mathbf{x}+\alpha_{x}\mathbf{y}$
+\end_inset
+
+, the angle between
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ is given by
+\begin_inset Formula
+\[
+\delta_{x}=\arctan\frac{\alpha_{x}\sin\phi}{1+\alpha_{x}\cos\phi}\,,
+\]
+
+\end_inset
+
+where again
+\begin_inset Formula $\phi$
+\end_inset
+
+ is the angle between
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+.
+ Solving for
+\begin_inset Formula $\alpha_{x}$
+\end_inset
+
+, we get
+\begin_inset Formula
+\begin{align*}
+\tan\delta_{x}\left(1+\alpha_{x}\cos\phi\right) & =\alpha_{x}\sin\phi\\
+\tan\delta_{x} & =\alpha_{x}\sin\phi-\alpha_{x}\cos\phi\tan\delta_{x}\\
+\alpha_{x} & =\frac{\tan\delta_{x}}{\sin\phi-\cos\phi\tan\delta_{x}}\,.
+\end{align*}
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Since we are not concerned with scaling, we can avoid the division by simply
+ defining a denormalized
+\begin_inset Formula
+\[
+\tilde{\mathbf{x}}_{d}=g_{xx}\mathbf{x}+g_{xy}\mathbf{y}\,,
+\]
+
+\end_inset
+
+with
+\begin_inset Formula
+\begin{align*}
+g_{xx} & =\sin\phi-\cos\phi\tan\delta_{x}\\
+g_{xy} & =\tan\delta_{x}\,.
+\end{align*}
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Using the law of cosines, the squared magnitude of
+\begin_inset Formula $\tilde{\mathbf{x}}_{d}$
+\end_inset
+
+ is given by
+\begin_inset Formula
+\begin{align*}
+\left\Vert \tilde{\mathbf{x}}_{d}\right\Vert ^{2} & =\tan^{2}\delta_{x}+\left(\sin\phi-\cos\phi\tan\delta_{x}\right)^{2}+2\cos\phi\tan\delta_{x}\left(\sin\phi-\cos\phi\tan\delta_{x}\right)\\
+ & =\tan^{2}\delta_{x}+\sin^{2}\phi+\cos^{2}\phi\tan^{2}\delta_{x}-2\sin\phi\cos\phi\tan\delta_{x}+2\cos\phi\tan\delta_{x}\sin\phi-2\cos^{2}\phi\tan^{2}\delta_{x}\\
+ & =\tan^{2}\delta_{x}+\sin^{2}\phi-\cos^{2}\phi\tan^{2}\delta_{x}\\
+ & =\left(1-\cos^{2}\phi\right)\tan^{2}\delta_{x}+\sin^{2}\phi\\
+ & =\sin^{2}\phi\left(1+\tan^{2}\delta_{x}\right)\\
+ & =\frac{\sin^{2}\phi}{\cos^{2}\delta_{x}}\,.
+\end{align*}
+
+\end_inset
+
+Knowing this, we can compute a normalized
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ as
+\begin_inset Formula
+\[
+\tilde{\mathbf{x}}=\frac{\cos\delta_{x}}{\sin\phi}\tilde{\mathbf{x}}_{d}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+We can then compute
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ similarly.
+ Replacing
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ with
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ in the quantization process, we can give more weight to one channel or
+ the other.
+ When trying multiple values of
+\begin_inset Formula $\hat{\theta}$
+\end_inset
+
+, we will derive a different value of
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ for each
+\begin_inset Formula $\hat{\theta}$
+\end_inset
+
+.
+
+\end_layout
+
+\begin_layout Section
+Stereo bit allocation
+\end_layout
+
+\begin_layout Standard
+By dumping quantization data from the encoder and looking at the normalized
+ distortion as a function of the angle
+\begin_inset Formula $\phi$
+\end_inset
+
+ and the rate, we have come up with the following approximation that best
+ fits the data with a simple enough function:
+\end_layout
+
+\begin_layout Standard
+\begin_inset Formula
+\[
+D=3\left(4^{-r}\sin\phi+4^{-2r}\left(1-\sin\phi\right)\right)\,,
+\]
+
+\end_inset
+
+where
+\begin_inset Formula $r$
+\end_inset
+
+ is the bit depth
+\begin_inset Formula
+\[
+r=\frac{b}{2N-1}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Solving for
+\begin_inset Formula $r$
+\end_inset
+
+, we get
+\begin_inset Formula
+\[
+R=\frac{-3\sin\phi+\sqrt{9\sin^{2}\phi+12D\left(1-\sin\phi\right)}}{6\left(1-\sin\phi\right)}\,,
+\]
+
+\end_inset
+
+with
+\begin_inset Formula $r=-\log_{4}R$
+\end_inset
+
+.
+
+\end_layout
+
+\end_body
+\end_document