Partially collapsing the stereo image when channels have different energy [exp_stereo5]

We can lower the distortion of the most important channel, at the expense of the other channel.
author: Jean-Marc Valin <jmvalin@jmvalin.ca> 2016-07-29 16:30:34 -0400
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2016-08-05 12:56:52 -0400
commit: 48ee78d2ee498abd98b6a70d41707b087a79121c (patch)
tree: ea008539098ede19c19497c87db78f3a4443a667
parent: 80e80c2f0540eeaaae3c3cfcf62ebd41b5a6014c (diff)
download: opus-exp_stereo5.tar.gz
2 files changed, 602 insertions, 8 deletions
diff --git a/celt/bands.c b/celt/bands.c
index d53f0280..a089d31e 100644
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -409,9 +409,55 @@ static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, con
    }
 }
 
-static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
+static void stereo_split_collapse(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y,
+      int N, opus_val16 w[2], int utheta, int itheta)
 {
    int j;
+#ifdef FIXED_POINT
+   (void)utheta;
+   (void)itheta;
+#else
+   /* When we care more about one of the channels, adjust x and y to minimize
+      the weighted distortion. See doc/stereo.lyx for more details. */
+   if (w[0] != w[1])
+   {
+      float phi;
+      float S;
+      float tan_dx, tan_dy;
+      float cos_dx, cos_dy;
+      float cos_phi, sin_phi;
+      float gxx, gxy, gyx, gyy;
+      float gx, gy;
+      phi = utheta*M_PI/16384;
+      cos_phi = cos(phi);
+      sin_phi = sin(phi);
+      /* S=dx+dy is the error due to quantizing phi. */
+      S = (utheta-itheta)*M_PI/16384.f;
+      /* Angular displacement for x and y. */
+      tan_dx = w[1]*sin(S)/(w[0] + w[1]*cos(S));
+      tan_dy = w[0]*sin(S)/(w[1] + w[0]*cos(S));
+      /* Identity: cos(atan(x)) = 1/sqrt(1 + x^2) */
+      cos_dx = 1.f/sqrt(1.f + tan_dx*tan_dx);
+      cos_dy = 1.f/sqrt(1.f + tan_dy*tan_dy);
+      /* Normalization factors such that x and y end up with unit norm. */
+      gx = cos_dx/(EPSILON+sin_phi);
+      gy = cos_dy/(EPSILON+sin_phi);
+      /* Linear combinations of x and y that produce the desired rotations
+         by dx and dy. */
+      gxy = gx*tan_dx;
+      gxx = gx*(sin_phi - cos_phi*tan_dx);
+      gyx = gy*tan_dy;
+      gyy = gy*(sin_phi - cos_phi*tan_dy);
+      for (j=0;j<N;j++)
+      {
+         float x, y;
+         x = X[j];
+         y = Y[j];
+         X[j] = gxx*x + gxy*y;
+         Y[j] = gyx*x + gyy*y;
+      }
+   }
+#endif
    for (j=0;j<N;j++)
    {
       opus_val32 r, l;
@@ -683,6 +729,7 @@ struct band_ctx {
    opus_uint32 seed;
    int arch;
    int theta_round;
+   opus_val16 w[2];
 };
 
 struct split_ctx {
@@ -700,7 +747,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
       int stereo, int *fill)
 {
    int qn;
-   int itheta=0;
+   int itheta=0, utheta=0;
    int delta;
    int imid, iside;
    int qalloc;
@@ -734,7 +781,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
          side and mid. With just that parameter, we can re-scale both
          mid and side because we know that 1) they have unit norm and
          2) they are orthogonal. */
-      itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
+      utheta = itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
    }
    tell = ec_tell_frac(ec);
    if (qn!=1)
@@ -825,7 +872,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
          if (itheta==0)
             intensity_stereo(m, X, Y, bandE, i, N);
          else
-            stereo_split(X, Y, N);
+            stereo_split_collapse(X, Y, N, ctx->w, utheta, itheta);
       }
       /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
                Let's do that at higher complexity */
@@ -1435,6 +1482,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
    ctx.arch = arch;
    ctx.resynth = resynth;
    ctx.theta_round = 0;
+   ctx.w[0] = ctx.w[1] = Q15ONE;
    for (i=start;i<end;i++)
    {
       opus_int32 tell;
@@ -1541,8 +1589,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
                int nstart_bytes, nend_bytes, save_bytes;
                unsigned char *bytes_buf;
                unsigned char bytes_save[1275];
-               opus_val16 w[2];
-               compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
+               compute_channel_weights(bandE[i], bandE[i+m->nbEBands], ctx.w);
                /* Make a copy. */
                cm = x_cm|y_cm;
                ec_save = *ec;
@@ -1554,7 +1601,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
-               dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+               dist0 = MULT16_32_Q15(ctx.w[0], celt_inner_prod(X_save, X, N, arch))
+                     + MULT16_32_Q15(ctx.w[1], celt_inner_prod(Y_save, Y, N, arch));
 
                /* Save first result. */
                cm2 = x_cm;
@@ -1580,7 +1628,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
-               dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+               dist1 = MULT16_32_Q15(ctx.w[0], celt_inner_prod(X_save, X, N, arch))
+                     + MULT16_32_Q15(ctx.w[1], celt_inner_prod(Y_save, Y, N, arch));
                if (dist0 >= dist1) {
                   x_cm = cm2;
                   *ec = ec_save2;
diff --git a/doc/stereo.lyx b/doc/stereo.lyx
new file mode 100644
index 00000000..202af5f0
--- /dev/null
+++ b/doc/stereo.lyx
@@ -0,0 +1,545 @@
+#LyX 2.2 created this file. For more info see http://www.lyx.org/
+\lyxformat 508
+\begin_document
+\begin_header
+\save_transient_properties true
+\origin unavailable
+\textclass article
+\use_default_options true
+\maintain_unincluded_children false
+\language english
+\language_package default
+\inputencoding auto
+\fontencoding global
+\font_roman "default" "default"
+\font_sans "default" "default"
+\font_typewriter "default" "default"
+\font_math "auto" "auto"
+\font_default_family default
+\use_non_tex_fonts false
+\font_sc false
+\font_osf false
+\font_sf_scale 100 100
+\font_tt_scale 100 100
+\graphics default
+\default_output_format default
+\output_sync 0
+\bibtex_command default
+\index_command default
+\paperfontsize default
+\spacing single
+\use_hyperref false
+\papersize default
+\use_geometry true
+\use_package amsmath 1
+\use_package amssymb 1
+\use_package cancel 1
+\use_package esint 1
+\use_package mathdots 1
+\use_package mathtools 1
+\use_package mhchem 1
+\use_package stackrel 1
+\use_package stmaryrd 1
+\use_package undertilde 1
+\cite_engine basic
+\cite_engine_type default
+\biblio_style plain
+\use_bibtopic false
+\use_indices false
+\paperorientation portrait
+\suppress_date false
+\justification true
+\use_refstyle 1
+\index Index
+\shortcut idx
+\color #008000
+\end_index
+\leftmargin 2cm
+\topmargin 2cm
+\rightmargin 2cm
+\bottommargin 2cm
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation indent
+\paragraph_indentation default
+\quotes_language english
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+\tracking_changes false
+\output_changes false
+\html_math_output 0
+\html_css_as_file 0
+\html_be_strict false
+\end_header
+
+\begin_body
+
+\begin_layout Title
+Stereo Quantization Improvements in Opus/CELT
+\end_layout
+
+\begin_layout Author
+Jean-Marc Valin
+\end_layout
+
+\begin_layout Section
+Modifying stereo input vectors
+\end_layout
+
+\begin_layout Standard
+Let 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ denote the normalized vector for a band of the left channel and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ denote the corresponding vector for the right channel.
+ When quantizing stereo, the first step is to quantize the angle derived
+ from the ratio of the magnitude of the side to the magnitude of the mid
+\begin_inset Formula 
+\[
+\theta=\arctan\frac{\left\Vert \mathbf{S}\right\Vert }{\left\Vert \mathbf{M}\right\Vert }\,,
+\]
+
+\end_inset
+
+where 
+\begin_inset Formula $\mathbf{M}=\mathbf{x}+\mathbf{y}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{S}=\mathbf{x}-\mathbf{y}$
+\end_inset
+
+.
+ 
+\end_layout
+
+\begin_layout Standard
+It can be shown that the angle 
+\begin_inset Formula $\theta$
+\end_inset
+
+ is related to the angle 
+\begin_inset Formula $\phi$
+\end_inset
+
+ between 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ by 
+\begin_inset Formula $\phi=2\theta$
+\end_inset
+
+, where
+\begin_inset Formula 
+\[
+\cos\phi=\mathbf{x}^{T}\mathbf{y}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+When 
+\begin_inset Formula $\theta$
+\end_inset
+
+ is quantized to 
+\begin_inset Formula $\hat{\theta}$
+\end_inset
+
+, it causes distortion to both channels.
+ The distortion (sum of squared errors) for each channel is given by the
+ law of cosines to be
+\begin_inset Formula 
+\[
+D=2-2\cos\delta\,,
+\]
+
+\end_inset
+
+where 
+\begin_inset Formula $\delta$
+\end_inset
+
+ is the angle by which each of the vectors was 
+\emph on
+moved
+\emph default
+ by the quantization.
+ Since both channels are affected by the same amount, 
+\begin_inset Formula $\delta=\frac{\hat{\phi}-\phi}{2}=\hat{\theta}-\theta$
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+However, we may want to change that behaviour when the two channels differ
+ in loudness.
+ Let 
+\begin_inset Formula $w_{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $w_{y}$
+\end_inset
+
+ be the weight we assign to each of the channels.
+ The total weighted distortion then becomes
+\end_layout
+
+\begin_layout Standard
+\begin_inset Formula 
+\[
+D=w_{x}\left(2-2\cos\delta_{x}\right)+w_{y}\left(2-2\cos\delta_{y}\right)\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Let 
+\begin_inset Formula $S=\delta_{x}+\delta_{y}=\hat{\phi}-\phi$
+\end_inset
+
+ be a known value (from the quantization process).
+ We can minimize the weighted distortion by substituting 
+\begin_inset Formula $\delta_{y}=S-\delta_{x}$
+\end_inset
+
+ and solving:
+\begin_inset Formula 
+\begin{align*}
+\frac{\partial D}{\partial\delta_{x}}=2w_{x}\sin\delta_{x}-2w_{y}\sin\left(S-\delta_{x}\right) & =0\\
+2w_{x}\sin\delta_{x}-2w_{y}\left(\sin S\cos\delta_{x}-\cos S\sin\delta_{x}\right) & =0\\
+w_{x}\sin\delta_{x}+w_{y}\cos S\sin\delta_{x} & =w_{y}\sin S\cos\delta_{x}\\
+\sin\delta_{x}\cdot & \left(w_{x}+w_{y}\cos S\right)=w_{y}\sin S\cos\delta_{x}\\
+\tan\delta_{x} & =\frac{w_{y}\sin S}{w_{x}+w_{y}\cos S}\,.
+\end{align*}
+
+\end_inset
+
+Using a similar derivation, we can find
+\begin_inset Formula 
+\[
+\tan\delta_{y}=\frac{w_{x}\sin S}{w_{y}+w_{x}\cos S}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Given these values, we want to compute 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ that will be quantized instead of 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+.
+ Since quantizing 
+\begin_inset Formula $\theta$
+\end_inset
+
+ keeps 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ in the same plane, we also want 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ to lie on the same plane as 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+.
+ We express them as linear combinations of 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ such that the angle between 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ is 
+\begin_inset Formula $\delta_{x}$
+\end_inset
+
+ and the angle between 
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ is 
+\begin_inset Formula $\delta_{y}$
+\end_inset
+
+.
+ To make the calculation easier, we are not yet concerned about the norm
+ of 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+.
+ Let us consider 
+\begin_inset Formula $\tilde{\mathbf{x}}=\mathbf{x}+\alpha_{x}\mathbf{y}$
+\end_inset
+
+, the angle between 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ is given by
+\begin_inset Formula 
+\[
+\delta_{x}=\arctan\frac{\alpha_{x}\sin\phi}{1+\alpha_{x}\cos\phi}\,,
+\]
+
+\end_inset
+
+where again 
+\begin_inset Formula $\phi$
+\end_inset
+
+ is the angle between 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+.
+ Solving for 
+\begin_inset Formula $\alpha_{x}$
+\end_inset
+
+, we get
+\begin_inset Formula 
+\begin{align*}
+\tan\delta_{x}\left(1+\alpha_{x}\cos\phi\right) & =\alpha_{x}\sin\phi\\
+\tan\delta_{x} & =\alpha_{x}\sin\phi-\alpha_{x}\cos\phi\tan\delta_{x}\\
+\alpha_{x} & =\frac{\tan\delta_{x}}{\sin\phi-\cos\phi\tan\delta_{x}}\,.
+\end{align*}
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Since we are not concerned with scaling, we can avoid the division by simply
+ defining a denormalized 
+\begin_inset Formula 
+\[
+\tilde{\mathbf{x}}_{d}=g_{xx}\mathbf{x}+g_{xy}\mathbf{y}\,,
+\]
+
+\end_inset
+
+with
+\begin_inset Formula 
+\begin{align*}
+g_{xx} & =\sin\phi-\cos\phi\tan\delta_{x}\\
+g_{xy} & =\tan\delta_{x}\,.
+\end{align*}
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Using the law of cosines, the squared magnitude of 
+\begin_inset Formula $\tilde{\mathbf{x}}_{d}$
+\end_inset
+
+ is given by
+\begin_inset Formula 
+\begin{align*}
+\left\Vert \tilde{\mathbf{x}}_{d}\right\Vert ^{2} & =\tan^{2}\delta_{x}+\left(\sin\phi-\cos\phi\tan\delta_{x}\right)^{2}+2\cos\phi\tan\delta_{x}\left(\sin\phi-\cos\phi\tan\delta_{x}\right)\\
+ & =\tan^{2}\delta_{x}+\sin^{2}\phi+\cos^{2}\phi\tan^{2}\delta_{x}-2\sin\phi\cos\phi\tan\delta_{x}+2\cos\phi\tan\delta_{x}\sin\phi-2\cos^{2}\phi\tan^{2}\delta_{x}\\
+ & =\tan^{2}\delta_{x}+\sin^{2}\phi-\cos^{2}\phi\tan^{2}\delta_{x}\\
+ & =\left(1-\cos^{2}\phi\right)\tan^{2}\delta_{x}+\sin^{2}\phi\\
+ & =\sin^{2}\phi\left(1+\tan^{2}\delta_{x}\right)\\
+ & =\frac{\sin^{2}\phi}{\cos^{2}\delta_{x}}\,.
+\end{align*}
+
+\end_inset
+
+Knowing this, we can compute a normalized 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ as 
+\begin_inset Formula 
+\[
+\tilde{\mathbf{x}}=\frac{\cos\delta_{x}}{\sin\phi}\tilde{\mathbf{x}}_{d}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+We can then compute 
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ similarly.
+ Replacing 
+\begin_inset Formula $\mathbf{x}$
+\end_inset
+
+ and 
+\begin_inset Formula $\mathbf{y}$
+\end_inset
+
+ with 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ in the quantization process, we can give more weight to one channel or
+ the other.
+ When trying multiple values of 
+\begin_inset Formula $\hat{\theta}$
+\end_inset
+
+, we will derive a different value of 
+\begin_inset Formula $\tilde{\mathbf{x}}$
+\end_inset
+
+ and 
+\begin_inset Formula $\tilde{\mathbf{y}}$
+\end_inset
+
+ for each 
+\begin_inset Formula $\hat{\theta}$
+\end_inset
+
+.
+ 
+\end_layout
+
+\begin_layout Section
+Stereo bit allocation
+\end_layout
+
+\begin_layout Standard
+By dumping quantization data from the encoder and looking at the normalized
+ distortion as a function of the angle 
+\begin_inset Formula $\phi$
+\end_inset
+
+ and the rate, we have come up with the following approximation that best
+ fits the data with a simple enough function:
+\end_layout
+
+\begin_layout Standard
+\begin_inset Formula 
+\[
+D=3\left(4^{-r}\sin\phi+4^{-2r}\left(1-\sin\phi\right)\right)\,,
+\]
+
+\end_inset
+
+where 
+\begin_inset Formula $r$
+\end_inset
+
+ is the bit depth
+\begin_inset Formula 
+\[
+r=\frac{b}{2N-1}\,.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Solving for 
+\begin_inset Formula $r$
+\end_inset
+
+, we get
+\begin_inset Formula 
+\[
+R=\frac{-3\sin\phi+\sqrt{9\sin^{2}\phi+12D\left(1-\sin\phi\right)}}{6\left(1-\sin\phi\right)}\,,
+\]
+
+\end_inset
+
+with 
+\begin_inset Formula $r=-\log_{4}R$
+\end_inset
+
+.
+ 
+\end_layout
+
+\end_body
+\end_document
author	Jean-Marc Valin <jmvalin@jmvalin.ca>	2016-07-29 16:30:34 -0400
committer	Jean-Marc Valin <jmvalin@jmvalin.ca>	2016-08-05 12:56:52 -0400
commit	48ee78d2ee498abd98b6a70d41707b087a79121c (patch)
tree	ea008539098ede19c19497c87db78f3a4443a667
parent	80e80c2f0540eeaaae3c3cfcf62ebd41b5a6014c (diff)
download	opus-exp_stereo5.tar.gz