From 48ee78d2ee498abd98b6a70d41707b087a79121c Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 29 Jul 2016 16:30:34 -0400 Subject: Partially collapsing the stereo image when channels have different energy We can lower the distortion of the most important channel, at the expense of the other channel. --- celt/bands.c | 65 ++++++- doc/stereo.lyx | 545 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 602 insertions(+), 8 deletions(-) create mode 100644 doc/stereo.lyx diff --git a/celt/bands.c b/celt/bands.c index d53f0280..a089d31e 100644 --- a/celt/bands.c +++ b/celt/bands.c @@ -409,9 +409,55 @@ static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, con } } -static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) +static void stereo_split_collapse(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, + int N, opus_val16 w[2], int utheta, int itheta) { int j; +#ifdef FIXED_POINT + (void)utheta; + (void)itheta; +#else + /* When we care more about one of the channels, adjust x and y to minimize + the weighted distortion. See doc/stereo.lyx for more details. */ + if (w[0] != w[1]) + { + float phi; + float S; + float tan_dx, tan_dy; + float cos_dx, cos_dy; + float cos_phi, sin_phi; + float gxx, gxy, gyx, gyy; + float gx, gy; + phi = utheta*M_PI/16384; + cos_phi = cos(phi); + sin_phi = sin(phi); + /* S=dx+dy is the error due to quantizing phi. */ + S = (utheta-itheta)*M_PI/16384.f; + /* Angular displacement for x and y. */ + tan_dx = w[1]*sin(S)/(w[0] + w[1]*cos(S)); + tan_dy = w[0]*sin(S)/(w[1] + w[0]*cos(S)); + /* Identify: cos(x) = 1/sqrt(1 + x^2) */ + cos_dx = 1.f/sqrt(1.f + tan_dx*tan_dx); + cos_dy = 1.f/sqrt(1.f + tan_dy*tan_dy); + /* Normalization factors such that x and y end up with unit norm. */ + gx = cos_dx/(EPSILON+sin_phi); + gy = cos_dy/(EPSILON+sin_phi); + /* Linear combinations of x and y that produce the desired rotations + by dx and dy. 
*/ + gxy = gx*tan_dx; + gxx = gx*(sin_phi - cos_phi*tan_dx); + gyx = gy*tan_dy; + gyy = gy*(sin_phi - cos_phi*tan_dy); + for (j=0;jarch); + utheta = itheta = stereo_itheta(X, Y, stereo, N, ctx->arch); } tell = ec_tell_frac(ec); if (qn!=1) @@ -825,7 +872,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, if (itheta==0) intensity_stereo(m, X, Y, bandE, i, N); else - stereo_split(X, Y, N); + stereo_split_collapse(X, Y, N, ctx->w, utheta, itheta); } /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. Let's do that at higher complexity */ @@ -1435,6 +1482,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, ctx.arch = arch; ctx.resynth = resynth; ctx.theta_round = 0; + ctx.w[0] = ctx.w[1] = Q15ONE; for (i=start;inbEBands], w); + compute_channel_weights(bandE[i], bandE[i+m->nbEBands], ctx.w); /* Make a copy. */ cm = x_cm|y_cm; ec_save = *ec; @@ -1554,7 +1601,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, effective_lowband != -1 ? norm+effective_lowband : NULL, LM, last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm); - dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch)); + dist0 = MULT16_32_Q15(ctx.w[0], celt_inner_prod(X_save, X, N, arch)) + + MULT16_32_Q15(ctx.w[1], celt_inner_prod(Y_save, Y, N, arch)); /* Save first result. */ cm2 = x_cm; @@ -1580,7 +1628,8 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, effective_lowband != -1 ? 
norm+effective_lowband : NULL, LM, last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm); - dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch)); + dist1 = MULT16_32_Q15(ctx.w[0], celt_inner_prod(X_save, X, N, arch)) + + MULT16_32_Q15(ctx.w[1], celt_inner_prod(Y_save, Y, N, arch)); if (dist0 >= dist1) { x_cm = cm2; *ec = ec_save2; diff --git a/doc/stereo.lyx b/doc/stereo.lyx new file mode 100644 index 00000000..202af5f0 --- /dev/null +++ b/doc/stereo.lyx @@ -0,0 +1,545 @@ +#LyX 2.2 created this file. For more info see http://www.lyx.org/ +\lyxformat 508 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass article +\use_default_options true +\maintain_unincluded_children false +\language english +\language_package default +\inputencoding auto +\fontencoding global +\font_roman "default" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command default +\index_command default +\paperfontsize default +\spacing single +\use_hyperref false +\papersize default +\use_geometry true +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\index Index +\shortcut idx +\color #008000 +\end_index +\leftmargin 2cm +\topmargin 2cm +\rightmargin 2cm +\bottommargin 2cm +\secnumdepth 3 +\tocdepth 3 
+\paragraph_separation indent +\paragraph_indentation default +\quotes_language english +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes false +\output_changes false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\end_header + +\begin_body + +\begin_layout Title +Stereo Quantization Improvements in Opus/CELT +\end_layout + +\begin_layout Author +Jean-Marc Valin +\end_layout + +\begin_layout Section +Modifying stereo input vectors +\end_layout + +\begin_layout Standard +Let +\begin_inset Formula $\mathbf{x}$ +\end_inset + + denote the normalized vector for a band of the left channel and +\begin_inset Formula $\mathbf{y}$ +\end_inset + + denote the corresponding vector for the right channel. + When quantizing stereo, the first step is to quantize the angle derived + from the ratio of the magnitude of the side to the magnitude of the mid +\begin_inset Formula +\[ +\theta=\arctan\frac{\left\Vert \mathbf{S}\right\Vert }{\left\Vert \mathbf{M}\right\Vert }\,, +\] + +\end_inset + +where +\begin_inset Formula $\mathbf{M}=\mathbf{x}+\mathbf{y}$ +\end_inset + + and +\begin_inset Formula $\mathbf{S}=\mathbf{x}-\mathbf{y}$ +\end_inset + +. + +\end_layout + +\begin_layout Standard +It can be shown that the angle +\begin_inset Formula $\theta$ +\end_inset + + is related to the angle +\begin_inset Formula $\phi$ +\end_inset + + between +\begin_inset Formula $\mathbf{x}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + + by +\begin_inset Formula $\phi=2\theta$ +\end_inset + +, where +\begin_inset Formula +\[ +\cos\phi=\mathbf{x}^{T}\mathbf{y}\,. +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +When +\begin_inset Formula $\theta$ +\end_inset + + is quantized to +\begin_inset Formula $\hat{\theta}$ +\end_inset + +, it causes distortion to both channels.
+ The distortion (sum of squared errors) for each channel is given by the + law of cosines to be +\begin_inset Formula +\[ +D=2-2\cos\delta\,, +\] + +\end_inset + +where +\begin_inset Formula $\delta$ +\end_inset + + is the angle by which each of the vectors was +\emph on +moved +\emph default + by the quantization. + Since both channels are affected by the same amount, +\begin_inset Formula $\delta=\frac{\hat{\phi}-\phi}{2}=\hat{\theta}-\theta$ +\end_inset + +. +\end_layout + +\begin_layout Standard +However, we may want to change that behaviour when the two channels differ + in loudness. + Let +\begin_inset Formula $w_{x}$ +\end_inset + + and +\begin_inset Formula $w_{y}$ +\end_inset + + be the weights we assign to each of the channels. + The total weighted distortion then becomes +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +D=w_{x}\left(2-2\cos\delta_{x}\right)+w_{y}\left(2-2\cos\delta_{y}\right)\,. +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +Let +\begin_inset Formula $S=\delta_{x}+\delta_{y}=\hat{\phi}-\phi$ +\end_inset + + be a known value (from the quantization process). + We can minimize the weighted distortion by substituting +\begin_inset Formula $\delta_{y}=S-\delta_{x}$ +\end_inset + + and solving: +\begin_inset Formula +\begin{align*} +\frac{\partial D}{\partial\delta_{x}}=2w_{x}\sin\delta_{x}-2w_{y}\sin\left(S-\delta_{x}\right) & =0\\ +2w_{x}\sin\delta_{x}-2w_{y}\left(\sin S\cos\delta_{x}-\cos S\sin\delta_{x}\right) & =0\\ +w_{x}\sin\delta_{x}+w_{y}\cos S\sin\delta_{x} & =w_{y}\sin S\cos\delta_{x}\\ +\sin\delta_{x}\cdot\left(w_{x}+w_{y}\cos S\right) & =w_{y}\sin S\cos\delta_{x}\\ +\tan\delta_{x} & =\frac{w_{y}\sin S}{w_{x}+w_{y}\cos S}\,. +\end{align*} + +\end_inset + +Using a similar derivation, we can find +\begin_inset Formula +\[ +\tan\delta_{y}=\frac{w_{x}\sin S}{w_{y}+w_{x}\cos S}\,.
+\] + +\end_inset + + +\end_layout + +\begin_layout Standard +Given these values, we want to compute +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + and +\begin_inset Formula $\tilde{\mathbf{y}}$ +\end_inset + + that will be quantized instead of +\begin_inset Formula $\mathbf{x}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + +. + Since quantizing +\begin_inset Formula $\theta$ +\end_inset + + keeps +\begin_inset Formula $\mathbf{x}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + + in the same plane, we also want +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + and +\begin_inset Formula $\tilde{\mathbf{y}}$ +\end_inset + + to lie in the same plane as +\begin_inset Formula $\mathbf{x}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + +. + We express them as linear combinations of +\begin_inset Formula $\mathbf{x}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + + such that the angle between +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + and +\begin_inset Formula $\mathbf{x}$ +\end_inset + + is +\begin_inset Formula $\delta_{x}$ +\end_inset + + and the angle between +\begin_inset Formula $\tilde{\mathbf{y}}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + + is +\begin_inset Formula $\delta_{y}$ +\end_inset + +. + To make the calculation easier, we are not yet concerned about the norm + of +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + and +\begin_inset Formula $\tilde{\mathbf{y}}$ +\end_inset + +.
+ If we let +\begin_inset Formula $\tilde{\mathbf{x}}=\mathbf{x}+\alpha_{x}\mathbf{y}$ +\end_inset + +, the angle between +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + and +\begin_inset Formula $\mathbf{x}$ +\end_inset + + is given by +\begin_inset Formula +\[ +\delta_{x}=\arctan\frac{\alpha_{x}\sin\phi}{1+\alpha_{x}\cos\phi}\,, +\] + +\end_inset + +where again +\begin_inset Formula $\phi$ +\end_inset + + is the angle between +\begin_inset Formula $\mathbf{x}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + +. + Solving for +\begin_inset Formula $\alpha_{x}$ +\end_inset + +, we get +\begin_inset Formula +\begin{align*} +\tan\delta_{x}\left(1+\alpha_{x}\cos\phi\right) & =\alpha_{x}\sin\phi\\ +\tan\delta_{x} & =\alpha_{x}\sin\phi-\alpha_{x}\cos\phi\tan\delta_{x}\\ +\alpha_{x} & =\frac{\tan\delta_{x}}{\sin\phi-\cos\phi\tan\delta_{x}}\,. +\end{align*} + +\end_inset + + +\end_layout + +\begin_layout Standard +Since we are not concerned with scaling, we can avoid the division by simply + defining a denormalized +\begin_inset Formula +\[ +\tilde{\mathbf{x}}_{d}=g_{xx}\mathbf{x}+g_{xy}\mathbf{y}\,, +\] + +\end_inset + +with +\begin_inset Formula +\begin{align*} +g_{xx} & =\sin\phi-\cos\phi\tan\delta_{x}\\ +g_{xy} & =\tan\delta_{x}\,.
+\end{align*} + +\end_inset + + +\end_layout + +\begin_layout Standard +Using the law of cosines, the squared magnitude of +\begin_inset Formula $\tilde{\mathbf{x}}_{d}$ +\end_inset + + is given by +\begin_inset Formula +\begin{align*} +\left\Vert \tilde{\mathbf{x}}_{d}\right\Vert ^{2} & =\tan^{2}\delta_{x}+\left(\sin\phi-\cos\phi\tan\delta_{x}\right)^{2}+2\cos\phi\tan\delta_{x}\left(\sin\phi-\cos\phi\tan\delta_{x}\right)\\ + & =\tan^{2}\delta_{x}+\sin^{2}\phi+\cos^{2}\phi\tan^{2}\delta_{x}-2\sin\phi\cos\phi\tan\delta_{x}+2\cos\phi\tan\delta_{x}\sin\phi-2\cos^{2}\phi\tan^{2}\delta_{x}\\ + & =\tan^{2}\delta_{x}+\sin^{2}\phi-\cos^{2}\phi\tan^{2}\delta_{x}\\ + & =\left(1-\cos^{2}\phi\right)\tan^{2}\delta_{x}+\sin^{2}\phi\\ + & =\sin^{2}\phi\left(1+\tan^{2}\delta_{x}\right)\\ + & =\frac{\sin^{2}\phi}{\cos^{2}\delta_{x}}\,. +\end{align*} + +\end_inset + +Knowing this, we can compute a normalized +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + as +\begin_inset Formula +\[ +\tilde{\mathbf{x}}=\frac{\cos\delta_{x}}{\sin\phi}\tilde{\mathbf{x}}_{d}\,. +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +We can then compute +\begin_inset Formula $\tilde{\mathbf{y}}$ +\end_inset + + similarly. + Replacing +\begin_inset Formula $\mathbf{x}$ +\end_inset + + and +\begin_inset Formula $\mathbf{y}$ +\end_inset + + with +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + and +\begin_inset Formula $\tilde{\mathbf{y}}$ +\end_inset + + in the quantization process, we can give more weight to one channel or + the other. + When trying multiple values of +\begin_inset Formula $\hat{\theta}$ +\end_inset + +, we will derive a different value of +\begin_inset Formula $\tilde{\mathbf{x}}$ +\end_inset + + and +\begin_inset Formula $\tilde{\mathbf{y}}$ +\end_inset + + for each +\begin_inset Formula $\hat{\theta}$ +\end_inset + +.
+ +\end_layout + +\begin_layout Section +Stereo bit allocation +\end_layout + +\begin_layout Standard +By dumping quantization data from the encoder and looking at the normalized + distortion as a function of the angle +\begin_inset Formula $\phi$ +\end_inset + + and the rate, we have come up with the following approximation that best + fits the data with a simple enough function: +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +D=3\left(4^{-r}\sin\phi+4^{-2r}\left(1-\sin\phi\right)\right)\,, +\] + +\end_inset + +where +\begin_inset Formula $r$ +\end_inset + + is the bit depth +\begin_inset Formula +\[ +r=\frac{b}{2N-1}\,. +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +Solving for +\begin_inset Formula $R=4^{-r}$ +\end_inset + +, we get +\begin_inset Formula +\[ +R=\frac{-3\sin\phi+\sqrt{9\sin^{2}\phi+12D\left(1-\sin\phi\right)}}{6\left(1-\sin\phi\right)}\,, +\] + +\end_inset + +with +\begin_inset Formula $r=-\log_{4}R$ +\end_inset + +. + +\end_layout + +\end_body +\end_document -- cgit v1.2.1