From 72273000ece2713704fba5e3402aef7a3e009c10 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 20 Apr 2012 10:26:08 -0400 Subject: Misc changes to address Robert Sparks' comments See http://www.ietf.org/mail-archive/web/codec/current/msg02833.html Still more changes to come --- Makefile.draft | 4 +- README.draft | 5 +- celt/bands.c | 3 +- celt/kiss_fft.h | 13 --- celt/vq.c | 22 +---- doc/draft-ietf-codec-opus.xml | 193 +++++++++++++++++++++++++++++++----------- include/opus_defines.h | 2 +- src/opus_decoder.c | 4 +- tests/run_vectors.sh | 4 +- 9 files changed, 158 insertions(+), 92 deletions(-) diff --git a/Makefile.draft b/Makefile.draft index 501f76eb..800cceec 100644 --- a/Makefile.draft +++ b/Makefile.draft @@ -36,8 +36,7 @@ LIBPREFIX = lib LIBSUFFIX = .a OBJSUFFIX = .o -CC = $(TOOLCHAIN_PREFIX)gcc$(TOOLCHAIN_SUFFIX) -CXX = $(TOOLCHAIN_PREFIX)g++$(TOOLCHAIN_SUFFIX) +CC = $(TOOLCHAIN_PREFIX)cc$(TOOLCHAIN_SUFFIX) AR = $(TOOLCHAIN_PREFIX)ar RANLIB = $(TOOLCHAIN_PREFIX)ranlib CP = $(TOOLCHAIN_PREFIX)cp @@ -79,7 +78,6 @@ LDFLAGS += $(call ldflags-from-ldlibdirs,$(LDLIBDIRS)) LDLIBS += $(call ldlibs-from-libs,$(LIBS)) COMPILE.c.cmdline = $(CC) -c $(CFLAGS) -o $@ $< -COMPILE.cpp.cmdline = $(CXX) -c $(CFLAGS) -o $@ $< LINK.o = $(CC) $(LDPREFLAGS) $(LDFLAGS) LINK.o.cmdline = $(LINK.o) $^ $(LDLIBS) -o $@$(EXESUFFIX) diff --git a/README.draft b/README.draft index 8dbe48d8..8128e0b2 100644 --- a/README.draft +++ b/README.draft @@ -6,10 +6,11 @@ If this does not work, or if you want to change the default configuration (e.g., to compile for a fixed-point architecture), simply edit the options in the Makefile. 
-To build from the git repository instead of using this draft, follow these +To build from the git repository instead of using this RFC, follow these steps: -1) Clone the repository: +1) Clone the repository (latest implementation of this standard at the time +of publication) % git clone git://git.opus-codec.org/opus.git % cd opus diff --git a/celt/bands.c b/celt/bands.c index 4b52eb5a..7af05f29 100644 --- a/celt/bands.c +++ b/celt/bands.c @@ -99,8 +99,7 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), EXTRACT16(VSHR32(X[j+c*N],shift))); } while (++jnbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { bandE[i+c*m->nbEBands] = EPSILON; diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h index fe5a7b4d..66332e3b 100644 --- a/celt/kiss_fft.h +++ b/celt/kiss_fft.h @@ -37,19 +37,6 @@ extern "C" { #endif -/* - ATTENTION! - If you would like a : - -- a utility that will handle the caching of fft objects - -- real-only (no imaginary time component ) FFT - -- a multi-dimensional FFT - -- a command-line utility to perform ffts - -- a command-line utility to perform fast-convolution filtering - - Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c - in the tools/ directory. -*/ - #ifdef USE_SIMD # include # define kiss_fft_scalar __m128 diff --git a/celt/vq.c b/celt/vq.c index c743f9d7..40c9a07b 100644 --- a/celt/vq.c +++ b/celt/vq.c @@ -70,14 +70,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int opus_val16 gain, theta; int stride2=0; int factor; - /*int i; - if (len>=30) - { - for (i=0;i=len || spread==SPREAD_NONE) return; factor = SPREAD_FACTOR[spread-1]; @@ -91,9 +84,8 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int if (len>=8*stride) { stride2 = 1; - /* This is just a simple way of computing sqrt(len/stride) with rounding. - It's basically incrementing long as (stride2+0.5)^2 < len/stride. 
- I _think_ it is bit-exact */ + /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. + It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ while ((stride2*stride2+stride2)*stride + (stride>>2) < len) stride2++; } @@ -113,13 +105,6 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int exp_rotation1(X+i*len, len, stride2, s, -c); } } - /*if (len>=30) - { - for (i=0;i. It is composed of a linear - prediction (LP)-based layer and a Modified Discrete Cosine Transform - (MDCT)-based layer. + prediction (LP)-based layer and a Modified Discrete Cosine Transform + (MDCT)-based layer. The main idea behind using two layers is that in speech, linear prediction techniques (such as CELP) code low frequencies more efficiently than transform (e.g., MDCT) domain techniques, while the situation is reversed for music and @@ -273,8 +273,7 @@ Therefore, if an application wishes to process a signal sampled at 32 kHz, -The LP layer is based on the - SILK codec +The LP layer is based on the SILK codec . It supports NB, MB, or WB audio and frame sizes from 10 ms to 60 ms, and requires an additional 5 ms look-ahead for noise shaping estimation. @@ -290,9 +289,7 @@ This document does not serve to define that format, but those interested in the -The MDCT layer is based on the - CELT codec - . +The MDCT layer is based on the CELT codec . It supports NB, WB, SWB, or FB audio and frame sizes from 2.5 ms to 20 ms, and requires an additional 2.5 ms look-ahead due to the overlapping MDCT windows. @@ -436,7 +433,7 @@ encoder, the complexity is selected using an integer from 0 to 10, where 0 is the lowest complexity and 10 is the highest. 
Examples of computations for which such trade-offs may occur are: -The order of the pitch analysis whitening filter, +The order of the pitch analysis whitening filter , The order of the short-term noise shaping filter, The number of states in delayed decision quantization of the residual signal, and @@ -474,9 +471,8 @@ the default. However, in some (rare) applications, constant bitrate (CBR) is required. There are two main reasons to operate in CBR mode: When the transport only supports a fixed size for each compressed frame -When security is important and the input audio -not a normal conversation but is highly constrained (e.g. yes/no, recorded prompts) - +When encryption is used for an audio stream that is either highly constrained + (e.g. yes/no, recorded prompts) or highly sensitive When low-latency transmission is required over a relatively slow connection, then @@ -734,9 +730,9 @@ This makes, for example, a 2-byte code 2 packet with a second byte in the range -
+
-Code 3 packets may encode an arbitrary number of frames, as well as additional +Code 3 packets signal the number of frames, as well as additional padding, called "Opus padding" to indicate that this padding is added at the Opus layer, rather than at the transport layer. Code 3 packets MUST have at least 2 bytes. @@ -1271,10 +1267,10 @@ The raw bits used by the CELT layer are packed at the end of the packet, with The reference implementation reads them using ec_dec_bits() (entdec.c). Because the range decoder must read several bytes ahead in the stream, as described in , the input consumed by the - raw bits MAY overlap with the input consumed by the range coder, and a decoder + raw bits may overlap with the input consumed by the range coder, and a decoder MUST allow this. The format should render it impossible to attempt to read more raw bits than - there are actual bits in the frame, though a decoder MAY wish to check for + there are actual bits in the frame, though a decoder may wish to check for this and report an error.
@@ -1388,9 +1384,9 @@ Reading raw bits increases nbits_total by the number of raw bits read.
-The whole number of bits buffered in rng may be estimated via l = ilog(rng). +The whole number of bits buffered in rng may be estimated via lg = ilog(rng). ec_tell() then becomes a simple matter of removing these bits from the total. -It returns (nbits_total - l). +It returns (nbits_total - lg). In a newly initialized decoder, before any symbols have been read, this reports @@ -1403,7 +1399,7 @@ This is the bit reserved for termination of the encoder. ec_tell_frac() estimates the number of bits buffered in rng to fractional precision. -Since rng must be greater than 2**23 after renormalization, l must be at least +Since rng must be greater than 2**23 after renormalization, lg must be at least 24. Let
@@ -1414,7 +1410,7 @@ r_Q15 = rng >> (l-16) ,
so that 32768 <= r_Q15 < 65536, an unsigned Q15 value representing the fractional part of rng. -Then the following procedure can be used to add one bit of precision to l. +Then the following procedure can be used to add one bit of precision to lg. First, update
@@ -1422,11 +1418,11 @@ First, update r_Q15 = (r_Q15*r_Q15) >> 15 . ]]>
-Then add the 16th bit of r_Q15 to l via +Then add the 16th bit of r_Q15 to lg via
<![CDATA[ -l = 2*l + (r_Q15 >> 16) . +lg = 2*lg + (r_Q15 >> 16) . ]]>
Finally, if this bit was a 1, reduce r_Q15 by a factor of two via @@ -1439,8 +1435,8 @@ r_Q15 = r_Q15 >> 1 , so that it once again lies in the range 32768 <= r_Q15 < 65536.
-This procedure is repeated three times to extend l to 1/8th bit precision. -ec_tell_frac() then returns (nbits_total*8 - l). +This procedure is repeated three times to extend lg to 1/8th bit precision. +ec_tell_frac() then returns (nbits_total*8 - lg).
@@ -5301,7 +5297,7 @@ resolution is shown in the tables below. A negative TF adjustment means that the temporal resolution is increased, while a positive TF adjustment means that the frequency resolution is increased. -Changes in TF resolution are implemented using the Hadamard transform. To increase +Changes in TF resolution are implemented using the Hadamard transform . To increase the time resolution by N, N "levels" of the Hadamard transform are applied to the decoded vector for each interleaved MDCT vector. To increase the frequency resolution (assumes a transient frame), then N levels of the Hadamard transform are applied @@ -5459,9 +5455,9 @@ artifact than if the frame were dropped after decoding. A decoder MAY employ a more sophisticated drift compensation method. For example, the -NetEQ component +NetEQ component of the -WebRTC.org codebase +Google WebRTC codebase compensates for drift by adding or removing one period when the signal is highly periodic. The reference implementation of Opus allows a caller to learn whether the current frame's signal is highly @@ -6822,7 +6818,7 @@ of the scalar quantizer, and as a result the quantization error of each value depends on the quantization decision of the previous value. This dependency is exploited by the delayed decision mechanism to search for a quantization sequency with best R/D performance -with a Viterbi-like algorithm . +with a Viterbi-like algorithm . The quantizer processes the residual LSF vector in reverse order (i.e., it starts with the highest residual LSF value). This is done because the prediction works slightly @@ -7274,14 +7270,15 @@ are built and <vector path> is the directory containing the test vectors.
-To complement the Opus specification, the "Opus Custom" codec is defined to +Opus Custom is an OPTIONAL part of the specification that is defined to handle special sample rates and frame rates that are not supported by the main Opus specification. Use of Opus Custom is discouraged for all but very special applications for which a frame size different from 2.5, 5, 10, or 20 ms is -needed (for either complexity or latency reasons). Such applications will not -be compatible with the "main" Opus codec. In Opus Custom operation, -only the CELT layer is available, which is available using the celt_* function -calls in celt.h. +needed (for either complexity or latency reasons). Because Opus Custom is +optional, applications using that part of the specification may not be compatible +with other applications implementing Opus. In Opus Custom operation, +only the CELT layer is available, using the opus_custom_* function +calls in opus_custom.h.
@@ -7338,7 +7335,7 @@ Sending the decoder packets generated by a version of the reference encoder
In all of the conditions above, both the encoder and the decoder were run - inside the Valgrind memory + inside the Valgrind memory debugger, which tracks reads and writes to invalid memory regions as well as the use of uninitialized memory. There were no errors reported on any of the tested conditions. @@ -7407,7 +7404,7 @@ name of work, or endorsement information.
- + SILK Speech Codec @@ -7442,7 +7439,7 @@ Robust and Efficient Quantization of Speech LSP Parameters Using Structured Vect - + Constrained-Energy Lapped Transform (CELT) Codec @@ -7472,8 +7469,8 @@ Robust and Efficient Quantization of Speech LSP Parameters Using Structured Vect - - + + @@ -7536,6 +7533,98 @@ Robust and Efficient Quantization of Speech LSP Parameters Using Structured Vect + + +Valgrind website + + + + + + +Google NetEQ code + + + + + + +Google WebRTC code + + + + + + + +Opus Git Repository + + + + + + +Opus website + + + + + + +Opus Testvectors (website) + + + + + + +Opus Testvectors (proceedings) + + + + + + +Hadamard Transform +Wikipedia + + + + + +Viterbi Algorithm +Wikipedia + + + + + +White Noise +Wikipedia + + + + + +Linear Prediction +Wikipedia + + + + + +Modified Discrete Cosine Transform +Wikipedia + + + + + +Fast Fourier Transform +Wikipedia + + +
@@ -7551,7 +7640,7 @@ available in the README file. The implementation can be compiled with either a C89 or a C99 compiler. It is reasonably optimized for most platforms such that only architecture-specific optimizations are likely to be useful. -The FFT used is a slightly modified version of the KISS-FFT library, +The FFT used is a slightly modified version of the KISS-FFT library, but it is easy to substitute any other FFT library. @@ -7586,7 +7675,7 @@ following command line: opus_source.tar.gz +cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/...###//' | base64 -d > opus_source.tar.gz ]]> tar xzvf opus_source.tar.gz @@ -7594,11 +7683,19 @@ tar xzvf opus_source.tar.gz cd opus_source make +On systems where the provided Makefile does not work, the following command line may be used to compile +the source code: + + + + On systems where the base64 utility is not present, the following commands can be used instead: opus.b64 +cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/...###//' > opus.b64 ]]> openssl base64 -d -in opus.b64 > opus_source.tar.gz @@ -7606,12 +7703,13 @@ cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/\s\s\s###//' > opu
-
+
-The current development version of the source code is available in a - Git repository. -Development snapshots are provided at - . +As of the time of publication of this memo, up-to-date source code implementing +this standard is available in a + Git repository. +Releases and other resources are available at + .
@@ -7624,9 +7722,8 @@ Development snapshots are provided at
Because of size constraints, the Opus test vectors are not distributed in this -draft. They are available from the Opus codec website at - and will also be made available -in IETF meeting proceedings. These test vectors were created specifically to exercise +draft. They are available in the proceedings of the 83rd IETF meeting (Paris) and from the Opus codec website at +. These test vectors were created specifically to exercise all aspects of the decoder and therefore the audio quality of the decoded output is significantly lower than what Opus can achieve in normal operation. diff --git a/include/opus_defines.h b/include/opus_defines.h index 883f9d12..650961f6 100644 --- a/include/opus_defines.h +++ b/include/opus_defines.h @@ -120,7 +120,7 @@ extern "C" { #define OPUS_AUTO -1000 /**prev_mode==MODE_CELT_ONLY) silk_InitDecoder( silk_dec ); - /* The SILK PLC cannot support produce frames of less than 10 ms */ + /* The SILK PLC cannot produce frames of less than 10 ms */ st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); if (data != NULL) @@ -574,7 +574,7 @@ static int opus_packet_parse_impl(const unsigned char *data, int len, last_size = len-size[0]; break; /* Multiple CBR/VBR frames (from 0 to 120 ms) */ - case 3: + default: /*case 3:*/ if (len<1) return OPUS_INVALID_PACKET; /* Number of frames encoded in bits 0 to 5 */ diff --git a/tests/run_vectors.sh b/tests/run_vectors.sh index 9b5c29be..78b09e67 100755 --- a/tests/run_vectors.sh +++ b/tests/run_vectors.sh @@ -36,7 +36,7 @@ echo Testing mono echo "==============" echo -for file in `seq -w 1 11` +for file in 01 02 03 04 05 06 07 08 09 10 11 12 do if [ -e $VECTOR_PATH/testvector$file.bit ]; then echo Testing testvector$file @@ -66,7 +66,7 @@ echo Testing stereo echo "==============" echo -for file in `seq -w 1 11` +for file in 01 02 03 04 05 06 07 08 09 10 11 12 do if [ -e $VECTOR_PATH/testvector$file.bit ]; then echo Testing testvector$file -- cgit v1.2.1