summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Martijn van Beurden <mvanb1@gmail.com> 2022-07-13 20:57:15 +0200
committer: GitHub <noreply@github.com> 2022-07-13 20:57:15 +0200
commit: 48d4f81211486846237a23baf4c528718d5d95b9 (patch)
tree: 08f5fbae5cde5c83b005d37e397bdbd9eb2b7c3b /src
parent: 43a93a00e49f1fff572fd473c4ca87b16fea4944 (diff)
download: flac-48d4f81211486846237a23baf4c528718d5d95b9.tar.gz
Revert "Enable encoder to use INT32_MIN as residual value" (#389)
This reverts commit 7e0a0e572305e9004a6fa9bba3dd6be936553b03, following the change to the FLAC format proposed here: https://github.com/ietf-wg-cellar/flac-specification/pull/148

It turns out supporting use of INT32_MIN is quite a hassle.
Diffstat (limited to 'src')
-rw-r--r-- src/libFLAC/fixed.c 75
-rw-r--r-- src/libFLAC/include/private/stream_encoder.h 12
-rw-r--r-- src/libFLAC/lpc.c 6
-rw-r--r-- src/libFLAC/stream_encoder.c 37
-rw-r--r-- src/libFLAC/stream_encoder_intrin_avx2.c 18
-rw-r--r-- src/libFLAC/stream_encoder_intrin_sse2.c 19
-rw-r--r-- src/libFLAC/stream_encoder_intrin_ssse3.c 18
7 files changed, 73 insertions, 112 deletions
diff --git a/src/libFLAC/fixed.c b/src/libFLAC/fixed.c
index 9dfe4f19..26f46e6c 100644
--- a/src/libFLAC/fixed.c
+++ b/src/libFLAC/fixed.c
@@ -377,32 +377,33 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual(const FLAC__int32 dat
#endif
{
FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
- FLAC__int64 error_0, error_1, error_2, error_3, error_4;
+ FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
uint32_t order = 0;
for(int i = 0; i < (int)data_len; i++) {
- error_0 = (FLAC__int64)data[i];
- error_1 = (i > 0) ? (FLAC__int64)data[i] - data[i-1] : 0 ;
- error_2 = (i > 1) ? (FLAC__int64)data[i] - 2 * (FLAC__int64)data[i-1] + data[i-2] : 0;
- error_3 = (i > 2) ? (FLAC__int64)data[i] - 3 * (FLAC__int64)data[i-1] + 3 * (FLAC__int64)data[i-2] - data[i-3] : 0;
- error_4 = (i > 3) ? (FLAC__int64)data[i] - 4 * (FLAC__int64)data[i-1] + 6 * (FLAC__int64)data[i-2] - 4 * (FLAC__int64)data[i-3] + data[i-4] : 0;
-
- total_error_0 += local_abs64(error_0);
- total_error_1 += local_abs64(error_1);
- total_error_2 += local_abs64(error_2);
- total_error_3 += local_abs64(error_3);
- total_error_4 += local_abs64(error_4);
-
- if(error_0 > INT32_MAX || error_0 < INT32_MIN)
+ error_0 = local_abs64((FLAC__int64)data[i]);
+ error_1 = (i > 0) ? local_abs64((FLAC__int64)data[i] - data[i-1]) : 0 ;
+ error_2 = (i > 1) ? local_abs64((FLAC__int64)data[i] - 2 * (FLAC__int64)data[i-1] + data[i-2]) : 0;
+ error_3 = (i > 2) ? local_abs64((FLAC__int64)data[i] - 3 * (FLAC__int64)data[i-1] + 3 * (FLAC__int64)data[i-2] - data[i-3]) : 0;
+ error_4 = (i > 3) ? local_abs64((FLAC__int64)data[i] - 4 * (FLAC__int64)data[i-1] + 6 * (FLAC__int64)data[i-2] - 4 * (FLAC__int64)data[i-3] + data[i-4]) : 0;
+
+ total_error_0 += error_0;
+ total_error_1 += error_1;
+ total_error_2 += error_2;
+ total_error_3 += error_3;
+ total_error_4 += error_4;
+
+ /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
+ if(error_0 > INT32_MAX)
order_0_is_valid = false;
- if(error_1 > INT32_MAX || error_1 < INT32_MIN)
+ if(error_1 > INT32_MAX)
order_1_is_valid = false;
- if(error_2 > INT32_MAX || error_2 < INT32_MIN)
+ if(error_2 > INT32_MAX)
order_2_is_valid = false;
- if(error_3 > INT32_MAX || error_3 < INT32_MIN)
+ if(error_3 > INT32_MAX)
order_3_is_valid = false;
- if(error_4 > INT32_MAX || error_4 < INT32_MIN)
+ if(error_4 > INT32_MAX)
order_4_is_valid = false;
}
@@ -422,33 +423,33 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_33bit(const FLAC__int
#endif
{
FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
- FLAC__int64 error_0, error_1, error_2, error_3, error_4;
+ FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
uint32_t order = 0;
for(int i = 0; i < (int)data_len; i++) {
- error_0 = data[i];
- error_1 = (i > 0) ? data[i] - data[i-1] : 0 ;
- error_2 = (i > 1) ? data[i] - 2 * data[i-1] + data[i-2] : 0;
- error_3 = (i > 2) ? data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3] : 0;
- error_4 = (i > 3) ? data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4] : 0;
-
- total_error_0 += local_abs64(error_0);
- total_error_1 += local_abs64(error_1);
- total_error_2 += local_abs64(error_2);
- total_error_3 += local_abs64(error_3);
- total_error_4 += local_abs64(error_4);
-
-
- if(error_0 > INT32_MAX || error_0 < INT32_MIN)
+ error_0 = local_abs64(data[i]);
+ error_1 = (i > 0) ? local_abs64(data[i] - data[i-1]) : 0 ;
+ error_2 = (i > 1) ? local_abs64(data[i] - 2 * data[i-1] + data[i-2]) : 0;
+ error_3 = (i > 2) ? local_abs64(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]) : 0;
+ error_4 = (i > 3) ? local_abs64(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]) : 0;
+
+ total_error_0 += error_0;
+ total_error_1 += error_1;
+ total_error_2 += error_2;
+ total_error_3 += error_3;
+ total_error_4 += error_4;
+
+ /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
+ if(error_0 > INT32_MAX)
order_0_is_valid = false;
- if(error_1 > INT32_MAX || error_1 < INT32_MIN)
+ if(error_1 > INT32_MAX)
order_1_is_valid = false;
- if(error_2 > INT32_MAX || error_2 < INT32_MIN)
+ if(error_2 > INT32_MAX)
order_2_is_valid = false;
- if(error_3 > INT32_MAX || error_3 < INT32_MIN)
+ if(error_3 > INT32_MAX)
order_3_is_valid = false;
- if(error_4 > INT32_MAX || error_4 < INT32_MIN)
+ if(error_4 > INT32_MAX)
order_4_is_valid = false;
}
diff --git a/src/libFLAC/include/private/stream_encoder.h b/src/libFLAC/include/private/stream_encoder.h
index ed6af122..ade648bf 100644
--- a/src/libFLAC/include/private/stream_encoder.h
+++ b/src/libFLAC/include/private/stream_encoder.h
@@ -37,23 +37,29 @@
#include <config.h>
#endif
+/*
+ * This is used to avoid overflow with unusual signals in 32-bit
+ * accumulator in the *precompute_partition_info_sums_* functions.
+ */
+#define FLAC__MAX_EXTRA_RESIDUAL_BPS 4
+
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
#include "private/cpu.h"
#include "FLAC/format.h"
#ifdef FLAC__SSE2_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
#endif
#ifdef FLAC__SSSE3_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
#endif
#ifdef FLAC__AVX2_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
#endif
#endif
diff --git a/src/libFLAC/lpc.c b/src/libFLAC/lpc.c
index 2e8fa51c..3fe3a2b7 100644
--- a/src/libFLAC/lpc.c
+++ b/src/libFLAC/lpc.c
@@ -828,7 +828,8 @@ FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual(const
case 1: sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
}
residual_to_check = data[i] - (sum >> lp_quantization);
- if(residual_to_check < INT32_MIN || residual_to_check > INT32_MAX)
+ /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
+ if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
return false;
else
residual[i] = residual_to_check;
@@ -881,7 +882,8 @@ FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual_33bit
case 1: sum += qlp_coeff[ 0] * data[i- 1];
}
residual_to_check = data[i] - (sum >> lp_quantization);
- if(residual_to_check < INT32_MIN || residual_to_check > INT32_MAX)
+ /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
+ if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
return false;
else
residual[i] = residual_to_check;
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 3f3dc326..34222174 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -231,7 +231,7 @@ static uint32_t find_best_partition_order_(
uint32_t rice_parameter_limit,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t max_residual_bps,
+ uint32_t bps,
FLAC__bool do_escape_coding,
uint32_t rice_parameter_search_dist,
FLAC__EntropyCodingMethod *best_ecm
@@ -244,7 +244,7 @@ static void precompute_partition_info_sums_(
uint32_t predictor_order,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t max_residual_bps
+ uint32_t bps
);
static void precompute_partition_info_escapes_(
@@ -349,7 +349,7 @@ typedef struct FLAC__StreamEncoderPrivate {
uint32_t current_frame_number;
FLAC__MD5Context md5context;
FLAC__CPUInfo cpuinfo;
- void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
+ void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
#ifndef FLAC__INTEGER_ONLY_LIBRARY
uint32_t (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
uint32_t (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
@@ -3873,7 +3873,7 @@ uint32_t evaluate_fixed_subframe_(
rice_parameter_limit,
min_partition_order,
max_partition_order,
- (subframe_bps + order),
+ subframe_bps,
do_escape_coding,
rice_parameter_search_dist,
&subframe->data.fixed.entropy_coding_method
@@ -3972,7 +3972,7 @@ uint32_t evaluate_lpc_subframe_(
rice_parameter_limit,
min_partition_order,
max_partition_order,
- FLAC__lpc_max_residual_bps(subframe_bps, qlp_coeff, order, quantization),
+ subframe_bps,
do_escape_coding,
rice_parameter_search_dist,
&subframe->data.lpc.entropy_coding_method
@@ -4046,7 +4046,7 @@ uint32_t find_best_partition_order_(
uint32_t rice_parameter_limit,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t max_residual_bps,
+ uint32_t bps,
FLAC__bool do_escape_coding,
uint32_t rice_parameter_search_dist,
FLAC__EntropyCodingMethod *best_ecm
@@ -4060,7 +4060,7 @@ uint32_t find_best_partition_order_(
max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(max_partition_order, blocksize, predictor_order);
min_partition_order = flac_min(min_partition_order, max_partition_order);
- private_->local_precompute_partition_info_sums(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, max_residual_bps);
+ private_->local_precompute_partition_info_sums(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, bps);
if(do_escape_coding)
precompute_partition_info_escapes_(residual, raw_bits_per_partition, residual_samples, predictor_order, min_partition_order, max_partition_order);
@@ -4138,7 +4138,7 @@ void precompute_partition_info_sums_(
uint32_t predictor_order,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t max_residual_bps
+ uint32_t bps
)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
@@ -4150,33 +4150,22 @@ void precompute_partition_info_sums_(
{
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int)predictor_order);
- if(max_residual_bps < threshold) {
+ /* WATCHOUT: "bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum assumed size of the average residual magnitude */
+ if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
FLAC__uint32 abs_residual_partition_sum = 0;
end += default_partition_samples;
for( ; residual_sample < end; residual_sample++)
- abs_residual_partition_sum += abs(residual[residual_sample]);
+ abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
abs_residual_partition_sums[partition] = abs_residual_partition_sum;
}
}
- else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
+ else { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
FLAC__uint64 abs_residual_partition_sum64 = 0;
end += default_partition_samples;
for( ; residual_sample < end; residual_sample++)
- abs_residual_partition_sum64 += abs(residual[residual_sample]);
- abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
- }
- }
- else { /* must handle abs(INT32_MIN) */
- for(partition = residual_sample = 0; partition < partitions; partition++) {
- FLAC__uint64 abs_residual_partition_sum64 = 0;
- end += default_partition_samples;
- for( ; residual_sample < end; residual_sample++)
- if(residual[residual_sample] == INT32_MIN)
- abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
- else
- abs_residual_partition_sum64 += abs(residual[residual_sample]);
+ abs_residual_partition_sum64 += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
}
}
diff --git a/src/libFLAC/stream_encoder_intrin_avx2.c b/src/libFLAC/stream_encoder_intrin_avx2.c
index 863ae4d2..7a06ea15 100644
--- a/src/libFLAC/stream_encoder_intrin_avx2.c
+++ b/src/libFLAC/stream_encoder_intrin_avx2.c
@@ -48,7 +48,7 @@
FLAC__SSE_TARGET("avx2")
void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
uint32_t partitions = 1u << max_partition_order;
@@ -60,7 +60,7 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
- if(max_residual_bps < threshold) {
+ if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m256i sum256 = _mm256_setzero_si256();
__m128i sum128;
@@ -92,7 +92,7 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
#endif
}
}
- else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
+ else { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m256i sum256 = _mm256_setzero_si256();
__m128i sum128;
@@ -121,18 +121,6 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), sum128);
}
}
- else { /* must handle abs(INT32_MIN) */
- for(partition = residual_sample = 0; partition < partitions; partition++) {
- FLAC__uint64 abs_residual_partition_sum64 = 0;
- end += default_partition_samples;
- for( ; residual_sample < end; residual_sample++)
- if(residual[residual_sample] == INT32_MIN)
- abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
- else
- abs_residual_partition_sum64 += abs(residual[residual_sample]);
- abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
- }
- }
}
/* now merge partitions for lower orders */
diff --git a/src/libFLAC/stream_encoder_intrin_sse2.c b/src/libFLAC/stream_encoder_intrin_sse2.c
index a8490f4c..04e560ba 100644
--- a/src/libFLAC/stream_encoder_intrin_sse2.c
+++ b/src/libFLAC/stream_encoder_intrin_sse2.c
@@ -59,7 +59,7 @@ static inline __m128i local_abs_epi32(__m128i val)
FLAC__SSE_TARGET("sse2")
void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
uint32_t partitions = 1u << max_partition_order;
@@ -71,7 +71,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
- if(max_residual_bps < threshold) {
+ if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -106,7 +106,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
#endif
}
}
- else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
+ else { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -135,19 +135,6 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
}
}
- else { /* must handle abs(INT32_MIN) */
- for(partition = residual_sample = 0; partition < partitions; partition++) {
- FLAC__uint64 abs_residual_partition_sum64 = 0;
- end += default_partition_samples;
- for( ; residual_sample < end; residual_sample++)
- if(residual[residual_sample] == INT32_MIN)
- abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
- else
- abs_residual_partition_sum64 += abs(residual[residual_sample]);
- abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
- }
- }
-
}
/* now merge partitions for lower orders */
diff --git a/src/libFLAC/stream_encoder_intrin_ssse3.c b/src/libFLAC/stream_encoder_intrin_ssse3.c
index 628cd5ca..d7395710 100644
--- a/src/libFLAC/stream_encoder_intrin_ssse3.c
+++ b/src/libFLAC/stream_encoder_intrin_ssse3.c
@@ -48,7 +48,7 @@
FLAC__SSE_TARGET("ssse3")
void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
uint32_t partitions = 1u << max_partition_order;
@@ -60,7 +60,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
- if(max_residual_bps < threshold) {
+ if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -95,7 +95,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
#endif
}
}
- else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
+ else { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -124,18 +124,6 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
}
}
- else { /* must handle abs(INT32_MIN) */
- for(partition = residual_sample = 0; partition < partitions; partition++) {
- FLAC__uint64 abs_residual_partition_sum64 = 0;
- end += default_partition_samples;
- for( ; residual_sample < end; residual_sample++)
- if(residual[residual_sample] == INT32_MIN)
- abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
- else
- abs_residual_partition_sum64 += abs(residual[residual_sample]);
- abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
- }
- }
}
/* now merge partitions for lower orders */