summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Martijn van Beurden <mvanb1@gmail.com> 2022-06-29 12:00:13 +0200
committer Martijn van Beurden <mvanb1@gmail.com> 2022-07-01 21:51:31 +0200
commit 7e0a0e572305e9004a6fa9bba3dd6be936553b03 (patch)
tree a344ad5eccc1acb81a16f1f756a14c09e7e035e8 /src
parent 633ab36ec51bada8737459e51cc54441544e63b2 (diff)
download flac-7e0a0e572305e9004a6fa9bba3dd6be936553b03.tar.gz
Enable encoder to use INT32_MIN as residual value
Because abs(INT32_MIN) is undefined, some extra work was needed to let the encoder use INT32_MIN as a residual value. Although the expected compression gains are zero, this is done to ensure full spec coverage in this regard.
Diffstat (limited to 'src')
-rw-r--r-- src/libFLAC/fixed.c 75
-rw-r--r-- src/libFLAC/include/private/stream_encoder.h 12
-rw-r--r-- src/libFLAC/lpc.c 6
-rw-r--r-- src/libFLAC/stream_encoder.c 37
-rw-r--r-- src/libFLAC/stream_encoder_intrin_avx2.c 18
-rw-r--r-- src/libFLAC/stream_encoder_intrin_sse2.c 19
-rw-r--r-- src/libFLAC/stream_encoder_intrin_ssse3.c 18
7 files changed, 112 insertions, 73 deletions
diff --git a/src/libFLAC/fixed.c b/src/libFLAC/fixed.c
index 26f46e6c..9dfe4f19 100644
--- a/src/libFLAC/fixed.c
+++ b/src/libFLAC/fixed.c
@@ -377,33 +377,32 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual(const FLAC__int32 dat
#endif
{
FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
- FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
+ FLAC__int64 error_0, error_1, error_2, error_3, error_4;
FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
uint32_t order = 0;
for(int i = 0; i < (int)data_len; i++) {
- error_0 = local_abs64((FLAC__int64)data[i]);
- error_1 = (i > 0) ? local_abs64((FLAC__int64)data[i] - data[i-1]) : 0 ;
- error_2 = (i > 1) ? local_abs64((FLAC__int64)data[i] - 2 * (FLAC__int64)data[i-1] + data[i-2]) : 0;
- error_3 = (i > 2) ? local_abs64((FLAC__int64)data[i] - 3 * (FLAC__int64)data[i-1] + 3 * (FLAC__int64)data[i-2] - data[i-3]) : 0;
- error_4 = (i > 3) ? local_abs64((FLAC__int64)data[i] - 4 * (FLAC__int64)data[i-1] + 6 * (FLAC__int64)data[i-2] - 4 * (FLAC__int64)data[i-3] + data[i-4]) : 0;
-
- total_error_0 += error_0;
- total_error_1 += error_1;
- total_error_2 += error_2;
- total_error_3 += error_3;
- total_error_4 += error_4;
-
- /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
- if(error_0 > INT32_MAX)
+ error_0 = (FLAC__int64)data[i];
+ error_1 = (i > 0) ? (FLAC__int64)data[i] - data[i-1] : 0 ;
+ error_2 = (i > 1) ? (FLAC__int64)data[i] - 2 * (FLAC__int64)data[i-1] + data[i-2] : 0;
+ error_3 = (i > 2) ? (FLAC__int64)data[i] - 3 * (FLAC__int64)data[i-1] + 3 * (FLAC__int64)data[i-2] - data[i-3] : 0;
+ error_4 = (i > 3) ? (FLAC__int64)data[i] - 4 * (FLAC__int64)data[i-1] + 6 * (FLAC__int64)data[i-2] - 4 * (FLAC__int64)data[i-3] + data[i-4] : 0;
+
+ total_error_0 += local_abs64(error_0);
+ total_error_1 += local_abs64(error_1);
+ total_error_2 += local_abs64(error_2);
+ total_error_3 += local_abs64(error_3);
+ total_error_4 += local_abs64(error_4);
+
+ if(error_0 > INT32_MAX || error_0 < INT32_MIN)
order_0_is_valid = false;
- if(error_1 > INT32_MAX)
+ if(error_1 > INT32_MAX || error_1 < INT32_MIN)
order_1_is_valid = false;
- if(error_2 > INT32_MAX)
+ if(error_2 > INT32_MAX || error_2 < INT32_MIN)
order_2_is_valid = false;
- if(error_3 > INT32_MAX)
+ if(error_3 > INT32_MAX || error_3 < INT32_MIN)
order_3_is_valid = false;
- if(error_4 > INT32_MAX)
+ if(error_4 > INT32_MAX || error_4 < INT32_MIN)
order_4_is_valid = false;
}
@@ -423,33 +422,33 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_33bit(const FLAC__int
#endif
{
FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
- FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
+ FLAC__int64 error_0, error_1, error_2, error_3, error_4;
FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
uint32_t order = 0;
for(int i = 0; i < (int)data_len; i++) {
- error_0 = local_abs64(data[i]);
- error_1 = (i > 0) ? local_abs64(data[i] - data[i-1]) : 0 ;
- error_2 = (i > 1) ? local_abs64(data[i] - 2 * data[i-1] + data[i-2]) : 0;
- error_3 = (i > 2) ? local_abs64(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]) : 0;
- error_4 = (i > 3) ? local_abs64(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]) : 0;
-
- total_error_0 += error_0;
- total_error_1 += error_1;
- total_error_2 += error_2;
- total_error_3 += error_3;
- total_error_4 += error_4;
-
- /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
- if(error_0 > INT32_MAX)
+ error_0 = data[i];
+ error_1 = (i > 0) ? data[i] - data[i-1] : 0 ;
+ error_2 = (i > 1) ? data[i] - 2 * data[i-1] + data[i-2] : 0;
+ error_3 = (i > 2) ? data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3] : 0;
+ error_4 = (i > 3) ? data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4] : 0;
+
+ total_error_0 += local_abs64(error_0);
+ total_error_1 += local_abs64(error_1);
+ total_error_2 += local_abs64(error_2);
+ total_error_3 += local_abs64(error_3);
+ total_error_4 += local_abs64(error_4);
+
+
+ if(error_0 > INT32_MAX || error_0 < INT32_MIN)
order_0_is_valid = false;
- if(error_1 > INT32_MAX)
+ if(error_1 > INT32_MAX || error_1 < INT32_MIN)
order_1_is_valid = false;
- if(error_2 > INT32_MAX)
+ if(error_2 > INT32_MAX || error_2 < INT32_MIN)
order_2_is_valid = false;
- if(error_3 > INT32_MAX)
+ if(error_3 > INT32_MAX || error_3 < INT32_MIN)
order_3_is_valid = false;
- if(error_4 > INT32_MAX)
+ if(error_4 > INT32_MAX || error_4 < INT32_MIN)
order_4_is_valid = false;
}
diff --git a/src/libFLAC/include/private/stream_encoder.h b/src/libFLAC/include/private/stream_encoder.h
index ade648bf..ed6af122 100644
--- a/src/libFLAC/include/private/stream_encoder.h
+++ b/src/libFLAC/include/private/stream_encoder.h
@@ -37,29 +37,23 @@
#include <config.h>
#endif
-/*
- * This is used to avoid overflow with unusual signals in 32-bit
- * accumulator in the *precompute_partition_info_sums_* functions.
- */
-#define FLAC__MAX_EXTRA_RESIDUAL_BPS 4
-
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
#include "private/cpu.h"
#include "FLAC/format.h"
#ifdef FLAC__SSE2_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
#endif
#ifdef FLAC__SSSE3_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
#endif
#ifdef FLAC__AVX2_SUPPORTED
extern void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
#endif
#endif
diff --git a/src/libFLAC/lpc.c b/src/libFLAC/lpc.c
index 3fe3a2b7..2e8fa51c 100644
--- a/src/libFLAC/lpc.c
+++ b/src/libFLAC/lpc.c
@@ -828,8 +828,7 @@ FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual(const
case 1: sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
}
residual_to_check = data[i] - (sum >> lp_quantization);
- /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
- if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
+ if(residual_to_check < INT32_MIN || residual_to_check > INT32_MAX)
return false;
else
residual[i] = residual_to_check;
@@ -882,8 +881,7 @@ FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual_33bit
case 1: sum += qlp_coeff[ 0] * data[i- 1];
}
residual_to_check = data[i] - (sum >> lp_quantization);
- /* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
- if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
+ if(residual_to_check < INT32_MIN || residual_to_check > INT32_MAX)
return false;
else
residual[i] = residual_to_check;
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 34222174..3f3dc326 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -231,7 +231,7 @@ static uint32_t find_best_partition_order_(
uint32_t rice_parameter_limit,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t bps,
+ uint32_t max_residual_bps,
FLAC__bool do_escape_coding,
uint32_t rice_parameter_search_dist,
FLAC__EntropyCodingMethod *best_ecm
@@ -244,7 +244,7 @@ static void precompute_partition_info_sums_(
uint32_t predictor_order,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t bps
+ uint32_t max_residual_bps
);
static void precompute_partition_info_escapes_(
@@ -349,7 +349,7 @@ typedef struct FLAC__StreamEncoderPrivate {
uint32_t current_frame_number;
FLAC__MD5Context md5context;
FLAC__CPUInfo cpuinfo;
- void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
+ void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
#ifndef FLAC__INTEGER_ONLY_LIBRARY
uint32_t (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
uint32_t (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
@@ -3873,7 +3873,7 @@ uint32_t evaluate_fixed_subframe_(
rice_parameter_limit,
min_partition_order,
max_partition_order,
- subframe_bps,
+ (subframe_bps + order),
do_escape_coding,
rice_parameter_search_dist,
&subframe->data.fixed.entropy_coding_method
@@ -3972,7 +3972,7 @@ uint32_t evaluate_lpc_subframe_(
rice_parameter_limit,
min_partition_order,
max_partition_order,
- subframe_bps,
+ FLAC__lpc_max_residual_bps(subframe_bps, qlp_coeff, order, quantization),
do_escape_coding,
rice_parameter_search_dist,
&subframe->data.lpc.entropy_coding_method
@@ -4046,7 +4046,7 @@ uint32_t find_best_partition_order_(
uint32_t rice_parameter_limit,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t bps,
+ uint32_t max_residual_bps,
FLAC__bool do_escape_coding,
uint32_t rice_parameter_search_dist,
FLAC__EntropyCodingMethod *best_ecm
@@ -4060,7 +4060,7 @@ uint32_t find_best_partition_order_(
max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(max_partition_order, blocksize, predictor_order);
min_partition_order = flac_min(min_partition_order, max_partition_order);
- private_->local_precompute_partition_info_sums(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, bps);
+ private_->local_precompute_partition_info_sums(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, max_residual_bps);
if(do_escape_coding)
precompute_partition_info_escapes_(residual, raw_bits_per_partition, residual_samples, predictor_order, min_partition_order, max_partition_order);
@@ -4138,7 +4138,7 @@ void precompute_partition_info_sums_(
uint32_t predictor_order,
uint32_t min_partition_order,
uint32_t max_partition_order,
- uint32_t bps
+ uint32_t max_residual_bps
)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
@@ -4150,22 +4150,33 @@ void precompute_partition_info_sums_(
{
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int)predictor_order);
- /* WATCHOUT: "bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum assumed size of the average residual magnitude */
- if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
+ if(max_residual_bps < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
FLAC__uint32 abs_residual_partition_sum = 0;
end += default_partition_samples;
for( ; residual_sample < end; residual_sample++)
- abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+ abs_residual_partition_sum += abs(residual[residual_sample]);
abs_residual_partition_sums[partition] = abs_residual_partition_sum;
}
}
- else { /* have to pessimistically use 64 bits for accumulator */
+ else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
FLAC__uint64 abs_residual_partition_sum64 = 0;
end += default_partition_samples;
for( ; residual_sample < end; residual_sample++)
- abs_residual_partition_sum64 += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+ abs_residual_partition_sum64 += abs(residual[residual_sample]);
+ abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
+ }
+ }
+ else { /* must handle abs(INT32_MIN) */
+ for(partition = residual_sample = 0; partition < partitions; partition++) {
+ FLAC__uint64 abs_residual_partition_sum64 = 0;
+ end += default_partition_samples;
+ for( ; residual_sample < end; residual_sample++)
+ if(residual[residual_sample] == INT32_MIN)
+ abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
+ else
+ abs_residual_partition_sum64 += abs(residual[residual_sample]);
abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
}
}
diff --git a/src/libFLAC/stream_encoder_intrin_avx2.c b/src/libFLAC/stream_encoder_intrin_avx2.c
index 7a06ea15..863ae4d2 100644
--- a/src/libFLAC/stream_encoder_intrin_avx2.c
+++ b/src/libFLAC/stream_encoder_intrin_avx2.c
@@ -48,7 +48,7 @@
FLAC__SSE_TARGET("avx2")
void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
uint32_t partitions = 1u << max_partition_order;
@@ -60,7 +60,7 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
- if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
+ if(max_residual_bps < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m256i sum256 = _mm256_setzero_si256();
__m128i sum128;
@@ -92,7 +92,7 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
#endif
}
}
- else { /* have to pessimistically use 64 bits for accumulator */
+ else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m256i sum256 = _mm256_setzero_si256();
__m128i sum128;
@@ -121,6 +121,18 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), sum128);
}
}
+ else { /* must handle abs(INT32_MIN) */
+ for(partition = residual_sample = 0; partition < partitions; partition++) {
+ FLAC__uint64 abs_residual_partition_sum64 = 0;
+ end += default_partition_samples;
+ for( ; residual_sample < end; residual_sample++)
+ if(residual[residual_sample] == INT32_MIN)
+ abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
+ else
+ abs_residual_partition_sum64 += abs(residual[residual_sample]);
+ abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
+ }
+ }
}
/* now merge partitions for lower orders */
diff --git a/src/libFLAC/stream_encoder_intrin_sse2.c b/src/libFLAC/stream_encoder_intrin_sse2.c
index 04e560ba..a8490f4c 100644
--- a/src/libFLAC/stream_encoder_intrin_sse2.c
+++ b/src/libFLAC/stream_encoder_intrin_sse2.c
@@ -59,7 +59,7 @@ static inline __m128i local_abs_epi32(__m128i val)
FLAC__SSE_TARGET("sse2")
void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
uint32_t partitions = 1u << max_partition_order;
@@ -71,7 +71,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
- if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
+ if(max_residual_bps < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -106,7 +106,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
#endif
}
}
- else { /* have to pessimistically use 64 bits for accumulator */
+ else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -135,6 +135,19 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
}
}
+ else { /* must handle abs(INT32_MIN) */
+ for(partition = residual_sample = 0; partition < partitions; partition++) {
+ FLAC__uint64 abs_residual_partition_sum64 = 0;
+ end += default_partition_samples;
+ for( ; residual_sample < end; residual_sample++)
+ if(residual[residual_sample] == INT32_MIN)
+ abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
+ else
+ abs_residual_partition_sum64 += abs(residual[residual_sample]);
+ abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
+ }
+ }
+
}
/* now merge partitions for lower orders */
diff --git a/src/libFLAC/stream_encoder_intrin_ssse3.c b/src/libFLAC/stream_encoder_intrin_ssse3.c
index d7395710..628cd5ca 100644
--- a/src/libFLAC/stream_encoder_intrin_ssse3.c
+++ b/src/libFLAC/stream_encoder_intrin_ssse3.c
@@ -48,7 +48,7 @@
FLAC__SSE_TARGET("ssse3")
void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
uint32_t partitions = 1u << max_partition_order;
@@ -60,7 +60,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
- if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
+ if(max_residual_bps < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -95,7 +95,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
#endif
}
}
- else { /* have to pessimistically use 64 bits for accumulator */
+ else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -124,6 +124,18 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
}
}
+ else { /* must handle abs(INT32_MIN) */
+ for(partition = residual_sample = 0; partition < partitions; partition++) {
+ FLAC__uint64 abs_residual_partition_sum64 = 0;
+ end += default_partition_samples;
+ for( ; residual_sample < end; residual_sample++)
+ if(residual[residual_sample] == INT32_MIN)
+ abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
+ else
+ abs_residual_partition_sum64 += abs(residual[residual_sample]);
+ abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
+ }
+ }
}
/* now merge partitions for lower orders */