summary | refs | log | tree | commit | diff
path: root/src/libFLAC/stream_encoder_intrin_ssse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libFLAC/stream_encoder_intrin_ssse3.c')
-rw-r--r-- src/libFLAC/stream_encoder_intrin_ssse3.c 18
1 file changed, 15 insertions, 3 deletions
diff --git a/src/libFLAC/stream_encoder_intrin_ssse3.c b/src/libFLAC/stream_encoder_intrin_ssse3.c
index d7395710..628cd5ca 100644
--- a/src/libFLAC/stream_encoder_intrin_ssse3.c
+++ b/src/libFLAC/stream_encoder_intrin_ssse3.c
@@ -48,7 +48,7 @@
FLAC__SSE_TARGET("ssse3")
void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
- uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
+ uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
{
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
uint32_t partitions = 1u << max_partition_order;
@@ -60,7 +60,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
- if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
+ if(max_residual_bps < threshold) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -95,7 +95,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
#endif
}
}
- else { /* have to pessimistically use 64 bits for accumulator */
+ else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
for(partition = residual_sample = 0; partition < partitions; partition++) {
__m128i mm_sum = _mm_setzero_si128();
uint32_t e1, e3;
@@ -124,6 +124,18 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
}
}
+ else { /* must handle abs(INT32_MIN) */
+ for(partition = residual_sample = 0; partition < partitions; partition++) {
+ FLAC__uint64 abs_residual_partition_sum64 = 0;
+ end += default_partition_samples;
+ for( ; residual_sample < end; residual_sample++)
+ if(residual[residual_sample] == INT32_MIN)
+ abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
+ else
+ abs_residual_partition_sum64 += abs(residual[residual_sample]);
+ abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
+ }
+ }
}
/* now merge partitions for lower orders */