summary | refs | log | tree | commit | diff
path: root/plugins/fast_float/src/fast_8_matsh.c
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/fast_float/src/fast_8_matsh.c')
-rw-r--r-- plugins/fast_float/src/fast_8_matsh.c | 67
1 files changed, 21 insertions, 46 deletions
diff --git a/plugins/fast_float/src/fast_8_matsh.c b/plugins/fast_float/src/fast_8_matsh.c
index 22e7f2b..6a126f3 100644
--- a/plugins/fast_float/src/fast_8_matsh.c
+++ b/plugins/fast_float/src/fast_8_matsh.c
@@ -30,10 +30,9 @@ typedef cmsInt32Number cmsS1Fixed14Number; // Note that this may hold more tha
// This is the private data container used by this optimization
typedef struct {
- // This is for SSE2, MUST be aligned at 16 bit boundary
+ // Alignment makes it faster
- cmsFloat32Number fMatrix[4][4];
- cmsFloat32Number fShaper1[256 * 3];
+ cmsS1Fixed14Number Mat[4][4]; // n.14 to n.14 (needs a saturation after that)
void * real_ptr;
@@ -42,10 +41,7 @@ typedef struct {
cmsS1Fixed14Number Shaper1R[256]; // from 0..255 to 1.14 (0.0...1.0)
cmsS1Fixed14Number Shaper1G[256];
cmsS1Fixed14Number Shaper1B[256];
-
- cmsS1Fixed14Number Mat[3][3]; // n.14 to n.14 (needs a saturation after that)
- cmsS1Fixed14Number Off[3];
-
+
cmsUInt8Number Shaper2R[0x4001]; // 1.14 to 0..255
cmsUInt8Number Shaper2G[0x4001];
cmsUInt8Number Shaper2B[0x4001];
@@ -97,20 +93,6 @@ void FillFirstShaper(cmsS1Fixed14Number* Table, cmsToneCurve* Curve)
}
}
-static
-void FillFirstShaperFloat(cmsFloat32Number* Table, cmsToneCurve* Curve)
-{
- int i;
- cmsFloat32Number R;
-
- for (i=0; i < 256; i++) {
-
- R = (cmsFloat32Number) (i / 255.0);
-
- Table[i] = cmsEvalToneCurveFloat(Curve, R);
- }
-}
-
// This table converts from 1.14 (0x4000 being the last entry) to 8 bits after applying the curve
static
@@ -118,15 +100,17 @@ void FillSecondShaper(cmsUInt8Number* Table, cmsToneCurve* Curve)
{
int i;
cmsFloat32Number R, Val;
- cmsUInt16Number w;
+ cmsInt32Number w;
for (i=0; i < 0x4001; i++) {
- R = (cmsFloat32Number) (i / 16384.0);
+ R = (cmsFloat32Number) (i / 16384.0f);
Val = cmsEvalToneCurveFloat(Curve, R);
- w = _cmsSaturateWord(Val * 65535.0 + 0.5);
+ w = (cmsInt32Number) (Val * 255.0f + 0.5f);
+ if (w < 0) w = 0;
+ if (w > 255) w = 255;
- Table[i] = FROM_16_TO_8(w);
+ Table[i] = (cmsInt8Number) w;
}
}
@@ -153,30 +137,22 @@ XMatShaper8Data* SetMatShaper(cmsContext ContextID, cmsToneCurve* Curve1[3], cms
FillSecondShaper(p ->Shaper2G, Curve2[1]);
FillSecondShaper(p ->Shaper2B, Curve2[2]);
-
- FillFirstShaperFloat(p ->fShaper1, Curve1[0]);
- FillFirstShaperFloat(p ->fShaper1 + 256, Curve1[1]);
- FillFirstShaperFloat(p ->fShaper1 + 256*2, Curve1[2]);
-
+
// Convert matrix to nFixed14. Note that those values may take more than 16 bits as
for (i=0; i < 3; i++) {
for (j=0; j < 3; j++) {
- p ->Mat[i][j] = DOUBLE_TO_1FIXED14(Mat->v[i].n[j]);
- p ->fMatrix[j][i] = (cmsFloat32Number) Mat ->v[i].n[j];
+ p ->Mat[j][i] = DOUBLE_TO_1FIXED14(Mat->v[i].n[j]);
}
}
-
-
+
for (i=0; i < 3; i++) {
if (Off == NULL) {
-
- p ->Off[i] = 0x2000;
- p ->fMatrix[3][i] = 0.0f;
+
+ p->Mat[3][i] = DOUBLE_TO_1FIXED14(0.5);
}
- else {
- p ->Off[i] = DOUBLE_TO_1FIXED14(Off->n[i]) + 0x2000;
- p ->fMatrix[3][i] = (cmsFloat32Number) Off->n[i];
+ else {
+ p->Mat[3][i] = DOUBLE_TO_1FIXED14(Off->n[i] + 0.5);
}
}
@@ -237,20 +213,19 @@ void MatShaperXform8(struct _cmstransform_struct *CMMcargo,
gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut;
bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut;
if (nalpha)
- aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut;
-
+ aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut;
for (ii = 0; ii < PixelsPerLine; ii++) {
-
+
// Across first shaper, which also converts to 1.14 fixed point. 16 bits guaranteed.
r = p->Shaper1R[*rin];
g = p->Shaper1G[*gin];
b = p->Shaper1B[*bin];
// Evaluate the matrix in 1.14 fixed point
- l1 = (p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b + p->Off[0]) >> 14;
- l2 = (p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b + p->Off[1]) >> 14;
- l3 = (p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b + p->Off[2]) >> 14;
+ l1 = (p->Mat[0][0] * r + p->Mat[1][0] * g + p->Mat[2][0] * b + p->Mat[3][0]) >> 14;
+ l2 = (p->Mat[0][1] * r + p->Mat[1][1] * g + p->Mat[2][1] * b + p->Mat[3][1]) >> 14;
+ l3 = (p->Mat[0][2] * r + p->Mat[1][2] * g + p->Mat[2][2] * b + p->Mat[3][2]) >> 14;
// Now we have to clip to 0..1.0 range