common/math_util.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282

/* Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Common math functions. */

#include "common.h"
#include "math.h"
#include "math_util.h"
#include "util.h"

/* Some useful math functions.  Use with integers only! */
#define SQ(x) ((x) * (x))

/* For cosine lookup table, define the increment and the size of the table. */
#define COSINE_LUT_INCR_DEG	5
#define COSINE_LUT_SIZE		((180 / COSINE_LUT_INCR_DEG) + 1)

/* Lookup table for the value of cosine from 0 degrees to 180 degrees. */
static const fp_t cos_lut[] = {
	FLOAT_TO_FP( 1.00000), FLOAT_TO_FP( 0.99619), FLOAT_TO_FP( 0.98481),
	FLOAT_TO_FP( 0.96593), FLOAT_TO_FP( 0.93969), FLOAT_TO_FP( 0.90631),
	FLOAT_TO_FP( 0.86603), FLOAT_TO_FP( 0.81915), FLOAT_TO_FP( 0.76604),
	FLOAT_TO_FP( 0.70711), FLOAT_TO_FP( 0.64279), FLOAT_TO_FP( 0.57358),
	FLOAT_TO_FP( 0.50000), FLOAT_TO_FP( 0.42262), FLOAT_TO_FP( 0.34202),
	FLOAT_TO_FP( 0.25882), FLOAT_TO_FP( 0.17365), FLOAT_TO_FP( 0.08716),
	FLOAT_TO_FP( 0.00000), FLOAT_TO_FP(-0.08716), FLOAT_TO_FP(-0.17365),
	FLOAT_TO_FP(-0.25882), FLOAT_TO_FP(-0.34202), FLOAT_TO_FP(-0.42262),
	FLOAT_TO_FP(-0.50000), FLOAT_TO_FP(-0.57358), FLOAT_TO_FP(-0.64279),
	FLOAT_TO_FP(-0.70711), FLOAT_TO_FP(-0.76604), FLOAT_TO_FP(-0.81915),
	FLOAT_TO_FP(-0.86603), FLOAT_TO_FP(-0.90631), FLOAT_TO_FP(-0.93969),
	FLOAT_TO_FP(-0.96593), FLOAT_TO_FP(-0.98481), FLOAT_TO_FP(-0.99619),
	FLOAT_TO_FP(-1.00000),
};
BUILD_ASSERT(ARRAY_SIZE(cos_lut) == COSINE_LUT_SIZE);


fp_t arc_cos(fp_t x)
{
	int i;

	/* Cap x if out of range. */
	if (x < FLOAT_TO_FP(-1.0))
		x = FLOAT_TO_FP(-1.0);
	else if (x > FLOAT_TO_FP(1.0))
		x = FLOAT_TO_FP(1.0);

	/*
	 * Increment through lookup table to find index and then linearly
	 * interpolate for precision.
	 */
	/* TODO(crosbug.com/p/25600): Optimize with binary search. */
	for (i = 0; i < COSINE_LUT_SIZE-1; i++) {
		if (x >= cos_lut[i+1]) {
			const fp_t interp = fp_div(cos_lut[i] - x,
						   cos_lut[i] - cos_lut[i + 1]);

			return fp_mul(INT_TO_FP(COSINE_LUT_INCR_DEG),
				      INT_TO_FP(i) + interp);
		}
	}

	/*
	 * Shouldn't be possible to get here because inputs are clipped to
	 * [-1, 1] and the cos_lut[] table goes over the same range. If we
	 * are here, throw an assert.
	 */
	ASSERT(0);

	return 0;
}

/**
 * Integer square root.
 */
#ifdef CONFIG_FPU
/*
 * Use library sqrtf instruction, if available, since it's usually much faster
 * and smaller. On Cortex-M4, this becomes a single instruction which takes
 * 14 cycles to execute. This produces identical results to binary search,
 * except when the floating point representation of the square root rounds up
 * to an integer.
 */
static inline int int_sqrtf(fp_inter_t x)
{
	return sqrtf(x);
}
#else
static int int_sqrtf(fp_inter_t x)
{
	int rmax = 0x7fffffff;
	int rmin = 0;

	/* Short cut if x is 32-bit value */
	if (x < rmax)
		rmax = 0x7fff;

	/*
	 * Just binary-search.  There are better algorithms, but we call this
	 * infrequently enough it doesn't matter.
	 */
	if (x <= 0)
		return 0;  /* Yeah, for imaginary numbers too */
	else if (x > (fp_inter_t)rmax * rmax)
		return rmax;

	while (1) {
		int r = (rmax + rmin) / 2;
		fp_inter_t r2 = (fp_inter_t)r * r;

		if (r2 > x) {
			/* Guessed too high */
			rmax = r;
		} else if (r2 < x) {
			/* Guessed too low.  Watch out for rounding! */
			if (rmin == r)
				return r;

			rmin = r;
		} else {
			/* Bullseye! */
			return r;
		}
	}
}
#endif

int vector_magnitude(const intv3_t v)
{
	fp_inter_t sum =   (fp_inter_t)v[0] * v[0] +
			(fp_inter_t)v[1] * v[1] +
			(fp_inter_t)v[2] * v[2];

	return int_sqrtf(sum);
}

/* cross_product only works if the vectors magnitudes are around 1<<16. */
void cross_product(const intv3_t v1, const intv3_t v2, intv3_t v)
{
	v[X] = (fp_inter_t)v1[Y] * v2[Z] - (fp_inter_t)v1[Z] * v2[Y];
	v[Y] = (fp_inter_t)v1[Z] * v2[X] - (fp_inter_t)v1[X] * v2[Z];
	v[Z] = (fp_inter_t)v1[X] * v2[Y] - (fp_inter_t)v1[Y] * v2[X];
}

fp_inter_t dot_product(const intv3_t v1, const intv3_t v2)
{
	return (fp_inter_t)v1[X] * v2[X] +
	       (fp_inter_t)v1[Y] * v2[Y] +
	       (fp_inter_t)v1[Z] * v2[Z];
}

void vector_scale(intv3_t v, fp_t s)
{
	v[X] = fp_mul(v[X], s);
	v[Y] = fp_mul(v[Y], s);
	v[Z] = fp_mul(v[Z], s);
}

fp_t cosine_of_angle_diff(const intv3_t v1, const intv3_t v2)
{
	fp_inter_t dotproduct;
	fp_inter_t denominator;

	/*
	 * Angle between two vectors is acos(A dot B / |A|*|B|). To return
	 * cosine of angle between vectors, then don't do acos operation.
	 */
	dotproduct = dot_product(v1, v2);

	denominator = (fp_inter_t)vector_magnitude(v1) * vector_magnitude(v2);

	/* Check for divide by 0 although extremely unlikely. */
	if (!denominator)
		return 0;

	/*
	 * We must shift the dot product first, so that we can represent
	 * fractions.  The answer is always a number with magnitude < 1.0, so
	 * if we don't shift, it will always round down to 0.
	 *
	 * Note that overflow is possible if the dot product is large (that is,
	 * if the vector components are of size (31 - FP_BITS/2) bits.  If that
	 * ever becomes a problem, we could detect this by counting the leading
	 * zeroes of the dot product and shifting the denominator down
	 * partially instead of shifting the dot product up.  With the current
	 * FP_BITS=16, that happens if the vector components are ~2^23.  Which
	 * we're a long way away from; the vector components used in
	 * accelerometer calculations are ~2^11.
	 */
	return fp_div(dotproduct, denominator);
}

/*
 * rotate a vector v
 *  - support input v and output res are the same vector
 */
void rotate(const intv3_t v, const mat33_fp_t R, intv3_t res)
{
	fp_inter_t t[3];

	if (R == NULL) {
		if (v != res)
			memcpy(res, v, sizeof(intv3_t));
		return;
	}


	/* Rotate */
	t[0] =	(fp_inter_t)v[0] * R[0][0] +
		(fp_inter_t)v[1] * R[1][0] +
		(fp_inter_t)v[2] * R[2][0];
	t[1] =	(fp_inter_t)v[0] * R[0][1] +
		(fp_inter_t)v[1] * R[1][1] +
		(fp_inter_t)v[2] * R[2][1];
	t[2] =	(fp_inter_t)v[0] * R[0][2] +
		(fp_inter_t)v[1] * R[1][2] +
		(fp_inter_t)v[2] * R[2][2];

	/* Scale by fixed point shift when writing back to result */
	res[0] = FP_TO_INT(t[0]);
	res[1] = FP_TO_INT(t[1]);
	res[2] = FP_TO_INT(t[2]);
}

void rotate_inv(const intv3_t v, const mat33_fp_t R, intv3_t res)
{
	fp_inter_t t[3];
	fp_t deter;

	if (R == NULL) {
		if (v != res)
			memcpy(res, v, sizeof(intv3_t));
		return;
	}

	deter = fp_mul(R[0][0], (fp_mul(R[1][1], R[2][2]) -
				 fp_mul(R[2][1], R[1][2]))) -
		fp_mul(R[0][1], (fp_mul(R[1][0], R[2][2]) -
				 fp_mul(R[1][2], R[2][0]))) +
		fp_mul(R[0][2], (fp_mul(R[1][0], R[2][1]) -
				 fp_mul(R[1][1], R[2][0])));

	/*
	 * invert the matrix: from
	 * http://stackoverflow.com/questions/983999/
	 * simple-3x3-matrix-inverse-code-c
	 */
	t[0] =  (fp_inter_t)v[0] * (fp_mul(R[1][1], R[2][2]) -
				 fp_mul(R[2][1], R[1][2])) -
		(fp_inter_t)v[1] * (fp_mul(R[1][0], R[2][2]) -
				 fp_mul(R[1][2], R[2][0])) +
		(fp_inter_t)v[2] * (fp_mul(R[1][0], R[2][1]) -
				 fp_mul(R[2][0], R[1][1]));

	t[1] =	(fp_inter_t)v[0] * (fp_mul(R[0][1], R[2][2]) -
				 fp_mul(R[0][2], R[2][1])) * -1 +
		(fp_inter_t)v[1] * (fp_mul(R[0][0], R[2][2]) -
				 fp_mul(R[0][2], R[2][0])) -
		(fp_inter_t)v[2] * (fp_mul(R[0][0], R[2][1]) -
				 fp_mul(R[2][0], R[0][1]));

	t[2] =	(fp_inter_t)v[0] * (fp_mul(R[0][1], R[1][2]) -
				 fp_mul(R[0][2], R[1][1])) -
		(fp_inter_t)v[1] * (fp_mul(R[0][0], R[1][2]) -
				 fp_mul(R[1][0], R[0][2])) +
		(fp_inter_t)v[2] * (fp_mul(R[0][0], R[1][1]) -
				 fp_mul(R[1][0], R[0][1]));

	/* Scale by fixed point shift when writing back to result */
	res[0] = FP_TO_INT(fp_div(t[0], deter));
	res[1] = FP_TO_INT(fp_div(t[1], deter));
	res[2] = FP_TO_INT(fp_div(t[2], deter));
}

/* division that round to the nearest integer */
int round_divide(int64_t dividend, int divisor)
{
	return (dividend > 0) ^ (divisor > 0) ?
		(dividend - divisor / 2) / divisor :
		(dividend + divisor / 2) / divisor;
}