From 1bee6e670b85d5971d455b454987b52eae066a59 Mon Sep 17 00:00:00 2001
From: Alan Evans <thealanevans@googlemail.com>
Date: Tue, 9 Jun 2015 21:01:06 +0100
Subject: [PATCH 1/6] CORDIC sincos implementation from
 http://www.dcs.gla.ac.uk/~jhw/cordic/

---
 src/math/p_sincos.c | 87 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 82 insertions(+), 5 deletions(-)

diff --git a/src/math/p_sincos.c b/src/math/p_sincos.c
index 4218be6..569e4fb 100644
--- a/src/math/p_sincos.c
+++ b/src/math/p_sincos.c
@@ -1,4 +1,77 @@
 #include <pal.h>
+#include <pal_math.h>
+#include <pal_base.h>
+
+#define SIN_COS_ITERATIONS 17
+
+//Cordic in 32 bit signed fixed point math
+//Function is valid for arguments in range -pi/2 -- pi/2
+//for values pi/2--pi: value = half_pi-(theta-half_pi) and similarly for values -pi---pi/2
+//
+// 1.0 = 1073741824
+// 1/k = 0.6072529350088812561694
+// pi = 3.1415926535897932384626
+//Constants
+#define cordic_1K 0x26DD3B6A
+#define half_pi 0x6487ED51
+#define MUL 1073741824.000000
+#define MUL_RECIP 1.0/MUL
+#define CORDIC_NTAB 32
+int cordic_ctab[] = { 0x3243F6A8, 0x1DAC6705, 0x0FADBAFC, 0x07F56EA6,
+		0x03FEAB76, 0x01FFD55B, 0x00FFFAAA, 0x007FFF55, 0x003FFFEA, 0x001FFFFD,
+		0x000FFFFF, 0x0007FFFF, 0x0003FFFF, 0x0001FFFF, 0x0000FFFF, 0x00007FFF,
+		0x00003FFF, 0x00001FFF, 0x00000FFF, 0x000007FF, 0x000003FF, 0x000001FF,
+		0x000000FF, 0x0000007F, 0x0000003F, 0x0000001F, 0x0000000F, 0x00000008,
+		0x00000004, 0x00000002, 0x00000001, 0x00000000, };
+
+void cordic(int theta, int *s, int *c, int n) {
+	int k, d, tx, ty, tz;
+	int x = cordic_1K, y = 0, z = theta;
+	n = (n > CORDIC_NTAB) ? CORDIC_NTAB : n;
+	for (k = 0; k < n; ++k) {
+		d = z >> 31;
+		//get sign. for other architectures, you might want to use the more portable version
+		//d = z>=0 ? 0 : -1;
+		tx = x - (((y >> k) ^ d) - d);
+		ty = y + (((x >> k) ^ d) - d);
+		tz = z - ((cordic_ctab[k] ^ d) - d);
+		x = tx;
+		y = ty;
+		z = tz;
+	}
+	*c = x;
+	*s = y;
+}
+
+int radiansToPlusMinusM_PI_2(float *radians) {
+	int flip = 0;
+	while (*radians > M_PI)
+		*radians -= 2 * M_PI;
+	while (*radians < -M_PI)
+		*radians += 2 * M_PI;
+
+	if (*radians < -M_PI_2 || *radians > M_PI_2) {
+		if (*radians < 0) {
+			*radians += M_PI;
+		} else {
+			*radians -= M_PI;
+		}
+		flip = 1; //flip the sign for second or third quadrant
+	}
+	return flip;
+}
+
+void cordicF(float theta, float *s, float *c, int n) {
+	int sint, cint;
+	int flip = radiansToPlusMinusM_PI_2(&theta);
+	cordic(theta * MUL, &sint, &cint, n);
+	*s = sint * MUL_RECIP;
+	*c = cint * MUL_RECIP;
+	if (flip) {
+		*s = -*s;
+		*c = -*c;
+	}
+}
 
 /**
  *
@@ -22,9 +95,13 @@
  * @return      None
  *
  */
-void p_sincos_f32(const float *a, float *c, float *z,
-                  int n, int p, p_team_t team)
-{
-    p_sin_f32(a, c, n, 0, team);
-    p_cos_f32(a, z, n, 0, team);
+void p_sincos_f32(const float *a, float *c, float *z, int n, int p, p_team_t team) {
+	int i;
+	for (i = 0; i < n; i++) {
+		const float angle = a[i];
+		float *pcos = z + i;
+		float *psin = c + i;
+		cordicF(angle, psin, pcos, SIN_COS_ITERATIONS);
+	}
 }
+

From 2ba67a8e04569ee4a9f35fe25aa04207347f1f72 Mon Sep 17 00:00:00 2001
From: Alan Evans <thealanevans@googlemail.com>
Date: Tue, 9 Jun 2015 22:10:29 +0100
Subject: [PATCH 2/6] Replace while with my own normalize function to map
 radians to the range [-pi..pi]

---
 src/math/p_sincos.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/math/p_sincos.c b/src/math/p_sincos.c
index 569e4fb..ae6faab 100644
--- a/src/math/p_sincos.c
+++ b/src/math/p_sincos.c
@@ -43,12 +43,19 @@ void cordic(int theta, int *s, int *c, int n) {
 	*s = y;
 }
 
+float normalizeRadiansToPlusMinusM_PI(float radians) {
+	int sign = radians < 0 ? -1 : 1;
+	radians = sign * radians;
+	int revolutions = (int) (radians * M_1_PI) + 1;
+	revolutions = (revolutions >> 1) << 1;
+
+	radians = radians - revolutions * M_PI;
+	return sign * radians;
+}
+
 int radiansToPlusMinusM_PI_2(float *radians) {
 	int flip = 0;
-	while (*radians > M_PI)
-		*radians -= 2 * M_PI;
-	while (*radians < -M_PI)
-		*radians += 2 * M_PI;
+	*radians = normalizeRadiansToPlusMinusM_PI(*radians);
 
 	if (*radians < -M_PI_2 || *radians > M_PI_2) {
 		if (*radians < 0) {

From 9ed885130405f148d8a45d28987de844601479bf Mon Sep 17 00:00:00 2001
From: Alan Evans <thealanevans@googlemail.com>
Date: Tue, 9 Jun 2015 23:05:29 +0100
Subject: [PATCH 3/6] Improved code from (x>>1)<<1 to & ~1 to remove last bit.

---
 src/math/p_sincos.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/math/p_sincos.c b/src/math/p_sincos.c
index ae6faab..6bd04b7 100644
--- a/src/math/p_sincos.c
+++ b/src/math/p_sincos.c
@@ -46,8 +46,7 @@ void cordic(int theta, int *s, int *c, int n) {
 float normalizeRadiansToPlusMinusM_PI(float radians) {
 	int sign = radians < 0 ? -1 : 1;
 	radians = sign * radians;
-	int revolutions = (int) (radians * M_1_PI) + 1;
-	revolutions = (revolutions >> 1) << 1;
+	int revolutions = ((int) (radians * M_1_PI) + 1) & ~1;
 
 	radians = radians - revolutions * M_PI;
 	return sign * radians;

From 733b906e0be2f44a06855902912e2ca8d06b11dd Mon Sep 17 00:00:00 2001
From: Alan Evans <thealanevans@googlemail.com>
Date: Tue, 9 Jun 2015 23:28:28 +0100
Subject: [PATCH 4/6] Rename variable. Remove branch code

---
 src/math/p_sincos.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/math/p_sincos.c b/src/math/p_sincos.c
index 6bd04b7..6a8dd9a 100644
--- a/src/math/p_sincos.c
+++ b/src/math/p_sincos.c
@@ -44,11 +44,10 @@ void cordic(int theta, int *s, int *c, int n) {
 }
 
 float normalizeRadiansToPlusMinusM_PI(float radians) {
-	int sign = radians < 0 ? -1 : 1;
+	int sign = ((radians > 0) << 1) - 1;
 	radians = sign * radians;
-	int revolutions = ((int) (radians * M_1_PI) + 1) & ~1;
-
-	radians = radians - revolutions * M_PI;
+	int excessHalfRevolutions = ((int) (radians * M_1_PI) + 1) & ~1;
+	radians = radians - excessHalfRevolutions * M_PI;
 	return sign * radians;
 }
 

From 6c804f319973b59abab8092c9b632510cb68fb3c Mon Sep 17 00:00:00 2001
From: Alan Evans <thealanevans@googlemail.com>
Date: Tue, 9 Jun 2015 23:37:49 +0100
Subject: [PATCH 5/6] One shift rather than &

---
 src/math/p_sincos.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/math/p_sincos.c b/src/math/p_sincos.c
index 6a8dd9a..07999ff 100644
--- a/src/math/p_sincos.c
+++ b/src/math/p_sincos.c
@@ -46,8 +46,8 @@ void cordic(int theta, int *s, int *c, int n) {
 float normalizeRadiansToPlusMinusM_PI(float radians) {
 	int sign = ((radians > 0) << 1) - 1;
 	radians = sign * radians;
-	int excessHalfRevolutions = ((int) (radians * M_1_PI) + 1) & ~1;
-	radians = radians - excessHalfRevolutions * M_PI;
+	int excessRevolutions = ((int) (radians * M_1_PI) + 1) >> 1;
+	radians = radians - excessRevolutions * (M_PI * 2);
 	return sign * radians;
 }
 
@@ -100,7 +100,8 @@ void cordicF(float theta, float *s, float *c, int n) {
  * @return      None
  *
  */
-void p_sincos_f32(const float *a, float *c, float *z, int n, int p, p_team_t team) {
+void p_sincos_f32(const float *a, float *c, float *z, int n, int p,
+		p_team_t team) {
 	int i;
 	for (i = 0; i < n; i++) {
 		const float angle = a[i];

From 2ff61671104e09eb7e03777ffdf767c91264a934 Mon Sep 17 00:00:00 2001
From: Alan Evans <alan.evans@burrows.yr.com>
Date: Thu, 11 Jun 2015 09:16:54 +0100
Subject: [PATCH 6/6] Faster normalization

---
 src/math/p_sincos.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/math/p_sincos.c b/src/math/p_sincos.c
index 07999ff..3e73b11 100644
--- a/src/math/p_sincos.c
+++ b/src/math/p_sincos.c
@@ -44,11 +44,17 @@ void cordic(int theta, int *s, int *c, int n) {
 }
 
 float normalizeRadiansToPlusMinusM_PI(float radians) {
-	int sign = ((radians > 0) << 1) - 1;
-	radians = sign * radians;
-	int excessRevolutions = ((int) (radians * M_1_PI) + 1) >> 1;
-	radians = radians - excessRevolutions * (M_PI * 2);
-	return sign * radians;
+	unsigned int *radiansBits = (unsigned int *)&radians;
+	unsigned int signBit = *radiansBits & 0x80000000u;
+	*radiansBits = *radiansBits ^ signBit; //remove any negative bit
+
+	int revolutions = (int) (radians * M_1_PI) + 1;
+	revolutions = revolutions >> 1; // div 2
+
+	radians = radians - revolutions * (2 * M_PI); //subtract whole revolutions, now in range [-pi..pi]
+
+	*radiansBits = *radiansBits ^ signBit; //if was negative, flip negative bit
+	return radians;
 }
 
 int radiansToPlusMinusM_PI_2(float *radians) {