From e538e288376bc2d26bc69dfc598fe921e7325add Mon Sep 17 00:00:00 2001
From: Ram Mohan M <ram.mohan@ittiam.com>
Date: Fri, 29 Nov 2024 14:39:04 +0530
Subject: [PATCH] update weights used in rgb to yuv conversion equations

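The Display P3 color space was using the BT.601 equations for rgb <-> yuv
conversion. Use weights derived from the Display P3 primaries instead.
BT.601 is kept as a separate yuv encoding for the JPEG encode path.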

Test: ./ultrahdr_unit_test

Change-Id: I9306fe13c4651c7d55a830d0597924a8d26d2745
---
 examples/ultrahdr_app.cpp             |  38 +++---
 lib/include/ultrahdr/gainmapmath.h    |  25 +++-
 lib/include/ultrahdr/ultrahdrcommon.h |   2 +
 lib/src/dsp/arm/gainmapmath_neon.cpp  |  86 ++++++++----
 lib/src/gainmapmath.cpp               | 190 ++++++++++++++++++--------
 lib/src/jpegr.cpp                     |  31 +++--
 tests/gainmapmath_test.cpp            |  40 +++---
 7 files changed, 270 insertions(+), 142 deletions(-)

diff --git a/examples/ultrahdr_app.cpp b/examples/ultrahdr_app.cpp
index 3f032054..6cae7062 100644
--- a/examples/ultrahdr_app.cpp
+++ b/examples/ultrahdr_app.cpp
@@ -32,23 +32,23 @@
 
 #include "ultrahdr_api.h"
 
-const float BT601YUVtoRGBMatrix[9] = {
-    1.f, 0.f, 1.402f, 1.f, (-0.202008f / 0.587f), (-0.419198f / 0.587f), 1.0f, 1.772f, 0.0f};
+const float DisplayP3YUVtoRGBMatrix[9] = {
+    1.f, 0.f, 1.542f, 1.f, (-0.146023f / 0.6917f), (-0.353118f / 0.6917f), 1.0f, 1.8414f, 0.0f};
 const float BT709YUVtoRGBMatrix[9] = {
     1.f,  0.f,     1.5748f, 1.f, (-0.13397432f / 0.7152f), (-0.33480248f / 0.7152f),
     1.0f, 1.8556f, 0.0f};
 const float BT2020YUVtoRGBMatrix[9] = {
     1.f, 0.f, 1.4746f, 1.f, (-0.11156702f / 0.6780f), (-0.38737742f / 0.6780f), 1.f, 1.8814f, 0.f};
 
-const float BT601RGBtoYUVMatrix[9] = {0.299f,
-                                      0.587f,
-                                      0.114f,
-                                      (-0.299f / 1.772f),
-                                      (-0.587f / 1.772f),
-                                      0.5f,
-                                      0.5f,
-                                      (-0.587f / 1.402f),
-                                      (-0.114f / 1.402f)};
+const float DisplayP3RGBtoYUVMatrix[9] = {0.229f,
+                                          0.6917f,
+                                          0.0793f,
+                                          (-0.229f / 1.8414f),
+                                          (-0.6917f / 1.8414f),
+                                          0.5f,
+                                          0.5f,
+                                          (-0.6917f / 1.542f),
+                                          (-0.0793f / 1.542f)};
 const float BT709RGBtoYUVMatrix[9] = {0.2126f,
                                       0.7152f,
                                       0.0722f,
@@ -892,7 +892,7 @@ bool UltraHdrAppInput::convertP010ToRGBImage() {
   } else if (mHdrCg == UHDR_CG_BT_2100) {
     coeffs = BT2020YUVtoRGBMatrix;
   } else if (mHdrCg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601YUVtoRGBMatrix;
+    coeffs = DisplayP3YUVtoRGBMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mHdrCg << " using BT2020Matrix"
               << std::endl;
@@ -984,15 +984,15 @@ bool UltraHdrAppInput::convertYuv420ToRGBImage() {
   uint8_t* u = static_cast<uint8_t*>(mRawYuv420Image.planes[UHDR_PLANE_U]);
   uint8_t* v = static_cast<uint8_t*>(mRawYuv420Image.planes[UHDR_PLANE_V]);
 
-  const float* coeffs = BT601YUVtoRGBMatrix;
+  const float* coeffs = BT709YUVtoRGBMatrix;
   if (mSdrCg == UHDR_CG_BT_709) {
     coeffs = BT709YUVtoRGBMatrix;
   } else if (mSdrCg == UHDR_CG_BT_2100) {
     coeffs = BT2020YUVtoRGBMatrix;
   } else if (mSdrCg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601YUVtoRGBMatrix;
+    coeffs = DisplayP3YUVtoRGBMatrix;
   } else {
-    std::cerr << "color matrix not present for gamut " << mSdrCg << " using BT601Matrix"
+    std::cerr << "color matrix not present for gamut " << mSdrCg << " using BT709Matrix"
               << std::endl;
   }
   for (size_t i = 0; i < mRawYuv420Image.h; i++) {
@@ -1054,16 +1054,16 @@ bool UltraHdrAppInput::convertRgba8888ToYUV444Image() {
   uint8_t* uData = static_cast<uint8_t*>(mDecodedUhdrYuv444Image.planes[UHDR_PLANE_U]);
   uint8_t* vData = static_cast<uint8_t*>(mDecodedUhdrYuv444Image.planes[UHDR_PLANE_V]);
 
-  const float* coeffs = BT601RGBtoYUVMatrix;
+  const float* coeffs = BT709RGBtoYUVMatrix;
   if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_709) {
     coeffs = BT709RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_2100) {
     coeffs = BT2020RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601RGBtoYUVMatrix;
+    coeffs = DisplayP3RGBtoYUVMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mDecodedUhdrRgbImage.cg
-              << " using BT601Matrix" << std::endl;
+              << " using BT709Matrix" << std::endl;
   }
 
   for (size_t i = 0; i < mDecodedUhdrRgbImage.h; i++) {
@@ -1108,7 +1108,7 @@ bool UltraHdrAppInput::convertRgba1010102ToYUV444Image() {
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_BT_2100) {
     coeffs = BT2020RGBtoYUVMatrix;
   } else if (mDecodedUhdrRgbImage.cg == UHDR_CG_DISPLAY_P3) {
-    coeffs = BT601RGBtoYUVMatrix;
+    coeffs = DisplayP3RGBtoYUVMatrix;
   } else {
     std::cerr << "color matrix not present for gamut " << mDecodedUhdrRgbImage.cg
               << " using BT2020Matrix" << std::endl;
diff --git a/lib/include/ultrahdr/gainmapmath.h b/lib/include/ultrahdr/gainmapmath.h
index b51a9771..82945bb9 100644
--- a/lib/include/ultrahdr/gainmapmath.h
+++ b/lib/include/ultrahdr/gainmapmath.h
@@ -347,6 +347,13 @@ Color pqInvOetfLUT(Color e_gamma);
 constexpr int32_t kPqInvOETFPrecision = 12;
 constexpr int32_t kPqInvOETFNumEntries = 1 << kPqInvOETFPrecision;
 
+////////////////////////////////////////////////////////////////////////////////
+// BT.601 transformations
+
+// BT.601 rgb <-> yuv conversion
+Color Bt601RgbToYuv(Color e_gamma);
+Color Bt601YuvToRgb(Color e_gamma);
+
 // util class to prepare look up tables for oetf/eotf functions
 class LookUpTable {
  public:
@@ -415,20 +422,26 @@ Color bt2100ToP3(Color e);
 
 // convert between yuv encodings
 extern const std::array<float, 9> kYuvBt709ToBt601;
+extern const std::array<float, 9> kYuvBt709ToDisplayP3;
 extern const std::array<float, 9> kYuvBt709ToBt2100;
-extern const std::array<float, 9> kYuvBt601ToBt709;
-extern const std::array<float, 9> kYuvBt601ToBt2100;
-extern const std::array<float, 9> kYuvBt2100ToBt709;
+extern const std::array<float, 9> kYuvDisplayP3ToBt601;
+extern const std::array<float, 9> kYuvDisplayP3ToBt709;
+extern const std::array<float, 9> kYuvDisplayP3ToBt2100;
 extern const std::array<float, 9> kYuvBt2100ToBt601;
+extern const std::array<float, 9> kYuvBt2100ToBt709;
+extern const std::array<float, 9> kYuvBt2100ToDisplayP3;
 
 #if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
 
 extern const int16_t kYuv709To601_coeffs_neon[8];
+extern const int16_t kYuv709ToP3_coeffs_neon[8];
 extern const int16_t kYuv709To2100_coeffs_neon[8];
-extern const int16_t kYuv601To709_coeffs_neon[8];
-extern const int16_t kYuv601To2100_coeffs_neon[8];
-extern const int16_t kYuv2100To709_coeffs_neon[8];
+extern const int16_t kYuvP3To601_coeffs_neon[8];
+extern const int16_t kYuvP3To709_coeffs_neon[8];
+extern const int16_t kYuvP3To2100_coeffs_neon[8];
 extern const int16_t kYuv2100To601_coeffs_neon[8];
+extern const int16_t kYuv2100To709_coeffs_neon[8];
+extern const int16_t kYuv2100ToP3_coeffs_neon[8];
 
 /*
  * The Y values are provided at half the width of U & V values to allow use of the widening
diff --git a/lib/include/ultrahdr/ultrahdrcommon.h b/lib/include/ultrahdr/ultrahdrcommon.h
index 48238441..814e814a 100644
--- a/lib/include/ultrahdr/ultrahdrcommon.h
+++ b/lib/include/ultrahdr/ultrahdrcommon.h
@@ -159,6 +159,8 @@
 
 static const uhdr_error_info_t g_no_error = {UHDR_CODEC_OK, 0, ""};
 
+static const int UHDR_CG_BT_601 = 3; /**< BT.601 */
+
 namespace ultrahdr {
 
 // ===============================================================================================
diff --git a/lib/src/dsp/arm/gainmapmath_neon.cpp b/lib/src/dsp/arm/gainmapmath_neon.cpp
index 306a971a..9728d8e2 100644
--- a/lib/src/dsp/arm/gainmapmath_neon.cpp
+++ b/lib/src/dsp/arm/gainmapmath_neon.cpp
@@ -35,12 +35,19 @@ namespace ultrahdr {
 // {Y1, Y2, U1, U2, V1, V2, 0, 0}
 
 // Yuv Bt709 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.101579f * U) + ( 0.196076f * V)
-// U' = (0.0f * Y) + ( 0.989854f * U) + (-0.110653f * V)
-// V' = (0.0f * Y) + (-0.072453f * U) + ( 0.983398f * V)
+// Y' = (1.0 * Y) + ( 0.101579 * U) + ( 0.196076 * V)
+// U' = (0.0 * Y) + ( 0.989854 * U) + (-0.110653 * V)
+// V' = (0.0 * Y) + (-0.072453 * U) + ( 0.983398 * V)
 ALIGNED(16)
 const int16_t kYuv709To601_coeffs_neon[8] = {1664, 3213, 16218, -1813, -1187, 16112, 0, 0};
 
+// Yuv Bt709 -> Yuv Display P3
+// Y' = (1.0 * Y) + ( 0.017545 * U) + ( 0.03677 * V)
+// U' = (0.0 * Y) + ( 0.998169 * U) + (-0.019968 * V)
+// V' = (0.0 * Y) + (-0.011378 * U) + ( 0.997393 * V)
+ALIGNED(16)
+const int16_t kYuv709ToP3_coeffs_neon[8] = {287, 602, 16354, -327, -186, 16341, 0, 0};
+
 // Yuv Bt709 -> Yuv Bt2100
 // Y' = (1.0f * Y) + (-0.016969f * U) + ( 0.096312f * V)
 // U' = (0.0f * Y) + ( 0.995306f * U) + (-0.051192f * V)
@@ -48,34 +55,48 @@ const int16_t kYuv709To601_coeffs_neon[8] = {1664, 3213, 16218, -1813, -1187, 16
 ALIGNED(16)
 const int16_t kYuv709To2100_coeffs_neon[8] = {-278, 1578, 16307, -839, 189, 16427, 0, 0};
 
-// Yuv Bt601 -> Yuv Bt709
-// Y' = (1.0f * Y) + (-0.118188f * U) + (-0.212685f * V),
-// U' = (0.0f * Y) + ( 1.018640f * U) + ( 0.114618f * V),
-// V' = (0.0f * Y) + ( 0.075049f * U) + ( 1.025327f * V);
+// Yuv Display P3 -> Yuv Bt601
+// Y' = (1.0 * Y) + ( 0.086028 * U) + ( 0.161445 * V)
+// U' = (0.0 * Y) + ( 0.990631 * U) + (-0.091109 * V)
+// V' = (0.0 * Y) + (-0.061361 * U) + ( 0.98474 * V)
 ALIGNED(16)
-const int16_t kYuv601To709_coeffs_neon[8] = {-1936, -3485, 16689, 1878, 1230, 16799, 0, 0};
+const int16_t kYuvP3To601_coeffs_neon[8] = {1409, 2645, 16230, -1493, -1005, 16134, 0, 0};
 
-// Yuv Bt601 -> Yuv Bt2100
-// Y' = (1.0f * Y) + (-0.128245f * U) + (-0.115879f * V)
-// U' = (0.0f * Y) + ( 1.010016f * U) + ( 0.061592f * V)
-// V' = (0.0f * Y) + ( 0.086969f * U) + ( 1.029350f * V)
+// Yuv Display P3 -> Yuv Bt709
+// Y' = (1.0 * Y) + (-0.018002 * U) + (-0.037226 * V)
+// U' = (0.0 * Y) + ( 1.002063 * U) + ( 0.020061 * V)
+// V' = (0.0 * Y) + ( 0.011431 * U) + ( 1.002843 * V)
 ALIGNED(16)
-const int16_t kYuv601To2100_coeffs_neon[8] = {-2101, -1899, 16548, 1009, 1425, 16865, 0, 0};
+const int16_t kYuvP3To709_coeffs_neon[8] = {-295, -610, 16418, 329, 187, 16431, 0, 0};
 
-// Yuv Bt2100 -> Yuv Bt709
-// Y' = (1.0f * Y) + ( 0.018149f * U) + (-0.095132f * V)
-// U' = (0.0f * Y) + ( 1.004123f * U) + ( 0.051267f * V)
-// V' = (0.0f * Y) + (-0.011524f * U) + ( 0.996782f * V)
+// Yuv Display P3 -> Yuv Bt2100
+// Y' = (1.0 * Y) + (-0.033905 * U) + ( 0.059019 * V)
+// U' = (0.0 * Y) + ( 0.996774 * U) + ( -0.03137 * V)
+// V' = (0.0 * Y) + ( 0.022992 * U) + ( 1.005718 * V)
 ALIGNED(16)
-const int16_t kYuv2100To709_coeffs_neon[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
+const int16_t kYuvP3To2100_coeffs_neon[8] = {-555, 967, 16331, -514, 377, 16478, 0, 0};
 
 // Yuv Bt2100 -> Yuv Bt601
-// Y' = (1.0f * Y) + ( 0.117887f * U) + ( 0.105521f * V)
-// U' = (0.0f * Y) + ( 0.995211f * U) + (-0.059549f * V)
-// V' = (0.0f * Y) + (-0.084085f * U) + ( 0.976518f * V)
+// Y' = (1.0 * Y) + ( 0.117887 * U) + ( 0.105521 * V)
+// U' = (0.0 * Y) + ( 0.995211 * U) + (-0.059549 * V)
+// V' = (0.0 * Y) + (-0.084085 * U) + ( 0.976518 * V)
 ALIGNED(16)
 const int16_t kYuv2100To601_coeffs_neon[8] = {1931, 1729, 16306, -976, -1378, 15999, 0, 0};
 
+// Yuv Bt2100 -> Yuv Bt709
+// Y' = (1.0 * Y) + ( 0.018149 * U) + (-0.095132 * V)
+// U' = (0.0 * Y) + ( 1.004123 * U) + ( 0.051267 * V)
+// V' = (0.0 * Y) + (-0.011524 * U) + ( 0.996782 * V)
+ALIGNED(16)
+const int16_t kYuv2100To709_coeffs_neon[8] = {297, -1559, 16452, 840, -189, 16331, 0, 0};
+
+// Yuv Bt2100 -> Yuv Display P3
+// Y' = (1.0 * Y) + ( 0.035343 * U) + ( -0.057581 * V)
+// U' = (0.0 * Y) + ( 1.002515 * U) + ( 0.03127 * V)
+// V' = (0.0 * Y) + (-0.022919 * U) + ( 0.9936 * V)
+ALIGNED(16)
+const int16_t kYuv2100ToP3_coeffs_neon[8] = {579, -943, 16425, 512, -376, 16279, 0, 0};
+
 static inline int16x8_t yConversion_neon(uint8x8_t y, int16x8_t u, int16x8_t v, int16x8_t coeffs) {
   int32x4_t lo = vmull_lane_s16(vget_low_s16(u), vget_low_s16(coeffs), 0);
   int32x4_t hi = vmull_lane_s16(vget_high_s16(u), vget_low_s16(coeffs), 0);
@@ -240,11 +261,14 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
 
   switch (src_encoding) {
     case UHDR_CG_BT_709:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuv709To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
           return status;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv709To601_coeffs_neon;
+          coeffs = kYuv709ToP3_coeffs_neon;
           break;
         case UHDR_CG_BT_2100:
           coeffs = kYuv709To2100_coeffs_neon;
@@ -258,14 +282,17 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
       }
       break;
     case UHDR_CG_DISPLAY_P3:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuvP3To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
-          coeffs = kYuv601To709_coeffs_neon;
+          coeffs = kYuvP3To709_coeffs_neon;
           break;
         case UHDR_CG_DISPLAY_P3:
           return status;
         case UHDR_CG_BT_2100:
-          coeffs = kYuv601To2100_coeffs_neon;
+          coeffs = kYuvP3To2100_coeffs_neon;
           break;
         default:
           status.error_code = UHDR_CODEC_INVALID_PARAM;
@@ -276,12 +303,15 @@ uhdr_error_info_t convertYuv_neon(uhdr_raw_image_t* image, uhdr_color_gamut_t sr
       }
       break;
     case UHDR_CG_BT_2100:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs = kYuv2100To601_coeffs_neon;
+          break;
         case UHDR_CG_BT_709:
           coeffs = kYuv2100To709_coeffs_neon;
           break;
         case UHDR_CG_DISPLAY_P3:
-          coeffs = kYuv2100To601_coeffs_neon;
+          coeffs = kYuv2100ToP3_coeffs_neon;
           break;
         case UHDR_CG_BT_2100:
           return status;
diff --git a/lib/src/gainmapmath.cpp b/lib/src/gainmapmath.cpp
index b14be0e2..ccc31c4e 100644
--- a/lib/src/gainmapmath.cpp
+++ b/lib/src/gainmapmath.cpp
@@ -85,6 +85,73 @@ void ShepardsIDW::fillShepardsIDW(float* weights, int incR, int incB) {
   }
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// NOTES:
+//
+// For Luminance computation from chromaticity coordinates (Rx, Ry, Gx, Gy, Bx, By, Wx, Wy), See
+// https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#PRIMARY_CONVERSION
+//
+// a = ((1 - Wx) / Wy)
+// b = ((1 - Rx) / Ry)
+// c = ((1 - Gx) / Gy)
+// d = ((1 - Bx) / By)
+// p = Wx / Wy
+// q = Rx / Ry
+// r = Gx / Gy
+// s = Bx / By
+//
+// BYNum = ((a - b) * (r - q)) - ((p - q) * (c - b))
+// BYDen = ((d - b) * (r - q)) - ((s - q) * (c - b))
+// roundFactor = 7
+// BY = round(BYNum / BYDen, roundFactor)
+// GY = round((p - q - BY * (s - q)) / (r - q), roundFactor)
+// RY = round(1 - BY - GY, roundFactor)
+//
+// Luminance of pixel(r, g, b) is,
+// Luminance = RY * pixel.r + GY * pixel.g + BY * pixel.b
+//
+// For RGB to YCbCr conversions for a set of primaries, See
+// https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#MODEL_YUV
+// Y' = KR * pixel.r' + (1 - KR - KB) * pixel.g' + KB * pixel.b'
+// Cb' = (pixel.b' - Y') / (2 * (1 - KB))
+// Cr' = (pixel.r' - Y') / (2 * (1 - KR))
+// Here KR and KB are computed as per equations (39) - (44) of ITU H.273.
+// Note that RY = KR, GY = (1 - KR - KB) and BY = KB. This is by construction: H.273 derives KR
+// and KB as the luminance contributions of the red and blue primaries, so luma (Y') uses the
+// same weights as luminance.
+//
+// For YCbCr to RGB conversions for a set of primaries, See
+// https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#MODEL_YUV
+// pixel.r' = Y' + ((2 * (1 - KR)) * Cr')
+// pixel.g' = Y' - ((2 * (KR * (1 - KR) * Cr' +  KB * (1 - KB) * Cb')) / (1 - KR - KB))
+// pixel.b' = Y' + ((2 * (1 - KB)) * Cb')
+//
+// Additional references:
+//
+// sRGB and BT709 share same chromaticity coordinates
+// BT709 (0.640, 0.330,  0.300, 0.600,  0.150, 0.060,  0.3127, 0.3290)
+// BT709 Luminance
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#PRIMARIES_BT709
+// BT709 RGB <-> YCbCr
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#MODEL_BT709
+// sRGB EOTF, EOTF Inv
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#TRANSFER_SRGB
+//
+// DisplayP3 (0.680, 0.320,  0.265, 0.690,  0.150, 0.060,  0.3127, 0.3290)
+// DisplayP3 Luminance
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#PRIMARIES_DISPLAYP3
+//
+// BT2100 (0.708, 0.292,  0.170, 0.797,  0.131, 0.046,  0.3127, 0.3290)
+// BT2100 Luminance
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#PRIMARIES_BT2020
+// BT2100 RGB <-> YCbCr
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#MODEL_BT2020
+// HLG OETF, Inverse OETF, OOTF and Inverse OOTF
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#TRANSFER_HLG
+// PQ EOTF, Inverse EOTF
+// See https://registry.khronos.org/DataFormat/specs/1.3/dataformat.1.3.html#TRANSFER_PQ
+//
+
 ////////////////////////////////////////////////////////////////////////////////
 // sRGB transformations
 
@@ -104,9 +171,6 @@ Color srgbRgbToYuv(Color e_gamma) {
   return {{{y_gamma, (e_gamma.b - y_gamma) / kSrgbCb, (e_gamma.r - y_gamma) / kSrgbCr}}};
 }
 
-// See ITU-R BT.709-6, Section 3.
-// Same derivation to BT.2100's YUV->RGB, below. Similar to srgbRgbToYuv, we
-// can reuse the luminance coefficients since they are the same.
 static const float kSrgbGCb = kSrgbB * kSrgbCb / kSrgbG;
 static const float kSrgbGCr = kSrgbR * kSrgbCr / kSrgbG;
 
@@ -163,22 +227,15 @@ static const float kP3R = 0.2289746f, kP3G = 0.6917385f, kP3B = 0.0792869f;
 
 float p3Luminance(Color e) { return kP3R * e.r + kP3G * e.g + kP3B * e.b; }
 
-// See ITU-R BT.601-7, Sections 2.5.1 and 2.5.2.
-// Unfortunately, calculation of luma signal differs from calculation of
-// luminance for Display-P3, so we can't reuse p3Luminance here.
-static const float kP3YR = 0.299f, kP3YG = 0.587f, kP3YB = 0.114f;
-static const float kP3Cb = 1.772f, kP3Cr = 1.402f;
+static const float kP3Cb = (2 * (1 - kP3B)), kP3Cr = (2 * (1 - kP3R));
 
 Color p3RgbToYuv(Color e_gamma) {
-  float y_gamma = kP3YR * e_gamma.r + kP3YG * e_gamma.g + kP3YB * e_gamma.b;
+  float y_gamma = p3Luminance(e_gamma);
   return {{{y_gamma, (e_gamma.b - y_gamma) / kP3Cb, (e_gamma.r - y_gamma) / kP3Cr}}};
 }
 
-// See ITU-R BT.601-7, Sections 2.5.1 and 2.5.2.
-// Same derivation to BT.2100's YUV->RGB, below. Similar to p3RgbToYuv, we must
-// use luma signal coefficients rather than the luminance coefficients.
-static const float kP3GCb = kP3YB * kP3Cb / kP3YG;
-static const float kP3GCr = kP3YR * kP3Cr / kP3YG;
+static const float kP3GCb = kP3B * kP3Cb / kP3G;
+static const float kP3GCr = kP3R * kP3Cr / kP3G;
 
 Color p3YuvToRgb(Color e_gamma) {
   return {{{clampPixelFloat(e_gamma.y + kP3Cr * e_gamma.v),
@@ -204,31 +261,6 @@ Color bt2100RgbToYuv(Color e_gamma) {
   return {{{y_gamma, (e_gamma.b - y_gamma) / kBt2100Cb, (e_gamma.r - y_gamma) / kBt2100Cr}}};
 }
 
-// See ITU-R BT.2100-2, Table 6, Derivation of colour difference signals.
-//
-// Similar to bt2100RgbToYuv above, we can reuse the luminance coefficients.
-//
-// Derived by inversing bt2100RgbToYuv. The derivation for R and B are  pretty
-// straight forward; we just invert the formulas for U and V above. But deriving
-// the formula for G is a bit more complicated:
-//
-// Start with equation for luminance:
-//   Y = kBt2100R * R + kBt2100G * G + kBt2100B * B
-// Solve for G:
-//   G = (Y - kBt2100R * R - kBt2100B * B) / kBt2100B
-// Substitute equations for R and B in terms YUV:
-//   G = (Y - kBt2100R * (Y + kBt2100Cr * V) - kBt2100B * (Y + kBt2100Cb * U)) / kBt2100B
-// Simplify:
-//   G = Y * ((1 - kBt2100R - kBt2100B) / kBt2100G)
-//     + U * (kBt2100B * kBt2100Cb / kBt2100G)
-//     + V * (kBt2100R * kBt2100Cr / kBt2100G)
-//
-// We then get the following coeficients for calculating G from YUV:
-//
-// Coef for Y = (1 - kBt2100R - kBt2100B) / kBt2100G = 1
-// Coef for U = kBt2100B * kBt2100Cb / kBt2100G = kBt2100GCb = ~0.1645
-// Coef for V = kBt2100R * kBt2100Cr / kBt2100G = kBt2100GCr = ~0.5713
-
 static const float kBt2100GCb = kBt2100B * kBt2100Cb / kBt2100G;
 static const float kBt2100GCr = kBt2100R * kBt2100Cr / kBt2100G;
 
@@ -354,6 +386,26 @@ Color pqInvOetfLUT(Color e_gamma) {
   return {{{pqInvOetfLUT(e_gamma.r), pqInvOetfLUT(e_gamma.g), pqInvOetfLUT(e_gamma.b)}}};
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// BT.601 transformations
+
+// See ITU-R BT.601-7, Sections 2.5.1 and 2.5.2.
+static const float kBt601R = 0.299f, kBt601G = 0.587f, kBt601B = 0.114f;
+static const float kBt601Cb = (2 * (1 - kBt601B)), kBt601Cr = (2 * (1 - kBt601R));
+static const float kBt601GCb = kBt601B * kBt601Cb / kBt601G;
+static const float kBt601GCr = kBt601R * kBt601Cr / kBt601G;
+
+Color Bt601RgbToYuv(Color e_gamma) {
+  float y_gamma = kBt601R * e_gamma.r + kBt601G * e_gamma.g + kBt601B * e_gamma.b;
+  return {{{y_gamma, (e_gamma.b - y_gamma) / kBt601Cb, (e_gamma.r - y_gamma) / kBt601Cr}}};
+}
+
+Color Bt601YuvToRgb(Color e_gamma) {
+  return {{{clampPixelFloat(e_gamma.y + kBt601Cr * e_gamma.v),
+            clampPixelFloat(e_gamma.y - kBt601GCb * e_gamma.u - kBt601GCr * e_gamma.v),
+            clampPixelFloat(e_gamma.y + kBt601Cb * e_gamma.u)}}};
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Color access functions
 
@@ -634,8 +686,7 @@ Color bt2100ToP3(Color e) { return ConvertGamut(e, kBt2100ToP3); }
 
 // All of these conversions are derived from the respective input YUV->RGB conversion followed by
 // the RGB->YUV for the receiving encoding. They are consistent with the RGB<->YUV functions in
-// gainmapmath.cpp, given that we use BT.709 encoding for sRGB and BT.601 encoding for Display-P3,
-// to match DataSpace.
+// gainmapmath.cpp.
 
 // Yuv Bt709 -> Yuv Bt601
 // Y' = (1.0 * Y) + ( 0.101579 * U) + ( 0.196076 * V)
@@ -644,6 +695,13 @@ Color bt2100ToP3(Color e) { return ConvertGamut(e, kBt2100ToP3); }
 const std::array<float, 9> kYuvBt709ToBt601 = {
     1.0f, 0.101579f, 0.196076f, 0.0f, 0.989854f, -0.110653f, 0.0f, -0.072453f, 0.983398f};
 
+// Yuv Bt709 -> Yuv Display P3
+// Y' = (1.0 * Y) + ( 0.017545 * U) + ( 0.03677 * V)
+// U' = (0.0 * Y) + ( 0.998169 * U) + (-0.019968 * V)
+// V' = (0.0 * Y) + (-0.011378 * U) + ( 0.997393 * V)
+const std::array<float, 9> kYuvBt709ToDisplayP3 = {
+    1.0f, 0.017545f, 0.03677f, 0.0f, 0.998169f, -0.019968f, 0.0f, -0.011378f, 0.997393f};
+
 // Yuv Bt709 -> Yuv Bt2100
 // Y' = (1.0 * Y) + (-0.016969 * U) + ( 0.096312 * V)
 // U' = (0.0 * Y) + ( 0.995306 * U) + (-0.051192 * V)
@@ -651,19 +709,33 @@ const std::array<float, 9> kYuvBt709ToBt601 = {
 const std::array<float, 9> kYuvBt709ToBt2100 = {
     1.0f, -0.016969f, 0.096312f, 0.0f, 0.995306f, -0.051192f, 0.0f, 0.011507f, 1.002637f};
 
-// Yuv Bt601 -> Yuv Bt709
-// Y' = (1.0 * Y) + (-0.118188 * U) + (-0.212685 * V)
-// U' = (0.0 * Y) + ( 1.018640 * U) + ( 0.114618 * V)
-// V' = (0.0 * Y) + ( 0.075049 * U) + ( 1.025327 * V)
-const std::array<float, 9> kYuvBt601ToBt709 = {
-    1.0f, -0.118188f, -0.212685f, 0.0f, 1.018640f, 0.114618f, 0.0f, 0.075049f, 1.025327f};
+// Yuv Display P3 -> Yuv Bt601
+// Y' = (1.0 * Y) + ( 0.086028 * U) + ( 0.161445 * V)
+// U' = (0.0 * Y) + ( 0.990631 * U) + (-0.091109 * V)
+// V' = (0.0 * Y) + (-0.061361 * U) + ( 0.98474 * V)
+const std::array<float, 9> kYuvDisplayP3ToBt601 = {
+    1.0f, 0.086028f, 0.161445f, 0.0f, 0.990631f, -0.091109f, 0.0f, -0.061361f, 0.98474f};
+
+// Yuv Display P3 -> Yuv Bt709
+// Y' = (1.0 * Y) + (-0.018002 * U) + (-0.037226 * V)
+// U' = (0.0 * Y) + ( 1.002063 * U) + ( 0.020061 * V)
+// V' = (0.0 * Y) + ( 0.011431 * U) + ( 1.002843 * V)
+const std::array<float, 9> kYuvDisplayP3ToBt709 = {
+    1.0f, -0.018002f, -0.037226f, 0.0f, 1.002063f, 0.020061f, 0.0f, 0.011431f, 1.002843f};
+
+// Yuv Display P3 -> Yuv Bt2100
+// Y' = (1.0 * Y) + (-0.033905 * U) + ( 0.059019 * V)
+// U' = (0.0 * Y) + ( 0.996774 * U) + ( -0.03137 * V)
+// V' = (0.0 * Y) + ( 0.022992 * U) + ( 1.005718 * V)
+const std::array<float, 9> kYuvDisplayP3ToBt2100 = {
+    1.0f, -0.033905f, 0.059019f, 0.0f, 0.996774f, -0.03137f, 0.0f, 0.022992f, 1.005718f};
 
-// Yuv Bt601 -> Yuv Bt2100
-// Y' = (1.0 * Y) + (-0.128245 * U) + (-0.115879 * V)
-// U' = (0.0 * Y) + ( 1.010016 * U) + ( 0.061592 * V)
-// V' = (0.0 * Y) + ( 0.086969 * U) + ( 1.029350 * V)
-const std::array<float, 9> kYuvBt601ToBt2100 = {
-    1.0f, -0.128245f, -0.115879, 0.0f, 1.010016f, 0.061592f, 0.0f, 0.086969f, 1.029350f};
+// Yuv Bt2100 -> Yuv Bt601
+// Y' = (1.0 * Y) + ( 0.117887 * U) + ( 0.105521 * V)
+// U' = (0.0 * Y) + ( 0.995211 * U) + (-0.059549 * V)
+// V' = (0.0 * Y) + (-0.084085 * U) + ( 0.976518 * V)
+const std::array<float, 9> kYuvBt2100ToBt601 = {
+    1.0f, 0.117887f, 0.105521f, 0.0f, 0.995211f, -0.059549f, 0.0f, -0.084085f, 0.976518f};
 
 // Yuv Bt2100 -> Yuv Bt709
 // Y' = (1.0 * Y) + ( 0.018149 * U) + (-0.095132 * V)
@@ -672,12 +744,12 @@ const std::array<float, 9> kYuvBt601ToBt2100 = {
 const std::array<float, 9> kYuvBt2100ToBt709 = {
     1.0f, 0.018149f, -0.095132f, 0.0f, 1.004123f, 0.051267f, 0.0f, -0.011524f, 0.996782f};
 
-// Yuv Bt2100 -> Yuv Bt601
-// Y' = (1.0 * Y) + ( 0.117887 * U) + ( 0.105521 * V)
-// U' = (0.0 * Y) + ( 0.995211 * U) + (-0.059549 * V)
-// V' = (0.0 * Y) + (-0.084085 * U) + ( 0.976518 * V)
-const std::array<float, 9> kYuvBt2100ToBt601 = {
-    1.0f, 0.117887f, 0.105521f, 0.0f, 0.995211f, -0.059549f, 0.0f, -0.084085f, 0.976518f};
+// Yuv Bt2100 -> Yuv Display P3
+// Y' = (1.0 * Y) + ( 0.035343 * U) + ( -0.057581 * V)
+// U' = (0.0 * Y) + ( 1.002515 * U) + ( 0.03127 * V)
+// V' = (0.0 * Y) + (-0.022919 * U) + ( 0.9936 * V)
+const std::array<float, 9> kYuvBt2100ToDisplayP3 = {
+    1.0f, 0.035343f, -0.057581f, 0.0f, 1.002515f, 0.03127f, 0.0f, -0.022919f, 0.9936f};
 
 Color yuvColorGamutConversion(Color e_gamma, const std::array<float, 9>& coeffs) {
   const float y = e_gamma.y * std::get<0>(coeffs) + e_gamma.u * std::get<1>(coeffs) +
diff --git a/lib/src/jpegr.cpp b/lib/src/jpegr.cpp
index 1d7d6f55..372b6774 100644
--- a/lib/src/jpegr.cpp
+++ b/lib/src/jpegr.cpp
@@ -264,9 +264,11 @@ uhdr_error_info_t JpegR::encodeJPEGR(uhdr_raw_image_t* hdr_intent, uhdr_raw_imag
 
   // convert to bt601 YUV encoding for JPEG encode
 #if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
-  UHDR_ERR_CHECK(convertYuv_neon(sdr_intent_yuv, sdr_intent_yuv->cg, UHDR_CG_DISPLAY_P3));
+  UHDR_ERR_CHECK(
+      convertYuv_neon(sdr_intent_yuv, sdr_intent_yuv->cg, (uhdr_color_gamut_t)UHDR_CG_BT_601));
 #else
-  UHDR_ERR_CHECK(convertYuv(sdr_intent_yuv, sdr_intent_yuv->cg, UHDR_CG_DISPLAY_P3));
+  UHDR_ERR_CHECK(
+      convertYuv(sdr_intent_yuv, sdr_intent_yuv->cg, (uhdr_color_gamut_t)UHDR_CG_BT_601));
 #endif
 
   // compress sdr image
@@ -432,11 +434,14 @@ uhdr_error_info_t JpegR::convertYuv(uhdr_raw_image_t* image, uhdr_color_gamut_t
 
   switch (src_encoding) {
     case UHDR_CG_BT_709:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs_ptr = &kYuvBt709ToBt601;
+          break;
         case UHDR_CG_BT_709:
           return status;
         case UHDR_CG_DISPLAY_P3:
-          coeffs_ptr = &kYuvBt709ToBt601;
+          coeffs_ptr = &kYuvBt709ToDisplayP3;
           break;
         case UHDR_CG_BT_2100:
           coeffs_ptr = &kYuvBt709ToBt2100;
@@ -450,14 +455,17 @@ uhdr_error_info_t JpegR::convertYuv(uhdr_raw_image_t* image, uhdr_color_gamut_t
       }
       break;
     case UHDR_CG_DISPLAY_P3:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs_ptr = &kYuvDisplayP3ToBt601;
+          break;
         case UHDR_CG_BT_709:
-          coeffs_ptr = &kYuvBt601ToBt709;
+          coeffs_ptr = &kYuvDisplayP3ToBt709;
           break;
         case UHDR_CG_DISPLAY_P3:
           return status;
         case UHDR_CG_BT_2100:
-          coeffs_ptr = &kYuvBt601ToBt2100;
+          coeffs_ptr = &kYuvDisplayP3ToBt2100;
           break;
         default:
           status.error_code = UHDR_CODEC_INVALID_PARAM;
@@ -468,12 +476,15 @@ uhdr_error_info_t JpegR::convertYuv(uhdr_raw_image_t* image, uhdr_color_gamut_t
       }
       break;
     case UHDR_CG_BT_2100:
-      switch (dst_encoding) {
+      switch ((int)dst_encoding) {
+        case UHDR_CG_BT_601:
+          coeffs_ptr = &kYuvBt2100ToBt601;
+          break;
         case UHDR_CG_BT_709:
           coeffs_ptr = &kYuvBt2100ToBt709;
           break;
         case UHDR_CG_DISPLAY_P3:
-          coeffs_ptr = &kYuvBt2100ToBt601;
+          coeffs_ptr = &kYuvBt2100ToDisplayP3;
           break;
         case UHDR_CG_BT_2100:
           return status;
@@ -687,7 +698,7 @@ uhdr_error_info_t JpegR::generateGainMap(uhdr_raw_image_t* sdr_intent, uhdr_raw_
   }
 
   if (sdr_is_601) {
-    sdrYuvToRgbFn = p3YuvToRgb;
+    sdrYuvToRgbFn = Bt601YuvToRgb;
   }
 
   unsigned int image_width = sdr_intent->w;
diff --git a/tests/gainmapmath_test.cpp b/tests/gainmapmath_test.cpp
index 82d098f0..3fca82f9 100644
--- a/tests/gainmapmath_test.cpp
+++ b/tests/gainmapmath_test.cpp
@@ -91,9 +91,9 @@ class GainMapMathTest : public testing::Test {
   Color SrgbYuvGreen() { return {{{0.7152f, -0.38543f, -0.45415f}}}; }
   Color SrgbYuvBlue() { return {{{0.0722f, 0.5f, -0.04585f}}}; }
 
-  Color P3YuvRed() { return {{{0.299f, -0.16874f, 0.5f}}}; }
-  Color P3YuvGreen() { return {{{0.587f, -0.33126f, -0.41869f}}}; }
-  Color P3YuvBlue() { return {{{0.114f, 0.5f, -0.08131f}}}; }
+  Color P3YuvRed() { return {{{0.229f, -0.124362f, 0.5f}}}; }
+  Color P3YuvGreen() { return {{{0.6917f, -0.375638f, -0.448573f}}}; }
+  Color P3YuvBlue() { return {{{0.0793f, 0.5f, -0.051427f}}}; }
 
   Color Bt2100YuvRed() { return {{{0.2627f, -0.13963f, 0.5f}}}; }
   Color Bt2100YuvGreen() { return {{{0.6780f, -0.36037f, -0.45979f}}}; }
@@ -116,9 +116,9 @@ class GainMapMathTest : public testing::Test {
   Pixel SrgbYuvGreenPixel() { return {182, -98, -116}; }
   Pixel SrgbYuvBluePixel() { return {18, 128, -12}; }
 
-  Pixel P3YuvRedPixel() { return {76, -43, 128}; }
-  Pixel P3YuvGreenPixel() { return {150, -84, -107}; }
-  Pixel P3YuvBluePixel() { return {29, 128, -21}; }
+  Pixel P3YuvRedPixel() { return {58, -32, 128}; }
+  Pixel P3YuvGreenPixel() { return {176, -96, -114}; }
+  Pixel P3YuvBluePixel() { return {20, 128, -13}; }
 
   Pixel Bt2100YuvRedPixel() { return {67, -36, 128}; }
   Pixel Bt2100YuvGreenPixel() { return {173, -92, -117}; }
@@ -740,12 +740,12 @@ TEST_F(GainMapMathTest, YuvColorGamutConversion) {
                               const std::array<Color, 5>>,
                    6>
       coeffs_setup_expected{{
-          {kYuvBt709ToBt601, SrgbYuvColors, P3YuvColors},
+          {kYuvBt709ToDisplayP3, SrgbYuvColors, P3YuvColors},
           {kYuvBt709ToBt2100, SrgbYuvColors, Bt2100YuvColors},
-          {kYuvBt601ToBt709, P3YuvColors, SrgbYuvColors},
-          {kYuvBt601ToBt2100, P3YuvColors, Bt2100YuvColors},
+          {kYuvDisplayP3ToBt709, P3YuvColors, SrgbYuvColors},
+          {kYuvDisplayP3ToBt2100, P3YuvColors, Bt2100YuvColors},
           {kYuvBt2100ToBt709, Bt2100YuvColors, SrgbYuvColors},
-          {kYuvBt2100ToBt601, Bt2100YuvColors, P3YuvColors},
+          {kYuvBt2100ToDisplayP3, Bt2100YuvColors, P3YuvColors},
       }};
 
   for (const auto& [coeffs, input, expected] : coeffs_setup_expected) {
@@ -788,12 +788,12 @@ TEST_F(GainMapMathTest, YuvConversionNeon) {
   const std::array<
       std::tuple<const int16_t*, const std::array<Pixel, 5>, const std::array<Pixel, 5>>, 6>
       coeffs_setup_correct{{
-          {kYuv709To601_coeffs_neon, SrgbYuvColors, P3YuvColors},
+          {kYuv709ToP3_coeffs_neon, SrgbYuvColors, P3YuvColors},
           {kYuv709To2100_coeffs_neon, SrgbYuvColors, Bt2100YuvColors},
-          {kYuv601To709_coeffs_neon, P3YuvColors, SrgbYuvColors},
-          {kYuv601To2100_coeffs_neon, P3YuvColors, Bt2100YuvColors},
+          {kYuvP3To709_coeffs_neon, P3YuvColors, SrgbYuvColors},
+          {kYuvP3To2100_coeffs_neon, P3YuvColors, Bt2100YuvColors},
           {kYuv2100To709_coeffs_neon, Bt2100YuvColors, SrgbYuvColors},
-          {kYuv2100To601_coeffs_neon, Bt2100YuvColors, P3YuvColors},
+          {kYuv2100ToP3_coeffs_neon, Bt2100YuvColors, P3YuvColors},
       }};
 
   for (const auto& [coeff_ptr, input, expected] : coeffs_setup_correct) {
@@ -889,8 +889,8 @@ TEST_F(GainMapMathTest, TransformYuv420) {
   uint8_t* cr = cb + input.w * input.h / 4;
 
   const std::array<std::array<float, 9>, 6> conversion_coeffs = {
-      kYuvBt709ToBt601,  kYuvBt709ToBt2100, kYuvBt601ToBt709,
-      kYuvBt601ToBt2100, kYuvBt2100ToBt709, kYuvBt2100ToBt601};
+      kYuvBt709ToDisplayP3,  kYuvBt709ToBt2100, kYuvDisplayP3ToBt709,
+      kYuvDisplayP3ToBt2100, kYuvBt2100ToBt709, kYuvBt2100ToDisplayP3};
 
   for (size_t coeffs_idx = 0; coeffs_idx < conversion_coeffs.size(); ++coeffs_idx) {
     auto output = Yuv420Image();
@@ -958,12 +958,12 @@ TEST_F(GainMapMathTest, TransformYuv420) {
 #if (defined(UHDR_ENABLE_INTRINSICS) && (defined(__ARM_NEON__) || defined(__ARM_NEON)))
 TEST_F(GainMapMathTest, TransformYuv420Neon) {
   const std::array<std::pair<const int16_t*, const std::array<float, 9>>, 6> fixed_floating_coeffs{
-      {{kYuv709To601_coeffs_neon, kYuvBt709ToBt601},
+      {{kYuv709ToP3_coeffs_neon, kYuvBt709ToDisplayP3},
        {kYuv709To2100_coeffs_neon, kYuvBt709ToBt2100},
-       {kYuv601To709_coeffs_neon, kYuvBt601ToBt709},
-       {kYuv601To2100_coeffs_neon, kYuvBt601ToBt2100},
+       {kYuvP3To709_coeffs_neon, kYuvDisplayP3ToBt709},
+       {kYuvP3To2100_coeffs_neon, kYuvDisplayP3ToBt2100},
        {kYuv2100To709_coeffs_neon, kYuvBt2100ToBt709},
-       {kYuv2100To601_coeffs_neon, kYuvBt2100ToBt601}}};
+       {kYuv2100ToP3_coeffs_neon, kYuvBt2100ToDisplayP3}}};
 
   for (const auto& [neon_coeffs_ptr, floating_point_coeffs] : fixed_floating_coeffs) {
     uhdr_raw_image_t input = Yuv420Image32x4();