From c8d43b7d8365f784990b7e6271b045f5e32962ba Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 12:35:33 +0200 Subject: [PATCH 1/8] apply formatting --- source/Lib/vvdec/vfgs_fw.c | 874 +++++++++++++++++---------------- source/Lib/vvdec/vfgs_fw.h | 36 +- source/Lib/vvdec/vfgs_hw.c | 465 +++++++++--------- source/Lib/vvdec/vfgs_hw.h | 33 +- source/Lib/vvdec/vvdecimpl.cpp | 170 +++---- 5 files changed, 794 insertions(+), 784 deletions(-) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/vvdec/vfgs_fw.c index e944cd59..cd424a61 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/vvdec/vfgs_fw.c @@ -59,139 +59,140 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#define min(a,b) ((a)<(b)?(a):(b)) -#define round(a,s) (((a)+(1<<((s)-1)))>>(s)) -#define clip(x,lo,hi) ((x)>(hi)?hi:(x)<(lo)?(lo):(x)) +#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) +#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) +#define clip( x, lo, hi ) ( ( x ) > ( hi ) ? hi : ( x ) < ( lo ) ? ( lo ) : ( x ) ) +// clang-format off static const int8 Gaussian_LUT[2048] = { - -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, - -51, 21, 13, -11, -20, -19, 33,-127, 17, -6,-105, 18, 19, 71, 48, -10, - -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, - -1, 120,-127,-108, -49, 9, 14, 127, 122, 109, 52, 127, 2, 7, 114, 19, - 30, 12, 77, 112, 82, -61,-127, 111, -52, -29, 2, -49, -24, 58, -29, -73, - 12, 112, 67, 79, -3,-114, -87, -6, -5, 40, 58, -81, 49, -27, -31, -34, --105, 50, 16, -24, -35, -14, -15,-127, -55, -22, -55,-127,-112, 5, -26, -72, - 127, 127, -2, 41, 87, -65, -16, 55, 19, 91, -81, -65, -64, 35, -7, -54, - 99, -7, 88, 125, -26, 91, 0, 63, 60, -14, -23, 113, -33, 116, 14, 26, - 51, -16, 107, -8, 53, 38, -34, 17, -7, 4, -91, 6, 63, 63, -15, 39, - -36, 19, 55, 17, -51, 40, 33, -37, 126, -39,-118, 17, -30, 0, 19, 98, - 60, 101, -12, -73, -17, -52, 98, 3, 3, 60, 33, -3, -2, 10, -42,-106, - -38, 14, 127, 16,-127, -31, -86, -39, -56, 46, -41, 75, 23, -19, -22, -70, - 74, -54, -2, 32, -45, 17, -92, 59, -64, -67, 56,-102, -29, -87, -34, -92, - 68, 5, -74, -61, 93, -43, 14, -26, -38,-126, -17, 16,-127, 64, 34, 31, - 93, 17, -51, -59, 71, 77, 81, 127, 127, 61, 33,-106, -93, 0, 0, 75, - -69, 71, 127, -19,-111, 30, 23, 15, 2, 39, 92, 5, 42, 2, -6, 38, - 15, 114, -30, -37, 50, 44, 106, 27, 119, 7, -80, 25, -68, -21, 92, -11, - -1, 18, 41, -50, 79,-127, -43, 127, 18, 11, -21, 32, -52, 27, -88, -90, - -39, -19, -10, 24,-118, 72, -24, -44, 2, 12, 86,-107, 39, -33,-127, 47, - 51, -24, -22, 46, 0, 15, -35, -69, -2, -74, 24, -6, 0, 29, -3, 45, - 32, -32, 117, -45, 79, -24, -17,-109, -10, -70, 88, -48, 24, -91, 120, -37, - 50,-127, 58, 32, -82, -10, -17, -7, 46,-127, -15, 89, 127, 17, 98, -39, - -33, 37, 42, -40, -32, -21, 105, -19, 19, 19, -59, -9, 30, 0,-127, 34, - 127, -84, 75, 24, -40, -49,-127,-107, -14, 45, -75, 1, 30, -20, 41, -68, - -40, 12, 127, -3, 5, 20, -73, -59,-127, -3, -3, -53, -6,-119, 93, 120, - -80, -50, 0, 20, -46, 67, 78, -12, -22,-127, 36, -41, 56, 119, -5,-116, - -22, 68, -14, -90, 24, -82, -44,-127, 107, -25, -37, 40, -7, -7, -82, 5, - -87, 44, -34, 9,-127, 39, 70, 49, -63, 74, -49, 109, -27, -89, -47, -39, - 44, 49, -4, 60, -42, 80, 9,-127, -9, -56, -49, 125, -66, 47, 36, 117, - 15, -11, -96, 109, 94, -17, -56, 70, 8, -14, -5, 50, 37, -45, 120, -30, - -76, 40, -46, 6, 3, 69, 17, -78, 1, -79, 6, 127, 43, 26, 127,-127, - 28, -55, -26, 55, 112, 48, 107, -1, -77, -1, 53, -9, -22, -43, 123, 108, - 127, 102, 68, 46, 5, 1, 123, -13, -55, -34, -49, 89, 65,-105, -5, 94, - -53, 62, 45, 30, 46, 18, -35, 15, 41, 47, -98, -24, 94, -75, 127,-114, - 127, -68, 1, -17, 51, -95, 47, 12, 34, -45, -75, 89,-107, -9, -58, -29, --109, -24, 127, -61, -13, 77, -45, 17, 19, 83, -24, 9, 127, -66, 54, 4, - 26, 13, 111, 43,-113, -22, 10, -24, 83, 67, -14, 75,-123, 59, 127, -12, - 99, -19, 64, -38, 54, 9, 7, 61, -56, 3, -57, 113,-104, -59, 3, -9, - -47, 74, 85, -55, -34, 12, 118, 28, 93, -72, 13, -99, -72, -20, 30, 72, - -94, 19, -54, 64, -12, -63, -25, 65, 72, -10, 127, 0,-127, 103, -20, -73, --112,-103, -6, 28, -42, -21, -59, -29, -26, 19, -4, -51, 94, -58, -95, -37, - 35, 20, -69, 127, -19,-127, -22,-120, -53, 37, 74,-127, -1, -12,-119, -53, - -28, 38, 69, 17, 16,-114, 89, 62, 24, 37, -23, 49,-101, -32, -9, -95, - -53, 5, 93, -23, -49, -8, 51, 3, -75, -90, -10, -39, 127, -86, -22, 20, - 20, 113, 75, 52, -31, 92, -63, 7, -12, 46, 36, 101, -43, -17, -53, -7, - -38, -76, -31, -21, 62, 31, 62, 20,-127, 31, 64, 36, 102, -85, -10, 77, - 80, 58, -79, -8, 35, 8, 80, -24, -9, 3, -17, 72, 127, 83, -87, 55, - 18,-119,-123, 36, 10, 127, 56, -55, 113, 13, 26, 32, -13, -48, 22, -13, - 5, 58, 27, 24, 26, -11, -36, 37, -92, 78, 81, 9, 51, 14, 67, -13, - 0, 32, 45, -76, 32, -39, -22, -49,-127, -27, 31, -9, 36, 14, 71, 13, - 57, 12, -53, -86, 53, -44, -35, 2, 127, 12, -66, -44, 46,-115, 3, 10, - 56, -35, 119, -19, -61, 52, -59,-127, -49, -23, 4, -5, 17, -82, -6, 127, - 25, 79, 67, 64, -25, 14, -64, -37,-127, -28, 21, -63, 66, -53, -41, 109, - -62, 15, -22, 13, 29, -63, 20, 27, 95, -44, -59,-116, -10, 79, -49, 22, - -43, -16, 46, -47,-120, -36, -29, -52, -44, 29, 127, -13, 49, -9,-127, 75, - -28, -23, 88, 59, 11, -95, 81, -59, 58, 60, -26, 40, -92, -3, -22, -58, - -45, -59, -22, -53, 71, -29, 66, -32, -23, 14, -17, -66, -24, -28, -62, 47, - 38, 17, 16, -37, -24, -11, 8, -27, -19, 59, 45, -49, -47, -4, -22, -81, - 30, -67,-127, 74, 102, 5, -18, 98, 34, -66, 42, -52, 7, -59, 24, -58, - -19, -24,-118, -73, 91, 15, -16, 79, -32, -79,-127, -36, 41, 77, -83, 2, - 56, 22, -75, 127, -16, -21, 12, 31, 56,-113,-127, 90, 55, 61, 12, 55, - -14,-113, -14, 32, 49, -67, -17, 91, -10, 1, 21, 69, -70, 99, -19,-112, - 66, -90, -10, -9, -71, 127, 50, -81, -49, 24, 61, -61,-111, 7, -41, 127, - 88, -66, 108,-127, -6, 36, -14, 41, -50, 14, 14, 73,-101, -28, 77, 127, - -8,-100, 88, 38, 121, 88,-125, -60, 13, -94,-115, 20, -67, -87, -94,-119, - 44, -28, -30, 18, 5, -53, -61, 20, -43, 11, -77, -60, 13, 29, 3, 6, - -72, 38, -60, -11, 108, -53, 41, 66, -12,-127,-127, -49, 24, 29, 46, 36, - 91, 34, -33, 116, -51, -34, -52, 91, 7, -83, 73, -26,-103, 24, -10, 76, - 84, 5, 68, -80, -13, -17, -32, -48, 20, 50, 26, 10, 63,-104, -14, 37, - 127, 114, 97, 35, 1, -33, -55, 127,-124, -33, 61, -7, 119, -32,-127, -53, - -42, 63, 3, -5, -26, 70, -58, -33, -44, -43, 34, -56,-127, 127, 25, -35, - -11, 16, -81, 29, -58, 40,-127,-127, 20, -47, -11, -36, -63, -52, -32, -82, - 78, -76, -73, 8, 27, -72, -9, -74, -85, -86, -57, 25, 78, -10, -97, 35, - -65, 8, -59, 14, 1, -42, 32, -88, -44, 17, -3, -9, 59, 40, 12,-108, - -40, 24, 34, 18, -28, 2, 51,-110, -4, 100, 1, 65, 22, 0, 127, 61, - 45, 25, -31, 6, 9, -7, -48, 99, 16, 44, -2, -40, 32, -39, -52, 10, --110, -19, 56,-127, 69, 26, 51, 92, 40, 61, -52, 45, -38, 13, 85, 122, - 27, 66, 45,-111, -83, -3, 31, 37, 19, -36, 58, 71, 39, -78, -47, 58, - -78, 8, -62, -36, -14, 61, 42,-127, 71, -4, 24, -54, 52,-127, 67, -4, - -42, 30, -63, 59, -3, -1, -18, -46, -92, -81, -96, -14, -53, -10, -11, -77, - 13, 1, 8, -67,-127, 127, -28, 26, -14, 18, -13, -26, 2, 10, -46, -32, - -15, 27, -31, -59, 59, 77,-121, 28, 40, -54, -62, -31, -21, -37, -32, -6, --127, -25, -60, 70,-127, 112,-127, 127, 88, -7, 116, 110, 53, 87,-127, 3, - 16, 23, 74,-106, -51, 3, 74, -82,-112, -74, 65, 81, 25, 53, 127, -45, - -50,-103, -41, -65, -29, 79, -67, 64, -33, -30, -8, 127, 0, -13, -51, 67, - -14, 5, -92, 29, -35, -8, -90, -57, -3, 36, 43, 44, -31, -69, -7, 36, - 39, -51, 43, -81, 58, 6, 127, 12, 57, 66, 46, 59, -43, -42, 41, -15, --120, 24, 3, -11, 19, -13, 51, 28, 3, 55, -48, -12, -1, 2, 97, -19, - 29, 42, 13, 43, 78, -44, 56,-108, -43, -19, 127, 15, -11, -18, -81, 83, - -37, 77,-109, 15, 65, -50, 43, 12, 13, 27, 28, 61, 57, 30, 26, 106, - -18, 56, 13, 97, 4, -8, -62,-103, 94, 108, -44, 52, 27, -47, -9, 105, - -53, 46, 89, 103, -33, 38, -34, 55, 51, 70, -94, -35, -87,-107, -19, -31, - 9, -19, 79, -14, 77, 5, -19,-107, 85, 21, -45, -39, -42, 9, -29, 74, - 47, -75, 60,-127, 120,-112, -57, -32, 41, 7, 79, 76, 66, 57, 41, -25, - 31, 37, -47, -36, 43, -73, -37, 63, 127, -69, -52, 90, -33, -61, 60, -55, - 44, 15, 4, -67, 13, -92, 64, 29, -39, -3, 83, -2, -38, -85, -86, 58, - 35, -69, -61, 29, -37, -95, -78, 4, 30, -4, -32, -80, -22, -9, -77, 46, - 7, -93, -71, 65, 9, -50, 127, -70, 26, -12, -39,-114, 63,-127,-100, 4, - -32, 111, 22, -60, 65,-101, 26, -42, 21, -59, -27, -74, 2, -94, 6, 126, - 5, 76, -88, -9, -43,-101, 127, 1, 125, 92, -63, 52, 56, 4, 81,-127, - 127, 80, 127, -29, 30, 116, -74, -17, -57, 105, 48, 45, 25, -72, 48, -38, --108, 31, -34, 4, -11, 41,-127, 52,-104, -43, -37, 52, 2, 47, 87, -9, - 77, 27, -41, -25, 90, 86, -56, 75, 10, 33, 78, 58, 127, 127, -7, -73, - 49, -33,-106, -35, 38, 57, 53, -17, -4, 83, 52,-108, 54,-125, 28, 23, - 56, -43, -88, -17, -6, 47, 23, -9, 0, -13, 111, 75, 27, -52, -38, -34, - 39, 30, 66, 39, 38, -64, 38, 3, 21, -32, -51, -28, 54, -38, -87, 20, - 52, 115, 18, -81, -70, 0, -14, -46, -46, -3, 125, 16, -14, 23, -82, -84, - -69, -20, -65,-127, 9, 81, -49, 61, 7, -36, -45, -42, 57, -26, 47, 20, - -85, 46, -13, 41, -37, -75, -60, 86, -78,-127, 12, 50, 2, -3, 13, 47, - 5, 19, -78, -55, -27, 65, -71, 12,-108, 20, -16, 11, -31, 63, -55, 37, - 75, -17, 127, -73, -33, -28,-120, 105, 68, 106,-103,-106, 71, 61, 2, 23, - -3, 33, -5, -15, -67, -15, -23, -54, 15, -63, 76, 58,-110, 1, 83, -27, - 22, 75, -39, -17, -11, 64, -17,-127, -54, -66, 31, 96, 116, 3,-114, -7, --108, -63, 97, 9, 50, 8, 75, -28, 72, 112, -36,-112, 95, -50, 23, -13, - -19, 55, 21, 23, 92, 91, 22, -49, 16, -75, 23, 9, -49, -97, -37, 49, - -36, 36,-127, -86, 43, 127, -24, -24, 84, 83, -35, -34, -12, 109, 102, -38, - 51, -68, 34, 19, -22, 49, -32, 127, 40, 24, -93, -4, -3, 105, 3, -58, - -18, 8, 127, -18, 125, 68, 69, -62, 30, -36, 54, -57, -24, 17, 43, -36, - -27, -57, -67, -21, -10, -49, 68, 12, 65, 4, 48, 55, 127, -75, 44, 89, - -66, -13, -78, -82, -91, 22, 30, 33, -40, -87, -34, 96, -91, 39, 10, -64, - -3, -12, 127, -50, -37, -56, 23, -35, -36, -54, 90, -91, 2, 50, 77, -6, --127, 16, 46, -5, -73, 0, -56, -18, -72, 28, 93, 60, 49, 20, 18, 111, --111, 32, -83, 47, 47, -10, 35, -88, 43, 57, -98, 127, -17, 0, 1, -39, --127, -2, 0, 63, 93, 0, 36, -66, -61, -19, 39,-127, 58, 50, -17, 127, - 88, -43,-108, -51, -16, 7, -36, 68, 46, -14, 107, 40, 57, 7, 19, 8, - 3, 88, -90, -92, -18, -21, -24, 13, 7, -4, -78, -91, -4, 8, -35, -5, - 19, 2,-111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47 + -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, + -51, 21, 13, -11, -20, -19, 33,-127, 17, -6,-105, 18, 19, 71, 48, -10, + -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, + -1, 120,-127,-108, -49, 9, 14, 127, 122, 109, 52, 127, 2, 7, 114, 19, + 30, 12, 77, 112, 82, -61,-127, 111, -52, -29, 2, -49, -24, 58, -29, -73, + 12, 112, 67, 79, -3,-114, -87, -6, -5, 40, 58, -81, 49, -27, -31, -34, + -105, 50, 16, -24, -35, -14, -15,-127, -55, -22, -55,-127,-112, 5, -26, -72, + 127, 127, -2, 41, 87, -65, -16, 55, 19, 91, -81, -65, -64, 35, -7, -54, + 99, -7, 88, 125, -26, 91, 0, 63, 60, -14, -23, 113, -33, 116, 14, 26, + 51, -16, 107, -8, 53, 38, -34, 17, -7, 4, -91, 6, 63, 63, -15, 39, + -36, 19, 55, 17, -51, 40, 33, -37, 126, -39,-118, 17, -30, 0, 19, 98, + 60, 101, -12, -73, -17, -52, 98, 3, 3, 60, 33, -3, -2, 10, -42,-106, + -38, 14, 127, 16,-127, -31, -86, -39, -56, 46, -41, 75, 23, -19, -22, -70, + 74, -54, -2, 32, -45, 17, -92, 59, -64, -67, 56,-102, -29, -87, -34, -92, + 68, 5, -74, -61, 93, -43, 14, -26, -38,-126, -17, 16,-127, 64, 34, 31, + 93, 17, -51, -59, 71, 77, 81, 127, 127, 61, 33,-106, -93, 0, 0, 75, + -69, 71, 127, -19,-111, 30, 23, 15, 2, 39, 92, 5, 42, 2, -6, 38, + 15, 114, -30, -37, 50, 44, 106, 27, 119, 7, -80, 25, -68, -21, 92, -11, + -1, 18, 41, -50, 79,-127, -43, 127, 18, 11, -21, 32, -52, 27, -88, -90, + -39, -19, -10, 24,-118, 72, -24, -44, 2, 12, 86,-107, 39, -33,-127, 47, + 51, -24, -22, 46, 0, 15, -35, -69, -2, -74, 24, -6, 0, 29, -3, 45, + 32, -32, 117, -45, 79, -24, -17,-109, -10, -70, 88, -48, 24, -91, 120, -37, + 50,-127, 58, 32, -82, -10, -17, -7, 46,-127, -15, 89, 127, 17, 98, -39, + -33, 37, 42, -40, -32, -21, 105, -19, 19, 19, -59, -9, 30, 0,-127, 34, + 127, -84, 75, 24, -40, -49,-127,-107, -14, 45, -75, 1, 30, -20, 41, -68, + -40, 12, 127, -3, 5, 20, -73, -59,-127, -3, -3, -53, -6,-119, 93, 120, + -80, -50, 0, 20, -46, 67, 78, -12, -22,-127, 36, -41, 56, 119, -5,-116, + -22, 68, -14, -90, 24, -82, -44,-127, 107, -25, -37, 40, -7, -7, -82, 5, + -87, 44, -34, 9,-127, 39, 70, 49, -63, 74, -49, 109, -27, -89, -47, -39, + 44, 49, -4, 60, -42, 80, 9,-127, -9, -56, -49, 125, -66, 47, 36, 117, + 15, -11, -96, 109, 94, -17, -56, 70, 8, -14, -5, 50, 37, -45, 120, -30, + -76, 40, -46, 6, 3, 69, 17, -78, 1, -79, 6, 127, 43, 26, 127,-127, + 28, -55, -26, 55, 112, 48, 107, -1, -77, -1, 53, -9, -22, -43, 123, 108, + 127, 102, 68, 46, 5, 1, 123, -13, -55, -34, -49, 89, 65,-105, -5, 94, + -53, 62, 45, 30, 46, 18, -35, 15, 41, 47, -98, -24, 94, -75, 127,-114, + 127, -68, 1, -17, 51, -95, 47, 12, 34, -45, -75, 89,-107, -9, -58, -29, + -109, -24, 127, -61, -13, 77, -45, 17, 19, 83, -24, 9, 127, -66, 54, 4, + 26, 13, 111, 43,-113, -22, 10, -24, 83, 67, -14, 75,-123, 59, 127, -12, + 99, -19, 64, -38, 54, 9, 7, 61, -56, 3, -57, 113,-104, -59, 3, -9, + -47, 74, 85, -55, -34, 12, 118, 28, 93, -72, 13, -99, -72, -20, 30, 72, + -94, 19, -54, 64, -12, -63, -25, 65, 72, -10, 127, 0,-127, 103, -20, -73, + -112,-103, -6, 28, -42, -21, -59, -29, -26, 19, -4, -51, 94, -58, -95, -37, + 35, 20, -69, 127, -19,-127, -22,-120, -53, 37, 74,-127, -1, -12,-119, -53, + -28, 38, 69, 17, 16,-114, 89, 62, 24, 37, -23, 49,-101, -32, -9, -95, + -53, 5, 93, -23, -49, -8, 51, 3, -75, -90, -10, -39, 127, -86, -22, 20, + 20, 113, 75, 52, -31, 92, -63, 7, -12, 46, 36, 101, -43, -17, -53, -7, + -38, -76, -31, -21, 62, 31, 62, 20,-127, 31, 64, 36, 102, -85, -10, 77, + 80, 58, -79, -8, 35, 8, 80, -24, -9, 3, -17, 72, 127, 83, -87, 55, + 18,-119,-123, 36, 10, 127, 56, -55, 113, 13, 26, 32, -13, -48, 22, -13, + 5, 58, 27, 24, 26, -11, -36, 37, -92, 78, 81, 9, 51, 14, 67, -13, + 0, 32, 45, -76, 32, -39, -22, -49,-127, -27, 31, -9, 36, 14, 71, 13, + 57, 12, -53, -86, 53, -44, -35, 2, 127, 12, -66, -44, 46,-115, 3, 10, + 56, -35, 119, -19, -61, 52, -59,-127, -49, -23, 4, -5, 17, -82, -6, 127, + 25, 79, 67, 64, -25, 14, -64, -37,-127, -28, 21, -63, 66, -53, -41, 109, + -62, 15, -22, 13, 29, -63, 20, 27, 95, -44, -59,-116, -10, 79, -49, 22, + -43, -16, 46, -47,-120, -36, -29, -52, -44, 29, 127, -13, 49, -9,-127, 75, + -28, -23, 88, 59, 11, -95, 81, -59, 58, 60, -26, 40, -92, -3, -22, -58, + -45, -59, -22, -53, 71, -29, 66, -32, -23, 14, -17, -66, -24, -28, -62, 47, + 38, 17, 16, -37, -24, -11, 8, -27, -19, 59, 45, -49, -47, -4, -22, -81, + 30, -67,-127, 74, 102, 5, -18, 98, 34, -66, 42, -52, 7, -59, 24, -58, + -19, -24,-118, -73, 91, 15, -16, 79, -32, -79,-127, -36, 41, 77, -83, 2, + 56, 22, -75, 127, -16, -21, 12, 31, 56,-113,-127, 90, 55, 61, 12, 55, + -14,-113, -14, 32, 49, -67, -17, 91, -10, 1, 21, 69, -70, 99, -19,-112, + 66, -90, -10, -9, -71, 127, 50, -81, -49, 24, 61, -61,-111, 7, -41, 127, + 88, -66, 108,-127, -6, 36, -14, 41, -50, 14, 14, 73,-101, -28, 77, 127, + -8,-100, 88, 38, 121, 88,-125, -60, 13, -94,-115, 20, -67, -87, -94,-119, + 44, -28, -30, 18, 5, -53, -61, 20, -43, 11, -77, -60, 13, 29, 3, 6, + -72, 38, -60, -11, 108, -53, 41, 66, -12,-127,-127, -49, 24, 29, 46, 36, + 91, 34, -33, 116, -51, -34, -52, 91, 7, -83, 73, -26,-103, 24, -10, 76, + 84, 5, 68, -80, -13, -17, -32, -48, 20, 50, 26, 10, 63,-104, -14, 37, + 127, 114, 97, 35, 1, -33, -55, 127,-124, -33, 61, -7, 119, -32,-127, -53, + -42, 63, 3, -5, -26, 70, -58, -33, -44, -43, 34, -56,-127, 127, 25, -35, + -11, 16, -81, 29, -58, 40,-127,-127, 20, -47, -11, -36, -63, -52, -32, -82, + 78, -76, -73, 8, 27, -72, -9, -74, -85, -86, -57, 25, 78, -10, -97, 35, + -65, 8, -59, 14, 1, -42, 32, -88, -44, 17, -3, -9, 59, 40, 12,-108, + -40, 24, 34, 18, -28, 2, 51,-110, -4, 100, 1, 65, 22, 0, 127, 61, + 45, 25, -31, 6, 9, -7, -48, 99, 16, 44, -2, -40, 32, -39, -52, 10, + -110, -19, 56,-127, 69, 26, 51, 92, 40, 61, -52, 45, -38, 13, 85, 122, + 27, 66, 45,-111, -83, -3, 31, 37, 19, -36, 58, 71, 39, -78, -47, 58, + -78, 8, -62, -36, -14, 61, 42,-127, 71, -4, 24, -54, 52,-127, 67, -4, + -42, 30, -63, 59, -3, -1, -18, -46, -92, -81, -96, -14, -53, -10, -11, -77, + 13, 1, 8, -67,-127, 127, -28, 26, -14, 18, -13, -26, 2, 10, -46, -32, + -15, 27, -31, -59, 59, 77,-121, 28, 40, -54, -62, -31, -21, -37, -32, -6, + -127, -25, -60, 70,-127, 112,-127, 127, 88, -7, 116, 110, 53, 87,-127, 3, + 16, 23, 74,-106, -51, 3, 74, -82,-112, -74, 65, 81, 25, 53, 127, -45, + -50,-103, -41, -65, -29, 79, -67, 64, -33, -30, -8, 127, 0, -13, -51, 67, + -14, 5, -92, 29, -35, -8, -90, -57, -3, 36, 43, 44, -31, -69, -7, 36, + 39, -51, 43, -81, 58, 6, 127, 12, 57, 66, 46, 59, -43, -42, 41, -15, + -120, 24, 3, -11, 19, -13, 51, 28, 3, 55, -48, -12, -1, 2, 97, -19, + 29, 42, 13, 43, 78, -44, 56,-108, -43, -19, 127, 15, -11, -18, -81, 83, + -37, 77,-109, 15, 65, -50, 43, 12, 13, 27, 28, 61, 57, 30, 26, 106, + -18, 56, 13, 97, 4, -8, -62,-103, 94, 108, -44, 52, 27, -47, -9, 105, + -53, 46, 89, 103, -33, 38, -34, 55, 51, 70, -94, -35, -87,-107, -19, -31, + 9, -19, 79, -14, 77, 5, -19,-107, 85, 21, -45, -39, -42, 9, -29, 74, + 47, -75, 60,-127, 120,-112, -57, -32, 41, 7, 79, 76, 66, 57, 41, -25, + 31, 37, -47, -36, 43, -73, -37, 63, 127, -69, -52, 90, -33, -61, 60, -55, + 44, 15, 4, -67, 13, -92, 64, 29, -39, -3, 83, -2, -38, -85, -86, 58, + 35, -69, -61, 29, -37, -95, -78, 4, 30, -4, -32, -80, -22, -9, -77, 46, + 7, -93, -71, 65, 9, -50, 127, -70, 26, -12, -39,-114, 63,-127,-100, 4, + -32, 111, 22, -60, 65,-101, 26, -42, 21, -59, -27, -74, 2, -94, 6, 126, + 5, 76, -88, -9, -43,-101, 127, 1, 125, 92, -63, 52, 56, 4, 81,-127, + 127, 80, 127, -29, 30, 116, -74, -17, -57, 105, 48, 45, 25, -72, 48, -38, + -108, 31, -34, 4, -11, 41,-127, 52,-104, -43, -37, 52, 2, 47, 87, -9, + 77, 27, -41, -25, 90, 86, -56, 75, 10, 33, 78, 58, 127, 127, -7, -73, + 49, -33,-106, -35, 38, 57, 53, -17, -4, 83, 52,-108, 54,-125, 28, 23, + 56, -43, -88, -17, -6, 47, 23, -9, 0, -13, 111, 75, 27, -52, -38, -34, + 39, 30, 66, 39, 38, -64, 38, 3, 21, -32, -51, -28, 54, -38, -87, 20, + 52, 115, 18, -81, -70, 0, -14, -46, -46, -3, 125, 16, -14, 23, -82, -84, + -69, -20, -65,-127, 9, 81, -49, 61, 7, -36, -45, -42, 57, -26, 47, 20, + -85, 46, -13, 41, -37, -75, -60, 86, -78,-127, 12, 50, 2, -3, 13, 47, + 5, 19, -78, -55, -27, 65, -71, 12,-108, 20, -16, 11, -31, 63, -55, 37, + 75, -17, 127, -73, -33, -28,-120, 105, 68, 106,-103,-106, 71, 61, 2, 23, + -3, 33, -5, -15, -67, -15, -23, -54, 15, -63, 76, 58,-110, 1, 83, -27, + 22, 75, -39, -17, -11, 64, -17,-127, -54, -66, 31, 96, 116, 3,-114, -7, + -108, -63, 97, 9, 50, 8, 75, -28, 72, 112, -36,-112, 95, -50, 23, -13, + -19, 55, 21, 23, 92, 91, 22, -49, 16, -75, 23, 9, -49, -97, -37, 49, + -36, 36,-127, -86, 43, 127, -24, -24, 84, 83, -35, -34, -12, 109, 102, -38, + 51, -68, 34, 19, -22, 49, -32, 127, 40, 24, -93, -4, -3, 105, 3, -58, + -18, 8, 127, -18, 125, 68, 69, -62, 30, -36, 54, -57, -24, 17, 43, -36, + -27, -57, -67, -21, -10, -49, 68, 12, 65, 4, 48, 55, 127, -75, 44, 89, + -66, -13, -78, -82, -91, 22, 30, 33, -40, -87, -34, 96, -91, 39, 10, -64, + -3, -12, 127, -50, -37, -56, 23, -35, -36, -54, 90, -91, 2, 50, 77, -6, + -127, 16, 46, -5, -73, 0, -56, -18, -72, 28, 93, 60, 49, 20, 18, 111, + -111, 32, -83, 47, 47, -10, 35, -88, 43, 57, -98, 127, -17, 0, 1, -39, + -127, -2, 0, 63, 93, 0, 36, -66, -61, -19, 39,-127, 58, 50, -17, 127, + 88, -43,-108, -51, -16, 7, -36, 68, 46, -14, 107, 40, 57, 7, 19, 8, + 3, 88, -90, -92, -18, -21, -24, 13, 7, -4, -78, -91, -4, 8, -35, -5, + 19, 2,-111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47 }; static const uint32 Seed_LUT[256] = { @@ -299,337 +300,342 @@ static const uint32 Seed_LUT[256] = { static const int8 DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); +// clang-format on /** Pseudo-random number generator (32-bit) */ -static uint32 prng(uint32 x) +static uint32 prng( uint32 x ) { -#if 1 // same as HW (bit-reversed RDD-5) - uint32 s = ((x << 30) ^ (x << 2)) & 0x80000000; - x = s | (x >> 1); -#else // RDD-5 - uint32 s = ((x >> 30) ^ (x >> 2)) & 1; - x = (x << 1) | s; +#if 1 // same as HW (bit-reversed RDD-5) + uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); +#else // RDD-5 + uint32 s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; + x = ( x << 1 ) | s; #endif - return x; + return x; } /** Apply iDCT2 to block B[64][64] + clipping */ -static void idct2_64(int8 B[][64]) +static void idct2_64( int8 B[][64] ) { - int16 X[64][64]; - int i,j,k; - int32 acc; - - /* 1st pass (DCT2_64'*B) = vertical */ - for (j=0; j<64; j++) - for (i=0; i<64; i++) - { - acc = 256; - for (k=0; k<64; k++) - acc += (int32)DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) - - X[j][i] = (acc >> 9); - } - - /* 2nd pass (...)*DCT2_64 = horizontal + clipping */ - for (j=0; j<64; j++) - for (i=0; i<64; i++) - { - acc = 256; - for (k=0; k<64; k++) - acc += (int32)X[j][k] * DCT2_64[k][i]; - - acc >>= 9; - if (acc > 127) acc = 127; - if (acc < -127) acc = -127; - B[j][i] = acc; - } + int16 X[64][64]; + int i, j, k; + int32 acc; + + /* 1st pass (DCT2_64'*B) = vertical */ + for( j = 0; j < 64; j++ ) + for( i = 0; i < 64; i++ ) + { + acc = 256; + for( k = 0; k < 64; k++ ) + acc += (int32) DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) + + X[j][i] = ( acc >> 9 ); + } + + /* 2nd pass (...)*DCT2_64 = horizontal + clipping */ + for( j = 0; j < 64; j++ ) + for( i = 0; i < 64; i++ ) + { + acc = 256; + for( k = 0; k < 64; k++ ) + acc += (int32) X[j][k] * DCT2_64[k][i]; + + acc >>= 9; + if( acc > 127 ) + acc = 127; + if( acc < -127 ) + acc = -127; + B[j][i] = acc; + } } /** Apply iDCT2 to block B[32][32] + clipping */ -static void idct2_32(int8 B[][32]) +static void idct2_32( int8 B[][32] ) { - int16 X[32][32]; - int i,j,k; - int32 acc; - - /* 1st pass (R32'*B) = vertical */ - for (j=0; j<32; j++) - for (i=0; i<32; i++) - { - acc = 128; - for (k=0; k<32; k++) - acc += (int32)DCT2_64[k*2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) - - X[j][i] = (acc >> 8); - } - - /* 2nd pass (...)*R32 = horizontal + clipping */ - for (j=0; j<32; j++) - for (i=0; i<32; i++) - { - acc = 256; - for (k=0; k<32; k++) - acc += (int32)X[j][k] * DCT2_64[k*2][i]; - - acc >>= 9; - if (acc > 127) acc = 127; - if (acc < -127) acc = -127; - B[j][i] = acc; - } + int16 X[32][32]; + int i, j, k; + int32 acc; + + /* 1st pass (R32'*B) = vertical */ + for( j = 0; j < 32; j++ ) + for( i = 0; i < 32; i++ ) + { + acc = 128; + for( k = 0; k < 32; k++ ) + acc += (int32) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) + + X[j][i] = ( acc >> 8 ); + } + + /* 2nd pass (...)*R32 = horizontal + clipping */ + for( j = 0; j < 32; j++ ) + for( i = 0; i < 32; i++ ) + { + acc = 256; + for( k = 0; k < 32; k++ ) + acc += (int32) X[j][k] * DCT2_64[k * 2][i]; + + acc >>= 9; + if( acc > 127 ) + acc = 127; + if( acc < -127 ) + acc = -127; + B[j][i] = acc; + } } -static void vfgs_make_sei_ff_pattern64(int8 B[][64], int fh, int fv) +static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) { - int k, l; - uint32 n; - fh = 4*(fh+1); - fv = 4*(fv+1); - - n = Seed_LUT[0]; - memset(B, 0, 64*64*sizeof(int8)); - for (l=0; l<64; l++) - for (k=0; k<64; k+=4) - { - if (k1) ? 44 : 82; - height = (suby>1) ? 38 : 73; - - switch (nb_coef) - { - case 6: - // SEI.AR mode - coef[3][2] = ar_coef[1]; // left - coef[2][3] = (ar_coef[1] * ar_coef[4]) >> scale; // top - coef[2][2] = (ar_coef[3] * ar_coef[4]) >> scale; // top-left - coef[2][4] = (ar_coef[3] * ar_coef[4]) >> scale; // top-right - coef[3][1] = ar_coef[5]; // left-left - coef[1][3] = ((int32)ar_coef[5] * ar_coef[4] * ar_coef[4]) >> (2*scale) ; // top-top - L = 2; - break; - - default: - assert(0); - } - if (nb_coef != 6) - for (k=0, j=-L; j<=0; j++) - for (i=-L; i<=L && (i<0 || j<0); i++, k++) - coef[3+j][3+i] = ar_coef[k]; - - memset(buf, 0, width*height); // debug (not needed) - for (y=0; y=3 && y=3 && x 1 ) ? 44 : 82; + height = ( suby > 1 ) ? 38 : 73; + + switch( nb_coef ) + { + case 6: + // SEI.AR mode + coef[3][2] = ar_coef[1]; // left + coef[2][3] = ( ar_coef[1] * ar_coef[4] ) >> scale; // top + coef[2][2] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-left + coef[2][4] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-right + coef[3][1] = ar_coef[5]; // left-left + coef[1][3] = ( (int32) ar_coef[5] * ar_coef[4] * ar_coef[4] ) >> ( 2 * scale ); // top-top + + L = 2; + break; + + default: + assert( 0 ); + } + if( nb_coef != 6 ) + for( k = 0, j = -L; j <= 0; j++ ) + for( i = -L; i <= L && ( i < 0 || j < 0 ); i++, k++ ) + coef[3 + j][3 + i] = ar_coef[k]; + + memset( buf, 0, width * height ); // debug (not needed) + for( y = 0; y < height; y++ ) + for( x = 0; x < width; x++ ) + { + // Filter + g = 0; + if( y >= 3 && y < height && x >= 3 && x < width - 3 ) + { + for( j = -3; j <= 0; j++ ) + for( i = -3; i <= 3 && ( i < 0 || j < 0 ); i++ ) + g += (int) coef[3 + j][3 + i] * buf[width * ( y + j ) + x + i]; + + g = round( g, scale ); + } + + // Add random noise + g += round( Gaussian_LUT[rnd & 2047], shift ); + rnd = prng( rnd ); + + buf[width * y + x] = clip( g, -127, 127 ); + } + + // Copy cropped area to output + memset( P, 0, size * size ); + for( y = 0; y < 64 / suby; y++ ) + for( x = 0; x < 64 / subx; x++ ) + P[size * y + x] = buf[width * ( 3 + 6 / suby + y ) + ( 3 + 6 / subx + x )]; } -int same_pattern(fgs_sei* cfg, int32 a, int32 b) +int same_pattern( fgs_sei* cfg, int32 a, int32 b ) { - int16* coef_a = &cfg->comp_model_value[0][0][0] + a; - int16* coef_b = &cfg->comp_model_value[0][0][0] + b; + int16* coef_a = &cfg->comp_model_value[0][0][0] + a; + int16* coef_b = &cfg->comp_model_value[0][0][0] + b; - for (int i=1; icomp_model_present_flag[c]) - { - for (k=0; knum_intensity_intervals[c]; k++) - { - a = cfg->intensity_interval_lower_bound[c][k]; - uint32 id = SEI_MAX_MODEL_VALUES*(k + 256*c); - - for (i=0; i0; i--) - { - if (intensities[i-1] > a) - { - intensities[i] = intensities[i-1]; - patterns[i] = patterns[i-1]; - } - else - break; - } - intensities[i] = a; - patterns[i] = id; - np ++; - } - } - } - if (c==0 || c==2) - { - // 2. Register the patterns (with correct order) - for (i=0; icomp_model_value[0][0][0] + patterns[i]; - - if (c==0) - { - if (cfg->model_id) - vfgs_make_ar_pattern(Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0]); - else - vfgs_make_sei_ff_pattern64((int8 (*)[64])P, coef[1], coef[2]); - - vfgs_set_luma_pattern(i, P); - } - else if (c==2) - { - if (cfg->model_id) - vfgs_make_ar_pattern(Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1]); - else - vfgs_make_sei_ff_pattern32((int8 (*)[32])P, coef[1], coef[2]); - - vfgs_set_chroma_pattern(i, P); - } - } - // 3. Fill up LUTs - for (int cc=min(c,1); cc<=c; cc++) - { - if (cfg->comp_model_present_flag[cc]) - { - memset(plut, 255, sizeof(plut)); - // 3a. Fill valid patterns - for (k=0; knum_intensity_intervals[cc]; k++) - { - a = cfg->intensity_interval_lower_bound[cc][k]; - b = cfg->intensity_interval_upper_bound[cc][k]; - uint32 id = SEI_MAX_MODEL_VALUES*(k + 256*cc); - - for (i=0; icomp_model_value[cc][k][0]; - if (ilog2_scale_factor - (cfg->model_id ? 1 : 0)); // -1 for grain shift in pattern generation (see above) + int8 P[64 * 64]; + int8 Lbuf[73 * 82]; + int8 Cbuf[38 * 44]; + uint8 slut[256]; + uint8 plut[256]; + uint8 intensities[VFGS_MAX_PATTERNS]; + uint32 patterns[VFGS_MAX_PATTERNS]; + uint8 np = 0; // number of patterns + uint8 a, b, i; + int c, k; + + for( c = 0; c < 3; c++ ) + { + memset( slut, 0, sizeof( slut ) ); + if( c < 2 ) + { + np = 0; + memset( intensities, 0, sizeof( intensities ) ); + memset( patterns, ~0, sizeof( patterns ) ); + } + // 1. Look for different patterns, up to max supported number + if( cfg->comp_model_present_flag[c] ) + { + for( k = 0; k < cfg->num_intensity_intervals[c]; k++ ) + { + a = cfg->intensity_interval_lower_bound[c][k]; + uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); + + for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + if( same_pattern( cfg, patterns[i], id ) ) + break; + + if( i == VFGS_MAX_PATTERNS && np < VFGS_MAX_PATTERNS ) // can add it + { + // keep them sorted (by intensity). The goal of this sort is + // to enable meaningful pattern interpolation + for( i = np; i > 0; i-- ) + { + if( intensities[i - 1] > a ) + { + intensities[i] = intensities[i - 1]; + patterns[i] = patterns[i - 1]; + } + else + break; + } + intensities[i] = a; + patterns[i] = id; + np++; + } + } + } + if( c == 0 || c == 2 ) + { + // 2. Register the patterns (with correct order) + for( i = 0; i < np; i++ ) + { + int16* coef = &cfg->comp_model_value[0][0][0] + patterns[i]; + + if( c == 0 ) + { + if( cfg->model_id ) + vfgs_make_ar_pattern( Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); + else + vfgs_make_sei_ff_pattern64( (int8( * )[64]) P, coef[1], coef[2] ); + + vfgs_set_luma_pattern( i, P ); + } + else if( c == 2 ) + { + if( cfg->model_id ) + vfgs_make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); + else + vfgs_make_sei_ff_pattern32( (int8( * )[32]) P, coef[1], coef[2] ); + + vfgs_set_chroma_pattern( i, P ); + } + } + // 3. Fill up LUTs + for( int cc = min( c, 1 ); cc <= c; cc++ ) + { + if( cfg->comp_model_present_flag[cc] ) + { + memset( plut, 255, sizeof( plut ) ); + // 3a. Fill valid patterns + for( k = 0; k < cfg->num_intensity_intervals[cc]; k++ ) + { + a = cfg->intensity_interval_lower_bound[cc][k]; + b = cfg->intensity_interval_upper_bound[cc][k]; + uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); + + for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + if( same_pattern( cfg, patterns[i], id ) ) + break; + // Note: if not found, could try to find interpolation value + + for( int l = a; l <= b; l++ ) + { + slut[l] = (uint8) cfg->comp_model_value[cc][k][0]; + if( i < VFGS_MAX_PATTERNS ) + plut[l] = i << 4; + } + } + // 3b. Fill holes (no interp. yet, just repeat last) + i = 0; + for( k = 0; k < 256; k++ ) + { + if( plut[k] == 255 ) + plut[k] = i; + else + i = plut[k]; + } + } + else + { + memset( plut, 0, sizeof( plut ) ); + } + // 3c. Register LUTs + vfgs_set_scale_lut( cc, slut ); + vfgs_set_pattern_lut( cc, plut ); + } + } + } + + vfgs_set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) } - diff --git a/source/Lib/vvdec/vfgs_fw.h b/source/Lib/vvdec/vfgs_fw.h index cfcad9ea..6100a2e5 100644 --- a/source/Lib/vvdec/vfgs_fw.h +++ b/source/Lib/vvdec/vfgs_fw.h @@ -58,28 +58,28 @@ POSSIBILITY OF SUCH DAMAGE. #define _VFGS_FW_H_ #ifndef int32 -#define int32 signed int -#define uint32 unsigned int -#define int16 signed short -#define uint16 unsigned short -#define int8 signed char -#define uint8 unsigned char +# define int32 signed int +# define uint32 unsigned int +# define int16 signed short +# define uint16 unsigned short +# define int8 signed char +# define uint8 unsigned char #endif #define SEI_MAX_MODEL_VALUES 6 -typedef struct fgs_sei_s { - uint8 model_id; - uint8 log2_scale_factor; - uint8 comp_model_present_flag[3]; - uint16 num_intensity_intervals[3]; - uint8 num_model_values[3]; - uint8 intensity_interval_lower_bound[3][256]; - uint8 intensity_interval_upper_bound[3][256]; - int16 comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; +typedef struct fgs_sei_s +{ + uint8 model_id; + uint8 log2_scale_factor; + uint8 comp_model_present_flag[3]; + uint16 num_intensity_intervals[3]; + uint8 num_model_values[3]; + uint8 intensity_interval_lower_bound[3][256]; + uint8 intensity_interval_upper_bound[3][256]; + int16 comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; } fgs_sei; -void vfgs_init_sei(fgs_sei* cfg); - -#endif // _VFGS_FW_H_ +void vfgs_init_sei( fgs_sei* cfg ); +#endif // _VFGS_FW_H_ diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/vvdec/vfgs_hw.c index 1cbf434e..9d34ce87 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ b/source/Lib/vvdec/vfgs_hw.c @@ -55,47 +55,52 @@ POSSIBILITY OF SUCH DAMAGE. */ #include "vfgs_hw.h" -#include // memcpy +#include // memcpy #include -#define min(a,b) ((a)<(b)?(a):(b)) -#define max(a,b) ((a)>(b)?(a):(b)) -#define round(a,s) (((a)+(1<<((s)-1)))>>(s)) +#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) +#define max( a, b ) ( ( a ) > ( b ) ? ( a ) : ( b ) ) +#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) #define PATTERN_INTERPOLATION 0 // Note: declarations optimized for code readability; e.g. pattern storage in // actual hardware implementation would differ significantly -static int8 pattern[2][VFGS_MAX_PATTERNS+1][64][64] = {0, }; // +1 to simplify interpolation code -static uint8 sLUT[3][256] = {0, }; -static uint8 pLUT[3][256] = {0, }; -static uint32 rnd = 0xdeadbeef; -static uint32 rnd_up = 0xdeadbeef; -static uint32 line_rnd = 0xdeadbeef; +static int8 pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { + 0, +}; // +1 to simplify interpolation code +static uint8 sLUT[3][256] = { + 0, +}; +static uint8 pLUT[3][256] = { + 0, +}; +static uint32 rnd = 0xdeadbeef; +static uint32 rnd_up = 0xdeadbeef; +static uint32 line_rnd = 0xdeadbeef; static uint32 line_rnd_up = 0xdeadbeef; -static uint8 scale_shift = 5+6; -static uint8 bs = 0; // bitshift = bitdepth - 8 -static uint8 Y_min = 0; -static uint8 Y_max = 255; -static uint8 C_min = 0; -static uint8 C_max = 255; -static int csubx = 2; -static int csuby = 2; - +static uint8 scale_shift = 5 + 6; +static uint8 bs = 0; // bitshift = bitdepth - 8 +static uint8 Y_min = 0; +static uint8 Y_max = 255; +static uint8 C_min = 0; +static uint8 C_max = 255; +static int csubx = 2; +static int csuby = 2; // Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) -static int16 grain[3][32]; // 9 bit needed because of overlap (has norm > 1) +static int16 grain[3][32]; // 9 bit needed because of overlap (has norm > 1) static uint8 scale[3][32]; /** Pseudo-random number generator * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register * (the register LSB has basically no effect since it is never fed back) */ -static uint32 prng(uint32 x) +static uint32 prng( uint32 x ) { - uint32 s = ((x << 30) ^ (x << 2)) & 0x80000000; - x = s | (x >> 1); - return x; + uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); + return x; } /** Derive Y x/y offsets from (random) number @@ -116,276 +121,276 @@ static uint32 prng(uint32 x) * Note: to fully support cross-component correlation within patterns, we would * need to align luma/chroma offsets. */ -static void get_offset_y(uint32 val, int *s, uint8 *x, uint8 *y) +static void get_offset_y( uint32 val, int* s, uint8* x, uint8* y ) { - uint32 bf; // bit field + uint32 bf; // bit field - *s = ((val >> 31) & 1) ? -1 : 1; + *s = ( ( val >> 31 ) & 1 ) ? -1 : 1; - bf = (val >> 0) & 0x3ff; - *x = ((bf * 13) >> 10) * 4; // 13 = 8 + 4 + 1 (two adders) + bf = ( val >> 0 ) & 0x3ff; + *x = ( ( bf * 13 ) >> 10 ) * 4; // 13 = 8 + 4 + 1 (two adders) - bf = (val >> 14) & 0x3ff; - *y = ((bf * 12) >> 10) * 4; // 12 = 8 + 4 (one adder) - // Note: could shift 9 and * 2, to make a multiple of 2 and make use of all - // pattern samples (when using overlap). + bf = ( val >> 14 ) & 0x3ff; + *y = ( ( bf * 12 ) >> 10 ) * 4; // 12 = 8 + 4 (one adder) + // Note: could shift 9 and * 2, to make a multiple of 2 and make use of all + // pattern samples (when using overlap). } -static void get_offset_u(uint32 val, int *s, uint8 *x, uint8 *y) +static void get_offset_u( uint32 val, int* s, uint8* x, uint8* y ) { - uint32 bf; // bit field + uint32 bf; // bit field - *s = ((val >> 2) & 1) ? -1 : 1; + *s = ( ( val >> 2 ) & 1 ) ? -1 : 1; - bf = (val >> 10) & 0x3ff; - *x = ((bf * 13) >> 10) * (4/csubx); + bf = ( val >> 10 ) & 0x3ff; + *x = ( ( bf * 13 ) >> 10 ) * ( 4 / csubx ); - bf = ((val >> 24) & 0x0ff) | ((val << 8) & 0x300); - *y = ((bf * 12) >> 10) * (4/csuby); + bf = ( ( val >> 24 ) & 0x0ff ) | ( ( val << 8 ) & 0x300 ); + *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void get_offset_v(uint32 val, int *s, uint8 *x, uint8 *y) +static void get_offset_v( uint32 val, int* s, uint8* x, uint8* y ) { - uint32 bf; // bit field + uint32 bf; // bit field - *s = ((val >> 15) & 1) ? -1 : 1; + *s = ( ( val >> 15 ) & 1 ) ? -1 : 1; - bf = (val >> 20) & 0x3ff; - *x = ((bf * 13) >> 10) * (4/csubx); + bf = ( val >> 20 ) & 0x3ff; + *x = ( ( bf * 13 ) >> 10 ) * ( 4 / csubx ); - bf = (val >> 4) & 0x3ff; - *y = ((bf * 12) >> 10) * (4/csuby); + bf = ( val >> 4 ) & 0x3ff; + *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void add_grain_block(void* I, int c, int x, int y, int width) +static void add_grain_block( void* I, int c, int x, int y, int width ) { - uint8 *I8 = (uint8*)I; - uint16 *I16 = (uint16*)I; - - int s, s_up; // random sign flip (current + upper row) - uint8 ox, oy; // random offset (current) - uint8 ox_up, oy_up; // random offset (upper row) - uint8 oc1, oc2; // overlapping coefficients - uint8 pi; // pattern index integer part - int i, j; - int P; // Pattern sample (from current pattern index) + uint8* I8 = (uint8*) I; + uint16* I16 = (uint16*) I; + + int s, s_up; // random sign flip (current + upper row) + uint8 ox, oy; // random offset (current) + uint8 ox_up, oy_up; // random offset (upper row) + uint8 oc1, oc2; // overlapping coefficients + uint8 pi; // pattern index integer part + int i, j; + int P; // Pattern sample (from current pattern index) #if PATTERN_INTERPOLATION - int Pn; // Next-pattern sample (from pattern index+1) - uint8 pf; // pattern index fractional part + int Pn; // Next-pattern sample (from pattern index+1) + uint8 pf; // pattern index fractional part #endif - uint8 intensity; - int flush = 0; - int subx = c ? csubx : 1; - int suby = c ? csuby : 1; - uint8 I_min = c ? C_min : Y_min; - uint8 I_max = c ? C_max : Y_max; - - if ((y & 1) && suby > 1) - return; - - assert(!(x & 15)); - assert(width > 128); - assert(bs == 0 || bs == 2); - assert(scale_shift + bs >= 8 && scale_shift + bs <= 13); - // TODO: assert subx, suby, Y/C min/max, max pLUT values, etc - - j = y & 0xf; - - if (y > 15 && j == 0) // first line of overlap - { - oc1 = (suby > 1) ? 20 : 12; // current - oc2 = (suby > 1) ? 20 : 24; // upper - } - else if (y > 15 && j == 1) // second line of overlap - { - oc1 = 24; - oc2 = 12; - } - else - { - oc1 = oc2 = 0; - } - - // Derive block offsets + sign - if (c==0) - get_offset_y(rnd, &s, &ox, &oy); - else if (c==1) - get_offset_u(rnd, &s, &ox, &oy); - else - get_offset_v(rnd, &s, &ox, &oy); - oy += j/suby; - - // Same for upper block (overlap) - if (c==0) - get_offset_y(rnd_up, &s_up, &ox_up, &oy_up); - else if (c==1) - get_offset_u(rnd_up, &s_up, &ox_up, &oy_up); - else - get_offset_v(rnd_up, &s_up, &ox_up, &oy_up); - oy_up += (16 + j)/suby; - - // Make grain pattern - for (i=0; i<16/subx; i++) - { - intensity = bs ? I16[x/subx+i] >> bs : I8[x/subx+i]; - pi = pLUT[c][intensity] >> 4; // pattern index (integer part) + uint8 intensity; + int flush = 0; + int subx = c ? csubx : 1; + int suby = c ? csuby : 1; + uint8 I_min = c ? C_min : Y_min; + uint8 I_max = c ? C_max : Y_max; + + if( ( y & 1 ) && suby > 1 ) + return; + + assert( !( x & 15 ) ); + assert( width > 128 ); + assert( bs == 0 || bs == 2 ); + assert( scale_shift + bs >= 8 && scale_shift + bs <= 13 ); + // TODO: assert subx, suby, Y/C min/max, max pLUT values, etc + + j = y & 0xf; + + if( y > 15 && j == 0 ) // first line of overlap + { + oc1 = ( suby > 1 ) ? 20 : 12; // current + oc2 = ( suby > 1 ) ? 20 : 24; // upper + } + else if( y > 15 && j == 1 ) // second line of overlap + { + oc1 = 24; + oc2 = 12; + } + else + { + oc1 = oc2 = 0; + } + + // Derive block offsets + sign + if( c == 0 ) + get_offset_y( rnd, &s, &ox, &oy ); + else if( c == 1 ) + get_offset_u( rnd, &s, &ox, &oy ); + else + get_offset_v( rnd, &s, &ox, &oy ); + oy += j / suby; + + // Same for upper block (overlap) + if( c == 0 ) + get_offset_y( rnd_up, &s_up, &ox_up, &oy_up ); + else if( c == 1 ) + get_offset_u( rnd_up, &s_up, &ox_up, &oy_up ); + else + get_offset_v( rnd_up, &s_up, &ox_up, &oy_up ); + oy_up += ( 16 + j ) / suby; + + // Make grain pattern + for( i = 0; i < 16 / subx; i++ ) + { + intensity = bs ? I16[x / subx + i] >> bs : I8[x / subx + i]; + pi = pLUT[c][intensity] >> 4; // pattern index (integer part) #if PATTERN_INTERPOLATION - pf = pLUT[c][intensity] & 15; // fractional part (interpolate with next) -- could restrict to less bits (e.g. 2) + pf = pLUT[c][intensity] & 15; // fractional part (interpolate with next) -- could restrict to less bits (e.g. 2) #endif - // Pattern - P = pattern[c?1:0][pi ][oy][ox + i] * s; // We could consider just XORing the sign bit + // Pattern + P = pattern[c ? 1 : 0][pi][oy][ox + i] * s; // We could consider just XORing the sign bit #if PATTERN_INTERPOLATION - Pn = pattern[c?1:0][pi+1][oy][ox + i] * s; // But there are equivalent hw tricks, e.g. storing values as sign + amplitude instead of two's complement + Pn = + pattern[c ? 1 : 0][pi + 1][oy][ox + i] * s; // But there are equivalent hw tricks, e.g. storing values as sign + amplitude instead of two's complement #endif - if (oc1) // overlap - { - P = round(P * oc1 + pattern[c?1:0][pi ][oy_up][ox_up + i] * oc2 * s_up, 5); + if( oc1 ) // overlap + { + P = round( P * oc1 + pattern[c ? 1 : 0][pi][oy_up][ox_up + i] * oc2 * s_up, 5 ); #if PATTERN_INTERPOLATION - Pn = round(Pn * oc1 + pattern[c?1:0][pi+1][oy_up][ox_up + i] * oc2 * s_up, 5); + Pn = round( Pn * oc1 + pattern[c ? 1 : 0][pi + 1][oy_up][ox_up + i] * oc2 * s_up, 5 ); #endif - } + } #if PATTERN_INTERPOLATION - // Pattern interpolation: P is current, Pn is next, pf is interpolation coefficient - grain[c][16/subx+i] = round(P * (16-pf) + Pn * pf, 4); + // Pattern interpolation: P is current, Pn is next, pf is interpolation coefficient + grain[c][16 / subx + i] = round( P * ( 16 - pf ) + Pn * pf, 4 ); #else - grain[c][16/subx+i] = P; + grain[c][16 / subx + i] = P; #endif - // Scale sign already integrated above because of overlap - scale[c][16/subx+i] = sLUT[c][intensity]; - } - - // Scale & output - do - { - if (x > 0) - { - int32 g; - int16 l1, l0, r0, r1; - - if (!flush) - { - // Horizontal deblock (across previous block) - l1 = grain[c][16/subx -2]; - l0 = grain[c][16/subx -1]; - r0 = grain[c][16/subx +0]; - r1 = grain[c][16/subx +1]; - grain[c][16/subx -1] = round(l1 + 3*l0 + r0, 2); - grain[c][16/subx +0] = round(l0 + 3*r0 + r1, 2); - } - for (i=0; i<16/subx; i++) - { - // Output previous block (or flush current) - g = round(scale[c][i] * (int16)grain[c][i], scale_shift); - if (bs) - I16[(x-16)/subx+i] = max(I_min<= width) - { - flush ++; - x += 16; - } - } while (flush == 1); + // Scale sign already integrated above because of overlap + scale[c][16 / subx + i] = sLUT[c][intensity]; + } + + // Scale & output + do + { + if( x > 0 ) + { + int32 g; + int16 l1, l0, r0, r1; + + if( !flush ) + { + // Horizontal deblock (across previous block) + l1 = grain[c][16 / subx - 2]; + l0 = grain[c][16 / subx - 1]; + r0 = grain[c][16 / subx + 0]; + r1 = grain[c][16 / subx + 1]; + grain[c][16 / subx - 1] = round( l1 + 3 * l0 + r0, 2 ); + grain[c][16 / subx + 0] = round( l0 + 3 * r0 + r1, 2 ); + } + for( i = 0; i < 16 / subx; i++ ) + { + // Output previous block (or flush current) + g = round( scale[c][i] * (int16) grain[c][i], scale_shift ); + if( bs ) + I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); + else + I8[( x - 16 ) / subx + i] = max( I_min, min( I_max, I8[( x - 16 ) / subx + i] + g ) ); + } + } + + // Shift pipeline + for( i = 0; i < 16 / subx && !flush; i++ ) + { + grain[c][i] = grain[c][i + 16 / subx]; + scale[c][i] = scale[c][i + 16 / subx]; + } + + if( x + 16 >= width ) + { + flush++; + x += 16; + } + } while( flush == 1 ); } /* Public interface ***********************************************************/ -void vfgs_add_grain_line(void* Y, void* U, void* V, int y, int width) +void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) { - // Generate / backup / restore per-line random seeds (needed to make multi-line blocks) - if (y && (y & 0x0f) == 0) - { - // new line of blocks --> backup + copy current to upper - line_rnd_up = line_rnd; - line_rnd = rnd; - } - rnd_up = line_rnd_up; - rnd = line_rnd; - - // Process line - for (int x=0; x backup + copy current to upper + line_rnd_up = line_rnd; + line_rnd = rnd; + } + rnd_up = line_rnd_up; + rnd = line_rnd; + + // Process line + for( int x = 0; x < width; x += 16 ) + { + // Process pixels for each color component + add_grain_block( Y, 0, x, y, width ); + add_grain_block( U, 1, x, y, width ); + add_grain_block( V, 2, x, y, width ); + + // Crank random generator + rnd = prng( rnd ); + rnd_up = prng( rnd_up ); // upper block (overlapping) + } } -void vfgs_set_luma_pattern(int index, int8* P) +void vfgs_set_luma_pattern( int index, int8* P ) { - assert(index >= 0 && index < 8); - memcpy(pattern[0][index], P, 64*64); + assert( index >= 0 && index < 8 ); + memcpy( pattern[0][index], P, 64 * 64 ); } -void vfgs_set_chroma_pattern(int index, int8 *P) +void vfgs_set_chroma_pattern( int index, int8* P ) { - assert(index >= 0 && index < 8); - for (int i=0; i<64/csuby; i++) - memcpy(pattern[1][index][i], P + (64/csuby)*i, 64/csubx); + assert( index >= 0 && index < 8 ); + for( int i = 0; i < 64 / csuby; i++ ) + memcpy( pattern[1][index][i], P + ( 64 / csuby ) * i, 64 / csubx ); } -void vfgs_set_scale_lut(int c, uint8 lut[]) +void vfgs_set_scale_lut( int c, uint8 lut[] ) { - assert(c>=0 && c<3); - memcpy(sLUT[c], lut, 256); + assert( c >= 0 && c < 3 ); + memcpy( sLUT[c], lut, 256 ); } -void vfgs_set_pattern_lut(int c, uint8 lut[]) +void vfgs_set_pattern_lut( int c, uint8 lut[] ) { - assert(c>=0 && c<3); - memcpy(pLUT[c], lut, 256); + assert( c >= 0 && c < 3 ); + memcpy( pLUT[c], lut, 256 ); } -void vfgs_set_seed(uint32 seed) +void vfgs_set_seed( uint32 seed ) { - // Note: shift left the seed as the LFSR loops on the 31 MSBs, so - // the LFSR register LSB has no effect on random sequence initialization - rnd = rnd_up = line_rnd = line_rnd_up = (seed << 1); + // Note: shift left the seed as the LFSR loops on the 31 MSBs, so + // the LFSR register LSB has no effect on random sequence initialization + rnd = rnd_up = line_rnd = line_rnd_up = ( seed << 1 ); } -void vfgs_set_scale_shift(int shift) +void vfgs_set_scale_shift( int shift ) { - assert(shift >= 2 && shift < 8); - scale_shift = shift + 6 - bs; + assert( shift >= 2 && shift < 8 ); + scale_shift = shift + 6 - bs; } -void vfgs_set_depth(int depth) +void vfgs_set_depth( int depth ) { - assert(depth==8 || depth==10); + assert( depth == 8 || depth == 10 ); - if (bs==0 && depth>8) - scale_shift -= 2; - if (bs==2 && depth==8) - scale_shift += 2; + if( bs == 0 && depth > 8 ) + scale_shift -= 2; + if( bs == 2 && depth == 8 ) + scale_shift += 2; - bs = depth - 8; + bs = depth - 8; } -void vfgs_set_chroma_subsampling(int subx, int suby) +void vfgs_set_chroma_subsampling( int subx, int suby ) { - assert(subx==1 || subx==2); - assert(suby==1 || suby==2); - csubx = subx; - csuby = suby; + assert( subx == 1 || subx == 2 ); + assert( suby == 1 || suby == 2 ); + csubx = subx; + csuby = suby; } - diff --git a/source/Lib/vvdec/vfgs_hw.h b/source/Lib/vvdec/vfgs_hw.h index 4e81ab4e..0bdb8d05 100644 --- a/source/Lib/vvdec/vfgs_hw.h +++ b/source/Lib/vvdec/vfgs_hw.h @@ -58,27 +58,26 @@ POSSIBILITY OF SUCH DAMAGE. #define _VFGS_HW_H_ #ifndef int32 -#define int32 signed int -#define uint32 unsigned int -#define int16 signed short -#define uint16 unsigned short -#define int8 signed char -#define uint8 unsigned char +# define int32 signed int +# define uint32 unsigned int +# define int16 signed short +# define uint16 unsigned short +# define int8 signed char +# define uint8 unsigned char #endif #define VFGS_MAX_PATTERNS 8 -void vfgs_set_luma_pattern(int index, int8* P); -void vfgs_set_chroma_pattern(int index, int8 *P); -void vfgs_set_scale_lut(int c, uint8 lut[]); -void vfgs_set_pattern_lut(int c, uint8 lut[]); +void vfgs_set_luma_pattern( int index, int8* P ); +void vfgs_set_chroma_pattern( int index, int8* P ); +void vfgs_set_scale_lut( int c, uint8 lut[] ); +void vfgs_set_pattern_lut( int c, uint8 lut[] ); -void vfgs_set_seed(uint32 seed); -void vfgs_set_scale_shift(int shift); -void vfgs_set_depth(int depth); -void vfgs_set_chroma_subsampling(int subx, int suby); +void vfgs_set_seed( uint32 seed ); +void vfgs_set_scale_shift( int shift ); +void vfgs_set_depth( int depth ); +void vfgs_set_chroma_subsampling( int subx, int suby ); -void vfgs_add_grain_line(void* Y, void* U, void* V, int y, int width); - -#endif // _VFGS_HW_H_ +void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ); +#endif // _VFGS_HW_H_ diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 2bf94f62..45238681 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -827,98 +827,98 @@ int VVDecImpl::copyComp( const unsigned char* pucSrc, unsigned char* pucDest, un } #if ENABLE_FILM_GRAIN -int VVDecImpl::xUpdateFGC( vvdecSEI *s ) +int VVDecImpl::xUpdateFGC( vvdecSEI* s ) { - vvdecSEIFilmGrainCharacteristics* sei =(vvdecSEIFilmGrainCharacteristics*)s->payload; + vvdecSEIFilmGrainCharacteristics* sei = (vvdecSEIFilmGrainCharacteristics*) s->payload; - if (!sei->filmGrainCharacteristicsCancelFlag) + if( !sei->filmGrainCharacteristicsCancelFlag ) + { + fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) + // Copy SEI message in vfgs structure format + // TODO: check some values and warn about unsupported stuff ? + fgs.model_id = sei->filmGrainModelId; + fgs.log2_scale_factor = sei->log2ScaleFactor; + for( int c = 0; c < 3; c++ ) + { + vvdecCompModel& cm = sei->compModel[c]; + if( cm.presentFlag ) + { + fgs.comp_model_present_flag[c] = 1; + fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; + fgs.num_model_values[c] = cm.numModelValues; + for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) + { + vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; + fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; + fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; + for( int v = 0; v < fgs.num_model_values[c]; v++ ) + fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; + // Fill with default model values (VFGS needs them; it actually ignores num_model_values) + if( fgs.num_model_values[c] < 2 ) fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) + if( fgs.num_model_values[c] < 3 ) fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr + if( fgs.num_model_values[c] < 4 ) fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) + if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio + if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + } + } + } + + vfgs_set_depth( 10 ); + vfgs_set_chroma_subsampling( 2, 2 ); + // Conversion of component model values for 4:2:0 chroma format + if( fgs.model_id == 0 ) { - fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) - // Copy SEI message in vfgs structure format - // TODO: check some values and warn about unsupported stuff ? - fgs.model_id = sei->filmGrainModelId; - fgs.log2_scale_factor = sei->log2ScaleFactor; - for (int c=0; c<3; c++) - { - vvdecCompModel &cm = sei->compModel[c]; - if (cm.presentFlag) - { - fgs.comp_model_present_flag[c] = 1; - fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; - fgs.num_model_values[c] = cm.numModelValues; - for (int i=0; i>= 1; - } - } - vfgs_init_sei(&fgs); - - // if (!m_bFgs) - // // TODO: get something random - // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32 seed); - - m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; - } - else - { - m_eFgs = 0; - } - - return VVDEC_OK; + for( int c = 1; c < 3; c++ ) + if( fgs.comp_model_present_flag[c] ) + for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) + { + fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency + fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency + fgs.comp_model_value[c][k][0] >>= 1; + } + } + vfgs_init_sei( &fgs ); + + // if (!m_bFgs) + // // TODO: get something random + // // TODO: make seed also impact the pattern gen + // vfgs_set_seed(uint32 seed); + + m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; + } + else + { + m_eFgs = 0; + } + + return VVDEC_OK; } -int VVDecImpl::xAddGrain( vvdecFrame *frame ) +int VVDecImpl::xAddGrain( vvdecFrame* frame ) { - if (m_eFgs) - { - uint8 *Y = (uint8*)frame->planes[0].ptr; - uint8 *U = (uint8*)frame->planes[1].ptr; - uint8 *V = (uint8*)frame->planes[2].ptr; - - CHECK(frame->bitDepth != 10, "Bitdepth is not 10"); - - for (int y=0; yplanes[0].height; y++) - { - vfgs_add_grain_line(Y, U, V, y, frame->planes[0].width); - Y += frame->planes[0].stride; - if ((y & 1) || (frame->planes[0].height == frame->planes[1].height)) - { - U += frame->planes[1].stride; - V += frame->planes[1].stride; - } - } - - if (m_eFgs < 2) // Not persistent - m_eFgs = 0; + if( m_eFgs ) + { + uint8* Y = (uint8*) frame->planes[0].ptr; + uint8* U = (uint8*) frame->planes[1].ptr; + uint8* V = (uint8*) frame->planes[2].ptr; + + CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); + + for( int y = 0; y < frame->planes[0].height; y++ ) + { + vfgs_add_grain_line( Y, U, V, y, frame->planes[0].width ); + Y += frame->planes[0].stride; + if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) + { + U += frame->planes[1].stride; + V += frame->planes[1].stride; + } } - return VVDEC_OK; + + if( m_eFgs < 2 ) // Not persistent + m_eFgs = 0; + } + return VVDEC_OK; } #endif // ENABLE_FILM_GRAIN From 026a89b9d5c54fcbe1f87c4798d37bed64b1d0e2 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 13:21:49 +0200 Subject: [PATCH 2/8] clang-format InsertBraces --- source/Lib/vvdec/vfgs_fw.c | 72 ++++++++++++++++++++++++++++++++++ source/Lib/vvdec/vfgs_hw.c | 24 ++++++++++++ source/Lib/vvdec/vvdecimpl.cpp | 18 ++++++--- 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/vvdec/vfgs_fw.c index cd424a61..993b1e77 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/vvdec/vfgs_fw.c @@ -324,30 +324,42 @@ static void idct2_64( int8 B[][64] ) /* 1st pass (DCT2_64'*B) = vertical */ for( j = 0; j < 64; j++ ) + { for( i = 0; i < 64; i++ ) { acc = 256; for( k = 0; k < 64; k++ ) + { acc += (int32) DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) + } X[j][i] = ( acc >> 9 ); } + } /* 2nd pass (...)*DCT2_64 = horizontal + clipping */ for( j = 0; j < 64; j++ ) + { for( i = 0; i < 64; i++ ) { acc = 256; for( k = 0; k < 64; k++ ) + { acc += (int32) X[j][k] * DCT2_64[k][i]; + } acc >>= 9; if( acc > 127 ) + { acc = 127; + } if( acc < -127 ) + { acc = -127; + } B[j][i] = acc; } + } } /** Apply iDCT2 to block B[32][32] + clipping */ @@ -359,30 +371,42 @@ static void idct2_32( int8 B[][32] ) /* 1st pass (R32'*B) = vertical */ for( j = 0; j < 32; j++ ) + { for( i = 0; i < 32; i++ ) { acc = 128; for( k = 0; k < 32; k++ ) + { acc += (int32) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) + } X[j][i] = ( acc >> 8 ); } + } /* 2nd pass (...)*R32 = horizontal + clipping */ for( j = 0; j < 32; j++ ) + { for( i = 0; i < 32; i++ ) { acc = 256; for( k = 0; k < 32; k++ ) + { acc += (int32) X[j][k] * DCT2_64[k * 2][i]; + } acc >>= 9; if( acc > 127 ) + { acc = 127; + } if( acc < -127 ) + { acc = -127; + } B[j][i] = acc; } + } } static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) @@ -395,6 +419,7 @@ static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) n = Seed_LUT[0]; memset( B, 0, 64 * 64 * sizeof( int8 ) ); for( l = 0; l < 64; l++ ) + { for( k = 0; k < 64; k += 4 ) { if( k < fh && l < fv ) @@ -406,6 +431,7 @@ static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) } n = prng( n ); } + } B[0][0] = 0; idct2_64( B ); } @@ -420,6 +446,7 @@ static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) n = Seed_LUT[1]; memset( B, 0, 32 * 32 * sizeof( int8 ) ); for( l = 0; l < 32; l++ ) + { for( k = 0; k < 32; k += 2 ) { if( k < fh && l < fv ) @@ -429,6 +456,7 @@ static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) } n = prng( n ); } + } B[0][0] = 0; idct2_32( B ); } @@ -465,12 +493,19 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar assert( 0 ); } if( nb_coef != 6 ) + { for( k = 0, j = -L; j <= 0; j++ ) + { for( i = -L; i <= L && ( i < 0 || j < 0 ); i++, k++ ) + { coef[3 + j][3 + i] = ar_coef[k]; + } + } + } memset( buf, 0, width * height ); // debug (not needed) for( y = 0; y < height; y++ ) + { for( x = 0; x < width; x++ ) { // Filter @@ -478,8 +513,12 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar if( y >= 3 && y < height && x >= 3 && x < width - 3 ) { for( j = -3; j <= 0; j++ ) + { for( i = -3; i <= 3 && ( i < 0 || j < 0 ); i++ ) + { g += (int) coef[3 + j][3 + i] * buf[width * ( y + j ) + x + i]; + } + } g = round( g, scale ); } @@ -490,12 +529,17 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar buf[width * y + x] = clip( g, -127, 127 ); } + } // Copy cropped area to output memset( P, 0, size * size ); for( y = 0; y < 64 / suby; y++ ) + { for( x = 0; x < 64 / subx; x++ ) + { P[size * y + x] = buf[width * ( 3 + 6 / suby + y ) + ( 3 + 6 / subx + x )]; + } + } } int same_pattern( fgs_sei* cfg, int32 a, int32 b ) @@ -504,8 +548,12 @@ int same_pattern( fgs_sei* cfg, int32 a, int32 b ) int16* coef_b = &cfg->comp_model_value[0][0][0] + b; for( int i = 1; i < SEI_MAX_MODEL_VALUES; i++ ) + { if( coef_a[i] != coef_b[i] ) + { return 0; + } + } return 1; } @@ -542,8 +590,12 @@ void vfgs_init_sei( fgs_sei* cfg ) uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + { if( same_pattern( cfg, patterns[i], id ) ) + { break; + } + } if( i == VFGS_MAX_PATTERNS && np < VFGS_MAX_PATTERNS ) // can add it { @@ -557,7 +609,9 @@ void vfgs_init_sei( fgs_sei* cfg ) patterns[i] = patterns[i - 1]; } else + { break; + } } intensities[i] = a; patterns[i] = id; @@ -575,18 +629,26 @@ void vfgs_init_sei( fgs_sei* cfg ) if( c == 0 ) { if( cfg->model_id ) + { vfgs_make_ar_pattern( Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); + } else + { vfgs_make_sei_ff_pattern64( (int8( * )[64]) P, coef[1], coef[2] ); + } vfgs_set_luma_pattern( i, P ); } else if( c == 2 ) { if( cfg->model_id ) + { vfgs_make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); + } else + { vfgs_make_sei_ff_pattern32( (int8( * )[32]) P, coef[1], coef[2] ); + } vfgs_set_chroma_pattern( i, P ); } @@ -605,15 +667,21 @@ void vfgs_init_sei( fgs_sei* cfg ) uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) + { if( same_pattern( cfg, patterns[i], id ) ) + { break; + } + } // Note: if not found, could try to find interpolation value for( int l = a; l <= b; l++ ) { slut[l] = (uint8) cfg->comp_model_value[cc][k][0]; if( i < VFGS_MAX_PATTERNS ) + { plut[l] = i << 4; + } } } // 3b. Fill holes (no interp. yet, just repeat last) @@ -621,9 +689,13 @@ void vfgs_init_sei( fgs_sei* cfg ) for( k = 0; k < 256; k++ ) { if( plut[k] == 255 ) + { plut[k] = i; + } else + { i = plut[k]; + } } } else diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/vvdec/vfgs_hw.c index 9d34ce87..791a958d 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ b/source/Lib/vvdec/vfgs_hw.c @@ -187,7 +187,9 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) uint8 I_max = c ? C_max : Y_max; if( ( y & 1 ) && suby > 1 ) + { return; + } assert( !( x & 15 ) ); assert( width > 128 ); @@ -214,20 +216,32 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) // Derive block offsets + sign if( c == 0 ) + { get_offset_y( rnd, &s, &ox, &oy ); + } else if( c == 1 ) + { get_offset_u( rnd, &s, &ox, &oy ); + } else + { get_offset_v( rnd, &s, &ox, &oy ); + } oy += j / suby; // Same for upper block (overlap) if( c == 0 ) + { get_offset_y( rnd_up, &s_up, &ox_up, &oy_up ); + } else if( c == 1 ) + { get_offset_u( rnd_up, &s_up, &ox_up, &oy_up ); + } else + { get_offset_v( rnd_up, &s_up, &ox_up, &oy_up ); + } oy_up += ( 16 + j ) / suby; // Make grain pattern @@ -288,9 +302,13 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) // Output previous block (or flush current) g = round( scale[c][i] * (int16) grain[c][i], scale_shift ); if( bs ) + { I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); + } else + { I8[( x - 16 ) / subx + i] = max( I_min, min( I_max, I8[( x - 16 ) / subx + i] + g ) ); + } } } @@ -347,7 +365,9 @@ void vfgs_set_chroma_pattern( int index, int8* P ) { assert( index >= 0 && index < 8 ); for( int i = 0; i < 64 / csuby; i++ ) + { memcpy( pattern[1][index][i], P + ( 64 / csuby ) * i, 64 / csubx ); + } } void vfgs_set_scale_lut( int c, uint8 lut[] ) @@ -380,9 +400,13 @@ void vfgs_set_depth( int depth ) assert( depth == 8 || depth == 10 ); if( bs == 0 && depth > 8 ) + { scale_shift -= 2; + } if( bs == 2 && depth == 8 ) + { scale_shift += 2; + } bs = depth - 8; } diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 45238681..fb7e1fa4 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -852,13 +852,15 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; for( int v = 0; v < fgs.num_model_values[c]; v++ ) + { fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; + } // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - if( fgs.num_model_values[c] < 2 ) fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) - if( fgs.num_model_values[c] < 3 ) fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr - if( fgs.num_model_values[c] < 4 ) fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) - if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio - if( fgs.num_model_values[c] < 5 ) fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) + if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr + if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) } } } @@ -869,13 +871,17 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) if( fgs.model_id == 0 ) { for( int c = 1; c < 3; c++ ) + { if( fgs.comp_model_present_flag[c] ) + { for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) { fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency fgs.comp_model_value[c][k][0] >>= 1; } + } + } } vfgs_init_sei( &fgs ); @@ -916,7 +922,9 @@ int VVDecImpl::xAddGrain( vvdecFrame* frame ) } if( m_eFgs < 2 ) // Not persistent + { m_eFgs = 0; + } } return VVDEC_OK; } From 5815d01bd17aa718a3e36d1736c20549b537742e Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 13:27:24 +0200 Subject: [PATCH 3/8] use standard int types --- source/Lib/vvdec/vfgs_fw.c | 126 ++++++++++++++++----------------- source/Lib/vvdec/vfgs_fw.h | 25 +++---- source/Lib/vvdec/vfgs_hw.c | 104 +++++++++++++-------------- source/Lib/vvdec/vfgs_hw.h | 19 ++--- source/Lib/vvdec/vvdecimpl.cpp | 8 +-- 5 files changed, 134 insertions(+), 148 deletions(-) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/vvdec/vfgs_fw.c index 993b1e77..5b9af023 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/vvdec/vfgs_fw.c @@ -64,7 +64,7 @@ POSSIBILITY OF SUCH DAMAGE. #define clip( x, lo, hi ) ( ( x ) > ( hi ) ? hi : ( x ) < ( lo ) ? ( lo ) : ( x ) ) // clang-format off -static const int8 Gaussian_LUT[2048] = { +static const int8_t Gaussian_LUT[2048] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, 13, -11, -20, -19, 33,-127, 17, -6,-105, 18, 19, 71, 48, -10, -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, @@ -195,7 +195,7 @@ static const int8 Gaussian_LUT[2048] = { 19, 2,-111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47 }; -static const uint32 Seed_LUT[256] = { +static const uint32_t Seed_LUT[256] = { 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, 2023850500, 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, @@ -298,29 +298,29 @@ static const uint32 Seed_LUT[256] = { { ck, -cj, ci, -ch, cg, -cf, ce, -cd, cc, -cb, ca, -bz, by, -bx, bw, -bv, bu, -bt, bs, -br, bq, -bp, bo, -bn, bm, -bl, bk, -bj, bi, -bh, bg, -bf, bf, -bg, bh, -bi, bj, -bk, bl, -bm, bn, -bo, bp, -bq, br, -bs, bt, -bu, bv, -bw, bx, -by, bz, -ca, cb, -cc, cd, -ce, cf, -cg, ch, -ci, cj, -ck }, \ } -static const int8 DCT2_64[64][64] = \ +static const int8_t DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); // clang-format on /** Pseudo-random number generator (32-bit) */ -static uint32 prng( uint32 x ) +static uint32_t prng( uint32_t x ) { #if 1 // same as HW (bit-reversed RDD-5) - uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); + uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); #else // RDD-5 - uint32 s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; - x = ( x << 1 ) | s; + uint32_t s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; + x = ( x << 1 ) | s; #endif return x; } /** Apply iDCT2 to block B[64][64] + clipping */ -static void idct2_64( int8 B[][64] ) +static void idct2_64( int8_t B[][64] ) { - int16 X[64][64]; - int i, j, k; - int32 acc; + int16_t X[64][64]; + int i, j, k; + int32_t acc; /* 1st pass (DCT2_64'*B) = vertical */ for( j = 0; j < 64; j++ ) @@ -330,7 +330,7 @@ static void idct2_64( int8 B[][64] ) acc = 256; for( k = 0; k < 64; k++ ) { - acc += (int32) DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) + acc += (int32_t) DCT2_64[k][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64) } X[j][i] = ( acc >> 9 ); @@ -345,7 +345,7 @@ static void idct2_64( int8 B[][64] ) acc = 256; for( k = 0; k < 64; k++ ) { - acc += (int32) X[j][k] * DCT2_64[k][i]; + acc += (int32_t) X[j][k] * DCT2_64[k][i]; } acc >>= 9; @@ -363,11 +363,11 @@ static void idct2_64( int8 B[][64] ) } /** Apply iDCT2 to block B[32][32] + clipping */ -static void idct2_32( int8 B[][32] ) +static void idct2_32( int8_t B[][32] ) { - int16 X[32][32]; - int i, j, k; - int32 acc; + int16_t X[32][32]; + int i, j, k; + int32_t acc; /* 1st pass (R32'*B) = vertical */ for( j = 0; j < 32; j++ ) @@ -377,7 +377,7 @@ static void idct2_32( int8 B[][32] ) acc = 128; for( k = 0; k < 32; k++ ) { - acc += (int32) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) + acc += (int32_t) DCT2_64[k * 2][j] * B[k][i]; // iDCT bases are vertical (transpose of DCT2_64=DCT) } X[j][i] = ( acc >> 8 ); @@ -392,7 +392,7 @@ static void idct2_32( int8 B[][32] ) acc = 256; for( k = 0; k < 32; k++ ) { - acc += (int32) X[j][k] * DCT2_64[k * 2][i]; + acc += (int32_t) X[j][k] * DCT2_64[k * 2][i]; } acc >>= 9; @@ -409,15 +409,15 @@ static void idct2_32( int8 B[][32] ) } } -static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) +static void vfgs_make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) { - int k, l; - uint32 n; + int k, l; + uint32_t n; fh = 4 * ( fh + 1 ); fv = 4 * ( fv + 1 ); n = Seed_LUT[0]; - memset( B, 0, 64 * 64 * sizeof( int8 ) ); + memset( B, 0, 64 * 64 * sizeof( int8_t ) ); for( l = 0; l < 64; l++ ) { for( k = 0; k < 64; k += 4 ) @@ -436,15 +436,15 @@ static void vfgs_make_sei_ff_pattern64( int8 B[][64], int fh, int fv ) idct2_64( B ); } -static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) +static void vfgs_make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) { - int k, l; - uint32 n; + int k, l; + uint32_t n; fh = 2 * ( fh + 1 ); fv = 2 * ( fv + 1 ); n = Seed_LUT[1]; - memset( B, 0, 32 * 32 * sizeof( int8 ) ); + memset( B, 0, 32 * 32 * sizeof( int8_t ) ); for( l = 0; l < 32; l++ ) { for( k = 0; k < 32; k += 2 ) @@ -461,14 +461,14 @@ static void vfgs_make_sei_ff_pattern32( int8 B[][32], int fh, int fv ) idct2_32( B ); } -static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar_coef[], int nb_coef, int shift, int scale, uint32 seed ) +static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int16_t ar_coef[], int nb_coef, int shift, int scale, uint32_t seed ) { - int16 coef[4][7]; - int L = 0; - int x, y, i, j, k; - int g; - int subx, suby, width, height; - uint32 rnd = seed; + int16_t coef[4][7]; + int L = 0; + int x, y, i, j, k; + int g; + int subx, suby, width, height; + uint32_t rnd = seed; memset( coef, 0, sizeof( coef ) ); subx = suby = ( size == 32 ) ? 2 : 1; @@ -479,12 +479,12 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar { case 6: // SEI.AR mode - coef[3][2] = ar_coef[1]; // left - coef[2][3] = ( ar_coef[1] * ar_coef[4] ) >> scale; // top - coef[2][2] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-left - coef[2][4] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-right - coef[3][1] = ar_coef[5]; // left-left - coef[1][3] = ( (int32) ar_coef[5] * ar_coef[4] * ar_coef[4] ) >> ( 2 * scale ); // top-top + coef[3][2] = ar_coef[1]; // left + coef[2][3] = ( ar_coef[1] * ar_coef[4] ) >> scale; // top + coef[2][2] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-left + coef[2][4] = ( ar_coef[3] * ar_coef[4] ) >> scale; // top-right + coef[3][1] = ar_coef[5]; // left-left + coef[1][3] = ( (int32_t) ar_coef[5] * ar_coef[4] * ar_coef[4] ) >> ( 2 * scale ); // top-top L = 2; break; @@ -542,10 +542,10 @@ static void vfgs_make_ar_pattern( int8 buf[], int8 P[], int size, const int16 ar } } -int same_pattern( fgs_sei* cfg, int32 a, int32 b ) +int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) { - int16* coef_a = &cfg->comp_model_value[0][0][0] + a; - int16* coef_b = &cfg->comp_model_value[0][0][0] + b; + int16_t* coef_a = &cfg->comp_model_value[0][0][0] + a; + int16_t* coef_b = &cfg->comp_model_value[0][0][0] + b; for( int i = 1; i < SEI_MAX_MODEL_VALUES; i++ ) { @@ -561,16 +561,16 @@ int same_pattern( fgs_sei* cfg, int32 a, int32 b ) /** Initialize "hardware" interface from FGS SEI parameters */ void vfgs_init_sei( fgs_sei* cfg ) { - int8 P[64 * 64]; - int8 Lbuf[73 * 82]; - int8 Cbuf[38 * 44]; - uint8 slut[256]; - uint8 plut[256]; - uint8 intensities[VFGS_MAX_PATTERNS]; - uint32 patterns[VFGS_MAX_PATTERNS]; - uint8 np = 0; // number of patterns - uint8 a, b, i; - int c, k; + int8_t P[64 * 64]; + int8_t Lbuf[73 * 82]; + int8_t Cbuf[38 * 44]; + uint8_t slut[256]; + uint8_t plut[256]; + uint8_t intensities[VFGS_MAX_PATTERNS]; + uint32_t patterns[VFGS_MAX_PATTERNS]; + uint8_t np = 0; // number of patterns + uint8_t a, b, i; + int c, k; for( c = 0; c < 3; c++ ) { @@ -586,8 +586,8 @@ void vfgs_init_sei( fgs_sei* cfg ) { for( k = 0; k < cfg->num_intensity_intervals[c]; k++ ) { - a = cfg->intensity_interval_lower_bound[c][k]; - uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); + a = cfg->intensity_interval_lower_bound[c][k]; + uint32_t id = SEI_MAX_MODEL_VALUES * ( k + 256 * c ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) { @@ -624,7 +624,7 @@ void vfgs_init_sei( fgs_sei* cfg ) // 2. Register the patterns (with correct order) for( i = 0; i < np; i++ ) { - int16* coef = &cfg->comp_model_value[0][0][0] + patterns[i]; + int16_t* coef = &cfg->comp_model_value[0][0][0] + patterns[i]; if( c == 0 ) { @@ -634,7 +634,7 @@ void vfgs_init_sei( fgs_sei* cfg ) } else { - vfgs_make_sei_ff_pattern64( (int8( * )[64]) P, coef[1], coef[2] ); + vfgs_make_sei_ff_pattern64( (int8_t( * )[64]) P, coef[1], coef[2] ); } vfgs_set_luma_pattern( i, P ); @@ -647,7 +647,7 @@ void vfgs_init_sei( fgs_sei* cfg ) } else { - vfgs_make_sei_ff_pattern32( (int8( * )[32]) P, coef[1], coef[2] ); + vfgs_make_sei_ff_pattern32( (int8_t( * )[32]) P, coef[1], coef[2] ); } vfgs_set_chroma_pattern( i, P ); @@ -662,9 +662,9 @@ void vfgs_init_sei( fgs_sei* cfg ) // 3a. Fill valid patterns for( k = 0; k < cfg->num_intensity_intervals[cc]; k++ ) { - a = cfg->intensity_interval_lower_bound[cc][k]; - b = cfg->intensity_interval_upper_bound[cc][k]; - uint32 id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); + a = cfg->intensity_interval_lower_bound[cc][k]; + b = cfg->intensity_interval_upper_bound[cc][k]; + uint32_t id = SEI_MAX_MODEL_VALUES * ( k + 256 * cc ); for( i = 0; i < VFGS_MAX_PATTERNS; i++ ) { @@ -677,7 +677,7 @@ void vfgs_init_sei( fgs_sei* cfg ) for( int l = a; l <= b; l++ ) { - slut[l] = (uint8) cfg->comp_model_value[cc][k][0]; + slut[l] = (uint8_t) cfg->comp_model_value[cc][k][0]; if( i < VFGS_MAX_PATTERNS ) { plut[l] = i << 4; diff --git a/source/Lib/vvdec/vfgs_fw.h b/source/Lib/vvdec/vfgs_fw.h index 6100a2e5..fc3b4fe7 100644 --- a/source/Lib/vvdec/vfgs_fw.h +++ b/source/Lib/vvdec/vfgs_fw.h @@ -57,27 +57,20 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef _VFGS_FW_H_ #define _VFGS_FW_H_ -#ifndef int32 -# define int32 signed int -# define uint32 unsigned int -# define int16 signed short -# define uint16 unsigned short -# define int8 signed char -# define uint8 unsigned char -#endif +#include #define SEI_MAX_MODEL_VALUES 6 typedef struct fgs_sei_s { - uint8 model_id; - uint8 log2_scale_factor; - uint8 comp_model_present_flag[3]; - uint16 num_intensity_intervals[3]; - uint8 num_model_values[3]; - uint8 intensity_interval_lower_bound[3][256]; - uint8 intensity_interval_upper_bound[3][256]; - int16 comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; + uint8_t model_id; + uint8_t log2_scale_factor; + uint8_t comp_model_present_flag[3]; + uint16_t num_intensity_intervals[3]; + uint8_t num_model_values[3]; + uint8_t intensity_interval_lower_bound[3][256]; + uint8_t intensity_interval_upper_bound[3][256]; + int16_t comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; } fgs_sei; void vfgs_init_sei( fgs_sei* cfg ); diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/vvdec/vfgs_hw.c index 791a958d..a3d88ed6 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ b/source/Lib/vvdec/vfgs_hw.c @@ -66,40 +66,40 @@ POSSIBILITY OF SUCH DAMAGE. // Note: declarations optimized for code readability; e.g. pattern storage in // actual hardware implementation would differ significantly -static int8 pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { +static int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { 0, }; // +1 to simplify interpolation code -static uint8 sLUT[3][256] = { +static uint8_t sLUT[3][256] = { 0, }; -static uint8 pLUT[3][256] = { +static uint8_t pLUT[3][256] = { 0, }; -static uint32 rnd = 0xdeadbeef; -static uint32 rnd_up = 0xdeadbeef; -static uint32 line_rnd = 0xdeadbeef; -static uint32 line_rnd_up = 0xdeadbeef; -static uint8 scale_shift = 5 + 6; -static uint8 bs = 0; // bitshift = bitdepth - 8 -static uint8 Y_min = 0; -static uint8 Y_max = 255; -static uint8 C_min = 0; -static uint8 C_max = 255; -static int csubx = 2; -static int csuby = 2; +static uint32_t rnd = 0xdeadbeef; +static uint32_t rnd_up = 0xdeadbeef; +static uint32_t line_rnd = 0xdeadbeef; +static uint32_t line_rnd_up = 0xdeadbeef; +static uint8_t scale_shift = 5 + 6; +static uint8_t bs = 0; // bitshift = bitdepth - 8 +static uint8_t Y_min = 0; +static uint8_t Y_max = 255; +static uint8_t C_min = 0; +static uint8_t C_max = 255; +static int csubx = 2; +static int csuby = 2; // Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) -static int16 grain[3][32]; // 9 bit needed because of overlap (has norm > 1) -static uint8 scale[3][32]; +static int16_t grain[3][32]; // 9 bit needed because of overlap (has norm > 1) +static uint8_t scale[3][32]; /** Pseudo-random number generator * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register * (the register LSB has basically no effect since it is never fed back) */ -static uint32 prng( uint32 x ) +static uint32_t prng( uint32_t x ) { - uint32 s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); + uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); return x; } @@ -121,9 +121,9 @@ static uint32 prng( uint32 x ) * Note: to fully support cross-component correlation within patterns, we would * need to align luma/chroma offsets. */ -static void get_offset_y( uint32 val, int* s, uint8* x, uint8* y ) +static void get_offset_y( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { - uint32 bf; // bit field + uint32_t bf; // bit field *s = ( ( val >> 31 ) & 1 ) ? -1 : 1; @@ -136,9 +136,9 @@ static void get_offset_y( uint32 val, int* s, uint8* x, uint8* y ) // pattern samples (when using overlap). } -static void get_offset_u( uint32 val, int* s, uint8* x, uint8* y ) +static void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { - uint32 bf; // bit field + uint32_t bf; // bit field *s = ( ( val >> 2 ) & 1 ) ? -1 : 1; @@ -149,9 +149,9 @@ static void get_offset_u( uint32 val, int* s, uint8* x, uint8* y ) *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void get_offset_v( uint32 val, int* s, uint8* x, uint8* y ) +static void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { - uint32 bf; // bit field + uint32_t bf; // bit field *s = ( ( val >> 15 ) & 1 ) ? -1 : 1; @@ -164,27 +164,27 @@ static void get_offset_v( uint32 val, int* s, uint8* x, uint8* y ) static void add_grain_block( void* I, int c, int x, int y, int width ) { - uint8* I8 = (uint8*) I; - uint16* I16 = (uint16*) I; - - int s, s_up; // random sign flip (current + upper row) - uint8 ox, oy; // random offset (current) - uint8 ox_up, oy_up; // random offset (upper row) - uint8 oc1, oc2; // overlapping coefficients - uint8 pi; // pattern index integer part - int i, j; - int P; // Pattern sample (from current pattern index) + uint8_t* I8 = (uint8_t*) I; + uint16_t* I16 = (uint16_t*) I; + + int s, s_up; // random sign flip (current + upper row) + uint8_t ox, oy; // random offset (current) + uint8_t ox_up, oy_up; // random offset (upper row) + uint8_t oc1, oc2; // overlapping coefficients + uint8_t pi; // pattern index integer part + int i, j; + int P; // Pattern sample (from current pattern index) #if PATTERN_INTERPOLATION - int Pn; // Next-pattern sample (from pattern index+1) - uint8 pf; // pattern index fractional part + int Pn; // Next-pattern sample (from pattern index+1) + uint8_t pf; // pattern index fractional part #endif - uint8 intensity; - int flush = 0; - int subx = c ? csubx : 1; - int suby = c ? csuby : 1; - uint8 I_min = c ? C_min : Y_min; - uint8 I_max = c ? C_max : Y_max; + uint8_t intensity; + int flush = 0; + int subx = c ? csubx : 1; + int suby = c ? csuby : 1; + uint8_t I_min = c ? C_min : Y_min; + uint8_t I_max = c ? C_max : Y_max; if( ( y & 1 ) && suby > 1 ) { @@ -284,8 +284,8 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) { if( x > 0 ) { - int32 g; - int16 l1, l0, r0, r1; + int32_t g; + int16_t l1, l0, r0, r1; if( !flush ) { @@ -300,7 +300,7 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) for( i = 0; i < 16 / subx; i++ ) { // Output previous block (or flush current) - g = round( scale[c][i] * (int16) grain[c][i], scale_shift ); + g = round( scale[c][i] * (int16_t) grain[c][i], scale_shift ); if( bs ) { I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); @@ -355,13 +355,13 @@ void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) } } -void vfgs_set_luma_pattern( int index, int8* P ) +void vfgs_set_luma_pattern( int index, int8_t* P ) { assert( index >= 0 && index < 8 ); memcpy( pattern[0][index], P, 64 * 64 ); } -void vfgs_set_chroma_pattern( int index, int8* P ) +void vfgs_set_chroma_pattern( int index, int8_t* P ) { assert( index >= 0 && index < 8 ); for( int i = 0; i < 64 / csuby; i++ ) @@ -370,19 +370,19 @@ void vfgs_set_chroma_pattern( int index, int8* P ) } } -void vfgs_set_scale_lut( int c, uint8 lut[] ) +void vfgs_set_scale_lut( int c, uint8_t lut[] ) { assert( c >= 0 && c < 3 ); memcpy( sLUT[c], lut, 256 ); } -void vfgs_set_pattern_lut( int c, uint8 lut[] ) +void vfgs_set_pattern_lut( int c, uint8_t lut[] ) { assert( c >= 0 && c < 3 ); memcpy( pLUT[c], lut, 256 ); } -void vfgs_set_seed( uint32 seed ) +void vfgs_set_seed( uint32_t seed ) { // Note: shift left the seed as the LFSR loops on the 31 MSBs, so // the LFSR register LSB has no effect on random sequence initialization diff --git a/source/Lib/vvdec/vfgs_hw.h b/source/Lib/vvdec/vfgs_hw.h index 0bdb8d05..d4df7576 100644 --- a/source/Lib/vvdec/vfgs_hw.h +++ b/source/Lib/vvdec/vfgs_hw.h @@ -57,23 +57,16 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef _VFGS_HW_H_ #define _VFGS_HW_H_ -#ifndef int32 -# define int32 signed int -# define uint32 unsigned int -# define int16 signed short -# define uint16 unsigned short -# define int8 signed char -# define uint8 unsigned char -#endif +#include #define VFGS_MAX_PATTERNS 8 -void vfgs_set_luma_pattern( int index, int8* P ); -void vfgs_set_chroma_pattern( int index, int8* P ); -void vfgs_set_scale_lut( int c, uint8 lut[] ); -void vfgs_set_pattern_lut( int c, uint8 lut[] ); +void vfgs_set_luma_pattern( int index, int8_t* P ); +void vfgs_set_chroma_pattern( int index, int8_t* P ); +void vfgs_set_scale_lut( int c, uint8_t lut[] ); +void vfgs_set_pattern_lut( int c, uint8_t lut[] ); -void vfgs_set_seed( uint32 seed ); +void vfgs_set_seed( uint32_t seed ); void vfgs_set_scale_shift( int shift ); void vfgs_set_depth( int depth ); void vfgs_set_chroma_subsampling( int subx, int suby ); diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index fb7e1fa4..f3b6e1b4 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -888,7 +888,7 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) // if (!m_bFgs) // // TODO: get something random // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32 seed); + // vfgs_set_seed(uint32_t seed); m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; } @@ -904,9 +904,9 @@ int VVDecImpl::xAddGrain( vvdecFrame* frame ) { if( m_eFgs ) { - uint8* Y = (uint8*) frame->planes[0].ptr; - uint8* U = (uint8*) frame->planes[1].ptr; - uint8* V = (uint8*) frame->planes[2].ptr; + uint8_t* Y = (uint8_t*) frame->planes[0].ptr; + uint8_t* U = (uint8_t*) frame->planes[1].ptr; + uint8_t* V = (uint8_t*) frame->planes[2].ptr; CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); From ca70edf0267c7341ee9903097f25bc8b493c3162 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 14:07:36 +0200 Subject: [PATCH 4/8] convert vfgs film gain sythesis to c++ class and make static global variables to class members --- .../vfgs_fw.c => FilmGrain/FilmGrain.cpp} | 132 ++++++++++-------- .../vfgs_fw.h => FilmGrain/FilmGrain.h} | 26 +++- .../vfgs_hw.c => FilmGrain/FilmGrainImpl.cpp} | 107 ++++++-------- .../vfgs_hw.h => FilmGrain/FilmGrainImpl.h} | 68 ++++++--- source/Lib/vvdec/CMakeLists.txt | 9 +- source/Lib/vvdec/vvdecimpl.cpp | 127 +++++++++-------- source/Lib/vvdec/vvdecimpl.h | 9 +- 7 files changed, 263 insertions(+), 215 deletions(-) rename source/Lib/{vvdec/vfgs_fw.c => FilmGrain/FilmGrain.cpp} (90%) rename source/Lib/{vvdec/vfgs_fw.h => FilmGrain/FilmGrain.h} (90%) rename source/Lib/{vvdec/vfgs_hw.c => FilmGrain/FilmGrainImpl.cpp} (78%) rename source/Lib/{vvdec/vfgs_hw.h => FilmGrain/FilmGrainImpl.h} (61%) diff --git a/source/Lib/vvdec/vfgs_fw.c b/source/Lib/FilmGrain/FilmGrain.cpp similarity index 90% rename from source/Lib/vvdec/vfgs_fw.c rename to source/Lib/FilmGrain/FilmGrain.cpp index 5b9af023..28c6e0e1 100644 --- a/source/Lib/vvdec/vfgs_fw.c +++ b/source/Lib/FilmGrain/FilmGrain.cpp @@ -54,17 +54,25 @@ POSSIBILITY OF SUCH DAMAGE. * message). */ -#include "vfgs_fw.h" -#include "vfgs_hw.h" -#include -#include +#include "FilmGrain.h" -#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) -#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) -#define clip( x, lo, hi ) ( ( x ) > ( hi ) ? hi : ( x ) < ( lo ) ? ( lo ) : ( x ) ) +#include +#include +#include + +#include "CommonDef.h" + +namespace vvdec +{ + +template +constexpr inline auto round( T a, uint8_t s ) +{ + return ( a + ( 1 << ( s - 1 ) ) ) >> s; +} // clang-format off -static const int8_t Gaussian_LUT[2048] = { +static constexpr int8_t Gaussian_LUT[2048] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, 13, -11, -20, -19, 33,-127, 17, -6,-105, 18, 19, 71, 48, -10, -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, @@ -195,39 +203,39 @@ static const int8_t Gaussian_LUT[2048] = { 19, 2,-111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47 }; -static const uint32_t Seed_LUT[256] = { - 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, - 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, - 2023850500, 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, - 557974537, 1883843076, 720112066, 1640137737, 1820967556, 40667586, 155354121, 1820967557, - 1115949072, 1631803309, 98284748, 287433856, 2119719977, 988742797, 1827432592, 579378475, - 1017745956, 1309377032, 1316535465, 2074315269, 1923385360, 209722667, 1546228260, 168102420, - 135274561, 355958469, 248291472, 2127839491, 146920100, 585982612, 1611702337, 696506029, - 1386498192, 1258072451, 1212240548, 1043171860, 1217404993, 1090770605, 1386498193, 169093201, - 541098240, 1468005469, 456510673, 1578687785, 1838217424, 2010752065, 2089828354, 1362717428, - 970073673, 854129835, 714793201, 1266069081, 1047060864, 1991471829, 1098097741, 913883585, - 1669598224, 1337918685, 1219264706, 1799741108, 1834116681, 683417731, 1120274457, 1073098457, - 1648396544, 176642749, 31171789, 718317889, 1266977808, 1400892508, 549749008, 1808010512, - 67112961, 1005669825, 903663673, 1771104465, 1277749632, 1229754427, 950632997, 1979371465, - 2074373264, 305357524, 1049387408, 1171033360, 1686114305, 2147468765, 1941195985, 117709841, - 809550080, 991480851, 1816248997, 1561503561, 329575568, 780651196, 1659144592, 1910793616, - 604016641, 1665084765, 1530186961, 1870928913, 809550081, 2079346113, 71307521, 876663040, - 1073807360, 832356664, 1573927377, 204073344, 2026918147, 1702476788, 2043881033, 57949587, - 2001393952, 1197426649, 1186508931, 332056865, 950043140, 890043474, 349099312, 148914948, - 236204097, 2022643605, 1441981517, 498130129, 1443421481, 924216797, 1817491777, 1913146664, - 1411989632, 929068432, 495735097, 1684636033, 1284520017, 432816184, 1344884865, 210843729, - 676364544, 234449232, 12112337, 1350619139, 1753272996, 2037118872, 1408560528, 533334916, - 1043640385, 357326099, 201376421, 110375493, 541106497, 416159637, 242512193, 777294080, - 1614872576, 1535546636, 870600145, 910810409, 1821440209, 1605432464, 1145147393, 951695441, - 1758494976, 1506656568, 1557150160, 608221521, 1073840384, 217672017, 684818688, 1750138880, - 16777217, 677990609, 953274371, 1770050213, 1359128393, 1797602707, 1984616737, 1865815816, - 2120835200, 2051677060, 1772234061, 1579794881, 1652821009, 1742099468, 1887260865, 46468113, - 1011925248, 1134107920, 881643832, 1354774993, 472508800, 1892499769, 1752793472, 1962502272, - 687898625, 883538000, 1354355153, 1761673473, 944820481, 2020102353, 22020353, 961597696, - 1342242816, 964808962, 1355809701, 17016649, 1386540177, 647682692, 1849012289, 751668241, - 1557184768, 127374604, 1927564752, 1045744913, 1614921984, 43588881, 1016185088, 1544617984, - 1090519041, 136122424, 215038417, 1563027841, 2026918145, 1688778833, 701530369, 1372639488, - 1342242817, 2036945104, 953274369, 1750192384, 16842753, 964808960, 1359020032, 1358954497 +static constexpr uint32_t Seed_LUT[256] = { + 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, + 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, + 2023850500, 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, + 557974537, 1883843076, 720112066, 1640137737, 1820967556, 40667586, 155354121, 1820967557, + 1115949072, 1631803309, 98284748, 287433856, 2119719977, 988742797, 1827432592, 579378475, + 1017745956, 1309377032, 1316535465, 2074315269, 1923385360, 209722667, 1546228260, 168102420, + 135274561, 355958469, 248291472, 2127839491, 146920100, 585982612, 1611702337, 696506029, + 1386498192, 1258072451, 1212240548, 1043171860, 1217404993, 1090770605, 1386498193, 169093201, + 541098240, 1468005469, 456510673, 1578687785, 1838217424, 2010752065, 2089828354, 1362717428, + 970073673, 854129835, 714793201, 1266069081, 1047060864, 1991471829, 1098097741, 913883585, + 1669598224, 1337918685, 1219264706, 1799741108, 1834116681, 683417731, 1120274457, 1073098457, + 1648396544, 176642749, 31171789, 718317889, 1266977808, 1400892508, 549749008, 1808010512, + 67112961, 1005669825, 903663673, 1771104465, 1277749632, 1229754427, 950632997, 1979371465, + 2074373264, 305357524, 1049387408, 1171033360, 1686114305, 2147468765, 1941195985, 117709841, + 809550080, 991480851, 1816248997, 1561503561, 329575568, 780651196, 1659144592, 1910793616, + 604016641, 1665084765, 1530186961, 1870928913, 809550081, 2079346113, 71307521, 876663040, + 1073807360, 832356664, 1573927377, 204073344, 2026918147, 1702476788, 2043881033, 57949587, + 2001393952, 1197426649, 1186508931, 332056865, 950043140, 890043474, 349099312, 148914948, + 236204097, 2022643605, 1441981517, 498130129, 1443421481, 924216797, 1817491777, 1913146664, + 1411989632, 929068432, 495735097, 1684636033, 1284520017, 432816184, 1344884865, 210843729, + 676364544, 234449232, 12112337, 1350619139, 1753272996, 2037118872, 1408560528, 533334916, + 1043640385, 357326099, 201376421, 110375493, 541106497, 416159637, 242512193, 777294080, + 1614872576, 1535546636, 870600145, 910810409, 1821440209, 1605432464, 1145147393, 951695441, + 1758494976, 1506656568, 1557150160, 608221521, 1073840384, 217672017, 684818688, 1750138880, + 16777217, 677990609, 953274371, 1770050213, 1359128393, 1797602707, 1984616737, 1865815816, + 2120835200, 2051677060, 1772234061, 1579794881, 1652821009, 1742099468, 1887260865, 46468113, + 1011925248, 1134107920, 881643832, 1354774993, 472508800, 1892499769, 1752793472, 1962502272, + 687898625, 883538000, 1354355153, 1761673473, 944820481, 2020102353, 22020353, 961597696, + 1342242816, 964808962, 1355809701, 17016649, 1386540177, 647682692, 1849012289, 751668241, + 1557184768, 127374604, 1927564752, 1045744913, 1614921984, 43588881, 1016185088, 1544617984, + 1090519041, 136122424, 215038417, 1563027841, 2026918145, 1688778833, 701530369, 1372639488, + 1342242817, 2036945104, 953274369, 1750192384, 16842753, 964808960, 1359020032, 1358954497 }; #define DEFINE_DCT2_P64_MATRIX(aa, ab, ac, ad, ae, af, ag, ah, ai, aj, ak, al, am, an, ao, ap, aq, ar, as, at, au, av, aw, ax, ay, az, ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl, bm, bn, bo, bp, bq, br, bs, bt, bu, bv, bw, bx, by, bz, ca, cb, cc, cd, ce, cf, cg, ch, ci, cj, ck) \ @@ -296,14 +304,14 @@ static const uint32_t Seed_LUT[256] = { { cj, -cg, cd, -ca, bx, -bu, br, -bo, bl, -bi, bf, -bh, bk, -bn, bq, -bt, bw, -bz, cc, -cf, ci, ck, -ch, ce, -cb, by, -bv, bs, -bp, bm, -bj, bg, -bg, bj, -bm, bp, -bs, bv, -by, cb, -ce, ch, -ck, -ci, cf, -cc, bz, -bw, bt, -bq, bn, -bk, bh, -bf, bi, -bl, bo, -br, bu, -bx, ca, -cd, cg, -cj }, \ { be, -bd, bc, -bb, ba, -az, ay, -ax, aw, -av, au, -at, as, -ar, aq, -ap, ap, -aq, ar, -as, at, -au, av, -aw, ax, -ay, az, -ba, bb, -bc, bd, -be, -be, bd, -bc, bb, -ba, az, -ay, ax, -aw, av, -au, at, -as, ar, -aq, ap, -ap, aq, -ar, as, -at, au, -av, aw, -ax, ay, -az, ba, -bb, bc, -bd, be }, \ { ck, -cj, ci, -ch, cg, -cf, ce, -cd, cc, -cb, ca, -bz, by, -bx, bw, -bv, bu, -bt, bs, -br, bq, -bp, bo, -bn, bm, -bl, bk, -bj, bi, -bh, bg, -bf, bf, -bg, bh, -bi, bj, -bk, bl, -bm, bn, -bo, bp, -bq, br, -bs, bt, -bu, bv, -bw, bx, -by, bz, -ca, cb, -cc, cd, -ce, cf, -cg, ch, -ci, cj, -ck }, \ - } +} -static const int8_t DCT2_64[64][64] = \ +static constexpr int8_t DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); // clang-format on /** Pseudo-random number generator (32-bit) */ -static uint32_t prng( uint32_t x ) +static inline uint32_t prng( uint32_t x ) { #if 1 // same as HW (bit-reversed RDD-5) uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; @@ -409,7 +417,7 @@ static void idct2_32( int8_t B[][32] ) } } -static void vfgs_make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) +static void make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) { int k, l; uint32_t n; @@ -436,7 +444,7 @@ static void vfgs_make_sei_ff_pattern64( int8_t B[][64], int fh, int fv ) idct2_64( B ); } -static void vfgs_make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) +static void make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) { int k, l; uint32_t n; @@ -461,7 +469,7 @@ static void vfgs_make_sei_ff_pattern32( int8_t B[][32], int fh, int fv ) idct2_32( B ); } -static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int16_t ar_coef[], int nb_coef, int shift, int scale, uint32_t seed ) +static void make_ar_pattern( int8_t buf[], int8_t P[], int size, const int16_t ar_coef[], int nb_coef, int shift, int scale, uint32_t seed ) { int16_t coef[4][7]; int L = 0; @@ -490,7 +498,7 @@ static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int1 break; default: - assert( 0 ); + THROW_FATAL( "nb_coef != 6 not implemented" ); } if( nb_coef != 6 ) { @@ -527,7 +535,7 @@ static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int1 g += round( Gaussian_LUT[rnd & 2047], shift ); rnd = prng( rnd ); - buf[width * y + x] = clip( g, -127, 127 ); + buf[width * y + x] = Clip3( -127, 127, g ); } } @@ -542,7 +550,7 @@ static void vfgs_make_ar_pattern( int8_t buf[], int8_t P[], int size, const int1 } } -int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) +static int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) { int16_t* coef_a = &cfg->comp_model_value[0][0][0] + a; int16_t* coef_b = &cfg->comp_model_value[0][0][0] + b; @@ -559,7 +567,7 @@ int same_pattern( fgs_sei* cfg, int32_t a, int32_t b ) } /** Initialize "hardware" interface from FGS SEI parameters */ -void vfgs_init_sei( fgs_sei* cfg ) +void FilmGrain::init_sei( fgs_sei* cfg ) { int8_t P[64 * 64]; int8_t Lbuf[73 * 82]; @@ -630,31 +638,31 @@ void vfgs_init_sei( fgs_sei* cfg ) { if( cfg->model_id ) { - vfgs_make_ar_pattern( Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); + make_ar_pattern( Lbuf, P, 64, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[0] ); } else { - vfgs_make_sei_ff_pattern64( (int8_t( * )[64]) P, coef[1], coef[2] ); + make_sei_ff_pattern64( (int8_t( * )[64]) P, coef[1], coef[2] ); } - vfgs_set_luma_pattern( i, P ); + set_luma_pattern( i, P ); } else if( c == 2 ) { if( cfg->model_id ) { - vfgs_make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); + make_ar_pattern( Cbuf, P, 32, coef, 6, 1, cfg->log2_scale_factor, Seed_LUT[1] ); } else { - vfgs_make_sei_ff_pattern32( (int8_t( * )[32]) P, coef[1], coef[2] ); + make_sei_ff_pattern32( (int8_t( * )[32]) P, coef[1], coef[2] ); } - vfgs_set_chroma_pattern( i, P ); + set_chroma_pattern( i, P ); } } // 3. Fill up LUTs - for( int cc = min( c, 1 ); cc <= c; cc++ ) + for( int cc = std::min( c, 1 ); cc <= c; cc++ ) { if( cfg->comp_model_present_flag[cc] ) { @@ -703,11 +711,13 @@ void vfgs_init_sei( fgs_sei* cfg ) memset( plut, 0, sizeof( plut ) ); } // 3c. Register LUTs - vfgs_set_scale_lut( cc, slut ); - vfgs_set_pattern_lut( cc, plut ); + set_scale_lut( cc, slut ); + set_pattern_lut( cc, plut ); } } } - vfgs_set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) + set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) } + +} // namespace vvdec diff --git a/source/Lib/vvdec/vfgs_fw.h b/source/Lib/FilmGrain/FilmGrain.h similarity index 90% rename from source/Lib/vvdec/vfgs_fw.h rename to source/Lib/FilmGrain/FilmGrain.h index fc3b4fe7..45b892dd 100644 --- a/source/Lib/vvdec/vfgs_fw.h +++ b/source/Lib/FilmGrain/FilmGrain.h @@ -54,14 +54,16 @@ POSSIBILITY OF SUCH DAMAGE. * message). */ -#ifndef _VFGS_FW_H_ -#define _VFGS_FW_H_ +#pragma once -#include +#include "FilmGrainImpl.h" + +namespace vvdec +{ #define SEI_MAX_MODEL_VALUES 6 -typedef struct fgs_sei_s +struct fgs_sei { uint8_t model_id; uint8_t log2_scale_factor; @@ -71,8 +73,18 @@ typedef struct fgs_sei_s uint8_t intensity_interval_lower_bound[3][256]; uint8_t intensity_interval_upper_bound[3][256]; int16_t comp_model_value[3][256][SEI_MAX_MODEL_VALUES]; -} fgs_sei; +}; + +class FilmGrain : public FilmGrainImpl +{ +public: + FilmGrain( int depth, int chromaSubsampling ) + { + set_depth( depth ); + set_chroma_subsampling( chromaSubsampling, chromaSubsampling ); + } -void vfgs_init_sei( fgs_sei* cfg ); + void init_sei( fgs_sei* cfg ); +}; -#endif // _VFGS_FW_H_ +} // namespace vvdec diff --git a/source/Lib/vvdec/vfgs_hw.c b/source/Lib/FilmGrain/FilmGrainImpl.cpp similarity index 78% rename from source/Lib/vvdec/vfgs_hw.c rename to source/Lib/FilmGrain/FilmGrainImpl.cpp index a3d88ed6..b198334c 100644 --- a/source/Lib/vvdec/vfgs_hw.c +++ b/source/Lib/FilmGrain/FilmGrainImpl.cpp @@ -54,49 +54,29 @@ POSSIBILITY OF SUCH DAMAGE. * message). */ -#include "vfgs_hw.h" -#include // memcpy -#include +#include "FilmGrainImpl.h" -#define min( a, b ) ( ( a ) < ( b ) ? ( a ) : ( b ) ) -#define max( a, b ) ( ( a ) > ( b ) ? ( a ) : ( b ) ) -#define round( a, s ) ( ( ( a ) + ( 1 << ( ( s ) - 1 ) ) ) >> ( s ) ) +#include // memcpy +#include + +#include #define PATTERN_INTERPOLATION 0 -// Note: declarations optimized for code readability; e.g. pattern storage in -// actual hardware implementation would differ significantly -static int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { - 0, -}; // +1 to simplify interpolation code -static uint8_t sLUT[3][256] = { - 0, -}; -static uint8_t pLUT[3][256] = { - 0, -}; -static uint32_t rnd = 0xdeadbeef; -static uint32_t rnd_up = 0xdeadbeef; -static uint32_t line_rnd = 0xdeadbeef; -static uint32_t line_rnd_up = 0xdeadbeef; -static uint8_t scale_shift = 5 + 6; -static uint8_t bs = 0; // bitshift = bitdepth - 8 -static uint8_t Y_min = 0; -static uint8_t Y_max = 255; -static uint8_t C_min = 0; -static uint8_t C_max = 255; -static int csubx = 2; -static int csuby = 2; - -// Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) -static int16_t grain[3][32]; // 9 bit needed because of overlap (has norm > 1) -static uint8_t scale[3][32]; +namespace vvdec +{ + +template +constexpr inline auto round( T a, uint8_t s ) +{ + return ( a + ( 1 << ( s - 1 ) ) ) >> s; +} /** Pseudo-random number generator * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register * (the register LSB has basically no effect since it is never fed back) */ -static uint32_t prng( uint32_t x ) +static inline uint32_t prng( uint32_t x ) { uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; x = s | ( x >> 1 ); @@ -136,7 +116,7 @@ static void get_offset_y( uint32_t val, int* s, uint8_t* x, uint8_t* y ) // pattern samples (when using overlap). } -static void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) +void FilmGrainImpl::get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { uint32_t bf; // bit field @@ -149,7 +129,7 @@ static void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ) *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) +void FilmGrainImpl::get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) { uint32_t bf; // bit field @@ -162,7 +142,7 @@ static void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ) *y = ( ( bf * 12 ) >> 10 ) * ( 4 / csuby ); } -static void add_grain_block( void* I, int c, int x, int y, int width ) +void FilmGrainImpl::add_grain_block( void* I, int c, int x, int y, int width ) { uint8_t* I8 = (uint8_t*) I; uint16_t* I16 = (uint16_t*) I; @@ -191,10 +171,11 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) return; } - assert( !( x & 15 ) ); - assert( width > 128 ); - assert( bs == 0 || bs == 2 ); - assert( scale_shift + bs >= 8 && scale_shift + bs <= 13 ); + CHECK( x & 15, "x not a multiple of 16" ); + CHECK( width <= 128, "wrong width" ); + CHECK( bs != 0 && bs != 2, "wrong bs" ); + CHECK( scale_shift + bs < 8 || scale_shift + bs > 13, "wrong scale_shift" ); + // TODO: assert subx, suby, Y/C min/max, max pLUT values, etc j = y & 0xf; @@ -303,11 +284,11 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) g = round( scale[c][i] * (int16_t) grain[c][i], scale_shift ); if( bs ) { - I16[( x - 16 ) / subx + i] = max( I_min << bs, min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); + I16[( x - 16 ) / subx + i] = std::max( I_min << bs, std::min( I_max << bs, I16[( x - 16 ) / subx + i] + g ) ); } else { - I8[( x - 16 ) / subx + i] = max( I_min, min( I_max, I8[( x - 16 ) / subx + i] + g ) ); + I8[( x - 16 ) / subx + i] = std::max( I_min, std::min( I_max, I8[( x - 16 ) / subx + i] + g ) ); } } } @@ -329,7 +310,7 @@ static void add_grain_block( void* I, int c, int x, int y, int width ) /* Public interface ***********************************************************/ -void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) +void FilmGrainImpl::add_grain_line( void* Y, void* U, void* V, int y, int width ) { // Generate / backup / restore per-line random seeds (needed to make multi-line blocks) if( y && ( y & 0x0f ) == 0 ) @@ -355,49 +336,47 @@ void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ) } } -void vfgs_set_luma_pattern( int index, int8_t* P ) +void FilmGrainImpl::set_luma_pattern( int index, int8_t* P ) { - assert( index >= 0 && index < 8 ); + CHECK( index < 0 || index >= 8, "luma pattern index out of bounds" ); memcpy( pattern[0][index], P, 64 * 64 ); } -void vfgs_set_chroma_pattern( int index, int8_t* P ) +void FilmGrainImpl::set_chroma_pattern( int index, int8_t* P ) { - assert( index >= 0 && index < 8 ); + CHECK( index < 0 || index >= 8, "chroma pattern index out of bounds" ); for( int i = 0; i < 64 / csuby; i++ ) { memcpy( pattern[1][index][i], P + ( 64 / csuby ) * i, 64 / csubx ); } } -void vfgs_set_scale_lut( int c, uint8_t lut[] ) +void FilmGrainImpl::set_scale_lut( int c, uint8_t lut[] ) { - assert( c >= 0 && c < 3 ); + CHECK( c < 0 || c >= 3, "scale lut idx out of bounds" ); memcpy( sLUT[c], lut, 256 ); } -void vfgs_set_pattern_lut( int c, uint8_t lut[] ) +void FilmGrainImpl::set_pattern_lut( int c, uint8_t lut[] ) { - assert( c >= 0 && c < 3 ); + CHECK( c < 0 || c >= 3, "pattern lut idx out of bounds" ); memcpy( pLUT[c], lut, 256 ); } -void vfgs_set_seed( uint32_t seed ) +void FilmGrainImpl::set_seed( uint32_t seed ) { - // Note: shift left the seed as the LFSR loops on the 31 MSBs, so - // the LFSR register LSB has no effect on random sequence initialization - rnd = rnd_up = line_rnd = line_rnd_up = ( seed << 1 ); + rnd = rnd_up = line_rnd = line_rnd_up = seed; } -void vfgs_set_scale_shift( int shift ) +void FilmGrainImpl::set_scale_shift( int shift ) { - assert( shift >= 2 && shift < 8 ); + CHECK( shift < 2 || shift >= 8, "scale shift out of range" ); scale_shift = shift + 6 - bs; } -void vfgs_set_depth( int depth ) +void FilmGrainImpl::set_depth( int depth ) { - assert( depth == 8 || depth == 10 ); + CHECK( depth != 8 && depth != 10, "only bit depth 8 and 10 supported." ) if( bs == 0 && depth > 8 ) { @@ -411,10 +390,12 @@ void vfgs_set_depth( int depth ) bs = depth - 8; } -void vfgs_set_chroma_subsampling( int subx, int suby ) +void FilmGrainImpl::set_chroma_subsampling( int subx, int suby ) { - assert( subx == 1 || subx == 2 ); - assert( suby == 1 || suby == 2 ); + CHECK( subx != 1 && subx != 2, "chroma subsampling should be 1 or 2" ); + CHECK( suby != 1 && suby != 2, "chroma subsampling should be 1 or 2" ); csubx = subx; csuby = suby; } + +} // namespace vvdec diff --git a/source/Lib/vvdec/vfgs_hw.h b/source/Lib/FilmGrain/FilmGrainImpl.h similarity index 61% rename from source/Lib/vvdec/vfgs_hw.h rename to source/Lib/FilmGrain/FilmGrainImpl.h index d4df7576..0fbb85d9 100644 --- a/source/Lib/vvdec/vfgs_hw.h +++ b/source/Lib/FilmGrain/FilmGrainImpl.h @@ -54,23 +54,59 @@ POSSIBILITY OF SUCH DAMAGE. * message). */ -#ifndef _VFGS_HW_H_ -#define _VFGS_HW_H_ +#pragma once -#include +#include #define VFGS_MAX_PATTERNS 8 -void vfgs_set_luma_pattern( int index, int8_t* P ); -void vfgs_set_chroma_pattern( int index, int8_t* P ); -void vfgs_set_scale_lut( int c, uint8_t lut[] ); -void vfgs_set_pattern_lut( int c, uint8_t lut[] ); - -void vfgs_set_seed( uint32_t seed ); -void vfgs_set_scale_shift( int shift ); -void vfgs_set_depth( int depth ); -void vfgs_set_chroma_subsampling( int subx, int suby ); - -void vfgs_add_grain_line( void* Y, void* U, void* V, int y, int width ); - -#endif // _VFGS_HW_H_ +namespace vvdec +{ + +class FilmGrainImpl +{ + // Note: declarations optimized for code readability; e.g. pattern storage in + // actual hardware implementation would differ significantly + int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { 0, }; // +1 to simplify interpolation code + uint8_t sLUT[3][256] = { 0, }; + uint8_t pLUT[3][256] = { 0, }; + + uint32_t rnd = 0xdeadbeef; + uint32_t rnd_up = 0xdeadbeef; + uint32_t line_rnd = 0xdeadbeef; + uint32_t line_rnd_up = 0xdeadbeef; + uint8_t scale_shift = 5 + 6; + uint8_t bs = 0; // bitshift = bitdepth - 8 + int csubx = 2; + int csuby = 2; + + constexpr static uint8_t Y_min = 0; + constexpr static uint8_t Y_max = 255; + constexpr static uint8_t C_min = 0; + constexpr static uint8_t C_max = 255; + + // Processing pipeline (needs only 2 registers for each color actually, for horizontal deblocking) + int16_t grain[3][32]; // 9 bit needed because of overlap (has norm > 1) + uint8_t scale[3][32]; + + void get_offset_u( uint32_t val, int* s, uint8_t* x, uint8_t* y ); + void get_offset_v( uint32_t val, int* s, uint8_t* x, uint8_t* y ); + void add_grain_block( void* I, int c, int x, int y, int width ); + +protected: + void set_luma_pattern( int index, int8_t* P ); + void set_chroma_pattern( int index, int8_t* P ); + void set_scale_lut( int c, uint8_t lut[] ); + void set_pattern_lut( int c, uint8_t lut[] ); + + void set_seed( uint32_t seed ); + void set_scale_shift( int shift ); + +public: + void set_depth( int depth ); + void set_chroma_subsampling( int subx, int suby ); + + void add_grain_line( void* Y, void* U, void* V, int y, int width ); +}; + +} // namespace vvdec diff --git a/source/Lib/vvdec/CMakeLists.txt b/source/Lib/vvdec/CMakeLists.txt index 88d052ff..d33af357 100644 --- a/source/Lib/vvdec/CMakeLists.txt +++ b/source/Lib/vvdec/CMakeLists.txt @@ -46,8 +46,8 @@ file( GLOB MD5_SRC_FILES "../libmd5/*.cpp" ) file( GLOB MD5_INC_FILES "../libmd5/*.h" ) if( VVDEC_ENABLE_FILM_GRAIN ) - file( GLOB FGS_SRC_FILES "vfgs_*.c" ) - file( GLOB FGS_INC_FILES "vfgs_*.h" ) + file( GLOB FGS_SRC_FILES "../FilmGrain/*.cpp" ) + file( GLOB FGS_INC_FILES "../FilmGrain/*.h" ) set_property( SOURCE vvdec.cpp vvdecimpl.cpp APPEND PROPERTY COMPILE_DEFINITIONS ENABLE_FILM_GRAIN ) endif() @@ -94,10 +94,12 @@ if( VVDEC_ENABLE_X86_SIMD ) #set_property( SOURCE ${X86_SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) #set_property( SOURCE ${X86_AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) set_property( SOURCE ${X86_AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) + set_property( SOURCE ${FGS_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) # set needed compile flags if( MSVC ) #set_property( SOURCE ${X86_AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX" ) set_property( SOURCE ${X86_AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + set_property( SOURCE ${FGS_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) elseif( UNIX OR MINGW ) include( vvdecCompilerSupport ) @@ -114,10 +116,11 @@ if( VVDEC_ENABLE_X86_SIMD ) #set_property( SOURCE ${X86_SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_msse42}" ) #set_property( SOURCE ${X86_AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_mavx}" ) set_property( SOURCE ${X86_AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_mavx2}" ) + set_property( SOURCE ${FGS_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "${FLAG_mavx2}" ) endif() #add_library( ${LIB_NAME}_x86_simd OBJECT ${X86_SSE41_SRC_FILES} ${X86_SSE42_SRC_FILES} ${X86_AVX_SRC_FILES} ${X86_AVX2_SRC_FILES} ) - add_library( ${LIB_NAME}_x86_simd OBJECT ${X86_SSE41_SRC_FILES} ${X86_AVX2_SRC_FILES} ) + add_library( ${LIB_NAME}_x86_simd OBJECT ${X86_SSE41_SRC_FILES} ${X86_AVX2_SRC_FILES} ${X86_AVX2_C_FILES} ) target_link_libraries( ${LIB_NAME}_x86_simd ${INTEL_ITT_LINK_TARGET} ) # disble LTO for the files compiled with special architecture flags diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index f3b6e1b4..51a76f77 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -55,16 +55,15 @@ POSSIBILITY OF SUCH DAMAGE. #if ENABLE_FILM_GRAIN # include "vvdec/sei.h" -extern "C" -{ -# include "vfgs_fw.h" -# include "vfgs_hw.h" -} +# include "FilmGrain/FilmGrain.h" #endif // ENABLE_FILM_GRAIN namespace vvdec { +VVDecImpl::VVDecImpl() = default; +VVDecImpl::~VVDecImpl() = default; + int VVDecImpl::init( const vvdecParams& params, vvdecCreateBufferCallback createBufCallback, vvdecUnrefBufferCallback unrefBufCallback ) { if( m_bInitialized ){ return VVDEC_ERR_INITIALIZE; } @@ -108,7 +107,7 @@ int VVDecImpl::init( const vvdecParams& params, vvdecCreateBufferCallback create m_cUserAllocator = UserAllocator(); } - m_cDecLib.reset( new DecLib() ); + m_cDecLib = std::make_unique(); initROM(); @@ -176,6 +175,10 @@ int VVDecImpl::uninit() m_bInitialized = false; m_eState = INTERNAL_STATE_UNINITIALIZED; +#if ENABLE_FILM_GRAIN + m_filmGrainSynth.reset(); +#endif + return VVDEC_OK; } @@ -831,71 +834,75 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) { vvdecSEIFilmGrainCharacteristics* sei = (vvdecSEIFilmGrainCharacteristics*) s->payload; - if( !sei->filmGrainCharacteristicsCancelFlag ) + if( sei->filmGrainCharacteristicsCancelFlag ) + { + m_eFgs = 0; + return VVDEC_OK; + } + + if( !m_filmGrainSynth ) + { + m_filmGrainSynth = std::make_unique( 10, 2 ); + } + + fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) + // Copy SEI message in vfgs structure format + // TODO: check some values and warn about unsupported stuff ? + fgs.model_id = sei->filmGrainModelId; + fgs.log2_scale_factor = sei->log2ScaleFactor; + for( int c = 0; c < 3; c++ ) { - fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) - // Copy SEI message in vfgs structure format - // TODO: check some values and warn about unsupported stuff ? - fgs.model_id = sei->filmGrainModelId; - fgs.log2_scale_factor = sei->log2ScaleFactor; - for( int c = 0; c < 3; c++ ) + vvdecCompModel& cm = sei->compModel[c]; + if( cm.presentFlag ) { - vvdecCompModel& cm = sei->compModel[c]; - if( cm.presentFlag ) + fgs.comp_model_present_flag[c] = 1; + fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; + fgs.num_model_values[c] = cm.numModelValues; + for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) { - fgs.comp_model_present_flag[c] = 1; - fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; - fgs.num_model_values[c] = cm.numModelValues; - for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) + vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; + fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; + fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; + for( int v = 0; v < fgs.num_model_values[c]; v++ ) { - vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; - fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; - fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; - for( int v = 0; v < fgs.num_model_values[c]; v++ ) - { - fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; - } - // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) - if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr - if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) + fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; } + // Fill with default model values (VFGS needs them; it actually ignores num_model_values) + if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) + if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr + if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio + if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) } } + } - vfgs_set_depth( 10 ); - vfgs_set_chroma_subsampling( 2, 2 ); - // Conversion of component model values for 4:2:0 chroma format - if( fgs.model_id == 0 ) + // Conversion of component model values for 4:2:0 chroma format + if( fgs.model_id == 0 ) + { + for( int c = 1; c < 3; c++ ) { - for( int c = 1; c < 3; c++ ) + if( fgs.comp_model_present_flag[c] ) { - if( fgs.comp_model_present_flag[c] ) + for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) { - for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) - { - fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency - fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency - fgs.comp_model_value[c][k][0] >>= 1; - } + fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency + fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency + fgs.comp_model_value[c][k][0] >>= 1; } } } - vfgs_init_sei( &fgs ); + } + m_filmGrainSynth->set_depth( 10 ); + m_filmGrainSynth->set_chroma_subsampling( 2, 2 ); + m_filmGrainSynth->init_sei( &fgs ); - // if (!m_bFgs) - // // TODO: get something random - // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32_t seed); + // if (!m_bFgs) + // // TODO: get something random + // // TODO: make seed also impact the pattern gen + // vfgs_set_seed(uint32_t seed); - m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; - } - else - { - m_eFgs = 0; - } + m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; return VVDEC_OK; } @@ -912,7 +919,7 @@ int VVDecImpl::xAddGrain( vvdecFrame* frame ) for( int y = 0; y < frame->planes[0].height; y++ ) { - vfgs_add_grain_line( Y, U, V, y, frame->planes[0].width ); + m_filmGrainSynth->add_grain_line( Y, U, V, y, frame->planes[0].width ); Y += frame->planes[0].stride; if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) { @@ -969,12 +976,12 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) #if ENABLE_FILM_GRAIN // find FGC SEI - for( auto& s : pcPic->seiMessageList ) + for( auto& sei: pcPic->seiMessageList ) { - if( s->payloadType == VVDEC_FILM_GRAIN_CHARACTERISTICS ) + if( sei->payloadType == VVDEC_FILM_GRAIN_CHARACTERISTICS ) { - xUpdateFGC( s ); - msg( INFO, "vvdecimpl [detail]: SEI FILM_GRAIN_CHARACTERISTICS\n"); + xUpdateFGC( sei ); + msg( DETAILS, "vvdecimpl [detail]: SEI FILM_GRAIN_CHARACTERISTICS\n"); } } bCreateStorage = bCreateStorage || m_eFgs; diff --git a/source/Lib/vvdec/vvdecimpl.h b/source/Lib/vvdec/vvdecimpl.h index cc2ff401..d0bd5390 100644 --- a/source/Lib/vvdec/vvdecimpl.h +++ b/source/Lib/vvdec/vvdecimpl.h @@ -47,6 +47,7 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvdec { +class FilmGrain; static const char * const vvdecNalTypeNames[] = { "NAL_UNIT_CODED_SLICE_TRAIL", "NAL_UNIT_CODED_SLICE_STSA", "NAL_UNIT_CODED_SLICE_RADL", "NAL_UNIT_CODED_SLICE_RASL", "NAL_UNIT_RESERVED_VCL_4", "NAL_UNIT_RESERVED_VCL_5", "NAL_UNIT_RESERVED_VCL_6", @@ -98,11 +99,8 @@ class VVDecImpl public: - /// Constructor - VVDecImpl() = default; - - /// Destructor - ~VVDecImpl() = default; + VVDecImpl(); + ~VVDecImpl(); class FrameStorage { @@ -220,6 +218,7 @@ class VVDecImpl uint64_t m_uiSeqNumOutput = 0; #if ENABLE_FILM_GRAIN int m_eFgs = 0; + std::unique_ptr m_filmGrainSynth; #endif // ENABLE_FILM_GRAIN }; From 279c3de6ecf7cd528943824612e61a111cd507ce Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Thu, 23 May 2024 16:58:36 +0200 Subject: [PATCH 5/8] small cleanup --- source/Lib/vvdec/vvdecimpl.cpp | 71 ++++++++++++++++++---------------- source/Lib/vvdec/vvdecimpl.h | 11 ++++-- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 51a76f77..60e4d88b 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -232,7 +232,7 @@ int VVDecImpl::reset() #endif #if ENABLE_FILM_GRAIN - m_eFgs = 0; + m_filmGrainCharacteristicsState = FgcNone; #endif // ENABLE_FILM_GRAIN m_uiSeqNumber = 0; m_uiSeqNumOutput = 0; @@ -830,19 +830,19 @@ int VVDecImpl::copyComp( const unsigned char* pucSrc, unsigned char* pucDest, un } #if ENABLE_FILM_GRAIN -int VVDecImpl::xUpdateFGC( vvdecSEI* s ) +void VVDecImpl::xUpdateFGC( vvdecSEI* s ) { vvdecSEIFilmGrainCharacteristics* sei = (vvdecSEIFilmGrainCharacteristics*) s->payload; if( sei->filmGrainCharacteristicsCancelFlag ) { - m_eFgs = 0; - return VVDEC_OK; + m_filmGrainCharacteristicsState = FgcNone; + return; } if( !m_filmGrainSynth ) { - m_filmGrainSynth = std::make_unique( 10, 2 ); + m_filmGrainSynth = std::make_unique( 10, 2 ); // TODO: (GH) set correct bit depth and color format, and apply changes } fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) @@ -868,11 +868,17 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; } // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - if( fgs.num_model_values[c] < 2 ) { fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; } // H high cutoff / 1st AR coef (left & top) - if( fgs.num_model_values[c] < 3 ) { fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; } // V high cutoff / x-comp corr - if( fgs.num_model_values[c] < 4 ) { fgs.comp_model_value[c][i][3] = 0; } // H low cutoff / 2nd AR coef (top-left, top-right) - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; } // V low cutoff / aspect ratio - if( fgs.num_model_values[c] < 5 ) { fgs.comp_model_value[c][i][5] = 0; } // x-comp corr / 3rd AR coef (left-left, top-top) + switch( fgs.num_model_values[c] ) + { + // clang-format off + case 0: + case 1: fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) + case 2: fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr + case 3: fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) + case 4: fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio + fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + // clang-format on + } } } } @@ -902,38 +908,37 @@ int VVDecImpl::xUpdateFGC( vvdecSEI* s ) // // TODO: make seed also impact the pattern gen // vfgs_set_seed(uint32_t seed); - m_eFgs = sei->filmGrainCharacteristicsPersistenceFlag ? 2 : 1; - - return VVDEC_OK; + m_filmGrainCharacteristicsState = sei->filmGrainCharacteristicsPersistenceFlag ? FgcPersist : FgcDontPersist; } -int VVDecImpl::xAddGrain( vvdecFrame* frame ) +void VVDecImpl::xAddGrain( vvdecFrame* frame ) { - if( m_eFgs ) + if( m_filmGrainCharacteristicsState == FgcNone ) { - uint8_t* Y = (uint8_t*) frame->planes[0].ptr; - uint8_t* U = (uint8_t*) frame->planes[1].ptr; - uint8_t* V = (uint8_t*) frame->planes[2].ptr; + return; + } - CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); + uint8_t* Y = (uint8_t*) frame->planes[0].ptr; + uint8_t* U = (uint8_t*) frame->planes[1].ptr; + uint8_t* V = (uint8_t*) frame->planes[2].ptr; - for( int y = 0; y < frame->planes[0].height; y++ ) - { - m_filmGrainSynth->add_grain_line( Y, U, V, y, frame->planes[0].width ); - Y += frame->planes[0].stride; - if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) - { - U += frame->planes[1].stride; - V += frame->planes[1].stride; - } - } + CHECK( frame->bitDepth != 10, "Bitdepth is not 10" ); - if( m_eFgs < 2 ) // Not persistent + for( int y = 0; y < frame->planes[0].height; y++ ) + { + m_filmGrainSynth->add_grain_line( Y, U, V, y, frame->planes[0].width ); + Y += frame->planes[0].stride; + if( ( y & 1 ) || ( frame->planes[0].height == frame->planes[1].height ) ) { - m_eFgs = 0; + U += frame->planes[1].stride; + V += frame->planes[2].stride; } } - return VVDEC_OK; + + if( m_filmGrainCharacteristicsState != FgcPersist ) // Not persistent + { + m_filmGrainCharacteristicsState = FgcNone; + } } #endif // ENABLE_FILM_GRAIN @@ -984,7 +989,7 @@ int VVDecImpl::xAddPicture( Picture* pcPic ) msg( DETAILS, "vvdecimpl [detail]: SEI FILM_GRAIN_CHARACTERISTICS\n"); } } - bCreateStorage = bCreateStorage || m_eFgs; + bCreateStorage = bCreateStorage || m_filmGrainCharacteristicsState; #endif // ENABLE_FILM_GRAIN // create a brand new picture object diff --git a/source/Lib/vvdec/vvdecimpl.h b/source/Lib/vvdec/vvdecimpl.h index d0bd5390..5399ef89 100644 --- a/source/Lib/vvdec/vvdecimpl.h +++ b/source/Lib/vvdec/vvdecimpl.h @@ -176,8 +176,8 @@ class VVDecImpl int xAddPicture ( Picture* pcPic ); int xCreateFrame ( vvdecFrame& frame, const CPelUnitBuf& rcPicBuf, uint32_t uiWidth, uint32_t uiHeight, const BitDepths& rcBitDepths, bool bCreateStorage ); - int xUpdateFGC ( vvdecSEI *sei ); - int xAddGrain ( vvdecFrame *frame ); + void xUpdateFGC ( vvdecSEI *sei ); + void xAddGrain ( vvdecFrame *frame ); static int xRetrieveNalStartCode ( unsigned char *pB, int iZerosInStartcode ); static int xConvertPayloadToRBSP ( const uint8_t* payload, size_t payloadLen, InputBitstream* bitstream, bool isVclNalUnit ); @@ -217,7 +217,12 @@ class VVDecImpl uint64_t m_uiSeqNumber = 0; uint64_t m_uiSeqNumOutput = 0; #if ENABLE_FILM_GRAIN - int m_eFgs = 0; + enum + { + FgcNone = 0, + FgcDontPersist = 1, + FgcPersist = 2 + } m_filmGrainCharacteristicsState = FgcNone; std::unique_ptr m_filmGrainSynth; #endif // ENABLE_FILM_GRAIN }; From a61e8ffb694bc5d8798700b9847f3b3701919435 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Fri, 24 May 2024 10:08:07 +0200 Subject: [PATCH 6/8] move updateFGC() to FilmGrain class --- source/Lib/FilmGrain/FilmGrain.cpp | 65 ++++++++++++++++++++++++++++++ source/Lib/FilmGrain/FilmGrain.h | 8 ++++ source/Lib/vvdec/vvdecimpl.cpp | 64 +---------------------------- 3 files changed, 74 insertions(+), 63 deletions(-) diff --git a/source/Lib/FilmGrain/FilmGrain.cpp b/source/Lib/FilmGrain/FilmGrain.cpp index 28c6e0e1..b644204b 100644 --- a/source/Lib/FilmGrain/FilmGrain.cpp +++ b/source/Lib/FilmGrain/FilmGrain.cpp @@ -720,4 +720,69 @@ void FilmGrain::init_sei( fgs_sei* cfg ) set_scale_shift( cfg->log2_scale_factor - ( cfg->model_id ? 1 : 0 ) ); // -1 for grain shift in pattern generation (see above) } +void FilmGrain::updateFGC( vvdecSEIFilmGrainCharacteristics* fgc ) +{ + fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) + // Copy SEI message in vfgs structure format + // TODO: check some values and warn about unsupported stuff ? + fgs.model_id = fgc->filmGrainModelId; + fgs.log2_scale_factor = fgc->log2ScaleFactor; + for( int c = 0; c < 3; c++ ) + { + vvdecCompModel& cm = fgc->compModel[c]; + if( cm.presentFlag ) + { + fgs.comp_model_present_flag[c] = 1; + fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; + fgs.num_model_values[c] = cm.numModelValues; + for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) + { + vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; + fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; + fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; + for( int v = 0; v < fgs.num_model_values[c]; v++ ) + { + fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; + } + // Fill with default model values (VFGS needs them; it actually ignores num_model_values) + switch( fgs.num_model_values[c] ) + { + // clang-format off + case 0: + case 1: fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) + case 2: fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr + case 3: fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) + case 4: fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio + fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) + // clang-format on + } + } + } + } + + // Conversion of component model values for 4:2:0 chroma format + if( fgs.model_id == 0 ) + { + for( int c = 1; c < 3; c++ ) + { + if( fgs.comp_model_present_flag[c] ) + { + for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) + { + fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency + fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency + fgs.comp_model_value[c][k][0] >>= 1; + } + } + } + } + + init_sei( &fgs ); + + // if (!m_bFgs) + // // TODO: get something random + // // TODO: make seed also impact the pattern gen + // vfgs_set_seed(uint32_t seed); +} + } // namespace vvdec diff --git a/source/Lib/FilmGrain/FilmGrain.h b/source/Lib/FilmGrain/FilmGrain.h index 45b892dd..73303d92 100644 --- a/source/Lib/FilmGrain/FilmGrain.h +++ b/source/Lib/FilmGrain/FilmGrain.h @@ -58,6 +58,10 @@ POSSIBILITY OF SUCH DAMAGE. #include "FilmGrainImpl.h" +#include + +#include "vvdec/sei.h" + namespace vvdec { @@ -65,6 +69,8 @@ namespace vvdec struct fgs_sei { + fgs_sei() { memset( this, 0, sizeof( *this ) ); } + uint8_t model_id; uint8_t log2_scale_factor; uint8_t comp_model_present_flag[3]; @@ -83,7 +89,9 @@ class FilmGrain : public FilmGrainImpl set_depth( depth ); set_chroma_subsampling( chromaSubsampling, chromaSubsampling ); } + void updateFGC( vvdecSEIFilmGrainCharacteristics* fgc ); +private: void init_sei( fgs_sei* cfg ); }; diff --git a/source/Lib/vvdec/vvdecimpl.cpp b/source/Lib/vvdec/vvdecimpl.cpp index 60e4d88b..89423a5a 100644 --- a/source/Lib/vvdec/vvdecimpl.cpp +++ b/source/Lib/vvdec/vvdecimpl.cpp @@ -845,69 +845,7 @@ void VVDecImpl::xUpdateFGC( vvdecSEI* s ) m_filmGrainSynth = std::make_unique( 10, 2 ); // TODO: (GH) set correct bit depth and color format, and apply changes } - fgs_sei fgs; // TODO: maybe make it a member ? (idea would be to re-seed patterns for each picture) - // Copy SEI message in vfgs structure format - // TODO: check some values and warn about unsupported stuff ? - fgs.model_id = sei->filmGrainModelId; - fgs.log2_scale_factor = sei->log2ScaleFactor; - for( int c = 0; c < 3; c++ ) - { - vvdecCompModel& cm = sei->compModel[c]; - if( cm.presentFlag ) - { - fgs.comp_model_present_flag[c] = 1; - fgs.num_intensity_intervals[c] = cm.numIntensityIntervals; - fgs.num_model_values[c] = cm.numModelValues; - for( int i = 0; i < fgs.num_intensity_intervals[c]; i++ ) - { - vvdecCompModelIntensityValues& cmiv = cm.intensityValues[i]; - fgs.intensity_interval_lower_bound[c][i] = cmiv.intensityIntervalLowerBound; - fgs.intensity_interval_upper_bound[c][i] = cmiv.intensityIntervalUpperBound; - for( int v = 0; v < fgs.num_model_values[c]; v++ ) - { - fgs.comp_model_value[c][i][v] = cmiv.compModelValue[v]; - } - // Fill with default model values (VFGS needs them; it actually ignores num_model_values) - switch( fgs.num_model_values[c] ) - { - // clang-format off - case 0: - case 1: fgs.comp_model_value[c][i][1] = fgs.model_id ? 0 : 8; // H high cutoff / 1st AR coef (left & top) - case 2: fgs.comp_model_value[c][i][2] = fgs.model_id ? 0 : fgs.comp_model_value[c][i][1]; // V high cutoff / x-comp corr - case 3: fgs.comp_model_value[c][i][3] = 0; // H low cutoff / 2nd AR coef (top-left, top-right) - case 4: fgs.comp_model_value[c][i][4] = fgs.model_id << fgs.log2_scale_factor; // V low cutoff / aspect ratio - fgs.comp_model_value[c][i][5] = 0; // x-comp corr / 3rd AR coef (left-left, top-top) - // clang-format on - } - } - } - } - - // Conversion of component model values for 4:2:0 chroma format - if( fgs.model_id == 0 ) - { - for( int c = 1; c < 3; c++ ) - { - if( fgs.comp_model_present_flag[c] ) - { - for( int k = 0; k < fgs.num_intensity_intervals[c]; k++ ) - { - fgs.comp_model_value[c][k][1] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][1] << 1 ) ); // Horizontal frequency - fgs.comp_model_value[c][k][2] = std::max( 2, std::min( 14, fgs.comp_model_value[c][k][2] << 1 ) ); // Vertical frequency - fgs.comp_model_value[c][k][0] >>= 1; - } - } - } - } - m_filmGrainSynth->set_depth( 10 ); - m_filmGrainSynth->set_chroma_subsampling( 2, 2 ); - m_filmGrainSynth->init_sei( &fgs ); - - // if (!m_bFgs) - // // TODO: get something random - // // TODO: make seed also impact the pattern gen - // vfgs_set_seed(uint32_t seed); - + m_filmGrainSynth->updateFGC( sei ); m_filmGrainCharacteristicsState = sei->filmGrainCharacteristicsPersistenceFlag ? FgcPersist : FgcDontPersist; } From c0bce238a0abd2f9bb74f1b2afd9dd5170d6eee7 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 15:52:00 +0200 Subject: [PATCH 7/8] move duplicate implementations of prng() and round() --- source/Lib/FilmGrain/FilmGrain.cpp | 19 ------------------- source/Lib/FilmGrain/FilmGrainImpl.cpp | 17 ----------------- source/Lib/FilmGrain/FilmGrainImpl.h | 22 ++++++++++++++++++++++ 3 files changed, 22 insertions(+), 36 deletions(-) diff --git a/source/Lib/FilmGrain/FilmGrain.cpp b/source/Lib/FilmGrain/FilmGrain.cpp index b644204b..dacc3ff4 100644 --- a/source/Lib/FilmGrain/FilmGrain.cpp +++ b/source/Lib/FilmGrain/FilmGrain.cpp @@ -65,12 +65,6 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvdec { -template -constexpr inline auto round( T a, uint8_t s ) -{ - return ( a + ( 1 << ( s - 1 ) ) ) >> s; -} - // clang-format off static constexpr int8_t Gaussian_LUT[2048] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, @@ -310,19 +304,6 @@ static constexpr int8_t DCT2_64[64][64] = \ DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2); // clang-format on -/** Pseudo-random number generator (32-bit) */ -static inline uint32_t prng( uint32_t x ) -{ -#if 1 // same as HW (bit-reversed RDD-5) - uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); -#else // RDD-5 - uint32_t s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; - x = ( x << 1 ) | s; -#endif - return x; -} - /** Apply iDCT2 to block B[64][64] + clipping */ static void idct2_64( int8_t B[][64] ) { diff --git a/source/Lib/FilmGrain/FilmGrainImpl.cpp b/source/Lib/FilmGrain/FilmGrainImpl.cpp index b198334c..b2d8a616 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.cpp +++ b/source/Lib/FilmGrain/FilmGrainImpl.cpp @@ -66,23 +66,6 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvdec { -template -constexpr inline auto round( T a, uint8_t s ) -{ - return ( a + ( 1 << ( s - 1 ) ) ) >> s; -} - -/** Pseudo-random number generator - * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register - * (the register LSB has basically no effect since it is never fed back) - */ -static inline uint32_t prng( uint32_t x ) -{ - uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; - x = s | ( x >> 1 ); - return x; -} - /** Derive Y x/y offsets from (random) number * * Bit fields are designed to minimize overlaps across color channels, to diff --git a/source/Lib/FilmGrain/FilmGrainImpl.h b/source/Lib/FilmGrain/FilmGrainImpl.h index 0fbb85d9..24056326 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.h +++ b/source/Lib/FilmGrain/FilmGrainImpl.h @@ -63,6 +63,28 @@ POSSIBILITY OF SUCH DAMAGE. namespace vvdec { +/** Pseudo-random number generator (32-bit) + * Note: loops on the 31 MSBs, so seed should be MSB-aligned in the register + * (the register LSB has basically no effect since it is never fed back) + */ +static inline uint32_t prng( uint32_t x ) +{ +#if 1 // same as HW (bit-reversed RDD-5) + uint32_t s = ( ( x << 30 ) ^ ( x << 2 ) ) & 0x80000000; + x = s | ( x >> 1 ); +#else // RDD-5 + uint32_t s = ( ( x >> 30 ) ^ ( x >> 2 ) ) & 1; + x = ( x << 1 ) | s; +#endif + return x; +} + +template +constexpr inline auto round( T a, uint8_t s ) +{ + return ( a + ( 1 << ( s - 1 ) ) ) >> s; +} + class FilmGrainImpl { // Note: declarations optimized for code readability; e.g. pattern storage in From d36438971ed040888c9c92e77c1782d9590ec669 Mon Sep 17 00:00:00 2001 From: Gabriel Hege Date: Mon, 3 Jun 2024 16:24:56 +0200 Subject: [PATCH 8/8] fix clang build and utf-8 encoding --- source/Lib/FilmGrain/FilmGrainImpl.cpp | 11 ++++++++++- source/Lib/FilmGrain/FilmGrainImpl.h | 8 +++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/source/Lib/FilmGrain/FilmGrainImpl.cpp b/source/Lib/FilmGrain/FilmGrainImpl.cpp index b2d8a616..85858487 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.cpp +++ b/source/Lib/FilmGrain/FilmGrainImpl.cpp @@ -6,7 +6,7 @@ the Software are granted under this license. The Clear BSD License -Copyright (c) 2018-2024, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. +Copyright (c) 2018-2024, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -381,4 +381,13 @@ void FilmGrainImpl::set_chroma_subsampling( int subx, int suby ) csuby = suby; } +FilmGrainImpl::FilmGrainImpl() +{ + memset( pattern, 0, sizeof( pattern ) ); + memset( sLUT, 0, sizeof( sLUT ) ); + memset( pLUT, 0, sizeof( pLUT ) ); + memset( grain, 0, sizeof( grain ) ); + memset( scale, 0, sizeof( scale ) ); +} + } // namespace vvdec diff --git a/source/Lib/FilmGrain/FilmGrainImpl.h b/source/Lib/FilmGrain/FilmGrainImpl.h index 24056326..41150271 100644 --- a/source/Lib/FilmGrain/FilmGrainImpl.h +++ b/source/Lib/FilmGrain/FilmGrainImpl.h @@ -89,9 +89,9 @@ class FilmGrainImpl { // Note: declarations optimized for code readability; e.g. pattern storage in // actual hardware implementation would differ significantly - int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64] = { 0, }; // +1 to simplify interpolation code - uint8_t sLUT[3][256] = { 0, }; - uint8_t pLUT[3][256] = { 0, }; + int8_t pattern[2][VFGS_MAX_PATTERNS + 1][64][64]; // +1 to simplify interpolation code + uint8_t sLUT[3][256]; + uint8_t pLUT[3][256]; uint32_t rnd = 0xdeadbeef; uint32_t rnd_up = 0xdeadbeef; @@ -116,6 +116,8 @@ class FilmGrainImpl void add_grain_block( void* I, int c, int x, int y, int width ); protected: + FilmGrainImpl(); + void set_luma_pattern( int index, int8_t* P ); void set_chroma_pattern( int index, int8_t* P ); void set_scale_lut( int c, uint8_t lut[] );