Ogeon · Ogeon · Jan 12, 2025 · Jan 7, 2025
diff --git a/codegen/src/lut.rs b/codegen/src/lut.rs
@@ -0,0 +1,266 @@
+use anyhow::Result;
+use proc_macro2::{Ident, TokenStream};
+use quote::{format_ident, quote};
+
+use crate::{codegen_file::CodegenFile, lut::model::LinearModel};
+
+mod model;
+
+pub fn generate() -> Result<()> {
+    let mut file = CodegenFile::create("palette/src/encoding/lut/codegen.rs")?;
+
+    let transfer_fn_u8 = vec![
+        LutEntryU8::new(
+            "srgb",
+            "SRGB",
+            TransferFn::new_with_linear(12.92, 0.0031308, 2.4),
+        ),
+        LutEntryU8::new(
+            "rec_standards",
+            "REC_OETF",
+            TransferFn::new_with_linear(4.5, 0.018053968510807, 1.0 / 0.45),
+        ),
+        LutEntryU8::new(
+            "adobe",
+            "ADOBE_RGB",
+            TransferFn::new_pure_gamma(563.0 / 256.0),
+        ),
+        LutEntryU8::new("p3", "P3_GAMMA", TransferFn::new_pure_gamma(2.6)),
+    ];
+
+    let transfer_fn_u16 = vec![LutEntryU16::new(
+        "prophoto",
+        "PROPHOTO_RGB",
+        TransferFn::new_with_linear(16.0, 0.001953125, 1.8),
+    )];
+
+    for LutEntryU8 {
+        module,
+        fn_type_uppercase,
+        transfer_fn,
+    } in transfer_fn_u8
+    {
+        let u8_to_float = build_u8_to_float_lut(&fn_type_uppercase, &transfer_fn);
+        let float_to_u8 = build_float_to_u8_lut(&fn_type_uppercase, &transfer_fn);
+
+        file.append(quote! {
+            pub mod #module {
+                #u8_to_float
+
+                #float_to_u8
+            }
+        })?;
+    }
+
+    for LutEntryU16 {
+        module,
+        fn_type_uppercase,
+        transfer_fn,
+    } in transfer_fn_u16
+    {
+        let u16_to_float = build_u16_to_float_lut(&fn_type_uppercase, &transfer_fn);
+        let float_to_u8 = build_float_to_u16_lut(&fn_type_uppercase, &transfer_fn);
+
+        file.append(quote! {
+            #[cfg(feature = "gamma_lut_u16")]
+            pub mod #module {
+                #u16_to_float
+
+                #float_to_u8
+            }
+        })?;
+    }
+
+    Ok(())
+}
+
+/// This struct is able to model a given transfer function.
+///
+/// Any transfer function will have a linear part (optional) for input values
+/// less than some value `beta` and an exponential part determined by the function's
+/// `gamma` value. For transfer functions with a linear part, `alpha` is chosen to
+/// preserve function continuity.
+struct TransferFn {
+    into_linear: Box<dyn Fn(f64) -> f64>,
+    linear_scale: Option<f64>,
+    alpha: f64,
+    beta: f64,
+    gamma: f64,
+}
+
+impl TransferFn {
+    fn new_with_linear(linear_scale: f64, linear_end: f64, gamma: f64) -> Self {
+        let alpha = (linear_scale * linear_end - 1.0) / (linear_end.powf(gamma.recip()) - 1.0);
+        let beta = linear_end;
+        Self {
+            into_linear: Box::new(move |encoded| {
+                if encoded <= linear_scale * beta {
+                    encoded / linear_scale
+                } else {
+                    ((encoded + alpha - 1.0) / alpha).powf(gamma)
+                }
+            }),
+            linear_scale: Some(linear_scale),
+            alpha,
+            beta,
+            gamma,
+        }
+    }
+
+    fn new_pure_gamma(gamma: f64) -> Self {
+        Self {
+            into_linear: Box::new(move |encoded| encoded.powf(gamma)),
+            linear_scale: None,
+            alpha: 1.0,
+            beta: 0.0,
+            gamma,
+        }
+    }
+}
+
+struct LutEntryU8 {
+    module: Ident,
+    fn_type_uppercase: String,
+    transfer_fn: TransferFn,
+}
+
+struct LutEntryU16 {
+    module: Ident,
+    fn_type_uppercase: String,
+    transfer_fn: TransferFn,
+}
+
+impl LutEntryU8 {
+    fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self {
+        Self {
+            module: format_ident!("{module}"),
+            fn_type_uppercase: fn_type_uppercase.to_owned(),
+            transfer_fn,
+        }
+    }
+}
+
+impl LutEntryU16 {
+    fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self {
+        Self {
+            module: format_ident!("{module}"),
+            fn_type_uppercase: fn_type_uppercase.to_owned(),
+            transfer_fn,
+        }
+    }
+}
+
+fn build_u8_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
+    let table = (0..=u8::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 255.0));
+    let table_ident = format_ident!("{fn_type_uppercase}_U8_TO_F64");
+    let table_f32 = table.clone().map(|f| f as f32);
+    let table_f32_ident = format_ident!("{fn_type_uppercase}_U8_TO_F32");
+    quote! {
+        pub const #table_ident: [f64; 256] = [
+            #(#table),*
+        ];
+
+        pub const #table_f32_ident: [f32; 256] = [
+            #(#table_f32),*
+        ];
+    }
+}
+
+fn build_u16_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
+    let table = (0..=u16::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 65535.0));
+    let table_ident = format_ident!("{fn_type_uppercase}_U16_TO_F64");
+    quote! {
+        pub static #table_ident: [f64; 65536] = [
+            #(#table),*
+        ];
+    }
+}
+
+/// This algorithm is an adaptation of [this C++ code](<https://gist.github.com/rygorous/2203834>)
+/// by Fabian "ryg" Giesen, which utilizes simple linear regression on
+/// sub-intervals of the transfer function's domain and stores the resulting
+/// models' scales and biases into a lookup table.
+///
+/// The algorithm linked above calculates the transfer function for every
+/// potential `f32` input and feeds that into the regression model. In
+/// contrast, this algorithm replaces the discrete sums in the model with
+/// continuous integrals in order to reduce the time it takes to generate
+/// the tables. We are able to do this since transfer functions follow a
+/// predictable pattern for which the anti-derivative is known.
+fn build_float_to_u8_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
+    // 1.0 - f32::EPSILON
+    const MAX_FLOAT_BITS: u32 = 0x3f7fffff;
+    // The number of mantissa bits used to index into the lookup table
+    const MAN_INDEX_WIDTH: u32 = 3;
+    // The number of bits in the remainder of the mantissa
+    const BUCKET_INDEX_WIDTH: u32 = 20;
+    const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH;
+    // Any input less than or equal to this maps to 0
+    let min_float_bits =
+        (((transfer_fn.into_linear)(0.5 / 255.0) as f32).to_bits() - 1) & 0xff800000;
+
+    let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1;
+    let table_size = exp_table_size << MAN_INDEX_WIDTH;
+
+    let table = (0..table_size).map(|i| {
+        let start = min_float_bits + (i << BUCKET_INDEX_WIDTH);
+        let end = start + BUCKET_SIZE;
+
+        LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 8).into_u8_lookup()
+    });
+
+    let table_ident = format_ident!("TO_{fn_type_uppercase}_U8");
+    let table_size_usize = table_size as usize;
+
+    let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT");
+    quote! {
+        pub const #float_const_ident: u32 = #min_float_bits;
+
+        pub const #table_ident: [u32; #table_size_usize] = [
+            #(#table),*
+        ];
+    }
+}
+
+fn build_float_to_u16_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
+    // 1.0 - f32::EPSILON
+    const MAX_FLOAT_BITS: u32 = 0x3f7fffff;
+    // The number of mantissa bits used to index into the lookup table
+    const MAN_INDEX_WIDTH: u32 = 7;
+    // The number of bits in the remainder of the mantissa
+    const BUCKET_INDEX_WIDTH: i32 = 16;
+    const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH;
+    let TransferFn {
+        into_linear,
+        linear_scale,
+        beta,
+        ..
+    } = transfer_fn;
+    let min_float_bits = (*beta as f32)
+        .to_bits()
+        .max((into_linear(0.5 / 65535.0) as f32).to_bits() - 1)
+        & 0xff800000;
+    let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1;
+    let table_size = exp_table_size << MAN_INDEX_WIDTH;
+    let table = (0..table_size).map(|i| {
+        let start = min_float_bits + (i << BUCKET_INDEX_WIDTH);
+        let end = start + BUCKET_SIZE;
+
+        LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 16).into_u16_lookup()
+    });
+
+    let table_ident = format_ident!("TO_{fn_type_uppercase}_U16");
+    let table_size_usize = table_size as usize;
+    let linear_scale = 65535.0 * (linear_scale.unwrap_or_default() as f32);
+
+    let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT");
+    let linear_scale_ident = format_ident!("{fn_type_uppercase}_LINEAR_SCALE");
+    quote! {
+        pub const #float_const_ident: u32 = #min_float_bits;
+        pub const #linear_scale_ident: f32 = #linear_scale;
+
+        pub const #table_ident: [u64; #table_size_usize] = [
+            #(#table),*
+        ];
+    }
+}
diff --git a/codegen/src/lut/model.rs b/codegen/src/lut/model.rs
@@ -0,0 +1,129 @@
+use super::TransferFn;
+
+/// This struct contains the scale and bias for a linear
+/// regression model of a transfer function on a given interval.
+///
+/// This model is calculated by using simple linear regression with
+/// integration instead of summation.
+pub(super) struct LinearModel {
+    scale: f64,
+    bias: f64,
+}
+
+impl LinearModel {
+    pub(super) fn new(
+        transfer_fn: &TransferFn,
+        start: u32,
+        end: u32,
+        man_index_width: u32,
+        t_width: u32,
+    ) -> Self {
+        let TransferFn {
+            linear_scale,
+            alpha,
+            beta,
+            gamma,
+            ..
+        } = *transfer_fn;
+
+        let beta_bits = (beta as f32).to_bits();
+        // Corresponds to the scale between differentials. Specifically,
+        // `dx = exp_scale * dt`
+        let exp_scale = f32::from_bits(((start >> 23) - man_index_width - t_width) << 23) as f64;
+        let start_x = f32::from_bits(start) as f64;
+        let end_x = f32::from_bits(end) as f64;
+
+        // If the transfer function is purely linear on a given interval,
+        // integration is unnecessary.
+        if let Some(linear_scale) = linear_scale {
+            if end <= beta_bits {
+                return Self {
+                    scale: linear_scale * exp_scale,
+                    bias: linear_scale * start_x,
+                };
+            }
+        }
+
+        let max_t = 2.0f64.powi(t_width as i32);
+
+        let (integral_y, integral_ty) = match linear_scale {
+            Some(linear_scale) if start < beta_bits => {
+                let beta_t =
+                    (beta_bits << (9 + man_index_width)) as f64 * 2.0f64.powi(t_width as i32 - 32);
+                let int_linear =
+                    integrate_linear((start_x, beta), (0.0, beta_t), linear_scale, exp_scale);
+                let int_exponential =
+                    integrate_exponential((beta, end_x), (beta_t, max_t), alpha, gamma, exp_scale);
+                (
+                    int_linear.0 + int_exponential.0,
+                    int_linear.1 + int_exponential.1,
+                )
+            }
+            _ => integrate_exponential((start_x, end_x), (0.0, max_t), alpha, gamma, exp_scale),
+        };
+        let max_t2 = max_t * max_t;
+        let integral_t = max_t2 * 0.5;
+        let integral_t2 = max_t2 * max_t / 3.0;
+
+        let scale = (max_t * integral_ty - integral_t * integral_y)
+            / (max_t * integral_t2 - integral_t * integral_t);
+        Self {
+            scale,
+            bias: (integral_y - scale * integral_t) / max_t,
+        }
+    }
+
+    pub(super) fn into_u8_lookup(self) -> u32 {
+        let scale_uint = (255.0 * self.scale * 65536.0 + 0.5) as u32;
+        let bias_uint = (((255.0 * self.bias + 0.5) * 128.0 + 0.5) as u32) << 9;
+        (bias_uint << 7) | scale_uint
+    }
+
+    pub(super) fn into_u16_lookup(self) -> u64 {
+        let scale_uint = (65535.0 * self.scale * 4294967296.0 + 0.5) as u64;
+        let bias_uint = (((65535.0 * self.bias + 0.5) * 32768.0 + 0.5) as u64) << 17;
+        (bias_uint << 15) | scale_uint
+    }
+}
+
+fn integrate_linear(
+    (start_x, end_x): (f64, f64),
+    (start_t, end_t): (f64, f64),
+    linear_scale: f64,
+    exp_scale: f64,
+) -> (f64, f64) {
+    let antiderive_y = |x: f64| 0.5 * linear_scale * x * x / exp_scale;
+    let antiderive_ty =
+        |x: f64, t: f64| 0.5 * linear_scale * x * x * (t - x / (3.0 * exp_scale)) / exp_scale;
+
+    (
+        antiderive_y(end_x) - antiderive_y(start_x),
+        antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t),
+    )
+}
+
+fn integrate_exponential(
+    (start_x, end_x): (f64, f64),
+    (start_t, end_t): (f64, f64),
+    alpha: f64,
+    gamma: f64,
+    exp_scale: f64,
+) -> (f64, f64) {
+    let one_plus_gamma_inv = 1.0 + gamma.recip();
+    let antiderive_y = |x: f64, t: f64| {
+        alpha * gamma * x.powf(one_plus_gamma_inv) / (exp_scale * (1.0 + gamma)) + (1.0 - alpha) * t
+    };
+    let antiderive_ty = |x: f64, t: f64| {
+        alpha
+            * gamma
+            * x.powf(one_plus_gamma_inv)
+            * (t - gamma * x / (exp_scale * (1.0 + 2.0 * gamma)))
+            / (exp_scale * (1.0 + gamma))
+            + 0.5 * (1.0 - alpha) * t * t
+    };
+
+    (
+        antiderive_y(end_x, end_t) - antiderive_y(start_x, start_t),
+        antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t),
+    )
+}