Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FromLinear and IntoLinear lookup table creation to build script #416

Merged
merged 1 commit into from
Jan 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
266 changes: 266 additions & 0 deletions codegen/src/lut.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
use anyhow::Result;
use proc_macro2::{Ident, TokenStream};
use quote::{format_ident, quote};

use crate::{codegen_file::CodegenFile, lut::model::LinearModel};

mod model;

pub fn generate() -> Result<()> {
let mut file = CodegenFile::create("palette/src/encoding/lut/codegen.rs")?;

let transfer_fn_u8 = vec![
LutEntryU8::new(
"srgb",
"SRGB",
TransferFn::new_with_linear(12.92, 0.0031308, 2.4),
),
LutEntryU8::new(
"rec_standards",
"REC_OETF",
TransferFn::new_with_linear(4.5, 0.018053968510807, 1.0 / 0.45),
),
LutEntryU8::new(
"adobe",
"ADOBE_RGB",
TransferFn::new_pure_gamma(563.0 / 256.0),
),
LutEntryU8::new("p3", "P3_GAMMA", TransferFn::new_pure_gamma(2.6)),
];

let transfer_fn_u16 = vec![LutEntryU16::new(
"prophoto",
"PROPHOTO_RGB",
TransferFn::new_with_linear(16.0, 0.001953125, 1.8),
)];

for LutEntryU8 {
module,
fn_type_uppercase,
transfer_fn,
} in transfer_fn_u8
{
let u8_to_float = build_u8_to_float_lut(&fn_type_uppercase, &transfer_fn);
let float_to_u8 = build_float_to_u8_lut(&fn_type_uppercase, &transfer_fn);

file.append(quote! {
pub mod #module {
#u8_to_float

#float_to_u8
}
})?;
}

for LutEntryU16 {
module,
fn_type_uppercase,
transfer_fn,
} in transfer_fn_u16
{
let u16_to_float = build_u16_to_float_lut(&fn_type_uppercase, &transfer_fn);
let float_to_u8 = build_float_to_u16_lut(&fn_type_uppercase, &transfer_fn);

file.append(quote! {
#[cfg(feature = "gamma_lut_u16")]
pub mod #module {
#u16_to_float

#float_to_u8
}
})?;
}

Ok(())
}

/// This struct is able to model a given transfer function.
///
/// Any transfer function will have a linear part (optional) for input values
/// less than some value `beta` and an exponential part determined by the function's
/// `gamma` value. For transfer functions with a linear part, `alpha` is chosen to
/// preserve function continuity.
struct TransferFn {
into_linear: Box<dyn Fn(f64) -> f64>,
linear_scale: Option<f64>,
alpha: f64,
beta: f64,
gamma: f64,
}

impl TransferFn {
fn new_with_linear(linear_scale: f64, linear_end: f64, gamma: f64) -> Self {
let alpha = (linear_scale * linear_end - 1.0) / (linear_end.powf(gamma.recip()) - 1.0);
let beta = linear_end;
Self {
into_linear: Box::new(move |encoded| {
if encoded <= linear_scale * beta {
encoded / linear_scale
} else {
((encoded + alpha - 1.0) / alpha).powf(gamma)
}
}),
linear_scale: Some(linear_scale),
alpha,
beta,
gamma,
}
}

fn new_pure_gamma(gamma: f64) -> Self {
Self {
into_linear: Box::new(move |encoded| encoded.powf(gamma)),
linear_scale: None,
alpha: 1.0,
beta: 0.0,
gamma,
}
}
}

struct LutEntryU8 {
module: Ident,
fn_type_uppercase: String,
transfer_fn: TransferFn,
}

struct LutEntryU16 {
module: Ident,
fn_type_uppercase: String,
transfer_fn: TransferFn,
}

impl LutEntryU8 {
fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self {
Self {
module: format_ident!("{module}"),
fn_type_uppercase: fn_type_uppercase.to_owned(),
transfer_fn,
}
}
}

impl LutEntryU16 {
fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self {
Self {
module: format_ident!("{module}"),
fn_type_uppercase: fn_type_uppercase.to_owned(),
transfer_fn,
}
}
}

fn build_u8_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
let table = (0..=u8::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 255.0));
let table_ident = format_ident!("{fn_type_uppercase}_U8_TO_F64");
let table_f32 = table.clone().map(|f| f as f32);
let table_f32_ident = format_ident!("{fn_type_uppercase}_U8_TO_F32");
quote! {
pub const #table_ident: [f64; 256] = [
#(#table),*
];

pub const #table_f32_ident: [f32; 256] = [
#(#table_f32),*
];
}
}

fn build_u16_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
let table = (0..=u16::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 65535.0));
let table_ident = format_ident!("{fn_type_uppercase}_U16_TO_F64");
quote! {
pub static #table_ident: [f64; 65536] = [
#(#table),*
];
}
}

/// This algorithm is an adaptation of [this C++ code](<https://gist.github.com/rygorous/2203834>)
/// by Fabian "ryg" Giesen, which utilizes simple linear regression on
/// sub-intervals of the transfer function's domain and stores the resulting
/// models' scales and biases into a lookup table.
///
/// The algorithm linked above calculates the transfer function for every
/// potential `f32` input and feeds that into the regression model. In
/// contrast, this algorithm replaces the discrete sums in the model with
/// continuous integrals in order to reduce the time it takes to generate
/// the tables. We are able to do this since transfer functions follow a
/// predictable pattern for which the anti-derivative is known.
fn build_float_to_u8_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
// 1.0 - f32::EPSILON
const MAX_FLOAT_BITS: u32 = 0x3f7fffff;
// The number of mantissa bits used to index into the lookup table
const MAN_INDEX_WIDTH: u32 = 3;
// The number of bits in the remainder of the mantissa
const BUCKET_INDEX_WIDTH: u32 = 20;
const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH;
// Any input less than or equal to this maps to 0
let min_float_bits =
(((transfer_fn.into_linear)(0.5 / 255.0) as f32).to_bits() - 1) & 0xff800000;

let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1;
let table_size = exp_table_size << MAN_INDEX_WIDTH;

let table = (0..table_size).map(|i| {
let start = min_float_bits + (i << BUCKET_INDEX_WIDTH);
let end = start + BUCKET_SIZE;

LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 8).into_u8_lookup()
});

let table_ident = format_ident!("TO_{fn_type_uppercase}_U8");
let table_size_usize = table_size as usize;

let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT");
quote! {
pub const #float_const_ident: u32 = #min_float_bits;

pub const #table_ident: [u32; #table_size_usize] = [
#(#table),*
];
}
}

fn build_float_to_u16_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
// 1.0 - f32::EPSILON
const MAX_FLOAT_BITS: u32 = 0x3f7fffff;
// The number of mantissa bits used to index into the lookup table
const MAN_INDEX_WIDTH: u32 = 7;
// The number of bits in the remainder of the mantissa
const BUCKET_INDEX_WIDTH: i32 = 16;
const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH;
let TransferFn {
into_linear,
linear_scale,
beta,
..
} = transfer_fn;
let min_float_bits = (*beta as f32)
.to_bits()
.max((into_linear(0.5 / 65535.0) as f32).to_bits() - 1)
& 0xff800000;
let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1;
let table_size = exp_table_size << MAN_INDEX_WIDTH;
let table = (0..table_size).map(|i| {
let start = min_float_bits + (i << BUCKET_INDEX_WIDTH);
let end = start + BUCKET_SIZE;

LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 16).into_u16_lookup()
});

let table_ident = format_ident!("TO_{fn_type_uppercase}_U16");
let table_size_usize = table_size as usize;
let linear_scale = 65535.0 * (linear_scale.unwrap_or_default() as f32);

let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT");
let linear_scale_ident = format_ident!("{fn_type_uppercase}_LINEAR_SCALE");
quote! {
pub const #float_const_ident: u32 = #min_float_bits;
pub const #linear_scale_ident: f32 = #linear_scale;

pub const #table_ident: [u64; #table_size_usize] = [
#(#table),*
];
}
}
129 changes: 129 additions & 0 deletions codegen/src/lut/model.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
use super::TransferFn;

/// This struct contains the scale and bias for a linear
/// regression model of a transfer function on a given interval.
///
/// This model is calculated by using simple linear regression with
/// integration instead of summation.
pub(super) struct LinearModel {
scale: f64,
bias: f64,
}

impl LinearModel {
pub(super) fn new(
transfer_fn: &TransferFn,
start: u32,
end: u32,
man_index_width: u32,
t_width: u32,
) -> Self {
let TransferFn {
linear_scale,
alpha,
beta,
gamma,
..
} = *transfer_fn;

let beta_bits = (beta as f32).to_bits();
// Corresponds to the scale between differentials. Specifically,
// `dx = exp_scale * dt`
let exp_scale = f32::from_bits(((start >> 23) - man_index_width - t_width) << 23) as f64;
let start_x = f32::from_bits(start) as f64;
let end_x = f32::from_bits(end) as f64;

// If the transfer function is purely linear on a given interval,
// integration is unnecessary.
if let Some(linear_scale) = linear_scale {
if end <= beta_bits {
return Self {
scale: linear_scale * exp_scale,
bias: linear_scale * start_x,
};
}
}

let max_t = 2.0f64.powi(t_width as i32);

let (integral_y, integral_ty) = match linear_scale {
Some(linear_scale) if start < beta_bits => {
let beta_t =
(beta_bits << (9 + man_index_width)) as f64 * 2.0f64.powi(t_width as i32 - 32);
let int_linear =
integrate_linear((start_x, beta), (0.0, beta_t), linear_scale, exp_scale);
let int_exponential =
integrate_exponential((beta, end_x), (beta_t, max_t), alpha, gamma, exp_scale);
(
int_linear.0 + int_exponential.0,
int_linear.1 + int_exponential.1,
)
}
_ => integrate_exponential((start_x, end_x), (0.0, max_t), alpha, gamma, exp_scale),
};
let max_t2 = max_t * max_t;
let integral_t = max_t2 * 0.5;
let integral_t2 = max_t2 * max_t / 3.0;

let scale = (max_t * integral_ty - integral_t * integral_y)
/ (max_t * integral_t2 - integral_t * integral_t);
Self {
scale,
bias: (integral_y - scale * integral_t) / max_t,
}
}

pub(super) fn into_u8_lookup(self) -> u32 {
let scale_uint = (255.0 * self.scale * 65536.0 + 0.5) as u32;
let bias_uint = (((255.0 * self.bias + 0.5) * 128.0 + 0.5) as u32) << 9;
(bias_uint << 7) | scale_uint
}

pub(super) fn into_u16_lookup(self) -> u64 {
let scale_uint = (65535.0 * self.scale * 4294967296.0 + 0.5) as u64;
let bias_uint = (((65535.0 * self.bias + 0.5) * 32768.0 + 0.5) as u64) << 17;
(bias_uint << 15) | scale_uint
}
}

fn integrate_linear(
(start_x, end_x): (f64, f64),
(start_t, end_t): (f64, f64),
linear_scale: f64,
exp_scale: f64,
) -> (f64, f64) {
let antiderive_y = |x: f64| 0.5 * linear_scale * x * x / exp_scale;
let antiderive_ty =
|x: f64, t: f64| 0.5 * linear_scale * x * x * (t - x / (3.0 * exp_scale)) / exp_scale;

(
antiderive_y(end_x) - antiderive_y(start_x),
antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t),
)
}

fn integrate_exponential(
(start_x, end_x): (f64, f64),
(start_t, end_t): (f64, f64),
alpha: f64,
gamma: f64,
exp_scale: f64,
) -> (f64, f64) {
let one_plus_gamma_inv = 1.0 + gamma.recip();
let antiderive_y = |x: f64, t: f64| {
alpha * gamma * x.powf(one_plus_gamma_inv) / (exp_scale * (1.0 + gamma)) + (1.0 - alpha) * t
};
let antiderive_ty = |x: f64, t: f64| {
alpha
* gamma
* x.powf(one_plus_gamma_inv)
* (t - gamma * x / (exp_scale * (1.0 + 2.0 * gamma)))
/ (exp_scale * (1.0 + gamma))
+ 0.5 * (1.0 - alpha) * t * t
};

(
antiderive_y(end_x, end_t) - antiderive_y(start_x, start_t),
antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t),
)
}
Loading
Loading