correction: support erasures

rust-bitcoin · Sep 30, 2024 · c2d0ac8 · c2d0ac8
1 parent b5fdff3
commit c2d0ac8
Show file tree

Hide file tree

Showing 4 changed files with 189 additions and 56 deletions.
diff --git a/fuzz/fuzz_targets/correct_bech32.rs b/fuzz/fuzz_targets/correct_bech32.rs
@@ -40,14 +40,14 @@ fn do_test(data: &[u8]) {
  println!("{}", unsafe { core::str::from_utf8_unchecked(CORRECT) });
  println!("{}", s);
  */
- let corrections = CheckedHrpstring::new::<Bech32>(s)
+ let correct_ctx = CheckedHrpstring::new::<Bech32>(s)
  .unwrap_err()
  .correction_context::<Bech32>()
- .unwrap()
- .bch_errors();
+ .unwrap();
 
+ let iter = correct_ctx.bch_errors();
  if errors.len() <= 4 {
- for (idx, fe) in corrections.unwrap() {
+ for (idx, fe) in iter.unwrap() {
  let idx = s.len() - idx - 1;
  //println!("Errors: {:?}", errors);
  //println!("Remove: {} {}", idx, fe);

diff --git a/fuzz/fuzz_targets/correct_codex32.rs b/fuzz/fuzz_targets/correct_codex32.rs
@@ -38,47 +38,66 @@ fn do_test(data: &[u8]) {
  return;
  }
 
+ let mut any_actual_errors = false;
+ let mut e2t = 0;
+ let mut erasures = Vec::with_capacity(CORRECT.len());
  // Start with a correct string
  let mut hrpstring = *CORRECT;
  // ..then mangle it
  let mut errors = HashMap::with_capacity(data.len() / 2);
  for sl in data.chunks_exact(2) {
- let idx = usize::from(sl[0]);
+ let idx = usize::from(sl[0]) & 0x7f;
  if idx >= CORRECT.len() - 3 {
  return;
  }
  let offs = match Fe32::try_from(sl[1]) {
- Ok(Fe32::Q) => return,
  Ok(fe) => fe,
  Err(_) => return,
  };
 
  hrpstring[idx + 3] =
  (Fe32::from_char(hrpstring[idx + 3].into()).unwrap() + offs).to_char() as u8;
- if errors.insert(idx + 3, offs).is_some() {
+
+ if errors.insert(CORRECT.len() - (idx + 3) - 1, offs).is_some() {
  return;
  }
+ if sl[0] & 0x80 == 0x80 {
+ // We might push "dummy" errors which are erasures that aren't actually wrong.
+ // If we do this too many times, we'll exceed the singleton bound so correction
+ // will fail, but as long as we're within the bound everything should "work",
+ // in the sense that there will be no crashes and the error corrector will
+ // just yield an error with value Q.
+ erasures.push(CORRECT.len() - (idx + 3) - 1);
+ e2t += 1;
+ if offs != Fe32::Q {
+ any_actual_errors = true;
+ }
+ } else if offs != Fe32::Q {
+ any_actual_errors = true;
+ e2t += 2;
+ }
+ }
+ // We need _some_ errors.
+ if !any_actual_errors {
+ return;
  }
 
  let s = unsafe { core::str::from_utf8_unchecked(&hrpstring) };
- /*
- println!("{}", unsafe { core::str::from_utf8_unchecked(CORRECT) });
- println!("{}", s);
- */
- let corrections = CheckedHrpstring::new::<Codex32>(s)
+ let mut correct_ctx = CheckedHrpstring::new::<Codex32>(s)
  .unwrap_err()
  .correction_context::<Codex32>()
- .unwrap()
- .bch_errors();
+ .unwrap();
+
+ correct_ctx.add_erasures(&erasures);
 
- if errors.len() <= 4 {
- for (idx, fe) in corrections.unwrap() {
- let idx = s.len() - idx - 1;
- //println!("Errors: {:?}", errors);
- //println!("Remove: {} {}", idx, fe);
+ let iter = correct_ctx.bch_errors();
+ if e2t <= 8 { // FIXME should be <=
+ for (idx, fe) in iter.unwrap() {
  assert_eq!(errors.remove(&idx), Some(fe));
  }
- assert_eq!(errors.len(), 0);
+ for val in errors.values() {
+ assert_eq!(*val, Fe32::Q);
+ }
  }
 }
 
@@ -112,7 +131,7 @@ mod tests {
  #[test]
  fn duplicate_crash() {
  let mut a = Vec::new();
- extend_vec_from_hex("", &mut a);
+ extend_vec_from_hex("8c00a10091039e0185008000831f8e0f", &mut a);
  super::do_test(&a);
  }
 }
diff --git a/src/primitives/correction.rs b/src/primitives/correction.rs
@@ -76,7 +76,11 @@ pub trait CorrectableError {
  return None;
  }
 
- self.residue_error().map(|e| Corrector { residue: e.residue(), phantom: PhantomData })
+ self.residue_error().map(|e| Corrector {
+ erasures: FieldVec::new(),
+ residue: e.residue(),
+ phantom: PhantomData,
+ })
  }
 }
 
@@ -127,12 +131,40 @@ impl CorrectableError for DecodeError {
 }
 
 /// An error-correction context.
-pub struct Corrector<Ck> {
+pub struct Corrector<Ck: Checksum> {
+ erasures: FieldVec<usize>,
  residue: Polynomial<Fe32>,
  phantom: PhantomData<Ck>,
 }
 
 impl<Ck: Checksum> Corrector<Ck> {
+ /// A bound on the number of errors and erasures (errors with known location)
+ /// that can be corrected by this corrector.
+ ///
+ /// Returns N such that, given E errors and X erasures, correction is possible
+ /// iff 2E + X <= N.
+ pub fn singleton_bound(&self) -> usize {
+ // d - 1, where d = [number of consecutive roots] + 2
+ Ck::ROOT_EXPONENTS.end() - Ck::ROOT_EXPONENTS.start() + 1
+ }
+
+ /// Records the given locations as erasures (errors whose location is known).
+ pub fn add_erasures(&mut self, locs: &[usize]) {
+ for loc in locs {
+ // If the user tries to add too many erasures, just ignore them. In
+ // this case error correction is guaranteed to fail anyway, because
+ // they will have exceeded the singleton bound. (Otherwise, the
+ // singleton bound, which is always <= the checksum length, must be
+ // greater than NO_ALLOC_MAX_LENGTH. So the checksum length must be
+ // greater than NO_ALLOC_MAX_LENGTH. Then correction will still fail.)
+ #[cfg(not(feature = "alloc"))]
+ if self.erasures.len() == NO_ALLOC_MAX_LENGTH {
+ break;
+ }
+ self.erasures.push(*loc);
+ }
+ }
+
  /// Returns an iterator over the errors in the string.
  ///
  /// Returns `None` if it can be determined that there are too many errors to be
@@ -145,29 +177,44 @@ impl<Ck: Checksum> Corrector<Ck> {
  /// string may not actually be the intended string.
  pub fn bch_errors(&self) -> Option<ErrorIterator<Ck>> {
  // 1. Compute all syndromes by evaluating the residue at each power of the generator.
- let syndromes: FieldVec<_> = Ck::ROOT_GENERATOR
+ let syndromes: Polynomial<_> = Ck::ROOT_GENERATOR
  .powers_range(Ck::ROOT_EXPONENTS)
  .map(|rt| self.residue.evaluate(&rt))
  .collect();
 
+ // 1a. Compute the "Forney syndrome polynomial" which is the product of the syndrome
+ // polynomial and the erasure locator. This "erases the erasures" so that B-M
+ // can find only the errors.
+ let mut erasure_locator = Polynomial::with_monic_leading_term(&[]); // 1
+ for loc in &self.erasures {
+ let factor: Polynomial<_> =
+ [Ck::CorrectionField::ONE, -Ck::ROOT_GENERATOR.powi(*loc as i64)]
+ .iter()
+ .cloned()
+ .collect(); // alpha^-ix - 1
+ erasure_locator = erasure_locator.mul_mod_x_d(&factor, usize::MAX);
+ }
+ let forney_syndromes = erasure_locator.convolution(&syndromes);
+
  // 2. Use the Berlekamp-Massey algorithm to find the connection polynomial of the
  // LFSR that generates these syndromes. For magical reasons this will be equal
  // to the error locator polynomial for the syndrome.
- let lfsr = LfsrIter::berlekamp_massey(&syndromes[..]);
+ let lfsr = LfsrIter::berlekamp_massey(&forney_syndromes.as_inner()[..]);
  let conn = lfsr.coefficient_polynomial();
 
  // 3. The connection polynomial is the error locator polynomial. Use this to get
  // the errors.
- let max_correctable_errors =
- (Ck::ROOT_EXPONENTS.end() - Ck::ROOT_EXPONENTS.start() + 1) / 2;
- if conn.degree() <= max_correctable_errors {
+ if erasure_locator.degree() + 2 * conn.degree() <= self.singleton_bound() {
+ // 3a. Compute the "errata locator" which is the product of the error locator
+ // and the erasure locator. Note that while we used the Forney syndromes
+ // when calling the BM algorithm, in all other cases we use the ordinary
+ // unmodified syndromes.
+ let errata_locator = conn.mul_mod_x_d(&erasure_locator, usize::MAX);
  Some(ErrorIterator {
- evaluator: conn.mul_mod_x_d(
- &Polynomial::from(syndromes),
- Ck::ROOT_EXPONENTS.end() - Ck::ROOT_EXPONENTS.start() + 1,
- ),
- locator_derivative: conn.formal_derivative(),
- inner: conn.find_nonzero_distinct_roots(Ck::ROOT_GENERATOR),
+ evaluator: errata_locator.mul_mod_x_d(&syndromes, self.singleton_bound()),
+ locator_derivative: errata_locator.formal_derivative(),
+ erasures: &self.erasures[..],
+ errors: conn.find_nonzero_distinct_roots(Ck::ROOT_GENERATOR),
  a: Ck::ROOT_GENERATOR,
  c: *Ck::ROOT_EXPONENTS.start(),
  })
@@ -206,32 +253,39 @@ impl<Ck: Checksum> Corrector<Ck> {
 /// caller should fix this before attempting error correction. If it is unknown,
 /// the caller cannot assume anything about the intended checksum, and should not
 /// attempt error correction.
-pub struct ErrorIterator<Ck: Checksum> {
+pub struct ErrorIterator<'c, Ck: Checksum> {
  evaluator: Polynomial<Ck::CorrectionField>,
  locator_derivative: Polynomial<Ck::CorrectionField>,
- inner: super::polynomial::RootIter<Ck::CorrectionField>,
+ erasures: &'c [usize],
+ errors: super::polynomial::RootIter<Ck::CorrectionField>,
  a: Ck::CorrectionField,
  c: usize,
 }
 
-impl<Ck: Checksum> Iterator for ErrorIterator<Ck> {
+impl<'c, Ck: Checksum> Iterator for ErrorIterator<'c, Ck> {
  type Item = (usize, Fe32);
 
  fn next(&mut self) -> Option<Self::Item> {
  // Compute -i, which is the location we will return to the user.
- let neg_i = match self.inner.next() {
- None => return None,
- Some(0) => 0,
- Some(x) => Ck::ROOT_GENERATOR.multiplicative_order() - x,
+ let neg_i = if self.erasures.is_empty() {
+ match self.errors.next() {
+ None => return None,
+ Some(0) => 0,
+ Some(x) => Ck::ROOT_GENERATOR.multiplicative_order() - x,
+ }
+ } else {
+ let pop = self.erasures[0];
+ self.erasures = &self.erasures[1..];
+ pop
  };
 
  // Forney's equation, as described in https://en.wikipedia.org/wiki/BCH_code#Forney_algorithm
  //
  // It is rendered as
  //
- // a^i evaluator(a^-i)
- // e_k = - ---------------------------------
- // a^(ci) locator_derivative(a^-i)
+ //   evaluator(a^-i)
+ // e_k = - -----------------------------------------
+ // (a^i)^(c - 1) locator_derivative(a^-i)
  //
  // where here a is `Ck::ROOT_GENERATOR`, c is the first element of the range
  // `Ck::ROOT_EXPONENTS`, and both evalutor and locator_derivative are polynomials
@@ -240,8 +294,8 @@ impl<Ck: Checksum> Iterator for ErrorIterator<Ck> {
  let a_i = self.a.powi(neg_i as i64);
  let a_neg_i = a_i.clone().multiplicative_inverse();
 
- let num = self.evaluator.evaluate(&a_neg_i) * &a_i;
- let den = a_i.powi(self.c as i64) * self.locator_derivative.evaluate(&a_neg_i);
+ let num = self.evaluator.evaluate(&a_neg_i);
+ let den = a_i.powi(self.c as i64 - 1) * self.locator_derivative.evaluate(&a_neg_i);
  let ret = -num / den;
  match ret.try_into() {
  Ok(ret) => Some((neg_i, ret)),
@@ -263,9 +317,13 @@ mod tests {
  match SegwitHrpstring::new(s) {
  Ok(_) => panic!("{} successfully, and wrongly, parsed", s),
  Err(e) => {
- let ctx = e.correction_context::<Bech32>().unwrap();
+ let mut ctx = e.correction_context::<Bech32>().unwrap();
  let mut iter = ctx.bch_errors().unwrap();
+ assert_eq!(iter.next(), Some((0, Fe32::X)));
+ assert_eq!(iter.next(), None);
 
+ ctx.add_erasures(&[0]);
+ let mut iter = ctx.bch_errors().unwrap();
  assert_eq!(iter.next(), Some((0, Fe32::X)));
  assert_eq!(iter.next(), None);
  }
@@ -276,9 +334,13 @@ mod tests {
  match SegwitHrpstring::new(s) {
  Ok(_) => panic!("{} successfully, and wrongly, parsed", s),
  Err(e) => {
- let ctx = e.correction_context::<Bech32>().unwrap();
+ let mut ctx = e.correction_context::<Bech32>().unwrap();
  let mut iter = ctx.bch_errors().unwrap();
+ assert_eq!(iter.next(), Some((6, Fe32::T)));
+ assert_eq!(iter.next(), None);
 
+ ctx.add_erasures(&[6]);
+ let mut iter = ctx.bch_errors().unwrap();
  assert_eq!(iter.next(), Some((6, Fe32::T)));
  assert_eq!(iter.next(), None);
  }
@@ -297,13 +359,42 @@ mod tests {
  }
  }
 
- // Two errors.
- let s = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mxx";
+ // Two errors; cannot correct.
+ let s = "bc1qar0srrr7xfkvy5l64qlydnw9re59gtzzwf5mdx";
  match SegwitHrpstring::new(s) {
  Ok(_) => panic!("{} successfully, and wrongly, parsed", s),
  Err(e) => {
- let ctx = e.correction_context::<Bech32>().unwrap();
+ let mut ctx = e.correction_context::<Bech32>().unwrap();
  assert!(ctx.bch_errors().is_none());
+
+ // But we can correct it if we inform where an error is.
+ ctx.add_erasures(&[0]);
+ let mut iter = ctx.bch_errors().unwrap();
+ assert_eq!(iter.next(), Some((0, Fe32::X)));
+ assert_eq!(iter.next(), Some((20, Fe32::_3)));
+ assert_eq!(iter.next(), None);
+
+ ctx.add_erasures(&[20]);
+ let mut iter = ctx.bch_errors().unwrap();
+ assert_eq!(iter.next(), Some((0, Fe32::X)));
+ assert_eq!(iter.next(), Some((20, Fe32::_3)));
+ assert_eq!(iter.next(), None);
+ }
+ }
+
+ // In fact, if we know the locations, we can correct up to 3 errors.
+ let s = "bc1q9r0srrr7xfkvy5l64qlydnw9re59gtzzwf5mdx";
+ match SegwitHrpstring::new(s) {
+ Ok(_) => panic!("{} successfully, and wrongly, parsed", s),
+ Err(e) => {
+ let mut ctx = e.correction_context::<Bech32>().unwrap();
+ ctx.add_erasures(&[37, 0, 20]);
+ let mut iter = ctx.bch_errors().unwrap();
+
+ assert_eq!(iter.next(), Some((37, Fe32::C)));
+ assert_eq!(iter.next(), Some((0, Fe32::X)));
+ assert_eq!(iter.next(), Some((20, Fe32::_3)));
+ assert_eq!(iter.next(), None);
  }
  }
  }