From 88ba27e4cb985e9efd81f30f242a669d8e9d3e17 Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Thu, 16 Apr 2015 23:53:15 +0900 Subject: [PATCH] stateful_decoder! rewrites. `stateful_decoder!` no longer defines a RawDecoder. The actual decoder implementation is expected to use `<module>::State` type and call `<module>::raw_{feed,finish}` methods as needed. This change is made to make the stateful decoder more flexible. --- src/codec/japanese.rs | 102 +++++++++++++++++++++++++++++++++------ src/codec/korean.rs | 34 +++++++++++-- src/codec/simpchinese.rs | 71 ++++++++++++++++++++++----- src/codec/tradchinese.rs | 34 +++++++++++-- src/util.rs | 56 ++++----------------- 5 files changed, 217 insertions(+), 80 deletions(-) diff --git a/src/codec/japanese.rs b/src/codec/japanese.rs index b40e3766..50eff784 100644 --- a/src/codec/japanese.rs +++ b/src/codec/japanese.rs @@ -5,6 +5,7 @@ //! Legacy Japanese encodings based on JIS X 0208 and JIS X 0212. use std::convert::Into; +use std::default::Default; use util::StrCharIndex; use index_japanese as index; use types::*; @@ -82,11 +83,36 @@ impl RawEncoder for EUCJPEncoder { } } -ascii_compatible_stateful_decoder! { - #[doc="A decoder for EUC-JP with JIS X 0212 in G3."] - #[derive(Clone, Copy)] - struct EUCJP0212Decoder; +/// A decoder for EUC-JP with JIS X 0212 in G3. 
+#[derive(Clone, Copy)] +struct EUCJP0212Decoder { + st: eucjp::State, +} + +impl EUCJP0212Decoder { + pub fn new() -> Box { + Box::new(EUCJP0212Decoder { st: Default::default() }) + } +} + +impl RawDecoder for EUCJP0212Decoder { + fn from_self(&self) -> Box { EUCJP0212Decoder::new() } + fn is_ascii_compatible(&self) -> bool { true } + + fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option) { + let (st, processed, err) = eucjp::raw_feed(self.st, input, output, &()); + self.st = st; + (processed, err) + } + fn raw_finish(&mut self, output: &mut StringWriter) -> Option { + let (st, err) = eucjp::raw_finish(self.st, output, &()); + self.st = st; + err + } +} + +stateful_decoder! { module eucjp; internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 { @@ -479,11 +505,36 @@ impl RawEncoder for Windows31JEncoder { } } -ascii_compatible_stateful_decoder! { - #[doc="A decoder for Shift_JIS with IBM/NEC extensions."] - #[derive(Clone, Copy)] - struct Windows31JDecoder; +/// A decoder for Shift_JIS with IBM/NEC extensions. +#[derive(Clone, Copy)] +struct Windows31JDecoder { + st: windows31j::State, +} + +impl Windows31JDecoder { + pub fn new() -> Box { + Box::new(Windows31JDecoder { st: Default::default() }) + } +} + +impl RawDecoder for Windows31JDecoder { + fn from_self(&self) -> Box { Windows31JDecoder::new() } + fn is_ascii_compatible(&self) -> bool { true } + + fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option) { + let (st, processed, err) = windows31j::raw_feed(self.st, input, output, &()); + self.st = st; + (processed, err) + } + + fn raw_finish(&mut self, output: &mut StringWriter) -> Option { + let (st, err) = windows31j::raw_finish(self.st, output, &()); + self.st = st; + err + } +} +stateful_decoder! { module windows31j; internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 { @@ -798,14 +849,37 @@ impl RawEncoder for ISO2022JPEncoder { } } -stateful_decoder! 
{ - #[doc="A decoder for ISO-2022-JP with JIS X 0212 support."] - #[derive(Clone, Copy)] - struct ISO2022JPDecoder; +/// A decoder for ISO-2022-JP with JIS X 0212 support. +#[derive(Clone, Copy)] +struct ISO2022JPDecoder { + st: iso2022jp::State, +} - module iso2022jp; +impl ISO2022JPDecoder { + pub fn new() -> Box { + Box::new(ISO2022JPDecoder { st: Default::default() }) + } +} + +impl RawDecoder for ISO2022JPDecoder { + fn from_self(&self) -> Box { ISO2022JPDecoder::new() } + fn is_ascii_compatible(&self) -> bool { false } + + fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option) { + let (st, processed, err) = iso2022jp::raw_feed(self.st, input, output, &()); + self.st = st; + (processed, err) + } - ascii_compatible false; + fn raw_finish(&mut self, output: &mut StringWriter) -> Option { + let (st, err) = iso2022jp::raw_finish(self.st, output, &()); + self.st = st; + err + } +} + +stateful_decoder! { + module iso2022jp; internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 { use index_japanese as index; diff --git a/src/codec/korean.rs b/src/codec/korean.rs index 41f378f2..7fb3b119 100644 --- a/src/codec/korean.rs +++ b/src/codec/korean.rs @@ -5,6 +5,7 @@ //! Legacy Korean encodings based on KS X 1001. use std::convert::Into; +use std::default::Default; use util::StrCharIndex; use index_korean as index; use types::*; @@ -68,11 +69,36 @@ impl RawEncoder for Windows949Encoder { } } -ascii_compatible_stateful_decoder! { - #[doc="A decoder for Windows code page 949."] - #[derive(Clone, Copy)] - struct Windows949Decoder; +/// A decoder for Windows code page 949. 
+#[derive(Clone, Copy)] +struct Windows949Decoder { + st: windows949::State, +} + +impl Windows949Decoder { + pub fn new() -> Box { + Box::new(Windows949Decoder { st: Default::default() }) + } +} + +impl RawDecoder for Windows949Decoder { + fn from_self(&self) -> Box { Windows949Decoder::new() } + fn is_ascii_compatible(&self) -> bool { true } + + fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option) { + let (st, processed, err) = windows949::raw_feed(self.st, input, output, &()); + self.st = st; + (processed, err) + } + + fn raw_finish(&mut self, output: &mut StringWriter) -> Option { + let (st, err) = windows949::raw_finish(self.st, output, &()); + self.st = st; + err + } +} +stateful_decoder! { module windows949; internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 { diff --git a/src/codec/simpchinese.rs b/src/codec/simpchinese.rs index 1c6ea8fc..c4181cf8 100644 --- a/src/codec/simpchinese.rs +++ b/src/codec/simpchinese.rs @@ -6,6 +6,7 @@ use std::convert::Into; use std::marker::PhantomData; +use std::default::Default; use util::StrCharIndex; use index_simpchinese as index; use types::*; @@ -94,7 +95,7 @@ impl Encoding for GBEncoding { } /** - * An encoder for GBK and GB18030. + * An encoder for GBK and GB18030. * * ## Specialization * @@ -160,11 +161,36 @@ impl RawEncoder for GBEncoder { } } -ascii_compatible_stateful_decoder! { - #[doc="A decoder for GB 18030 (also used by GBK)."] - #[derive(Clone, Copy)] - struct GB18030Decoder; +/// A decoder for GB 18030 (also used by GBK). 
+#[derive(Clone, Copy)] +struct GB18030Decoder { + st: gb18030::State, +} + +impl GB18030Decoder { + pub fn new() -> Box { + Box::new(GB18030Decoder { st: Default::default() }) + } +} + +impl RawDecoder for GB18030Decoder { + fn from_self(&self) -> Box { GB18030Decoder::new() } + fn is_ascii_compatible(&self) -> bool { true } + fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option) { + let (st, processed, err) = gb18030::raw_feed(self.st, input, output, &()); + self.st = st; + (processed, err) + } + + fn raw_finish(&mut self, output: &mut StringWriter) -> Option { + let (st, err) = gb18030::raw_finish(self.st, output, &()); + self.st = st; + err + } +} + +stateful_decoder! { module gb18030; internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 { @@ -517,14 +543,37 @@ impl RawEncoder for HZEncoder { } } -stateful_decoder! { - #[doc="A decoder for HZ."] - #[derive(Clone, Copy)] - struct HZDecoder; +/// A decoder for HZ. +#[derive(Clone, Copy)] +struct HZDecoder { + st: hz::State, +} - module hz; +impl HZDecoder { + pub fn new() -> Box { + Box::new(HZDecoder { st: Default::default() }) + } +} + +impl RawDecoder for HZDecoder { + fn from_self(&self) -> Box { HZDecoder::new() } + fn is_ascii_compatible(&self) -> bool { false } - ascii_compatible false; + fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option) { + let (st, processed, err) = hz::raw_feed(self.st, input, output, &()); + self.st = st; + (processed, err) + } + + fn raw_finish(&mut self, output: &mut StringWriter) -> Option { + let (st, err) = hz::raw_finish(self.st, output, &()); + self.st = st; + err + } +} + +stateful_decoder! { + module hz; internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 { use index_simpchinese as index; diff --git a/src/codec/tradchinese.rs b/src/codec/tradchinese.rs index 1aa9a716..18a57451 100644 --- a/src/codec/tradchinese.rs +++ b/src/codec/tradchinese.rs @@ -5,6 +5,7 @@ //! Legacy traditional Chinese encodings. 
use std::convert::Into; +use std::default::Default; use util::StrCharIndex; use index_tradchinese as index; use types::*; @@ -74,11 +75,36 @@ impl RawEncoder for BigFive2003Encoder { } } -ascii_compatible_stateful_decoder! { - #[doc="A decoder for Big5-2003 with HKSCS-2008 extension."] - #[derive(Clone, Copy)] - struct BigFive2003HKSCS2008Decoder; +/// A decoder for Big5-2003 with HKSCS-2008 extension. +#[derive(Clone, Copy)] +struct BigFive2003HKSCS2008Decoder { + st: bigfive2003::State, +} + +impl BigFive2003HKSCS2008Decoder { + pub fn new() -> Box { + Box::new(BigFive2003HKSCS2008Decoder { st: Default::default() }) + } +} + +impl RawDecoder for BigFive2003HKSCS2008Decoder { + fn from_self(&self) -> Box { BigFive2003HKSCS2008Decoder::new() } + fn is_ascii_compatible(&self) -> bool { true } + + fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option) { + let (st, processed, err) = bigfive2003::raw_feed(self.st, input, output, &()); + self.st = st; + (processed, err) + } + + fn raw_finish(&mut self, output: &mut StringWriter) -> Option { + let (st, err) = bigfive2003::raw_finish(self.st, output, &()); + self.st = st; + err + } +} +stateful_decoder! { module bigfive2003; internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 { diff --git a/src/util.rs b/src/util.rs index eb299bfb..1d313aa7 100644 --- a/src/util.rs +++ b/src/util.rs @@ -130,12 +130,9 @@ impl<'a, St: Default, Data> StatefulDecoderHelper<'a, St, Data> { } /// Defines a stateful decoder from given state machine. -macro_rules! stateful_decoder( +macro_rules! stateful_decoder { ( - $(#[$decmeta:meta])* - struct $dec:ident; module $stmod:ident; // should be unique from other existing identifiers - ascii_compatible $asciicompat:expr; $(internal $item:item)* // will only be visible from state functions initial: state $inist:ident($inictx:ident: Context) { @@ -153,15 +150,11 @@ macro_rules! 
stateful_decoder( final => $($fin:expr),+; })* ) => ( - $(#[$decmeta])* - pub struct $dec { - st: $stmod::State - } - #[allow(non_snake_case)] mod $stmod { pub use self::State::*; - #[derive(PartialEq,Clone,Copy)] + + #[derive(PartialEq, Clone, Copy)] pub enum State { $inist, $( @@ -294,38 +287,10 @@ macro_rules! stateful_decoder( (st, ctx.err.take()) } } + ); - impl $dec { - pub fn new() -> Box { - Box::new($dec { st: $stmod::$inist }) - } - } - - impl RawDecoder for $dec { - fn from_self(&self) -> Box { $dec::new() } - fn is_ascii_compatible(&self) -> bool { $asciicompat } - - fn raw_feed(&mut self, input: &[u8], - output: &mut StringWriter) -> (usize, Option) { - let (st, processed, err) = $stmod::raw_feed(self.st, input, output, &()); - self.st = st; - (processed, err) - } - - fn raw_finish(&mut self, output: &mut StringWriter) -> Option { - let (st, err) = $stmod::raw_finish(self.st, output, &()); - self.st = st; - err - } - } - ) -); - -/// Defines an ASCII-compatible stateful decoder from given state machine. -macro_rules! ascii_compatible_stateful_decoder( + // simplified rules: no checkpoint and default final actions ( - $(#[$decmeta:meta])* - struct $dec:ident; module $stmod:ident; // should be unique from other existing identifiers $(internal $item:item)* // will only be visible from state functions initial: @@ -337,11 +302,8 @@ macro_rules! ascii_compatible_stateful_decoder( $(case $($lhs:pat),+ => $($rhs:expr),+;)+ })* ) => ( - stateful_decoder!( - $(#[$decmeta])* - struct $dec; + stateful_decoder! { module $stmod; - ascii_compatible true; $(internal $item)* initial: state $inist($inictx: Context) { @@ -354,7 +316,7 @@ macro_rules! ascii_compatible_stateful_decoder( $(case $($lhs),+ => $($rhs),+;)+ final => $ctx.err("incomplete sequence"); })* - ); - ) -); + } + ); +}