Skip to content

Commit

Permalink
stateful_decoder! rewrites.
Browse files Browse the repository at this point in the history
`stateful_decoder!` no longer defines a `RawDecoder`. The actual
decoder implementation is expected to use the `<module>::State` type
and call the `<module>::raw_{feed,finish}` functions as needed.
This change makes the stateful decoder more flexible.
  • Loading branch information
lifthrasiir committed Apr 16, 2015
1 parent 2733b03 commit 88ba27e
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 80 deletions.
102 changes: 88 additions & 14 deletions src/codec/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! Legacy Japanese encodings based on JIS X 0208 and JIS X 0212.
use std::convert::Into;
use std::default::Default;
use util::StrCharIndex;
use index_japanese as index;
use types::*;
Expand Down Expand Up @@ -82,11 +83,36 @@ impl RawEncoder for EUCJPEncoder {
}
}

ascii_compatible_stateful_decoder! {
#[doc="A decoder for EUC-JP with JIS X 0212 in G3."]
#[derive(Clone, Copy)]
struct EUCJP0212Decoder;
/// Decoder for EUC-JP with JIS X 0212 in G3.
///
/// Carries the `eucjp::State` value that this type's `RawDecoder` implementation
/// passes to, and receives back from, `eucjp::raw_feed` and `eucjp::raw_finish`.
#[derive(Clone, Copy)]
struct EUCJP0212Decoder {
    /// State handed to the `eucjp` functions on each call.
    st: eucjp::State,
}

impl EUCJP0212Decoder {
pub fn new() -> Box<RawDecoder> {
Box::new(EUCJP0212Decoder { st: Default::default() })
}
}

impl RawDecoder for EUCJP0212Decoder {
fn from_self(&self) -> Box<RawDecoder> { EUCJP0212Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
let (st, processed, err) = eucjp::raw_feed(self.st, input, output, &());
self.st = st;
(processed, err)
}

fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
let (st, err) = eucjp::raw_finish(self.st, output, &());
self.st = st;
err
}
}

stateful_decoder! {
module eucjp;

internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 {
Expand Down Expand Up @@ -479,11 +505,36 @@ impl RawEncoder for Windows31JEncoder {
}
}

ascii_compatible_stateful_decoder! {
#[doc="A decoder for Shift_JIS with IBM/NEC extensions."]
#[derive(Clone, Copy)]
struct Windows31JDecoder;
/// Decoder for Shift_JIS with IBM/NEC extensions.
///
/// Carries the `windows31j::State` value that this type's `RawDecoder` implementation
/// passes to, and receives back from, `windows31j::raw_feed` and `windows31j::raw_finish`.
#[derive(Clone, Copy)]
struct Windows31JDecoder {
    /// State handed to the `windows31j` functions on each call.
    st: windows31j::State,
}

impl Windows31JDecoder {
pub fn new() -> Box<RawDecoder> {
Box::new(Windows31JDecoder { st: Default::default() })
}
}

impl RawDecoder for Windows31JDecoder {
fn from_self(&self) -> Box<RawDecoder> { Windows31JDecoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
let (st, processed, err) = windows31j::raw_feed(self.st, input, output, &());
self.st = st;
(processed, err)
}

fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
let (st, err) = windows31j::raw_finish(self.st, output, &());
self.st = st;
err
}
}

stateful_decoder! {
module windows31j;

internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 {
Expand Down Expand Up @@ -798,14 +849,37 @@ impl RawEncoder for ISO2022JPEncoder {
}
}

stateful_decoder! {
#[doc="A decoder for ISO-2022-JP with JIS X 0212 support."]
#[derive(Clone, Copy)]
struct ISO2022JPDecoder;
/// Decoder for ISO-2022-JP with JIS X 0212 support.
///
/// Carries the `iso2022jp::State` value that this type's `RawDecoder` implementation
/// passes to, and receives back from, `iso2022jp::raw_feed` and `iso2022jp::raw_finish`.
#[derive(Clone, Copy)]
struct ISO2022JPDecoder {
    /// State handed to the `iso2022jp` functions on each call.
    st: iso2022jp::State,
}

module iso2022jp;
impl ISO2022JPDecoder {
pub fn new() -> Box<RawDecoder> {
Box::new(ISO2022JPDecoder { st: Default::default() })
}
}

impl RawDecoder for ISO2022JPDecoder {
/// Returns a decoder built by `ISO2022JPDecoder::new()`; nothing from `self` is copied.
fn from_self(&self) -> Box<RawDecoder> {
    ISO2022JPDecoder::new()
}
/// Always answers `false` for this decoder.
fn is_ascii_compatible(&self) -> bool {
    false
}

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
let (st, processed, err) = iso2022jp::raw_feed(self.st, input, output, &());
self.st = st;
(processed, err)
}

ascii_compatible false;
fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
let (st, err) = iso2022jp::raw_finish(self.st, output, &());
self.st = st;
err
}
}

stateful_decoder! {
module iso2022jp;

internal pub fn map_two_0208_bytes(lead: u8, trail: u8) -> u32 {
use index_japanese as index;
Expand Down
34 changes: 30 additions & 4 deletions src/codec/korean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! Legacy Korean encodings based on KS X 1001.
use std::convert::Into;
use std::default::Default;
use util::StrCharIndex;
use index_korean as index;
use types::*;
Expand Down Expand Up @@ -68,11 +69,36 @@ impl RawEncoder for Windows949Encoder {
}
}

ascii_compatible_stateful_decoder! {
#[doc="A decoder for Windows code page 949."]
#[derive(Clone, Copy)]
struct Windows949Decoder;
/// Decoder for Windows code page 949.
///
/// Carries the `windows949::State` value that this type's `RawDecoder` implementation
/// passes to, and receives back from, `windows949::raw_feed` and `windows949::raw_finish`.
#[derive(Clone, Copy)]
struct Windows949Decoder {
    /// State handed to the `windows949` functions on each call.
    st: windows949::State,
}

impl Windows949Decoder {
pub fn new() -> Box<RawDecoder> {
Box::new(Windows949Decoder { st: Default::default() })
}
}

impl RawDecoder for Windows949Decoder {
fn from_self(&self) -> Box<RawDecoder> { Windows949Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
let (st, processed, err) = windows949::raw_feed(self.st, input, output, &());
self.st = st;
(processed, err)
}

fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
let (st, err) = windows949::raw_finish(self.st, output, &());
self.st = st;
err
}
}

stateful_decoder! {
module windows949;

internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
Expand Down
71 changes: 60 additions & 11 deletions src/codec/simpchinese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use std::convert::Into;
use std::marker::PhantomData;
use std::default::Default;
use util::StrCharIndex;
use index_simpchinese as index;
use types::*;
Expand Down Expand Up @@ -94,7 +95,7 @@ impl<T: GBType> Encoding for GBEncoding<T> {
}

/**
* An encoder for GBK and GB18030.
* An encoder for GBK and GB18030.
*
* ## Specialization
*
Expand Down Expand Up @@ -160,11 +161,36 @@ impl<T: GBType> RawEncoder for GBEncoder<T> {
}
}

ascii_compatible_stateful_decoder! {
#[doc="A decoder for GB 18030 (also used by GBK)."]
#[derive(Clone, Copy)]
struct GB18030Decoder;
/// Decoder for GB 18030 (also used by GBK).
///
/// Carries the `gb18030::State` value that this type's `RawDecoder` implementation
/// passes to, and receives back from, `gb18030::raw_feed` and `gb18030::raw_finish`.
#[derive(Clone, Copy)]
struct GB18030Decoder {
    /// State handed to the `gb18030` functions on each call.
    st: gb18030::State,
}

impl GB18030Decoder {
pub fn new() -> Box<RawDecoder> {
Box::new(GB18030Decoder { st: Default::default() })
}
}

impl RawDecoder for GB18030Decoder {
fn from_self(&self) -> Box<RawDecoder> { GB18030Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
let (st, processed, err) = gb18030::raw_feed(self.st, input, output, &());
self.st = st;
(processed, err)
}

fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
let (st, err) = gb18030::raw_finish(self.st, output, &());
self.st = st;
err
}
}

stateful_decoder! {
module gb18030;

internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
Expand Down Expand Up @@ -517,14 +543,37 @@ impl RawEncoder for HZEncoder {
}
}

stateful_decoder! {
#[doc="A decoder for HZ."]
#[derive(Clone, Copy)]
struct HZDecoder;
/// Decoder for HZ.
///
/// Carries the `hz::State` value that this type's `RawDecoder` implementation
/// passes to, and receives back from, `hz::raw_feed` and `hz::raw_finish`.
#[derive(Clone, Copy)]
struct HZDecoder {
    /// State handed to the `hz` functions on each call.
    st: hz::State,
}

module hz;
impl HZDecoder {
pub fn new() -> Box<RawDecoder> {
Box::new(HZDecoder { st: Default::default() })
}
}

impl RawDecoder for HZDecoder {
/// Returns a decoder built by `HZDecoder::new()`; nothing from `self` is copied.
fn from_self(&self) -> Box<RawDecoder> {
    HZDecoder::new()
}
/// Answers `false`: HZ is not ASCII-compatible.
///
/// HZ is a 7-bit encoding in which `~` introduces escape sequences and, while in
/// GB mode, printable ASCII byte values form two-byte characters, so ASCII bytes
/// do not always decode to themselves. This matches the `ascii_compatible false`
/// setting that the former `stateful_decoder!` invocation declared for this decoder.
fn is_ascii_compatible(&self) -> bool {
    false
}

ascii_compatible false;
fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
let (st, processed, err) = hz::raw_feed(self.st, input, output, &());
self.st = st;
(processed, err)
}

fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
let (st, err) = hz::raw_finish(self.st, output, &());
self.st = st;
err
}
}

stateful_decoder! {
module hz;

internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
use index_simpchinese as index;
Expand Down
34 changes: 30 additions & 4 deletions src/codec/tradchinese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! Legacy traditional Chinese encodings.
use std::convert::Into;
use std::default::Default;
use util::StrCharIndex;
use index_tradchinese as index;
use types::*;
Expand Down Expand Up @@ -74,11 +75,36 @@ impl RawEncoder for BigFive2003Encoder {
}
}

ascii_compatible_stateful_decoder! {
#[doc="A decoder for Big5-2003 with HKSCS-2008 extension."]
#[derive(Clone, Copy)]
struct BigFive2003HKSCS2008Decoder;
/// Decoder for Big5-2003 with HKSCS-2008 extension.
///
/// Carries the `bigfive2003::State` value that this type's `RawDecoder` implementation
/// passes to, and receives back from, `bigfive2003::raw_feed` and `bigfive2003::raw_finish`.
#[derive(Clone, Copy)]
struct BigFive2003HKSCS2008Decoder {
    /// State handed to the `bigfive2003` functions on each call.
    st: bigfive2003::State,
}

impl BigFive2003HKSCS2008Decoder {
pub fn new() -> Box<RawDecoder> {
Box::new(BigFive2003HKSCS2008Decoder { st: Default::default() })
}
}

impl RawDecoder for BigFive2003HKSCS2008Decoder {
fn from_self(&self) -> Box<RawDecoder> { BigFive2003HKSCS2008Decoder::new() }
fn is_ascii_compatible(&self) -> bool { true }

fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
let (st, processed, err) = bigfive2003::raw_feed(self.st, input, output, &());
self.st = st;
(processed, err)
}

fn raw_finish(&mut self, output: &mut StringWriter) -> Option<CodecError> {
let (st, err) = bigfive2003::raw_finish(self.st, output, &());
self.st = st;
err
}
}

stateful_decoder! {
module bigfive2003;

internal pub fn map_two_bytes(lead: u8, trail: u8) -> u32 {
Expand Down
Loading

0 comments on commit 88ba27e

Please sign in to comment.