Skip to content

Commit

Permalink
Let utf16be_to_string_lossy return a String directly
Browse files Browse the repository at this point in the history
Function utf16be_to_string_lossy always returns an Ok(..) result,
as its name suggests. Therefore wrapping it in a Result<_> is
unnecessary. The change bubbles up to PdfString::to_string_lossy.
  • Loading branch information
mwanner authored and s3bk committed Jan 24, 2023
1 parent 47d65db commit 19a08fb
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 23 deletions.
2 changes: 1 addition & 1 deletion pdf/examples/names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ fn main() {
let mut dests_cb = |key: &PdfString, val: &Option<Dest>| {
//println!("{:?} {:?}", key, val);
if let Some(Dest { page: Some(page), ..}) = val {
pages_map.insert(key.to_string_lossy().unwrap(), page.get_inner());
pages_map.insert(key.to_string_lossy(), page.get_inner());
}

count += 1;
Expand Down
7 changes: 1 addition & 6 deletions pdf/examples/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,7 @@ fn main() -> Result<(), PdfError> {
for field in forms.fields.iter() {
print!(" {:?} = ", field.name);
match field.value {
Primitive::String(ref s) => {
match s.to_string_lossy() {
Ok(s) => println!("{:?}", s),
Err(_) => println!("{:?}", s),
}
}
Primitive::String(ref s) => println!("{}", s.to_string_lossy()),
Primitive::Integer(i) => println!("{}", i),
Primitive::Name(ref s) => println!("{}", s),
ref p => println!("{:?}", p),
Expand Down
8 changes: 4 additions & 4 deletions pdf/src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -477,10 +477,10 @@ pub fn utf16be_to_char(
char::decode_utf16(data.chunks(2).map(|w| u16::from_be_bytes([w[0], w[1]])))
}
/// converts UTF16-BE to a string replacing illegal/unknown characters
pub fn utf16be_to_string_lossy(data: &[u8]) -> pdf::error::Result<String> {
Ok(utf16be_to_char(data)
pub fn utf16be_to_string_lossy(data: &[u8]) -> String {
utf16be_to_char(data)
.map(|r| r.unwrap_or(std::char::REPLACEMENT_CHARACTER))
.collect())
.collect()
}
/// converts UTF16-BE to a string, erroring out on illegal/unknown characters
pub fn utf16be_to_string(data: &[u8]) -> pdf::error::Result<SmallString> {
Expand Down Expand Up @@ -624,6 +624,6 @@ mod tests {
assert_eq!(r.to_string(), "UTF16 decode error");
}
assert_eq!(utf16be_to_string(&v[..8]).unwrap(), String::from("𝄞mu"));
assert_eq!(utf16be_to_string_lossy(&v).unwrap(), lossy);
assert_eq!(utf16be_to_string_lossy(&v), lossy);
}
}
24 changes: 12 additions & 12 deletions pdf/src/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,12 +401,12 @@ impl PdfString {
/// therefore only lossy decoding is possible replacing unknown characters.
/// For decoding correctly see
/// pdf_tools/src/lib.rs
pub fn to_string_lossy(&self) -> Result<String> {
pub fn to_string_lossy(&self) -> String {
if self.data.starts_with(&[0xfe, 0xff]) {
Ok(crate::font::utf16be_to_string_lossy(&self.data[2..])?)
crate::font::utf16be_to_string_lossy(&self.data[2..])
}
else {
Ok(String::from_utf8_lossy(&self.data).into())
String::from_utf8_lossy(&self.data).into()
}
}
/// without encoding information the PdfString cannot be sensibly decoded into a String
Expand Down Expand Up @@ -534,7 +534,7 @@ impl Primitive {
}
pub fn to_string_lossy(&self) -> Result<String> {
let s = self.as_string()?;
s.to_string_lossy()
Ok(s.to_string_lossy())
}
pub fn to_string(&self) -> Result<String> {
let s = self.as_string()?;
Expand Down Expand Up @@ -642,7 +642,7 @@ impl<'a> TryInto<Cow<'a, str>> for &'a Primitive {
fn try_into(self) -> Result<Cow<'a, str>> {
match *self {
Primitive::Name(ref s) => Ok(Cow::Borrowed(&*s)),
Primitive::String(ref s) => Ok(Cow::Owned(s.to_string_lossy()?)),
Primitive::String(ref s) => Ok(Cow::Owned(s.to_string_lossy())),
ref p => Err(PdfError::UnexpectedPrimitive {
expected: "Name or String",
found: p.get_debug_name()
Expand All @@ -655,7 +655,7 @@ impl<'a> TryInto<String> for &'a Primitive {
fn try_into(self) -> Result<String> {
match *self {
Primitive::Name(ref s) => Ok(s.as_str().into()),
Primitive::String(ref s) => Ok(s.to_string_lossy()?),
Primitive::String(ref s) => Ok(s.to_string_lossy()),
ref p => Err(PdfError::UnexpectedPrimitive {
expected: "Name or String",
found: p.get_debug_name()
Expand Down Expand Up @@ -725,22 +725,22 @@ mod tests {
#[test]
fn utf16be_string() {
let s = PdfString::new([0xfe, 0xff, 0x20, 0x09].as_slice().into());
assert_eq!(s.to_string_lossy().unwrap(), "\u{2009}");
assert_eq!(s.to_string_lossy(), "\u{2009}");
}

#[test]
fn utf16be_invalid_string() {
let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34].as_slice().into());
let repl_ch = String::from(std::char::REPLACEMENT_CHARACTER);
assert_eq!(s.to_string_lossy().unwrap(), repl_ch);
assert_eq!(s.to_string_lossy(), repl_ch);
}

#[test]
#[should_panic]
fn utf16be_invalid_bytelen() {
let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34, 0x20].as_slice().into());
let repl_ch = String::from(std::char::REPLACEMENT_CHARACTER);
assert_eq!(s.to_string_lossy().unwrap(), repl_ch);
assert_eq!(s.to_string_lossy(), repl_ch);
}

#[test]
Expand All @@ -750,16 +750,16 @@ mod tests {
assert!(s.to_string().is_err()); // FIXME verify it is a PdfError::Utf16Decode
// verify UTF-16-BE supports umlauts
let s = PdfString::new([0xfe, 0xff, 0x00, 0xe4 /*ä*/].as_slice().into());
assert_eq!(s.to_string_lossy().unwrap(), "ä");
assert_eq!(s.to_string_lossy(), "ä");
assert_eq!(s.to_string().unwrap(), "ä");
// verify valid UTF-8 bytestream with umlaut works
let s = PdfString::new([b'm', b'i', b't', 0xc3, 0xa4 /*ä*/].as_slice().into());
assert_eq!(s.to_string_lossy().unwrap(), "mitä");
assert_eq!(s.to_string_lossy(), "mitä");
assert_eq!(s.to_string().unwrap(), "mitä");
// verify valid ISO-8859-1 bytestream with umlaut fails
let s = PdfString::new([b'm', b'i', b't', 0xe4/*ä in latin1*/].as_slice().into());
let repl_ch = ['m', 'i', 't', std::char::REPLACEMENT_CHARACTER].iter().collect::<String>();
assert_eq!(s.to_string_lossy().unwrap(), repl_ch);
assert_eq!(s.to_string_lossy(), repl_ch);
assert!(s.to_string().is_err()); // FIXME verify it is a PdfError::Utf16Decode
}
}

0 comments on commit 19a08fb

Please sign in to comment.