diff --git a/src/lib.rs b/src/lib.rs index 9c95dcd..cef8e2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,7 +46,10 @@ )] mod error; -mod metadata; +/// Declare a public module named `metadata`. +/// This module contains code related to handling metadata associated with translation entries. +/// It provides functionality for managing key-value pairs of metadata. +pub mod metadata; mod parser; mod plurals; @@ -57,6 +60,7 @@ use std::ops::Deref; use crate::parser::default_resolver; use crate::plurals::*; pub use crate::{error::Error, parser::ParseOptions}; +use metadata::MetadataMap; fn key_with_context(context: &str, key: &str) -> String { let mut result = context.to_owned(); @@ -69,8 +73,11 @@ fn key_with_context(context: &str, key: &str) -> String { /// parsed out of one MO file. #[derive(Clone, Debug)] pub struct Catalog { - strings: HashMap, + /// Creates a public property to store the `Message` values from MO files + pub strings: HashMap, resolver: Resolver, + /// Creates a public optional property to store the metadata from MO files + pub metadata: Option, } impl Catalog { @@ -86,9 +93,15 @@ impl Catalog { Catalog { strings: HashMap::new(), resolver: Resolver::Function(default_resolver), + metadata: None, } } + /// Merge another catalog. + pub fn merge(&mut self, catalog: &Catalog) { + self.strings.extend(catalog.strings.to_owned()); + } + /// Parses a gettext catalog from the given binary MO file. /// Returns the `Err` variant upon encountering an invalid file format /// or invalid byte sequence in strings. @@ -130,7 +143,7 @@ impl Catalog { /// with the correct plural form for the number `n` of objects. /// Returns msg_id if a translation does not exist and `n == 1`, /// msg_id_plural otherwise. - pub fn ngettext<'a>(&'a self, msg_id: &'a str, msg_id_plural: &'a str, n: u64) -> &'a str { + pub fn ngettext<'a>(&'a self, msg_id: &'a str, msg_id_plural: &'a str, n: i64) -> &'a str { let form_no = self.resolver.resolve(n); let message = self.strings.get(msg_id); match message.and_then(|m| m.get_translated(form_no)) { @@ -164,7 +177,7 @@ impl Catalog { msg_context: &str, msg_id: &'a str, msg_id_plural: &'a str, - n: u64, + n: i64, ) -> &'a str { let key = key_with_context(msg_context, &msg_id); let form_no = self.resolver.resolve(n); @@ -179,18 +192,47 @@ impl Catalog { } #[derive(Clone, Debug, Eq, PartialEq)] -struct Message { - id: String, - context: Option, - translated: Vec, +/// `Message` represents a message that can be translated. It contains +/// the original string (ID), an optional plural form, an optional context +/// for disambiguation, and the translated strings for the message. +pub struct Message { + /// The original string to be translated, used as the key for looking up + /// translations. + pub id: String, + /// An optional context for the translation, used for disambiguation + /// when the same original string can have different translations + /// depending on its usage. + pub context: Option, + /// Translated strings for the message. Contains one string for each + /// plural form in the target language. + pub translated: Vec, + /// An optional plural form of the original string, used with ngettext. + pub plural: Option, } impl Message { + /// Constructs a new `Message` instance with the given id, context and translated strings. fn new>(id: T, context: Option, translated: Vec) -> Self { Message { id: id.into(), context: context.map(Into::into), translated: translated.into_iter().map(Into::into).collect(), + plural: None, + } + } + /// Constructs a new `Message` instance with the given id, context, translated strings, + /// and an optional plural form which will be used only when a plural form is available. + fn with_plural>( + id: T, + context: Option, + translated: Vec, + plural: Option, + ) -> Self { + Message { + id: id.into(), + context: context.map(Into::into), + translated: translated.into_iter().map(Into::into).collect(), + plural: plural.map(Into::into), } } @@ -209,19 +251,53 @@ fn catalog_impls_send_sync() { fn catalog_insert() { let mut cat = Catalog::new(); cat.insert(Message::new("thisisid", None, vec![])); - cat.insert(Message::new("anotherid", Some("context"), vec![])); + cat.insert(Message::new("thisisid", Some("context"), vec![])); + cat.insert(Message::with_plural( + "anotherid", + None, + vec![], + Some("thisispluralid"), + )); + cat.insert(Message::with_plural( + "anotherid", + Some("context"), + vec![], + Some("thisispluralid"), + )); let mut keys = cat.strings.keys().collect::>(); keys.sort(); - assert_eq!(keys, &["context\x04anotherid", "thisisid"]) + assert_eq!( + keys, + &[ + "anotherid", + "context\x04anotherid", + "context\x04thisisid", + "thisisid" + ] + ) } #[test] fn catalog_gettext() { let mut cat = Catalog::new(); cat.insert(Message::new("Text", None, vec!["Tekstas"])); - cat.insert(Message::new("Image", Some("context"), vec!["Paveikslelis"])); + cat.insert(Message::new("Text", Some("context"), vec!["Tekstas"])); + cat.insert(Message::with_plural( + "Image", + None, + vec!["Paveikslelis"], + Some("Images"), + )); + cat.insert(Message::with_plural( + "Image", + Some("context"), + vec!["Paveikslelis"], + Some("Images"), + )); assert_eq!(cat.gettext("Text"), "Tekstas"); - assert_eq!(cat.gettext("Image"), "Image"); + assert_eq!(cat.gettext("context\x04Text"), "Tekstas"); + assert_eq!(cat.gettext("Image"), "Paveikslelis"); + assert_eq!(cat.gettext("context\x04Image"), "Paveikslelis"); } #[test] @@ -235,7 +311,12 @@ fn catalog_ngettext() { assert_eq!(cat.ngettext("Text", "Texts", 2), "Texts"); } { - cat.insert(Message::new("Text", None, vec!["Tekstas", "Tekstai"])); + cat.insert(Message::with_plural( + "Text", + None, + vec!["Tekstas", "Tekstai"], + Some("Texts"), + )); // n == 1, translation available assert_eq!(cat.ngettext("Text", "Texts", 1), "Tekstas"); // n != 1, translation available @@ -265,10 +346,11 @@ fn catalog_npgettext_not_enough_forms_in_message() { } let mut cat = Catalog::new(); - cat.insert(Message::new( + cat.insert(Message::with_plural( "Text", Some("ctx"), vec!["Tekstas", "Tekstai"], + Some("Texts"), )); cat.resolver = Resolver::Function(resolver); assert_eq!(cat.npgettext("ctx", "Text", "Texts", 0), "Tekstas"); @@ -279,7 +361,12 @@ fn catalog_npgettext_not_enough_forms_in_message() { #[test] fn catalog_pgettext() { let mut cat = Catalog::new(); - cat.insert(Message::new("Text", Some("unit test"), vec!["Tekstas"])); + cat.insert(Message::with_plural( + "Text", + Some("unit test"), + vec!["Tekstas"], + Some("Texts"), + )); assert_eq!(cat.pgettext("unit test", "Text"), "Tekstas"); assert_eq!(cat.pgettext("integration test", "Text"), "Text"); } @@ -287,10 +374,11 @@ fn catalog_pgettext() { #[test] fn catalog_npgettext() { let mut cat = Catalog::new(); - cat.insert(Message::new( + cat.insert(Message::with_plural( "Text", Some("unit test"), vec!["Tekstas", "Tekstai"], + Some("Texts"), )); assert_eq!(cat.npgettext("unit test", "Text", "Texts", 1), "Tekstas"); diff --git a/src/metadata.rs b/src/metadata.rs index 7e61f18..fed7616 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -4,12 +4,16 @@ use std::ops::{Deref, DerefMut}; use super::Error; use crate::Error::MalformedMetadata; -#[derive(Debug)] -pub struct MetadataMap<'a>(HashMap<&'a str, &'a str>); +#[derive(Debug, Clone)] -impl<'a> MetadataMap<'a> { +/// Define a struct called `MetadataMap` that represents a map of metadata. +/// It is a simple wrapper around a `HashMap` with `String` keys and `String` values. +/// This struct is used to store key-value pairs of metadata associated with a translation entry or other data. +pub struct MetadataMap(HashMap); + +impl MetadataMap { /// Returns a string that indicates the character set. - pub fn charset(&self) -> Option<&'a str> { + pub fn charset(&self) -> Option<&str> { self.get("Content-Type") .and_then(|x| x.split("charset=").nth(1)) } @@ -19,7 +23,7 @@ impl<'a> MetadataMap<'a> { /// the number of elements. /// /// Defaults to `n_plurals = 2` and `plural = n!=1` (as in English). - pub fn plural_forms(&self) -> (Option, Option<&'a str>) { + pub fn plural_forms(&self) -> (Option, Option<&str>) { self.get("Plural-Forms") .map(|f| { f.split(';').fold((None, None), |(n_pl, pl), prop| { @@ -41,27 +45,31 @@ impl<'a> MetadataMap<'a> { } } -impl<'a> Deref for MetadataMap<'a> { - type Target = HashMap<&'a str, &'a str>; +impl Deref for MetadataMap { + type Target = HashMap; fn deref(&self) -> &Self::Target { &self.0 } } -impl<'a> DerefMut for MetadataMap<'a> { +impl DerefMut for MetadataMap { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } -pub fn parse_metadata(blob: &str) -> Result { +/// Parses the given metadata blob into a `MetadataMap`. +pub fn parse_metadata(blob: String) -> Result { let mut map = MetadataMap(HashMap::new()); - for line in blob.split('\n').filter(|s| s != &"") { + for line in blob.split('\n').filter(|s| !s.is_empty()) { let pos = match line.bytes().position(|b| b == b':') { Some(p) => p, None => return Err(MalformedMetadata), }; - map.insert(line[..pos].trim(), line[pos + 1..].trim()); + map.insert( + line[..pos].trim().to_string(), + line[pos + 1..].trim().to_string(), + ); } Ok(map) } @@ -71,11 +79,14 @@ fn test_metadatamap_charset() { { let mut map = MetadataMap(HashMap::new()); assert!(map.charset().is_none()); - map.insert("Content-Type", ""); + map.insert("Content-Type".to_string(), "".to_string()); assert!(map.charset().is_none()); - map.insert("Content-Type", "abc"); + map.insert("Content-Type".to_string(), "abc".to_string()); assert!(map.charset().is_none()); - map.insert("Content-Type", "text/plain; charset=utf-42"); + map.insert( + "Content-Type".to_string(), + "text/plain; charset=utf-42".to_string(), + ); assert_eq!(map.charset().unwrap(), "utf-42"); } } @@ -86,19 +97,28 @@ fn test_metadatamap_plural() { let mut map = MetadataMap(HashMap::new()); assert_eq!(map.plural_forms(), (None, None)); - map.insert("Plural-Forms", ""); + map.insert("Plural-Forms".to_string(), "".to_string()); assert_eq!(map.plural_forms(), (None, None)); // n_plural - map.insert("Plural-Forms", "n_plurals=42"); + map.insert("Plural-Forms".to_string(), "n_plurals=42".to_string()); assert_eq!(map.plural_forms(), (Some(42), None)); // plural is specified - map.insert("Plural-Forms", "n_plurals=2; plural=n==12"); + map.insert( + "Plural-Forms".to_string(), + "n_plurals=2; plural=n==12".to_string(), + ); assert_eq!(map.plural_forms(), (Some(2), Some("n==12"))); // plural before n_plurals - map.insert("Plural-Forms", "plural=n==12; n_plurals=2"); + map.insert( + "Plural-Forms".to_string(), + "plural=n==12; n_plurals=2".to_string(), + ); assert_eq!(map.plural_forms(), (Some(2), Some("n==12"))); // with spaces - map.insert("Plural-Forms", " n_plurals = 42 ; plural = n > 10 "); + map.insert( + "Plural-Forms".to_string(), + " n_plurals = 42 ; plural = n > 10 ".to_string(), + ); assert_eq!(map.plural_forms(), (Some(42), Some("n > 10"))); } } diff --git a/src/parser.rs b/src/parser.rs index 35d760e..7df1209 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -28,7 +28,7 @@ static utf8_encoding: EncodingRef = &encoding::codec::utf_8::UTF8Encoding; #[derive(Default)] pub struct ParseOptions { force_encoding: Option, - force_plural: Option usize>, + force_plural: Option usize>, } impl ParseOptions { @@ -57,7 +57,7 @@ impl ParseOptions { /// If this option is not enabled, /// the parser tries to use the plural formula specified in the metadata /// or `n != 1` if metadata is non-existent. - pub fn force_plural(mut self, plural: fn(u64) -> usize) -> Self { + pub fn force_plural(mut self, plural: fn(i64) -> usize) -> Self { self.force_plural = Some(plural); self } @@ -119,13 +119,24 @@ pub fn parse_catalog(mut file: R, opts: ParseOptions) -> Result None, }; - // extract msg_id singular, ignoring the plural - let id = match original + // extract msg_id singular and plural + let (id, plural) = match original .iter() .position(|x| *x == 0) - .map(|i| &original[..i]) + .map(|i| (&original[..i], &original[i + 1..])) { - Some(b) => encoding.decode(b, Strict)?, + Some((b_singular, b_plural)) => { + if b_plural.is_empty() { + (encoding.decode(b_singular, Strict)?, None) + } else { + let plural_string = encoding.decode(b_plural, Strict)?; + let trimmed_plural = plural_string.trim_end_matches('\0'); + ( + encoding.decode(b_singular, Strict)?, + Some(trimmed_plural.to_string()), + ) + } + } None => return Err(Eof), }; if id == "" && i != 0 { @@ -147,7 +158,10 @@ pub fn parse_catalog(mut file: R, opts: ParseOptions) -> Result, _>>()?; if id == "" { - let map = parse_metadata(&*translated[0])?; + // Parse the metadata from the first translation string, returning early if there's an error. + let map = parse_metadata((*translated[0]).to_string())?; + // Set the metadata of the catalog with the parsed result. + catalog.metadata = Some(map.clone()); if let (Some(c), None) = (map.charset(), opts.force_encoding) { encoding = encoding_from_whatwg_label(c).ok_or(UnknownEncoding)?; } @@ -158,7 +172,14 @@ pub fn parse_catalog(mut file: R, opts: ParseOptions) -> Result(mut file: R, opts: ParseOptions) -> Result usize { +pub fn default_resolver(n: i64) -> usize { if n == 1 { 0 } else { @@ -245,7 +266,11 @@ fn test_parse_catalog() { assert_eq!(catalog.strings.len(), 1); assert_eq!( catalog.strings["this is context\x04Text"], - Message::new("Text", Some("this is context"), vec!["Tekstas", "Tekstai"]) + Message::new( + "Text", + Some("this is context"), + vec!["Tekstas", "Tekstai"] + ) ); } diff --git a/src/plurals.rs b/src/plurals.rs index b48c9f5..4501e09 100644 --- a/src/plurals.rs +++ b/src/plurals.rs @@ -8,7 +8,7 @@ pub enum Resolver { /// Use Ast::parse to get an Ast Expr(Ast), /// A function - Function(fn(u64) -> usize), + Function(fn(i64) -> usize), } /// Finds the index of a pattern, outside of parenthesis @@ -74,7 +74,7 @@ pub enum Operator { } impl Ast { - fn resolve(&self, n: u64) -> usize { + fn resolve(&self, n: i64) -> usize { match *self { Ternary(ref cond, ref ok, ref nok) => { if cond.resolve(n) == 0 { @@ -280,7 +280,7 @@ impl Ast { impl Resolver { /// Returns the number of the correct plural form /// for `n` objects, as defined by the rule contained in this resolver. - pub fn resolve(&self, n: u64) -> usize { + pub fn resolve(&self, n: i64) -> usize { match *self { Expr(ref ast) => ast.resolve(n), Function(ref f) => f(n),