Skip to content

Commit

Permalink
feat: add citation element (#120)
Browse files Browse the repository at this point in the history
* feat: add citation element

* feat: add preamble parsing

* feat: allow multiple csl locale files

* fix: only merge preamble if a preamble exists

* add citeproc-js + rendering of the in-text part of citations

* prepare citeproc adapter for footnotes and bibliography

* use values from preamble + add some styles and locales

* add bibliography and footnotes to end of document

* replace String::from with .to_string()

* support german and english for bibliography heading

* add the csl struct that Manuel generated

* merge input .csl files to one json object

* change type of citation_locales to HashMap<Locale, PathBuf>

* add warnings to reading the locale and style files

* add CiteErrors

* run cargo fmt

* move json serialization from get_citation_strings to new of Context + add logs when it fails

* serialize each citation ids vector individually

* add class for Css entries

* try to stop clippy from complaining about the generated file

* parse csl-locales from cli using a vector of tuples

* serialize locale-pathbuf map using map serializer from serde

* use custom visitor for citation_locales deserialization

* use custom visitor for multiple locales deserialization

* run cargo fmt

* Revert "Merge branch 'main' into cite-box"

This reverts commit 4c5eeb0, reversing
changes made to 9d064a9.

* Revert "Revert "Merge branch 'main' into cite-box""

This reverts commit 2746775.

* fix: resolve merge conflicts

* fix: add bib & footnotes to umi

* fix: pass output format to render context

* add unit test for citations

* don't print bibliography and footnotes if the strings are empty

* don't start the citeproc processor if the input doesn't contain citations

* fix: remove double insert of locale serialization

* fix: use clap::Error when converting csl locales

* fix: destructure csl locale tuple in loop

* fix: use Option for footnote & bib rendering

* fix: add prop-column to footnote & bib umi dummies

* fix: resolve logic conflicts due to merge

---------

Co-authored-by: Elena Krippner <[email protected]>
Co-authored-by: ElenaKrippner <[email protected]>
  • Loading branch information
3 people authored Feb 2, 2024
1 parent 7a46853 commit b133fb6
Show file tree
Hide file tree
Showing 68 changed files with 43,136 additions and 104 deletions.
5 changes: 5 additions & 0 deletions cli/tests/test_files/preamble.um
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
+++
lang: "de-AT"
+++

Deutscher Text
217 changes: 207 additions & 10 deletions commons/src/config/locale.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,78 @@ pub mod serde {

pub mod multiple {
use super::*;
use crate::config::log_id::ConfigWarning;
use logid::log;
use serde::de::{SeqAccess, Visitor};
use serde::ser::SerializeSeq;
use std::collections::HashSet;

/// Serializes a set of locales as a sequence of their string representations.
///
/// Meant to be used through `#[serde(with = "...")]` on `HashSet<Locale>` fields.
/// Elements are emitted in the set's iteration order, which is unspecified.
pub fn serialize<S>(locales: &HashSet<Locale>, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let mut seq = serializer.serialize_seq(Some(locales.len()))?;
    for locale in locales {
        seq.serialize_element(&locale.to_string())?;
    }
    seq.end()
}

/// Deserializes a sequence of locale strings into a set of locales.
///
/// Follows the shape serde requires of a `deserialize_with` function:
///
/// ```text
/// fn deserialize<'de, D>(D) -> Result<T, D::Error>
/// where
///     D: Deserializer<'de>
/// ```
///
/// The per-element handling is done by `LocaleSeqVisitor`.
pub fn deserialize<'de, D>(deserializer: D) -> Result<HashSet<Locale>, D::Error>
where
    D: Deserializer<'de>,
{
    let visitor = LocaleSeqVisitor {};
    deserializer.deserialize_seq(visitor)
}

/// Stateless serde visitor that turns a sequence of locale strings into a `HashSet<Locale>`.
struct LocaleSeqVisitor {}

for locale in locales.iter() {
res.push_str(&locale.to_string());
res.push(',');
impl<'de> Visitor<'de> for LocaleSeqVisitor {
    type Value = HashSet<Locale>;

    /// Describes the expected input for serde's error messages.
    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("Sequence of locales.")
    }

    /// Reads every element as a `String` and parses it as a locale.
    ///
    /// Elements that cannot be parsed are logged as `ConfigWarning::InvalidOutputLang`
    /// and skipped. Errors raised by the sequence access itself are propagated.
    fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
    where
        M: SeqAccess<'de>,
    {
        let capacity = access.size_hint().unwrap_or(0);
        let mut locales = HashSet::with_capacity(capacity);

        while let Some(raw) = access.next_element::<String>()? {
            let locale = match raw.parse::<Locale>() {
                Ok(locale) => locale,
                Err(e) => {
                    log!(
                        ConfigWarning::InvalidOutputLang,
                        format!("Could not parse the output language to locale with error: '{}'", e)
                    );
                    continue;
                }
            };
            locales.insert(locale);
        }

        Ok(locales)
    }
}
}

res.pop();
pub mod optional {
use super::*;

serializer.serialize_str(&res)
/// Serializes an optional locale.
///
/// `Some(locale)` is written as its string representation via `serialize_some`;
/// `None` is written via `serialize_none`.
pub fn serialize<S>(locale: &Option<Locale>, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    if let Some(locale) = locale {
        serializer.serialize_some(&locale.to_string())
    } else {
        serializer.serialize_none()
    }
}

// The signature of a deserialize_with function must follow the pattern:
Expand All @@ -56,15 +112,69 @@ pub mod serde {
// D: Deserializer<'de>
//
// although it may also be generic over the output types T.
pub fn deserialize<'de, D>(deserializer: D) -> Result<HashSet<Locale>, D::Error>
/// Deserializes an optional locale from an optional string.
///
/// * A null/none value yields `Ok(None)`.
/// * A string that parses as a locale yields `Ok(Some(locale))`.
/// * A string that is not a valid locale yields `Ok(None)`.
///
/// # Errors
///
/// Returns the deserializer's error if the value is neither null/none nor a string.
pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Locale>, D::Error>
where
    D: Deserializer<'de>,
{
    // Read an `Option<String>` instead of a plain `String`: the sibling `serialize`
    // writes `None` via `serialize_none`, and a plain `String` cannot be read back from that.
    let raw = Option::<String>::deserialize(deserializer)?;

    // An unparsable locale is deliberately treated as "no locale" rather than as an error.
    Ok(raw.and_then(|s| s.parse::<Locale>().ok()))
}
}

pub mod hashmap {
use super::*;
use serde::de::{MapAccess, Visitor};
use serde::ser::SerializeMap;
use std::collections::HashMap;
use std::path::PathBuf;

/// Serializes a locale-to-path map as a map keyed by the locales' string representations.
///
/// Entries are emitted in the map's iteration order, which is unspecified.
pub fn serialize<S>(
    locales_map: &HashMap<Locale, PathBuf>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let mut entries = serializer.serialize_map(Some(locales_map.len()))?;
    locales_map
        .iter()
        .try_for_each(|(locale, path)| entries.serialize_entry(&locale.to_string(), path))?;
    entries.end()
}

s.split(',')
.map(|lang| lang.parse().map_err(serde::de::Error::custom))
.collect()
/// Deserializes a map of locale strings to file paths.
///
/// Follows the shape serde requires of a `deserialize_with` function:
///
/// ```text
/// fn deserialize<'de, D>(D) -> Result<T, D::Error>
/// where
///     D: Deserializer<'de>
/// ```
///
/// The per-entry handling is done by `LocaleMapVisitor`.
pub fn deserialize<'de, D>(deserializer: D) -> Result<HashMap<Locale, PathBuf>, D::Error>
where
    D: Deserializer<'de>,
{
    let visitor = LocaleMapVisitor {};
    deserializer.deserialize_map(visitor)
}
struct LocaleMapVisitor {}
impl<'de> Visitor<'de> for LocaleMapVisitor {
type Value = HashMap<Locale, PathBuf>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("Map with locale as key and file path as value.")
}
fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
where
M: MapAccess<'de>,
{
let mut map = HashMap::with_capacity(access.size_hint().unwrap_or(0));
while let Some((key, value)) = access.next_entry::<String, PathBuf>()? {
let locale = key.parse::<Locale>().map_err(serde::de::Error::custom)?;
map.insert(locale, value);
}
Ok(map)
}
}
}
}
Expand All @@ -79,3 +189,90 @@ pub mod clap {
})
}
}

#[cfg(test)]
mod tests {
    use crate::config::locale::serde::{hashmap, multiple};
    use icu_locid::{locale, Locale};
    use serde::{Deserialize, Serialize};
    use std::collections::{HashMap, HashSet};
    use std::path::PathBuf;

    /// Wrapper that routes a locale set through the `multiple` (de)serializer.
    #[derive(Serialize, Deserialize)]
    struct LocalesStruct {
        #[serde(with = "multiple")]
        locales: HashSet<Locale>,
    }

    /// Wrapper that routes a locale-to-path map through the `hashmap` (de)serializer.
    #[derive(Serialize, Deserialize)]
    struct LocalePathBufStruct {
        #[serde(with = "hashmap")]
        map: HashMap<Locale, PathBuf>,
    }

    /// Expected locale-to-path map shared by the hashmap tests.
    fn de_en_paths() -> HashMap<Locale, PathBuf> {
        HashMap::from([
            (locale!("de"), PathBuf::from("path/to/de")),
            (locale!("en"), PathBuf::from("path/to/en")),
        ])
    }

    #[test]
    fn serialize_multiple() {
        let locales = HashSet::from([locale!("de"), locale!("en")]);

        let actual = serde_yaml::to_string(&LocalesStruct { locales }).unwrap();

        // Set iteration order is unspecified, so both element orders are accepted.
        let accepted = [
            "---\nlocales:\n - en\n - de\n",
            "---\nlocales:\n - de\n - en\n",
        ];
        assert!(accepted.contains(&actual.as_str()));
    }

    #[test]
    fn deserialize_multiple() {
        let actual: LocalesStruct = serde_yaml::from_str("locales:\n - en\n - de").unwrap();

        assert_eq!(actual.locales, HashSet::from([locale!("de"), locale!("en")]));
    }

    #[test]
    fn serialize_hashmap() {
        let actual = serde_yaml::to_string(&LocalePathBufStruct { map: de_en_paths() }).unwrap();

        // Map iteration order is unspecified, so both entry orders are accepted.
        let accepted = [
            "---\nmap:\n en: path/to/en\n de: path/to/de\n",
            "---\nmap:\n de: path/to/de\n en: path/to/en\n",
        ];
        assert!(accepted.contains(&actual.as_str()));
    }

    #[test]
    fn deserialize_hashmap() {
        let actual: LocalePathBufStruct =
            serde_yaml::from_str("map:\n de: path/to/de\n en: path/to/en").unwrap();

        assert_eq!(actual.map, de_en_paths());
    }
}
9 changes: 8 additions & 1 deletion commons/src/config/log_id.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use logid::ErrLogId;
use logid::{ErrLogId, WarnLogId};
use thiserror::Error;

/// Log-ids for config errors
Expand All @@ -20,3 +20,10 @@ pub enum ConfigErr {
#[error("Given locale is not in default locales data. Please provide data or use one of supported default locales: en-US, de-AT, bs-BA")]
BadLocaleUsed,
}

/// Log-ids for config warnings.
///
/// Counterpart to `ConfigErr`: these ids derive `WarnLogId` instead of `ErrLogId`.
#[derive(Debug, Clone, WarnLogId)]
pub enum ConfigWarning {
/// Log-id denoting an invalid `output_lang` entry, i.e. a value that could not be
/// parsed as a locale.
InvalidOutputLang,
}
1 change: 1 addition & 0 deletions commons/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};
use self::{log_id::ConfigErr, output::Output, preamble::Preamble};

pub use icu_locid;
use icu_locid::locale;

pub mod locale;
pub mod log_id;
Expand Down
51 changes: 48 additions & 3 deletions commons/src/config/preamble.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{

use clap::Args;
use icu_locid::Locale;
use logid::err;
use logid::{err, log};
use serde::{Deserialize, Serialize};

use super::{locale, log_id::ConfigErr, parse_to_hashset, ConfigFns, ReplaceIfNone};
Expand Down Expand Up @@ -49,8 +49,8 @@ pub fn default_locale() -> Locale {
#[derive(Args, Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct I18n {
#[arg(long, value_parser = locale::clap::parse_locale, default_value = "en")]
#[serde(with = "locale::serde::single", default = "self::default_locale")]
pub lang: Locale,
#[serde(with = "locale::serde::optional")]
pub lang: Option<Locale>,

#[arg(long, value_parser = parse_to_hashset::<Locale>, required = false, default_value = "")]
#[serde(with = "locale::serde::multiple", default)]
Expand All @@ -60,6 +60,7 @@ pub struct I18n {

impl ConfigFns for I18n {
/// Merges the i18n settings of `other` into `self`.
///
/// `lang` is handed to `replace_none` — presumably `other.lang` is only taken when
/// `self.lang` is `None`; confirm against the `ReplaceIfNone` trait. The
/// `output_langs` of `other` are added to those of `self`.
fn merge(&mut self, other: Self) {
self.lang.replace_none(other.lang);
self.output_langs.extend(other.output_langs);
}

Expand Down Expand Up @@ -92,6 +93,8 @@ impl ConfigFns for RenderConfig {
/// Merges the render settings of `other` into `self`.
///
/// `ignore` and `parameter` are extended with the entries of `other`. The boolean
/// flags are OR-ed, so a flag that is set on either side stays set.
fn merge(&mut self, other: Self) {
self.ignore.extend(other.ignore);
self.parameter.extend(other.parameter);
self.keep_comments |= other.keep_comments;
self.allow_unsafe |= other.allow_unsafe;
}

fn validate(&self) -> Result<(), ConfigErr> {
Expand All @@ -113,17 +116,57 @@ pub struct Citedata {
#[serde(skip_serializing_if = "HashSet::is_empty")]
#[serde(default)]
pub references: HashSet<PathBuf>,
/// Optional files containing locale information to render citations.
#[clap(skip)]
#[serde(with = "locale::serde::hashmap", default)]
pub citation_locales: HashMap<Locale, PathBuf>,

#[arg(long = "csl-locale", value_parser = parse_locale_path_buf, required = false, default_value = "")]
#[serde(skip)]
pub csl_locales: Vec<(Locale, PathBuf)>,
}

fn parse_locale_path_buf(s: &str) -> Result<(Locale, PathBuf), clap::Error> {
if s.is_empty() {
return Ok((locale!("en"), PathBuf::default()));
}
let pos = s.find('=').ok_or_else(|| {
clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("invalid KEY=value: no `=` found in `{s}`"),
)
})?;
let mut locale = locale!("en");
match s[..pos].parse::<Locale>() {
Ok(l) => locale = l,
Err(e) => {
log!(
ConfigErr::InvalidFile,
format!("Parsing the locale failed with error: '{:?}'", e)
);
}
};
let path_buf: PathBuf = s[pos + 1..].parse().unwrap();
Ok((locale, path_buf))
}

impl ConfigFns for Citedata {
/// Merges the citation settings of `other` into `self`.
///
/// `style` is handed to `replace_none`, and `references` is extended with those of `other`.
/// `citation_locales` is then filled from three sources. A later source overwrites an
/// earlier one for the same locale, so the order matters:
///
/// 1. `self.csl_locales`
/// 2. `other.csl_locales`
/// 3. `other.citation_locales`
///
/// `self.csl_locales` itself is left unchanged.
// NOTE(review): `parse_locale_path_buf` maps an empty argument to `(en, empty path)`;
// if such an entry is present in `csl_locales` it is inserted here like any other —
// confirm whether it should be skipped.
fn merge(&mut self, other: Self) {
    self.style.replace_none(other.style);
    self.references.extend(other.references);

    // Only the pairs are cloned because `self.csl_locales` must stay intact.
    // `other` is owned, so its collections are moved rather than cloned.
    self.citation_locales
        .extend(self.csl_locales.iter().cloned());
    self.citation_locales.extend(other.csl_locales);
    self.citation_locales.extend(other.citation_locales);
}

fn validate(&self) -> Result<(), ConfigErr> {
if let Some(file) = &self.style {
if !file.exists() {
// TODO: check for included styles
return err!(
ConfigErr::InvalidFile,
format!("Citation Style Language file not found: {:?}", file)
Expand All @@ -140,6 +183,8 @@ impl ConfigFns for Citedata {
}
}

// TODO: ensure locale is available for citations

Ok(())
}
}
Expand Down
Loading

0 comments on commit b133fb6

Please sign in to comment.