diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43f4b935..3e33b53b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -253,6 +253,7 @@ jobs: gen_locale en_US.UTF-8 # Assumed to be there by tests # Used by various tests + gen_locale de_DE.UTF-8 gen_locale he_IL.UTF-8 gen_locale ja_JP.UTF-8 gen_locale ru_RU.UTF-8 diff --git a/include/boost/locale.hpp b/include/boost/locale.hpp index 28709aa5..b22ca2ad 100644 --- a/include/boost/locale.hpp +++ b/include/boost/locale.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/include/boost/locale/numpunct.hpp b/include/boost/locale/numpunct.hpp new file mode 100644 index 00000000..84af076e --- /dev/null +++ b/include/boost/locale/numpunct.hpp @@ -0,0 +1,87 @@ +// +// Copyright (c) 2021-2021 Salvo Miosi +// Copyright (c) 2023-2023 Alexander Grund +// +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef BOOST_LOCALE_NUMPUNCT_HPP_INCLUDED +#define BOOST_LOCALE_NUMPUNCT_HPP_INCLUDED + +#include +#include +#include +#include + +namespace boost { namespace locale { + + /// \brief Extension of `std::numpunct` providing possibly encoded values of decimal point and thousands separator. + /// + /// To achieve interface compatibility with `std::numpunct` for the case where the separators are encoded using + /// multiple chars the functions `do_decimal_point` and `do_thousands_sep` will fall back to the values used by the + /// "C" locale. + /// + /// \note + /// + /// - Not all backends support encoded separators, so \ref decimal_point_str & \ref thousands_sep_str may return + /// strings of length 1. + /// - Some backends may provide single char replacements of the encoded separators instead of falling back to the + /// "C" locale. 
+ template + class numpunct : public std::numpunct { + BOOST_LOCALE_ASSERT_IS_SUPPORTED(CharType); + + public: + using string_type = std::numpunct::string_type; + + numpunct(size_t refs = 0) : std::numpunct(refs) {} + + /// Provides the character to use as decimal point possibly encoded into multiple code units + string_type decimal_point_str() const { return do_decimal_point_str(); } + /// Provides the character to use as thousands separator possibly encoded into multiple code units + string_type thousands_sep_str() const { return do_thousands_sep_str(); } + + protected: + CharType do_decimal_point() const override + { + const string_type dec = do_decimal_point_str(); + return (dec.size() > 1) ? '.' : dec[0]; + } + + /// Provides the character to use as decimal point possibly encoded into multiple code units + virtual string_type do_decimal_point_str() const + { + static const char t[] = "."; + return string_type(t, t + sizeof(t) - 1); + } + + CharType do_thousands_sep() const override + { + const string_type sep = do_thousands_sep_str(); + return (sep.size() > 1) ? ','
: sep[0]; + } + + /// Provides the character to use as thousands separator possibly encoded into multiple code units + virtual string_type do_thousands_sep_str() const + { + static const char t[] = ","; + return string_type(t, t + sizeof(t) - 1); + } + + string_type do_truename() const override + { + static const char t[] = "true"; + return string_type(t, t + sizeof(t) - 1); + } + + string_type do_falsename() const override + { + static const char t[] = "false"; + return string_type(t, t + sizeof(t) - 1); + } + }; +}} // namespace boost::locale + +#endif diff --git a/src/boost/locale/icu/numeric.cpp b/src/boost/locale/icu/numeric.cpp index d81f55f7..6a3d1e90 100644 --- a/src/boost/locale/icu/numeric.cpp +++ b/src/boost/locale/icu/numeric.cpp @@ -1,20 +1,26 @@ // // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) +// Copyright (c) 2021-2021 Salvo Miosi +// Copyright (c) 2022-2023 Alexander Grund // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt #include +#include #include "boost/locale/icu/all_generator.hpp" #include "boost/locale/icu/cdata.hpp" #include "boost/locale/icu/formatter.hpp" #include "boost/locale/icu/formatters_cache.hpp" +#include "boost/locale/icu/uconv.hpp" #include #include #include #include #include #include +#include +#include namespace boost { namespace locale { namespace impl_icu { @@ -304,12 +310,49 @@ namespace boost { namespace locale { namespace impl_icu { std::string enc_; }; + template + struct icu_numpunct : public numpunct { + typedef std::basic_string string_type; + + public: + icu_numpunct(const cdata& d) + { + UErrorCode err = U_ZERO_ERROR; + icu::NumberFormat* fmt = icu::NumberFormat::createInstance(d.locale(), UNUM_DECIMAL, err); + if(icu::DecimalFormat* dec = icu_cast(fmt)) { + boost::locale::impl_icu::icu_std_converter cnv(d.encoding()); + const icu::DecimalFormatSymbols* syms = dec->getDecimalFormatSymbols(); + decimal_point_ = 
cnv.std(syms->getSymbol(icu::DecimalFormatSymbols::kDecimalSeparatorSymbol)); + thousands_sep_ = cnv.std(syms->getSymbol(icu::DecimalFormatSymbols::kGroupingSeparatorSymbol)); + if(dec->isGroupingUsed()) { + int32_t grouping_size = dec->getGroupingSize(); + grouping_ = std::string(1, static_cast<char>(grouping_size)); + int32_t grouping_size_2 = dec->getSecondaryGroupingSize(); + if(grouping_size_2 > 0 && grouping_size_2 != grouping_size) { + grouping_ += static_cast(grouping_size_2); + } + } + } + } + + protected: + string_type do_decimal_point_str() const override { return decimal_point_; } + string_type do_thousands_sep_str() const override { return thousands_sep_; } + std::string do_grouping() const override { return grouping_; } + + private: + string_type decimal_point_; + string_type thousands_sep_; + std::string grouping_; + }; + template std::locale install_formatting_facets(const std::locale& in, const cdata& cd) { std::locale tmp = std::locale(in, new num_format(cd)); if(!std::has_facet(in)) tmp = std::locale(tmp, new formatters_cache(cd.locale())); + tmp = std::locale(tmp, new icu_numpunct(cd)); return tmp; } diff --git a/src/boost/locale/posix/numeric.cpp b/src/boost/locale/posix/numeric.cpp index a3ee2c40..6d14d6a5 100644 --- a/src/boost/locale/posix/numeric.cpp +++ b/src/boost/locale/posix/numeric.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -341,38 +342,24 @@ namespace boost { namespace locale { namespace impl_posix { }; template - class num_punct_posix : public std::numpunct { + class num_punct_posix : public numpunct { public: typedef std::basic_string string_type; - num_punct_posix(locale_t lc, size_t refs = 0) : std::numpunct(refs) + num_punct_posix(locale_t lc, size_t refs = 0) : numpunct(refs) { basic_numpunct np(lc); to_str(np.thousands_sep, thousands_sep_, lc); to_str(np.decimal_point, decimal_point_, lc); grouping_ = np.grouping; - if(thousands_sep_.size() > 1) - grouping_ = std::string(); - 
if(decimal_point_.size() > 1) - decimal_point_ = CharType('.'); } void to_str(std::string& s1, std::string& s2, locale_t /*lc*/) { s2.swap(s1); } void to_str(std::string& s1, std::wstring& s2, locale_t lc) { s2 = conv::to_utf(s1, nl_langinfo_l(CODESET, lc)); } - CharType do_decimal_point() const override { return *decimal_point_.c_str(); } - CharType do_thousands_sep() const override { return *thousands_sep_.c_str(); } + string_type do_decimal_point_str() const override { return decimal_point_; } + string_type do_thousands_sep_str() const override { return thousands_sep_; } std::string do_grouping() const override { return grouping_; } - string_type do_truename() const override - { - static const char t[] = "true"; - return string_type(t, t + sizeof(t) - 1); - } - string_type do_falsename() const override - { - static const char t[] = "false"; - return string_type(t, t + sizeof(t) - 1); - } private: string_type decimal_point_; diff --git a/src/boost/locale/win32/numeric.cpp b/src/boost/locale/win32/numeric.cpp index 556de79a..f31cc5ef 100644 --- a/src/boost/locale/win32/numeric.cpp +++ b/src/boost/locale/win32/numeric.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "boost/locale/win32/all_generator.hpp" #include "boost/locale/win32/api.hpp" #include @@ -96,10 +97,10 @@ namespace boost { namespace locale { namespace impl_win { }; template - class num_punct_win : public std::numpunct { + class num_punct_win : public numpunct { public: typedef std::basic_string string_type; - num_punct_win(const winlocale& lc, size_t refs = 0) : std::numpunct(refs) + num_punct_win(const winlocale& lc, size_t refs = 0) : numpunct(refs) { numeric_info np = wcsnumformat_l(lc); @@ -111,28 +112,14 @@ namespace boost { namespace locale { namespace impl_win { to_str(np.thousands_sep, thousands_sep_); to_str(np.decimal_point, decimal_point_); grouping_ = np.grouping; - if(thousands_sep_.size() > 1) - grouping_ = std::string(); - if(decimal_point_.size() > 1) - decimal_point_ 
= CharType('.'); } void to_str(std::wstring& s1, std::wstring& s2) { s2.swap(s1); } void to_str(std::wstring& s1, std::string& s2) { s2 = conv::utf_to_utf(s1); } - CharType do_decimal_point() const override { return *decimal_point_.c_str(); } - CharType do_thousands_sep() const override { return *thousands_sep_.c_str(); } + string_type do_decimal_point_str() const override { return decimal_point_; } + string_type do_thousands_sep_str() const override { return thousands_sep_; } std::string do_grouping() const override { return grouping_; } - string_type do_truename() const override - { - static const char t[] = "true"; - return string_type(t, t + sizeof(t) - 1); - } - string_type do_falsename() const override - { - static const char t[] = "false"; - return string_type(t, t + sizeof(t) - 1); - } private: string_type decimal_point_; @@ -143,29 +130,25 @@ namespace boost { namespace locale { namespace impl_win { template std::locale create_formatting_impl(const std::locale& in, const winlocale& lc) { + std::locale tmp(in, new num_format(lc)); if(lc.is_c()) { - std::locale tmp(in, new std::numpunct_byname("C")); + tmp = std::locale(tmp, new numpunct()); tmp = std::locale(tmp, new std::time_put_byname("C")); - tmp = std::locale(tmp, new num_format(lc)); - return tmp; } else { - std::locale tmp(in, new num_punct_win(lc)); + tmp = std::locale(tmp, new num_punct_win(lc)); tmp = std::locale(tmp, new time_put_win(lc)); - tmp = std::locale(tmp, new num_format(lc)); - return tmp; } + return tmp; } template std::locale create_parsing_impl(const std::locale& in, const winlocale& lc) { - std::numpunct* np = 0; + std::locale tmp(in, new util::base_num_parse()); if(lc.is_c()) - np = new std::numpunct_byname("C"); + tmp = std::locale(tmp, new numpunct()); else - np = new num_punct_win(lc); - std::locale tmp(in, np); - tmp = std::locale(tmp, new util::base_num_parse()); + tmp = std::locale(tmp, new num_punct_win(lc)); return tmp; } diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 
40f4b6f1..7e3a1263 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -44,6 +44,7 @@ run test_codecvt.cpp ; run test_codepage_converter.cpp ; run test_stream_io.cpp ; run test_message.cpp : $(BOOST_ROOT)/libs/locale/test ; +run test_numpunct.cpp ; run test_generator.cpp ; # icu run test_collate.cpp ; diff --git a/test/test_generator.cpp b/test/test_generator.cpp index 084101bf..982cf9aa 100644 --- a/test/test_generator.cpp +++ b/test/test_generator.cpp @@ -375,6 +375,7 @@ void test_main(int /*argc*/, char** /*argv*/) TEST_HAS_FACETS(std::num_put, l); TEST_HAS_FACETS(std::time_put, l); TEST_HAS_FACETS(std::numpunct, l); + TEST_HAS_FACETS(bl::numpunct, l); TEST_HAS_FACETS(std::moneypunct, l); // Parsing TEST_HAS_FACETS(std::num_get, l); diff --git a/test/test_numpunct.cpp b/test/test_numpunct.cpp new file mode 100644 index 00000000..0ee0909b --- /dev/null +++ b/test/test_numpunct.cpp @@ -0,0 +1,150 @@ +// +// Copyright (c) 2023 Alexander Grund +// +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include +#include +#include "boostLocale/test/tools.hpp" +#include "boostLocale/test/unit_test.hpp" +#include +#include +#include +#include + +namespace bl = boost::locale; + +template +void as_if_std_numpunct(const std::locale& l) +{ + const std::numpunct& std_facet = std::use_facet>(l); + const bl::numpunct& boost_facet = std::use_facet>(l); + // All functions present in std::numpunct are also present in boost::locale::numpunct and yield the same results + TEST_REQUIRE(dynamic_cast*>(&std_facet)); // In fact they are equal + TEST_EQ(std_facet.decimal_point(), boost_facet.decimal_point()); + TEST_EQ(std_facet.thousands_sep(), boost_facet.thousands_sep()); + TEST_EQ(std_facet.grouping(), boost_facet.grouping()); + TEST_EQ(std_facet.truename(), boost_facet.truename()); + TEST_EQ(std_facet.falsename(), boost_facet.falsename()); +} + +namespace { +template +struct Punctuation { + std::basic_string decimal; + std::basic_string thousand; +}; + +const std::map> expected_punctuations = { + {"en", {".", ","}}, + {"de", {",", "."}}, + {"he", {".", ","}}, + {"ja", {".", ","}}, + {"ru", {",", "\xC2\xA0"}}, + {"it", {".", ","}}, +}; + +template class Res> +Res get_expected(const std::map>& from, const std::locale& l) +{ + const auto& src = from.at(std::use_facet(l).language()); + return {to_correct_string(src.decimal, l), to_correct_string(src.thousand, l)}; +} + +template +struct split_result { + std::basic_string digits, others; +}; + +template +split_result split_number(const std::basic_string& s) +{ + split_result res; + for(Char c : s) { + if(c >= std::numeric_limits::min() && c <= std::numeric_limits::max() + && boost::locale::util::is_numeric_ascii(static_cast(c))) + res.digits += c; + else + res.others += c; + } + return res; +} +} // namespace + +template +void test_for_char(const std::locale& l) +{ + using string_type = std::basic_string; + as_if_std_numpunct(l); + { + const auto& expected = 
get_expected(expected_punctuations, l); + const auto& boost_facet = std::use_facet>(l); + TEST_EQ(boost_facet.decimal_point_str(), expected.decimal); + TEST_EQ(boost_facet.thousands_sep_str(), expected.thousand); + } + std::basic_ostringstream s; + s.imbue(l); + // Formatting not using the Boost.Locale modifiers uses (only) the std::numpunct values + { + const auto& facet = std::use_facet>(l); + empty_stream(s) << 1234567890; + auto actual = split_number(s.str()); + TEST_EQ(actual.digits, ascii_to("1234567890")); + TEST_EQ(actual.others, string_type(actual.others.size(), facet.thousands_sep())); + + empty_stream(s) << 12.25; + actual = split_number(s.str()); + TEST_EQ(actual.digits, ascii_to("1225")); + TEST_EQ(actual.others, string_type(actual.others.size(), facet.decimal_point())); + } + // Formatting using the Boost.Locale modifiers uses the boost::locale::numpunct values + s << bl::as::number; + { + const auto& facet = std::use_facet>(l); + empty_stream(s) << 1234567890; + auto actual = split_number(s.str()); + TEST_EQ(actual.digits, ascii_to("1234567890")); + TEST_EQ(actual.others, string_type(actual.others.size(), facet.thousands_sep())); + + empty_stream(s) << 12.25; + actual = split_number(s.str()); + TEST_EQ(actual.digits, ascii_to("1225")); + TEST_EQ(actual.others, string_type(actual.others.size(), facet.decimal_point())); + } +} + +void test_for_locale(const std::string& name) +{ + std::cout << "-- Locale: " << name << '\n'; + const std::locale l = bl::generator{}(name); + std::cout << "---- char\n"; + test_for_char(l); + std::cout << "---- wchar_t\n"; + test_for_char(l); +#ifdef BOOST_LOCALE_ENABLE_CHAR16_T + std::cout << "---- char16_t\n"; + test_for_char(l); +#endif +#ifdef BOOST_LOCALE_ENABLE_CHAR32_T + std::cout << "---- char32_t\n"; + test_for_char(l); +#endif +} + +void test_main(int /*argc*/, char** /*argv*/) +{ + const bl::localization_backend_manager orig_backend = bl::localization_backend_manager::global(); + for(const std::string& backendName 
: orig_backend.get_all_backends()) { + std::cout << "Backend: " << backendName << std::endl; + bl::localization_backend_manager tmp_backend = bl::localization_backend_manager::global(); + tmp_backend.select(backendName); + bl::localization_backend_manager::global(tmp_backend); + test_for_locale("en_US.UTF-8"); + test_for_locale("de_DE.UTF-8"); + test_for_locale("he_IL.UTF-8"); + test_for_locale("ja_JP.UTF-8"); + test_for_locale("ru_RU.UTF-8"); + test_for_locale("it_IT"); + } +}