From c16473329ddb51619114b00cf367de84e742e6ed Mon Sep 17 00:00:00 2001 From: Christopher Lam Date: Sat, 7 Sep 2024 20:25:20 +0800 Subject: [PATCH] [attempt] test 500 dates with all locales, filtering into successful parses test 500 dates in 797 locales filtered down to 796 locales in 3.37s --- .../csv-imp/gnc-tokenizer-csv.hpp | 2 ++ .../import-export/csv-imp/gnc-tokenizer.cpp | 22 +++++++++++++++ .../csv-imp/test/test-tokenizer.cpp | 28 +++++++++++++++++++ libgnucash/engine/gnc-datetime.cpp | 9 +++--- 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp index c60910b5a42..3f04037f543 100644 --- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp +++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp @@ -59,4 +59,6 @@ class GncCsvTokenizer : public GncTokenizer std::string m_sep_str = ","; }; +void gnc_filter_locales (StrVec& candidate_locales, const StrVec dates); + #endif diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer.cpp b/gnucash/import-export/csv-imp/gnc-tokenizer.cpp index 8dc6b450eba..2d2b9827777 100644 --- a/gnucash/import-export/csv-imp/gnc-tokenizer.cpp +++ b/gnucash/import-export/csv-imp/gnc-tokenizer.cpp @@ -125,3 +125,25 @@ GncTokenizer::get_tokens() { return m_tokenized_contents; } + + + +using StrVec = std::vector; +#include "gnc-datetime.hpp" + +void +gnc_filter_locales (StrVec& candidate_locales, const StrVec dates) +{ + StrVec new_candidate_locales; + new_candidate_locales.reserve (candidate_locales.size()); + + for (const auto& date : dates) + { + new_candidate_locales.clear (); + for (const auto& locale : candidate_locales) + try { GncDate (date, locale); new_candidate_locales.push_back (locale); } + catch (const std::exception&) {}; + + std::swap (candidate_locales, new_candidate_locales); + } +} diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp index aad18520e0a..ff02e8d8300 100644 --- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp +++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp @@ -245,7 +245,35 @@ static tokenize_fw_test_data fixed_width [] = { { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } }, }; +#include // time_t +#include "gnc-locale-utils.hpp" + +static void test_filter_locales () +{ + std::vector dates; + + for (auto i = 0; i < 500; ++i) + dates.push_back ("09/22/2021"); + + auto locales = gnc_get_available_locales (); + std::cout << locales.size() << " locales available. Testing " + << dates.size() << " dates.\n"; + + auto start = clock(); + gnc_filter_locales (locales, dates); + auto end = clock(); + + double duration_sec = double(end-start)/CLOCKS_PER_SEC; + + std::cout << locales.size() << " locales left, checked in " + << duration_sec << " seconds:\n"; + for (auto locale : locales) + std::cout << ' ' << locale; + std::cout << '\n'; +} + TEST_F (GncTokenizerTest, tokenize_fw) { test_gnc_tokenize_helper (fixed_width); + test_filter_locales (); } diff --git a/libgnucash/engine/gnc-datetime.cpp b/libgnucash/engine/gnc-datetime.cpp index 24afbd0bfc5..0112de29072 100644 --- a/libgnucash/engine/gnc-datetime.cpp +++ b/libgnucash/engine/gnc-datetime.cpp @@ -552,10 +552,11 @@ locale_to_formatter_and_calendar (const std::string locale_str) if (!tuple) { auto locale = icu::Locale::createCanonical (locale_str.c_str()); - std::shared_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale)); + std::shared_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kShort, locale)); if (formatter == nullptr) throw std::invalid_argument ("Cannot parse string"); + formatter->setLenient (false); UErrorCode status = U_ZERO_ERROR; std::shared_ptr calendar(icu::Calendar::createInstance(locale, status)); if (U_FAILURE(status)) @@ -573,7 +574,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : /* Temporarily initialized to today, will be used and adjusted in the code below */ m_greg(boost::gregorian::day_clock::local_day()) { - std::cout << locale_str << '|' << str << ": "; + // std::cout << locale_str << '|' << str << ": "; auto [formatter, calendar] = locale_to_formatter_and_calendar (locale_str); icu::UnicodeString input = icu::UnicodeString::fromUTF8(str); @@ -582,7 +583,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : UDate date = formatter->parse(input, parsePos); if (parsePos.getErrorIndex() != -1) { - std::cout << "cannot parse " << std::endl; + // std::cout << "cannot parse " << std::endl; throw std::invalid_argument ("Cannot parse string"); } @@ -598,7 +599,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : if (U_FAILURE(status)) throw std::invalid_argument ("Cannot parse string"); - std::cout << day << '/' << month << '/' << year << std::endl; + // std::cout << day << '/' << month << '/' << year << std::endl; m_greg = Date(year, month, day); }