Skip to content

Commit

Permalink
[attempt] test 500 dates with all locales, filtering into successful …
Browse files Browse the repository at this point in the history
…parses

test 500 dates in 797 locales filtered down to 796 locales in 3.37s
  • Loading branch information
christopherlam committed Sep 7, 2024
1 parent 8ab8958 commit c164733
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 4 deletions.
2 changes: 2 additions & 0 deletions gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,6 @@ class GncCsvTokenizer : public GncTokenizer
std::string m_sep_str = ",";
};

/** Narrow a list of locale names down to those able to parse every date.
 *
 *  @param candidate_locales Locale names to test. Modified in place: on
 *         return it holds only the locales for which no date was rejected.
 *  @param dates Date strings to try in each candidate locale. Taken by value.
 */
void gnc_filter_locales (StrVec& candidate_locales, const StrVec dates);

#endif
22 changes: 22 additions & 0 deletions gnucash/import-export/csv-imp/gnc-tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,25 @@ GncTokenizer::get_tokens()
{
return m_tokenized_contents;
}



using StrVec = std::vector<std::string>;
#include "gnc-datetime.hpp"

/** Narrow @a candidate_locales down to the locales able to parse every date.
 *
 *  Each date string is tried against every locale still in the running by
 *  constructing a temporary GncDate from the (date, locale) pair. A locale
 *  is dropped as soon as that construction throws a std::exception for any
 *  date; the relative order of the surviving locales is preserved.
 *
 *  @param candidate_locales Locale names to test. Replaced in place by the
 *         subset that parsed all dates. Left untouched when @a dates is
 *         empty.
 *  @param dates Date strings to parse. Taken by value to match the
 *         declaration in the header.
 */
void
gnc_filter_locales (StrVec& candidate_locales, const StrVec dates)
{
    // Scratch buffer reused for every date so we allocate at most once.
    StrVec surviving_locales;
    surviving_locales.reserve (candidate_locales.size());

    for (const auto& date : dates)
    {
        // Once every locale has been ruled out the remaining dates cannot
        // change the result, so stop instead of cycling through them.
        if (candidate_locales.empty())
            break;

        surviving_locales.clear ();
        for (const auto& locale : candidate_locales)
        {
            try
            {
                // Constructed only for its side effect: a throw means this
                // locale cannot parse this date.
                GncDate (date, locale);
                surviving_locales.push_back (locale);
            }
            catch (const std::exception&)
            {
                // Deliberately ignored: the locale is simply not carried
                // over to the next round.
            }
        }

        // The survivors become the candidates for the next date; the old
        // list becomes the scratch buffer and is cleared on the next pass.
        std::swap (candidate_locales, surviving_locales);
    }
}
28 changes: 28 additions & 0 deletions gnucash/import-export/csv-imp/test/test-tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,35 @@ static tokenize_fw_test_data fixed_width [] = {
{ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
};

#include <ctime> // time_t
#include "gnc-locale-utils.hpp"

/* Exploratory timing check for gnc_filter_locales(): feeds 500 copies of one
 * date string through every locale returned by gnc_get_available_locales()
 * and prints how many locales remain, how long the filtering took, and the
 * names of the remaining locales. It makes no assertions; the output is for
 * manual inspection only.
 */
static void test_filter_locales ()
{
    // 500 identical month/day/year strings (22 cannot be a month).
    const std::vector<std::string> dates (500, "09/22/2021");

    auto locales = gnc_get_available_locales ();
    std::cout << locales.size() << " locales available. Testing "
              << dates.size() << " dates.\n";

    // clock() reports processor time used by the program, not wall time.
    const auto start = clock();
    gnc_filter_locales (locales, dates);
    const auto end = clock();

    const double duration_sec = double(end-start)/CLOCKS_PER_SEC;

    std::cout << locales.size() << " locales left, checked in "
              << duration_sec << " seconds:\n";
    // Bind by const reference so each locale name is not copied just to be
    // printed.
    for (const auto& locale : locales)
        std::cout << ' ' << locale;
    std::cout << '\n';
}

// Runs the fixed-width tokenizer cases from the fixed_width table, then the
// locale-filtering timing check.
TEST_F (GncTokenizerTest, tokenize_fw)
{
test_gnc_tokenize_helper (fixed_width);
// NOTE(review): the locale-filter run is unrelated to fixed-width
// tokenizing and only piggybacks on this test case; consider giving it
// its own test.
test_filter_locales ();
}
9 changes: 5 additions & 4 deletions libgnucash/engine/gnc-datetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,10 +552,11 @@ locale_to_formatter_and_calendar (const std::string locale_str)
if (!tuple)
{
auto locale = icu::Locale::createCanonical (locale_str.c_str());
std::shared_ptr<icu::DateFormat> formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale));
std::shared_ptr<icu::DateFormat> formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kShort, locale));
if (formatter == nullptr)
throw std::invalid_argument ("Cannot parse string");

formatter->setLenient (false);
UErrorCode status = U_ZERO_ERROR;
std::shared_ptr<icu::Calendar> calendar(icu::Calendar::createInstance(locale, status));
if (U_FAILURE(status))
Expand All @@ -573,7 +574,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) :
/* Temporarily initialized to today, will be used and adjusted in the code below */
m_greg(boost::gregorian::day_clock::local_day())
{
std::cout << locale_str << '|' << str << ": ";
// std::cout << locale_str << '|' << str << ": ";

auto [formatter, calendar] = locale_to_formatter_and_calendar (locale_str);
icu::UnicodeString input = icu::UnicodeString::fromUTF8(str);
Expand All @@ -582,7 +583,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) :
UDate date = formatter->parse(input, parsePos);
if (parsePos.getErrorIndex() != -1)
{
std::cout << "cannot parse " << std::endl;
// std::cout << "cannot parse " << std::endl;
throw std::invalid_argument ("Cannot parse string");
}

Expand All @@ -598,7 +599,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) :
if (U_FAILURE(status))
throw std::invalid_argument ("Cannot parse string");

std::cout << day << '/' << month << '/' << year << std::endl;
// std::cout << day << '/' << month << '/' << year << std::endl;
m_greg = Date(year, month, day);
}

Expand Down

0 comments on commit c164733

Please sign in to comment.