diff --git a/include/libsemigroups/detail/rewriters.hpp b/include/libsemigroups/detail/rewriters.hpp new file mode 100644 index 000000000..81d6115f6 --- /dev/null +++ b/include/libsemigroups/detail/rewriters.hpp @@ -0,0 +1,706 @@ +// +// libsemigroups - C++ library for semigroups and monoids +// Copyright (C) 2023-2024 Joseph Edwards + James D. Mitchell +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +// This file contains the implementation of a Rule object and containers for +// Rule objects. It also includes rewriter classes that can be used to rewrite +// strings relative to a collection of rules. + +#ifndef LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_ +#define LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_ + +#include // for atomic +#include // for time_point +#include // for set +#include // for basic_string, operator== +#include // for unordered map +#include // for unordered set + +#include "../aho-corasick.hpp" +#include "../debug.hpp" // for LIBSEMIGROUPS_ASSERT +#include "../order.hpp" // for shortlex_compare + +#include "multi-string-view.hpp" // for MultiStringView + +// TODO(2) Add a KnuthBendix pointer to the rewriter class so that overlap +// detection can be handled by the rewriter (and therefore depend on the +// implementation) rather than on the KB object. + +//! \defgroup rewriters_group Rewriters +//! +//! This file contains documentation for the functionality of the following +//! 
classes in `libsemigroups`: +//! * \ref libsemigroups::Rule "Rule" +//! * \ref RuleLookup +//! * \ref Rules +//! * \ref RewriterBase +//! * \ref RewriteFromLeft +//! * \ref RewriteTrie +namespace libsemigroups { + namespace detail { + // TODO(2) remove from libsemigroups namespace and put into relevant class + + //! \ingroup rewriters_group + //! + //! Alias for the type of word that can be input by the user + using external_string_type = std::string; + + //! \ingroup rewriters_group + //! + //! Alias for the type of word used internally in the implementation + using internal_string_type = std::string; + + //! \ingroup rewriters_group + //! + //! Alias for the type of letter that can be input by the user + using external_char_type = char; + + //! \ingroup rewriters_group + //! + //! Alias for the type of letter used internally in the implementation + using internal_char_type = char; + + //! \ingroup rewriters_group + //! + //! \brief For a rewriting rule + //! + //! Defined in ``rewriters.hpp``. + //! + //! This class implements a data structure for storing *rewriting rules*. + //! Here, a rewriting rule is a rule of the form \f$A \to B\f$, where + //! \f$A\f$ and \f$B\f$ are both words over some alphabet \f$\Sigma\f$. + //! + //! The left-hand and right-hand sides of a rule are specified externally + //! with the type \ref external_string_type, and stored internally with type + //! \ref internal_string_type. + class Rule { + internal_string_type* _lhs; + internal_string_type* _rhs; + int64_t _id; + + public: + //! \brief Construct with new empty left-hand and right-hand sides. + //! + //! Construct with new empty left-hand and right-hand sides. + //! + //! \param id the id of the new rule. + //! + //! \exception + //! \no_libsemigroups_except + explicit Rule(int64_t id); + + Rule& operator=(Rule const& copy) = delete; + Rule(Rule const& copy) = delete; + Rule(Rule&& copy) = delete; + Rule& operator=(Rule&& copy) = delete; + + //! \brief Destruct the Rule. + //! 
+ //! This function destructs a \ref Rule object by deleting the pointers + //! used for the left-hand and right-hand sides. + ~Rule() { + delete _lhs; + delete _rhs; + } + + //! \brief Return the left-hand side of the rule. + //! + //! Return the left-hand side of the rule. If this rule was created by a + //! \ref KnuthBendix, this is guaranteed to be greater than its right-hand + //! side according to the reduction ordering of that \ref KnuthBendix. + //! + //! \returns A pointer to the left-hand side. + //! + //! \exception + //! \noexcept + //! + //! \complexity + //! Constant. + //! + //! \sa + //! \ref KnuthBendix + [[nodiscard]] internal_string_type* lhs() const noexcept { + return _lhs; + } + + //! \brief Return the right-hand side of the rule. + //! + //! Return the right-hand side of the rule. If this rule was created by a + //! \ref KnuthBendix, this is guaranteed to be less than its left-hand + //! side according to the reduction ordering of that \ref KnuthBendix. + //! + //! \returns A pointer to the right-hand side. + //! + //! \exception + //! \noexcept + //! + //! \complexity + //! Constant. + //! + //! \sa + //! \ref KnuthBendix + [[nodiscard]] internal_string_type* rhs() const noexcept { + return _rhs; + } + + //! \brief Check if the left-hand and right-hand sides are empty. + //! + //! Check if the words pointed to by both the left-hand and the right-hand + //! sides are empty. + //! + //! \returns A value of type `bool`. + //! + //! \exception + //! \noexcept + //! + //! \complexity + //! Constant. + [[nodiscard]] bool empty() const noexcept { + return _lhs->empty() && _rhs->empty(); + } + + //! \brief Check if the Rule is active. + //! + //! Check if the rule is active. + //! + //! \returns A value of type `bool`. + //! + //! \exception + //! \noexcept + //! + //! \complexity + //! Constant. + //! + //! \sa + //! 
\ref RewriterBase::active_rules() + // TODO check the above ref points to something sensible + [[nodiscard]] inline bool active() const noexcept { + LIBSEMIGROUPS_ASSERT(_id != 0); + return (_id > 0); + } + + //! \brief Deactivate a rule. + //! + //! Deactivate a rule, if it is active. + //! + //! \exception + //! \noexcept + //! + //! \complexity + //! Constant. + //! + //! \sa + //! \ref active + void deactivate() noexcept; + + //! \brief Activate a rule. + //! + //! Activate a rule, if it is inactive. + //! + //! \exception + //! \noexcept + //! + //! \complexity + //! Constant. + //! + //! \sa + //! \ref active + void activate() noexcept; + + //! \brief Set the id of a rule. + //! + //! Set the id of a rule. + //! + //! \param id the id to set. + //! + //! \exception + //! \noexcept + //! + //! \complexity + //! Constant. + //! + //! \note + //! This function does no checks on its parameters; however, the id of a + //! rule should only be set if the rule is inactive, and the id of a rule + //! should always be positive. + void set_id_no_checks(int64_t id) noexcept { + LIBSEMIGROUPS_ASSERT(id > 0); + LIBSEMIGROUPS_ASSERT(!active()); + _id = -1 * id; + } + + //! \brief Set the id of a rule. + //! + //! After checking that the Rule is inactive and that \p id is positive, + //! this function does the same as \ref set_id_no_checks. + //! + //! \throws LIBSEMIGROUPS_EXCEPTION if \p id is non-positive, or if `this` + //! is active. + void set_id(int64_t id) { + if (id <= 0) { + LIBSEMIGROUPS_EXCEPTION( + "invalid id, expected a value greater than 0, found {}", id); + } + if (active()) { + LIBSEMIGROUPS_EXCEPTION("cannot set the id of an active rule"); + } + set_id_no_checks(id); + } + + //! \brief Return the id of a rule. + //! + //! Return the id of a rule. + //! + //! \returns A value of type `int64_t` + //! + //! \complexity + //! Constant. + //! + //! \exception + //! 
\noexcept + [[nodiscard]] int64_t id() const noexcept { + LIBSEMIGROUPS_ASSERT(_id != 0); + return _id; + } + + //! \brief Reorder the left-hand and right-hand sides. + //! + //! If the right-hand side is greater than the left-hand side of a rule, + //! with regards to length-lexicographical order, then swap them. + //! + //! \complexity + //! The same complexity as \ref shortlex_compare(T* const, T* const) + //! + //! \exceptions + //! Throws if \ref shortlex_compare(T* const, T* const) does. + //! + //! \sa + //! shortlex_compare(T* const, T* const) + void reorder() { + if (shortlex_compare(_lhs, _rhs)) { + std::swap(_lhs, _rhs); + } + } + }; // class Rule + + class RuleLookup { + public: + RuleLookup() : _rule(nullptr) {} + + explicit RuleLookup(Rule* rule) + : _first(rule->lhs()->cbegin()), + _last(rule->lhs()->cend()), + _rule(rule) {} + + RuleLookup& operator()(internal_string_type::iterator const& first, + internal_string_type::iterator const& last) { + _first = first; + _last = last; + return *this; + } + + Rule const* rule() const { + return _rule; + } + + // This implements reverse lex comparison of this and that, which + // satisfies the requirement of std::set that equivalent items be + // incomparable, so, for example bcbc and abcbc are considered + // equivalent, but abcba and bcbc are not. 
+ bool operator<(RuleLookup const& that) const; + + private: + internal_string_type::const_iterator _first; + internal_string_type::const_iterator _last; + Rule const* _rule; + }; // class RuleLookup + + class Rules { + public: + using iterator = std::list::iterator; + using const_iterator = std::list::const_iterator; + using const_reverse_iterator + = std::list::const_reverse_iterator; + + private: + struct Stats { + Stats() noexcept; + Stats& init() noexcept; + + Stats(Stats const&) noexcept = default; + Stats(Stats&&) noexcept = default; + Stats& operator=(Stats const&) noexcept = default; + Stats& operator=(Stats&&) noexcept = default; + + size_t max_word_length; + size_t max_active_word_length; + size_t max_active_rules; + size_t min_length_lhs_rule; + uint64_t total_rules; + // std::unordered_set unique_lhs_rules; + }; + + // TODO(2) remove const? + std::list _active_rules; + std::array _cursors; + std::list _inactive_rules; + mutable Stats _stats; + + public: + Rules() = default; + + // Rules(Rules const& that); + // Rules(Rules&& that); + Rules& operator=(Rules const&); + + // TODO(1) the other constructors + + ~Rules(); + + Rules& init(); + + const_iterator begin() const noexcept { + return _active_rules.cbegin(); + } + + const_iterator end() const noexcept { + return _active_rules.cend(); + } + + iterator begin() noexcept { + return _active_rules.begin(); + } + + iterator end() noexcept { + return _active_rules.end(); + } + + const_reverse_iterator rbegin() const noexcept { + return _active_rules.crbegin(); + } + + const_reverse_iterator rend() const noexcept { + return _active_rules.crend(); + } + + [[nodiscard]] size_t number_of_active_rules() const noexcept { + return _active_rules.size(); + } + + [[nodiscard]] size_t number_of_inactive_rules() const noexcept { + return _inactive_rules.size(); + } + + [[nodiscard]] size_t max_active_word_length() const; + + iterator& cursor(size_t index) { + LIBSEMIGROUPS_ASSERT(index < _cursors.size()); + return 
_cursors[index]; + } + + // TODO(0) is this ever called? + void add_active_rule(Rule* rule) { + _active_rules.push_back(rule); + } + + void add_inactive_rule(Rule* rule) { + _inactive_rules.push_back(rule); + } + + Stats const& stats() const { + return _stats; + } + + [[nodiscard]] iterator erase_from_active_rules(iterator it); + + // TODO(0) this feels like it should be add_active rule. The above + // add_active_rule seems a bit dangerous + void add_rule(Rule* rule); + + [[nodiscard]] Rule* copy_rule(Rule const* rule); + + // private: + [[nodiscard]] Rule* new_rule(); + + protected: + template + [[nodiscard]] Rule* new_rule(Iterator begin_lhs, + Iterator end_lhs, + Iterator begin_rhs, + Iterator end_rhs) { + Rule* rule = new_rule(); + rule->lhs()->assign(begin_lhs, end_lhs); + rule->rhs()->assign(begin_rhs, end_rhs); + rule->reorder(); + return rule; + } + }; + + class RewriterBase : public Rules { + std::unordered_set _alphabet; + mutable std::atomic _cached_confluent; + mutable std::atomic _confluence_known; + size_t _max_stack_depth; + std::stack _pending_rules; + std::atomic _requires_alphabet; + + using alphabet_citerator + = std::unordered_set::const_iterator; + + public: + // TODO(0) to cpp + RewriterBase() + : _alphabet(), + _cached_confluent(false), + _confluence_known(false), + _max_stack_depth(0), + _pending_rules(), + _requires_alphabet() {} + + RewriterBase& init(); + + explicit RewriterBase(bool requires_alphabet) : RewriterBase() { + _requires_alphabet = requires_alphabet; + } + + ~RewriterBase(); + + RewriterBase& operator=(RewriterBase const& that) { + Rules::operator=(that); + _cached_confluent = that._cached_confluent.load(); + _confluence_known = that._confluence_known.load(); + _requires_alphabet = that._requires_alphabet.load(); + while (!_pending_rules.empty()) { + _pending_rules.pop(); + } + decltype(_pending_rules) tmp = that._pending_rules; + while (!tmp.empty()) { + auto const* rule = tmp.top(); + _pending_rules.push(copy_rule(rule)); + 
tmp.pop(); + } + + if (_requires_alphabet) { + _alphabet = that._alphabet; + } + return *this; + } + + bool requires_alphabet() const { + return _requires_alphabet; + } + + decltype(_alphabet) alphabet() const { + return _alphabet; + } + + alphabet_citerator alphabet_cbegin() const { + return _alphabet.cbegin(); + } + + alphabet_citerator alphabet_cend() const { + return _alphabet.cend(); + } + + void set_cached_confluent(tril val) const; + + bool cached_confluent() const noexcept { + return _cached_confluent; + } + + [[nodiscard]] bool consistent() const noexcept { + return _pending_rules.empty(); + } + + [[nodiscard]] bool confluence_known() const { + return _confluence_known; + } + + [[nodiscard]] size_t max_stack_depth() const { + return _max_stack_depth; + } + + bool add_pending_rule(Rule* rule); + + bool process_pending_rules(); + + void reduce(); + + void reduce_rhs(); + + void rewrite(Rule* rule) const { + rewrite(*rule->lhs()); + rewrite(*rule->rhs()); + rule->reorder(); + } + + // TODO(2) remove virtual functions + virtual void rewrite(internal_string_type& u) const = 0; + + virtual void add_rule(Rule* rule) = 0; + + virtual Rules::iterator make_active_rule_pending(Rules::iterator it) = 0; + + size_t number_of_pending_rules() const noexcept { + return _pending_rules.size(); + } + + Rule* next_pending_rule() { + LIBSEMIGROUPS_ASSERT(_pending_rules.size() != 0); + Rule* rule = _pending_rules.top(); + _pending_rules.pop(); + return rule; + } + + template + void add_rule(StringLike const& lhs, StringLike const& rhs) { + if (lhs != rhs) { + if (add_pending_rule(new_rule( + lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend()))) { + // TODO(0) only process_pending_rules when ready to run + process_pending_rules(); + } + } + } + + template + void add_pending_rule(StringLike const& lhs, StringLike const& rhs) { + if (lhs != rhs) { + add_pending_rule( + new_rule(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend())); + } + } + + void add_to_alphabet(internal_char_type 
letter) { + _alphabet.emplace(letter); + } + }; + + class RewriteFromLeft : public RewriterBase { + std::set _set_rules; + + public: + using RewriterBase::cached_confluent; + using Rules::stats; + + RewriteFromLeft() = default; + + RewriteFromLeft& operator=(RewriteFromLeft const&); + + // TODO(2) the other constructors + + ~RewriteFromLeft() = default; // TODO(2) out-of-line this + + RewriteFromLeft& init(); + + void rewrite(internal_string_type& u) const; + + [[nodiscard]] bool confluent() const; + + // TODO(0) private? + void add_rule(Rule* rule); + + using RewriterBase::add_rule; + + private: + void rewrite(Rule* rule) const; + + iterator make_active_rule_pending(iterator); + + void report_from_confluent( + std::atomic_uint64_t const&, + std::chrono::high_resolution_clock::time_point const&) const; + + bool confluent_impl(std::atomic_uint64_t&) const; + }; + + class RewriteTrie : public RewriterBase { + using index_type = AhoCorasick::index_type; + + std::map _rules; + AhoCorasick _trie; + + public: + using RewriterBase::cached_confluent; + using Rules::stats; + using iterator = internal_string_type::iterator; + using rule_iterator = std::map::iterator; + + RewriteTrie() : RewriterBase(true), _rules(), _trie() {} + + RewriteTrie(const RewriteTrie& that); + + RewriteTrie& operator=(RewriteTrie const& that); + + ~RewriteTrie() = default; + + RewriteTrie& init(); + + rule_iterator rules_begin() { + return _rules.begin(); + } + + rule_iterator rules_end() { + return _rules.end(); + } + + void all_overlaps(); + + void rule_overlaps(index_type node); + + void add_overlaps(Rule* rule, index_type node, size_t overlap_length); + + void rewrite(internal_string_type& u) const; + + [[nodiscard]] bool confluent() const; + + void add_rule(Rule* rule) { + Rules::add_rule(rule); + add_rule_to_trie(rule); + set_cached_confluent(tril::unknown); + } + + using RewriterBase::add_rule; + + private: + [[nodiscard]] bool descendants_confluent(Rule const* rule1, + index_type 
current_node, + size_t backtrack_depth) const; + + // TODO (After removing virtual functions) Put in base + void rewrite(Rule* rule) const { + rewrite(*rule->lhs()); + rewrite(*rule->rhs()); + rule->reorder(); + } + + void add_rule_to_trie(Rule* rule) { + index_type node = _trie.add_word_no_checks(rule->lhs()->cbegin(), + rule->lhs()->cend()); + _rules.emplace(node, rule); + } + + Rules::iterator make_active_rule_pending(Rules::iterator it); + + void report_from_confluent( + std::atomic_uint64_t const&, + std::chrono::high_resolution_clock::time_point const&) const; + + bool confluent_impl(std::atomic_uint64_t&) const; + }; + } // namespace detail +} // namespace libsemigroups +#endif // LIBSEMIGROUPS_DETAIL_REWRITERS_HPP_ diff --git a/include/libsemigroups/knuth-bendix.hpp b/include/libsemigroups/knuth-bendix.hpp index f766eff28..936adc3a7 100644 --- a/include/libsemigroups/knuth-bendix.hpp +++ b/include/libsemigroups/knuth-bendix.hpp @@ -54,7 +54,6 @@ #include "paths.hpp" // for Paths #include "presentation.hpp" // for Presentation #include "ranges.hpp" // for operator<< -#include "rewriters.hpp" // for RewriteTrie #include "runner.hpp" // for Runner #include "to-presentation.hpp" // for to_presentation #include "types.hpp" // for word_type @@ -63,6 +62,7 @@ #include "detail/multi-string-view.hpp" // for MultiStringView #include "detail/report.hpp" // for Reporter, REPORT_DEFAULT, REP... +#include "detail/rewriters.hpp" // for RewriteTrie #include "detail/string.hpp" // for is_prefix, maximum_common_prefix #include "ranges.hpp" // for iterator_range @@ -105,7 +105,7 @@ namespace libsemigroups { //! kb.confluent(); // true //! kb.number_of_classes(); // POSITIVE_INFINITY //! 
\endcode - template class KnuthBendix : public CongruenceInterface { // defined in detail/kbe.hpp @@ -115,10 +115,10 @@ namespace libsemigroups { // KnuthBendix - typedefs/aliases - private //////////////////////////////////////////////////////////////////////// - using external_string_type = std::string; - using internal_string_type = std::string; - using external_char_type = char; - using internal_char_type = char; + // using external_string_type = std::string; + // using internal_string_type = std::string; + // using external_char_type = char; + // using internal_char_type = char; //////////////////////////////////////////////////////////////////////// // KnuthBendix - nested subclasses - private @@ -126,9 +126,10 @@ namespace libsemigroups { // Overlap measures struct OverlapMeasure { - virtual size_t operator()(Rule const*, - Rule const* examples, - internal_string_type::const_iterator const&) + virtual size_t + operator()(detail::Rule const*, + detail::Rule const* examples, + detail::internal_string_type::const_iterator const&) = 0; virtual ~OverlapMeasure() {} }; @@ -728,8 +729,10 @@ namespace libsemigroups { return iterator_range(_rewriter.begin(), _rewriter.end()) | transform([this](auto const& rule) { // TODO remove allocation - internal_string_type lhs = internal_string_type(*rule->lhs()); - internal_string_type rhs = internal_string_type(*rule->rhs()); + detail::internal_string_type lhs + = detail::internal_string_type(*rule->lhs()); + detail::internal_string_type rhs + = detail::internal_string_type(*rule->rhs()); internal_to_external_string(lhs); internal_to_external_string(rhs); if (this->kind() == congruence_kind::left) { @@ -912,28 +915,31 @@ namespace libsemigroups { void throw_if_started() const; void stats_check_point(); - [[nodiscard]] static internal_char_type uint_to_internal_char(size_t a); - [[nodiscard]] static size_t internal_char_to_uint(internal_char_type c); + [[nodiscard]] static detail::internal_char_type + 
uint_to_internal_char(size_t a); + [[nodiscard]] static size_t + internal_char_to_uint(detail::internal_char_type c); - [[nodiscard]] static internal_string_type uint_to_internal_string(size_t i); + [[nodiscard]] static detail::internal_string_type + uint_to_internal_string(size_t i); [[nodiscard]] static word_type - internal_string_to_word(internal_string_type const& s); + internal_string_to_word(detail::internal_string_type const& s); - [[nodiscard]] internal_char_type - external_to_internal_char(external_char_type c) const; - [[nodiscard]] external_char_type - internal_to_external_char(internal_char_type a) const; + [[nodiscard]] detail::internal_char_type + external_to_internal_char(detail::external_char_type c) const; + [[nodiscard]] detail::external_char_type + internal_to_external_char(detail::internal_char_type a) const; - void external_to_internal_string(external_string_type& w) const; - void internal_to_external_string(internal_string_type& w) const; + void external_to_internal_string(detail::external_string_type& w) const; + void internal_to_external_string(detail::internal_string_type& w) const; - void add_octo(external_string_type& w) const; - void rm_octo(external_string_type& w) const; + void add_octo(detail::external_string_type& w) const; + void rm_octo(detail::external_string_type& w) const; void add_rule_impl(std::string const& p, std::string const& q); - void overlap(Rule const* u, Rule const* v); + void overlap(detail::Rule const* u, detail::Rule const* v); [[nodiscard]] size_t max_active_word_length() const { return _rewriter.max_active_word_length(); @@ -1131,8 +1137,8 @@ namespace libsemigroups { continue; } - if (rule.first.find(lhs) != internal_string_type::npos - || rule.second.find(lhs) != internal_string_type::npos) { + if (rule.first.find(lhs) != detail::internal_string_type::npos + || rule.second.find(lhs) != detail::internal_string_type::npos) { return false; } } diff --git a/include/libsemigroups/knuth-bendix.tpp 
b/include/libsemigroups/knuth-bendix.tpp index 7a10ec6b3..1702d3c9c 100644 --- a/include/libsemigroups/knuth-bendix.tpp +++ b/include/libsemigroups/knuth-bendix.tpp @@ -38,9 +38,9 @@ namespace libsemigroups { template struct KnuthBendix::ABC : KnuthBendix::OverlapMeasure { - size_t operator()(Rule const* AB, - Rule const* BC, - internal_string_type::const_iterator const& it) { + size_t operator()(detail::Rule const* AB, + detail::Rule const* BC, + detail::internal_string_type::const_iterator const& it) { LIBSEMIGROUPS_ASSERT(AB->active() && BC->active()); LIBSEMIGROUPS_ASSERT(AB->lhs()->cbegin() <= it); LIBSEMIGROUPS_ASSERT(it < AB->lhs()->cend()); @@ -52,9 +52,9 @@ namespace libsemigroups { template struct KnuthBendix::AB_BC : KnuthBendix::OverlapMeasure { - size_t operator()(Rule const* AB, - Rule const* BC, - internal_string_type::const_iterator const& it) { + size_t operator()(detail::Rule const* AB, + detail::Rule const* BC, + detail::internal_string_type::const_iterator const& it) { LIBSEMIGROUPS_ASSERT(AB->active() && BC->active()); LIBSEMIGROUPS_ASSERT(AB->lhs()->cbegin() <= it); LIBSEMIGROUPS_ASSERT(it < AB->lhs()->cend()); @@ -67,9 +67,9 @@ namespace libsemigroups { template struct KnuthBendix::MAX_AB_BC : KnuthBendix::OverlapMeasure { - size_t operator()(Rule const* AB, - Rule const* BC, - internal_string_type::const_iterator const& it) { + size_t operator()(detail::Rule const* AB, + detail::Rule const* BC, + detail::internal_string_type::const_iterator const& it) { LIBSEMIGROUPS_ASSERT(AB->active() && BC->active()); LIBSEMIGROUPS_ASSERT(AB->lhs()->cbegin() <= it); LIBSEMIGROUPS_ASSERT(it < AB->lhs()->cend()); @@ -291,8 +291,8 @@ namespace libsemigroups { return true; } - external_string_type uu = u; - external_string_type vv = v; + detail::external_string_type uu = u; + detail::external_string_type vv = v; if (kind() == congruence_kind::left) { std::reverse(uu.begin(), uu.end()); @@ -454,7 +454,7 @@ namespace libsemigroups { // some rules. 
template void KnuthBendix::rewrite_inplace( - external_string_type& w) { + detail::external_string_type& w) { if (kind() == congruence_kind::left) { std::reverse(w.begin(), w.end()); } @@ -568,7 +568,7 @@ namespace libsemigroups { // non-trivial overlaps are added and there are a no pending rules. while (add_overlaps) { while (first != _rewriter.end() && !stop_running()) { - Rule const* rule1 = *first; + detail::Rule const* rule1 = *first; // It is tempting to remove rule1 and rule2 here and use *first and // *second instead but this leads to some badness (which we didn't // understand, but it also didn't seem super important). @@ -576,7 +576,7 @@ namespace libsemigroups { overlap(rule1, rule1); while (second != _rewriter.begin() && rule1->active()) { --second; - Rule const* rule2 = *second; + detail::Rule const* rule2 = *second; overlap(rule1, rule2); ++nr; if (rule1->active() && rule2->active()) { @@ -774,7 +774,7 @@ namespace libsemigroups { template size_t KnuthBendix::internal_char_to_uint( - internal_char_type c) { + detail::internal_char_type c) { #ifdef LIBSEMIGROUPS_DEBUG LIBSEMIGROUPS_ASSERT(c >= 97); return static_cast(c - 97); @@ -784,57 +784,58 @@ namespace libsemigroups { } template - typename KnuthBendix::internal_char_type + typename detail::internal_char_type KnuthBendix::uint_to_internal_char(size_t a) { LIBSEMIGROUPS_ASSERT( - a <= size_t(std::numeric_limits::max())); + a <= size_t(std::numeric_limits::max())); #ifdef LIBSEMIGROUPS_DEBUG LIBSEMIGROUPS_ASSERT( - a <= size_t(std::numeric_limits::max() - 97)); - return static_cast(a + 97); + a + <= size_t(std::numeric_limits::max() - 97)); + return static_cast(a + 97); #else - return static_cast(a + 1); + return static_cast(a + 1); #endif } template - typename KnuthBendix::internal_string_type + typename detail::internal_string_type KnuthBendix::uint_to_internal_string(size_t i) { LIBSEMIGROUPS_ASSERT( - i <= size_t(std::numeric_limits::max())); - return 
internal_string_type({uint_to_internal_char(i)}); + i <= size_t(std::numeric_limits::max())); + return detail::internal_string_type({uint_to_internal_char(i)}); } template word_type KnuthBendix::internal_string_to_word( - internal_string_type const& s) { + detail::internal_string_type const& s) { word_type w; w.reserve(s.size()); - for (internal_char_type const& c : s) { + for (detail::internal_char_type const& c : s) { w.push_back(internal_char_to_uint(c)); } return w; } template - typename KnuthBendix::internal_char_type + typename detail::internal_char_type KnuthBendix::external_to_internal_char( - external_char_type c) const { + detail::external_char_type c) const { LIBSEMIGROUPS_ASSERT(!_internal_is_same_as_external); return uint_to_internal_char(presentation().index(c)); } template - typename KnuthBendix::external_char_type + typename detail::external_char_type KnuthBendix::internal_to_external_char( - internal_char_type a) const { + detail::internal_char_type a) const { LIBSEMIGROUPS_ASSERT(!_internal_is_same_as_external); return presentation().letter_no_checks(internal_char_to_uint(a)); } template void KnuthBendix::external_to_internal_string( - external_string_type& w) const { + detail::external_string_type& w) const { if (_internal_is_same_as_external) { return; } @@ -845,7 +846,7 @@ namespace libsemigroups { template void KnuthBendix::internal_to_external_string( - internal_string_type& w) const { + detail::internal_string_type& w) const { if (_internal_is_same_as_external) { return; } @@ -856,7 +857,7 @@ namespace libsemigroups { template void KnuthBendix::add_octo( - external_string_type& w) const { + detail::external_string_type& w) const { if (kind() != congruence_kind::twosided && (generating_pairs() | rx::count()) != 0) { w = presentation().alphabet().back() + w; @@ -865,7 +866,7 @@ namespace libsemigroups { template void KnuthBendix::rm_octo( - external_string_type& w) const { + detail::external_string_type& w) const { if (kind() != 
congruence_kind::twosided && (generating_pairs() | rx::count()) != 0) { LIBSEMIGROUPS_ASSERT(w.front() == presentation().alphabet().back()); @@ -914,8 +915,8 @@ namespace libsemigroups { // OVERLAP_2 from Sims, p77 template - void KnuthBendix::overlap(Rule const* u, - Rule const* v) { + void KnuthBendix::overlap(detail::Rule const* u, + detail::Rule const* v) { LIBSEMIGROUPS_ASSERT(u->active() && v->active()); auto const &ulhs = *(u->lhs()), vlhs = *(v->lhs()); auto const &urhs = *(u->rhs()), vrhs = *(v->rhs()); diff --git a/include/libsemigroups/libsemigroups.hpp b/include/libsemigroups/libsemigroups.hpp index 037c97797..d72b4ff43 100644 --- a/include/libsemigroups/libsemigroups.hpp +++ b/include/libsemigroups/libsemigroups.hpp @@ -62,7 +62,6 @@ #include "pbr.hpp" #include "presentation.hpp" #include "ranges.hpp" -#include "rewriters.hpp" #include "runner.hpp" #include "schreier-sims.hpp" #include "sims.hpp" diff --git a/include/libsemigroups/obvinf.hpp b/include/libsemigroups/obvinf.hpp index 63fec95f5..2b3e73063 100644 --- a/include/libsemigroups/obvinf.hpp +++ b/include/libsemigroups/obvinf.hpp @@ -64,13 +64,13 @@ #include // for pair #include // for vector -#include "config.hpp" // for LIBSEMIGROUPS_EIGEN_ENABLED -#include "ranges.hpp" // for word_type etc -#include "rewriters.hpp" // for RewriteTrie -#include "types.hpp" // for word_type etc +#include "config.hpp" // for LIBSEMIGROUPS_EIGEN_ENABLED +#include "ranges.hpp" // for word_type etc +#include "types.hpp" // for word_type etc #include "detail/eigen.hpp" -#include "detail/uf.hpp" // for Duf +#include "detail/rewriters.hpp" // for RewriteTrie +#include "detail/uf.hpp" // for Duf namespace libsemigroups { #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/include/libsemigroups/rewriters.hpp b/include/libsemigroups/rewriters.hpp deleted file mode 100644 index cf9a5b553..000000000 --- a/include/libsemigroups/rewriters.hpp +++ /dev/null @@ -1,704 +0,0 @@ -// -// libsemigroups - C++ library for semigroups and 
monoids -// Copyright (C) 2023-2024 Joseph Edwards + James D. Mitchell -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -// This file contains the implementation of a Rule object containers for Rule -// objects. It also includes rewriter classes that can be used to rewrite -// strings relative to a collection of rules. - -#ifndef LIBSEMIGROUPS_REWRITERS_HPP_ -#define LIBSEMIGROUPS_REWRITERS_HPP_ - -#include // for atomic -#include // for time_point -#include // for set -#include // for basic_string, operator== -#include // for unordered map -#include // for unordered set - -#include "aho-corasick.hpp" -#include "debug.hpp" // for LIBSEMIGROUPS_ASSERT -#include "order.hpp" // for shortlex_compare - -#include "detail/multi-string-view.hpp" // for MultiStringView - -// TODO(2) Add a KnuthBendix pointer to the rewriter class so that overlap -// detection can be handled by the rewriter (and therefore depend on the -// implementation) rather than on the KB object. - -//! \defgroup \rewriters_group Rewriters -//! -//! This file contains documentation for the functionality of the following -//! classes in `libsemigroups`: -//! * \ref libsemigroups::Rule "Rule" -//! * \ref RuleLookup -//! * \ref Rules -//! * \ref RewriterBase -//! * \ref RewriteFromLeft -//! * \ref RewriteTrie -namespace libsemigroups { - // TODO(2) remove from libsemigroups namespace and put into relevant class - - //! 
\ingroup rewriters_group - //! - //! Alias for the type of word that can be input by the user - using external_string_type = std::string; - - //! \ingroup rewriters_group - //! - //! Alias for the type of word used internally in the implementation - using internal_string_type = std::string; - - //! \ingroup rewriters_group - //! - //! Alias for the type of letter that can be input by the user - using external_char_type = char; - - //! \ingroup rewriters_group - //! - //! Alias for the type of letter used internally in the implementation - using internal_char_type = char; - - //! \ingroup rewriters_group - //! - //! \brief For a rewriting rule - //! - //! Defined in ``rewriters.hpp``. - //! - //! This class implements a data structure for storing *rewriting rules*. - //! Here, a rewriting rule is a rule of the form \f$A \to B\f$, where \f$A\f$ - //! and \f$B\f$ are both words over some alphabet \f$\Sigma\f$. - //! - //! The left-hand and right-hand sides of a rule are specified externally with - //! the type \ref external_string_type, and stored internally with type \ref - //! internal_string_type. - class Rule { - internal_string_type* _lhs; - internal_string_type* _rhs; - int64_t _id; - - public: - //! \brief Construct with new empty left-hand and right-hand sides. - //! - //! Construct with new empty left-hand and right-hand sides. - //! - //! \param id the id of the new rule. - //! - //! \exception - //! \no_libsemigroups_except - explicit Rule(int64_t id); - - Rule& operator=(Rule const& copy) = delete; - Rule(Rule const& copy) = delete; - Rule(Rule&& copy) = delete; - Rule& operator=(Rule&& copy) = delete; - - //! \brief Destruct the Rule. - //! - //! This function destructs a \ref Rule object by deleting the pointers used - //! for the left-hand and right-hand sides. - ~Rule() { - delete _lhs; - delete _rhs; - } - - //! \brief Return the left-hand side of the rule. - //! - //! Return the left-hand side of the rule. If this rule was create by a - //! 
\ref KnuthBendix, this is guaranteed to be greater than its right-hand - //! side according to the reduction ordering of that \ref KnuthBendix. - //! - //! \returns A pointer to the left-hand side. - //! - //! \exception - //! \noexcept - //! - //! \complexity - //! Constant. - //! - //! \sa - //! \ref KnuthBendix - [[nodiscard]] internal_string_type* lhs() const noexcept { - return _lhs; - } - - //! \brief Return the right-hand side of the rule. - //! - //! Return the right-hand side of the rule. If this rule was create by a - //! \ref KnuthBendix, this is guaranteed to be less than its left-hand - //! side according to the reduction ordering of that \ref KnuthBendix. - //! - //! \returns A pointer to the right-hand side. - //! - //! \exception - //! \noexcept - //! - //! \complexity - //! Constant. - //! - //! \sa - //! \ref KnuthBendix - [[nodiscard]] internal_string_type* rhs() const noexcept { - return _rhs; - } - - //! \brief Check if the left-hand and right-hand sides are empty. - //! - //! Check if the words pointed to by both the left-hand and the right-hand - //! sides are empty. - //! - //! \returns A value of type `bool`. - //! - //! \exception - //! \noexcept - //! - //! \complexity - //! Constant. - [[nodiscard]] bool empty() const noexcept { - return _lhs->empty() && _rhs->empty(); - } - - //! \brief Check if the Rule is active. - //! - //! Check if the rule is active. - //! - //! \returns A value of type `bool`. - //! - //! \exception - //! \noexcept - //! - //! \complexity - //! Constant. - //! - //! \sa - //! \ref RewriterBase::active_rules() - // TODO check the above ref points to something sensible - [[nodiscard]] inline bool active() const noexcept { - LIBSEMIGROUPS_ASSERT(_id != 0); - return (_id > 0); - } - - //! \brief Deactivate a rule. - //! - //! Deactivate a rule, if it is active. - //! - //! \exception - //! \noexcept - //! - //! \complexity - //! Constant. - //! - //! \sa - //! \ref active - void deactivate() noexcept; - - //! 
\brief Activate a rule. - //! - //! Activate a rule, if it is inactive. - //! - //! \exception - //! \noexcept - //! - //! \complexity - //! Constant. - //! - //! \sa - //! \ref active - void activate() noexcept; - - //! \brief Set the id of a rule. - //! - //! Set the id of a rule. - //! - //! \param id the id to set. - //! - //! \exception - //! \noexcept - //! - //! \complexity - //! Constant. - //! - //! \note - //! This function does no checks on its parameters; however, the id of a - //! rule should only be set if the rule is inactive, and the id of a rule - //! should always be positive. - void set_id_no_checks(int64_t id) noexcept { - LIBSEMIGROUPS_ASSERT(id > 0); - LIBSEMIGROUPS_ASSERT(!active()); - _id = -1 * id; - } - - //! \brief Set the id of a rule. - //! - //! After checking that the Rule is inactive \p id is positive, this - //! function performs the same as \ref set_id_no_checks. - //! - //! \throws LIBSEMIGROUPS_EXCEPTION if \p id is non-positive, or if `this` - //! is active. - void set_id(int64_t id) { - if (id <= 0) { - LIBSEMIGROUPS_EXCEPTION( - "invalid id, expected a value greater than 0, found {}", id); - } - if (active()) { - LIBSEMIGROUPS_EXCEPTION("cannot set the id of an active rule"); - } - set_id_no_checks(id); - } - - //! \brief Return the id of a rule. - //! - //! Return the id of a rule. - //! - //! \returns A value of type `int64_t` - //! - //! \complexity - //! Constant. - //! - //! \exception - //! \noexcept - [[nodiscard]] int64_t id() const noexcept { - LIBSEMIGROUPS_ASSERT(_id != 0); - return _id; - } - - //! \brief Reorder the left-hand and right-hand sides. - //! - //! If the right-hand side is greater than the left-hand side of a rule, - //! with regards to length-lexicographical order, then swap them. - //! - //! \complexity - //! The same complexity as \ref shortlex_compare(T* const, T* const) - //! - //! \exceptions - //! Throws if \ref shortlex_compare(T* const, T* const) does. - //! - //! \sa - //! 
shortlex_compare(T* const, T* const) - void reorder() { - if (shortlex_compare(_lhs, _rhs)) { - std::swap(_lhs, _rhs); - } - } - }; // class Rule - - class RuleLookup { - public: - RuleLookup() : _rule(nullptr) {} - - explicit RuleLookup(Rule* rule) - : _first(rule->lhs()->cbegin()), - _last(rule->lhs()->cend()), - _rule(rule) {} - - RuleLookup& operator()(internal_string_type::iterator const& first, - internal_string_type::iterator const& last) { - _first = first; - _last = last; - return *this; - } - - Rule const* rule() const { - return _rule; - } - - // This implements reverse lex comparison of this and that, which - // satisfies the requirement of std::set that equivalent items be - // incomparable, so, for example bcbc and abcbc are considered - // equivalent, but abcba and bcbc are not. - bool operator<(RuleLookup const& that) const; - - private: - internal_string_type::const_iterator _first; - internal_string_type::const_iterator _last; - Rule const* _rule; - }; // class RuleLookup - - class Rules { - public: - using iterator = std::list::iterator; - using const_iterator = std::list::const_iterator; - using const_reverse_iterator - = std::list::const_reverse_iterator; - - private: - struct Stats { - Stats() noexcept; - Stats& init() noexcept; - - Stats(Stats const&) noexcept = default; - Stats(Stats&&) noexcept = default; - Stats& operator=(Stats const&) noexcept = default; - Stats& operator=(Stats&&) noexcept = default; - - size_t max_word_length; - size_t max_active_word_length; - size_t max_active_rules; - size_t min_length_lhs_rule; - uint64_t total_rules; - // std::unordered_set unique_lhs_rules; - }; - - // TODO(2) remove const? 
- std::list _active_rules; - std::array _cursors; - std::list _inactive_rules; - mutable Stats _stats; - - public: - Rules() = default; - - // Rules(Rules const& that); - // Rules(Rules&& that); - Rules& operator=(Rules const&); - - // TODO(1) the other constructors - - ~Rules(); - - Rules& init(); - - const_iterator begin() const noexcept { - return _active_rules.cbegin(); - } - - const_iterator end() const noexcept { - return _active_rules.cend(); - } - - iterator begin() noexcept { - return _active_rules.begin(); - } - - iterator end() noexcept { - return _active_rules.end(); - } - - const_reverse_iterator rbegin() const noexcept { - return _active_rules.crbegin(); - } - - const_reverse_iterator rend() const noexcept { - return _active_rules.crend(); - } - - [[nodiscard]] size_t number_of_active_rules() const noexcept { - return _active_rules.size(); - } - - [[nodiscard]] size_t number_of_inactive_rules() const noexcept { - return _inactive_rules.size(); - } - - [[nodiscard]] size_t max_active_word_length() const; - - iterator& cursor(size_t index) { - LIBSEMIGROUPS_ASSERT(index < _cursors.size()); - return _cursors[index]; - } - - // TODO(0) is this ever called? - void add_active_rule(Rule* rule) { - _active_rules.push_back(rule); - } - - void add_inactive_rule(Rule* rule) { - _inactive_rules.push_back(rule); - } - - Stats const& stats() const { - return _stats; - } - - [[nodiscard]] iterator erase_from_active_rules(iterator it); - - // TODO(0) this feels like it should be add_active rule. 
The above - // add_active_rule seems a bit dangerous - void add_rule(Rule* rule); - - [[nodiscard]] Rule* copy_rule(Rule const* rule); - - // private: - [[nodiscard]] Rule* new_rule(); - - protected: - template - [[nodiscard]] Rule* new_rule(Iterator begin_lhs, - Iterator end_lhs, - Iterator begin_rhs, - Iterator end_rhs) { - Rule* rule = new_rule(); - rule->lhs()->assign(begin_lhs, end_lhs); - rule->rhs()->assign(begin_rhs, end_rhs); - rule->reorder(); - return rule; - } - }; - - class RewriterBase : public Rules { - std::unordered_set _alphabet; - mutable std::atomic _cached_confluent; - mutable std::atomic _confluence_known; - size_t _max_stack_depth; - std::stack _pending_rules; - std::atomic _requires_alphabet; - - using alphabet_citerator - = std::unordered_set::const_iterator; - - public: - // TODO(0) to cpp - RewriterBase() - : _alphabet(), - _cached_confluent(false), - _confluence_known(false), - _max_stack_depth(0), - _pending_rules(), - _requires_alphabet() {} - - RewriterBase& init(); - - explicit RewriterBase(bool requires_alphabet) : RewriterBase() { - _requires_alphabet = requires_alphabet; - } - - ~RewriterBase(); - - RewriterBase& operator=(RewriterBase const& that) { - Rules::operator=(that); - _cached_confluent = that._cached_confluent.load(); - _confluence_known = that._confluence_known.load(); - _requires_alphabet = that._requires_alphabet.load(); - while (!_pending_rules.empty()) { - _pending_rules.pop(); - } - decltype(_pending_rules) tmp = that._pending_rules; - while (!tmp.empty()) { - auto const* rule = tmp.top(); - _pending_rules.push(copy_rule(rule)); - tmp.pop(); - } - - if (_requires_alphabet) { - _alphabet = that._alphabet; - } - return *this; - } - - bool requires_alphabet() const { - return _requires_alphabet; - } - - decltype(_alphabet) alphabet() const { - return _alphabet; - } - - alphabet_citerator alphabet_cbegin() const { - return _alphabet.cbegin(); - } - - alphabet_citerator alphabet_cend() const { - return _alphabet.cend(); 
- } - - void set_cached_confluent(tril val) const; - - bool cached_confluent() const noexcept { - return _cached_confluent; - } - - [[nodiscard]] bool consistent() const noexcept { - return _pending_rules.empty(); - } - - [[nodiscard]] bool confluence_known() const { - return _confluence_known; - } - - [[nodiscard]] size_t max_stack_depth() const { - return _max_stack_depth; - } - - bool add_pending_rule(Rule* rule); - - bool process_pending_rules(); - - void reduce(); - - void reduce_rhs(); - - void rewrite(Rule* rule) const { - rewrite(*rule->lhs()); - rewrite(*rule->rhs()); - rule->reorder(); - } - - // TODO(2) remove virtual functions - virtual void rewrite(internal_string_type& u) const = 0; - - virtual void add_rule(Rule* rule) = 0; - - virtual Rules::iterator make_active_rule_pending(Rules::iterator it) = 0; - - size_t number_of_pending_rules() const noexcept { - return _pending_rules.size(); - } - - Rule* next_pending_rule() { - LIBSEMIGROUPS_ASSERT(_pending_rules.size() != 0); - Rule* rule = _pending_rules.top(); - _pending_rules.pop(); - return rule; - } - - template - void add_rule(StringLike const& lhs, StringLike const& rhs) { - if (lhs != rhs) { - if (add_pending_rule( - new_rule(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend()))) { - // TODO(0) only process_pending_rules when ready to run - process_pending_rules(); - } - } - } - - template - void add_pending_rule(StringLike const& lhs, StringLike const& rhs) { - if (lhs != rhs) { - add_pending_rule( - new_rule(lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend())); - } - } - - void add_to_alphabet(internal_char_type letter) { - _alphabet.emplace(letter); - } - }; - - class RewriteFromLeft : public RewriterBase { - std::set _set_rules; - - public: - using RewriterBase::cached_confluent; - using Rules::stats; - - RewriteFromLeft() = default; - - RewriteFromLeft& operator=(RewriteFromLeft const&); - - // TODO(2) the other constructors - - ~RewriteFromLeft() = default; // TODO(2) out-of-line this - - 
RewriteFromLeft& init(); - - void rewrite(internal_string_type& u) const; - - [[nodiscard]] bool confluent() const; - - // TODO(0) private? - void add_rule(Rule* rule); - - using RewriterBase::add_rule; - - private: - void rewrite(Rule* rule) const; - - iterator make_active_rule_pending(iterator); - - void report_from_confluent( - std::atomic_uint64_t const&, - std::chrono::high_resolution_clock::time_point const&) const; - - bool confluent_impl(std::atomic_uint64_t&) const; - }; - - class RewriteTrie : public RewriterBase { - using index_type = AhoCorasick::index_type; - - std::map _rules; - AhoCorasick _trie; - - public: - using RewriterBase::cached_confluent; - using Rules::stats; - using iterator = internal_string_type::iterator; - using rule_iterator = std::map::iterator; - - RewriteTrie() : RewriterBase(true), _rules(), _trie() {} - - RewriteTrie(const RewriteTrie& that); - - RewriteTrie& operator=(RewriteTrie const& that); - - ~RewriteTrie() = default; - - RewriteTrie& init(); - - rule_iterator rules_begin() { - return _rules.begin(); - } - - rule_iterator rules_end() { - return _rules.end(); - } - - void all_overlaps(); - - void rule_overlaps(index_type node); - - void add_overlaps(Rule* rule, index_type node, size_t overlap_length); - - void rewrite(internal_string_type& u) const; - - [[nodiscard]] bool confluent() const; - - void add_rule(Rule* rule) { - Rules::add_rule(rule); - add_rule_to_trie(rule); - set_cached_confluent(tril::unknown); - } - - using RewriterBase::add_rule; - - private: - [[nodiscard]] bool descendants_confluent(Rule const* rule1, - index_type current_node, - size_t backtrack_depth) const; - - // TODO (After removing virtual functions) Put in base - void rewrite(Rule* rule) const { - rewrite(*rule->lhs()); - rewrite(*rule->rhs()); - rule->reorder(); - } - - void add_rule_to_trie(Rule* rule) { - index_type node = _trie.add_word_no_checks(rule->lhs()->cbegin(), - rule->lhs()->cend()); - _rules.emplace(node, rule); - } - - 
Rules::iterator make_active_rule_pending(Rules::iterator it); - - void report_from_confluent( - std::atomic_uint64_t const&, - std::chrono::high_resolution_clock::time_point const&) const; - - bool confluent_impl(std::atomic_uint64_t&) const; - }; -} // namespace libsemigroups -#endif // LIBSEMIGROUPS_REWRITERS_HPP_ diff --git a/include/libsemigroups/sims.hpp b/include/libsemigroups/sims.hpp index 349cff808..5406f9e94 100644 --- a/include/libsemigroups/sims.hpp +++ b/include/libsemigroups/sims.hpp @@ -72,11 +72,10 @@ #include -#include "debug.hpp" // for LIBSEMIGROUPS_ASSERT -#include "exception.hpp" // for LIBSEMIGROUPS_EXCEPTION -#include "felsch-graph.hpp" // for FelschGraph -#include "knuth-bendix.hpp" // for KnuthBendix -#include "libsemigroups/rewriters.hpp" +#include "debug.hpp" // for LIBSEMIGROUPS_ASSERT +#include "exception.hpp" // for LIBSEMIGROUPS_EXCEPTION +#include "felsch-graph.hpp" // for FelschGraph +#include "knuth-bendix.hpp" // for KnuthBendix #include "presentation.hpp" // for Presentation, Presentati... 
#include "to-presentation.hpp" // for to_presentation #include "todd-coxeter.hpp" // for ToddCoxeter @@ -86,6 +85,7 @@ #include "matrix.hpp" #include "detail/iterator.hpp" // for detail/default_postfix_increment +#include "detail/rewriters.hpp" #include "rx/ranges.hpp" diff --git a/include/libsemigroups/to-froidure-pin.hpp b/include/libsemigroups/to-froidure-pin.hpp index 6acf1a0c9..d34f4277a 100644 --- a/include/libsemigroups/to-froidure-pin.hpp +++ b/include/libsemigroups/to-froidure-pin.hpp @@ -22,13 +22,13 @@ #include // for size_t #include // for enable_if_t, is_base_of -#include "debug.hpp" // for LIBSEMIGROUPS_ASSERT -#include "exception.hpp" // for LIBSEMIGROUPS_EXCEPTION -#include "rewriters.hpp" +#include "debug.hpp" // for LIBSEMIGROUPS_ASSERT +#include "exception.hpp" // for LIBSEMIGROUPS_EXCEPTION #include "word-graph.hpp" // for WordGraph #include "detail/kbe.hpp" // for KBE #include "detail/ke.hpp" // for KE +#include "detail/rewriters.hpp" #include "detail/tce.hpp" // for TCE namespace libsemigroups { diff --git a/src/rewriters.cpp b/src/rewriters.cpp index b8d288a4e..fa8b0cf00 100644 --- a/src/rewriters.cpp +++ b/src/rewriters.cpp @@ -16,7 +16,7 @@ // along with this program. If not, see . 
// -#include "libsemigroups/rewriters.hpp" +#include "libsemigroups/detail/rewriters.hpp" #include "libsemigroups/detail/report.hpp" // for report_default #include "libsemigroups/runner.hpp" // for Ticker @@ -24,687 +24,693 @@ #include namespace libsemigroups { - // Construct from KnuthBendix with new but empty internal_string_type's - Rule::Rule(int64_t id) - : _lhs(new internal_string_type()), - _rhs(new internal_string_type()), - _id(-1 * id) { - LIBSEMIGROUPS_ASSERT(_id < 0); - } - - void Rule::deactivate() noexcept { - LIBSEMIGROUPS_ASSERT(_id != 0); - if (active()) { - _id *= -1; - } - } - - void Rule::activate() noexcept { - LIBSEMIGROUPS_ASSERT(_id != 0); - if (!active()) { - _id *= -1; - } - } - - bool RuleLookup::operator<(RuleLookup const& that) const { - auto it_this = _last - 1; - auto it_that = that._last - 1; - while (it_this > _first && it_that > that._first && *it_this == *it_that) { - --it_that; - --it_this; - } - return *it_this < *it_that; - } - - Rules::Stats::Stats() noexcept { - init(); - } - - Rules::Stats& Rules::Stats::init() noexcept { - max_word_length = 0; - max_active_word_length = 0; - max_active_rules = 0; - min_length_lhs_rule = std::numeric_limits::max(); - total_rules = 0; - return *this; - } - - Rules& Rules::init() { - // Put all active rules and those rules in the stack into the - // inactive_rules list - for (Rule const* cptr : _active_rules) { - Rule* ptr = const_cast(cptr); - ptr->deactivate(); - _inactive_rules.insert(_inactive_rules.end(), ptr); - } - _active_rules.clear(); - for (auto& it : _cursors) { - it = _active_rules.end(); - } - return *this; - } - - Rules& Rules::operator=(Rules const& that) { - init(); - for (Rule const* rule : that) { - add_rule(copy_rule(rule)); - } - for (size_t i = 0; i < _cursors.size(); ++i) { - _cursors[i] = _active_rules.begin(); - std::advance( - _cursors[i], - std::distance(that.begin(), - static_cast(that._cursors[i]))); - } - return *this; - } - - Rules::~Rules() { - for (Rule const* 
rule : _active_rules) { - delete const_cast(rule); - } - for (Rule* rule : _inactive_rules) { - delete rule; - } - } - - Rule* Rules::new_rule() { - ++_stats.total_rules; - Rule* rule; - if (!_inactive_rules.empty()) { - rule = _inactive_rules.front(); - rule->set_id(_stats.total_rules); - _inactive_rules.erase(_inactive_rules.begin()); - } else { - rule = new Rule(_stats.total_rules); - } - LIBSEMIGROUPS_ASSERT(!rule->active()); - return rule; - } - - Rule* Rules::copy_rule(Rule const* rule) { - return new_rule(rule->lhs()->cbegin(), - rule->lhs()->cend(), - rule->rhs()->cbegin(), - rule->rhs()->cend()); - } - - Rules::iterator Rules::erase_from_active_rules(iterator it) { - // _stats.unique_lhs_rules.erase(*((*it)->lhs())); - Rule* rule = const_cast(*it); - rule->deactivate(); - - if (it != _cursors[0] && it != _cursors[1]) { - it = _active_rules.erase(it); - } else if (it == _cursors[0] && it != _cursors[1]) { - _cursors[0] = _active_rules.erase(it); - it = _cursors[0]; - } else if (it != _cursors[0] && it == _cursors[1]) { - _cursors[1] = _active_rules.erase(it); - it = _cursors[1]; - } else { - _cursors[0] = _active_rules.erase(it); - _cursors[1] = _cursors[0]; - it = _cursors[0]; - } - return it; - } - - void Rules::add_rule(Rule* rule) { - LIBSEMIGROUPS_ASSERT(*rule->lhs() != *rule->rhs()); - _stats.max_word_length - = std::max(_stats.max_word_length, rule->lhs()->size()); - _stats.max_active_rules - = std::max(_stats.max_active_rules, number_of_active_rules()); - // _stats.unique_lhs_rules.insert(*rule->lhs()); - rule->activate(); - _active_rules.push_back(rule); - for (auto& it : _cursors) { - if (it == end()) { - --it; - } - } - if (rule->lhs()->size() < _stats.min_length_lhs_rule) { - // TODO(later) this is not valid when using non-length reducing - // orderings (such as RECURSIVE) - _stats.min_length_lhs_rule = rule->lhs()->size(); - } - } - - size_t Rules::max_active_word_length() const { - auto comp = [](Rule const* p, Rule const* q) -> bool { - 
return p->lhs()->size() < q->lhs()->size(); - }; - auto max = std::max_element(begin(), end(), comp); - if (max != end()) { - _stats.max_active_word_length - = std::max(_stats.max_active_word_length, (*max)->lhs()->size()); - } - return _stats.max_active_word_length; - } - - RewriterBase& RewriterBase::init() { - Rules::init(); - if (_requires_alphabet) { - _alphabet.clear(); - } - // Put all active rules and those rules in the stack into the - // inactive_rules list - while (!_pending_rules.empty()) { - Rules::add_inactive_rule(_pending_rules.top()); - _pending_rules.pop(); - } - _max_stack_depth = 0; - _cached_confluent = false; - _confluence_known = false; - return *this; - } - - RewriterBase::~RewriterBase() { - while (!_pending_rules.empty()) { - Rule* rule = _pending_rules.top(); - _pending_rules.pop(); - delete rule; - } - } - - void RewriterBase::set_cached_confluent(tril val) const { - if (val == tril::TRUE) { - _confluence_known = true; - _cached_confluent = true; - } else if (val == tril::FALSE) { - _confluence_known = true; + namespace detail { + // Construct from KnuthBendix with new but empty internal_string_type's + Rule::Rule(int64_t id) + : _lhs(new internal_string_type()), + _rhs(new internal_string_type()), + _id(-1 * id) { + LIBSEMIGROUPS_ASSERT(_id < 0); + } + + void Rule::deactivate() noexcept { + LIBSEMIGROUPS_ASSERT(_id != 0); + if (active()) { + _id *= -1; + } + } + + void Rule::activate() noexcept { + LIBSEMIGROUPS_ASSERT(_id != 0); + if (!active()) { + _id *= -1; + } + } + + bool RuleLookup::operator<(RuleLookup const& that) const { + auto it_this = _last - 1; + auto it_that = that._last - 1; + while (it_this > _first && it_that > that._first + && *it_this == *it_that) { + --it_that; + --it_this; + } + return *it_this < *it_that; + } + + Rules::Stats::Stats() noexcept { + init(); + } + + Rules::Stats& Rules::Stats::init() noexcept { + max_word_length = 0; + max_active_word_length = 0; + max_active_rules = 0; + min_length_lhs_rule = 
std::numeric_limits::max(); + total_rules = 0; + return *this; + } + + Rules& Rules::init() { + // Put all active rules and those rules in the stack into the + // inactive_rules list + for (Rule const* cptr : _active_rules) { + Rule* ptr = const_cast(cptr); + ptr->deactivate(); + _inactive_rules.insert(_inactive_rules.end(), ptr); + } + _active_rules.clear(); + for (auto& it : _cursors) { + it = _active_rules.end(); + } + return *this; + } + + Rules& Rules::operator=(Rules const& that) { + init(); + for (Rule const* rule : that) { + add_rule(copy_rule(rule)); + } + for (size_t i = 0; i < _cursors.size(); ++i) { + _cursors[i] = _active_rules.begin(); + std::advance( + _cursors[i], + std::distance(that.begin(), + static_cast(that._cursors[i]))); + } + return *this; + } + + Rules::~Rules() { + for (Rule const* rule : _active_rules) { + delete const_cast(rule); + } + for (Rule* rule : _inactive_rules) { + delete rule; + } + } + + Rule* Rules::new_rule() { + ++_stats.total_rules; + Rule* rule; + if (!_inactive_rules.empty()) { + rule = _inactive_rules.front(); + rule->set_id(_stats.total_rules); + _inactive_rules.erase(_inactive_rules.begin()); + } else { + rule = new Rule(_stats.total_rules); + } + LIBSEMIGROUPS_ASSERT(!rule->active()); + return rule; + } + + Rule* Rules::copy_rule(Rule const* rule) { + return new_rule(rule->lhs()->cbegin(), + rule->lhs()->cend(), + rule->rhs()->cbegin(), + rule->rhs()->cend()); + } + + Rules::iterator Rules::erase_from_active_rules(iterator it) { + // _stats.unique_lhs_rules.erase(*((*it)->lhs())); + Rule* rule = const_cast(*it); + rule->deactivate(); + + if (it != _cursors[0] && it != _cursors[1]) { + it = _active_rules.erase(it); + } else if (it == _cursors[0] && it != _cursors[1]) { + _cursors[0] = _active_rules.erase(it); + it = _cursors[0]; + } else if (it != _cursors[0] && it == _cursors[1]) { + _cursors[1] = _active_rules.erase(it); + it = _cursors[1]; + } else { + _cursors[0] = _active_rules.erase(it); + _cursors[1] = 
_cursors[0]; + it = _cursors[0]; + } + return it; + } + + void Rules::add_rule(Rule* rule) { + LIBSEMIGROUPS_ASSERT(*rule->lhs() != *rule->rhs()); + _stats.max_word_length + = std::max(_stats.max_word_length, rule->lhs()->size()); + _stats.max_active_rules + = std::max(_stats.max_active_rules, number_of_active_rules()); + // _stats.unique_lhs_rules.insert(*rule->lhs()); + rule->activate(); + _active_rules.push_back(rule); + for (auto& it : _cursors) { + if (it == end()) { + --it; + } + } + if (rule->lhs()->size() < _stats.min_length_lhs_rule) { + // TODO(later) this is not valid when using non-length reducing + // orderings (such as RECURSIVE) + _stats.min_length_lhs_rule = rule->lhs()->size(); + } + } + + size_t Rules::max_active_word_length() const { + auto comp = [](Rule const* p, Rule const* q) -> bool { + return p->lhs()->size() < q->lhs()->size(); + }; + auto max = std::max_element(begin(), end(), comp); + if (max != end()) { + _stats.max_active_word_length + = std::max(_stats.max_active_word_length, (*max)->lhs()->size()); + } + return _stats.max_active_word_length; + } + + RewriterBase& RewriterBase::init() { + Rules::init(); + if (_requires_alphabet) { + _alphabet.clear(); + } + // Put all active rules and those rules in the stack into the + // inactive_rules list + while (!_pending_rules.empty()) { + Rules::add_inactive_rule(_pending_rules.top()); + _pending_rules.pop(); + } + _max_stack_depth = 0; _cached_confluent = false; - } else { _confluence_known = false; + return *this; } - } - bool RewriterBase::add_pending_rule(Rule* rule) { - LIBSEMIGROUPS_ASSERT(!rule->active()); - if (*rule->lhs() != *rule->rhs()) { - _pending_rules.emplace(rule); - _max_stack_depth = std::max(_max_stack_depth, _pending_rules.size()); - return true; - } else { - Rules::add_inactive_rule(rule); - return false; - } - } - - bool RewriterBase::process_pending_rules() { - bool rules_added = false; - Rule* rule1; - internal_string_type const* lhs; - while (number_of_pending_rules() 
!= 0) { - rule1 = next_pending_rule(); - LIBSEMIGROUPS_ASSERT(!rule1->active()); - LIBSEMIGROUPS_ASSERT(*rule1->lhs() != *rule1->rhs()); - // Rewrite both sides and reorder if necessary . . . - rewrite(rule1); - - // Check rule is non-trivial - if (*rule1->lhs() != *rule1->rhs()) { - lhs = rule1->lhs(); - - for (auto it = begin(); it != end();) { - Rule* rule2 = const_cast(*it); - - // Check if lhs is contained within either the lhs or rhs of rule2 - if (rule2->lhs()->find(*lhs) != external_string_type::npos - || rule2->rhs()->find(*lhs) != external_string_type::npos) { - // If it is, rule2 must be deactivated and re-processed - it = make_active_rule_pending(it); - } else { - ++it; + RewriterBase::~RewriterBase() { + while (!_pending_rules.empty()) { + Rule* rule = _pending_rules.top(); + _pending_rules.pop(); + delete rule; + } + } + + void RewriterBase::set_cached_confluent(tril val) const { + if (val == tril::TRUE) { + _confluence_known = true; + _cached_confluent = true; + } else if (val == tril::FALSE) { + _confluence_known = true; + _cached_confluent = false; + } else { + _confluence_known = false; + } + } + + bool RewriterBase::add_pending_rule(Rule* rule) { + LIBSEMIGROUPS_ASSERT(!rule->active()); + if (*rule->lhs() != *rule->rhs()) { + _pending_rules.emplace(rule); + _max_stack_depth = std::max(_max_stack_depth, _pending_rules.size()); + return true; + } else { + Rules::add_inactive_rule(rule); + return false; + } + } + + bool RewriterBase::process_pending_rules() { + bool rules_added = false; + Rule* rule1; + internal_string_type const* lhs; + while (number_of_pending_rules() != 0) { + rule1 = next_pending_rule(); + LIBSEMIGROUPS_ASSERT(!rule1->active()); + LIBSEMIGROUPS_ASSERT(*rule1->lhs() != *rule1->rhs()); + // Rewrite both sides and reorder if necessary . . . 
+ rewrite(rule1); + + // Check rule is non-trivial + if (*rule1->lhs() != *rule1->rhs()) { + lhs = rule1->lhs(); + + for (auto it = begin(); it != end();) { + Rule* rule2 = const_cast(*it); + + // Check if lhs is contained within either the lhs or rhs of rule2 + if (rule2->lhs()->find(*lhs) != external_string_type::npos + || rule2->rhs()->find(*lhs) != external_string_type::npos) { + // If it is, rule2 must be deactivated and re-processed + it = make_active_rule_pending(it); + } else { + ++it; + } } + add_rule(rule1); + if (!rules_added) { + rules_added = true; + } + } else { + add_inactive_rule(rule1); } - add_rule(rule1); - if (!rules_added) { - rules_added = true; - } - } else { - add_inactive_rule(rule1); } + // reduce_rhs(); + return rules_added; } - // reduce_rhs(); - return rules_added; - } - void RewriterBase::reduce() { - for (Rule const* rule : *this) { - // Copy rule and add_pending_rule so that it is not modified by the - // call to process_pending_rules. - LIBSEMIGROUPS_ASSERT(rule->lhs() != rule->rhs()); - if (add_pending_rule(copy_rule(rule))) { - process_pending_rules(); + void RewriterBase::reduce() { + for (Rule const* rule : *this) { + // Copy rule and add_pending_rule so that it is not modified by the + // call to process_pending_rules. 
+ LIBSEMIGROUPS_ASSERT(rule->lhs() != rule->rhs()); + if (add_pending_rule(copy_rule(rule))) { + process_pending_rules(); + } } } - } - void RewriterBase::reduce_rhs() { - for (Rule const* rule : *this) { - rewrite(*rule->rhs()); + void RewriterBase::reduce_rhs() { + for (Rule const* rule : *this) { + rewrite(*rule->rhs()); + } } - } - RewriteFromLeft& RewriteFromLeft::init() { - RewriterBase::init(); - _set_rules.clear(); - return *this; - } + RewriteFromLeft& RewriteFromLeft::init() { + RewriterBase::init(); + _set_rules.clear(); + return *this; + } - RewriteFromLeft& RewriteFromLeft::operator=(RewriteFromLeft const& that) { - init(); - RewriterBase::operator=(that); - for (auto* crule : that) { - Rule* rule = const_cast(crule); + RewriteFromLeft& RewriteFromLeft::operator=(RewriteFromLeft const& that) { + init(); + RewriterBase::operator=(that); + for (auto* crule : that) { + Rule* rule = const_cast(crule); #ifdef LIBSEMIGROUPS_DEBUG - LIBSEMIGROUPS_ASSERT(_set_rules.emplace(RuleLookup(rule)).second); + LIBSEMIGROUPS_ASSERT(_set_rules.emplace(RuleLookup(rule)).second); #else - _set_rules.emplace(RuleLookup(rule)); + _set_rules.emplace(RuleLookup(rule)); #endif + } + return *this; } - return *this; - } - RewriteFromLeft::iterator - RewriteFromLeft::make_active_rule_pending(iterator it) { - Rule* rule = const_cast(*it); - rule->deactivate(); - add_pending_rule(rule); + RewriteFromLeft::iterator + RewriteFromLeft::make_active_rule_pending(iterator it) { + Rule* rule = const_cast(*it); + rule->deactivate(); + add_pending_rule(rule); #ifdef LIBSEMIGROUPS_DEBUG - LIBSEMIGROUPS_ASSERT(_set_rules.erase(RuleLookup(rule))); + LIBSEMIGROUPS_ASSERT(_set_rules.erase(RuleLookup(rule))); #else - _set_rules.erase(RuleLookup(rule)); + _set_rules.erase(RuleLookup(rule)); #endif - LIBSEMIGROUPS_ASSERT(_set_rules.size() == number_of_active_rules() - 1); - return Rules::erase_from_active_rules(it); - } + LIBSEMIGROUPS_ASSERT(_set_rules.size() == number_of_active_rules() - 1); + 
return Rules::erase_from_active_rules(it); + } - void RewriteFromLeft::add_rule(Rule* rule) { - Rules::add_rule(rule); - // _stats.unique_lhs_rules.insert(*rule->lhs()); + void RewriteFromLeft::add_rule(Rule* rule) { + Rules::add_rule(rule); + // _stats.unique_lhs_rules.insert(*rule->lhs()); #ifdef LIBSEMIGROUPS_DEBUG - LIBSEMIGROUPS_ASSERT(_set_rules.emplace(RuleLookup(rule)).second); + LIBSEMIGROUPS_ASSERT(_set_rules.emplace(RuleLookup(rule)).second); #else - _set_rules.emplace(RuleLookup(rule)); + _set_rules.emplace(RuleLookup(rule)); #endif - LIBSEMIGROUPS_ASSERT(_set_rules.size() == number_of_active_rules()); - set_cached_confluent(tril::unknown); - } - - // REWRITE_FROM_LEFT from Sims, p67 - // Caution: this uses the assumption that rules are length reducing, if they - // are not, then u might not have sufficient space! - void RewriteFromLeft::rewrite(internal_string_type& u) const { - using iterator = internal_string_type::iterator; - - if (u.size() < stats().min_length_lhs_rule) { - return; - } - - iterator v_begin = u.begin(); - iterator v_end = u.begin() + stats().min_length_lhs_rule - 1; - iterator w_begin = v_end; - iterator w_end = u.end(); - - RuleLookup lookup; - - while (w_begin != w_end) { - *v_end = *w_begin; - ++v_end; - ++w_begin; - - auto it = _set_rules.find(lookup(v_begin, v_end)); - if (it != _set_rules.end()) { - Rule const* rule = (*it).rule(); - if (rule->lhs()->size() <= static_cast(v_end - v_begin)) { - LIBSEMIGROUPS_ASSERT(detail::is_suffix( - v_begin, v_end, rule->lhs()->cbegin(), rule->lhs()->cend())); - v_end -= rule->lhs()->size(); - w_begin -= rule->rhs()->size(); - detail::string_replace( - w_begin, rule->rhs()->cbegin(), rule->rhs()->cend()); - } + LIBSEMIGROUPS_ASSERT(_set_rules.size() == number_of_active_rules()); + set_cached_confluent(tril::unknown); + } + + // REWRITE_FROM_LEFT from Sims, p67 + // Caution: this uses the assumption that rules are length reducing, if they + // are not, then u might not have sufficient space! 
+ void RewriteFromLeft::rewrite(internal_string_type& u) const { + using iterator = internal_string_type::iterator; + + if (u.size() < stats().min_length_lhs_rule) { + return; } - while (w_begin != w_end - && stats().min_length_lhs_rule - 1 - > static_cast((v_end - v_begin))) { + + iterator v_begin = u.begin(); + iterator v_end = u.begin() + stats().min_length_lhs_rule - 1; + iterator w_begin = v_end; + iterator w_end = u.end(); + + RuleLookup lookup; + + while (w_begin != w_end) { *v_end = *w_begin; ++v_end; ++w_begin; + + auto it = _set_rules.find(lookup(v_begin, v_end)); + if (it != _set_rules.end()) { + Rule const* rule = (*it).rule(); + if (rule->lhs()->size() <= static_cast(v_end - v_begin)) { + LIBSEMIGROUPS_ASSERT(detail::is_suffix( + v_begin, v_end, rule->lhs()->cbegin(), rule->lhs()->cend())); + v_end -= rule->lhs()->size(); + w_begin -= rule->rhs()->size(); + detail::string_replace( + w_begin, rule->rhs()->cbegin(), rule->rhs()->cend()); + } + } + while (w_begin != w_end + && stats().min_length_lhs_rule - 1 + > static_cast((v_end - v_begin))) { + *v_end = *w_begin; + ++v_end; + ++w_begin; + } } + u.erase(v_end - u.cbegin()); } - u.erase(v_end - u.cbegin()); - } - - void RewriteFromLeft::rewrite(Rule* rule) const { - // LIBSEMIGROUPS_ASSERT(_id != 0); - rewrite(*rule->lhs()); - rewrite(*rule->rhs()); - rule->reorder(); - } - - void RewriteFromLeft::report_from_confluent( - std::atomic_uint64_t const& seen, - std::chrono::high_resolution_clock::time_point const& start_time) const { - auto total_pairs = std::pow(Rules::number_of_active_rules(), 2); - auto total_pairs_s = detail::group_digits(total_pairs); - auto now = std::chrono::high_resolution_clock::now(); - auto time - = std::chrono::duration_cast(now - start_time); - - report_default("KnuthBendix: locally confluent for: {:>{width}} / " - "{:>{width}} ({:>4.1f}%) pairs of rules ({}s)\n", - detail::group_digits(seen), - total_pairs_s, - (total_pairs != 0) - ? 
100 * static_cast(seen) / total_pairs - : 100, - time.count(), - fmt::arg("width", total_pairs_s.size())); - } - - bool RewriteFromLeft::confluent_impl(std::atomic_uint64_t& seen) const { - set_cached_confluent(tril::TRUE); - internal_string_type word1; - internal_string_type word2; - - for (auto it1 = begin(); it1 != end(); ++it1) { - Rule const* rule1 = *it1; - // Seems to be much faster to do this in reverse. - for (auto it2 = rbegin(); it2 != rend(); ++it2) { - seen++; - Rule const* rule2 = *it2; - for (auto it = rule1->lhs()->cend() - 1; it >= rule1->lhs()->cbegin(); - --it) { - // Find longest common prefix of suffix B of rule1.lhs() defined by - // it and R = rule2.lhs() - auto prefix = detail::maximum_common_prefix(it, - rule1->lhs()->cend(), - rule2->lhs()->cbegin(), - rule2->lhs()->cend()); - if (prefix.first == rule1->lhs()->cend() - || prefix.second == rule2->lhs()->cend()) { - // Seems that this function isn't called enough to merit using - // MSV's here. - word1.assign(rule1->lhs()->cbegin(), - it); // A - word1.append(*rule2->rhs()); // S - word1.append(prefix.first, - rule1->lhs()->cend()); // D - - word2.assign(*rule1->rhs()); // Q - word2.append(prefix.second, - rule2->lhs()->cend()); // E - - if (word1 != word2) { - rewrite(word1); - rewrite(word2); + + void RewriteFromLeft::rewrite(Rule* rule) const { + // LIBSEMIGROUPS_ASSERT(_id != 0); + rewrite(*rule->lhs()); + rewrite(*rule->rhs()); + rule->reorder(); + } + + void RewriteFromLeft::report_from_confluent( + std::atomic_uint64_t const& seen, + std::chrono::high_resolution_clock::time_point const& start_time) + const { + auto total_pairs = std::pow(Rules::number_of_active_rules(), 2); + auto total_pairs_s = detail::group_digits(total_pairs); + auto now = std::chrono::high_resolution_clock::now(); + auto time + = std::chrono::duration_cast(now - start_time); + + report_default("KnuthBendix: locally confluent for: {:>{width}} / " + "{:>{width}} ({:>4.1f}%) pairs of rules ({}s)\n", + 
detail::group_digits(seen), + total_pairs_s, + (total_pairs != 0) + ? 100 * static_cast(seen) / total_pairs + : 100, + time.count(), + fmt::arg("width", total_pairs_s.size())); + } + + bool RewriteFromLeft::confluent_impl(std::atomic_uint64_t& seen) const { + set_cached_confluent(tril::TRUE); + internal_string_type word1; + internal_string_type word2; + + for (auto it1 = begin(); it1 != end(); ++it1) { + Rule const* rule1 = *it1; + // Seems to be much faster to do this in reverse. + for (auto it2 = rbegin(); it2 != rend(); ++it2) { + seen++; + Rule const* rule2 = *it2; + for (auto it = rule1->lhs()->cend() - 1; it >= rule1->lhs()->cbegin(); + --it) { + // Find longest common prefix of suffix B of rule1.lhs() defined by + // it and R = rule2.lhs() + auto prefix = detail::maximum_common_prefix(it, + rule1->lhs()->cend(), + rule2->lhs()->cbegin(), + rule2->lhs()->cend()); + if (prefix.first == rule1->lhs()->cend() + || prefix.second == rule2->lhs()->cend()) { + // Seems that this function isn't called enough to merit using + // MSV's here. 
+ word1.assign(rule1->lhs()->cbegin(), + it); // A + word1.append(*rule2->rhs()); // S + word1.append(prefix.first, + rule1->lhs()->cend()); // D + + word2.assign(*rule1->rhs()); // Q + word2.append(prefix.second, + rule2->lhs()->cend()); // E + if (word1 != word2) { - set_cached_confluent(tril::FALSE); - return false; + rewrite(word1); + rewrite(word2); + if (word1 != word2) { + set_cached_confluent(tril::FALSE); + return false; + } } } } } } + return cached_confluent(); } - return cached_confluent(); - } - bool RewriteFromLeft::confluent() const { - if (number_of_pending_rules() != 0) { - set_cached_confluent(tril::unknown); - return false; - } else if (confluence_known()) { - return RewriterBase::cached_confluent(); - } - std::atomic_uint64_t seen = 0; - if (reporting_enabled()) { - using std::chrono::time_point; - time_point start_time = std::chrono::high_resolution_clock::now(); - detail::Ticker t([&]() { report_from_confluent(seen, start_time); }); - report_no_prefix("{:-<95}\n", ""); - return confluent_impl(seen); - } else { - return confluent_impl(seen); - } - } - - RewriteTrie& RewriteTrie::init() { - RewriterBase::init(); - _trie.init(); - _rules.clear(); - return *this; - } - - RewriteTrie& RewriteTrie::operator=(RewriteTrie const& that) { - init(); - RewriterBase::operator=(that); - for (auto* crule : that) { - Rule* rule = const_cast(crule); - add_rule_to_trie(rule); - } - return *this; - } - - void RewriteTrie::all_overlaps() { - // For each active rule, get the corresponding terminal node. 
- for (auto node_it = _rules.begin(); node_it != _rules.end(); ++node_it) { - index_type link = _trie.suffix_link_no_checks(node_it->first); - while (link != _trie.root) { - // For each suffix link, add an overlap between rule and every other - // rule that corresponds to a terminal descendant of link - add_overlaps(node_it->second, link, _trie.height_no_checks(link)); - link = _trie.suffix_link_no_checks(link); + bool RewriteFromLeft::confluent() const { + if (number_of_pending_rules() != 0) { + set_cached_confluent(tril::unknown); + return false; + } else if (confluence_known()) { + return RewriterBase::cached_confluent(); + } + std::atomic_uint64_t seen = 0; + if (reporting_enabled()) { + using std::chrono::time_point; + time_point start_time = std::chrono::high_resolution_clock::now(); + detail::Ticker t([&]() { report_from_confluent(seen, start_time); }); + report_no_prefix("{:-<95}\n", ""); + return confluent_impl(seen); + } else { + return confluent_impl(seen); } } - } - void RewriteTrie::rule_overlaps(index_type node) { - index_type link = _trie.suffix_link_no_checks(node); - while (link != _trie.root) { - // For each suffix link, add an overlap between rule and every other - // rule that corresponds to a terminal descendant of link - add_overlaps(_rules[node], link, _trie.height_no_checks(link)); - link = _trie.suffix_link_no_checks(link); + RewriteTrie& RewriteTrie::init() { + RewriterBase::init(); + _trie.init(); + _rules.clear(); + return *this; } - } - void RewriteTrie::add_overlaps(Rule* rule, - index_type node, - size_t overlap_length) { - // BFS find the terminal descendants of node and add overlaps with rule - if (_trie.node_no_checks(node).is_terminal()) { - Rule const* rule2 = _rules.find(node)->second; - detail::MultiStringView x(rule->lhs()->cbegin(), - rule->lhs()->cend() - overlap_length); - x.append(rule2->rhs()->cbegin(), rule2->rhs()->cend()); - detail::MultiStringView y(rule->rhs()->cbegin(), rule->rhs()->cend()); - 
y.append(rule2->lhs()->cbegin() + overlap_length, - rule2->lhs()->cend()); // rule = AQ_j -> Q_iC - add_pending_rule(x, y); + RewriteTrie& RewriteTrie::operator=(RewriteTrie const& that) { + init(); + RewriterBase::operator=(that); + for (auto* crule : that) { + Rule* rule = const_cast(crule); + add_rule_to_trie(rule); + } + return *this; + } + + void RewriteTrie::all_overlaps() { + // For each active rule, get the corresponding terminal node. + for (auto node_it = _rules.begin(); node_it != _rules.end(); ++node_it) { + index_type link = _trie.suffix_link_no_checks(node_it->first); + while (link != _trie.root) { + // For each suffix link, add an overlap between rule and every other + // rule that corresponds to a terminal descendant of link + add_overlaps(node_it->second, link, _trie.height_no_checks(link)); + link = _trie.suffix_link_no_checks(link); + } + } } - for (auto a = alphabet_cbegin(); a != alphabet_cend(); ++a) { - auto child = _trie.child_no_checks(node, static_cast(*a)); - if (child != UNDEFINED) { - add_overlaps(rule, child, overlap_length); + + void RewriteTrie::rule_overlaps(index_type node) { + index_type link = _trie.suffix_link_no_checks(node); + while (link != _trie.root) { + // For each suffix link, add an overlap between rule and every other + // rule that corresponds to a terminal descendant of link + add_overlaps(_rules[node], link, _trie.height_no_checks(link)); + link = _trie.suffix_link_no_checks(link); } } - } - // As with RewriteFromLeft::rewrite, this assumes that all rules are length - // reducing. 
- void RewriteTrie::rewrite(internal_string_type& u) const { - // Check if u is rewriteable - if (u.size() < stats().min_length_lhs_rule) { - return; + void RewriteTrie::add_overlaps(Rule* rule, + index_type node, + size_t overlap_length) { + // BFS find the terminal descendants of node and add overlaps with rule + if (_trie.node_no_checks(node).is_terminal()) { + Rule const* rule2 = _rules.find(node)->second; + detail::MultiStringView x(rule->lhs()->cbegin(), + rule->lhs()->cend() - overlap_length); + x.append(rule2->rhs()->cbegin(), rule2->rhs()->cend()); + detail::MultiStringView y(rule->rhs()->cbegin(), rule->rhs()->cend()); + y.append(rule2->lhs()->cbegin() + overlap_length, + rule2->lhs()->cend()); // rule = AQ_j -> Q_iC + add_pending_rule(x, y); + } + for (auto a = alphabet_cbegin(); a != alphabet_cend(); ++a) { + auto child = _trie.child_no_checks(node, static_cast(*a)); + if (child != UNDEFINED) { + add_overlaps(rule, child, overlap_length); + } + } } - std::vector nodes; - index_type current = _trie.root; - nodes.push_back(current); + // As with RewriteFromLeft::rewrite, this assumes that all rules are length + // reducing. 
+ void RewriteTrie::rewrite(internal_string_type& u) const { + // Check if u is rewriteable + if (u.size() < stats().min_length_lhs_rule) { + return; + } + + std::vector nodes; + index_type current = _trie.root; + nodes.push_back(current); #ifdef LIBSEMIGROUPS_DEBUG - iterator v_begin = u.begin(); + iterator v_begin = u.begin(); #endif - iterator v_end = u.begin(); - iterator w_begin = v_end; - iterator w_end = u.end(); - - while (w_begin != w_end) { - // Read first letter of W and traverse trie - auto x = *w_begin; - ++w_begin; - current = _trie.traverse_no_checks(current, static_cast(x)); - - if (!_trie.node_no_checks(current).is_terminal()) { - nodes.push_back(current); - *v_end = x; - ++v_end; - } else { - // Find rule that corresponds to terminal node - Rule const* rule = _rules.find(current)->second; - auto lhs_size = rule->lhs()->size(); - - // Check the lhs is smaller than the portion of the word that has - // been read - LIBSEMIGROUPS_ASSERT(lhs_size - <= static_cast(v_end - v_begin) + 1); - v_end -= lhs_size - 1; - w_begin -= rule->rhs()->size(); - // Replace lhs with rhs in-place - detail::string_replace( - w_begin, rule->rhs()->cbegin(), rule->rhs()->cend()); - for (size_t i = 0; i < lhs_size - 1; ++i) { - nodes.pop_back(); + iterator v_end = u.begin(); + iterator w_begin = v_end; + iterator w_end = u.end(); + + while (w_begin != w_end) { + // Read first letter of W and traverse trie + auto x = *w_begin; + ++w_begin; + current + = _trie.traverse_no_checks(current, static_cast(x)); + + if (!_trie.node_no_checks(current).is_terminal()) { + nodes.push_back(current); + *v_end = x; + ++v_end; + } else { + // Find rule that corresponds to terminal node + Rule const* rule = _rules.find(current)->second; + auto lhs_size = rule->lhs()->size(); + + // Check the lhs is smaller than the portion of the word that has + // been read + LIBSEMIGROUPS_ASSERT(lhs_size + <= static_cast(v_end - v_begin) + 1); + v_end -= lhs_size - 1; + w_begin -= rule->rhs()->size(); + // 
Replace lhs with rhs in-place + detail::string_replace( + w_begin, rule->rhs()->cbegin(), rule->rhs()->cend()); + for (size_t i = 0; i < lhs_size - 1; ++i) { + nodes.pop_back(); + } + current = nodes.back(); } - current = nodes.back(); } + u.erase(v_end - u.cbegin()); } - u.erase(v_end - u.cbegin()); - } - bool RewriteTrie::confluent() const { - if (number_of_pending_rules() != 0) { - set_cached_confluent(tril::unknown); - return false; - } else if (confluence_known()) { - return RewriterBase::cached_confluent(); - } - - std::atomic_uint64_t seen = 0; - if (reporting_enabled()) { - using std::chrono::time_point; - time_point start_time = std::chrono::high_resolution_clock::now(); - detail::Ticker t([&]() { report_from_confluent(seen, start_time); }); - report_no_prefix("{:-<95}\n", ""); - return confluent_impl(seen); - } else { - return confluent_impl(seen); - } - } - - void RewriteTrie::report_from_confluent( - std::atomic_uint64_t const& seen, - std::chrono::high_resolution_clock::time_point const& start_time) const { - auto total_rules = Rules::number_of_active_rules(); - auto total_rules_s = detail::group_digits(total_rules); - auto now = std::chrono::high_resolution_clock::now(); - auto time - = std::chrono::duration_cast(now - start_time); - - report_default("KnuthBendix: locally confluent for: {:>{width}} / " - "{:>{width}} ({:>4.1f}%) rules ({}s)\n", - detail::group_digits(seen), - total_rules_s, - (total_rules != 0) - ? 
100 * static_cast(seen) / total_rules - : 100, - time.count(), - fmt::arg("width", total_rules_s.size())); - } - - bool RewriteTrie::confluent_impl(std::atomic_uint64_t& seen) const { - index_type link; - set_cached_confluent(tril::TRUE); - - // For each rule, check if any descendent of any suffix breaks confluence - for (auto node_it = _rules.begin(); node_it != _rules.end(); ++node_it) { - seen++; - link = _trie.suffix_link_no_checks(node_it->first); - LIBSEMIGROUPS_ASSERT(node_it->first != _trie.root); - while (link != _trie.root) { - if (!descendants_confluent( - node_it->second, link, _trie.height_no_checks(link))) { - set_cached_confluent(tril::FALSE); - return false; + bool RewriteTrie::confluent() const { + if (number_of_pending_rules() != 0) { + set_cached_confluent(tril::unknown); + return false; + } else if (confluence_known()) { + return RewriterBase::cached_confluent(); + } + + std::atomic_uint64_t seen = 0; + if (reporting_enabled()) { + using std::chrono::time_point; + time_point start_time = std::chrono::high_resolution_clock::now(); + detail::Ticker t([&]() { report_from_confluent(seen, start_time); }); + report_no_prefix("{:-<95}\n", ""); + return confluent_impl(seen); + } else { + return confluent_impl(seen); + } + } + + void RewriteTrie::report_from_confluent( + std::atomic_uint64_t const& seen, + std::chrono::high_resolution_clock::time_point const& start_time) + const { + auto total_rules = Rules::number_of_active_rules(); + auto total_rules_s = detail::group_digits(total_rules); + auto now = std::chrono::high_resolution_clock::now(); + auto time + = std::chrono::duration_cast(now - start_time); + + report_default("KnuthBendix: locally confluent for: {:>{width}} / " + "{:>{width}} ({:>4.1f}%) rules ({}s)\n", + detail::group_digits(seen), + total_rules_s, + (total_rules != 0) + ? 
100 * static_cast(seen) / total_rules + : 100, + time.count(), + fmt::arg("width", total_rules_s.size())); + } + + bool RewriteTrie::confluent_impl(std::atomic_uint64_t& seen) const { + index_type link; + set_cached_confluent(tril::TRUE); + + // For each rule, check if any descendent of any suffix breaks confluence + for (auto node_it = _rules.begin(); node_it != _rules.end(); ++node_it) { + seen++; + link = _trie.suffix_link_no_checks(node_it->first); + LIBSEMIGROUPS_ASSERT(node_it->first != _trie.root); + while (link != _trie.root) { + if (!descendants_confluent( + node_it->second, link, _trie.height_no_checks(link))) { + set_cached_confluent(tril::FALSE); + return false; + } + link = _trie.suffix_link_no_checks(link); } - link = _trie.suffix_link_no_checks(link); } + // Set cached value + // set_cached_confluent(tril::TRUE); + return true; } - // Set cached value - // set_cached_confluent(tril::TRUE); - return true; - } - [[nodiscard]] bool - RewriteTrie::descendants_confluent(Rule const* rule1, - index_type current_node, - size_t overlap_length) const { - if (_trie.node_no_checks(current_node).is_terminal()) { - Rule const* rule2 = _rules.find(current_node)->second; - // Process overlap - // Word looks like ABC where the LHS of rule1 corresponds to AB, - // the LHS of rule2 corresponds to BC, and |C|=nodes.size() - 1. - // AB -> X, BC -> Y - // ABC gets rewritten to XC and AY + [[nodiscard]] bool + RewriteTrie::descendants_confluent(Rule const* rule1, + index_type current_node, + size_t overlap_length) const { + if (_trie.node_no_checks(current_node).is_terminal()) { + Rule const* rule2 = _rules.find(current_node)->second; + // Process overlap + // Word looks like ABC where the LHS of rule1 corresponds to AB, + // the LHS of rule2 corresponds to BC, and |C|=nodes.size() - 1. 
+ // AB -> X, BC -> Y + // ABC gets rewritten to XC and AY - internal_string_type word1; - internal_string_type word2; + internal_string_type word1; + internal_string_type word2; - word1.assign(*rule1->rhs()); // X - word1.append(rule2->lhs()->cbegin() + overlap_length, - rule2->lhs()->cend()); // C + word1.assign(*rule1->rhs()); // X + word1.append(rule2->lhs()->cbegin() + overlap_length, + rule2->lhs()->cend()); // C - word2.assign(rule1->lhs()->cbegin(), - rule1->lhs()->cend() - overlap_length); // A - word2.append(*rule2->rhs()); // Y + word2.assign(rule1->lhs()->cbegin(), + rule1->lhs()->cend() - overlap_length); // A + word2.append(*rule2->rhs()); // Y - if (word1 != word2) { - rewrite(word1); - rewrite(word2); if (word1 != word2) { - set_cached_confluent(tril::FALSE); - return false; + rewrite(word1); + rewrite(word2); + if (word1 != word2) { + set_cached_confluent(tril::FALSE); + return false; + } } + return true; } - return true; - } - // Read each possible letter and traverse down the trie - for (auto x = alphabet_cbegin(); x != alphabet_cend(); ++x) { - auto child - = _trie.child_no_checks(current_node, static_cast(*x)); - if (child != UNDEFINED) { - if (!descendants_confluent(rule1, child, overlap_length)) { - return false; + // Read each possible letter and traverse down the trie + for (auto x = alphabet_cbegin(); x != alphabet_cend(); ++x) { + auto child + = _trie.child_no_checks(current_node, static_cast(*x)); + if (child != UNDEFINED) { + if (!descendants_confluent(rule1, child, overlap_length)) { + return false; + } } } + return true; } - return true; - } - Rules::iterator RewriteTrie::make_active_rule_pending(Rules::iterator it) { - Rule* rule = const_cast(*it); - rule->deactivate(); // Done in Rules::erase_from - add_pending_rule(rule); - index_type node - = _trie.rm_word_no_checks(rule->lhs()->cbegin(), rule->lhs()->cend()); - _rules.erase(node); - return Rules::erase_from_active_rules(it); - LIBSEMIGROUPS_ASSERT(_trie.number_of_nodes() == 
number_of_active_rules()); - } + Rules::iterator RewriteTrie::make_active_rule_pending(Rules::iterator it) { + Rule* rule = const_cast(*it); + rule->deactivate(); // Done in Rules::erase_from + add_pending_rule(rule); + index_type node + = _trie.rm_word_no_checks(rule->lhs()->cbegin(), rule->lhs()->cend()); + _rules.erase(node); + return Rules::erase_from_active_rules(it); + LIBSEMIGROUPS_ASSERT(_trie.number_of_nodes() == number_of_active_rules()); + } + } // namespace detail } // namespace libsemigroups diff --git a/tests/test-fpsemi-examples-2.cpp b/tests/test-fpsemi-examples-2.cpp index 9f9b88737..c6f53c97a 100644 --- a/tests/test-fpsemi-examples-2.cpp +++ b/tests/test-fpsemi-examples-2.cpp @@ -40,7 +40,6 @@ #include "libsemigroups/paths.hpp" // for ReversiblePaths #include "libsemigroups/presentation.hpp" // for longest_rule_length #include "libsemigroups/ranges.hpp" // for operator|, to_vector -#include "libsemigroups/rewriters.hpp" // for RewriteTrie #include "libsemigroups/to-presentation.hpp" // for to_presentation #include "libsemigroups/types.hpp" // for congruence_kind, word_type #include "libsemigroups/word-graph.hpp" // for is_complete @@ -48,8 +47,9 @@ #include "libsemigroups/detail/eigen.hpp" // // for DenseBase::row, DenseBa... 
#include "libsemigroups/detail/fmt.hpp" // for format, print -#include "libsemigroups/detail/iterator.hpp" // for operator+ -#include "libsemigroups/detail/report.hpp" // for ReportGuard +#include "libsemigroups/detail/iterator.hpp" // for operator+ +#include "libsemigroups/detail/report.hpp" // for ReportGuard +#include "libsemigroups/detail/rewriters.hpp" // for RewriteTrie namespace libsemigroups { diff --git a/tests/test-knuth-bendix-1.cpp b/tests/test-knuth-bendix-1.cpp index b882c8958..69d0476f5 100644 --- a/tests/test-knuth-bendix-1.cpp +++ b/tests/test-knuth-bendix-1.cpp @@ -94,7 +94,7 @@ namespace libsemigroups { } // namespace #define KNUTH_BENDIX_TYPES \ - KnuthBendix, KnuthBendix + KnuthBendix, KnuthBendix TEMPLATE_TEST_CASE("confluent fp semigroup 1 (infinite)", "[000][quick][knuth-bendix][" __FILE__ diff --git a/tests/test-knuth-bendix-2.cpp b/tests/test-knuth-bendix-2.cpp index e7e281967..a1f8b23d4 100644 --- a/tests/test-knuth-bendix-2.cpp +++ b/tests/test-knuth-bendix-2.cpp @@ -73,7 +73,7 @@ namespace libsemigroups { using rule_type = KnuthBendix<>::rule_type; #define KNUTH_BENDIX_TYPES \ - KnuthBendix, KnuthBendix + KnuthBendix, KnuthBendix namespace { struct weird_cmp { diff --git a/tests/test-knuth-bendix-3.cpp b/tests/test-knuth-bendix-3.cpp index d5e6733a6..b919b00a4 100644 --- a/tests/test-knuth-bendix-3.cpp +++ b/tests/test-knuth-bendix-3.cpp @@ -68,7 +68,7 @@ namespace libsemigroups { struct LibsemigroupsException; #define KNUTH_BENDIX_TYPES \ - KnuthBendix, KnuthBendix + KnuthBendix, KnuthBendix namespace { struct weird_cmp { bool operator()(rule_type const& x, rule_type const& y) const noexcept { diff --git a/tests/test-knuth-bendix-4.cpp b/tests/test-knuth-bendix-4.cpp index a71f8a654..259bbf5a4 100644 --- a/tests/test-knuth-bendix-4.cpp +++ b/tests/test-knuth-bendix-4.cpp @@ -72,7 +72,7 @@ namespace libsemigroups { using rule_type = KnuthBendix<>::rule_type; #define KNUTH_BENDIX_TYPES \ - KnuthBendix, KnuthBendix + KnuthBendix, 
KnuthBendix //////////////////////////////////////////////////////////////////////// // Standard tests diff --git a/tests/test-knuth-bendix-5.cpp b/tests/test-knuth-bendix-5.cpp index 54a357fc6..71a916629 100644 --- a/tests/test-knuth-bendix-5.cpp +++ b/tests/test-knuth-bendix-5.cpp @@ -68,7 +68,7 @@ namespace libsemigroups { using namespace rx; #define KNUTH_BENDIX_TYPES \ - KnuthBendix, KnuthBendix + KnuthBendix, KnuthBendix namespace { using rule_type = KnuthBendix<>::rule_type; diff --git a/tests/test-knuth-bendix-6.cpp b/tests/test-knuth-bendix-6.cpp index eb67c5a47..771d9ae37 100644 --- a/tests/test-knuth-bendix-6.cpp +++ b/tests/test-knuth-bendix-6.cpp @@ -56,7 +56,7 @@ namespace libsemigroups { using literals::operator""_w; #define KNUTH_BENDIX_TYPES \ - KnuthBendix, KnuthBendix + KnuthBendix, KnuthBendix TEMPLATE_TEST_CASE("Presentation", "[129][quick][knuth-bendix]", diff --git a/tests/test-rewriters.cpp b/tests/test-rewriters.cpp index 8cefff686..c2c5e8fce 100644 --- a/tests/test-rewriters.cpp +++ b/tests/test-rewriters.cpp @@ -20,234 +20,237 @@ #include "catch.hpp" // for AssertionHandler, ope... 
#include "test-main.hpp" // for LIBSEMIGROUPS_TEST_CASE -#include "libsemigroups/aho-corasick.hpp" // for dot -#include "libsemigroups/rewriters.hpp" // for RewriteTrie +#include "libsemigroups/aho-corasick.hpp" // for dot +#include "libsemigroups/detail/rewriters.hpp" // for RewriteTrie namespace libsemigroups { - using namespace std::literals; - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "000", "initial test", "[quick]") { - RewriteTrie rt = RewriteTrie(); - REQUIRE(rt.number_of_active_rules() == 0); - rt.add_rule("ba"s, "a"s); - REQUIRE(rt.number_of_active_rules() == 1); - REQUIRE(rt.requires_alphabet()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") { - RewriteTrie rt = RewriteTrie(); - - rt.add_rule("ac"s, "ca"s); - rt.add_rule("aa"s, "a"s); - rt.add_rule("ac"s, "a"s); - rt.add_rule("ca"s, "a"s); - rt.add_rule("bb"s, "bb"s); - rt.add_rule("bc"s, "cb"s); - rt.add_rule("bbb"s, "b"s); - rt.add_rule("bc"s, "b"s); - rt.add_rule("cb"s, "b"s); - rt.add_rule("a"s, "b"s); - - REQUIRE(rt.confluent()); - - std::string w1 = "aa"; - rt.rewrite(w1); - REQUIRE(w1 == "a"s); - - std::string w2 = "ab"; - rt.rewrite(w2); - REQUIRE(w2 == "a"s); - - std::string w3 = "abc"; - rt.rewrite(w3); - REQUIRE(w3 == "a"s); - - std::string w4 = "abca"; - rt.rewrite(w4); - REQUIRE(w4 == "a"s); - - std::string w5 = "cbcabcabcabcbacbacbacabacabbaccabbacabbaccabacabbacabba"; - rt.rewrite(w5); - REQUIRE(w5 == "a"s); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "002", - "confluent fp semigroup 3 (infinite)", - "[quick]") { - RewriteTrie rt = RewriteTrie(); - - rt.add_rule("01"s, "10"s); - rt.add_rule("02"s, "20"s); - rt.add_rule("00"s, "0"s); - rt.add_rule("02"s, "0"s); - rt.add_rule("20"s, "0"s); - rt.add_rule("11"s, "11"s); - rt.add_rule("12"s, "21"s); - rt.add_rule("111"s, "1"s); - rt.add_rule("12"s, "1"s); - rt.add_rule("21"s, "1"s); - rt.add_rule("0"s, "1"s); - - REQUIRE(rt.confluent()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "003", - "non-confluent 
fp semigroup from " - "wikipedia (infinite)", - "[quick]") { - RewriteTrie rt = RewriteTrie(); - rt.add_rule("000"s, ""s); - rt.add_rule("111"s, ""s); - rt.add_rule("010101"s, ""s); - rt.add_to_alphabet('0'); - rt.add_to_alphabet('1'); - REQUIRE(!rt.confluent()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "004", - "Example 5.1 in Sims (infinite)", - "[quick]") { - RewriteTrie rt = RewriteTrie(); - rt.add_rule("ab"s, ""s); - rt.add_rule("ba"s, ""s); - rt.add_rule("cd"s, ""s); - rt.add_rule("dc"s, ""s); - rt.add_rule("ca"s, "ac"s); - - rt.add_to_alphabet('a'); - rt.add_to_alphabet('b'); - rt.add_to_alphabet('c'); - rt.add_to_alphabet('d'); - - REQUIRE(!rt.confluent()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "005", - "Example 5.1 in Sims (infinite)", - "[quick]") { - RewriteTrie rt = RewriteTrie(); - - rt.add_rule("Aa"s, ""s); - rt.add_rule("aA"s, ""s); - rt.add_rule("Bb"s, ""s); - rt.add_rule("bB"s, ""s); - rt.add_rule("ba"s, "ab"s); - - rt.add_to_alphabet('A'); - rt.add_to_alphabet('a'); - rt.add_to_alphabet('B'); - rt.add_to_alphabet('b'); - - REQUIRE(!rt.confluent()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "006", - "Example 5.3 in Sims", - "[quick]") { - RewriteTrie rt = RewriteTrie(); - - rt.add_rule("aa"s, ""s); - rt.add_rule("bbb"s, ""s); - rt.add_rule("ababab"s, ""s); - - rt.add_to_alphabet('a'); - rt.add_to_alphabet('b'); - - REQUIRE(!rt.confluent()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "007", - "Example 5.4 in Sims", - "[quick]") { - RewriteTrie rt = RewriteTrie(); - - rt.add_rule("aa"s, ""s); - rt.add_rule("bB"s, ""s); - rt.add_rule("bbb"s, ""s); - rt.add_rule("ababab"s, ""s); - - rt.add_to_alphabet('a'); - rt.add_to_alphabet('b'); - rt.add_to_alphabet('B'); - - REQUIRE(!rt.confluent()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "008", - "Example 6.4 in Sims (size 168)", - "[no-valgrind][quick]") { - RewriteTrie rt = RewriteTrie(); - - rt.add_rule("aa"s, ""s); - rt.add_rule("bc"s, ""s); - rt.add_rule("bbb"s, 
""s); - rt.add_rule("ababababababab"s, ""s); - rt.add_rule("abacabacabacabac"s, ""s); - - rt.add_to_alphabet('a'); - rt.add_to_alphabet('b'); - - REQUIRE(!rt.confluent()); - } - - LIBSEMIGROUPS_TEST_CASE("RewriteTrie", - "009", - "random example", - "[no-valgrind][quick]") { - RewriteTrie rt = RewriteTrie(); - - rt.add_rule("000"s, "2"s); - rt.add_rule("111"s, "2"s); - rt.add_rule("010101"s, "2"s); - rt.add_rule("02"s, "0"s); - rt.add_rule("12"s, "1"s); - rt.add_rule("12"s, "2"s); - - rt.add_to_alphabet('0'); - rt.add_to_alphabet('1'); - rt.add_to_alphabet('2'); - - REQUIRE(!rt.confluent()); - } - - // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") - // { - // RewriteTrie rt = RewriteTrie(); - // } - - // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") - // { - // RewriteTrie rt = RewriteTrie(); - // } - - // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") - // { - // RewriteTrie rt = RewriteTrie(); - // } - - // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") - // { - // RewriteTrie rt = RewriteTrie(); - // } - - // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") - // { - // RewriteTrie rt = RewriteTrie(); - // } - - // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") - // { - // RewriteTrie rt = RewriteTrie(); - // } - - // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") - // { - // RewriteTrie rt = RewriteTrie(); - // } + namespace detail { + using namespace std::literals; + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "000", "initial test", "[quick]") { + RewriteTrie rt = RewriteTrie(); + REQUIRE(rt.number_of_active_rules() == 0); + rt.add_rule("ba"s, "a"s); + REQUIRE(rt.number_of_active_rules() == 1); + REQUIRE(rt.requires_alphabet()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") { + RewriteTrie rt = RewriteTrie(); + + rt.add_rule("ac"s, "ca"s); + rt.add_rule("aa"s, 
"a"s); + rt.add_rule("ac"s, "a"s); + rt.add_rule("ca"s, "a"s); + rt.add_rule("bb"s, "bb"s); + rt.add_rule("bc"s, "cb"s); + rt.add_rule("bbb"s, "b"s); + rt.add_rule("bc"s, "b"s); + rt.add_rule("cb"s, "b"s); + rt.add_rule("a"s, "b"s); + + REQUIRE(rt.confluent()); + + std::string w1 = "aa"; + rt.rewrite(w1); + REQUIRE(w1 == "a"s); + + std::string w2 = "ab"; + rt.rewrite(w2); + REQUIRE(w2 == "a"s); + + std::string w3 = "abc"; + rt.rewrite(w3); + REQUIRE(w3 == "a"s); + + std::string w4 = "abca"; + rt.rewrite(w4); + REQUIRE(w4 == "a"s); + + std::string w5 + = "cbcabcabcabcbacbacbacabacabbaccabbacabbaccabacabbacabba"; + rt.rewrite(w5); + REQUIRE(w5 == "a"s); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "002", + "confluent fp semigroup 3 (infinite)", + "[quick]") { + RewriteTrie rt = RewriteTrie(); + + rt.add_rule("01"s, "10"s); + rt.add_rule("02"s, "20"s); + rt.add_rule("00"s, "0"s); + rt.add_rule("02"s, "0"s); + rt.add_rule("20"s, "0"s); + rt.add_rule("11"s, "11"s); + rt.add_rule("12"s, "21"s); + rt.add_rule("111"s, "1"s); + rt.add_rule("12"s, "1"s); + rt.add_rule("21"s, "1"s); + rt.add_rule("0"s, "1"s); + + REQUIRE(rt.confluent()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "003", + "non-confluent fp semigroup from " + "wikipedia (infinite)", + "[quick]") { + RewriteTrie rt = RewriteTrie(); + rt.add_rule("000"s, ""s); + rt.add_rule("111"s, ""s); + rt.add_rule("010101"s, ""s); + rt.add_to_alphabet('0'); + rt.add_to_alphabet('1'); + REQUIRE(!rt.confluent()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "004", + "Example 5.1 in Sims (infinite)", + "[quick]") { + RewriteTrie rt = RewriteTrie(); + rt.add_rule("ab"s, ""s); + rt.add_rule("ba"s, ""s); + rt.add_rule("cd"s, ""s); + rt.add_rule("dc"s, ""s); + rt.add_rule("ca"s, "ac"s); + + rt.add_to_alphabet('a'); + rt.add_to_alphabet('b'); + rt.add_to_alphabet('c'); + rt.add_to_alphabet('d'); + + REQUIRE(!rt.confluent()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "005", + "Example 5.1 in Sims (infinite)", + 
"[quick]") { + RewriteTrie rt = RewriteTrie(); + + rt.add_rule("Aa"s, ""s); + rt.add_rule("aA"s, ""s); + rt.add_rule("Bb"s, ""s); + rt.add_rule("bB"s, ""s); + rt.add_rule("ba"s, "ab"s); + + rt.add_to_alphabet('A'); + rt.add_to_alphabet('a'); + rt.add_to_alphabet('B'); + rt.add_to_alphabet('b'); + + REQUIRE(!rt.confluent()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "006", + "Example 5.3 in Sims", + "[quick]") { + RewriteTrie rt = RewriteTrie(); + + rt.add_rule("aa"s, ""s); + rt.add_rule("bbb"s, ""s); + rt.add_rule("ababab"s, ""s); + + rt.add_to_alphabet('a'); + rt.add_to_alphabet('b'); + + REQUIRE(!rt.confluent()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "007", + "Example 5.4 in Sims", + "[quick]") { + RewriteTrie rt = RewriteTrie(); + + rt.add_rule("aa"s, ""s); + rt.add_rule("bB"s, ""s); + rt.add_rule("bbb"s, ""s); + rt.add_rule("ababab"s, ""s); + + rt.add_to_alphabet('a'); + rt.add_to_alphabet('b'); + rt.add_to_alphabet('B'); + + REQUIRE(!rt.confluent()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "008", + "Example 6.4 in Sims (size 168)", + "[no-valgrind][quick]") { + RewriteTrie rt = RewriteTrie(); + + rt.add_rule("aa"s, ""s); + rt.add_rule("bc"s, ""s); + rt.add_rule("bbb"s, ""s); + rt.add_rule("ababababababab"s, ""s); + rt.add_rule("abacabacabacabac"s, ""s); + + rt.add_to_alphabet('a'); + rt.add_to_alphabet('b'); + + REQUIRE(!rt.confluent()); + } + + LIBSEMIGROUPS_TEST_CASE("RewriteTrie", + "009", + "random example", + "[no-valgrind][quick]") { + RewriteTrie rt = RewriteTrie(); + + rt.add_rule("000"s, "2"s); + rt.add_rule("111"s, "2"s); + rt.add_rule("010101"s, "2"s); + rt.add_rule("02"s, "0"s); + rt.add_rule("12"s, "1"s); + rt.add_rule("12"s, "2"s); + + rt.add_to_alphabet('0'); + rt.add_to_alphabet('1'); + rt.add_to_alphabet('2'); + + REQUIRE(!rt.confluent()); + } + + // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") + // { + // RewriteTrie rt = RewriteTrie(); + // } + + // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", 
"001", "simple test", "[quick]") + // { + // RewriteTrie rt = RewriteTrie(); + // } + + // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") + // { + // RewriteTrie rt = RewriteTrie(); + // } + + // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") + // { + // RewriteTrie rt = RewriteTrie(); + // } + + // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") + // { + // RewriteTrie rt = RewriteTrie(); + // } + + // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") + // { + // RewriteTrie rt = RewriteTrie(); + // } + + // LIBSEMIGROUPS_TEST_CASE("RewriteTrie", "001", "simple test", "[quick]") + // { + // RewriteTrie rt = RewriteTrie(); + // } + } // namespace detail } // namespace libsemigroups