diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 51a26d692e..3e202b4709 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -55,6 +55,10 @@ jobs: uses: egor-tensin/setup-gcc@v1 with: version: 11 + - name: Set up clang + uses: egor-tensin/setup-clang@v1 + with: + version: 15 - run: sudo apt-get update -qq - run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test - run: sudo apt-get update -qq @@ -87,6 +91,10 @@ jobs: name: Measure Test Coverage runs-on: ubuntu-22.04 steps: + - name: Set up clang + uses: egor-tensin/setup-clang@v1 + with: + version: 15 - uses: actions/checkout@v3 with: submodules: 'recursive' diff --git a/.gitignore b/.gitignore index d4668f7843..ac2e137bd6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *~ +*.csv *.debug *.dSYM *.gcov @@ -17,6 +18,8 @@ *.dat *.exe *tmp.* +*-bak.* +*-bak2.* */env/* */3nv/* @@ -24,9 +27,6 @@ .DS_Store a.out a.out.js -fitness.csv -population.csv -systematics.csv incoming/ tmp/ @@ -48,6 +48,7 @@ demos/Emphatic/Emphatic demos/Emphatic/examples/ConceptTest demos/Emphatic/examples/ConceptTest.cpp demos/MABE/examples/NK +demos/MAP-Elites-Arm/web/MAP-Elites-Arm.js demos/NK.bak/ demos/NK/NK demos/NK/web/NK.js @@ -76,6 +77,13 @@ demos/NK/web/jquery-1.11.2.min.js demos/NK/web/NK.asm.js demos/NK/web/NK.html demos/NK/web/NK.js.mem +demos/utils/words/Wordle/Wordle +demos/utils/words/Wordle/web/ +demos/utils/words/annotate-length +demos/utils/words/has-only +demos/utils/words/wordlists/ +demos/utils/words/wordplay-remove +demos/utils/words/wordplay-shuffle doc/doxygen/ examples/*/* @@ -88,7 +96,6 @@ examples/*/* !examples/*/Makefile !examples/timing/BENCHMARKS -tests/*.csv tests/StatsConfig.cfg tests/web/*.js tests/web/package.json diff --git a/.gitmodules b/.gitmodules index b6dbc3bf5d..8a533ca029 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,10 +10,6 @@ path = third-party/cereal url = https://github.com/mmore500/cereal.git shallow = true -[submodule 
"third-party/span-lite"] - path = third-party/span-lite - url = https://github.com/martinmoene/span-lite.git - shallow = true [submodule "third-party/robin-hood-hashing"] path = third-party/robin-hood-hashing url = https://github.com/martinus/robin-hood-hashing.git diff --git a/Dockerfile b/Dockerfile index 8190ae8cb1..0767f70ece 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,7 +46,7 @@ RUN \ build-essential \ dpkg-dev \ g++-11 \ - libc6 \ + libc6=2.27-3ubuntu1 \ xvfb \ x11vnc \ x11-xkb-utils \ diff --git a/Planning/NEXT_PASS b/Planning/NEXT_PASS new file mode 100644 index 0000000000..3568548ccb --- /dev/null +++ b/Planning/NEXT_PASS @@ -0,0 +1,13 @@ +Next time we do a full pass through files, we should: + +* Use notify for proper error tracking. Bias toward exceptions so developers can choose individualized responses to errors. +* Change member functions (and variables?) to begin with _ +* Cleanup Doxygen? +* Move std::string to emp::String, where possible. +* Cleanup any SFINAE or other template tricks and use constexpr instead. + + +And if implemented +* Simplify emp_asserts to assume that will disentangle tests and print values of components? +* Add in the ability to turn on "EMP_THREADED" options? +* Add in serialization capabilities? 
diff --git a/demos/MAP-Elites-Arm/Makefile b/demos/MAP-Elites-Arm/Makefile index 52b547329f..866d9093ed 100644 --- a/demos/MAP-Elites-Arm/Makefile +++ b/demos/MAP-Elites-Arm/Makefile @@ -3,7 +3,7 @@ PROJECT := MAP-Elites-Arm EMP_DIR := ../../include # Flags to use regardless of compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ @@ -12,7 +12,7 @@ CFLAGS_nat_debug := -g -DEMP_TRACK_MEM -Wnon-virtual-dtor -Wcast-align -Woverloa # Emscripten compiler information CXX_web := emcc -OFLAGS_web_all := -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1 #--embed-file configs +OFLAGS_web_all := -s "EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1 #--embed-file configs OFLAGS_web := -Oz -DNDEBUG # OFLAGS_web_debug := -g4 -Oz -pedantic -Wno-dollar-in-identifier-extension -s ASSERTIONS=2 OFLAGS_web_debug := -g4 -pedantic -Wno-dollar-in-identifier-extension -s ASSERTIONS=2 diff --git a/demos/NK/Makefile b/demos/NK/Makefile index 85cdf9efb6..5a1f1fdae3 100644 --- a/demos/NK/Makefile +++ b/demos/NK/Makefile @@ -3,7 +3,7 @@ PROJECT := NK EMP_DIR := ../../include # Flags to use regardless of compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ diff --git a/demos/SelectionAnalyze/Makefile b/demos/SelectionAnalyze/Makefile index 39543cfd70..3a81bbc910 100644 --- a/demos/SelectionAnalyze/Makefile +++ b/demos/SelectionAnalyze/Makefile @@ -1,6 +1,6 @@ # Flags to use 
regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/demos/SpatialCoop2017/Makefile b/demos/SpatialCoop2017/Makefile index dced09e09b..1b61d0e8e9 100644 --- a/demos/SpatialCoop2017/Makefile +++ b/demos/SpatialCoop2017/Makefile @@ -3,7 +3,7 @@ PROJECT := SimplePDWorld EMP_DIR := ../../include # Flags to use regardless of compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ diff --git a/demos/Sudoku/Makefile b/demos/Sudoku/Makefile index ff3f1e6658..7d0df6543a 100644 --- a/demos/Sudoku/Makefile +++ b/demos/Sudoku/Makefile @@ -1,5 +1,5 @@ # Flags to use regardless of compiler -CFLAGS_all := -std=c++17 -Wall -Wno-unused-function -I../../include/ +CFLAGS_all := -std=c++20 -Wall -Wno-unused-function -I../../include/ # Emscripten compiler information CXX_web := emcc diff --git a/demos/utils/graphs/web/Makefile b/demos/utils/graphs/web/Makefile index 4725b29149..19ec718967 100644 --- a/demos/utils/graphs/web/Makefile +++ b/demos/utils/graphs/web/Makefile @@ -1,7 +1,7 @@ CXX_web := emcc # OFLAGS_web := -g4 -Wall OFLAGS_web := -oz -DNDEBUG -CFLAGS_web := -std=c++17 $(OFLAGS_web) -s EXPORTED_FUNCTIONS="['_empLoadString']" -I../../../../include/ +CFLAGS_web := -std=c++20 $(OFLAGS_web) -s EXPORTED_FUNCTIONS="['_empLoadString']" -I../../../../include/ default: web diff --git a/demos/utils/words/Wordle-simple.cpp b/demos/utils/words/Wordle-simple.cpp new file mode 100644 index 0000000000..be8c470f91 --- /dev/null +++ b/demos/utils/words/Wordle-simple.cpp @@ -0,0 +1,422 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Wordle-simple.cpp + 
* + * This version of Wordle is a bit simpler than it should be; it does not handle double letters + * correctly. + */ + +#include +#include +#include +#include +#include + +#include "../../../include/emp/base/Ptr.hpp" +#include "../../../include/emp/base/vector.hpp" +#include "../../../include/emp/bits/BitSet.hpp" +#include "../../../include/emp/bits/BitVector.hpp" +#include "../../../include/emp/config/command_line.hpp" +#include "../../../include/emp/datastructs/map_utils.hpp" +#include "../../../include/emp/datastructs/vector_utils.hpp" +#include "../../../include/emp/io/File.hpp" +#include "../../../include/emp/tools/string_utils.hpp" + +enum class Result { NOWHERE=0, ELSEWHERE, HERE }; + +/// A collection of results for a whole word. +struct ResultSet { + emp::vector results; + + static const emp::vector & PlaceValues(const size_t num_results) { + static emp::vector place_values; + if (place_values.size() == 0) { + place_values.resize(num_results); + size_t value = 1; + for (size_t i = 0; i < num_results; ++i) { + place_values[i] = value; + value *= 3; + } + } + return place_values; + } + + ResultSet(const emp::vector & in) : results(in) { } + ResultSet(size_t size, size_t id) : results(size) { + emp::vector place_values = PlaceValues(results.size()); + for (size_t i = results.size()-1; i < results.size(); --i) { + if (id > place_values[i]) { + size_t value = id / place_values[i]; + results[i] = (Result) value; + id -= value * place_values[i]; + } + } + } + ResultSet(const ResultSet &) = default; + + size_t ToID() { + emp::vector place_values = PlaceValues(results.size()); + size_t id = 0; + for (size_t i = 0; i < results.size(); ++i) { + id += place_values[i] * (size_t) results[i]; + } + return id; + } +}; + +// A clue is a given letter, position, and result +struct Clue { + emp::BitVector words; // IDs of words consistent with this clue. +}; + +// All of the clues for a given position. 
+struct PositionClues { + std::array nowhere; + std::array elsewhere; + std::array here; + + void SetNumWords(size_t num_words) { + for (auto & x : nowhere) x.words.resize(num_words); + for (auto & x : elsewhere) x.words.resize(num_words); + for (auto & x : here) x.words.resize(num_words); + } +}; + +// Trying to build a full tree of solutions... +struct SolveState { + emp::BitVector words; +}; + +struct WordData { + std::string word; + emp::BitSet<26> letters; + size_t max_options = 0; // Maximum number of word options after used as a guess. + double ave_options = 0.0; // Average number of options after used as a guess. + double entropy = 0.0; // What is the entropy (and thus information gained) for this choice? + bool is_active = false; + + WordData(const std::string & in_word) : word(in_word) { + for (char x : word) letters.Set(x - 'a'); + } +}; + +class WordSet { +private: + size_t word_length; + emp::vector words; + emp::vector clues; // A PositionClues object for each position. + std::unordered_map pos_map; // Map of words to their position ids. + emp::BitVector start_options; // Current options. + size_t start_count; // Count of start options (cached) + + bool verbose = true; + + // Get the ID (0-26) associated with a letter. + size_t ID(char letter) { + emp_assert(letter >= 'a' && letter <= 'z'); + return static_cast(letter - 'a'); + } + + char LET(size_t id) { + emp_assert(id < 26); + return (char) (id + 'a'); + } + +public: + WordSet(size_t length=5) : word_length(length) { } + + void AddWord(std::string & in_word) { + size_t id = words.size(); + pos_map[in_word] = id; + words.emplace_back(in_word); + } + + void Load(std::istream & is, std::ostream & os) { + // Load in all of the words. + std::string in_word; + size_t wrong_size_count = 0; + size_t invalid_char_count = 0; + size_t dup_count = 0; + while (is) { + is >> in_word; + // Only keep words of the correct size and all lowercase. 
+ if (in_word.size() != word_length) { wrong_size_count++; continue; } + if (!emp::is_lower(in_word)) { invalid_char_count++; continue; } + if (emp::Has(pos_map, in_word)) { dup_count++; continue; } + AddWord(in_word); + } + + if (wrong_size_count) { + std::cerr << "Warning: eliminated " << wrong_size_count << " words of the wrong size." + << std::endl; + } + if (invalid_char_count) { + std::cerr << "Warning: eliminated " << invalid_char_count << " words with invalid characters." + << std::endl; + } + if (dup_count) { + std::cerr << "Warning: eliminated " << dup_count << " words that were duplicates." + << std::endl; + } + + if (verbose) std::cerr << "Loaded " << words.size() << " valid words." << std::endl; + } + + void ResetOptions() { + start_count = words.size(); + start_options.resize(start_count); + start_options.SetAll(); + } + + // Once the words are loaded, Preprocess will collect info. + void Preprocess() { + // Setup all clue info to know the number of words. + clues.resize(word_length); + for (auto & x : clues) x.SetNumWords(words.size()); + + // Loop through each word, indicating which clues it is consistent with. + for (size_t word_id = 0; word_id < words.size(); ++word_id) { + const std::string & word = words[word_id].word; + + // Figure out which letters are in this word. + emp::BitSet<26> letters = words[word_id].letters; + + // Now figure out what clues it is consistent with. + for (size_t pos=0; pos < word.size(); ++pos) { + const char cur_letter = word[pos]; + // Incorrect letter for alternatives at this position. + for (size_t letter_id = 0; letter_id < 26; ++letter_id) { + if (letter_id == ID(cur_letter)) { // Letter is HERE. 
+ clues[pos].here[letter_id].words.Set(word_id); + } else if (letters.Has(letter_id)) { // Letter is ELSEWHERE + clues[pos].elsewhere[letter_id].words.Set(word_id); + } else { // Letter is NOT IN WORD + clues[pos].nowhere[letter_id].words.Set(word_id); + } + } + } + } + + ResetOptions(); + } + + /// Limit starting options based on a specific clue. + void AddClue(size_t pos, char letter, Result result) { + size_t let_id = ID(letter); + if (result == Result::NOWHERE) { + start_options &= clues[pos].nowhere[let_id].words; + } else if (result == Result::ELSEWHERE) { + start_options &= clues[pos].elsewhere[let_id].words; + } else { + start_options &= clues[pos].here[let_id].words; + } + start_count = start_options.CountOnes(); + } + + void AddClue(std::string word, std::string result) { + for (size_t i = 0; i < word.size(); ++i) { + if (result[i] == 'N') AddClue(i, word[i], Result::NOWHERE); + else if (result[i] == 'E') AddClue(i, word[i], Result::ELSEWHERE); + else if (result[i] == 'H') AddClue(i, word[i], Result::HERE); + } + } + + emp::BitVector AnalyzeGuess(const std::string & guess, const WordData & answer) { + // Loop through all possible answers to see how much a word cuts down choices. + emp::BitVector options(start_options); + + for (size_t pos = 0; pos < word_length; ++pos) { + const size_t guess_letter = ID(guess[pos]); + if (guess[pos] == answer.word[pos]) { // CORRECT GUESS FOR POSITION! + options &= clues[pos].here[guess_letter].words; + } else if (answer.letters.Has(guess_letter)) { // WRONG POSITION + options &= clues[pos].elsewhere[guess_letter].words; + } else { // WRONG CHARACTER + options &= clues[pos].nowhere[guess_letter].words; + } + } + + return options; + } + + // Slow way to manually call on specific words; brute-force find the entires for each. 
+ emp::BitVector AnalyzeGuess(const std::string & guess, const std::string & answer) { + if (!emp::Has(pos_map, answer)) std::cerr << "UNKNOWN WORD: " << answer << std::endl; + return AnalyzeGuess(guess, words[pos_map[answer]]); + } + + void AnalyzeGuess(WordData & guess) { + size_t max_options = 0; + size_t total_options = 0; + double entropy = 0.0; + + // Scan through all possible answers... + for (WordData & answer : words) { + size_t options = AnalyzeGuess(guess.word, answer).CountOnes(); + if (options > max_options) max_options = options; + total_options += options; + const double p = static_cast(options) / static_cast(start_count); + entropy -= p * std::log2(p); + } + guess.max_options = max_options; + guess.ave_options = static_cast(total_options) / static_cast(words.size()); + guess.entropy = entropy; + } + + void Analyze() { + // for (int id = start_options.FindOne(); id >= 0; id = start_options.FindOne(id+1)) { + for (size_t id = 0; id < words.size(); ++id) { + AnalyzeGuess(words[id]); + } + } + + /// Also analyze non-word guesses. + void AnalyzeAll() { + std::string guess(word_length, 'a'); + size_t best_max_options = 10000; + double best_ave_options = 10000.0; + double best_entropy = 0.0; + std::string best_max_options_word = ""; + std::string best_ave_options_word = ""; + std::string best_entropy_word = ""; + + size_t silent_count = 0; // Keep a count of how many loops since out last output. + while (true) { + size_t max_options = 0; + size_t total_options = 0; + double entropy = 0.0; + + // Scan through all possible answers... 
+ for (WordData & answer : words) { + size_t options = AnalyzeGuess(guess, answer).CountOnes(); + if (options > max_options) max_options = options; + total_options += options; + const double p = static_cast(options) / static_cast(start_count); + entropy -= p * std::log2(p); + } + double ave_options = static_cast(total_options) / static_cast(words.size()); + + ++silent_count; + if (max_options < best_max_options) { + best_max_options = max_options; + best_max_options_word = guess; + std::cout << "New best MAX options: " << guess << " : " << max_options << std::endl; + silent_count = 0; + } + if (ave_options < best_ave_options) { + best_ave_options = ave_options; + best_ave_options_word = guess; + std::cout << "New best AVE options: " << guess << " : " << ave_options << std::endl; + silent_count = 0; + } + if (entropy > best_entropy) { + best_entropy = entropy; + best_entropy_word = guess; + std::cout << "New best ENTROPY: " << guess << " : " << entropy << std::endl; + silent_count = 0; + } + if (silent_count >= 10000) { + std::cout << "...processing... ('" << guess << "')" << std::endl; + silent_count = 0; + } + + // Now move on to the next word... + size_t inc_pos = word_length - 1; // find the first non-z letter. + while (inc_pos < word_length && guess[inc_pos] == 'z') { + guess[inc_pos] = 'a'; + --inc_pos; + } + if (inc_pos == word_length) break; + ++guess[inc_pos]; + } + } + + /// Print all of the words with a given set of IDs. + void PrintWords(const emp::BitVector & word_ids) { + size_t count = 0; + for (int id = word_ids.FindOne(); id >= 0; id = word_ids.FindOne(id+1)) { + if (count) std::cout << ","; + std::cout << words[id].word; + ++count; + } + std::cout << " (" << count << " words found)" << std::endl; + } + + /// Print all of the results, sorted by max number of options. 
+ void PrintResults() { + for (size_t i = 0; i < words.size(); ++i) { + words[i].is_active = start_options.Has(i); + } + emp::Sort(words, [](const WordData & w1, const WordData & w2){ + if (w1.is_active == w2.is_active) { + return w1.max_options < w2.max_options; + } + return w2.is_active; + }); + for (auto & word : words) { + std::cout << word.word + << ", " << word.max_options + << ", " << word.ave_options + << ", " << word.is_active + << std::endl; + } + } +}; + +int main(int argc, char* argv[]) +{ + emp::vector args = emp::cl::args_to_strings(argc, argv); + + if (args.size() > 3) { + std::cerr << "May provide am input filename (with the word list to use) and output filename (for results)" + << std::endl; + exit(1); + } + + WordSet word_set(5); + + if (args.size() == 1) word_set.Load(std::cin, std::cout); + else { + std::ifstream in_file{args[1]}; + if (args.size() == 2) word_set.Load(in_file, std::cout); + else { + std::ofstream out_file{args[2]}; + word_set.Load(in_file, out_file); + } + } + + word_set.Preprocess(); + + //word_set.AddClue("aloes", "NNNEN"); + word_set.AddClue("rates", "NENEN"); + // word_set.AddClue("login", "ENNEN"); + // word_set.AddClue("dimly", "NHNHH"); + // word_set.AddClue("finch", "NNNNN"); + + /* + word_set.AddClue(0,'a',Result::NOWHERE); + word_set.AddClue(1,'l',Result::NOWHERE); + word_set.AddClue(2,'o',Result::NOWHERE); + word_set.AddClue(3,'e',Result::ELSEWHERE); + word_set.AddClue(4,'s',Result::NOWHERE); + + word_set.AddClue(0,'d',Result::NOWHERE); + word_set.AddClue(1,'i',Result::ELSEWHERE); + word_set.AddClue(2,'r',Result::NOWHERE); + word_set.AddClue(3,'t',Result::NOWHERE); + word_set.AddClue(4,'y',Result::NOWHERE); + + word_set.AddClue(0,'h',Result::NOWHERE); + word_set.AddClue(1,'e',Result::NOWHERE); + word_set.AddClue(2,'n',Result::NOWHERE); + word_set.AddClue(3,'g',Result::NOWHERE); + word_set.AddClue(4,'e',Result::HERE); + */ + + word_set.Analyze(); + word_set.PrintResults(); +// word_set.AnalyzeAll(); +} diff --git 
EMP_DIR := ../../../../include

# Flags to use regardless of compiler
CFLAGS_all := -Wall -Wextra -Wno-unused-function -I$(EMP_DIR)/
CFLAGS_version := -std=c++20

# Compilers: Emscripten for web builds, g++ for native builds
CXX_web := emcc
CXX_native := g++

OFLAGS_native_opt := -O3 -DNDEBUG
OFLAGS_native_debug := -g -pedantic -DEMP_TRACK_MEM -Wnon-virtual-dtor -Wcast-align
OFLAGS_native_grumpy := -g -pedantic -DEMP_TRACK_MEM -Wnon-virtual-dtor -Wcast-align -Wconversion -Weffc++

OFLAGS_web_opt := -Os -DNDEBUG -s TOTAL_MEMORY=67108864
OFLAGS_web_debug := -g4 -pedantic -Wno-dollar-in-identifier-extension -s TOTAL_MEMORY=67108864 -s ASSERTIONS=2 -s DEMANGLE_SUPPORT=1 # -s SAFE_HEAP=1

CFLAGS_native_opt := $(CFLAGS_all) $(OFLAGS_native_opt)
CFLAGS_native_debug := $(CFLAGS_all) $(OFLAGS_native_debug)
CFLAGS_native_grumpy := $(CFLAGS_all) $(OFLAGS_native_grumpy)

# The Empirical JS library lives under $(EMP_DIR)/emp/web/ (EMP_DIR is the include root, which
# is why sources include "emp/..." headers), matching the other demo Makefiles.
CFLAGS_web_debug := $(CFLAGS_all) $(OFLAGS_web_debug) --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1
CFLAGS_web_opt := $(CFLAGS_all) $(OFLAGS_web_opt) --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1
#CFLAGS_web := $(CFLAGS_all) $(OFLAGS_web) --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1

TARGETS := Wordle

# These targets name build modes, not files; never let a stray file of the same name block them.
.PHONY: default native debug grumpy web web-debug all

default: native

CXX := $(CXX_native)
CFLAGS := $(CFLAGS_native_opt)

debug: CFLAGS := $(CFLAGS_native_debug)
debug: all

grumpy: CFLAGS := $(CFLAGS_native_grumpy)
grumpy: all

web: CXX := $(CXX_web)
web: CFLAGS := $(CFLAGS_web_opt)
web: all

web-debug: CXX := $(CXX_web)
web-debug: CFLAGS := $(CFLAGS_web_debug)
web-debug: all

native: all

all: $(TARGETS)
+$(TARGETS): % : %.cpp + $(CXX) $(CFLAGS_version) $(CFLAGS) $< -o $@ + +$(JS_TARGETS): %.js : %.cpp + $(CXX_web) $(CFLAGS_web) $< -o $@ + +debug-%: $*.cpp + $(CXX) $(CFLAGS_version) $(CFLAGS_native_debug) $< -o $@ + +clean: + rm -rf debug-* *~ *.dSYM $(TARGETS) +# rm -rf debug-* *~ *.dSYM $(JS_TARGETS) + +new: clean +new: native + +# Debugging information +#print-%: ; @echo $*=$($*) +print-%: ; @echo '$(subst ','\'',$*=$($*))' diff --git a/demos/utils/words/Wordle/Result.hpp b/demos/utils/words/Wordle/Result.hpp new file mode 100644 index 0000000000..efce04ed72 --- /dev/null +++ b/demos/utils/words/Wordle/Result.hpp @@ -0,0 +1,164 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Result.hpp + * + */ + +#ifndef DEMOS_UTILS_WORDS_WORDLE_RESULT_HPP_INCLUDE +#define DEMOS_UTILS_WORDS_WORDLE_RESULT_HPP_INCLUDE + +#include + +#include "emp/base/array.hpp" +#include "emp/base/error.hpp" +#include "emp/bits/BitVector.hpp" +#include "emp/math/math.hpp" + +template +class Result { +public: + enum PositionResult { NOWHERE, ELSEWHERE, HERE }; + static constexpr size_t NUM_IDS = emp::Pow(3, WORD_SIZE); + +private: + using results_t = emp::array; + + results_t results; + size_t id; + + /// Return a result array where each index is an associated (unique) possible result set. + static const results_t & LookupResult(size_t result_id) { + static emp::array result_array; + static bool init = false; + + // If this is our first time requsting the result array, generate it. 
+ if (!init) { + init = true; + for (size_t id = 0; id < NUM_IDS; ++id) { + size_t tmp_id = id; + for (size_t pos = WORD_SIZE-1; pos < WORD_SIZE; --pos) { + const size_t magnitude = emp::Pow(3, pos); + const size_t cur_result = tmp_id / magnitude; + result_array[id][pos] = static_cast(cur_result); + tmp_id -= cur_result * magnitude; + } + } + } + + return result_array[result_id]; + } + + /// Assume that we have results, calculate the associated ID. + void CalcID() { + size_t base = 1; + id = 0; + for (PositionResult r : results) { id += static_cast(r) * base; base *= 3; } + } + + /// Assume that we have an ID, lookup the correct results. + void CalcResults() { results = LookupResult(id); } + + /// Convert a results string of 'N's, 'E's, and 'W's into a Results object. + void FromString(const std::string & result_str) { + emp_assert(result_str.size() == WORD_SIZE); + for (size_t i=0; i < WORD_SIZE; ++i) { + switch (result_str[i]) { + case 'N': case 'n': results[i] = NOWHERE; break; + case 'E': case 'e': results[i] = ELSEWHERE; break; + case 'H': case 'h': results[i] = HERE; break; + default: + emp_error("Invalid character in result string", result_str[i]); + }; + } + } + +public: + /// Create a result by id. + Result(size_t _id) : id(_id) { CalcResults(); } + + /// Create a result by a result array. + Result(const results_t & _results) : results(_results) { CalcID(); } + + /// Create a result by a result string. + Result(const std::string & result_str) { FromString(result_str); } + + /// Create a result by an guess and answer pair. + Result(const std::string & guess, const std::string & answer) { + emp_assert(guess.size() == WORD_SIZE); + emp_assert(answer.size() == WORD_SIZE); + emp::BitVector used(answer.size()); + // Test perfect matches. + for (size_t i = 0; i < guess.size(); ++i) { + if (guess[i] == answer[i]) { results[i] = HERE; used.Set(i); } + } + // Test offset matches. 
+ for (size_t i = 0; i < guess.size(); ++i) { + if (guess[i] == answer[i]) continue; // already matched. + bool found = false; + for (size_t j = 0; j < answer.size(); ++j) { // seek a match elsewhere in answer! + if (!used.Has(j) && guess[i] == answer[j]) { + results[i] = ELSEWHERE; // found letter elsewhere! + used.Set(j); // make sure this letter is noted as used. + found = true; + break; // move on to next letter; we found this one. + } + } + if (!found) results[i] = NOWHERE; + } + CalcID(); // Now that we know the symbols, figure out the ID. + } + + Result(const Result & result) = default; + Result(Result && result) = default; + + Result & operator=(const std::string & result_str) { FromString(result_str); } + Result & operator=(const Result & result) = default; + Result & operator=(Result && result) = default; + + bool operator==(const Result & in) const { return id == in.id; } + bool operator!=(const Result & in) const { return id != in.id; } + bool operator< (const Result & in) const { return id < in.id; } + bool operator<=(const Result & in) const { return id <= in.id; } + bool operator> (const Result & in) const { return id > in.id; } + bool operator>=(const Result & in) const { return id >= in.id; } + + size_t GetID() const { return id; } + size_t GetSize() const { return WORD_SIZE; } + size_t size() const { return WORD_SIZE; } + + PositionResult operator[](size_t id) const { return results[id]; } + + // Test if this result is valid for the given word. + bool IsValid(const std::string & word) const { + // Disallow letters marked "NOWHERE" that are subsequently marked "ELSEWHERE" + // (other order is okay). 
+ for (size_t pos = 0; pos < WORD_SIZE-1; ++pos) { + if (results[pos] == NOWHERE) { + for (size_t pos2 = pos+1; pos2 < WORD_SIZE; ++pos2) { + if (results[pos2] == ELSEWHERE && word[pos] == word[pos2]) return false; + } + } + } + + return true; + } + + std::string ToString( + const std::string & here="H", + const std::string & elsewhere="E", + const std::string & nowhere="N" + ) const { + std::string out; // = emp::to_string(id, "-"); + for (auto x : results) { + if (x == HERE) out += here; + else if (x == ELSEWHERE) out += elsewhere; + else if (x == NOWHERE) out += nowhere; + } + return out; + } +}; + +#endif // #ifndef DEMOS_UTILS_WORDS_WORDLE_RESULT_HPP_INCLUDE diff --git a/demos/utils/words/Wordle/Wordle.cpp b/demos/utils/words/Wordle/Wordle.cpp new file mode 100644 index 0000000000..541817c273 --- /dev/null +++ b/demos/utils/words/Wordle/Wordle.cpp @@ -0,0 +1,579 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Wordle.cpp + */ + +#include +#include +#include +#include +#include + +#include "emp/base/Ptr.hpp" +#include "emp/base/vector.hpp" +#include "emp/bits/BitSet.hpp" +#include "emp/bits/BitVector.hpp" +#include "emp/config/command_line.hpp" +#include "emp/datastructs/map_utils.hpp" +#include "emp/datastructs/vector_utils.hpp" +#include "emp/io/File.hpp" +#include "emp/tools/string_utils.hpp" + +#include "Result.hpp" + + +template +class WordSet { +private: + static constexpr size_t MAX_LETTER_REPEAT = 4; + using word_list_t = emp::BitVector; + using result_t = Result; + + // Get the ID (0-26) associated with a letter. + static size_t ToID(char letter) { + emp_assert(letter >= 'a' && letter <= 'z'); + return static_cast(letter - 'a'); + } + + static char ToLetter(size_t id) { + emp_assert(id < 26); + return static_cast(id + 'a'); + } + + // All of the clues for a given position. 
+ struct PositionClues { + size_t pos; + std::array here; // Is a given letter at this position? + + void SetNumWords(size_t num_words) { + for (auto & x : here) x.resize(num_words); + } + }; + + // All of the clues for zero or more instances of a given letter. + struct LetterClues { + size_t letter; // [0-25] + std::array at_least; ///< Are there at least x instances of letter? (0 is meaningless) + std::array exactly; ///< Are there exactly x instances of letter? + + void SetNumWords(size_t num_words) { + for (auto & x : at_least) x.resize(num_words); + for (auto & x : exactly) x.resize(num_words); + } + }; + + struct WordData { + std::string word; + // Pre=processed data + emp::BitSet<26> letters; // What letters are in this word? + emp::BitSet<26> multi_letters; // What letters are in this word more than once? + std::array next_words; + + // Collected data + size_t max_options = 0; // Maximum number of word options after used as a guess. + double ave_options = 0.0; // Average number of options after used as a guess. + double entropy = 0.0; // What is the entropy (and thus information gained) for this choice? + + WordData(const std::string & in_word) : word(in_word) { + for (char x : word) { + size_t let_id = ToID(x); + if (letters.Has(let_id)) multi_letters.Set(let_id); + else letters.Set(let_id); + } + } + }; + + emp::vector words; ///< Data about all words in this Wordle + emp::array pos_clues; ///< A PositionClues object for each position. + emp::array let_clues; ///< Clues based off the number of letters. + std::unordered_map pos_map; ///< Map of words to their position ids. + word_list_t start_options; ///< Current options. + size_t start_count; ///< Count of start options (cached) + + std::istream & is; + std::ostream & os; + + bool verbose = true; + +public: + WordSet(std::istream & _is, std::ostream & _os) : is(_is), os(_os) { } + + /// Include a single word into this WordSet. 
+ void AddWord(std::string & in_word) { + size_t id = words.size(); // Set a unique ID for this word. + pos_map[in_word] = id; // Keep track of the ID for this word. + words.emplace_back(in_word); // Setup the word data. + } + + /// Load a whole series for words (from a file) into this WordSet + void Load() { + // Load in all of the words. + std::string in_word; + size_t wrong_size_count = 0; + size_t invalid_char_count = 0; + size_t dup_count = 0; + while (is) { + is >> in_word; + // Only keep words of the correct size and all lowercase. + if (in_word.size() != WORD_SIZE) { wrong_size_count++; continue; } + if (!emp::is_lower(in_word)) { invalid_char_count++; continue; } + if (emp::Has(pos_map, in_word)) { dup_count++; continue; } + AddWord(in_word); + } + + if (wrong_size_count) { + std::cerr << "Warning: eliminated " << wrong_size_count << " words of the wrong size." + << std::endl; + } + if (invalid_char_count) { + std::cerr << "Warning: eliminated " << invalid_char_count << " words with invalid characters." + << std::endl; + } + if (dup_count) { + std::cerr << "Warning: eliminated " << dup_count << " words that were duplicates." + << std::endl; + } + + if (verbose) std::cerr << "Loaded " << words.size() << " valid words." << std::endl; + } + + /// Clear out all prior guess information. + void ResetOptions() { + start_count = words.size(); + start_options.resize(start_count); + start_options.SetAll(); + } + + // Limit the current options based on a single guess and its result. + + word_list_t EvalGuess(const std::string & guess, const result_t & result) { + emp_assert(guess.size() == WORD_SIZE); + emp_assert(result.size() == WORD_SIZE); + + emp::array letter_counts; + std::fill(letter_counts.begin(), letter_counts.end(), 0); + emp::BitSet<26> letter_fail; + word_list_t word_options = start_options; + + // First add letter clues and collect letter information. 
+ for (size_t i = 0; i < WORD_SIZE; ++i) { + const size_t cur_letter = ToID(guess[i]); + if (result[i] == result_t::HERE) { + word_options &= pos_clues[i].here[cur_letter]; + ++letter_counts[cur_letter]; + } else if (result[i] == result_t::ELSEWHERE) { + word_options &= ~pos_clues[i].here[cur_letter]; + ++letter_counts[cur_letter]; + } else { // Must be 'N' + word_options &= ~pos_clues[i].here[cur_letter]; + letter_fail.Set(cur_letter); + } + } + + // Next add letter clues. + for (size_t letter_id = 0; letter_id < 26; ++letter_id) { + const size_t let_count = letter_counts[letter_id]; + if (let_count) { + word_options &= let_clues[letter_id].at_least[let_count]; + } + if (letter_fail.Has(letter_id)) { + word_options &= let_clues[letter_id].exactly[let_count]; + } + } + + return word_options; + } + + + void AnalyzeGuess(WordData & guess, const word_list_t & cur_words) { + size_t max_options = 0; + size_t total_options = 0; + size_t option_count = 0; + double entropy = 0.0; + const double word_count = static_cast(words.size()); + + // Scan through all of the possible result IDs. + for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + word_list_t next_options = guess.next_words[result_id] & cur_words; + size_t num_options = next_options.CountOnes(); + if (num_options > max_options) max_options = num_options; + total_options += num_options * num_options; + option_count++; + double p = static_cast(num_options) / word_count; + if (p > 0.0) entropy -= p * std::log2(p); + } + + guess.max_options = max_options; + guess.ave_options = static_cast(total_options) / static_cast(words.size()); + guess.entropy = entropy; + } + + + /// Once the words are loaded, Preprocess will collect info. + void Preprocess() { + std::cout << "Beginning pre-process phase..." << std::endl; + + // Setup all position clue info to know the number of words. 
+ for (size_t i=0; i < WORD_SIZE; ++i) { + pos_clues[i].pos = i; + pos_clues[i].SetNumWords(words.size()); + } + + // Setup all letter clue information + for (size_t let=0; let < 26; let++) { + let_clues[let].letter = let; + let_clues[let].SetNumWords(words.size()); + } + + // Counters for number of letters. + emp::array letter_counts; + + // Loop through each word, indicating which clues it is consistent with. + for (size_t word_id = 0; word_id < words.size(); ++word_id) { + const std::string & word = words[word_id].word; + + // Figure out which letters are in this word. + std::fill(letter_counts.begin(), letter_counts.end(), 0); // Reset counters to zero. + for (const char letter : word) ++letter_counts[ToID(letter)]; // Count letters. + + // Setup the LETTER clues that word is consistent with. + for (size_t letter_id = 0; letter_id < 26; ++letter_id) { + const size_t cur_count = letter_counts[letter_id]; + let_clues[letter_id].exactly[cur_count].Set(word_id); + for (uint8_t count = 0; count <= cur_count; ++count) { + let_clues[letter_id].at_least[count].Set(word_id); + } + } + + // Now figure out what POSITION clues it is consistent with. + for (size_t pos=0; pos < word.size(); ++pos) { + const size_t cur_letter = ToID(word[pos]); + pos_clues[pos].here[cur_letter].Set(word_id); + } + } + + std::cout << "...clues are initialized..." << std::endl; + + ResetOptions(); + + // Loop through words one more time, filling out result lists and collecting data. + size_t word_count = 0; + const size_t step = words.size() / 100; + for (auto & word_info : words) { + if (++word_count % step == 0) { + std::cout << "."; + std::cout.flush(); + } + for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + Result result(result_id); + if (!result.IsValid(word_info.word)) continue; + word_info.next_words[result_id] = EvalGuess(word_info.word, result_id); + } + AnalyzeGuess(word_info, start_options); + } + + std::cout << "...words are analyzed..." 
<< std::endl; + } + + // /// Also analyze non-word guesses. + // void AnalyzeAll() { + // std::string guess(WORD_SIZE, 'a'); + // size_t best_max_options = 10000; + // double best_ave_options = 10000.0; + // double best_entropy = 0.0; + // std::string best_max_options_word = ""; + // std::string best_ave_options_word = ""; + // std::string best_entropy_word = ""; + + // size_t silent_count = 0; // Keep a count of how many loops since out last output. + // while (true) { + // size_t max_options = 0; + // size_t total_options = 0; + // double entropy = 0.0; + + // // Scan through all possible answers... + // for (WordData & answer : words) { + // size_t options = AnalyzeGuess(guess, answer).CountOnes(); + // if (options > max_options) max_options = options; + // total_options += options; + // const double p = static_cast(options) / static_cast(start_count); + // entropy -= p * std::log2(p); + // } + // double ave_options = static_cast(total_options) / static_cast(words.size()); + + // ++silent_count; + // if (max_options < best_max_options) { + // best_max_options = max_options; + // best_max_options_word = guess; + // std::cout << "New best MAX options: " << guess << " : " << max_options << std::endl; + // silent_count = 0; + // } + // if (ave_options < best_ave_options) { + // best_ave_options = ave_options; + // best_ave_options_word = guess; + // std::cout << "New best AVE options: " << guess << " : " << ave_options << std::endl; + // silent_count = 0; + // } + // if (entropy > best_entropy) { + // best_entropy = entropy; + // best_entropy_word = guess; + // std::cout << "New best ENTROPY: " << guess << " : " << entropy << std::endl; + // silent_count = 0; + // } + // if (silent_count >= 10000) { + // std::cout << "...processing... ('" << guess << "')" << std::endl; + // silent_count = 0; + // } + + // // Now move on to the next word... + // size_t inc_pos = WORD_SIZE - 1; // find the first non-z letter. 
+ // while (inc_pos < WORD_SIZE && guess[inc_pos] == 'z') { + // guess[inc_pos] = 'a'; + // --inc_pos; + // } + // if (inc_pos == WORD_SIZE) break; + // ++guess[inc_pos]; + // } + // } + + /// Print all of the words with a given set of IDs. + void PrintWords(const word_list_t & word_ids, size_t max_count=(size_t)-1) const { + std::cout << "(" << word_ids.CountOnes() << " words) "; + size_t count = 0; + for (int id = word_ids.FindOne(); id >= 0; id = word_ids.FindOne(id+1)) { + if (count) std::cout << ","; + std::cout << words[id].word; + if (++count == max_count) { + if (id > 0) std::cout << " ..."; + break; + } + } + // std::cout << " (" << word_is.CountOnes() << " words)" << std::endl; + } + + void PrintPosClues(size_t pos) const { + const PositionClues & clue = pos_clues[pos]; + std::cout << "Position " << pos << ":\n"; + for (uint8_t i = 0; i < 26; ++i) { + std::cout << " '" << clue.let << "' : "; + PrintWords(clue.here[i], 10); + std::cout << std::endl; + } + } + + void PrintLetterClues(char letter) const { + const LetterClues & clue = let_clues[ToID(letter)]; + std::cout << "Letter '" << clue.letter << "':\n"; + for (size_t i = 0; i <= MAX_LETTER_REPEAT; ++i) { + std::cout << "EXACTLY " << i << ": "; + PrintWords(clue.exactly[i], 20); + std::cout << std::endl; + } + for (size_t i = 0; i <= MAX_LETTER_REPEAT; ++i) { + std::cout << "AT LEAST " << i << ": "; + PrintWords(clue.at_least[i], 20); + std::cout << std::endl; + } + } + + void PrintWordData(const WordData & word) const { + std::cout << "WORD: " << word.word << std::endl; + std::cout << "Letters: " << word.letters << std::endl; + std::cout << "Multi: " << word.multi_letters << std::endl; + std::cout << "MAX Opts: " << word.max_options << std::endl; + std::cout << "AVE Opts: " << word.ave_options << std::endl; + std::cout << "Entropy: " << word.entropy << std::endl; + std::cout << std::endl; + + size_t total_count = 0; + for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + result_t 
result(result_id); + word_list_t result_words = word.next_words[result_id]; + std::cout << result_id << " - " << result.ToString() << " "; + PrintWords(result_words, 10); + total_count += result_words.CountOnes(); + std::cout << std::endl; + } + std::cout << "Total Count: " << total_count << std::endl; + } + + void PrintWordData(size_t id) const { PrintWordData(words[id]); } + void PrintWordData(const std::string & word) { + PrintWordData(words[pos_map[word]]); + } + + // Reorder words. NOTE: This is destructive to all word_list data! + void SortWords(const std::string & sort_type="max") { + using wd_t = const WordData &; + if (sort_type == "max") { + emp::Sort(words, [](wd_t w1, wd_t w2){ + if (w1.max_options == w2.max_options) return w1.ave_options < w2.ave_options; // tiebreak + return w1.max_options < w2.max_options; + } ); + } else if (sort_type == "ave") { + emp::Sort(words, [](wd_t w1, wd_t w2){ + if (w1.ave_options == w2.ave_options) return w1.max_options < w2.max_options; // tiebreak + return w1.ave_options < w2.ave_options; + } ); + } else if (sort_type == "entropy") { + emp::Sort(words, [](wd_t w1, wd_t w2){ return w1.entropy > w2.entropy; } ); + } else if (sort_type == "word") { + emp::Sort(words, [](wd_t w1, wd_t w2){ return w1.word < w2.word; } ); + } + for (size_t i = 0; i < words.size(); i++) { pos_map[words[i].word] = i; } // Update ID tracking. + } + + /// Print all of the results, sorted by max number of options. + void PrintResults() { + SortWords(); + for (auto & word : words) { + std::cout << word.word + << ", " << word.max_options + << ", " << word.ave_options + << ", " << word.entropy + << std::endl; + } + } + + /// Print out all words as HTML. 
+ void PrintHTMLWord(const WordData & word) const { + std::string filename = emp::to_string("web/words/", word.word, ".html"); + std::ofstream of(filename); + + // const std::string black("⬛"); + static const std::string white("⬜"); + static const std::string green("🟩"); + static const std::string yellow("🟨"); + + of << "\n\n\n Wordle Analysis: '" + << word.word << "'\n\n\n"; + + of << "

Wordle Analysis: " << word.word << "

\n\n"; + of << "Worst case words remaining: " << word.max_options << "
\n"; + of << "Expected words remaining: " << word.ave_options << "
\n"; + of << "Information provided: " << word.entropy << "
\n

\n"; + + // Loop through all possible results. + // for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + for (size_t result_id = result_t::NUM_IDS-1; result_id < result_t::NUM_IDS; --result_id) { + result_t result(result_id); + word_list_t result_words = word.next_words[result_id]; + + of << result.ToString(green, yellow, white) << " (" << result_words.CountOnes() << " words) : "; + + for (int id = result_words.FindOne(); id >= 0; id = result_words.FindOne(id+1)) { + of << "" << words[id].word << " "; + } + + of << "
\n"; + } + + + of << "\n\n"; + + os << "Printed file '" << filename << "'." << std::endl; + } + + void PrintHTMLWordID(int id) const { PrintHTMLWord(words[(size_t) id]); } + void PrintHTMLWord(const std::string & word) { + PrintHTMLWord(words[pos_map[word]]); + } + + void PrintHTMLIndex(const std::string & order) { + SortWords(order); + std::string filename = emp::to_string("web/index-", order, ".html"); + std::ofstream of(filename); + + of << "\n\n\n Wordle Analysis: INDEX" + "\n\n\n" + "

Analysis of Wordle Guesses

\n" + "

\nWhen a guess is made in a game of Wordle, the results limit the set of words for the answer." + " A more useful guess will limit the remaining possibilities to be as small as possible." + " But the question remains: Which word should we choose first?" + " Here are some analyses to help make that decision.\n" + "

\nBelow are a list of 5-letter words " + "(from here)" + " with data on each. The columns are:
\n" + "\n" + " \n" + " \n" + "
ExpectedWords:" + " The average number of possible words if this were your first guess. (smaller is better!)
MaximumWords:" + " The largest possible number of words remaining after this guess. (smaller is better!)
Information:" + " The number of bits of information this guess provides about the final answer. (larger is better!)

\n" + "Click on any column to sort by it. " + "Click on any word to see the exact breakdown of how possible first guesses limit future options.\n" + "

\n"; + + of << "\n"; + for (const auto & word : words) { + of << "\n"; + } + } + + void PrintHTML() { + size_t count = 0; + std::cout << "Printing HTML files..." << std::endl; + size_t step = words.size() / 100; + for (auto & word : words) { + if (count % step == 0) { std::cout << "."; std::cout.flush(); } + PrintHTMLWord(word); + } + PrintHTMLIndex("ave"); + PrintHTMLIndex("entropy"); + PrintHTMLIndex("max"); + PrintHTMLIndex("word"); + } + +}; + +int main(int argc, char* argv[]) +{ + emp::vector args = emp::cl::args_to_strings(argc, argv); + + if (args.size() > 3) { + std::cerr << "May provide am input filename (with the word list to use) and output filename (for results)" + << std::endl; + exit(1); + } + + emp::Ptr is_ptr = &std::cin; + if (args.size() > 1) is_ptr = emp::NewPtr(args[1]); + + emp::Ptr os_ptr = &std::cout; + if (args.size() > 2) os_ptr = emp::NewPtr(args[2]); + + WordSet<5> word_set(*is_ptr, *os_ptr); + word_set.Load(); + word_set.SortWords("word"); + + word_set.Preprocess(); + // word_set.AddClue(0,'a',result_t::ELSEWHERE); + // word_set.AddClue(1,'l',result_t::ELSEWHERE); + // word_set.AddClue(2,'o',result_t::NOWHERE); + // word_set.AddClue(3,'e',result_t::NOWHERE); + // word_set.AddClue(4,'s',result_t::NOWHERE); + + // word_set.PrintLetterClues('x'); + // word_set.PrintPosClues(0); + // word_set.PrintWordData(0); + // word_set.PrintWordData("aloes"); + // word_set.PrintResults(); + // word_set.AnalyzeAll(); + // word_set.PrintHTMLWordID(0); + // word_set.PrintHTMLWord("aloes"); + word_set.PrintHTML(); + + if (args.size() > 1) is_ptr.Delete(); + if (args.size() > 2) os_ptr.Delete(); +} diff --git a/doc/QuickStartGuides/3-WebTools.md b/doc/QuickStartGuides/3-WebTools.md index 6a4bf8577c..4cb630c074 100644 --- a/doc/QuickStartGuides/3-WebTools.md +++ b/doc/QuickStartGuides/3-WebTools.md @@ -78,13 +78,13 @@ do is compile. The provided Makefile can be run by typing `make Example.js`. 
This will trigger: ```shell -emcc -std=c++17 -Wall -Wno-unused-function -I../../include/emp/ -Os -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 Example.cc -o Example.js +emcc -std=c++20 -Wall -Wno-unused-function -I../../include/emp/ -Os -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 Example.cc -o Example.js ``` - emscripten uses the `emcc` compiler (or `em++`, since we are using C++). -- `-std=c++17` : Empirical requires c++17. +- `-std=c++20` : Empirical requires c++20. - `-Wall -Wno-unused-function` : turn on all warnings by default except for unused functions, since not all library functions are diff --git a/doc/blogs/Binomial.md b/doc/blogs/Binomial.md new file mode 100644 index 0000000000..551b6d85bf --- /dev/null +++ b/doc/blogs/Binomial.md @@ -0,0 +1,43 @@ +# Drawing random values from non-uniform distributions: A binomial case study + +One common challenge in scientific computing is drawing from specific random distributions. +These can be time-consuming and hard to make accurate, especially when rare events are +important to include. + +There are many different mathematical distributions to consider. For any common distribution, +you should be able to find plenty of information about it on the internet. There are a +handful of specific questions to ask: + +1. Is there a simple, accurate conversion from a uniform [0.0, 1.0) distribution -- like those +produced by most random number generators -- to the distribution I need? + +2. If not, how close of an approximation can I get? Is it good enough? + +3. If not, am I going to be using the same parameters over and over such that I can do some +pre-processing to produce a fast result? 
(For example, am I using a fair 6-sided die and so I +know each outcome always has a 1/6 chance of showing up?) + +4. If not, how much traditional optimization can I use in the brute-force calculation? + +Here, I am going to focus on *Binomial Distributions* and some other related distributions, +but the logic that I use is applicable elsewhere. + +As a reminder: + +A **Binomial Distribution** asks: If an event is going to occur with probability *p* and we test +for it *N* times, how many times will the event actually occur? *Example*: Each time a +programmer writes a line of code, there is a *p*=0.03 chance that she introduces a bug. How many +bugs does she create after *N*=100 lines of code? + +A **Negative Binomial Distribution** turns this around: If an event is going to occur with +probability *p*, how many times do we need to test for it before it actually occurs *N* times? +*Example*: Given a *p*=0.03 chance of introducing a bug with each line of code, how many lines +would a programmer need to write to reach *N*=10 bugs? + +A **Geometric Distribution** is a special case of the Negative Binomial Distribution where *N*=1. +*Example*: Given a *p*=0.03 chance of introducing a bug, how many lines can a programmer write +before introducing the next bug? + +A **Poisson Distribution** is a continuous version of a Binomial Distribution, used for measuring +the number of independent events that occur in a time period rather than during a specified +number of events. diff --git a/doc/library/Evolve/evolve.md b/doc/library/Evolve/evolve.md new file mode 100644 index 0000000000..664ce6e740 --- /dev/null +++ b/doc/library/Evolve/evolve.md @@ -0,0 +1,30 @@ +# Evolution tools + +## World + +```{eval-rst} +.. doxygenfile:: emp/Evolve/World.hpp + :project: Empirical + :no-link: +``` + +## Systematics + +```{ref} systematics +``` + +## NK + +```{eval-rst} +.. doxygenfile:: emp/Evolve/NK.hpp + :project: Empirical + :no-link: +``` + +## Selection + +```{eval-rst} +.. 
doxygenfile:: emp/Evolve/World_select.hpp + :project: Empirical + :no-link: +``` diff --git a/doc/library/Evolve/systematics.rst b/doc/library/Evolve/systematics.rst new file mode 100644 index 0000000000..2d1f2b8d9f --- /dev/null +++ b/doc/library/Evolve/systematics.rst @@ -0,0 +1,195 @@ +.. SystematicsDocumentation documentation master file, created by + sphinx-quickstart on Thu May 28 16:40:07 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +Documentation for Systematics +==================================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + +Systematics +=========== + +Systematics is a classification of organisms based on evolutionary (phylogenetic) relationships. + +*************** +Systematics.h +*************** + +This file is part of Empirical and is located in ``Empirical/source/Evolve/Systematics.h`` + +The systematics manager is used to track genotypes, species, clades, or lineages of organisms in a world. + +Systematics allows a user to generate data to form phylogenetic trees. + +The program can be run with different levels of abstraction, meaning the data can be generated by position, +phenotype, or even genotype if you have a lot of RAM. + +**Note**: You are responsible for filling in templates! Adding the template just gives you a place to store your data. + +Taxon Specifics +=============== + +* Taxon - a group of species with similar characteristics +* Genotypes are the most commonly used Taxon + +A user can see the type and number of mutations that occurred to bring about a taxon. 
+ +Some information that can be accessed is: + +* taxon ID# ``GetID()`` +* details of organisms in the taxon ``GetInfo()`` +* pointer to the parent group (will return a null pointer if the species was injected) ``GetParent()`` +* how many organisms currently exist in the group and how many total organisms have ever existed in the group ``GetNumOrgs()`` or ``GetTotOrgs()`` +* how many direct offspring groups exist from this group and how many total extant offspring that exist from this taxa ``GetTotalOffspring()`` +* how deep in the tree the node you are examining is ``GetDepth()`` +* when did this taxon first appear in the population ``GetOriginationTime()`` +* when did the taxon leave the population ``GetDestructionTime()`` + +New organisms are added to the taxon using ``AddOrg()``. +New offspring are added to the taxon with ``AddOffspring()`` . + +Organisms are removed with ``RemoveOrg()``. +Offspring are removed with ``RemoveOffspring()`` . + +If there are no more remaining organisms or offspring the taxon will deactivate. + + +General Systematics Data +========================= + +Things that systematics can tell you about a phylogeny and how to access them: + +* Are we tracking a synchronous population? ``GetTrackSynchronous()`` ``SetTrackSynchronous()`` +* Are we storing all taxa that are still alive in the population? ``GetStoreActive()`` ``SetStoreActive()`` +* Are we storing all taxa that are ancestors of the living organisms in the population? ``GetStoreAncestors()`` ``SetStoreAncestors()`` +* Are we storing all taxa that have died out, as have all of their descendants? ``GetStoreOutside()`` ``SetStoreOutside()`` +* Are we storing any taxa types that have died out? ``GetArchive()`` ``SetArchive()`` +* Are we storing the positions of taxa? ``GetStorePosition()`` ``SetStorePosition()`` +* How many living organisms are currently being tracked? ``GetTotalOrgs()`` +* How many independent trees are being tracked? 
``GetNumRoots()`` +* What ID will the next taxon have? ``GetNextID()`` +* What is the average phylogenetic depth of organisms in the population? ``GetAveDepth()`` +* To find the most recent common ancestor (MRCA) use ``GetMRCA()`` or ``GetMRCADepth()`` to find the distance to the MRCA. + +**The systematics class tracks the relationships among all organisms based on the INFO_TYPE +provided. If an offspring has the same value for INFO_TYPE as its parent, it is grouped into +the same taxon. Otherwise a new Taxon is created and the old one is used as its parent in +the phylogeny. If the provided INFO_TYPE is the organism's genome, a traditional phylogeny +is formed, with genotypes. If the organism's behavior/task set is used, then organisms are +grouped by phenotypes. If the organism's position is used, the evolutionary path through +space is tracked. Any other aspect of organisms can be tracked this way as well.** + + +**Generally, all living organisms' taxa should be tracked and ancestral organisms' taxa should be maintained for lineage. +However, not all dead taxa should be maintained, it gets too big.** + +*************************** +Diversity and Distinction +*************************** + +Systematics.h can also be used to find phylogenetic diversity for all extant taxa in the tree, +assuming all edges from parent to child have a length of one. + +When all branch lengths are equal, the phylogenetic diversity is the number of internal nodes plus the number of +extant taxa minus 1. + +You can also find how distinct a specific taxon is from the rest of the population +based on the amount of unique evolutionary history that it represents. + +***************************** +Synchronous Populations +***************************** + +A synchronous population is a population in which each generation is a discrete time point +and a completely new set of individual organisms is created for each generation. 
This means that +an organism and its parent can never exist at the same time. + +An asynchronous population is the opposite, where generations overlap and organisms reproduce +when they are ready. + +In the systematics manager, synchronicity is controlled with + +``GetTrackSynchronous()`` which returns true or false and +``SetTrackSynchronous(input true or false)`` which allows you to use a synchronous or asynchronous population. + + +Using the Systematics Manager +============================== + +The systematics.h file alone will not give you any useful information. You must use a test file in conjunction with the systematics manager +in order to see output. + +To retrieve some results we will use the file Systematics.cc +which is located in Empirical/tests/Evolve/Systematics.cc. + +To compile the code use this command in the tests directory:: + + make test-Systematics + + +********** +Output +********** + +Terminal Output:: + + AddOrg 25 (id1, no parent) + + AddOrg -10 (id2; parent id1) + + AddOrg 26 (id3, parent id1) + + AddOrg 27 (id4, parent id2) + +The first line of output shows the first organism in the examined phylogeny. This organism is added with AddOrg +and is assigned an ID of id1. The organism has no parent, as seen in the farthest column of output, meaning that +organism id1 will be the root of the phylogeny and produce offspring. + +If we then look at the first number in parentheses, we see the second organism with an ID of id2. Id2 is a direct descendant of the id1 organism. + +Lastly, if we look at id4, we see that its parent is id2, meaning that we have created another node in the tree +as the organisms move through generations, producing new offspring. + +The terminal output should also include this section:: + + Active count: 11 [18|1,0|17] [17|1,2|11] [15|1,0|null] [12|1,1|11] [16|1,0|11] [11|1,3|null] [6|1,0|5] [19|1,0|17] [5|1,1|null] [4|1,0|null] [3|1,0|null] + + +The 11 at the front refers to the number of total taxa in the phylogeny. 
+ +If we look at the first set of numbers: ``[18|1, 0|17]`` + +The first number in brackets, 18 in this case, is the taxon of the organism where +a mutation occurred. 1, the next number, is the number of mutations that led to this branch. +0 is the number of offspring from this organism. Lastly, 17 is the id of the parent organism. + +As for the second set ``[17|1, 2|11]`` -- this is taxon 17, one mutation occurred, +id17 had 2 offspring, and its parent is id11. + +The last portion of the output has several lines of 3 numbers. + +It should look like this: :: + + 1 : 0 : -1 + 2 : 0 : -1 + 3 : 0 : 0 + 4 : 0 : 0 + 5 : 0 : 0 + 6 : 0 : 0 + 7 : 0 : 0 + 8 : 0 : 987 + 9 : 0 : 986 + 10 : 0 : 987 + 11 : 0 : 988 + 12 : 0 : 987 + 13 : 0 : 988 + +The first number is the organism number. The second number is the position of the organism. +The third number is the fitness of the organism at position 0. diff --git a/doc/library/index.md b/doc/library/index.md new file mode 100644 index 0000000000..642bfc67e9 --- /dev/null +++ b/doc/library/index.md @@ -0,0 +1,25 @@ +# Using Empirical + +Contents: + +```{toctree} +:maxdepth: 1 + +base/base +bits/bits +compiler/compiler +data/data +datastructs/datastructs +debug/debug +Evolve/evolve +functional/functional +io/io +math/math +prefab/prefab +testing/testing +tools/tools +web/web +``` + +- {ref}`genindex` +- {ref}`search` diff --git a/examples/Evolve/Makefile b/examples/Evolve/Makefile index ffda44e599..79e9682ea1 100644 --- a/examples/Evolve/Makefile +++ b/examples/Evolve/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/Evolve/ShrinkPop.cpp b/examples/Evolve/ShrinkPop.cpp index 1796083a08..c6b98e36c8 100644 --- a/examples/Evolve/ShrinkPop.cpp +++ b/examples/Evolve/ShrinkPop.cpp @@ -17,7 +17,7 @@ int main() { constexpr size_t POP_SIZE = 
3600; - constexpr size_t GENS = 10000; + // constexpr size_t GENS = 10000; const size_t POP_SIDE = (size_t) std::sqrt(POP_SIZE); emp::Random random; diff --git a/examples/OLD/Empower/Makefile b/examples/OLD/Empower/Makefile index afbb1ca155..949dc971ef 100644 --- a/examples/OLD/Empower/Makefile +++ b/examples/OLD/Empower/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/ProjectTemplate/Makefile b/examples/ProjectTemplate/Makefile index dc041579d5..7ff537c25f 100644 --- a/examples/ProjectTemplate/Makefile +++ b/examples/ProjectTemplate/Makefile @@ -3,7 +3,7 @@ PROJECT := project_name EMP_DIR := ../../../Empirical/include # Flags to use regardless of compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ diff --git a/examples/base/Makefile b/examples/base/Makefile index 0540edf6de..9223056b51 100644 --- a/examples/base/Makefile +++ b/examples/base/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/base/assert.cpp b/examples/base/assert.cpp index 2a2f5698f0..17ff725b69 100644 --- a/examples/base/assert.cpp +++ b/examples/base/assert.cpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2020 + * date: 2020-2022 */ /** * @file @@ -12,7 +12,7 @@ int main() { - int x{ 42 }; + [[maybe_unused]] int x{ 42 }; emp_assert(x > 1, "This assert passes in debug mode!", x); diff --git a/examples/bits/BitVector.cpp 
b/examples/bits/BitVector.cpp index 35909c74e4..f9325c51d8 100644 --- a/examples/bits/BitVector.cpp +++ b/examples/bits/BitVector.cpp @@ -46,6 +46,7 @@ int main() auto set5 = set3 & set4; total += set5.CountOnes(); } + std::cout << "Total = " << total << std::endl; std::clock_t emp_tot_time = std::clock() - emp_start_time; double time = 1000.0 * ((double) emp_tot_time) / (double) CLOCKS_PER_SEC; diff --git a/examples/bits/Makefile b/examples/bits/Makefile index 2fecbac619..58396ca87e 100644 --- a/examples/bits/Makefile +++ b/examples/bits/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/compiler/Makefile b/examples/compiler/Makefile index 035b3dd1a6..5c03062b0f 100644 --- a/examples/compiler/Makefile +++ b/examples/compiler/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/config/Makefile b/examples/config/Makefile index 837c3a6f7b..534041b3a4 100644 --- a/examples/config/Makefile +++ b/examples/config/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/control/Makefile b/examples/control/Makefile index f20b711572..ecdb48d9d2 100644 --- a/examples/control/Makefile +++ b/examples/control/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/data/Makefile 
b/examples/data/Makefile index 48766adf83..4c0366926b 100644 --- a/examples/data/Makefile +++ b/examples/data/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/datastructs/Makefile b/examples/datastructs/Makefile index a4ca0afc60..9d79199f4c 100644 --- a/examples/datastructs/Makefile +++ b/examples/datastructs/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/functional/Makefile b/examples/functional/Makefile index 72b888fa6a..cb3b2e3280 100644 --- a/examples/functional/Makefile +++ b/examples/functional/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/games/Makefile b/examples/games/Makefile index c4b7a366c8..0a54a5eaf9 100644 --- a/examples/games/Makefile +++ b/examples/games/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/geometry/Makefile b/examples/geometry/Makefile index d94514c4bb..2aaada367f 100644 --- a/examples/geometry/Makefile +++ b/examples/geometry/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/hardware/Makefile b/examples/hardware/Makefile index 
cb73cfa432..f20f3246cc 100644 --- a/examples/hardware/Makefile +++ b/examples/hardware/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/io/File.cpp b/examples/io/File.cpp index 0d7c4b88e2..a77cfb711a 100644 --- a/examples/io/File.cpp +++ b/examples/io/File.cpp @@ -30,31 +30,31 @@ int main() spreadsheet.Write(std::cout); - emp::vector first_col = spreadsheet.ExtractCol(); + emp::vector first_col = spreadsheet.ExtractCol(); std::cout << "\nAfter column is extracted:" << std::endl; spreadsheet.Write(std::cout); - std::cout << "Extracted column: " << emp::to_string(first_col) << std::endl; + std::cout << "Extracted column: " << emp::MakeString(first_col) << std::endl; emp::vector second_col = spreadsheet.ExtractColAs(); std::cout << "\nAfter another column is extracted as size_t:" << std::endl; spreadsheet.Write(std::cout); - std::cout << "Extracted column: " << emp::to_string(second_col) << std::endl; + std::cout << "Extracted column: " << emp::MakeString(second_col) << std::endl; - emp::vector first_row = spreadsheet.ExtractRow(); + emp::vector first_row = spreadsheet.ExtractRow(); std::cout << "\nAfter a row is extracted:" << std::endl; spreadsheet.Write(std::cout); - std::cout << "Extracted row: " << emp::to_string(first_row) << std::endl; + std::cout << "Extracted row: " << emp::MakeString(first_row) << std::endl; emp::vector second_row = spreadsheet.ExtractRowAs(); std::cout << "\nAfter a row is extracted as size_t:" << std::endl; spreadsheet.Write(std::cout); - std::cout << "Extracted row: " << emp::to_string(second_row) << std::endl; + std::cout << "Extracted row: " << emp::MakeString(second_row) << std::endl; spreadsheet.Append("1000,1001,1002,1003"); auto full_data = spreadsheet.ToData(); std::cout << "\nAfter all remaining data is extracted as 
size_t:" << std::endl; spreadsheet.Write(std::cout); - std::cout << "Extracted data: " << emp::to_string(full_data) << std::endl; + std::cout << "Extracted data: " << emp::MakeString(full_data) << std::endl; } diff --git a/examples/io/Makefile b/examples/io/Makefile index ab2d1f9620..12d64df3e5 100644 --- a/examples/io/Makefile +++ b/examples/io/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/math/CombinedBinomialDistribution.cpp b/examples/math/CombinedBinomialDistribution.cpp new file mode 100644 index 0000000000..f053aba8a6 --- /dev/null +++ b/examples/math/CombinedBinomialDistribution.cpp @@ -0,0 +1,35 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022-2022 + * + * @file CombinedBinomialDistribution.cpp + * @brief Some example code for using emp::CombinedBinomialDistribution + */ + + +#include "emp/math/CombinedBinomialDistribution.hpp" +#include "emp/math/Random.hpp" + +int main(int argc, char* argv[]) +{ + if(argc != 4){ + std::cout << "Error!
Expecting exactly three command line arguments: " + << "p n num_trials" << std::endl; + emp_assert(false); + } + double p = std::stod(argv[1]); + size_t n = std::stoi(argv[2]); + size_t num_trials = std::stoi(argv[3]); + + emp::Random random; + emp::CombinedBinomialDistribution distribution(p, 1); + + double mean = 0; + + for(size_t i = 0; i < num_trials; i++){ + mean += (double)distribution.PickRandom(n, random) / num_trials; + } + std::cout << "Mean after " << num_trials << " trials: " << mean << std::endl; + return 0; +} diff --git a/examples/math/Makefile b/examples/math/Makefile index 0273c03544..d582e906cd 100644 --- a/examples/math/Makefile +++ b/examples/math/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc @@ -19,7 +19,7 @@ CFLAGS_web_debug := $(CFLAGS_all) $(OFLAGS_web_debug) --js-library ../../include CFLAGS_web_opt := $(CFLAGS_all) $(OFLAGS_web_opt) --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 #CFLAGS_web := $(CFLAGS_all) $(OFLAGS_web) --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1 -TARGETS := combos constants Distribution info_theory math Random Range stats +TARGETS := combos constants Distribution info_theory math Random Range stats CombinedBinomialDistribution default: native diff --git a/examples/math/Random.cpp b/examples/math/Random.cpp index 8bb69dbe0f..7f99022976 100644 --- a/examples/math/Random.cpp +++ b/examples/math/Random.cpp @@ -21,6 +21,8 @@ int main() std::cout << "Digits in random orders:" << std::endl; size_t num_samples = 10; + + std::cout << "Permutations: " << std::endl; for (size_t s = 0; s < num_samples; s++) { emp::vector permut = emp::GetPermutation(random, 
10); for (size_t i = 0; i < 10; i++) { @@ -28,4 +30,17 @@ int main() } std::cout << std::endl; } + + std::cout << "Exponentials: " << std::endl; + double p = 0.5; + std::cout << "p = " << p << std::endl; + for (size_t s = 0; s < num_samples; s++) std::cout << random.GetExponential(p) << std::endl; + + p = 0.1; + std::cout << "\np = " << p << std::endl; + for (size_t s = 0; s < num_samples; s++) std::cout << random.GetExponential(p) << std::endl; + + p = 0.9; + std::cout << "\np = " << p << std::endl; + for (size_t s = 0; s < num_samples; s++) std::cout << random.GetExponential(p) << std::endl; } diff --git a/examples/math/Range.cpp b/examples/math/Range.cpp index 19dc6296ee..6a62e57987 100644 --- a/examples/math/Range.cpp +++ b/examples/math/Range.cpp @@ -19,7 +19,7 @@ int main() std::cout << "Upper = " << range.GetUpper() << std::endl; for (int i = 10; i < 40; i += 5) { - std::cout << "Value " << i << " valid = " << range.Valid(i) << std::endl; + std::cout << "Value " << i << " valid = " << range.Has(i) << std::endl; } for (size_t s = 4; s <= 8; s++) { diff --git a/examples/meta/Makefile b/examples/meta/Makefile index 8b534f319b..d162e47b45 100644 --- a/examples/meta/Makefile +++ b/examples/meta/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/prefab/Makefile b/examples/prefab/Makefile index e2d39568f5..37cff3b33f 100644 --- a/examples/prefab/Makefile +++ b/examples/prefab/Makefile @@ -7,7 +7,7 @@ # WebAssembly. 
# Flags to use regardless of compiler -CFLAGS_all := -std=c++17 -Wall -Wno-unused-function -I../../include/ +CFLAGS_all := -std=c++20 -Wall -Wno-unused-function -I../../include/ # Emscripten compiler information CXX_web := emcc diff --git a/examples/scholar/Makefile b/examples/scholar/Makefile index f32e235068..16e119f2df 100644 --- a/examples/scholar/Makefile +++ b/examples/scholar/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/testing/Makefile b/examples/testing/Makefile index 9cdb06c2db..09e5b3b8aa 100644 --- a/examples/testing/Makefile +++ b/examples/testing/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/timing/Binomial.cpp b/examples/timing/Binomial.cpp new file mode 100644 index 0000000000..f69cfa2e9e --- /dev/null +++ b/examples/timing/Binomial.cpp @@ -0,0 +1,198 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Binomial.cpp + * + */ + +#include +#include + +#include "../../include/emp/math/Distribution.hpp" +#include "../../include/emp/math/Random.hpp" +#include "../../include/emp/tools/string_utils.hpp" + +void TestGeometric(emp::Random & random, const double p, const size_t num_tests=1000000) { + std::cout << emp::ANSI_GreenBG() << emp::ANSI_Black() + << "---- Geometric Tests: p = " << p << " ----" + << emp::ANSI_Reset() + << std::endl; + + ////////- Pre-processed distribution + emp::NegativeBinomial dist(p, 1); + + std::clock_t start_time = std::clock(); + + double total = 0; + 
for (size_t i = 0; i < num_tests; i++) { + total += dist.PickRandom(random); + } + + std::clock_t tot_time = std::clock() - start_time; + double result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "Negative Binomial Distribution with p = " << p << " (and N=1)\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " dist size = " << dist.GetSize() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + + ////////- Random call (no pre-process) + start_time = std::clock(); + + total = 0; + for (size_t i = 0; i < num_tests; i++) { + total += random.GetGeometric(p); + } + + tot_time = std::clock() - start_time; + result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "random.GetGeometric(p) with p = " << p << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + +} + +void TestNegBinomial( + emp::Random & random, + const double p, + const size_t N, + const size_t num_tests=1000000) +{ + std::cout << emp::ANSI_BrightBlueBG() + << "---- Negative Binomial Tests: p = " << p << " ; N = " << N << " ----" + << emp::ANSI_Reset() + << std::endl; + + ////////- Pre-processed distribution + emp::NegativeBinomial dist(p, N); + + std::clock_t start_time = std::clock(); + + double total = 0; + for (size_t i = 0; i < num_tests; i++) { + total += dist.PickRandom(random); + } + + std::clock_t tot_time = std::clock() - start_time; + double result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "Negative Binomial Distribution with p = " << p << " and N = " << N << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." 
<< emp::ANSI_NoBold() << "\n" + << " dist size = " << dist.GetSize() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + + ////////- Random call (no pre-process) + start_time = std::clock(); + + total = 0; + for (size_t i = 0; i < num_tests; i++) { + for (size_t n = 0; n < N; ++n) { + total += random.GetGeometric(p); + } + } + + tot_time = std::clock() - start_time; + result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "N = " << N << " calls to random.GetGeometric(p) with p = " << p << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; +} + +void TestBinomial( + emp::Random & random, + const double p, + const size_t N, + const size_t num_tests=1000000) +{ + std::cout << emp::ANSI_MagentaBG() + << "---- Binomial Tests: p = " << p << " ; N = " << N << " ----" + << emp::ANSI_Reset() + << std::endl; + + ////////- Pre-processed distribution + emp::Binomial dist(p, N); + + std::clock_t start_time = std::clock(); + + double total = 0; + for (size_t i = 0; i < num_tests; i++) { + total += dist.PickRandom(random); + } + + std::clock_t tot_time = std::clock() - start_time; + double result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "Binomial Distribution with p = " << p << " and N = " << N << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." 
<< emp::ANSI_NoBold() << "\n" + << " dist size = " << dist.GetSize() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + + ////////- Random call (no pre-process) + start_time = std::clock(); + + total = 0; + for (size_t i = 0; i < num_tests; i++) { + size_t pos = 0; + while( (pos += random.GetGeometric(p)) < N ) { + total++; + } + } + + tot_time = std::clock() - start_time; + result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "N = " << N << " calls to random.GetGeometric(p) with p = " << p << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; +} + +int main() +{ + size_t num_tests = 1000000; + emp::Random random; + + TestGeometric(random, 0.9, num_tests); + TestGeometric(random, 0.5, num_tests); + TestGeometric(random, 0.1, num_tests); + TestGeometric(random, 0.01, num_tests); + TestGeometric(random, 0.001, num_tests); + TestGeometric(random, 0.0001, num_tests); + + TestNegBinomial(random, 0.9, 10, num_tests); + TestNegBinomial(random, 0.5, 10, num_tests); + TestNegBinomial(random, 0.1, 10, num_tests); + TestNegBinomial(random, 0.01, 10, num_tests); + TestNegBinomial(random, 0.001, 10, num_tests); + TestNegBinomial(random, 0.0001, 10, num_tests); + + TestNegBinomial(random, 0.9, 100, num_tests); + TestNegBinomial(random, 0.5, 100, num_tests); + TestNegBinomial(random, 0.1, 100, num_tests); + TestNegBinomial(random, 0.01, 100, num_tests); + TestNegBinomial(random, 0.001, 100, num_tests); + TestNegBinomial(random, 0.0001, 100, num_tests); + + TestBinomial(random, 0.9, 100, num_tests); + TestBinomial(random, 0.5, 100, num_tests); + TestBinomial(random, 0.1, 100, num_tests); + TestBinomial(random, 0.01, 100, num_tests); + TestBinomial(random, 0.001, 100, num_tests); + TestBinomial(random, 0.0001, 100, num_tests); + + TestBinomial(random, 0.9, 1000, num_tests); + TestBinomial(random, 0.5, 1000, num_tests); + 
TestBinomial(random, 0.1, 1000, num_tests); + TestBinomial(random, 0.01, 1000, num_tests); + TestBinomial(random, 0.001, 1000, num_tests); + TestBinomial(random, 0.0001, 1000, num_tests); +} diff --git a/examples/timing/Makefile b/examples/timing/Makefile index c1ae00ec39..f5a8deb74b 100644 --- a/examples/timing/Makefile +++ b/examples/timing/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/timing/bit_timings.cpp b/examples/timing/bit_timings.cpp index f2882af113..1874dfbd35 100644 --- a/examples/timing/bit_timings.cpp +++ b/examples/timing/bit_timings.cpp @@ -62,8 +62,8 @@ struct SpeedTester_impl : public SpeedTester_impl, OBJ_COUNT > bs_objs; - emp::array< emp::BitVector, OBJ_COUNT > bv_objs; + emp::array< emp::old::BitSet, OBJ_COUNT > bs_objs; + emp::array< emp::old::BitVector, OBJ_COUNT > bv_objs; using base_t = SpeedTester_impl; diff --git a/examples/tools/Makefile b/examples/tools/Makefile index 60251f1d80..f714250b0f 100644 --- a/examples/tools/Makefile +++ b/examples/tools/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/web/Makefile b/examples/web/Makefile index ecc3c5871a..42404c97a7 100644 --- a/examples/web/Makefile +++ b/examples/web/Makefile @@ -7,7 +7,7 @@ # WebAssembly. 
# Flags to use regardless of compiler -CFLAGS_all := -std=c++17 -Wall -Wno-unused-function -I../../include/ +CFLAGS_all := -std=c++20 -Wall -Wno-unused-function -I../../include/ # Emscripten compiler information CXX_web := emcc diff --git a/examples/web/Sudoku.cpp b/examples/web/Sudoku.cpp index f831bf192b..664296e47a 100644 --- a/examples/web/Sudoku.cpp +++ b/examples/web/Sudoku.cpp @@ -84,7 +84,7 @@ class SudokuBoard : public UI::Div { for (size_t r = 0; r < 9; r++) { for (size_t c = 0; c < 9; c++) { auto cell = table.GetCell(r,c); - cell.On("mousedown", [cell,r,c]() mutable { + cell.On("mousedown", [cell/*,r,c*/]() mutable { // doc.Div("table_bg").SetBackground("red"); // cell.SetCSS("BackgroundColor", "grey"); cell.Clear(); diff --git a/examples/web/assert.cpp b/examples/web/assert.cpp index ab6a13ed60..4bfc6e36f5 100644 --- a/examples/web/assert.cpp +++ b/examples/web/assert.cpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2020 + * date: 2020-2022 */ /** * @file @@ -12,7 +12,7 @@ int main() { - int x{ 42 }; + [[maybe_unused]] int x{ 42 }; emp_assert(x > 1, "This assert passes in debug mode!", x); diff --git a/include/emp/Evolve/NK-const.hpp b/include/emp/Evolve/NK-const.hpp index c0b7fef26e..3ec437c9c4 100644 --- a/include/emp/Evolve/NK-const.hpp +++ b/include/emp/Evolve/NK-const.hpp @@ -94,7 +94,7 @@ namespace evo { /// Get the fitness of a whole bitstring double GetFitness(const BitSet & genome) const { // Create a double-length genome to easily handle wrap-around. 
- BitSet genome2( genome.template Export() ); + BitSet genome2( genome.template ExportArray() ); genome2 |= (genome2 << N); double total = 0.0; diff --git a/include/emp/Evolve/Systematics.hpp b/include/emp/Evolve/Systematics.hpp index 6681ce3d76..518723e581 100644 --- a/include/emp/Evolve/Systematics.hpp +++ b/include/emp/Evolve/Systematics.hpp @@ -1,11 +1,11 @@ -/* - * This file is part of Empirical, https://github.com/devosoft/Empirical - * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2023 -*/ /** - * @file - * @brief TODO. + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2017-2023 + * + * @file Systematics.hpp + * @brief Track genotypes, species, clades, or lineages of organisms in a world. + * * * @todo Technically, we don't need to keep the ancestors in a set in order to track a lineage... * If we delete all of their descendants they should automaticaly be deleted. @@ -26,7 +26,6 @@ #include #include #include -#include #include #include "../base/Ptr.hpp" diff --git a/include/emp/Evolve/SystematicsAnalysis.hpp b/include/emp/Evolve/SystematicsAnalysis.hpp index e6a975d9ff..1f49d9db25 100644 --- a/include/emp/Evolve/SystematicsAnalysis.hpp +++ b/include/emp/Evolve/SystematicsAnalysis.hpp @@ -1,13 +1,10 @@ -/* - * This file is part of Empirical, https://github.com/devosoft/Empirical - * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2018-2023 -*/ /** - * @file - * @brief Functions to assist in phylogeny and lineage analysis. - * Unlike methods of the @ref Systematics class, these methods take - * a single taxon (which is part of the systematics manager) to act on. 
+ * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2018-2023 + * + * @file SystematicsAnalysis.hpp + * @brief Functions to assist in phylogeny and lineage analysis. */ #ifndef EMP_EVOLVE_SYSTEMATICSANALYSIS_HPP_INCLUDE @@ -15,12 +12,13 @@ #include "../base/Ptr.hpp" +// Mutation info functions. Assumes each taxon has a struct containing an unordered map +// with keys that are strings indicating types of mutations and values that are numbers +// indicating the number of that type of mutation that occurred to make this taxon from +// the parent. + namespace emp { - /// @returns the taxon with the highest fitness out of any active taxon - /// in the given systematics manager. - /// @tparam systematics_t The type of the systematics manager containing the phylogeny to analyze. - /// @param s the systematics manager to search in. Must have more than 0 active taxa. template Ptr FindDominant(systematics_t & s) { double best = -999999; @@ -35,9 +33,10 @@ namespace emp { return best_tax; } - /// Returns the total number of ancestor taxa in \c taxon 's lineage. - /// Requires that taxon is a member of a systematics manager that - /// has ancestor storing turned on + /// Returns the total number of times a mutation of type @param type + /// that occurred along @param taxon 's lineage. (Different from CountMuts in + /// that CountMuts sums them whereas CountMutSteps would count two + /// simultaneous mutations of the same type as one event) template int LineageLength(Ptr taxon) { int count = 0; @@ -50,20 +49,10 @@ namespace emp { return count; } - /// Returns the total number of times a mutation of type \c type - /// occurred along \c taxon 's lineage. (Different from CountMuts in + /// Returns the total number of times a mutation of type @param type + /// that occurred along @param taxon 's lineage.
(Different from CountMuts in /// that CountMuts sums them whereas CountMutSteps would count two /// simultaneous mutations of the same type as one event) - /// Assumes each taxon has a struct containing an unordered map - /// with keys that are strings indicating types of mutations and keys that are numbers - /// indicating the number of that type of mutation that occurred to make this taxon from - /// the parent. - /// @param type string corresponding to a type of mutation. - /// Must be in the mut_counts dictionary (i.e. the dictionary - /// passed in when datastruct::mut_landscape_info::RecordMutation was called) - /// @param taxon a pointer to a taxon to count mutation steps for. - /// Must have a DATA_TYPE that supports mutation tracking - /// (e.g. mut_landscape_info) template int CountMutSteps(Ptr taxon, std::string type="substitution") { int count = 0; @@ -76,8 +65,8 @@ namespace emp { return count; } - /// Returns the total number of times a mutation of the types \c types - /// that along the given taxon 's lineage. (Different from CountMuts in + /// Returns the total number of times a mutation of the types @param types + /// that along @param taxon 's lineage. (Different from CountMuts in /// that CountMuts sums them whereas CountMutSteps would count two /// simultaneous mutations of the same type as one event) template @@ -94,8 +83,8 @@ namespace emp { return count; } - /// Returns the total number of mutations of type \c type that occurred - /// along \c taxon 's lineage. + /// Returns the total number of mutations of type @param type that occurred + /// along @param taxon 's lineage. template int CountMuts(Ptr taxon, std::string type="substitution") { int count = 0; @@ -108,8 +97,8 @@ namespace emp { return count; } - /// Returns the total number of mutations of the types in \c types that occurred - /// along the given taxon 's lineage. 
+ /// Returns the total number of mutations of the types @param types that occurred + /// along @param taxon 's lineage. template int CountMuts(Ptr taxon, emp::vector types) { int count = 0; @@ -125,7 +114,7 @@ namespace emp { } /// Returns the total number of deleterious mutational steps that occurred - /// along the given taxon's lineage. (a change from parent to child taxon counts + /// along @param taxon 's lineage. (a change from parent to child taxon counts /// as a single step, regardless of the number of mutations that happened at /// that time point) template @@ -145,7 +134,7 @@ namespace emp { } /// Returns the total number of changes in phenotype that occurred - /// along the given taxon's lineage. + /// along @param taxon 's lineage. template int CountPhenotypeChanges(Ptr taxon) { int count = 0; // Start with current phenotype @@ -163,7 +152,7 @@ namespace emp { } /// Returns the total number of unique phenotypes that occurred - /// along the given taxon's lineage. + /// along @param taxon 's lineage. template int CountUniquePhenotypes(Ptr taxon) { std::setGetData().phenotype)> seen; diff --git a/include/emp/Evolve/World.hpp b/include/emp/Evolve/World.hpp index 762ddca409..a746e64d4f 100644 --- a/include/emp/Evolve/World.hpp +++ b/include/emp/Evolve/World.hpp @@ -1,10 +1,9 @@ -/* - * This file is part of Empirical, https://github.com/devosoft/Empirical - * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2017-2018 -*/ /** - * @file + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2017-2018 + * + * @file World.hpp * @brief Definition of a base class for a World template for use in evolutionary algorithms. * * A definition of the emp::World template, linking in specialized file handling, iterators, @@ -15,6 +14,8 @@ * whether or not they also affect injected organisms. 
(Right now they always do!!) * @todo We should Specialize World so that ANOTHER world can be used as an ORG, with proper * delegation to facilitate demes, pools, islands, etc. + * @todo We should be able to have any number of systematics managers, based on various type_trait + * information that we want to track. * @todo Add a signal for DoBirth() for when a birth fails. * @todo Add a signal for population Reset() (and possibly Clear?) * @todo Add a feature to maintain population sorted by each phenotypic trait. This will allow @@ -27,7 +28,6 @@ #include #include -#include #include #include "../base/Ptr.hpp" @@ -367,8 +367,8 @@ namespace emp { return *(pop[id]); } - /// Retrieve a const reference to the organism as the specified x,y coordinates. - // @CAO: Technically, we should set this up with any number of coordinates. + /// Retrieve a const reference to the organism at the specified x,y coordinates. + /// @CAO: Technically, we should set this up with any number of coordinates. ORG & GetOrg(size_t x, size_t y) { return GetOrg(x+y*GetWidth()); } /// Retrive a pointer to the contents of a specified cell; will be nullptr if the cell is diff --git a/include/emp/Evolve/World_reflect.hpp b/include/emp/Evolve/World_reflect.hpp index 63e0082322..768dd1596d 100644 --- a/include/emp/Evolve/World_reflect.hpp +++ b/include/emp/Evolve/World_reflect.hpp @@ -46,7 +46,7 @@ namespace emp { template void SetDefaultFitFun_impl(WORLD & world, ... ) { - world.SetFitFun( [](ORG & org){ + world.SetFitFun( [](ORG & /* org */){ emp_assert(false, "No default fitness function available"); return 0.0; } ); @@ -72,7 +72,7 @@ namespace emp { template void SetDefaultMutFun_impl(WORLD & world, ... ) { - world.SetMutFun( [](ORG & org, Random & random) { + world.SetMutFun( [](ORG & /* org */, Random & /* random */ ) { emp_assert(false, "No default DoMutations available"); return 0; } ); @@ -104,7 +104,7 @@ namespace emp { template void SetDefaultPrintFun_impl(WORLD & world, ...
) { - world.SetPrintFun( [](ORG & org, std::ostream & os){ + world.SetPrintFun( [](ORG & /* org */, std::ostream & /* os */){ emp_assert(false, "No default Print function available"); } ); } diff --git a/include/emp/Evolve/World_select.hpp b/include/emp/Evolve/World_select.hpp index b192bcb16b..01afdfa53a 100644 --- a/include/emp/Evolve/World_select.hpp +++ b/include/emp/Evolve/World_select.hpp @@ -188,7 +188,7 @@ namespace emp { /// EACH offspring produced. /// @param world The emp::World object with the organisms to be selected. /// @param fit_funs The set of fitness functions to shuffle for each organism reproduced. - /// @param repro_count How many rounds of repliction should we do. (default 1) + /// @param repro_count How many rounds of replication should we do. (default 1) /// @param max_funs The maximum number of fitness functions to use. (use 0 for all; default) template void LexicaseSelect(World & world, @@ -228,9 +228,9 @@ namespace emp { // Step through the functions in the proper order. cur_orgs = all_orgs; // Start with all of the organisms. 
- int depth = -1; +// int depth = -1; for (size_t fit_id : order) { - depth++; +// depth++; double max_fit = fitnesses[fit_id][cur_orgs[0]]; next_orgs.push_back(cur_orgs[0]); diff --git a/include/emp/Evolve/World_structure.hpp b/include/emp/Evolve/World_structure.hpp index 8918e00337..bdf469a5d8 100644 --- a/include/emp/Evolve/World_structure.hpp +++ b/include/emp/Evolve/World_structure.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2017-2018 + * date: 2017-2022 */ /** * @file @@ -48,13 +48,15 @@ namespace emp { } WorldPosition(const WorldPosition &) = default; + WorldPosition & operator=(const WorldPosition &) = default; + uint32_t GetIndex() const { return index; } uint32_t GetPopID() const { return pop_id; } bool IsActive() const { return pop_id == 0; } bool IsValid() const { return index != invalid_id; } - WorldPosition & SetActive(bool _active=true) { pop_id = 0; return *this; } + WorldPosition & SetActive(bool /*_active*/=true) { pop_id = 0; return *this; } WorldPosition & SetPopID(size_t _id) { emp_assert(_id <= invalid_id); pop_id = (uint32_t) _id; return *this; } WorldPosition & SetIndex(size_t _id) { emp_assert(_id <= invalid_id); index = (uint32_t) _id; return *this; } WorldPosition & MarkInvalid() { index = invalid_id; pop_id = invalid_id; return *this; } @@ -97,7 +99,7 @@ namespace emp { }; /// Set the population to be a set of pools that are individually well mixed, but with limited - /// migtation. Arguments are the number of pools, the size of each pool, and whether the + /// migration. Arguments are the number of pools, the size of each pool, and whether the /// generations should be synchronous (true) or not (false, default). 
template void SetPools(World & world, size_t num_pools, @@ -108,7 +110,7 @@ namespace emp { // -- Setup functions -- // Inject in an empty pool -or- randomly if none empty - world.SetAddInjectFun( [&world,pool_size](Ptr new_org) { + world.SetAddInjectFun( [&world,pool_size](Ptr /*new_org*/) { for (size_t id = 0; id < world.GetSize(); id += pool_size) { if (world.IsOccupied(id) == false) return WorldPosition(id); } @@ -129,7 +131,7 @@ namespace emp { if (synchronous_gen) { // Place births in the next open spot in the new pool (or randomly if full!) - world.SetAddBirthFun( [&world,pool_size](Ptr new_org, WorldPosition parent_pos) { + world.SetAddBirthFun( [&world,pool_size]([[maybe_unused]] Ptr new_org, WorldPosition parent_pos) { emp_assert(new_org); // New organism must exist. const size_t parent_id = parent_pos.GetIndex(); const size_t pool_id = parent_id / pool_size; @@ -145,7 +147,7 @@ namespace emp { world.SetAttribute("SynchronousGen", "True"); } else { // Asynchronous: always go to a neighbor in current population. - world.SetAddBirthFun( [&world](Ptr new_org, WorldPosition parent_pos) { + world.SetAddBirthFun( [&world]([[maybe_unused]] Ptr new_org, WorldPosition parent_pos) { auto pos = world.GetRandomNeighborPos(parent_pos); return pos; // Place org in existing population. }); @@ -175,7 +177,7 @@ namespace emp { world.MarkSpaceStructured(false).MarkPhenoStructured(true); // -- Setup functions -- - // Inject into the appropriate positon based on phenotype. Note that an inject will fail + // Inject into the appropriate position based on phenotype. Note that an inject will fail // if a more fit organism is already in place; you must run clear first if you want to // ensure placement. world.SetAddInjectFun( [&world,traits,trait_counts](Ptr new_org) { @@ -273,7 +275,7 @@ namespace emp { emp::vector distance; ///< And what is their distance? World & world; ///< World object being tracked. 
- TraitSet traits; ///< Traits we are tryng to spread + TraitSet traits; ///< Traits we are trying to spread emp::vector min_vals; ///< Smallest value found for each trait. emp::vector max_vals; ///< Largest value found for each trait. emp::vector bin_width; ///< Largest value found for each trait. @@ -326,10 +328,10 @@ namespace emp { size_t bin_id = org_bins[refresh_id]; Refresh_AgainstBin(refresh_id, bin_id); - // Then check all neighbor bins. Ignoring diagnols for now since they could be expensive... + // Then check all neighbor bins. Ignoring diagonals for now since they could be expensive... // (though technically we need them...) size_t trait_offset = 1; - for (size_t trait_id = 0; trait_id < traits.GetSize(); trait_id++) { + for (size_t trait_id = start_id; trait_id < traits.GetSize(); trait_id++) { size_t prev_bin_id = bin_id - trait_offset; if (prev_bin_id < num_total_bins) { Refresh_AgainstBin(refresh_id, prev_bin_id); @@ -399,7 +401,7 @@ namespace emp { is_setup = false; } - /// Find the best organism to kill in the popualtion. In this case, find the two closest organisms + /// Find the best organism to kill in the population. In this case, find the two closest organisms /// and kill the one with the lower fitness. size_t FindKill() { if (!is_setup) Setup(); // The first time we run out of space and need to kill, setup structure! @@ -488,7 +490,7 @@ namespace emp { emp_assert(org_bins[i] < num_total_bins, i, org_bins[i], num_total_bins, world.GetNumOrgs()); } - size_t org_count = 0; + [[maybe_unused]] size_t org_count = 0; for (size_t i = 0; i < num_total_bins; i++) { org_count += bin_ids[i].size(); for (size_t org_id : bin_ids[i]) { @@ -517,10 +519,10 @@ namespace emp { world.OnPlacement( [info_ptr](size_t pos) mutable { info_ptr->Update(pos); } ); // -- Setup functions -- - // Inject into the appropriate positon based on phenotype. Note that an inject will fail + // Inject into the appropriate position based on phenotype. 
Note that an inject will fail // if a more fit organism is already in place; you must run clear first if you want to // ensure placement. - world.SetAddInjectFun( [&world, traits, world_size, info_ptr](Ptr new_org) { + world.SetAddInjectFun( [/*&world, traits,*/ world_size, info_ptr]([[maybe_unused]] Ptr new_org) { size_t pos = info_ptr->GetBirthPos(world_size); return WorldPosition(pos); }); @@ -530,7 +532,7 @@ namespace emp { world.SetGetNeighborFun( [](WorldPosition pos) { emp_assert(false); return pos; }); // Find the two closest organisms and kill the lower fit one. (Killing sparsely...) - // Must unsetup population for next birth to work. + // Must un-setup population for next birth to work. world.SetKillOrgFun( [&world, info_ptr](){ const size_t last_id = world.GetSize() - 1; world.Swap(info_ptr->FindKill(), last_id); @@ -541,7 +543,7 @@ namespace emp { }); // Birth is effectively the same as inject. - world.SetAddBirthFun( [&world, traits, world_size, info_ptr](Ptr new_org, WorldPosition parent_pos) { + world.SetAddBirthFun( [/*&world, traits,*/ world_size, info_ptr]([[maybe_unused]] Ptr new_org, WorldPosition parent_pos) { (void) parent_pos; size_t pos = info_ptr->GetBirthPos(world_size); return WorldPosition(pos); diff --git a/include/emp/base/Ptr.hpp b/include/emp/base/Ptr.hpp index 5d751e4a2d..d5af258de9 100644 --- a/include/emp/base/Ptr.hpp +++ b/include/emp/base/Ptr.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2020. + * date: 2016-2022. */ /** * @file @@ -16,8 +16,11 @@ * intentionally) you can define EMP_NO_PTR_TO_PTR * * If you trip an assert, you can re-do the run a track a specific pointer by defining - * EMP_ABORT_PTR_NEW or EMP_ABORT_PTR_DELETE to the ID of the pointer in question. This will - * allow you to track the pointer more easily in a debugger. 
+ * EMP_ABORT_PTR_NEW or EMP_ABORT_PTR_DELETE to the ID of the pointer in question. + * + * For example: -DEMP_ABORT_PTR_NEW=1691 + * + * This will allow you to track the pointer more easily in a debugger. * * @todo Track information about emp::vector and emp::array objects to make sure we don't * point directly into them? (A resize() could make such pointers invalid!) Or better, warn @@ -84,8 +87,8 @@ namespace emp { } PtrInfo(const PtrInfo &) = default; PtrInfo(PtrInfo &&) = default; - PtrInfo & operator=(const PtrInfo &) = default; - PtrInfo & operator=(PtrInfo &&) = default; + PtrInfo & operator=(const PtrInfo &) & = default; + PtrInfo & operator=(PtrInfo &&) & = default; ~PtrInfo() { if (internal::ptr_debug) std::cout << "Deleted info for pointer " << ptr << std::endl; @@ -330,7 +333,7 @@ namespace emp { namespace { // @CAO: Build this for real! template - bool PtrIsConvertable(FROM * ptr) { (void) ptr; return true; } + bool PtrIsConvertible(FROM * ptr) { (void) ptr; return true; } // emp_assert( (std::is_same() || dynamic_cast(in_ptr)) ); // Debug information provided for each pointer type. @@ -442,8 +445,10 @@ namespace emp { template Ptr(T2 * in_ptr, bool track=false) : BasePtr(in_ptr, UNTRACKED_ID) { - if (internal::ptr_debug) std::cout << "raw construct: " << ptr << ". track=" << track << std::endl; - emp_assert( (PtrIsConvertable(in_ptr)) ); + if (internal::ptr_debug) { + std::cout << "raw construct: " << ((void *) ptr) << ". track=" << track << std::endl; + } + emp_assert( (PtrIsConvertible(in_ptr)) ); // If this pointer is already active, link to it. if (Tracker().IsActive(ptr)) { @@ -465,7 +470,7 @@ namespace emp { if (internal::ptr_debug) std::cout << "raw ARRAY construct: " << ptr << ". size=" << array_size << "(" << array_bytes << " bytes); track=" << track << std::endl; - emp_assert( (PtrIsConvertable(_ptr)) ); + emp_assert( (PtrIsConvertible(_ptr)) ); // If this pointer is already active, link to it. 
if (Tracker().IsActive(ptr)) { @@ -484,7 +489,7 @@ namespace emp { template Ptr(Ptr _in) : BasePtr(_in.Raw(), _in.GetID()) { if (internal::ptr_debug) std::cout << "inexact copy construct: " << ptr << std::endl; - emp_assert( (PtrIsConvertable(_in.Raw())), id ); + emp_assert( (PtrIsConvertible(_in.Raw())), id ); Tracker().IncID(id); } @@ -497,7 +502,7 @@ namespace emp { ~Ptr() { if (internal::ptr_debug) { std::cout << "destructing Ptr instance "; - if (ptr) std::cout << id << " (" << ptr << ")\n"; + if (ptr) std::cout << id << " (" << ((void *) ptr) << ")\n"; else std::cout << "(nullptr)\n"; } Tracker().DecID(id); @@ -604,8 +609,8 @@ namespace emp { /// Delete this pointer to an array (must be an array). void DeleteArray() { - emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not responsible for."); emp_assert(ptr, "Trying to delete null Ptr."); + emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not responsible for."); emp_assert(Tracker().IsArrayID(id), id, "Trying to delete non-array pointer as array."); emp_assert(Tracker().IsActive(ptr), id, "Trying to delete inactive pointer (already deleted!)"); if (internal::ptr_debug) std::cout << "Ptr::DeleteArray() : " << ptr << std::endl; @@ -623,7 +628,7 @@ namespace emp { struct hash_t { size_t operator()(const Ptr & t) const noexcept { return t.Hash(); } }; /// Copy assignment - Ptr & operator=(const Ptr & _in) { + Ptr & operator=(const Ptr & _in) & { if (internal::ptr_debug) { std::cout << "copy assignment from id " << _in.id << " to id " << id << std::endl; @@ -644,9 +649,9 @@ namespace emp { /// Assign to a raw pointer of the correct type; if this is already tracked, hooked in /// correctly, otherwise don't track. 
template - Ptr & operator=(T2 * _in) { + Ptr & operator=(T2 * _in) & { if (internal::ptr_debug) std::cout << "raw assignment" << std::endl; - emp_assert( (PtrIsConvertable(_in)) ); + emp_assert( (PtrIsConvertible(_in)) ); Tracker().DecID(id); // Decrement references to former pointer at this position. ptr = _in; // Update to new pointer. @@ -664,11 +669,11 @@ namespace emp { return *this; } - /// Assign to a convertable Ptr + /// Assign to a convertible Ptr template - Ptr & operator=(Ptr _in) { + Ptr & operator=(Ptr _in) & { if (internal::ptr_debug) std::cout << "convert-copy assignment" << std::endl; - emp_assert( (PtrIsConvertable(_in.Raw())), _in.id ); + emp_assert( (PtrIsConvertible(_in.Raw())), _in.id ); emp_assert(Tracker().IsDeleted(_in.id) == false, _in.id, "Do not copy deleted pointers."); Tracker().DecID(id); ptr = _in.Raw(); @@ -694,42 +699,28 @@ namespace emp { /// Does this const pointer exist? operator bool() const { return ptr != nullptr; } - /// Does this Ptr point to the same memory position? - bool operator==(const Ptr & in_ptr) const { return ptr == in_ptr.ptr; } - - /// Does this Ptr point to different memory positions? - bool operator!=(const Ptr & in_ptr) const { return ptr != in_ptr.ptr; } - - /// Does this Ptr point to a memory position before another? - bool operator<(const Ptr & in_ptr) const { return ptr < in_ptr.ptr; } - - /// Does this Ptr point to a memory position before or equal to another? - bool operator<=(const Ptr & in_ptr) const { return ptr <= in_ptr.ptr; } - - /// Does this Ptr point to a memory position after another? - bool operator>(const Ptr & in_ptr) const { return ptr > in_ptr.ptr; } - - /// Does this Ptr point to a memory position after or equal to another? - bool operator>=(const Ptr & in_ptr) const { return ptr >= in_ptr.ptr; } - - - /// Does this Ptr point to the same memory position as a raw pointer? 
- bool operator==(const TYPE * in_ptr) const { return ptr == in_ptr; } - - /// Does this Ptr point to different memory positions as a raw pointer? - bool operator!=(const TYPE * in_ptr) const { return ptr != in_ptr; } - - /// Does this Ptr point to a memory position before a raw pointer? - bool operator<(const TYPE * in_ptr) const { return ptr < in_ptr; } - - /// Does this Ptr point to a memory position before or equal to a raw pointer? - bool operator<=(const TYPE * in_ptr) const { return ptr <= in_ptr; } - - /// Does this Ptr point to a memory position after a raw pointer? - bool operator>(const TYPE * in_ptr) const { return ptr > in_ptr; } + template bool operator==(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr == in_ptr.ptr; } + else { return ptr == in_ptr; } + } + template bool operator!=(const T & in_ptr) const { return !operator==(in_ptr); } - /// Does this Ptr point to a memory position after or equal to a raw pointer? - bool operator>=(const TYPE * in_ptr) const { return ptr >= in_ptr; } + template bool operator<(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr < in_ptr.ptr; } + else { return ptr < in_ptr; } + } + template bool operator>(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr > in_ptr.ptr; } + else { return ptr > in_ptr; } + } + template bool operator<=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr <= in_ptr.ptr; } + else { return ptr <= in_ptr; } + } + template bool operator>=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr >= in_ptr.ptr; } + else { return ptr >= in_ptr; } + } [[nodiscard]] Ptr operator+(int value) const { return ptr + value; } [[nodiscard]] Ptr operator-(int value) const { return ptr - value; } @@ -886,11 +877,11 @@ namespace emp { struct hash_t { size_t operator()(const Ptr & t) const noexcept { return t.Hash(); } }; // Copy assignments - Ptr & operator=(const Ptr & _in) { ptr = _in.ptr; return *this; } + 
Ptr & operator=(const Ptr & _in) & { ptr = _in.ptr; return *this; } // Assign to compatible Ptr or raw (non-managed) pointer. - template Ptr & operator=(T2 * _in) { ptr = _in; return *this; } - template Ptr & operator=(Ptr _in) { ptr = _in.Raw(); return *this; } + template Ptr & operator=(T2 * _in) & { ptr = _in; return *this; } + template Ptr & operator=(Ptr _in) & { ptr = _in.Raw(); return *this; } // Auto-cast to raw pointer type. operator TYPE *() { return ptr; } @@ -898,21 +889,28 @@ namespace emp { operator bool() { return ptr != nullptr; } operator bool() const { return ptr != nullptr; } - // Comparisons to other Ptr objects - bool operator==(const Ptr & in_ptr) const { return ptr == in_ptr.ptr; } - bool operator!=(const Ptr & in_ptr) const { return ptr != in_ptr.ptr; } - bool operator<(const Ptr & in_ptr) const { return ptr < in_ptr.ptr; } - bool operator<=(const Ptr & in_ptr) const { return ptr <= in_ptr.ptr; } - bool operator>(const Ptr & in_ptr) const { return ptr > in_ptr.ptr; } - bool operator>=(const Ptr & in_ptr) const { return ptr >= in_ptr.ptr; } - - // Comparisons to raw pointers. 
- bool operator==(const TYPE * in_ptr) const { return ptr == in_ptr; } - bool operator!=(const TYPE * in_ptr) const { return ptr != in_ptr; } - bool operator<(const TYPE * in_ptr) const { return ptr < in_ptr; } - bool operator<=(const TYPE * in_ptr) const { return ptr <= in_ptr; } - bool operator>(const TYPE * in_ptr) const { return ptr > in_ptr; } - bool operator>=(const TYPE * in_ptr) const { return ptr >= in_ptr; } + template bool operator==(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr == in_ptr.ptr; } + else { return ptr == in_ptr; } + } + template bool operator!=(const T & in_ptr) const { return !operator==(in_ptr); } + + template bool operator<(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr < in_ptr.ptr; } + else { return ptr < in_ptr; } + } + template bool operator>(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr > in_ptr.ptr; } + else { return ptr > in_ptr; } + } + template bool operator<=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr <= in_ptr.ptr; } + else { return ptr <= in_ptr; } + } + template bool operator>=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr >= in_ptr.ptr; } + else { return ptr >= in_ptr; } + } [[nodiscard]] Ptr operator+(int value) const { return ptr + value; } [[nodiscard]] Ptr operator-(int value) const { return ptr - value; } @@ -1016,7 +1014,7 @@ namespace emp { /// Fill an array with the provided fill_value. /// If fill_value is a function, repeatedly call function. template - void FillMemory(emp::Ptr mem_ptr, const size_t num_bytes, T fill_value) { + void FillMemory(emp::Ptr mem_ptr, const size_t num_bytes, T fill_value) { // If the fill value is a function, call that function for each memory position. if constexpr (std::is_invocable_v) { FillMemoryFunction(mem_ptr, num_bytes, std::forward(fill_value)); @@ -1062,6 +1060,19 @@ namespace emp { } } + /// Copy an array from the provided memory. 
+ template + void CopyMemory( + emp::Ptr from_ptr, + emp::Ptr to_ptr, + const size_t num_items) + { + constexpr size_t FILL_CHUNK = sizeof(T); + const size_t num_bytes = num_items * FILL_CHUNK; + + std::memcpy(to_ptr.Raw(), from_ptr.Raw(), num_bytes); + } + } // namespace emp #endif // #ifndef EMP_BASE_PTR_HPP_INCLUDE diff --git a/include/emp/base/_assert_macros.hpp b/include/emp/base/_assert_macros.hpp index a403b06b4c..f2fa6098d4 100644 --- a/include/emp/base/_assert_macros.hpp +++ b/include/emp/base/_assert_macros.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Helper macros for building proper assert commands. - * Status: RELEASE + * @note Status: RELEASE * */ diff --git a/include/emp/base/_emscripten_error_trigger.hpp b/include/emp/base/_emscripten_error_trigger.hpp index 49ddb1a3c4..0fbc9feed3 100644 --- a/include/emp/base/_emscripten_error_trigger.hpp +++ b/include/emp/base/_emscripten_error_trigger.hpp @@ -13,7 +13,7 @@ #ifndef EMP_BASE__EMSCRIPTEN_ERROR_TRIGGER_HPP_INCLUDE #define EMP_BASE__EMSCRIPTEN_ERROR_TRIGGER_HPP_INCLUDE - +#include #include #include diff --git a/include/emp/base/always_assert.hpp b/include/emp/base/always_assert.hpp index e5ec365210..022ee38510 100644 --- a/include/emp/base/always_assert.hpp +++ b/include/emp/base/always_assert.hpp @@ -6,7 +6,7 @@ /** * @file * @brief A more dynamic replacement for standard library asserts. - * Status: RELEASE + * @note Status: RELEASE * * A replacement for the system-level assert.h, called "emp_always_assert" * Added functionality: diff --git a/include/emp/base/array.hpp b/include/emp/base/array.hpp index c983b5d5fd..54d77261ff 100644 --- a/include/emp/base/array.hpp +++ b/include/emp/base/array.hpp @@ -1,12 +1,12 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2021. + * date: 2016-2022. 
*/ /** * @file * @brief A drop-in wrapper for std::array; adds on bounds checking in debug mode. - * Status: RELEASE + * @note Status: RELEASE * * If EMP_NDEBUG is set, emp::array is just an alias for std::array. * Otherwise, every time an array is accessed, tests are done to make sure that the @@ -21,9 +21,12 @@ #include #include +#include #include #include +#include "../../../third-party/cereal/include/cereal/cereal.hpp" + #include "assert.hpp" #ifdef EMP_NDEBUG @@ -37,130 +40,139 @@ namespace emp { namespace emp { + // Pre-declaration of array type. + template struct array; + + /// Setup an iterator wrapper to check validity. + template + struct array_iterator { + using this_t = array_iterator; + using array_t = ARRAY_T; + + // Iterator traits + using iterator_category = typename std::iterator_traits::iterator_category; + using value_type = typename std::iterator_traits::value_type; + using difference_type = typename std::iterator_traits::difference_type; + using pointer = typename std::iterator_traits::pointer; + using reference = typename std::iterator_traits::reference; + + ITERATOR_T it; + const array_t * arr_ptr { nullptr }; // Which array was iterator created from? + + array_iterator() { ; } + + array_iterator(ITERATOR_T _in, const array_t * _v) : it(_in), arr_ptr (_v) { ; } + array_iterator(const this_t &) = default; + array_iterator(this_t &&) = default; + ~array_iterator() { ; } + + // Debug tools to make sure this iterator is okay. + bool OK(bool begin_ok=true, bool end_ok=true) const { + if (arr_ptr == nullptr) return false; // Invalid array + if (it < arr_ptr->begin()) return false; // Iterator before array start. + if (it > arr_ptr->end()) return false; // Iterator after array end. + if (!begin_ok && it == arr_ptr->begin()) return false; // Iterator not allowed at start. + if (!end_ok && it == arr_ptr->end()) return false; // Iterator not allowed at end. 
+ return true; + } + + this_t & operator=(const this_t &) = default; + this_t & operator=(this_t &&) = default; + + operator ITERATOR_T() { return it; } + operator const ITERATOR_T() const { return it; } + + auto & operator*() { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. + return *it; + } + const auto & operator*() const { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. + return *it; + } + + auto operator->() { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. + return it; + } + auto operator->() const { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. + return it; + } + + this_t & operator++() { emp_assert(OK(true,false)); ++it; return *this; } + this_t operator++(int /*x*/) { emp_assert(OK(true,false)); return this_t(it++, arr_ptr); } + this_t & operator--() { emp_assert(OK(false,true)); --it; return *this; } + this_t operator--(int /*x*/) { emp_assert(OK(false,true)); return this_t(it--, arr_ptr); } + + this_t operator+(int in) { emp_assert(OK()); return this_t(it + in, arr_ptr); } + this_t operator-(int in) { emp_assert(OK()); return this_t(it - in, arr_ptr); } + ptrdiff_t operator-(const this_t & in) { emp_assert(OK()); return it - in.it; } + + this_t & operator+=(int in) { emp_assert(OK()); it += in; return *this; } + this_t & operator-=(int in) { emp_assert(OK()); it -= in; return *this; } + + auto & operator[](int index) { emp_assert(OK()); return it[index]; } + const auto & operator[](int index) const { emp_assert(OK()); return it[index]; } + }; + /// We are in debug mode, so emp::array has the same interface as std::array, but with extra /// bounds checking. Using vector as our base since it has the right pieces and is dynamic. 
- template - class array : public std::vector { - private: + template + struct array { + static constexpr size_t N = NUM_ELEMENTS; using this_t = emp::array; - using base_t = std::vector; - - public: - bool valid; - - /// Setup an iterator wrapper to make sure that they're valid. - template - struct iterator_wrapper : public ITERATOR_T { - using this_t = iterator_wrapper; - using wrapped_t = ITERATOR_T; - using vec_t = emp::array; - - /// What vector was this iterator created from? - const vec_t * v_ptr{ nullptr }; - - iterator_wrapper() { ; } - - iterator_wrapper(const ITERATOR_T & _in, const vec_t * _v) : ITERATOR_T(_in), v_ptr(_v) { ; } - iterator_wrapper(const this_t &) = default; - iterator_wrapper(this_t &&) = default; - ~iterator_wrapper() { ; } - - // Debug tools to make sure this iterator is okay. - bool OK(bool begin_ok=true, bool end_ok=true) const { - if (v_ptr == nullptr) return false; // Invalid vector - if (!v_ptr->valid) return false; // Vector has been deleted! - size_t pos = (size_t) (*this - v_ptr->begin()); - if (pos > v_ptr->size()) return false; // Iterator out of range. - if (!begin_ok && pos == 0) return false; // Iterator not allowed at beginning. - if (!end_ok && pos == v_ptr->size()) return false; // Iterator not allowed at end. - return true; - } - - this_t & operator=(const this_t &) = default; - this_t & operator=(this_t &&) = default; - - operator ITERATOR_T() { return *this; } - operator const ITERATOR_T() const { return *this; } - - auto & operator*() { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. - return wrapped_t::operator*(); - } - const auto & operator*() const { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. - return wrapped_t::operator*(); - } - - auto operator->() { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. 
- return wrapped_t::operator->(); - } - auto operator->() const { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. - return wrapped_t::operator->(); - } - - this_t & operator++() { emp_assert(OK(true,false)); wrapped_t::operator++(); return *this; } - this_t operator++(int x) { emp_assert(OK(true,false)); return this_t(wrapped_t::operator++(x), v_ptr); } - this_t & operator--() { emp_assert(OK(false,true)); wrapped_t::operator--(); return *this; } - this_t operator--(int x) { emp_assert(OK(false,true)); return this_t(wrapped_t::operator--(x), v_ptr); } - - auto operator+(int in) { emp_assert(OK()); return this_t(wrapped_t::operator+(in), v_ptr); } - auto operator-(int in) { emp_assert(OK()); return this_t(wrapped_t::operator-(in), v_ptr); } - auto operator-(const this_t & in) { emp_assert(OK()); return ((wrapped_t) *this) - (wrapped_t) in; } - - this_t & operator+=(int in) { emp_assert(OK()); wrapped_t::operator+=(in); return *this; } - this_t & operator-=(int in) { emp_assert(OK()); wrapped_t::operator-=(in); return *this; } - auto & operator[](int offset) { emp_assert(OK()); return wrapped_t::operator[](offset); } - }; - - using iterator = iterator_wrapper< typename base_t::iterator >; - using const_iterator = iterator_wrapper< typename base_t::const_iterator >; - using reverse_iterator = iterator_wrapper< typename base_t::reverse_iterator >; - using const_reverse_iterator = iterator_wrapper< typename base_t::const_reverse_iterator >; + + T _data[ N ? 
N : 1 ]; + + using iterator = array_iterator< T*, this_t >; + using const_iterator = array_iterator< const T *, this_t >; + using reverse_iterator = array_iterator< std::reverse_iterator, this_t >; + using const_reverse_iterator = array_iterator< std::reverse_iterator, this_t >; using value_type = T; - using size_type = typename base_t::size_type; - using reference = typename base_t::reference; - using const_reference = typename base_t::const_reference; + using size_type = std::size_t; + using reference = value_type&; + using const_reference = const value_type&; + + // -- No constructors, destructors, or assignment operators to preserve aggregate type. - array() : base_t(N), valid(true) {}; - array(const this_t & _in) : base_t(_in), valid(true) { emp_assert(_in.size() == N); }; - array(std::initializer_list in_list) : base_t(in_list), valid(true) { emp_assert(size() == N); } - template - array(InputIt first, InputIt last) : base_t(first, last), valid(true) { emp_assert(size() == N); } - ~array() { valid=false; } // No longer valid when array is deleted. + int operator<=>(const array &) const = default; + // Allow automatic conversion to regular array type. 
operator std::array() { std::array ar; - for (size_t i = 0; i < N; i++) ar[i] = base_t::operator[](i); + for (size_t i = 0; i < N; i++) ar[i] = _data[i]; return ar; } constexpr size_t size() const { return N; } - iterator begin() noexcept { return iterator(base_t::begin(), this); } - const_iterator begin() const noexcept { return const_iterator(base_t::begin(), this); } - iterator end() noexcept { return iterator(base_t::end(), this); } - const_iterator end() const noexcept { return const_iterator(base_t::end(), this); } + auto & data() { return _data; } + const auto & data() const { return _data; } - this_t & operator=(const this_t &) = default; + iterator begin() noexcept { return iterator(_data, this); } + const_iterator begin() const noexcept { return const_iterator(_data, this); } + iterator end() noexcept { return iterator(_data + N, this); } + const_iterator end() const noexcept { return const_iterator(_data + N, this); } T & operator[](size_t pos) { emp_assert(pos < N, pos, N); - return base_t::operator[](pos); + return _data[pos]; } const T & operator[](size_t pos) const { emp_assert(pos < N, pos, N); - return base_t::operator[](pos); + return _data[pos]; } - T & back() { emp_assert(N > 0); return base_t::back(); } - const T & back() const { emp_assert(N > 0); return base_t::back(); } - T & front() { emp_assert(N > 0); return base_t::front(); } - const T & front() const { emp_assert(N > 0); return base_t::front(); } + T & back() { emp_assert(N > 0); return _data[N-1]; } + const T & back() const { emp_assert(N > 0); return _data[N-1]; } + T & front() { emp_assert(N > 0); return _data[0]; } + const T & front() const { emp_assert(N > 0); return _data[0]; } - void fill(const T & val) { this->assign(N, val); } + void fill(const T & val) { + for (size_t i = 0; i < N; ++i) _data[i] = val; + } // Functions to make sure to throw an error on: @@ -172,27 +184,30 @@ namespace emp { void pop_back() { emp_assert(false, "invalid operation for array!"); } template - 
iterator insert(ARGS &&... args) { + iterator insert(ARGS &&... /* args */) { emp_assert(false, "invalid operation for array!"); - return iterator( base_t::insert(std::forward(args)...), this ); + return end(); } template - iterator erase(ARGS &&... args) { + iterator erase(ARGS &&... /* args */) { emp_assert(false, "invalid operation for array!"); - return iterator( base_t::erase(std::forward(args)...), this ); + return end(); } template - iterator emplace(ARGS &&... args) { + iterator emplace(ARGS &&... /* args */) { emp_assert(false, "invalid operation for array!"); - return iterator( base_t::emplace(std::forward(args)...), this ); + return end(); } template void emplace_back(ARGS &&... /* args */) { emp_assert(false, "invalid operation for array!"); } + + template + void serialize( Archive & ar ) { ar(_data); } }; diff --git a/include/emp/base/assert.hpp b/include/emp/base/assert.hpp index 4148807ff3..36589c7fda 100644 --- a/include/emp/base/assert.hpp +++ b/include/emp/base/assert.hpp @@ -6,7 +6,7 @@ /** * @file * @brief A more dynamic replacement for standard library asserts. - * Status: RELEASE + * @note Status: RELEASE * * A replacement for the system-level assert.h, called "emp_assert" * Added functionality: @@ -44,6 +44,10 @@ // #define emp_assert(EXPR) ((void) sizeof(EXPR) ) // #define emp_assert(EXPR, ...) { constexpr bool __emp_assert_tmp = false && (EXPR); (void) __emp_assert_tmp; } + namespace emp { + static constexpr bool is_debug_mode = false; + } + #else /// Require a specified condition to be true. If it is false, immediately /// halt execution. Print also extra information on any variables or @@ -52,6 +56,10 @@ /// information will not be printed when compiling with MSVC. #define emp_assert(...) 
emp_always_assert(__VA_ARGS__) + namespace emp { + static constexpr bool is_debug_mode = true; + } + #endif diff --git a/include/emp/base/notify.hpp b/include/emp/base/notify.hpp new file mode 100644 index 0000000000..6ab969f6b6 --- /dev/null +++ b/include/emp/base/notify.hpp @@ -0,0 +1,472 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022. + * + * @file notify.hpp + * @brief Tools to alert users of messages (including errors and warnings) in a consistant manner. + * @note Status: ALPHA + * + * + * There are a handful of notification types to consider: + * - Message: A simple notification. + * - Verbose: Optional messages that can be activated by category. + * - Warning: Something looks suspicious, but is not technically a problem (don't exit) + * - Error: Something has gone horribly wrong and is impossible to recover from (exit) + * - Exception: Something didn't go the way we expected, but we can still recover (exit if not handled) + * - Debug: A simple notification that should only be printed when NDEBUG is not set (don't exit) + * + * Messages default to "standard out"; all of the other default to "standard error". Handling of + * these notifications can all be overriden by either whole category or by specific tag. + * + * There are three possible recipients for all errors/warnings. + * - The end-user if the problem stems from inputs they provided to the executable. + * - The library user if the problem is due to mis-use of library functionality. + * - The library developers if something that should be impossible occurs. + * + * The content of this file primarily targets the first group; developers should prefer asserts + * to ensure that supposedly "impossible" situations do not occur. + * + * NOTES: + * - Whenever possible, exceptions should be preferred. 
They are more specific than warnings + * and can be responded to rather than automatically halting execution like errors. + * - Warnings should always detail what should be done differently to suppress that warning. + * + */ + +#ifndef EMP_BASE_NOTIFY_HPP_INCLUDE +#define EMP_BASE_NOTIFY_HPP_INCLUDE + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vector.hpp" + +namespace emp { +namespace notify { + using id_t = std::string; + using message_t = std::string; + using except_data_t = std::any; + + using id_arg_t = const id_t &; + using message_arg_t = const message_t &; + using response_t = bool(id_arg_t, message_arg_t, except_data_t); + using exit_fun_t = std::function; + + /// Information about an exception that has occurred. + struct ExceptInfo { + id_t id = "__NONE__"; ///< Which exception was triggered? + message_t message = ""; ///< A detailed message of this exception. + except_data_t data; ///< Extra data needed to resolve this exception. + }; + + enum class Type { MESSAGE=0, DEBUG, WARNING, ERROR, EXCEPTION, NUM_TYPES }; + static constexpr size_t num_types = static_cast(Type::NUM_TYPES); + + /// Convert a type to a human-readable string. + static id_t TypeID(Type type) { + switch (type) { + case Type::MESSAGE: return "Message"; + case Type::DEBUG: return "Debug"; + case Type::WARNING: return "WARNING"; + case Type::ERROR: return "ERROR"; + case Type::EXCEPTION: return "EXCEPTION"; + default: return "Unknown"; + } + } + + /// Convert a type to a human-readable string in COLOR.
+ static id_t ColorTypeID(Type type) { + const std::string green_text = "\033[32m"; + const std::string magenta_text = "\033[35m"; + const std::string red_text = "\033[31m"; + const std::string yellow_text = "\033[33m"; + const std::string normal_text = "\033[39m"; + const std::string bold_text = "\033[1m"; + const std::string no_bold_text = "\033[22m"; + switch (type) { + case Type::MESSAGE: return green_text + "Message" + normal_text; + case Type::DEBUG: return green_text + bold_text + "Debug" + no_bold_text + normal_text; + case Type::WARNING: return yellow_text + bold_text + "WARNING" + no_bold_text + normal_text; + case Type::ERROR: return red_text + bold_text + "ERROR" + no_bold_text + normal_text; + case Type::EXCEPTION: return magenta_text + bold_text + "EXCEPTION" + no_bold_text + normal_text; + default: return "Unknown"; + } + } + + // Maintain a specified collection of handlers. + class HandlerSet { + private: + using fun_t = std::function; + using fun_no_data_t = std::function; + using fun_msg_only_t = std::function; + emp::vector handlers; + bool exit_on_fail = false; + + public: + HandlerSet() {} + HandlerSet(const HandlerSet &) = default; + HandlerSet(HandlerSet &&) = default; + ~HandlerSet() { } + + bool GetExitOnFail() const { return exit_on_fail; } + HandlerSet & SetExitOnFail(bool _exit=true) { + exit_on_fail = _exit; + return *this; + } + + /// Trigger all handlers associated with a given ID. + bool Trigger(id_arg_t id, message_arg_t message, except_data_t except_data) { + // Run handlers from most recently added to oldest. + for (auto it = handlers.rbegin(); + it != handlers.rend(); + ++it) { + // Run until "true" result + bool result = (*it)(id, message, except_data); + if (result) return true; // Stop if any handler succeeded. + } + + return false; + } + + // Trigger without providing data. + bool Trigger(id_arg_t id, message_arg_t message) { + return Trigger(id, message, 0); + } + + // Trigger from a stored notification. 
+ bool Trigger(const ExceptInfo & info) { + return Trigger(info.id, info.message, info.data); + } + + // Add a function to this set. + HandlerSet & Add(fun_t in) { handlers.push_back(in); return *this; } + + // Add a function with no data. + HandlerSet & Add(fun_no_data_t in) { + handlers.push_back( + [fun=in](id_arg_t id, message_arg_t msg, except_data_t){ return fun(id,msg); } + ); + return *this; + } + + // Add a function with only a single message + HandlerSet & Add(fun_msg_only_t in) { + handlers.push_back( + [fun=in](id_arg_t, message_arg_t msg, except_data_t){ return fun(msg); } + ); + return *this; + } + + + // Clear all handlers in this set. + HandlerSet & Clear() { handlers.resize(0); return *this; } + + /// Replace all handlers with nothing (i.e., clear them) + void Replace() { Clear(); } + + /// Replace all handlers with the generic ones provided. + template + void Replace(fun_t in, FUN_Ts... extra) { + Replace(extra...); + Add(in); + } + }; + + /// Statically stored data about current notifications. + struct NotifyData { + // For each exception name we will keep a vector of handlers, appended to in the order + // that they arrive (most recent will be last) + std::unordered_map handler_map; // Map of all handlers to use for notifications. + std::unordered_map verbose_map; // Set of categories for verbose messages. + emp::vector exit_funs; // Set of handlers to run on exit. + emp::vector except_queue; // Unresolved exceptions after handlers have run + emp::vector pause_queue; // Unresolved notifications during pause + bool lethal_exceptions = true; // Should unresolved exceptions end the program? + bool is_paused = false; // When paused, save notifications until unpaused. + + HandlerSet & GetHandler(Type type) { return handler_map[TypeID(type)]; } + + NotifyData() { + // Setup the default handlers and exit rules.
+ GetHandler(Type::MESSAGE).Add( + [](id_arg_t, message_arg_t msg) { + std::cout << msg << std::endl; + return true; + } + ); + + GetHandler(Type::DEBUG).Add( +#ifdef NDEBUG + [](id_arg_t, message_arg_t){ return true; } +#else + [](id_arg_t, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::DEBUG); + std::cout << tag << ": " << msg << std::endl; + return true; + } +#endif + ); + + GetHandler(Type::WARNING).Add( + [](id_arg_t, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::WARNING); + std::cout << tag << ": " << msg << std::endl; + return true; // Only warning, do not exit. + } + ); + + GetHandler(Type::ERROR).Add( + [](id_arg_t, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::ERROR); + std::cout << tag << ": " << msg << std::endl; + return false; // Does not correct the problem, so exit. + } + ); + + GetHandler(Type::EXCEPTION).Add( + [](id_arg_t id, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::EXCEPTION); + std::cerr << tag << " (" << id << "): " << msg << std::endl; + return false; // Does not correct the problem, so exit. + } + ); + GetHandler(Type::EXCEPTION).SetExitOnFail(); + + // The initial exit handler should actually exit, using the appropriate exit code. + exit_funs.push_back( [](int code){ exit(code); } ); + } + }; + + /// Central call to obtain NotifyData singleton. 
+ static NotifyData & GetData() { static NotifyData data; return data; } + inline auto & MessageHandlers() { return GetData().GetHandler(Type::MESSAGE); } + inline auto & DebugHandlers() { return GetData().GetHandler(Type::DEBUG); } + inline auto & WarningHandlers() { return GetData().GetHandler(Type::WARNING); } + inline auto & ErrorHandlers() { return GetData().GetHandler(Type::ERROR); } + + [[maybe_unused]] static void AddExitHandler(exit_fun_t fun) { GetData().exit_funs.push_back(fun); } + [[maybe_unused]] static void ClearExitHandlers() { GetData().exit_funs.resize(0); } + [[maybe_unused]] static void ReplaceExitHandlers() { ClearExitHandlers(); } + template + static void ReplaceExitHandlers(exit_fun_t fun, FUN_Ts... extras) { + ReplaceExitHandlers(extras...); + AddExitHandler(fun); + } + + /// Generic exit handler that calls all of the provided functions. + [[maybe_unused]] static void Exit(int exit_code) { + NotifyData & data = GetData(); + + // Run any cleanup functions. + for (auto it = data.exit_funs.rbegin(); it != data.exit_funs.rend(); ++it) { + (*it)(exit_code); + } + + // Exit for real. + exit(exit_code); + } + + /// Generic Notification where type must be specified. + template + static bool Notify(Type type, Ts... args) { + NotifyData & data = GetData(); + const id_t id = TypeID(type); + + // Setup the message in a string stream. + std::stringstream ss; + ((ss << std::forward(args)), ...); + + // If we are paused, save this notification for later. + if (data.is_paused) { + data.pause_queue.push_back(ExceptInfo{id, ss.str(), 0}); + return true; + } + + bool result = data.handler_map[id].Trigger(id, ss.str()); + + // And return the success result. + return result; + } + + [[maybe_unused]] static void Pause() { + NotifyData & data = GetData(); + data.is_paused = true; + } + + [[maybe_unused]] static void Unpause() { + NotifyData & data = GetData(); + + // Step through the notifications that have accrued.
+ for (size_t i = 0; i < data.pause_queue.size(); ++i) { + auto & notice = data.pause_queue[i]; + bool result = data.handler_map[notice.id].Trigger(notice); + if (!result) { // Failed; move to exception queue or exit if error. + if (notice.id == "ERROR") Exit(1); + data.except_queue.push_back(notice); + } + } + + data.pause_queue.resize(0); // Clear out the queue. + + data.is_paused = false; + } + + + /// Send out a regular notification. + template + static bool Message(Ts... args) { return Notify(Type::MESSAGE, std::forward(args)...); } + + /// Send out a DEBUG notification. + template + static bool Debug(Ts... args) { return Notify(Type::DEBUG, std::forward(args)...); } + + /// Send out a notification of a WARNING. + template + static bool Warning(Ts... args) { return Notify(Type::WARNING, std::forward(args)...); } + + /// Send out a notification of an ERROR. + template + static bool Error(Ts... args) { + bool success = Notify(Type::ERROR, std::forward(args)...); + if (!success) { +#ifdef NDEBUG + Exit(1); +#else + abort(); +#endif + } + return success; + } + + // Trigger a warning only if a specified condition is true. + template + static bool TestWarning(bool test, Ts... args) { + if (test) return Warning(std::forward(args)...); + return true; + } + + // Trigger an error only if a specified condition is true. + template + static bool TestError(bool test, Ts... args) { + if (test) return Error(std::forward(args)...); + return true; + } + + + /// Add a handler for a particular exception type. + template + static HandlerSet & AddHandler(id_arg_t id, FUN_T fun) { + return GetData().handler_map[id].Add(fun); + } + + /// Add a generic exception handler. + template + static HandlerSet & AddHandler(FUN_T fun) { + return GetData().handler_map["EXCEPTION"].Add(fun); + } + + /// Ignore exceptions of a specific type. 
+ [[maybe_unused]] static HandlerSet & Ignore(id_arg_t id) { + return AddHandler(id, [](id_arg_t, message_arg_t){ return true; }); + } + + /// Turn a particular verbosity category on (or off, if make_active is false). + [[maybe_unused]] static void SetVerbose(std::string id, bool make_active=true) { + GetData().verbose_map[id] = make_active; + } + + /// Send out a notification of a "verbose" message. + template + [[maybe_unused]] static bool Verbose(const std::string & id, Ts... args) { + NotifyData & data = GetData(); + + if (data.verbose_map[id]) { + return Notify(Type::MESSAGE, std::forward(args)...); + } + + return false; + } + + /// Send out a notification of an Exception. + [[maybe_unused]] static bool Exception(id_arg_t id, message_arg_t message="", except_data_t except_data=0) { + NotifyData & data = GetData(); + + if (data.is_paused) { + data.pause_queue.push_back(ExceptInfo{id, message, except_data}); + return true; + } + + // Retrieve any specialized exception handlers for this type of exception. + bool result = data.handler_map[id].Trigger(id, message, except_data); + + // If unresolved, see if we should quit; else use a generic exception handler. + if (!result) { + if (data.handler_map[id].GetExitOnFail()) Exit(1); + result = data.handler_map["EXCEPTION"].Trigger(id, message, except_data); + } + + // If still unresolved, either give up or save the exception for later analysis. + if (!result) { + if (data.handler_map["EXCEPTION"].GetExitOnFail()) Exit(1); + data.except_queue.push_back(ExceptInfo{id, message, except_data}); + } + + return result; + } + + /// Retrieve a vector of ALL unresolved exceptions. + [[maybe_unused]] static const emp::vector & GetExceptions() { return GetData().except_queue; } + + /// Retrieve the first unresolved exception with a given id.
+ [[maybe_unused]] static ExceptInfo GetException(id_arg_t id) { + for (ExceptInfo & x : GetData().except_queue) if (x.id == id) return x; + return ExceptInfo{}; + } + + /// Return a total count of how many unresolved exceptions are left. + [[maybe_unused]] static size_t CountExceptions() { return GetData().except_queue.size(); } + + /// Return a total count of how many unresolved exceptions have a given id. + [[maybe_unused]] static size_t CountExceptions(id_arg_t id) { + size_t count = 0; + for (ExceptInfo & x : GetData().except_queue) if (x.id == id) ++count; + return count; + } + + /// Identify whether there are ANY unresolved exceptions. + [[maybe_unused]] static bool HasExceptions() { return CountExceptions(); } + + /// Identify whether there are any unresolved exceptions with a given id. + [[maybe_unused]] static bool HasException(id_arg_t id) { + for (ExceptInfo & x : GetData().except_queue) if (x.id == id) return true; + return false; + } + + /// Remove all unresolved exceptions. + [[maybe_unused]] static void ClearExceptions() { GetData().except_queue.resize(0); } + + /// Remove first exception with a given id. + [[maybe_unused]] static void ClearException(id_arg_t id) { + auto & except_queue = GetData().except_queue; + for (size_t i = 0; i < except_queue.size(); ++i) { + if (except_queue[i].id == id) { + // If exception is NOT in the last position, move last position earlier and reduce size. + if (i < except_queue.size() - 1) except_queue[i] = except_queue.back(); + except_queue.resize(except_queue.size() - 1); + return; + } + } + } + +} +} + + +#endif // #ifndef EMP_BASE_NOTIFY_HPP_INCLUDE diff --git a/include/emp/base/optional.hpp b/include/emp/base/optional.hpp index 8592aab555..d7dcb587dc 100644 --- a/include/emp/base/optional.hpp +++ b/include/emp/base/optional.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Audited implementation of std::optional. - * Status: RELEASE + * @note Status: RELEASE * * Drop-in replacements for std::optional. 
* In debug mode, operator * and operator-> value accesses are checked for undefined behavior. diff --git a/include/emp/base/vector.hpp b/include/emp/base/vector.hpp index 33a75f6b11..c131da6993 100644 --- a/include/emp/base/vector.hpp +++ b/include/emp/base/vector.hpp @@ -31,7 +31,7 @@ #ifdef EMP_NDEBUG -// Seemlessly translate emp::vector to std::vector +// Seamlessly translate emp::vector to std::vector namespace emp { template using vector = std::vector; } @@ -78,11 +78,23 @@ namespace emp { // Debug tools to make sure this iterator is okay. static std::string & ErrorCode() { static std::string code="No Errors Found."; return code; } + static std::string ErrorStart() { + std::string vec_type = std::string("vector<") + typeid(typename stdv_t::value_type).name() + ">"; + std::string it_type = typeid(ITERATOR_T).name(); + if constexpr (std::is_same()) it_type = "iterator"; + if constexpr (std::is_same()) it_type = "const_iterator"; + if constexpr (std::is_same()) it_type = "reverse_iterator"; + if constexpr (std::is_same()) it_type = "const_reverse_iterator"; + return std::string("Iterator (type = '") + vec_type + "::" + it_type + "') "; + } + + bool OK(bool begin_ok=true, bool end_ok=true, std::string op="") const { + std::string type_name = typeid(ITERATOR_T).name();; - bool OK(bool begin_ok=true, bool end_ok=true) const { if (v_ptr == nullptr) { ErrorCode() = "Invalid Vector! (set to nullptr)"; return false; } if (v_ptr->revision == 0) { ErrorCode() = "Vector deleted! 
(revision==0)"; return false; } - if (revision != v_ptr->revision) { ErrorCode() = "Vector has changed memeory!"; return false; } + if (revision != v_ptr->revision) { ErrorCode() = "Vector has changed memory!"; return false; } + int64_t pos = 0; if constexpr (std::is_same() || std::is_same()) { @@ -94,58 +106,63 @@ namespace emp { pos = *((ITERATOR_T *) this) - ((stdv_t *) v_ptr)->begin(); } if (pos < 0 || ((size_t) pos) > v_ptr->size()) { - ErrorCode() = "Iterator out of range."; - ErrorCode() += " size="; - ErrorCode() += std::to_string(v_ptr->size()); - ErrorCode() += " pos="; - ErrorCode() += std::to_string(pos); + ErrorCode() = ErrorStart() + "out of range." + + " size=" + std::to_string(v_ptr->size()) + " pos=" + std::to_string(pos); + return false; + } + if (!begin_ok && pos == 0) { + ErrorCode() = ErrorStart() + "not allowed at begin() for operation " + op + "."; + return false; + } + if (!end_ok && ((size_t) pos) == v_ptr->size()) { + ErrorCode() = ErrorStart() + "not allowed at end() for operation " + op + "."; return false; } - if (!begin_ok && pos == 0) { ErrorCode() = "Iterator not allowed at begin()."; return false; } - if (!end_ok && ((size_t) pos) == v_ptr->size()) { ErrorCode() = "Iterator not allowed at end()."; return false; } return true; } - this_t & operator=(const this_t &) = default; - this_t & operator=(this_t &&) = default; + this_t & operator=(const this_t &) & = default; + this_t & operator=(this_t &&) & = default; operator ITERATOR_T() { return *this; } operator const ITERATOR_T() const { return *this; } auto & operator*() { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. + emp_assert(OK(true, false, "dereference"), ErrorCode()); return wrapped_t::operator*(); } const auto & operator*() const { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. 
+ emp_assert(OK(true, false, "const dereference"), ErrorCode()); return wrapped_t::operator*(); } auto operator->() { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. +// emp_assert(OK(true, false, "->"), ErrorCode()); + emp_assert(OK(true, true, "->"), ErrorCode()); // Technically can use -> on end() for memory identification, just can't use result. return wrapped_t::operator->(); } auto operator->() const { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. +// emp_assert(OK(true, false, "const ->"), ErrorCode()); + emp_assert(OK(true, true, "const ->"), ErrorCode()); // Technically can use -> on end() for memory identification, just can't use result. return wrapped_t::operator->(); } this_t & operator++() { - emp_assert(OK(true,false), ErrorCode()); + emp_assert(OK(true,false, "++ (post)"), ErrorCode()); wrapped_t::operator++(); return *this; } this_t operator++(int x) { - emp_assert(OK(true,false), ErrorCode()); + emp_assert(OK(true,false, "++ (pre)"), ErrorCode()); return this_t(wrapped_t::operator++(x), v_ptr); } this_t & operator--() { - emp_assert(OK(false,true), ErrorCode()); + emp_assert(OK(false,true, "-- (post)"), ErrorCode()); wrapped_t::operator--(); return *this; } this_t operator--(int x) { - emp_assert(OK(false,true), ErrorCode()); + emp_assert(OK(false,true, "-- (pre)"), ErrorCode()); return this_t(wrapped_t::operator--(x), v_ptr); } @@ -216,7 +233,7 @@ namespace emp { stdv_t::resize(new_size, val); revision++; } - this_t & operator=(const this_t &) = default; + this_t & operator=(const this_t &) & = default; T & operator[](size_t pos) { emp_assert(pos < stdv_t::size(), pos, stdv_t::size()); @@ -311,7 +328,7 @@ namespace emp { emp_assert(new_size < MAX_SIZE, new_size); stdv_t::resize(new_size, val); } - this_t & operator=(const this_t &) = default; + this_t & operator=(const this_t &) & = default; auto operator[](size_t pos) -> 
decltype(stdv_t::operator[](pos)) { emp_assert(pos < stdv_t::size(), pos, stdv_t::size()); diff --git a/include/emp/bits/BitArray.hpp b/include/emp/bits/BitArray.hpp index 88cdb6ff91..c7edee49ed 100644 --- a/include/emp/bits/BitArray.hpp +++ b/include/emp/bits/BitArray.hpp @@ -6,7 +6,7 @@ /** * @file * @brief An Array of a fixed number of bits; similar to std::bitset, but with extra bit magic. - * Status: RELEASE + * @note Status: RELEASE * * @todo Some of the functions allow a start bit and end bit; each of these should be checked * to make sure that they will work if the start and end are part of the same byte. One @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "../base/assert.hpp" @@ -31,12 +32,13 @@ #include "../math/math.hpp" #include "../math/Random.hpp" #include "../meta/type_traits.hpp" -#include "../polyfill/span.hpp" #include "_bitset_helpers.hpp" #include "bitset_utils.hpp" -namespace emp { +#include "Bits.hpp" // New version of BitArray is in Bits.hpp + +namespace emp::old { /// A fixed-sized (but arbitrarily large) array of bits, and optimizes operations on those bits /// to be as fast as possible. @@ -173,16 +175,16 @@ namespace emp { ~BitArray() = default; /// Assignment operator (no separate move operator since no resources to move...) - BitArray & operator=(const this_t & in_bits) noexcept { return Copy(in_bits.bits); } + BitArray & operator=(const this_t & in_bits) & noexcept { return Copy(in_bits.bits); } /// Assignment operator from a std::bitset. - BitArray & operator=(const std::bitset & bitset); + BitArray & operator=(const std::bitset & bitset) &; /// Assignment operator from a string of '0's and '1's. - BitArray & operator=(const std::string & bitstring); + BitArray & operator=(const std::string & bitstring) &; /// Assignment operator from a literal string of '0's and '1's. 
- BitArray & operator=(const char * bitstring) { return operator=(std::string(bitstring)); } + BitArray & operator=(const char * bitstring) & { return operator=(std::string(bitstring)); } /// Assignment from another BitArray of a different size. template @@ -1004,7 +1006,7 @@ namespace emp { // Assignment operator from a std::bitset. template BitArray & - BitArray::operator=(const std::bitset & bitset) { + BitArray::operator=(const std::bitset & bitset) & { for (size_t i = 0; i < NUM_BITS; i++) Set(i, bitset[i]); return *this; } @@ -1012,7 +1014,7 @@ namespace emp { // Assignment operator from a string of '0's and '1's. template BitArray & - BitArray::operator=(const std::string & bitstring) { + BitArray::operator=(const std::string & bitstring) & { emp_assert(bitstring.size() <= NUM_BITS); Clear(); if constexpr (ZERO_LEFT) { @@ -1033,7 +1035,7 @@ namespace emp { const size_t from_bit ) { // Only check for same-ness if the two types are the same. - if constexpr (FROM_BITS == NUM_BITS) emp_assert(&from_array != this); + if constexpr (FROM_BITS == NUM_BITS) { emp_assert(&from_array != this); } emp_assert(from_bit < FROM_BITS); @@ -1096,7 +1098,7 @@ namespace emp { template bool BitArray::Get(size_t index) const { - emp_assert(index >= 0 && index < NUM_BITS); + emp_assert(index < NUM_BITS); const size_t field_id = FieldID(index); const size_t pos_id = FieldPos(index); return (bits[field_id] & (((field_t)1U) << pos_id)) != 0; @@ -1128,7 +1130,7 @@ namespace emp { // Flip a single bit template BitArray & BitArray::Toggle(size_t index) { - emp_assert(index >= 0 && index < NUM_BITS); + emp_assert(index < NUM_BITS); const size_t field_id = FieldID(index); const size_t pos_id = FieldPos(index); const field_t pos_mask = FIELD_1 << pos_id; @@ -2090,9 +2092,9 @@ namespace emp { namespace std { template - struct hash> + struct hash> { - size_t operator()( const emp::BitArray & bs ) const noexcept + size_t operator()( const emp::old::BitArray & bs ) const noexcept { return 
bs.Hash(); } diff --git a/include/emp/bits/BitSet.hpp b/include/emp/bits/BitSet.hpp index 9343d60c5f..59e871cdec 100644 --- a/include/emp/bits/BitSet.hpp +++ b/include/emp/bits/BitSet.hpp @@ -6,7 +6,7 @@ /** * @file * @brief A drop-in replacement for std::bitset, with additional bit magic features; aliases BitArray. - * Status: RELEASE + * @note Status: RELEASE * */ @@ -17,10 +17,12 @@ #include "BitArray.hpp" -namespace emp { +#include "Bits.hpp" // New version of BitSet is in Bits.hpp + +namespace emp::old { template - using BitSet = emp::BitArray; + using BitSet = emp::old::BitArray; } diff --git a/include/emp/bits/BitVector.hpp b/include/emp/bits/BitVector.hpp index fdd9290adf..fba53cdd5e 100644 --- a/include/emp/bits/BitVector.hpp +++ b/include/emp/bits/BitVector.hpp @@ -1,12 +1,12 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2021. + * date: 2016-2022. */ /** * @file * @brief A drop-in replacement for std::vector, with additional bitwise logic features. - * Status: RELEASE + * @note Status: RELEASE * * @note Compile with -O3 and -msse4.2 for fast bit counting. * @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "../base/assert.hpp" @@ -43,12 +44,13 @@ #include "../datastructs/hash_utils.hpp" #include "../math/math.hpp" #include "../math/Random.hpp" -#include "../polyfill/span.hpp" #include "_bitset_helpers.hpp" #include "bitset_utils.hpp" -namespace emp { +#include "Bits.hpp" // New version of BitVector is in Bits.hpp + +namespace emp::old { /// A drop-in replacement for std::vector, but with extra bitwise logic features. /// @@ -72,7 +74,7 @@ namespace emp { static constexpr size_t MAX_BITS = (size_t) -1; ///< Value larger than any bit ID. 
// Number of bits needed to specify position in a field + mask - static constexpr size_t FIELD_LOG2 = emp::Log2(FIELD_BITS); + static constexpr size_t FIELD_LOG2 = static_cast(emp::Log2(FIELD_BITS)); static constexpr field_t FIELD_LOG2_MASK = MaskLow(FIELD_LOG2); size_t num_bits; ///< Total number of bits are we using @@ -113,10 +115,10 @@ namespace emp { // Assume that the size of the bits has already been adjusted to be the size of the one // being copied and only the fields need to be copied over. - void RawCopy(const Ptr in); + inline void RawCopy(const Ptr in); // Copy bits from one position in the genome to another; leave old positions unchanged. - void RawCopy(const size_t from_start, const size_t from_stop, const size_t to); + inline void RawCopy(const size_t from_start, const size_t from_stop, const size_t to); // Convert the bits to bytes (note that bits are NOT in order at the byte level!) [[nodiscard]] emp::Ptr BytePtr() { return bits.ReinterpretCast(); } @@ -134,20 +136,20 @@ namespace emp { inline BitVector & ApplyRange(const FUN_T & fun, size_t start, size_t stop); // Helper: call SHIFT with positive number - void ShiftLeft(const size_t shift_size); + inline void ShiftLeft(const size_t shift_size); // Helper for calling SHIFT with negative number - void ShiftRight(const size_t shift_size); + inline void ShiftRight(const size_t shift_size); /// Helper: call ROTATE with negative number instead - void RotateLeft(const size_t shift_size_raw); + inline void RotateLeft(const size_t shift_size_raw); /// Helper for calling ROTATE with positive number - void RotateRight(const size_t shift_size_raw); + inline void RotateRight(const size_t shift_size_raw); public: /// Build a new BitVector with specified bit count (default 0) and initialization (default 0) - BitVector(size_t in_num_bits=0, bool init_val=false); + inline BitVector(size_t in_num_bits=0, bool init_val=false); // Prevent ambiguous conversions... 
/// Anything not otherwise defined for first argument, convert to size_t. @@ -155,67 +157,67 @@ namespace emp { BitVector(T in_num_bits) : BitVector((size_t) in_num_bits, 0) {} /// Copy constructor of existing bit field. - BitVector(const BitVector & in); + inline BitVector(const BitVector & in); /// Move constructor of existing bit field. - BitVector(BitVector && in); + inline BitVector(BitVector && in); /// Constructor to generate a BitVector from a std::bitset. template - explicit BitVector(const std::bitset & bitset); + inline explicit BitVector(const std::bitset & bitset); /// Constructor to generate a BitVector from a string of '0's and '1's. - BitVector(const std::string & bitstring); + inline BitVector(const std::string & bitstring); /// Constructor to generate a BitVector from a literal string of '0's and '1's. BitVector(const char * bitstring) : BitVector(std::string(bitstring)) {} /// Constructor to generate a random BitVector (with equal prob of 0 or 1). - BitVector(size_t in_num_bits, Random & random); + inline BitVector(size_t in_num_bits, Random & random); /// Constructor to generate a random BitVector with provided prob of 1's. - BitVector(size_t in_num_bits, Random & random, const double p1); + inline BitVector(size_t in_num_bits, Random & random, const double p1); /// Constructor to generate a random BitVector with provided number of 1's. - BitVector(size_t in_num_bits, Random & random, const size_t target_ones); + inline BitVector(size_t in_num_bits, Random & random, const size_t target_ones); /// Constructor to generate a random BitVector with provided number of 1's. BitVector(size_t in_num_bits, Random & random, const int target_ones) : BitVector(in_num_bits, random, (size_t) target_ones) { } /// Initializer list constructor. - template BitVector(const std::initializer_list l); + template inline BitVector(const std::initializer_list l); /// Copy, but with a resize. 
- BitVector(const BitVector & in, size_t new_size); + inline BitVector(const BitVector & in, size_t new_size); /// Destructor - ~BitVector(); + inline ~BitVector(); /// Assignment operator. - BitVector & operator=(const BitVector & in); + inline BitVector & operator=(const BitVector & in) &; /// Move operator. - BitVector & operator=(BitVector && in); + inline BitVector & operator=(BitVector && in) &; /// Assignment operator from a std::bitset. template - BitVector & operator=(const std::bitset & bitset); + inline BitVector & operator=(const std::bitset & bitset) &; /// Assignment operator from a string of '0's and '1's. - BitVector & operator=(const std::string & bitstring); + inline BitVector & operator=(const std::string & bitstring) &; /// Assignment operator from a literal string of '0's and '1's. - BitVector & operator=(const char * bitstring) { return operator=(std::string(bitstring)); } + BitVector & operator=(const char * bitstring) & { return operator=(std::string(bitstring)); } /// Assignment from another BitVector without changing size. - BitVector & Import( const BitVector & from_bv, const size_t from_bit=0 ); + inline BitVector & Import( const BitVector & from_bv, const size_t from_bit=0 ); /// Convert to a BitVector of a different size. - BitVector Export(size_t out_size, size_t start_bit=0) const; + inline BitVector Export(size_t out_size, size_t start_bit=0) const; // Scan this bitvector to make sure that there are no internal problems. - bool OK() const; + inline bool OK() const; // ========= Accessors ========= // @@ -230,30 +232,32 @@ namespace emp { [[nodiscard]] double GetNumStates() const { return emp::Pow2(num_bits); } /// Retrieve the bit value from the specified index. - [[nodiscard]] bool Get(size_t index) const; + [[nodiscard]] inline bool Get(size_t index) const; /// A safe version of Get() for indexing out of range. Useful for representing collections. [[nodiscard]] bool Has(size_t index) const { return (index < num_bits) ? 
Get(index) : false; } /// Update the bit value at the specified index. - BitVector & Set(size_t index, bool value=true); + inline BitVector & Set(size_t index, bool value=true); /// Set all bits to 1. - BitVector & SetAll(); + inline BitVector & SetAll(); - /// Set a range of bits to one: [start, stop) - BitVector & SetRange(size_t start, size_t stop) - { return ApplyRange([](field_t){ return FIELD_ALL; }, start, stop); } + /// Set a range of bits to value (default one): [start, stop) + BitVector & SetRange(size_t start, size_t stop, bool value=true) { + if (value) return ApplyRange([](field_t){ return FIELD_ALL; }, start, stop); + return Clear(start, stop); + } /// Set all bits to 0. - BitVector & Clear(); + inline BitVector & Clear(); /// Set specific bit to 0. BitVector & Clear(size_t index) { return Set(index, false); } /// Set bits to 0 in the range [start, stop) BitVector & Clear(const size_t start, const size_t stop) - { return ApplyRange([](field_t){ return 0; }, start, stop); } + { return ApplyRange([](field_t) -> size_t { return 0; }, start, stop); } /// Const index operator -- return the bit at the specified position. @@ -266,14 +270,14 @@ namespace emp { BitVector & Toggle() { return NOT_SELF(); } /// Change a specified bit to the opposite value - BitVector & Toggle(size_t index); + inline BitVector & Toggle(size_t index); /// Flips all the bits in a range [start, end) BitVector & Toggle(size_t start, size_t stop) { return ApplyRange([](field_t x){ return ~x; }, start, stop); } /// Return true if ANY bits are set to 1, otherwise return false. - [[nodiscard]] bool Any() const; + [[nodiscard]] inline bool Any() const; /// Return true if NO bits are set to 1, otherwise return false. [[nodiscard]] bool None() const { return !Any(); } @@ -283,75 +287,75 @@ namespace emp { [[nodiscard]] bool All() const { return (~(*this)).None(); } /// Resize this BitVector to have the specified number of bits. 
- BitVector & Resize(size_t new_bits); + inline BitVector & Resize(size_t new_bits); // ========= Randomization functions ========= // /// Set all bits randomly, with a 50% probability of being a 0 or 1. - BitVector & Randomize(Random & random); + inline BitVector & Randomize(Random & random); /// Set all bits randomly, with probability specified at compile time. template - BitVector & RandomizeP(Random & random, const size_t start_pos=0, size_t stop_pos=MAX_BITS); + inline BitVector & RandomizeP(Random & random, const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Set all bits randomly, with a given probability of being a one. - BitVector & Randomize(Random & random, const double p, - const size_t start_pos=0, size_t stop_pos=MAX_BITS); + inline BitVector & Randomize(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Set all bits randomly, with a given number of ones. - BitVector & ChooseRandom(Random & random, const int target_ones, - const size_t start_pos=0, size_t stop_pos=MAX_BITS); + inline BitVector & ChooseRandom(Random & random, const size_t target_ones, + const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Flip random bits with a given probability. - BitVector & FlipRandom(Random & random, const double p, - const size_t start_pos=0, size_t stop_pos=MAX_BITS); + inline BitVector & FlipRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Set random bits with a given probability (does not check if already set.) - BitVector & SetRandom(Random & random, const double p, - const size_t start_pos=0, size_t stop_pos=MAX_BITS); + inline BitVector & SetRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Unset random bits with a given probability (does not check if already zero.) 
- BitVector & ClearRandom(Random & random, const double p, - const size_t start_pos=0, size_t stop_pos=MAX_BITS); + inline BitVector & ClearRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Flip a specified number of random bits. - BitVector & FlipRandomCount(Random & random, const size_t target_bits); + inline BitVector & FlipRandomCount(Random & random, const size_t target_bits); /// Set a specified number of random bits (does not check if already set.) - BitVector & SetRandomCount(Random & random, const size_t target_bits); + inline BitVector & SetRandomCount(Random & random, const size_t target_bits); /// Unset a specified number of random bits (does not check if already zero.) - BitVector & ClearRandomCount(Random & random, const size_t target_bits); + inline BitVector & ClearRandomCount(Random & random, const size_t target_bits); // ========= Comparison Operators ========= // - [[nodiscard]] bool operator==(const BitVector & in) const; - [[nodiscard]] bool operator!=(const BitVector & in) const { return !(*this == in); } - [[nodiscard]] bool operator< (const BitVector & in) const; - [[nodiscard]] bool operator> (const BitVector & in) const { return in < *this; } - [[nodiscard]] bool operator<=(const BitVector & in) const { return !(in < *this); } - [[nodiscard]] bool operator>=(const BitVector & in) const { return !(*this < in); } + [[nodiscard]] inline bool operator==(const BitVector & in) const; + [[nodiscard]] inline bool operator!=(const BitVector & in) const { return !(*this == in); } + [[nodiscard]] inline bool operator< (const BitVector & in) const; + [[nodiscard]] inline bool operator> (const BitVector & in) const { return in < *this; } + [[nodiscard]] inline bool operator<=(const BitVector & in) const { return !(in < *this); } + [[nodiscard]] inline bool operator>=(const BitVector & in) const { return !(*this < in); } // ========= Conversion Operators ========= // /// Automatically convert BitVector to 
other vector types. - template operator emp::vector(); + template inline operator emp::vector(); /// Casting a bit array to bool identifies if ANY bits are set to 1. - explicit operator bool() const { return Any(); } + explicit inline operator bool() const { return Any(); } // ========= Access Groups of bits ========= // /// Retrieve the byte at the specified byte index. - [[nodiscard]] uint8_t GetByte(size_t index) const; + [[nodiscard]] inline uint8_t GetByte(size_t index) const; /// Get a read-only view into the internal array used by BitVector. /// @return Read-only span of BitVector's bytes. - [[nodiscard]] std::span GetBytes() const; + [[nodiscard]] inline std::span GetBytes() const; /// Get a read-only pointer to the internal array used by BitVector. /// (note that bits are NOT in order at the byte level!) @@ -359,18 +363,18 @@ namespace emp { emp::Ptr RawBytes() const { return BytePtr(); } /// Update the byte at the specified byte index. - void SetByte(size_t index, uint8_t value); + inline void SetByte(size_t index, uint8_t value); /// Get the overall value of this BitVector, using a uint encoding, but including all bits /// and returning the value as a double. - [[nodiscard]] double GetValue() const; + [[nodiscard]] inline double GetValue() const; /// Return a span with all fields in order. std::span FieldSpan() { return std::span(bits.Raw(), NumFields()); } /// Get specified type at a given index (in steps of that type size) template - [[nodiscard]] T GetValueAtIndex(const size_t index) const; + [[nodiscard]] inline T GetValueAtIndex(const size_t index) const; // Retrieve the 8-bit uint from the specified uint index. 
[[nodiscard]] uint8_t GetUInt8(size_t index) const { return GetValueAtIndex(index); } @@ -389,7 +393,7 @@ namespace emp { /// Set specified type at a given index (in steps of that type size) - template void SetValueAtIndex(const size_t index, T value); + template inline void SetValueAtIndex(const size_t index, T value); /// Update the 8-bit uint at the specified uint index. void SetUInt8(const size_t index, uint8_t value) { SetValueAtIndex(index, value); } @@ -409,7 +413,7 @@ namespace emp { /// Get specified type starting at a given BIT position. template - [[nodiscard]] T GetValueAtBit(const size_t index) const; + [[nodiscard]] inline T GetValueAtBit(const size_t index) const; // Retrieve the 8-bit uint from the specified uint index. [[nodiscard]] uint8_t GetUInt8AtBit(size_t index) const { return GetValueAtBit(index); } @@ -427,7 +431,7 @@ namespace emp { [[nodiscard]] uint32_t GetUIntAtBit(size_t index) const { return GetUInt32AtBit(index); } - template void SetValueAtBit(const size_t index, T value); + template inline void SetValueAtBit(const size_t index, T value); /// Update the 8-bit uint at the specified uint index. void SetUInt8AtBit(const size_t index, uint8_t value) { SetValueAtBit(index, value); } @@ -448,41 +452,41 @@ namespace emp { // ========= Other Analyses ========= // /// A simple hash function for bit vectors. - [[nodiscard]] std::size_t Hash(size_t start_field=0) const; + [[nodiscard]] inline std::size_t Hash(size_t start_field=0) const; /// Count the number of ones in the BitVector. - [[nodiscard]] size_t CountOnes() const; + [[nodiscard]] inline size_t CountOnes() const; /// Faster counting of ones for very sparse bit vectors. - [[nodiscard]] size_t CountOnes_Sparse() const; + [[nodiscard]] inline size_t CountOnes_Sparse() const; /// Count the number of zeros in the BitVector. [[nodiscard]] size_t CountZeros() const { return GetSize() - CountOnes(); } /// Pop the last bit in the vector. /// @return value of the popped bit. 
- bool PopBack(); + inline bool PopBack(); /// Push given bit(s) onto the back of a vector. /// @param bit value of bit to be pushed. /// @param num number of bits to be pushed. - void PushBack(const bool bit=true, const size_t num=1); + inline void PushBack(const bool bit=true, const size_t num=1); /// Insert bit(s) into any index of vector using bit magic. /// Blog post on implementation reasoning: https://devolab.org/?p=2249 /// @param index location to insert bit(s). /// @param val value of bit(s) to insert. /// @param num number of bits to insert, default 1. - void Insert(const size_t index, const bool val=true, const size_t num=1); + inline void Insert(const size_t index, const bool val=true, const size_t num=1); /// Delete bits from any index in a vector. /// TODO: consider a bit magic approach here. /// @param index location to delete bit(s). /// @param num number of bits to delete, default 1. - void Delete(const size_t index, const size_t num=1); + inline void Delete(const size_t index, const size_t num=1); /// Return the position of the first one; return -1 if no ones in vector. - [[nodiscard]] int FindOne() const; + [[nodiscard]] inline int FindOne() const; /// Deprecated: Return the position of the first one; return -1 if no ones in vector. [[deprecated("Renamed to more accurate FindOne()")]] @@ -493,30 +497,39 @@ namespace emp { /// /// for (int pos = bv.FindOne(); pos >= 0; pos = bv.FindOne(pos+1)) { ... } /// - [[nodiscard]] int FindOne(const size_t start_pos) const; + [[nodiscard]] inline int FindOne(const size_t start_pos) const; + + /// Special version of FindOne takes int; most common way to call. + [[nodiscard]] int FindOne(int start_pos) const { + return FindOne(static_cast(start_pos)); + } /// Deprecated version of FindOne(). [[deprecated("Renamed to more accurate FindOne(start_pos)")]] [[nodiscard]] int FindBit(const size_t start_pos) const; /// Find the most-significant set-bit. 
- [[nodiscard]] int FindMaxOne() const; + [[nodiscard]] inline int FindMaxOne() const; /// Return the position of the first one and change it to a zero. Return -1 if no ones. - int PopOne(); + inline int PopOne(); /// Deprecated version of PopOne(). [[deprecated("Renamed to more accurate PopOne()")]] int PopBit() { return PopOne(); } /// Return positions of all ones. - [[nodiscard]] emp::vector GetOnes() const; + [[nodiscard]] inline emp::vector GetOnes() const; + + /// Collect positions of ones in the provided vector (allows id type choice) + template + inline emp::vector & GetOnes(emp::vector & out_vals) const; /// Find the length of the longest continuous series of ones. - [[nodiscard]] size_t LongestSegmentOnes() const; + [[nodiscard]] inline size_t LongestSegmentOnes() const; /// Return true if any ones are in common with another BitVector. - [[nodiscard]] bool HasOverlap(const BitVector & in) const; + [[nodiscard]] inline bool HasOverlap(const BitVector & in) const; // ========= Print/String Functions ========= // @@ -525,17 +538,17 @@ namespace emp { [[nodiscard]] char GetAsChar(size_t id) const { return Get(id) ? '1' : '0'; } /// Convert this BitVector to a vector string [index 0 on left] - [[nodiscard]] std::string ToString() const; + [[nodiscard]] inline std::string ToString() const; /// Convert this BitVector to a numerical string [index 0 on right] - [[nodiscard]] std::string ToBinaryString() const; + [[nodiscard]] inline std::string ToBinaryString() const; /// Convert this BitVector to a series of IDs - [[nodiscard]] std::string ToIDString(const std::string & spacer=" ") const; + [[nodiscard]] inline std::string ToIDString(const std::string & spacer=" ") const; /// Convert this BitVector to a series of IDs with ranges condensed. 
- [[nodiscard]] std::string ToRangeString(const std::string & spacer=",", - const std::string & ranger="-") const; + [[nodiscard]] inline std::string ToRangeString(const std::string & spacer=",", + const std::string & ranger="-") const; /// Regular print function (from least significant bit to most) void Print(std::ostream & out=std::cout) const { out << ToString(); } @@ -547,18 +560,18 @@ namespace emp { void PrintArray(std::ostream & out=std::cout) const { out << ToString(); } /// Print a space between each field (or other provided spacer) - void PrintFields(std::ostream & out=std::cout, const std::string & spacer=" ") const; + void inline PrintFields(std::ostream & out=std::cout, const std::string & spacer=" ") const; /// Print out details about the internals of the BitVector. - void PrintDebug(std::ostream & out=std::cout) const; + void inline PrintDebug(std::ostream & out=std::cout) const; /// Print the positions of all one bits, spaces are the default separator. - void PrintOneIDs(std::ostream & out=std::cout, const std::string & spacer=" ") const; + void inline PrintOneIDs(std::ostream & out=std::cout, const std::string & spacer=" ") const; /// Print the ones in a range format. E.g., 2-5,7,10-15 - void PrintAsRange(std::ostream & out=std::cout, - const std::string & spacer=",", - const std::string & ranger="-") const; + void inline PrintAsRange(std::ostream & out=std::cout, + const std::string & spacer=",", + const std::string & ranger="-") const; /// Overload ostream operator to return Print. friend std::ostream& operator<<(std::ostream &out, const BitVector & bv) { @@ -570,25 +583,25 @@ namespace emp { // ========= Boolean Logic and Shifting Operations ========= // /// Perform a Boolean NOT with this BitVector, store result here, and return this object. - BitVector & NOT_SELF(); + inline BitVector & NOT_SELF(); /// Perform a Boolean AND with this BitVector, store result here, and return this object. 
- BitVector & AND_SELF(const BitVector & bv2); + inline BitVector & AND_SELF(const BitVector & bv2); /// Perform a Boolean OR with this BitVector, store result here, and return this object. - BitVector & OR_SELF(const BitVector & bv2); + inline BitVector & OR_SELF(const BitVector & bv2); /// Perform a Boolean NAND with this BitVector, store result here, and return this object. - BitVector & NAND_SELF(const BitVector & bv2); + inline BitVector & NAND_SELF(const BitVector & bv2); /// Perform a Boolean NOR with this BitVector, store result here, and return this object. - BitVector & NOR_SELF(const BitVector & bv2); + inline BitVector & NOR_SELF(const BitVector & bv2); /// Perform a Boolean XOR with this BitVector, store result here, and return this object. - BitVector & XOR_SELF(const BitVector & bv2); + inline BitVector & XOR_SELF(const BitVector & bv2); /// Perform a Boolean EQU with this BitVector, store result here, and return this object. - BitVector & EQU_SELF(const BitVector & bv2); + inline BitVector & EQU_SELF(const BitVector & bv2); /// Perform a Boolean NOT on this BitVector and return the result. @@ -614,65 +627,74 @@ namespace emp { /// Positive shifts go left and negative go right (0 does nothing); return result. - [[nodiscard]] BitVector SHIFT(const int shift_size) const; + [[nodiscard]] inline BitVector SHIFT(const int shift_size) const; /// Positive shifts go left and negative go right; store result here, and return this object. - BitVector & SHIFT_SELF(const int shift_size); + inline BitVector & SHIFT_SELF(const int shift_size); /// Reverse the order of bits in the bitset - BitVector & REVERSE_SELF(); + inline BitVector & REVERSE_SELF(); /// Reverse order of bits in the bitset. - [[nodiscard]] BitVector REVERSE() const; + [[nodiscard]] inline BitVector REVERSE() const; /// Positive rotates go left and negative rotates go left (0 does nothing); /// return result. 
- [[nodiscard]] BitVector ROTATE(const int rotate_size) const; + [[nodiscard]] inline BitVector ROTATE(const int rotate_size) const; /// Positive rotates go right and negative rotates go left (0 does nothing); /// store result here, and return this object. - BitVector & ROTATE_SELF(const int rotate_size); + inline BitVector & ROTATE_SELF(const int rotate_size); /// Helper: call ROTATE with negative number instead template - BitVector & ROTL_SELF(); + inline BitVector & ROTL_SELF(); /// Helper for calling ROTATE with positive number template - BitVector & ROTR_SELF(); + inline BitVector & ROTR_SELF(); /// Addition of two BitVectors. /// Wraps if it overflows. /// Returns result. - [[nodiscard]] BitVector ADD(const BitVector & set2) const; + [[nodiscard]] inline BitVector ADD(const BitVector & set2) const; /// Addition of two BitVectors. /// Wraps if it overflows. /// Returns this object. - BitVector & ADD_SELF(const BitVector & set2); + inline BitVector & ADD_SELF(const BitVector & set2); /// Subtraction of two BitVectors. /// Wraps around if it underflows. /// Returns result. - [[nodiscard]] BitVector SUB(const BitVector & set2) const; + [[nodiscard]] inline BitVector SUB(const BitVector & set2) const; /// Subtraction of two BitVectors. /// Wraps if it underflows. /// Returns this object. - BitVector & SUB_SELF(const BitVector & set2); + inline BitVector & SUB_SELF(const BitVector & set2); /// Operator bitwise NOT... [[nodiscard]] inline BitVector operator~() const { return NOT(); } /// Operator bitwise AND... - [[nodiscard]] inline BitVector operator&(const BitVector & ar2) const { return AND(ar2); } + [[nodiscard]] inline BitVector operator&(const BitVector & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return AND(ar2); + } /// Operator bitwise OR... 
- [[nodiscard]] inline BitVector operator|(const BitVector & ar2) const { return OR(ar2); } + [[nodiscard]] inline BitVector operator|(const BitVector & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return OR(ar2); + } /// Operator bitwise XOR... - [[nodiscard]] inline BitVector operator^(const BitVector & ar2) const { return XOR(ar2); } + [[nodiscard]] inline BitVector operator^(const BitVector & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return XOR(ar2); + } /// Operator shift left... [[nodiscard]] inline BitVector operator<<(const size_t shift_size) const { return SHIFT(-(int)shift_size); } @@ -681,13 +703,19 @@ namespace emp { [[nodiscard]] inline BitVector operator>>(const size_t shift_size) const { return SHIFT((int)shift_size); } /// Compound operator bitwise AND... - BitVector & operator&=(const BitVector & ar2) { return AND_SELF(ar2); } + BitVector & operator&=(const BitVector & ar2) { + emp_assert(size() == ar2.size()); return AND_SELF(ar2); + } /// Compound operator bitwise OR... - BitVector & operator|=(const BitVector & ar2) { return OR_SELF(ar2); } + BitVector & operator|=(const BitVector & ar2) { + emp_assert(size() == ar2.size()); return OR_SELF(ar2); + } /// Compound operator bitwise XOR... - BitVector & operator^=(const BitVector & ar2) { return XOR_SELF(ar2); } + BitVector & operator^=(const BitVector & ar2) { + emp_assert(size() == ar2.size()); return XOR_SELF(ar2); + } /// Compound operator for shift left... 
BitVector & operator<<=(const size_t shift_size) { return SHIFT_SELF(-(int)shift_size); } @@ -700,10 +728,17 @@ namespace emp { [[nodiscard]] size_t size() const { return num_bits; } void resize(std::size_t new_size) { Resize(new_size); } + void push_back(bool value) { PushBack(value); } + [[nodiscard]] auto at(size_t pos) { return operator[](pos); } + [[nodiscard]] auto at(size_t pos) const { return operator[](pos); } + [[nodiscard]] auto front() { return at(0); } + [[nodiscard]] auto front() const { return at(0); } + [[nodiscard]] auto back() { return at(GetSize()-1); } + [[nodiscard]] auto back() const { return at(GetSize()-1); } [[nodiscard]] bool all() const { return All(); } [[nodiscard]] bool any() const { return Any(); } [[nodiscard]] bool none() const { return !Any(); } - size_t count() const { return CountOnes(); } + [[nodiscard]] size_t count() const { return CountOnes(); } BitVector & flip() { return Toggle(); } BitVector & flip(size_t pos) { return Toggle(pos); } BitVector & flip(size_t start, size_t end) { return Toggle(start, end); } @@ -888,32 +923,33 @@ namespace emp { n = (n<>( (-(c+FIELD_BITS-num_bits)) & FIELD_LOG2_MASK )); } else if (NUM_FIELDS < 32) { // For small BitVectors, shifting L/R and ORing is faster. - emp::BitVector dup(*this); + BitVector dup(*this); dup.ShiftLeft(shift_size); ShiftRight(num_bits - shift_size); OR_SELF(dup); } else { // For big BitVectors, manual rotating is faster // Note: we already modded shift_size by num_bits, so no need to mod by FIELD_SIZE - const int field_shift = ( shift_size + EndGap() ) / FIELD_BITS; + const size_t field_shift = ( shift_size + EndGap() ) / FIELD_BITS; // If we field shift, we need to shift bits by (FIELD_BITS - NumEndBits()) // to account for the filler that gets pulled out of the middle - const int bit_shift = NumEndBits() && (shift_size + field_shift ? EndGap() : 0) % FIELD_BITS; - const int bit_overflow = FIELD_BITS - bit_shift; + const size_t field_gap = field_shift ? 
EndGap() : 0; + const size_t bit_shift = NumEndBits() && (shift_size + field_gap) % FIELD_BITS; + const size_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields auto field_span = FieldSpan(); std::rotate( field_span.rbegin(), - field_span.rbegin()+field_shift, + field_span.rbegin()+static_cast(field_shift), field_span.rend() ); // if necessary, shift filler bits out of the middle if (NumEndBits()) { - const int filler_idx = (LastField() + field_shift) % NUM_FIELDS; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + const size_t filler_idx = (LastField() + field_shift) % NUM_FIELDS; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); bits[i] >>= (FIELD_BITS - NumEndBits()); } @@ -929,7 +965,7 @@ namespace emp { bits[LastField()] ); - for (int i = LastField(); i > 0; --i) { + for (size_t i = LastField(); i > 0; --i) { bits[i] <<= bit_shift; bits[i] |= (bits[i-1] >> bit_overflow); } @@ -965,7 +1001,7 @@ namespace emp { } else if (NUM_FIELDS < 32) { // for small BitVectors, shifting L/R and ORing is faster - emp::BitVector dup(*this); + BitVector dup(*this); dup.ShiftRight(shift_size); ShiftLeft(num_bits - shift_size); OR_SELF(dup); @@ -973,7 +1009,7 @@ namespace emp { // for big BitVectors, manual rotating is faster const field_t field_shift = (shift_size / FIELD_BITS) % NUM_FIELDS; - const int bit_shift = shift_size % FIELD_BITS; + const size_t bit_shift = shift_size % FIELD_BITS; const field_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields @@ -986,8 +1022,8 @@ namespace emp { // if necessary, shift filler bits out of the middle if (NumEndBits()) { - const int filler_idx = LastField() - field_shift; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + const size_t filler_idx = LastField() - field_shift; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); 
bits[i] >>= (FIELD_BITS - NumEndBits()); } @@ -1137,7 +1173,7 @@ namespace emp { } /// Assignment operator. - BitVector & BitVector::operator=(const BitVector & in) { + BitVector & BitVector::operator=(const BitVector & in) & { emp_assert(in.OK()); if (&in == this) return *this; @@ -1157,7 +1193,7 @@ namespace emp { } /// Move operator. - BitVector & BitVector::operator=(BitVector && in) { + BitVector & BitVector::operator=(BitVector && in) & { emp_assert(&in != this); // in is an r-value, so this shouldn't be possible... if (bits) bits.DeleteArray(); // If we already have bits, get rid of them. num_bits = in.num_bits; // Update the number of bits... @@ -1170,7 +1206,7 @@ namespace emp { /// Assignment operator from a std::bitset. template - BitVector & BitVector::operator=(const std::bitset & bitset) { + BitVector & BitVector::operator=(const std::bitset & bitset) & { const size_t start_fields = NumFields(); num_bits = NUM_BITS; const size_t new_fields = NumFields(); @@ -1189,7 +1225,7 @@ namespace emp { } /// Assignment operator from a string of '0's and '1's. - BitVector & BitVector::operator=(const std::string & bitstring) { + BitVector & BitVector::operator=(const std::string & bitstring) & { const size_t start_fields = NumFields(); num_bits = bitstring.size(); const size_t new_fields = NumFields(); @@ -1253,7 +1289,7 @@ namespace emp { } // Otherwise bits is null; num_bits should be zero. - else emp_assert(num_bits == 0); + else { emp_assert(num_bits == 0); } return true; } @@ -1378,7 +1414,7 @@ namespace emp { } /// Set all bits randomly, with a given number of them being on. 
- BitVector & BitVector::ChooseRandom(Random & random, const int target_ones, + BitVector & BitVector::ChooseRandom(Random & random, const size_t target_ones, const size_t start_pos, size_t stop_pos) { if (stop_pos == MAX_BITS) stop_pos = num_bits; @@ -1386,8 +1422,7 @@ namespace emp { emp_assert(stop_pos <= num_bits); const size_t target_size = stop_pos - start_pos; - emp_assert(target_ones >= 0); - emp_assert(target_ones <= (int) target_size); + emp_assert(target_ones <= target_size); // Approximate the probability of ones as a starting point. double p = ((double) target_ones) / (double) target_size; @@ -1591,7 +1626,7 @@ namespace emp { if (max_one < 64) return (double) GetUInt64(0); // To grab the most significant field, figure out how much to shift it by. - const int shift_bits = max_one - 63; + const size_t shift_bits = static_cast(max_one) - 63; double out_value = (double) (*this >> shift_bits).GetUInt64(0); out_value *= emp::Pow2(shift_bits); @@ -1779,11 +1814,11 @@ namespace emp { /// Find the most-significant set-bit. int BitVector::FindMaxOne() const { // Find the max field with a one. - int max_field = NumFields() - 1; - while (max_field >= 0 && bits[max_field] == 0) max_field--; + size_t max_field = NumFields() - 1; + while (max_field > 0 && bits[max_field] == 0) max_field--; // If there are no ones, return -1. - if (max_field == -1) return -1; + if (bits[max_field] == 0) return -1; const field_t field = bits[max_field]; // Save a local copy of this field. field_t mask = (field_t) -1; // Mask off the bits still under consideration. @@ -1811,10 +1846,18 @@ namespace emp { /// Return positions of all ones. emp::vector BitVector::GetOnes() const { + emp::vector out_vals; + GetOnes(out_vals); + return out_vals; + } + + /// Return positions of all ones using a specified type. + template + emp::vector & BitVector::GetOnes(emp::vector & out_vals) const { // @CAO -- There are better ways to do this with bit tricks. 
- emp::vector out_vals(CountOnes()); - size_t cur_pos = 0; - for (size_t i = 0; i < num_bits; i++) { + out_vals.resize(CountOnes()); + T cur_pos = 0; + for (T i = 0; i < num_bits; i++) { if (Get(i)) out_vals[cur_pos++] = i; } return out_vals; @@ -2010,9 +2053,9 @@ namespace emp { // adapted from https://stackoverflow.com/questions/2602823/in-c-c-whats-the-simplest-way-to-reverse-the-order-of-bits-in-a-byte for (size_t i = 0; i < NumBytes(); ++i) { unsigned char & b = BytePtr()[i]; - b = (b & 0xF0) >> 4 | (b & 0x0F) << 4; - b = (b & 0xCC) >> 2 | (b & 0x33) << 2; - b = (b & 0xAA) >> 1 | (b & 0x55) << 1; + b = static_cast( (b & 0xF0) >> 4 | (b & 0x0F) << 4 ); + b = static_cast( (b & 0xCC) >> 2 | (b & 0x33) << 2 ); + b = static_cast( (b & 0xAA) >> 1 | (b & 0x55) << 1 ); } // shift out filler bits @@ -2070,34 +2113,34 @@ namespace emp { // note that we already modded shift_size by num_bits // so there's no need to mod by FIELD_SIZE here - int field_shift = NumEndBits() ? ( + size_t field_shift = NumEndBits() ? ( (shift_size + FIELD_BITS - NumEndBits()) / FIELD_BITS ) : ( shift_size / FIELD_BITS ); // if we field shift, we need to shift bits by (FIELD_BITS - NumEndBits()) // more to account for the filler that gets pulled out of the middle - int bit_shift = NumEndBits() && field_shift ? ( + size_t bit_shift = NumEndBits() && field_shift ? 
( (shift_size + FIELD_BITS - NumEndBits()) % FIELD_BITS ) : ( shift_size % FIELD_BITS ); - int bit_overflow = FIELD_BITS - bit_shift; + size_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields if (field_shift) { auto field_span = FieldSpan(); std::rotate( field_span.rbegin(), - field_span.rbegin()+field_shift, + field_span.rbegin()+static_cast(field_shift), field_span.rend() ); } // if necessary, shift filler bits out of the middle if (NumEndBits()) { - const int filler_idx = (LAST_FIELD + field_shift) % NUM_FIELDS; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + const size_t filler_idx = (LAST_FIELD + field_shift) % NUM_FIELDS; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); bits[i] >>= (FIELD_BITS - NumEndBits()); } @@ -2113,7 +2156,7 @@ namespace emp { bits[LAST_FIELD] ); - for (int i = LAST_FIELD; i > 0; --i) { + for (size_t i = LAST_FIELD; i > 0; --i) { bits[i] <<= bit_shift; bits[i] |= (bits[i-1] >> bit_overflow); } @@ -2152,7 +2195,7 @@ namespace emp { } else { field_t field_shift = (shift_size / FIELD_BITS) % NUM_FIELDS; - int bit_shift = shift_size % FIELD_BITS; + size_t bit_shift = shift_size % FIELD_BITS; field_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields @@ -2167,8 +2210,8 @@ namespace emp { // if necessary, shift filler bits out of the middle if (NumEndBits()) { - int filler_idx = LAST_FIELD - field_shift; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + size_t filler_idx = LAST_FIELD - field_shift; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); bits[i] >>= (FIELD_BITS - NumEndBits()); } @@ -2282,8 +2325,8 @@ namespace std { /// This is added to the std namespace so that BitVectors can be used /// in data structures that require hashing (such as unordered_map) template <> - struct hash { - std::size_t operator()(const 
emp::BitVector & bv) const { + struct hash { + std::size_t operator()(const emp::old::BitVector & bv) const { return bv.Hash(); } }; diff --git a/include/emp/bits/Bits.hpp b/include/emp/bits/Bits.hpp new file mode 100644 index 0000000000..edb8b787d5 --- /dev/null +++ b/include/emp/bits/Bits.hpp @@ -0,0 +1,2354 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022-23. + * + * @file Bits.hpp + * @brief A generic bit-handler to replace vector, etc +additional bitwise logic features. + * @note Status: RELEASE + * + * The Bits template allows the user to recreate the functionality of std::vector, + * array, std::bitset, and other such bit-handling classes. + * + * This class stores an arbitrary number of bits in a set of "fields" (typically 32 bits or 64 + * bits per field, depending on which should be faster.) Individual bits can be extracted, + * -or- bitwise logic (including more complex bit magic) can be used on the groups of bits. + * + * The template parameters are: + * DATA_T : How is memory managed? + * ZERO_LEFT : Should the index of zero be the left-most bit? (right-most if false) + * + * Specializations are: + * BitVector : A replacement for std::vector (index 0 is on left) + * BitValue : Like BitVector, but index 0 is on the right + * StaticBitVector : Like Bitvector, but max size and fixed memory. + * StaticBitValue : Like BitValue, but max size and fixed memory. + * BitArray : A replacement for std::array (index 0 is on left) + * BitSet : A replacement for std::bitset (index 0 is on right) + * + * In the case of replacements, the aim was for identical functionality, but many additional + * features, especially associated with bitwise logic operations. + * + * @note Compile with -O3 and -msse4.2 for fast bit counting. 
+ * + * + * @todo Most of the operators don't check to make sure that both Bit groups are the same size. + * We should create versions (Intersection() and Union()?) that adjust sizes if needed. + * @todo Do small BitVector optimization. Currently we have number of bits (8 bytes) and a + * pointer to the memory for the bitset (another 8 bytes), but we could use those 16 bytes + * as 1 byte of size info followed by 15 bytes of bitset (120 bits!) + * @todo For large BitVectors we can use a factory to preserve/adjust bit info. That should be + * just as efficient as a reserve, but without the need to store extra in-class info. + * @todo Think about how iterators should work for Bit collections. It should probably go + * bit-by-bit, but there are very few circumstances where that would be useful. Going + * through the positions of all ones would be more useful, but perhaps less intuitive. + */ + +#ifndef EMP_BITS_BITS_HPP_INCLUDE +#define EMP_BITS_BITS_HPP_INCLUDE + + +#include +#include +#include +#include +#include +#include + +#include "../base/array.hpp" +#include "../base/assert.hpp" +#include "../base/error.hpp" +#include "../base/Ptr.hpp" +#include "../base/vector.hpp" +#include "../datastructs/hash_utils.hpp" +#include "../math/constants.hpp" +#include "../math/math.hpp" +#include "../math/Random.hpp" +#include "../math/Range.hpp" +#include "../meta/type_traits.hpp" + +#include "Bits_Data.hpp" +#include "_bitset_helpers.hpp" +#include "bitset_utils.hpp" + + +namespace emp { + + /// @brief A flexible base template to handle BitVector, BitArray, BitSet, & other combinations. + /// @tparam DATA_T How is this Bits object allowed to change size? + /// @tparam ZERO_LEFT Should the index of zero be the left-most bit? (right-most if false) + template + class Bits { + using this_t = Bits; + using field_t = bits_field_t; + + // All internal data (and base-level manipulators) for Bits.
+ DATA_T _data; + + static constexpr size_t FIELD_BITS = NUM_FIELD_BITS; + + // Number of bits needed to specify position in a field + mask + static constexpr size_t FIELD_LOG2 = static_cast(emp::Log2(FIELD_BITS)); + static constexpr field_t FIELD_LOG2_MASK = MaskLow(FIELD_LOG2); + + static constexpr field_t FIELD_0 = (field_t) 0; ///< All bits in a field set to 0 + static constexpr field_t FIELD_1 = (field_t) 1; ///< Least significant bit set to 1 + static constexpr field_t FIELD_255 = (field_t) 255; ///< Least significant 8 bits set to 1 + static constexpr field_t FIELD_ALL = ~FIELD_0; ///< All bits in a field set to 1 + + // Identify the field that a specified bit is in. + [[nodiscard]] static constexpr size_t FieldID(const size_t index) { return index / FIELD_BITS; } + + // Identify the position within a field where a specified bit is. + [[nodiscard]] static constexpr size_t FieldPos(const size_t index) { return index & (FIELD_BITS-1); } + + // Identify which field a specified byte position would be in. + [[nodiscard]] static constexpr size_t Byte2Field(const size_t index) { return index / sizeof(field_t); } + + // Convert a byte position in Bits to a byte position in the target field. + [[nodiscard]] static constexpr size_t Byte2FieldPos(const size_t index) { return FieldPos(index * 8); } + + [[nodiscard]] constexpr field_t MaskField(size_t mask_size) const { + return MaskLow(mask_size); + } + [[nodiscard]] constexpr field_t MaskField(size_t mask_size, size_t offset) const { + return MaskLow(mask_size) << offset; + } + + // Assume that the size of the bits has already been adjusted to be the size of the one + // being copied and only the fields need to be copied over. + void RawCopy(const Ptr from, size_t copy_fields=emp::MAX_SIZE_T); + + // Shortcut for RawCopy if we are copying a whole other Bits object. 
+ template + void RawCopy(const Bits & in_bits) { + RawCopy(in_bits.FieldPtr(), in_bits.NumFields()); + } + + // Copy bits from one position in the genome to another; leave old positions unchanged. + constexpr void RawMove(const size_t from_start, const size_t from_stop, const size_t to); + + // Convert the bits to bytes (note that bits are NOT in order at the byte level!) + [[nodiscard]] emp::Ptr BytePtr() { return _data.BytePtr(); } + + // Convert the bits to const bytes array (note that bits are NOT in order at the byte level!) + [[nodiscard]] emp::Ptr BytePtr() const { return _data.BytePtr(); } + + // Any bits past the last "real" bit in the last field should be kept as zeros. + constexpr this_t & ClearExcessBits() { + if (_data.NumEndBits()) _data.bits[_data.LastField()] &= _data.EndMask(); + return *this; + } + + // Apply a transformation to each bit field in a specified range. + template + Bits & ApplyRange(const FUN_T & fun, size_t start, size_t stop); + + // Helper: call SHIFT with positive number + constexpr void ShiftLeft(const size_t shift_size); + + // Helper for calling SHIFT with negative number + // Raw indicates if we should keep bits that are technically out of range; may be needed if + // we are trying to shift bits back INTO range after another operation. + constexpr void ShiftRight(const size_t shift_size, bool raw=false); + + /// Helper: call ROTATE with negative number instead + constexpr void ROTL_SELF(const size_t shift_size_raw); + + /// Helper for calling ROTATE with positive number + constexpr void ROTR_SELF(const size_t shift_size_raw); + + public: + /// @brief Default constructor; will build the default number of bits (often 0, but not always) + /// @param init_val Initial value of all default bits. 
+ Bits(bool init_val=0) { if (init_val) SetAll(); else Clear(); } + + /// @brief Build a new Bits with specified bit count and initialization (default 0) + Bits(size_t in_num_bits, bool init_val=false); + + // Prevent ambiguous conversions... + /// @brief Anything not otherwise defined for first argument, convert to size_t. + template ::value, int>::type = 0> + Bits(T in_num_bits, bool init_val=false) : Bits(static_cast(in_num_bits), init_val) {} + + /// @brief Copy constructor of existing bits object. + Bits(const Bits & in) = default; + + /// @brief Constructor for other type of existing bits object. + template + Bits(const Bits & in); + + /// @brief Move constructor of existing bit field. + Bits(this_t && in) = default; + + /// @brief Constructor to generate a Bits from a std::bitset. + template + explicit Bits(const std::bitset & bitset); + + /// @brief Constructor to generate a Bits from a string of '0's and '1's. + Bits(const std::string & bitstring); + + /// @brief Constructor to generate a Bits from a literal string of '0's and '1's. + Bits(const char * bitstring) : Bits(std::string(bitstring)) {} + + /// @brief Constructor to generate a random set of bits in the default size. + /// @param random Random number generator to use. + Bits(Random & random); + + /// @brief Constructor to generate random Bits with provided prob of 1's, default size. + /// @param random Random number generator to use. + /// @param p1 Probability of a bit being a one. + Bits(Random & random, const double p1); + + /// @brief Constructor to generate random Bits with specified # of ones, default size. + /// @param random Random number generator to use. + /// @param target_ones Number of ones to include in the Bits. + Bits(Random & random, const size_t target_ones); + + /// @brief Constructor to generate random Bits with specified # of ones, default size. + /// @param random Random number generator to use. + /// @param target_ones Number of ones to include in the Bits. 
+ Bits(Random & random, const int target_ones) : Bits(random, (size_t) target_ones) { } + + /// @brief Constructor to generate a specified number of random Bits (with equal prob of 0 or 1). + Bits(size_t in_num_bits, Random & random); + + /// @brief Constructor to generate a random Bits with provided prob of 1's. + Bits(size_t in_num_bits, Random & random, const double p1); + + /// @brief Constructor to generate a random Bits with provided number of 1's. + Bits(size_t in_num_bits, Random & random, const size_t target_ones); + + /// @brief Constructor to generate a random Bits with provided number of 1's. + Bits(size_t in_num_bits, Random & random, const int target_ones) + : Bits(in_num_bits, random, (size_t) target_ones) { } + + /// @brief Initializer list constructor. + template Bits(const std::initializer_list l); + + /// @brief Copy, but with a resize. + template + Bits(const Bits & in, size_t new_size); + + /// @brief Destructor + ~Bits() = default; + + /// @brief Copy assignment operator. + Bits & operator=(const Bits & in) &; + + /// @brief Assignment operator for other Bits object + template + Bits & operator=(const Bits & in) &; + + /// @brief Move operator. + Bits & operator=(Bits && in) &; + + /// @brief Assignment operator from a std::bitset. + template + Bits & operator=(const std::bitset & bitset) &; + + /// @brief Assignment operator from a string of '0's and '1's. + Bits & operator=(const std::string & bitstring) &; + + /// @brief Assignment operator from a literal string of '0's and '1's. + Bits & operator=(const char * bitstring) & { return operator=(std::string(bitstring)); } + + /// @brief Assignment from another Bits object without changing size. + template + Bits & Import( + const Bits & from_bits, + const size_t from_start_pos=0, + size_t max_copy_bits=emp::MAX_SIZE_T + ); + + /// @brief Convert to a Bits of a different size. 
+ template > + [[nodiscard]] OUT_T Export(size_t out_size, size_t start_bit=0) const; + + /// @brief Convert to a BitArray of a different size. + template + [[nodiscard]] Bits,true> + ExportArray(size_t start_bit=0) const { + return Export< Bits,true> >(NUM_BITS, start_bit); + } + + /// @brief Concatenate another Bits object on to the end of this one. + template + Bits & Append(const Bits & in_bits); + + /// @brief Scan this bitvector to make sure that there are no internal problems. + [[nodiscard]] bool OK() const { return _data.OK(); } + + + // ========= Accessors ========= // + + /// @brief How many bits do we currently have? + [[nodiscard]] constexpr auto GetSize() const { return _data.NumBits(); } + + /// @brief How many bits are locked in at compile time? + [[nodiscard]] static constexpr auto GetCTSize() { return DATA_T::NumCTBits(); } + + /// @brief How many bytes are in this Bits? (includes empty field space) + [[nodiscard]] constexpr auto GetNumBytes() const { return _data.NumBytes(); } + + /// @brief How many distinct values could be held in this Bits? + [[nodiscard]] constexpr double GetNumStates() const { return emp::Pow2(_data.NumBits()); } + + /// @brief Retrieve the bit value from the specified index. + [[nodiscard]] constexpr bool Get(size_t index) const; + + /// @brief A safe version of Get() for indexing out of range. Useful for representing collections. + [[nodiscard]] constexpr bool Has(size_t index) const { + return (index < _data.NumBits()) ? Get(index) : false; + } + + /// @brief Update the bit value at the specified index. + Bits & Set(size_t index, bool value=true); + + /// @brief Set all bits to 1. + Bits & SetAll(); + + /// @brief Set a range of bits to value (default one): [start, stop) + Bits & SetRange(size_t start, size_t stop, bool value=true) { + if (value) return ApplyRange([](field_t){ return FIELD_ALL; }, start, stop); + return Clear(start, stop); + } + + /// @brief Set all bits to 0.
+ Bits & Clear(); + + /// @brief Set specific bit to 0. + Bits & Clear(size_t index) { return Set(index, false); } + + /// @brief Set bits to 0 in the range [start, stop) + Bits & Clear(const size_t start, const size_t stop) { + return ApplyRange([](field_t) -> size_t { return 0; }, start, std::min(stop,GetSize())); + } + + + /// @brief Const index operator -- return the bit at the specified position. + [[nodiscard]] bool operator[](size_t index) const { return Get(index); } + + /// @brief Index operator; return proxy to bit at specified position usable as an lvalue. + BitProxy operator[](size_t index) { return BitProxy(*this, index); } + + /// @brief Change every bit in the sequence. + Bits & Toggle() { return NOT_SELF(); } + + /// @brief Change a specified bit to the opposite value + Bits & Toggle(size_t index); + + /// @brief Flips all the bits in a range [start, end) + Bits & Toggle(size_t start, size_t stop) + { return ApplyRange([](field_t x){ return ~x; }, start, stop); } + + /// @brief Return true if ANY bits are set to 1, otherwise return false. + [[nodiscard]] bool Any() const; + + /// @brief Return true if NO bits are set to 1, otherwise return false. + [[nodiscard]] bool None() const { return !Any(); } + + /// @brief Return true if ALL bits are set to 1, otherwise return false. + // @CAO: Can speed up by not duplicating Bits; fields should be all 1, last should be mask. + [[nodiscard]] bool All() const { return (~(*this)).None(); } + + /// @brief Resize this Bits object to have the specified number of bits (if allowed) + Bits & Resize(size_t new_bits) { _data.RawResize(new_bits, true); return *this; } + + + // ========= Randomization functions ========= // + + /// @brief Set all bits randomly, with a 50% probability of being a 0 or 1. + Bits & Randomize(Random & random); + + /// @brief Set all bits randomly, with probability specified at compile time. 
+ template + Bits & RandomizeP(Random & random, const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Set all bits randomly, with a given probability of being a one. + Bits & Randomize(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Set all bits randomly, with a given number of ones. + Bits & ChooseRandom(Random & random, const size_t target_ones, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Flip random bits with a given probability. + Bits & FlipRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Set random bits with a given probability (does not check if already set.) + Bits & SetRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Unset random bits with a given probability (does not check if already zero.) + Bits & ClearRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Flip a specified number of random bits. + Bits & FlipRandomCount(Random & random, const size_t target_bits); + + /// @brief Set a specified number of random bits (does not check if already set.) + Bits & SetRandomCount(Random & random, const size_t target_bits); + + /// @brief Unset a specified number of random bits (does not check if already zero.) + Bits & ClearRandomCount(Random & random, const size_t target_bits); + + + // ========= Comparison Operators ========= // + + /// @brief Compare two bits objects, even with different template arguments. 
+ template + [[nodiscard]] bool operator==(const Bits & in) const; + template + [[nodiscard]] bool operator!=(const Bits & in) const { return !(*this == in); } + template + [[nodiscard]] bool operator< (const Bits & in) const; + template + [[nodiscard]] bool operator> (const Bits & in) const { return in < *this; } + template + [[nodiscard]] bool operator<=(const Bits & in) const { return !(in < *this); } + template + [[nodiscard]] bool operator>=(const Bits & in) const { return !(*this < in); } + + + // ========= Conversion Operators ========= // + + /// @brief Automatically convert Bits to other vector types. + template operator emp::vector(); + + /// @brief Casting a bit array to bool identifies if ANY bits are set to 1. + explicit operator bool() const { return Any(); } + + + // ========= Access Groups of bits ========= // + + /// @brief Retrieve the byte at the specified byte index. + [[nodiscard]] uint8_t GetByte(size_t index) const; + + /// @brief Get a read-only view into the internal array used by Bits. + /// @return Read-only span of Bits' bytes. + [[nodiscard]] auto GetBytes() const { return _data.AsByteSpan(); } + + /// @brief Return a span with all fields in order. + [[nodiscard]] std::span FieldSpan() { + return std::span(_data.FieldPtr().Raw(), _data.NumFields()); + } + + /// @brief Return a const span with all fields in order. + [[nodiscard]] std::span FieldSpan() const { + return std::span(_data.FieldPtr().Raw(), _data.NumFields()); + } + + [[nodiscard]] size_t NumFields() const { return _data.NumFields(); } + + /// @brief Return a pointer to the set of fields. + [[nodiscard]] auto FieldPtr() { return _data.FieldPtr(); } + + /// @brief Return a const pointer to the set of fields. + [[nodiscard]] auto FieldPtr() const { return _data.FieldPtr(); } + + /// @brief Get a read-only pointer to the internal array used by Bits. + /// (note that bits are NOT in order at the byte level!) + /// @return Read-only pointer to Bits' bytes.
+ [[nodiscard]] emp::Ptr RawBytes() const { return BytePtr(); } + + /// @brief Update the byte at the specified byte index. + void SetByte(size_t index, uint8_t value); + + /// @brief Get overall base-2 value of this Bits, returning as a double. + [[nodiscard]] double GetValue() const; + + /// @brief Get specified type at a given index (in steps of that type size) + template + [[nodiscard]] T GetValueAtIndex(const size_t index) const; + + /// @brief Retrieve the 8-bit uint from the specified uint index. + [[nodiscard]] uint8_t GetUInt8(size_t index) const { return GetValueAtIndex(index); } + + /// @brief Retrieve the 16-bit uint from the specified uint index. + [[nodiscard]] uint16_t GetUInt16(size_t index) const { return GetValueAtIndex(index); } + + /// @brief Retrieve the 32-bit uint from the specified uint index. + [[nodiscard]] uint32_t GetUInt32(size_t index) const { return GetValueAtIndex(index); } + + /// @brief Retrieve the 64-bit uint from the specified uint index. + [[nodiscard]] uint64_t GetUInt64(size_t index) const { return GetValueAtIndex(index); } + + /// @brief By default, retrieve the 32-bit uint from the specified uint index. + [[nodiscard]] uint32_t GetUInt(size_t index) const { return GetUInt32(index); } + + + /// @brief Set specified type at a given index (in steps of that type size) + template Bits & SetValueAtIndex(const size_t index, T value); + + /// @brief Update the 8-bit uint at the specified uint index. + void SetUInt8(const size_t index, uint8_t value) { SetValueAtIndex(index, value); } + + /// @brief Update the 16-bit uint at the specified uint index. + void SetUInt16(const size_t index, uint16_t value) { SetValueAtIndex(index, value); } + + /// @brief Update the 32-bit uint at the specified uint index. + void SetUInt32(const size_t index, uint32_t value) { SetValueAtIndex(index, value); } + + /// @brief Update the 64-bit uint at the specified uint index. 
+ void SetUInt64(const size_t index, uint64_t value) { SetValueAtIndex(index, value); } + + /// @brief By default, update the 32-bit uint at the specified uint index. + void SetUInt(const size_t index, uint32_t value) { SetUInt32(index, value); } + + + /// @brief Get specified type starting at a given BIT position. + template + [[nodiscard]] T GetValueAtBit(const size_t index) const; + + /// @brief Retrieve the 8-bit uint starting at the specified bit position. + [[nodiscard]] uint8_t GetUInt8AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief Retrieve the 16-bit uint starting at the specified bit position. + [[nodiscard]] uint16_t GetUInt16AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief Retrieve the 32-bit uint starting at the specified bit position. + [[nodiscard]] uint32_t GetUInt32AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief Retrieve the 64-bit uint starting at the specified bit position. + [[nodiscard]] uint64_t GetUInt64AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief By default, retrieve the 32-bit uint starting at the specified bit position. + [[nodiscard]] uint32_t GetUIntAtBit(size_t index) const { return GetUInt32AtBit(index); } + + + template Bits & SetValueAtBit(const size_t index, T value); + + /// @brief Update the 8-bit uint starting at the specified bit position. + void SetUInt8AtBit(const size_t index, uint8_t value) { SetValueAtBit(index, value); } + + /// @brief Update the 16-bit uint starting at the specified bit position. + void SetUInt16AtBit(const size_t index, uint16_t value) { SetValueAtBit(index, value); } + + /// @brief Update the 32-bit uint starting at the specified bit position. + void SetUInt32AtBit(const size_t index, uint32_t value) { SetValueAtBit(index, value); } + + /// @brief Update the 64-bit uint starting at the specified bit position. + void SetUInt64AtBit(const size_t index, uint64_t value) { SetValueAtBit(index, value); } + + /// @brief By default, update the 32-bit uint starting at the specified bit position.
+ void SetUIntAtBit(const size_t index, uint32_t value) { SetUInt32AtBit(index, value); } + + + // ========= Other Analyses ========= // + + /// @brief A simple hash function for bit vectors. + [[nodiscard]] std::size_t Hash(size_t start_field=0) const; + + /// @brief Count the number of ones in Bits. + [[nodiscard]] constexpr size_t CountOnes() const; + + /// @brief Count the number of ones in a range within Bits. [start, end) + [[nodiscard]] constexpr size_t CountOnes(size_t start, size_t end) const; + + /// @brief Faster counting of ones for very sparse bit vectors. + [[nodiscard]] constexpr size_t CountOnes_Sparse() const; + + /// @brief Count the number of zeros in Bits. + [[nodiscard]] constexpr size_t CountZeros() const { return GetSize() - CountOnes(); } + + /// @brief Pop the last bit in the vector. + /// @return value of the popped bit. + bool PopBack(); + + /// @brief Push given bit(s) onto the back of a vector. + /// @param bit value of bit to be pushed. + /// @param num number of bits to be pushed. + void PushBack(const bool bit=true, const size_t num=1); + + /// @brief Push given bit(s) onto the front of a vector. + /// @param bit value of bit to be pushed. + /// @param num number of bits to be pushed. + void PushFront(const bool bit=true, const size_t num=1); + + /// @brief Insert bit(s) into any index of vector using bit magic. + /// Blog post on implementation reasoning: https://devolab.org/?p=2249 + /// @param index location to insert bit(s). + /// @param val value of bit(s) to insert. + /// @param num number of bits to insert, default 1. + void Insert(const size_t index, const bool val=true, const size_t num=1); + + /// @brief Delete bits from any index in a vector. + // TODO: consider a bit magic approach here. + /// @param index location to delete bit(s). + /// @param num number of bits to delete, default 1. 
+ void Delete(const size_t index, const size_t num=1); + + /// @brief Return the position of the first one; return -1 if no ones in vector. + [[nodiscard]] int FindOne() const; + + /// @brief Return the position of the first zero; return -1 if no zeroes in vector. + [[nodiscard]] int FindZero() const; + + + /// Deprecated: Return the position of the first one; return -1 if no ones in vector. + [[deprecated("Renamed to more accurate FindOne()")]] + [[nodiscard]] int FindBit() const { return FindOne(); } + + /// @brief Return the position of the first one after start_pos (or -1 if none) + /// You can loop through all 1-bit positions of Bits object "bits" with: + /// + /// for (int pos = bits.FindOne(); pos >= 0; pos = bits.FindOne(pos+1)) { ... } + + [[nodiscard]] int FindOne(const size_t start_pos) const; + + /// @brief Return the position of the first zero after start_pos (or -1 if none) + /// You can loop through all 0-bit positions of Bits object "bits" with: + /// + /// for (int pos = bits.FindZero(); pos >= 0; pos = bits.FindZero(pos+1)) { ... } + + [[nodiscard]] int FindZero(const size_t start_pos) const; + + /// @brief Special version of FindOne takes int; common way to call. + [[nodiscard]] int FindOne(int start_pos) const { + return FindOne(static_cast(start_pos)); + } + + /// @brief Special version of FindZero takes int; common way to call. + [[nodiscard]] int FindZero(int start_pos) const { + return FindZero(static_cast(start_pos)); + } + + /// Deprecated version of FindOne(). + [[deprecated("Renamed to more accurate FindOne(start_pos)")]] + [[nodiscard]] int FindBit(const size_t start_pos) const; + + /// @brief Find the most-significant set-bit. + [[nodiscard]] int FindMaxOne() const; + + /// @brief Return the position of the first one and change it to a zero. Return -1 if none. + int PopOne(); + + /// Deprecated version of PopOne(). 
+ [[deprecated("Renamed to more accurate PopOne()")]] + int PopBit() { return PopOne(); } + + /// @brief Return vector of positions of all ones. + [[nodiscard]] emp::vector GetOnes() const; + + /// @brief Collect positions of ones in the provided vector (allows id type choice) + template + emp::vector & GetOnes(emp::vector & out_vals) const; + + /// @brief Find the length of the longest continuous series of ones. + [[nodiscard]] size_t LongestSegmentOnes() const; + + /// @brief Find ids of all groups of ones. + /// @return A vector of ranges that identify all ids of ones. + [[nodiscard]] emp::vector> GetRanges() const; + + /// @brief Return true if any ones are in common with another Bits. + [[nodiscard]] bool HasOverlap(const Bits & in) const; + + + // ========= Print/String Functions ========= // + + /// @brief Convert a specified bit to a character. + [[nodiscard]] char GetAsChar(size_t id) const { return Get(id) ? '1' : '0'; } + + /// @brief Convert this Bits to a vector string [index 0 based on ZERO_LEFT] + [[nodiscard]] std::string ToString() const; + + /// @brief Convert this Bits to an array-based string [index 0 on left] + [[nodiscard]] std::string ToArrayString() const; + + /// @brief Convert this Bits to a numerical string [index 0 on right] + [[nodiscard]] std::string ToBinaryString() const; + + /// @brief Convert this Bits to a series of IDs + [[nodiscard]] std::string ToIDString(const std::string & spacer=" ") const; + + /// @brief Convert this Bits to a series of IDs with ranges condensed. 
+ [[nodiscard]] std::string ToRangeString(const std::string & spacer=",", + const std::string & ranger="-") const; + + /// @brief Regular print function (from least significant bit to most) + void Print(std::ostream & out=std::cout) const { out << ToString(); } + + /// @brief Numerical print function (from most significant bit to least) + void PrintBinary(std::ostream & out=std::cout) const { out << ToBinaryString(); } + + /// @brief Print from smallest bit position to largest. + void PrintArray(std::ostream & out=std::cout) const { out << ToArrayString(); } + + /// @brief Print a space between each field (or other provided spacer) + void PrintFields(std::ostream & out=std::cout, const std::string & spacer=" ") const; + + /// @brief Print out details about the internals of Bits. + void PrintDebug(std::ostream & out=std::cout, const std::string & label="") const; + + /// @brief Print the positions of all one bits, spaces are the default separator. + void PrintOneIDs(std::ostream & out=std::cout, const std::string & spacer=" ") const; + + /// @brief Print the ones in a range format. E.g., 2-5,7,10-15 + void PrintAsRange(std::ostream & out=std::cout, + const std::string & spacer=",", + const std::string & ranger="-") const; + + /// @brief Overload ostream operator to return Print. + friend std::ostream& operator<<(std::ostream &out, const Bits & bits) { + bits.Print(out); + return out; + } + + + // ========= Boolean Logic and Shifting Operations ========= // + + /// @brief Perform a Boolean NOT with this Bits, store result here, and return this object. + Bits & NOT_SELF(); + + /// @brief Perform a Boolean AND with this Bits, store result here, and return this object. + Bits & AND_SELF(const Bits & bits2); + + /// @brief Perform a Boolean OR with this Bits, store result here, and return this object. + Bits & OR_SELF(const Bits & bits2); + + /// @brief Perform a Boolean NAND with this Bits, store result here, and return this object. 
+ Bits & NAND_SELF(const Bits & bits2); + + /// @brief Perform a Boolean NOR with this Bits, store result here, and return this object. + Bits & NOR_SELF(const Bits & bits2); + + /// @brief Perform a Boolean XOR with this Bits, store result here, and return this object. + Bits & XOR_SELF(const Bits & bits2); + + /// @brief Perform a Boolean EQU with this Bits, store result here, and return this object. + Bits & EQU_SELF(const Bits & bits2); + + + /// @brief Perform a Boolean NOT on this Bits and return the result. + [[nodiscard]] Bits NOT() const { return Bits(*this).NOT_SELF(); } + + /// @brief Perform a Boolean AND on this Bits and return the result. + [[nodiscard]] Bits AND(const Bits & bits2) const { return Bits(*this).AND_SELF(bits2); } + + /// @brief Perform a Boolean OR on this Bits and return the result. + [[nodiscard]] Bits OR(const Bits & bits2) const { return Bits(*this).OR_SELF(bits2); } + + /// @brief Perform a Boolean NAND on this Bits and return the result. + [[nodiscard]] Bits NAND(const Bits & bits2) const { return Bits(*this).NAND_SELF(bits2); } + + /// @brief Perform a Boolean NOR on this Bits and return the result. + [[nodiscard]] Bits NOR(const Bits & bits2) const { return Bits(*this).NOR_SELF(bits2); } + + /// @brief Perform a Boolean XOR on this Bits and return the result. + [[nodiscard]] Bits XOR(const Bits & bits2) const { return Bits(*this).XOR_SELF(bits2); } + + /// @brief Perform a Boolean EQU on this Bits and return the result. + [[nodiscard]] Bits EQU(const Bits & bits2) const { return Bits(*this).EQU_SELF(bits2); } + + + /// @brief Positive shifts left and negative right (0 does nothing); return result. + [[nodiscard]] Bits SHIFT(const int shift_size) const; + + /// @brief Positive shifts left and negative right; store result here, and return *this. + Bits & SHIFT_SELF(const int shift_size); + + /// @brief Reverse the order of bits in the bitset + Bits & REVERSE_SELF(); + + /// @brief Reverse order of bits in the bitset. 
[[nodiscard]] Bits REVERSE() const;

/// @brief Positive rotates right and negative goes left; return result.
[[nodiscard]] Bits ROTATE(const int rotate_size) const;

/// @brief Positive rotates right and negative goes left; store here, and return *this.
Bits & ROTATE_SELF(const int rotate_size);

/// @brief Sums two Bits objects (following uint rules); returns result.
[[nodiscard]] Bits ADD(const Bits & set2) const;

/// @brief Sums another Bits object onto this one (following uint rules); returns *this.
Bits & ADD_SELF(const Bits & set2);

/// @brief Subtracts one Bits object from another (following uint rules); returns result.
[[nodiscard]] Bits SUB(const Bits & set2) const;

/// @brief Subtracts another Bits object from this one (following uint rules); returns *this.
Bits & SUB_SELF(const Bits & set2);


/// @brief Operator bitwise NOT; forwards to NOT().
[[nodiscard]] inline Bits operator~() const { return NOT(); }

/// @brief Operator bitwise AND; both operands must have the same size (asserted).
[[nodiscard]] inline Bits operator&(const Bits & ar2) const {
  emp_assert(size() == ar2.size(), size(), ar2.size());
  return AND(ar2);
}

/// @brief Operator bitwise OR; both operands must have the same size (asserted).
[[nodiscard]] inline Bits operator|(const Bits & ar2) const {
  emp_assert(size() == ar2.size(), size(), ar2.size());
  return OR(ar2);
}

/// @brief Operator bitwise XOR; both operands must have the same size (asserted).
[[nodiscard]] inline Bits operator^(const Bits & ar2) const {
  emp_assert(size() == ar2.size(), size(), ar2.size());
  return XOR(ar2);
}

/// @brief Operator shift left; implemented as SHIFT() with a negated count.
[[nodiscard]] inline Bits operator<<(const size_t shift_size) const { return SHIFT(-(int)shift_size); }

/// @brief Operator shift right; implemented as SHIFT() with a positive count.
[[nodiscard]] inline Bits operator>>(const size_t shift_size) const { return SHIFT((int)shift_size); }

/// @brief Compound operator bitwise AND; both operands must have the same size (asserted).
Bits & operator&=(const Bits & ar2) {
  emp_assert(size() == ar2.size()); return AND_SELF(ar2);
}

/// @brief Compound operator bitwise OR...
Bits & operator|=(const Bits & ar2) {
  emp_assert(size() == ar2.size()); return OR_SELF(ar2);
}

/// @brief Compound operator bitwise XOR; both operands must have the same size (asserted).
Bits & operator^=(const Bits & ar2) {
  emp_assert(size() == ar2.size()); return XOR_SELF(ar2);
}

/// @brief Compound operator for shift left; implemented as SHIFT_SELF() with a negated count.
Bits & operator<<=(const size_t shift_size) { return SHIFT_SELF(-(int)shift_size); }

/// @brief Compound operator for shift right; implemented as SHIFT_SELF() with a positive count.
Bits & operator>>=(const size_t shift_size) { return SHIFT_SELF((int)shift_size); }

/// @brief Operator plus; forwards to ADD().
[[nodiscard]] Bits operator+(const Bits & ar2) const { return ADD(ar2); }

/// @brief Operator minus; forwards to SUB().
[[nodiscard]] Bits operator-(const Bits & ar2) const { return SUB(ar2); }

/// @brief Compound operator plus; forwards to ADD_SELF().
/// NOTE(review): returns `const Bits &`, while the other compound operators in this class
/// return `Bits &` -- confirm whether the const is intentional.
const Bits & operator+=(const Bits & ar2) { return ADD_SELF(ar2); }

/// @brief Compound operator minus; forwards to SUB_SELF().
/// NOTE(review): same `const Bits &` return as operator+= -- confirm.
const Bits & operator-=(const Bits & ar2) { return SUB_SELF(ar2); }


// =========  Cereal Compatibility  ========= //

/// @brief Setup this bits object so that it can be stored in an archive and re-loaded.
/// Passes the internal `_data` member to the archive.
// NOTE(review): the bare `template` below is reproduced as found; its parameter list appears
// to have been lost before this text was captured (presumably it declares `Archive`).
template
void serialize(Archive & ar) { ar(_data); }


// =========  Standard Library Compatibility  ========= //
// A set of functions to allow drop-in replacement with std::bitset.
// Lower-case aliases; each one forwards to the correspondingly named member of this class.
[[nodiscard]] constexpr size_t size() const { return _data.NumBits(); }
// NOTE(review): the `auto &` returns below need operator[] to yield an lvalue, while
// ChooseRandom() holds the result of operator[] by value and calls Set()/Clear() on it
// (proxy-style) -- confirm these overloads compile when instantiated.
[[nodiscard]] auto & at(size_t pos) { return operator[](pos); }
[[nodiscard]] auto at(size_t pos) const { return operator[](pos); }
[[nodiscard]] auto & front() { return at(0); }
[[nodiscard]] auto front() const { return at(0); }
[[nodiscard]] auto & back() { return at(GetSize()-1); }
[[nodiscard]] auto back() const { return at(GetSize()-1); }
void resize(std::size_t new_size) { Resize(new_size); }
void push_back(const bool bit=true, const size_t num=1) { PushBack(bit, num); }
void pop_back() { resize(GetSize() - 1); }   // No check here for an empty object.
[[nodiscard]] constexpr bool all() const { return All(); }
[[nodiscard]] constexpr bool any() const { return Any(); }
[[nodiscard]] constexpr bool none() const { return !Any(); }
[[nodiscard]] constexpr size_t count() const { return CountOnes(); }
Bits & flip() { return Toggle(); }
Bits & flip(size_t pos) { return Toggle(pos); }
Bits & flip(size_t start, size_t end) { return Toggle(start, end); }
void reset() { Clear(); }
void reset(size_t id) { Set(id, false); }
void set() { SetAll(); }
void set(size_t id) { Set(id); }
[[nodiscard]] bool test(size_t index) const { return Get(index); }
auto data() { return FieldSpan(); }
auto data() const { return FieldSpan(); }
};



// ------------------------ Implementations for Internal Functions ------------------------

/// Copy `num_fields` whole fields from `from` into this object's field array, verbatim.
/// No masking is applied to the copied fields.
/// @param from Source of field values, indexed from 0.
/// @param num_fields Number of fields to copy; emp::MAX_SIZE_T means "all fields of this object".
// NOTE(review): the bare `template` is reproduced as found; its parameter list (and any
// `<...>` argument on `Ptr`) appears to have been lost before this text was captured.
template
void Bits::
RawCopy(const Ptr from, size_t num_fields)
{
  // If num_fields was not specified, set it to the max number of fields.
  if (num_fields == emp::MAX_SIZE_T) num_fields = _data.NumFields();

  emp_assert(num_fields <= _data.NumFields(), "Trying to RawCopy() more fields than can fit.");

  for (size_t i = 0; i < num_fields; i++) _data.bits[i] = from[i];
}

// Move bits from one position in the genome to another; leave old positions unchanged.
// All positions are required to exist and memory must be available for the move.
+ // @CAO: Can speed up by focusing only on the moved fields (i.e., don't shift unused bits). + template + constexpr void Bits:: + RawMove(const size_t from_start, const size_t from_stop, const size_t to) + { + emp_assert(from_start <= from_stop); // Must move legal region. + emp_assert(from_stop <= _data.NumBits()); // Cannot move from past end. + emp_assert(to <= _data.NumBits()); // Must move to somewhere legal. + + const size_t move_size = from_stop - from_start; // How big is the chunk to move? + emp_assert(to + move_size <= _data.NumBits()); // Must fit in new position. + + // If nothing to copy OR already in place, stop right there. + if (move_size == 0 || from_start == to) return; + + const size_t to_stop = to + move_size; // Where is the end to move it to? + const int shift = (int) from_start - (int) to; // How far will the moved piece shift? + this_t move_bits(*this); // Place to hold moved bits. + move_bits.SHIFT_SELF(shift); // Put the moved bits in place. + Clear(to, to_stop); // Make room for the moved bits. + move_bits.Clear(0, to); // Clear everything BEFORE moved bits. + move_bits.Clear(to_stop, _data.NumBits()); // Clear everything AFTER moved bits. + OR_SELF(move_bits); // Merge bit strings together. + } + + template + template + Bits & Bits:: + ApplyRange(const FUN_T & fun, size_t start, size_t stop) + { + emp_assert(start <= stop, start, stop, _data.NumBits()); // Start cannot be after stop. + emp_assert(stop <= _data.NumBits(), stop, _data.NumBits()); // Stop must be in range. + + if (start == stop) return *this; // Empty range. + + const size_t start_pos = FieldPos(start); // Start position WITHIN a bit field. + const size_t stop_pos = FieldPos(stop); // Stop position WITHIN a bit field. + size_t start_field = FieldID(start); // ID of bit field we're starting in. + const size_t stop_field = FieldID(stop); // ID of last field to actively scan. + + // If all bits are in the same field, mask off the middle. 
+ if (start_field == FieldID(stop-1)) { + const size_t apply_bits = stop - start; // How many bits to change? + const field_t mask = MaskField(apply_bits, start_pos); // Target change bits with a mask. + field_t & target = _data.bits[start_field]; // Isolate the field to change. + target = (target & ~mask) | (fun(target) & mask); // Update targeted bits! + } + + // Otherwise mask the ends and fully modify the chunks in between. + else { + // If we're only using a portions of start field, mask it and setup. + if (start_pos != 0) { + const size_t start_bits = FIELD_BITS - start_pos; // How many bits in start field? + const field_t mask = MaskField(start_bits, start_pos); // Target start bits with a mask. + field_t & target = _data.bits[start_field]; // Isolate the field to change. + target = (target & ~mask) | (fun(target) & mask); // Update targeted bits! + start_field++; // Move to the next field. + } + + // Middle fields + for (size_t cur_field = start_field; cur_field < stop_field; cur_field++) { + _data.bits[cur_field] = fun(_data.bits[cur_field]); + } + + // Set portions of stop field + if (stop_pos != 0) { + const field_t mask = MaskField(stop_pos); // Target end bits with a mask. + field_t & target = _data.bits[stop_field]; // Isolate the field to change. + target = (target & ~mask) | (fun(target) & mask); // Update targeted bits! + } + } + + return *this; + } + + template + constexpr void Bits::ShiftLeft(const size_t shift_size) { + // If we are shifting out of range, clear the bits and stop. + if (shift_size >= GetSize()) { Clear(); return; } + + // If we have only a single field, this operation can be quick. + if (_data.NumFields() == 1) { + (_data.bits[0] <<= shift_size) &= _data.EndMask(); + return; + } + + const size_t field_shift = shift_size / FIELD_BITS; + const size_t bit_shift = shift_size % FIELD_BITS; + const size_t bit_overflow = FIELD_BITS - bit_shift; + + // Loop through each field, from L to R, and update it. 
+ if (field_shift) { + for (size_t i = _data.LastField(); i >= field_shift; --i) { + _data.bits[i] = _data.bits[i - field_shift]; + } + for (size_t i = field_shift; i > 0; --i) _data.bits[i-1] = 0; + } + + // account for bit_shift + if (bit_shift) { + for (size_t i = _data.LastField() ; i > field_shift; --i) { + _data.bits[i] <<= bit_shift; + _data.bits[i] |= (_data.bits[i-1] >> bit_overflow); + } + // Handle final field (field_shift position) + _data.bits[field_shift] <<= bit_shift; + } + + // Mask out any bits that have left-shifted away + ClearExcessBits(); + } + + template + constexpr void Bits::ShiftRight(const size_t shift_size, bool raw) { + if (shift_size == 0) return; + + // If we are shifting out of range, clear the bits and stop. + if (!raw && shift_size >= GetSize()) { Clear(); return; } + + // If we have only a single field, this operation can be quick. + if (_data.NumFields() == 1) { + _data.bits[0] >>= shift_size; + return; + } + + const size_t field_shift = shift_size / FIELD_BITS; + const size_t bit_shift = shift_size % FIELD_BITS; + const size_t bit_overflow = FIELD_BITS - bit_shift; + const size_t NUM_FIELDS = _data.NumFields(); + const size_t field_shift2 = NUM_FIELDS - field_shift; + + // account for field_shift + if (field_shift) { + for (size_t i = 0; i < field_shift2; ++i) { + _data.bits[i] = _data.bits[i + field_shift]; + } + // Clear fields where bits were fully shifted out. + for (size_t i = field_shift2; i < NUM_FIELDS; i++) _data.bits[i] = FIELD_0; + } + + // account for bit_shift + if (bit_shift) { + for (size_t i = 0; i < (field_shift2 - 1); ++i) { + _data.bits[i] >>= bit_shift; + _data.bits[i] |= (_data.bits[i+1] << bit_overflow); + } + _data.bits[field_shift2 - 1] >>= bit_shift; + } + } + + /// Helper: call ROTATE with negative number + template + constexpr void Bits::ROTL_SELF(const size_t shift_size_raw) { + if (GetSize() == 0) return; // Nothing to rotate if there are not bits. 
+ const field_t shift_size = shift_size_raw % GetSize(); + + // Use different approaches based on number of bits. + if (_data.NumFields() == 1) { + _data.bits[0] = emp::RotateBitsLeft(_data.bits[0], shift_size, GetSize()); + } else { // For few bits, shifting L/R and OR-ing is faster. + this_t dup(*this); + dup.ShiftLeft(shift_size); + ShiftRight(GetSize() - shift_size); + OR_SELF(dup); + } + } + + + /// Helper for calling ROTATE with positive number + template + constexpr void Bits::ROTR_SELF(const size_t shift_size_raw) { + const size_t shift_size = shift_size_raw % GetSize(); + + // use different approaches based on number of bits + if (_data.NumFields() == 1) { + _data.bits[0] = emp::RotateBitsRight(_data.bits[0], shift_size, GetSize()); + } else { + this_t dup(*this); + dup.ShiftRight(shift_size); + ShiftLeft(GetSize() - shift_size); + OR_SELF(dup); + } + } + + + /////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////// + // ---------------------------------------------------------------------------------------- + // --------------------- Implementations of Public Member Functions ----------------------- + // ---------------------------------------------------------------------------------------- + /////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////// + + + // ------------------- Implementations of Constructors and Assignments -------------------- + + /// Build a new Bits object with specified bit count and initialization (default 0) + template + Bits::Bits(size_t _num_bits, bool init_val) : _data(_num_bits) { + if (init_val) SetAll(); else Clear(); + } + + /// Constructor from other type of Bits field. 
// Sizes this object to match `in`, then copies its contents with RawCopy().
// NOTE(review): the bare `template` keywords in this section are reproduced as found; their
// parameter lists (and other `<...>` spans, e.g. on `Bits` and `std::bitset` parameter
// types) appear to have been lost before this text was captured.
template
template
Bits::Bits(const Bits & in)
  : _data(in.GetSize())
{
  emp_assert(in.OK());
  RawCopy(in);
}

// -- Move constructor in class; set to default --

/// Constructor to generate a Bits from a std::bitset.
/// Bit i of the result takes the value of bitset[i].
template
template
Bits::Bits(const std::bitset & bitset)
  : _data(NUM_BITS)
{
  // Copy over the values.
  Clear();
  for (size_t i = 0; i < NUM_BITS; ++i) Set(i, bitset[i]);
}

/// Constructor to generate a Bits from a string of '0's and '1's.
/// Characters other than '0' and '1' are skipped and do not consume a bit position.
/// When ZERO_LEFT is true the first bit character maps to position 0; otherwise it maps
/// to the highest position.
template
Bits::Bits(const std::string & bitstring)
  : _data(CountBits(bitstring))
{
  Clear();

  size_t pos = 0;
  for (char c : bitstring) {
    if (c == '1') {
      if constexpr (ZERO_LEFT) Set(pos);
      else Set(GetSize() - pos - 1);
      pos++;
    }
    if (c == '0') ++pos; // Leave position as zero and move to next pos.
  }
}

/// Constructor to generate a random set of bits in the default size.
/// Asserts that the default size is non-zero.
template
Bits::Bits(Random & random)
{
  emp_assert(GetSize() > 0, "Trying to construct a random series of bits, but with no bits!");
  Randomize(random);
  ClearExcessBits();
}

/// Constructor to generate random Bits with provided prob of 1's, default size.
/// @param p1 Probability of each bit being one; must be in [0.0, 1.0] (asserted).
template
Bits::Bits(Random & random, const double p1)
{
  emp_assert(GetSize() > 0, "Trying to construct a random series of bits, but with no bits!");
  emp_assert(p1 >= 0.0 && p1 <= 1.0, "Probability of ones out of range", p1);
  Randomize(random, p1);
  ClearExcessBits();
}

/// Constructor to generate random Bits with specified number of ones.
/// @param target_ones Number of ones requested from ChooseRandom().
template
Bits::Bits(Random & random, const size_t target_ones)
{
  emp_assert(GetSize() > 0, "Trying to construct a random series of bits, but with no bits!");
  ChooseRandom(random, target_ones);
  ClearExcessBits();
}

/// Constructor to generate a random Bits (with equal prob of 0 or 1).
+ template + Bits::Bits(size_t in_num_bits, Random & random) + : _data(in_num_bits) + { + Clear(); + Randomize(random); + } + + /// Constructor to generate a random Bits with provided prob of 1's. + template + Bits::Bits(size_t in_num_bits, Random & random, const double p1) + : _data(in_num_bits) + { + emp_assert(p1 >= 0.0 && p1 <= 1.0, "Probability of ones out of range", p1); + Clear(); + Randomize(random, p1); + } + + /// Constructor to generate a random Bits with provided number of 1's. + template + Bits::Bits(size_t in_num_bits, Random & random, const size_t target_ones) + : _data(in_num_bits) + { + Clear(); + ChooseRandom(random, target_ones); + } + + /// Initializer list constructor. + template + template + Bits::Bits(const std::initializer_list l) + : _data(l.size()) + { + Clear(); + size_t idx = 0; + if constexpr (ZERO_LEFT) { + for (auto i = std::begin(l); i != std::end(l); ++i) Set(idx++, *i); + } else { + for (auto i = std::rbegin(l); i != std::rend(l); ++i) Set(idx++, *i); + } + } + + /// Copy, but with a resize. + template + template + Bits:: + Bits(const Bits & in, size_t new_size) + : Bits(new_size) + { + emp_assert(in.OK()); + + // How many fields do we need to copy? + size_t copy_fields = std::min(_data.NumFields(), in.NumFields()); + + RawCopy(in.FieldPtr(), copy_fields); + } + + /// Copy assignment operator. + template + Bits & + Bits::operator=(const Bits & in) & + { + emp_assert(in.OK()); + if (&in != this) { + _data.RawResize(in.GetSize()); + RawCopy(in); + } + + return *this; + } + + /// Other Bits assignment operator. + template + template + Bits & + Bits::operator=(const Bits & in) & + { + emp_assert(in.OK()); + Resize(in.GetSize()); + RawCopy(in); + + return *this; + } + + /// Move operator. + template + Bits & + Bits::operator=(Bits && in) & + { + emp_assert(&in != this); // Shouldn't be possible in an r-value + _data = std::move(in._data); // Shift move into _data objects. 
+ return *this; + } + + /// Assignment operator from a std::bitset. + template + template + Bits & + Bits::operator=(const std::bitset & bitset) & + { + _data.RawResize(NUM_BITS); + for (size_t i = 0; i < NUM_BITS; i++) Set(i, bitset[i]); // Copy bits in. + return ClearExcessBits(); // Set excess bits to zeros. + } + + /// Assignment operator from a string of '0's and '1's. + template + Bits & + Bits::operator=(const std::string & bitstring) & + { + const size_t new_size = CountBits(bitstring); + _data.RawResize(new_size); + + Clear(); + + size_t pos = 0; + for (char c : bitstring) { + if (c == '1') { + if constexpr (ZERO_LEFT) Set(pos); + else Set(new_size - pos - 1); + pos++; + } + if (c == '0') ++pos; // Leave position as zero and move to next pos. + } + + return *this; + } + + + /// Assign from a Bits object of a different size, while keeping current size. + /// If there are too many bits being imported, extras are cut off. + /// If there are fewer bits, the remainder are zero'd out (up to max_copy_bits) + // @CAO: Can copy fields for a speedup. + template + template + Bits & + Bits::Import( + const Bits & from_bits, + const size_t from_start_pos, + size_t max_copy_bits) + { + emp_assert(from_start_pos < from_bits.GetSize()); + size_t bits_available = from_bits.GetSize() - from_start_pos; + + // Actual copied bits is limited by bits available to copy and bits in this object. + size_t copy_size = emp::Min(bits_available, GetSize(), max_copy_bits); + + for (size_t i = 0; i < copy_size; ++i) { + Set(i, from_bits[i+from_start_pos]); + } + + // Any bits AFTER the ones copied, but before the max copy, should be zeroed out. + Clear(copy_size, max_copy_bits); + + return *this; + } + + /// Convert to a Bitset of a different size. + template + template + OUT_T Bits::Export(size_t out_size, size_t start_bit) const { + OUT_T out_bits(out_size); + out_bits.Import(*this, start_bit); + return out_bits; + } + + /// Concatenate another Bits object on to the end of this one. 
// Grows this object by in_bits.GetSize() bits and ORs in a copy of `in_bits` that has been
// resized to the new length and shifted (via <<=) by the old length.
// NOTE(review): the bare `template` keywords in this section are reproduced as found; their
// parameter lists appear to have been lost before this text was captured.
template
template
Bits & Bits::Append(
  const Bits & in_bits
) {
  this_t shift_copy(in_bits);
  const size_t old_size = GetSize();
  const size_t new_size = old_size + in_bits.GetSize();
  Resize(new_size);
  shift_copy.Resize(new_size);
  shift_copy <<= old_size;
  OR_SELF(shift_copy);
  return *this;
}


// -------------------- Implementations of common accessors -------------------

/// Retrieve the bit value from the specified index.
/// @param index Bit position; must be less than GetSize() (asserted).
template
constexpr bool Bits::Get(size_t index) const {
  emp_assert(index < GetSize(), index, GetSize());
  const size_t field_id = FieldID(index);
  const size_t pos_id = FieldPos(index);
  return _data.bits[field_id] & (FIELD_1 << pos_id);
}

/// Update the bit value at the specified index.
/// @param index Bit position; must be less than GetSize() (asserted).
/// @param value New value for the bit.
/// @return *this
template
Bits & Bits::Set(size_t index, bool value) {
  emp_assert(index < GetSize(), index, GetSize());
  const size_t field_id = FieldID(index);
  const size_t pos_id = FieldPos(index);
  const field_t pos_mask = FIELD_1 << pos_id;

  if (value) _data.bits[field_id] |= pos_mask;
  else       _data.bits[field_id] &= ~pos_mask;

  return *this;
}

/// Set all bits to 1.
/// Every field is filled, then ClearExcessBits() is applied.
template
Bits & Bits::SetAll() {
  const size_t NUM_FIELDS = _data.NumFields();
  for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = FIELD_ALL;
  return ClearExcessBits();
}

/// Set all bits to 0.
template
Bits & Bits::Clear() {
  const size_t NUM_FIELDS = _data.NumFields();
  for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = FIELD_0;
  return *this;
}

/// Change a specified bit to the opposite value
/// @param index Bit position; must be less than GetSize() (asserted).
/// @return *this
template
Bits & Bits::Toggle(size_t index) {
  emp_assert(index < GetSize(), index, GetSize());
  const size_t field_id = FieldID(index);
  const size_t pos_id = FieldPos(index);
  const field_t pos_mask = FIELD_1 << pos_id;

  _data.bits[field_id] ^= pos_mask;

  return *this;
}


// ------ @CAO CONTINUE HERE!!!
// ------  (tail of the "@CAO CONTINUE HERE" divider comment that starts on the previous line)


/// Return true if at least one field is non-zero.
// NOTE(review): the bare `template` keywords in this section are reproduced as found; their
// parameter lists (and other `<...>` spans) appear to have been lost before this text was captured.
template
bool Bits::Any() const {
  const size_t NUM_FIELDS = _data.NumFields();
  for (size_t i = 0; i < NUM_FIELDS; i++) {
    if (_data.bits[i]) return true;
  }
  return false;
}

// ------------------------- Implementations Randomization functions -------------------------

/// Set all bits randomly, with a 50% probability of being a 0 or 1.
template
Bits & Bits::Randomize(Random & random) {
  random.RandFill(BytePtr(), _data.NumBytes());
  return ClearExcessBits();
}

/// Set all bits randomly, with probability specified at compile time.
/// @param start_pos First position of the range handed to the generator.
/// @param stop_pos One past the last position; MAX_SIZE_T means "through the end".
// NOTE(review): the compile-time probability argument is not visible in this text
// (RandFillP is called with no `<...>` argument) -- presumably stripped; confirm.
template
template
Bits & Bits::RandomizeP(Random & random,
                        const size_t start_pos, size_t stop_pos) {
  if (stop_pos == MAX_SIZE_T) stop_pos = GetSize();

  emp_assert(start_pos <= stop_pos);
  emp_assert(stop_pos <= GetSize());
  random.RandFillP(BytePtr(), _data.NumBytes(), start_pos, stop_pos);
  return *this;
}


/// Set all bits randomly, with a given probability of being on.
/// @param p Probability of a one; must be in [0.0, 1.0] (asserted).
/// @param start_pos First position of the range handed to the generator.
/// @param stop_pos One past the last position; MAX_SIZE_T means "through the end".
template
Bits &
Bits::Randomize(Random & random, const double p,
                const size_t start_pos, size_t stop_pos) {
  if (stop_pos == MAX_SIZE_T) stop_pos = GetSize();

  emp_assert(start_pos <= stop_pos, start_pos, stop_pos);
  emp_assert(stop_pos <= GetSize(), stop_pos, GetSize());
  emp_assert(p >= 0.0 && p <= 1.0, p);
  random.RandFill(BytePtr(), _data.NumBytes(), p, start_pos, stop_pos);
  return *this;
}

/// Set all bits randomly, with a given number of them being on.
/// Works in two phases: a bulk fill picked from the ratio target_ones / range size, then
/// single-bit corrections at random positions until the count of ones in the range matches.
/// @param target_ones Exact number of ones wanted in [start_pos, stop_pos); must fit (asserted).
/// @param stop_pos One past the last position; MAX_SIZE_T means "through the end".
template
Bits &
Bits::ChooseRandom(Random & random, const size_t target_ones,
                   const size_t start_pos, size_t stop_pos) {
  if (stop_pos == MAX_SIZE_T) stop_pos = GetSize();

  emp_assert(start_pos <= stop_pos);
  emp_assert(stop_pos <= GetSize());

  const size_t target_size = stop_pos - start_pos;
  emp_assert(target_ones <= target_size);

  // Approximate the probability of ones as a starting point.
  double p = ((double) target_ones) / (double) target_size;

  // If we are not randomizing the whole sequence, we need to track the number of ones
  // in the NON-randomized region to subtract off later.
  size_t kept_ones = 0;
  if (target_size != GetSize()) {
    Clear(start_pos, stop_pos);
    kept_ones = CountOnes();
  }

  // Try to find a shortcut if p allows....
  // (These values are currently educated guesses)
  // NOTE(review): every RandomizeP call below looks identical because its compile-time
  // probability argument is missing from this text (presumably stripped) -- confirm
  // against the original before relying on these lines.
  if (p < 0.12) { if (target_size == GetSize()) Clear(start_pos, stop_pos); }
  else if (p < 0.2)  RandomizeP(random, start_pos, stop_pos);
  else if (p < 0.35) RandomizeP(random, start_pos, stop_pos);
  else if (p < 0.42) RandomizeP(random, start_pos, stop_pos);
  else if (p < 0.58) RandomizeP(random, start_pos, stop_pos);
  else if (p < 0.65) RandomizeP(random, start_pos, stop_pos);
  else if (p < 0.8)  RandomizeP(random, start_pos, stop_pos);
  else if (p < 0.88) RandomizeP(random, start_pos, stop_pos);
  else SetRange(start_pos, stop_pos);

  size_t cur_ones = CountOnes() - kept_ones;

  // Do we need to add more ones?
  while (cur_ones < (size_t) target_ones) {
    size_t pos = random.GetUInt(start_pos, stop_pos);
    auto bit = operator[](pos);
    if (!bit) {
      bit.Set();
      cur_ones++;
    }
  }

  // See if we have too many ones.
  while (cur_ones > (size_t) target_ones) {
    size_t pos = random.GetUInt(start_pos, stop_pos);
    auto bit = operator[](pos);
    if (bit) {
      bit.Clear();
      cur_ones--;
    }
  }

  return *this;
}

/// Flip random bits with a given probability.
/// Each position in [start_pos, stop_pos) is toggled when random.P(p) returns true.
// @CAO: Possibly faster to generate a sequence of bits and XORing with them.
template
Bits &
Bits::FlipRandom(Random & random,
                 const double p,
                 const size_t start_pos,
                 size_t stop_pos)
{
  if (stop_pos == MAX_SIZE_T) stop_pos = GetSize();

  emp_assert(start_pos <= stop_pos);
  emp_assert(stop_pos <= GetSize());
  emp_assert(p >= 0.0 && p <= 1.0, p);

  for (size_t i=start_pos; i < stop_pos; ++i) if (random.P(p)) Toggle(i);

  return *this;
}

/// Set random bits with a given probability (does not check if already set.)
+ template + Bits & Bits::SetRandom(Random & random, + const double p, + const size_t start_pos, + size_t stop_pos) + { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos); + emp_assert(stop_pos <= GetSize()); + emp_assert(p >= 0.0 && p <= 1.0, p); + + for (size_t i=start_pos; i < stop_pos; ++i) if (random.P(p)) Set(i); + + return *this; + } + + /// Unset random bits with a given probability (does not check if already zero.) + template + Bits & Bits::ClearRandom(Random & random, + const double p, + const size_t start_pos, + size_t stop_pos) + { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos); + emp_assert(stop_pos <= GetSize()); + emp_assert(p >= 0.0 && p <= 1.0, p); + + for (size_t i=start_pos; i < stop_pos; ++i) if (random.P(p)) Clear(i); + + return *this; + } + + /// Flip a specified number of random bits. + template + Bits & Bits::FlipRandomCount( + Random & random, + const size_t target_bits + ) { + emp_assert(GetSize() <= GetSize()); + Bits choice(GetSize(), random, target_bits); + return XOR_SELF(choice); + } + + /// Set a specified number of random bits (does not check if already set.) + template + Bits & Bits::SetRandomCount( + Random & random, + const size_t target_bits + ) { + emp_assert(GetSize() <= GetSize()); + Bits choice(GetSize(), random, target_bits); + return OR_SELF(choice); + } + + /// Unset a specified number of random bits (does not check if already zero.) + template + Bits & Bits::ClearRandomCount( + Random & random, + const size_t target_bits + ) { + emp_assert(GetSize() <= GetSize()); + Bits choice(GetSize(), random, GetSize() - target_bits); + return AND_SELF(choice); + } + + + // ------------------------- Implementations of Comparison Operators ------------------------- + + /// Test if two bit vectors are identical. 
// Objects of different sizes are never equal; otherwise every field must match.
// NOTE(review): the bare `template` keywords in this section are reproduced as found; their
// parameter lists (and other `<...>` spans, e.g. on `Bits` and `emp::vector`) appear to
// have been lost before this text was captured.
template
template
bool Bits::operator==(const Bits & in) const {
  if (GetSize() != in.GetSize()) return false;

  const size_t NUM_FIELDS = _data.NumFields();
  auto in_fields = in.FieldSpan();
  for (size_t i = 0; i < NUM_FIELDS; ++i) {
    if (_data.bits[i] != in_fields[i]) return false;
  }
  return true;
}

/// Compare the would-be numerical values of two bit vectors.
/// Objects of different sizes are ordered by size alone; equal-sized objects are compared
/// field by field starting from the highest-indexed field.
template
template
bool Bits::operator<(const Bits & in) const {
  if (GetSize() != in.GetSize()) return GetSize() < in.GetSize();

  const size_t NUM_FIELDS = _data.NumFields();
  auto in_fields = in.FieldSpan();
  for (size_t i = NUM_FIELDS; i > 0; --i) {   // Start loop at the largest field.
    const size_t pos = i-1;
    if (_data.bits[pos] == in_fields[pos]) continue;  // If same, keep looking!
    return (_data.bits[pos] < in_fields[pos]);        // Otherwise, do comparison
  }
  return false; // Bit vectors are identical.
}

/// Automatically convert Bits object to other vector types.
/// Element i of the result is Get(i) cast to T.
template
template
Bits::operator emp::vector() {
  emp::vector out(GetSize());
  for (size_t i = 0; i < GetSize(); i++) {
    out[i] = (T) Get(i);
  }
  return out;
}


// -------------------------  Access Groups of bits -------------------------

/// Retrieve the byte at the specified byte index.
/// @param index Byte index; must be less than _data.NumBytes() (asserted).
template
uint8_t Bits::GetByte(size_t index) const {
  emp_assert(index < _data.NumBytes(), index, _data.NumBytes());
  const size_t field_id = Byte2Field(index);
  const size_t pos_id = Byte2FieldPos(index);
  return (_data.bits[field_id] >> pos_id) & 255U;
}

/// Update the byte at the specified byte index.
// Replaces the eight bits of byte `index` with `value`; other bits in the field are kept.
// NOTE(review): the bare `template` keywords in this section are reproduced as found; their
// parameter lists (and other `<...>` spans, e.g. after `static_cast`) appear to have been
// lost before this text was captured.
template
void Bits::SetByte(size_t index, uint8_t value) {
  emp_assert(index < _data.NumBytes(), index, _data.NumBytes());
  const size_t field_id = Byte2Field(index);
  const size_t pos_id = Byte2FieldPos(index);
  const field_t val_uint = value;
  _data.bits[field_id] = (_data.bits[field_id] & ~(FIELD_255 << pos_id)) | (val_uint << pos_id);
}

/// Get the overall value of this BitSet, using a uint encoding, but including all bits
/// and returning the value as a double.
/// When the highest one bit is at position 64 or above, only the top 64 bits (ending at
/// that highest one) are read and then scaled, so lower bits do not contribute.
template
double Bits::GetValue() const {
  const int max_one = FindMaxOne();

  // If there are no ones, this value must be 0.
  if (max_one == -1) return 0.0;

  // If all ones are in the least-significant field, just return it.
  if (max_one < 64) return (double) GetUInt64(0);

  // To grab the most significant field, figure out how much to shift it by.
  const size_t shift_bits = static_cast(max_one) - 63;
  double out_value = (double) (*this >> shift_bits).GetUInt64(0);

  out_value *= emp::Pow2(shift_bits);

  return out_value;
}

/// Get specified type at a given index (in steps of that type size)
/// Reads sizeof(T) bytes starting at byte offset index * sizeof(T) of the underlying storage.
template
template
T Bits::GetValueAtIndex(const size_t index) const {
  // For the moment, must fit inside bounds; eventually should pad with zeros.
  emp_assert((index + 1) * sizeof(T) <= _data.TotalBytes());

  T out_value;
  std::memcpy( &out_value, BytePtr().Raw() + index * sizeof(T), sizeof(T) );
  return out_value;
}


/// Set specified type at a given index (in steps of that type size)
/// Writes sizeof(T) bytes at byte offset index * sizeof(T), then applies ClearExcessBits().
template
template
Bits & Bits::SetValueAtIndex(const size_t index, T in_value) {
  // For the moment, must fit inside bounds; eventually should pad with zeros.
  emp_assert((index + 1) * sizeof(T) <= _data.TotalBytes());
  std::memcpy( BytePtr().Raw() + index * sizeof(T), &in_value, sizeof(T) );
  return ClearExcessBits();
}


/// Get the specified type starting from a given BIT position.
// Works on a copy: shifts the copy with >>= by `index` and reads a T from its start.
// NOTE(review): the bare `template` keywords in this section are reproduced as found; their
// parameter lists (and other `<...>` spans, e.g. after `GetValueAtIndex`, `std::is_same_v`,
// `std::bitset` and `Export`) appear to have been lost before this text was captured.
template
template
T Bits::GetValueAtBit(const size_t index) const {
  // For the moment, must fit inside bounds; eventually should pad with zeros.
  // NOTE(review): this bound uses a strict `<`, while GetValueAtIndex() uses `<=` --
  // confirm whether a value ending exactly at the last byte should be accepted.
  emp_assert((index+7)/8 + sizeof(T) < _data.TotalBytes());

  Bits out_bits(*this);
  out_bits >>= index;

  return out_bits.template GetValueAtIndex(0);
}


/// Set the specified type starting from a given BIT position.
/// Clears the destination bits, builds a same-sized temporary holding `value` at its
/// start, shifts the temporary with <<= by `index`, and ORs it in.
// @CAO: Can be optimized substantially, especially for long Bits objects.
template
template
Bits & Bits::SetValueAtBit(const size_t index, T value) {
  // For the moment, must fit inside bounds; eventually should (?) pad with zeros.
  // NOTE(review): same strict `<` bound as GetValueAtBit() -- confirm.
  emp_assert((index+7)/8 + sizeof(T) < _data.TotalBytes());
  constexpr size_t type_bits = sizeof(T) * 8;

  const size_t end_pos = Min(index+type_bits, GetSize());
  Clear(index, end_pos);               // Clear out the bits where new value will go.
  Bits in_bits(GetSize());             // Setup a bitset for the new bits.
  in_bits.SetValueAtIndex(0, value);   // Insert the new bits.
  in_bits <<= index;                   // Shift new bits into place.
  OR_SELF(in_bits);                    // Place new bits into current Bits object.

  return ClearExcessBits();
}


// -------------------------  Other Analyses -------------------------

/// A simple hash function for bit vectors.
/// Recursive: combines fields two at a time starting at `start_field`, and folds the bit
/// count in at the end.
template
std::size_t Bits::Hash(size_t start_field) const {
  static_assert(std::is_same_v, "Hash() requires fields to be size_t");

  // If there are no fields left, the result is just the size.
  if (start_field == _data.NumFields()) return GetSize();

  // If we have only one field left, combine it with size.
  if (start_field == _data.NumFields()-1) return hash_combine(_data.bits[start_field], GetSize());

  // Otherwise we have more than one field.  Combine and recurse.
  size_t partial_hash = hash_combine(_data.bits[start_field], _data.bits[start_field+1]);

  return hash_combine(partial_hash, Hash(start_field+2));
}

// TODO: see https://arxiv.org/pdf/1611.07612.pdf for fast pop counts
/// Count the number of ones in Bits.
/// Sums the population count of every field; asserts the total does not exceed GetSize().
template
constexpr size_t Bits::CountOnes() const {
  if (GetSize() == 0) return 0;
  const field_t NUM_FIELDS = _data.NumFields();
  size_t bit_count = 0;
  for (size_t i = 0; i < NUM_FIELDS; i++) {
    // when compiling with -O3 and -msse4.2, this is the fastest population count method.
    std::bitset std_bs(_data.bits[i]);
    bit_count += std_bs.count();
  }

  emp_assert(bit_count <= GetSize());
  return bit_count;
}

// TODO: Speed this up so that we don't need to copy out all of the bits.
/// Count the number of ones in a specified range of Bits.
/// Counts over [start, end) by exporting that range into a temporary and counting there.
template
constexpr size_t Bits::CountOnes(size_t start, size_t end) const {
  emp_assert(start <= end);
  emp_assert(end <= GetSize());
  if (start == end) return 0;
  const size_t range_size = end-start;
  return Export(range_size, start).CountOnes();
}

/// Faster counting of ones for very sparse bit vectors.
/// Loops once per one bit in each field rather than once per bit position.
template
constexpr size_t Bits::CountOnes_Sparse() const {
  size_t bit_count = 0;
  const size_t NUM_FIELDS = _data.NumFields();
  for (size_t i = 0; i < NUM_FIELDS; ++i) {
    field_t cur_field = _data.bits[i];
    while (cur_field) {
      cur_field &= (cur_field-1);       // Peel off a single 1.
      bit_count++;                      // Increment the counter
    }
  }
  return bit_count;
}

/// Pop the last bit in the vector.
/// No check is made here for an empty object before computing GetSize()-1.
/// @return value of the popped bit.
template
bool Bits::PopBack() {
  const bool val = Get(GetSize()-1);
  Resize(GetSize() - 1);
  return val;
}

/// Push given bit(s) onto the back of a vector.
/// @param bit value of bit to be pushed.
/// @param num number of bits to be pushed.
+ template + void Bits::PushBack(const bool bit, const size_t num) { + Resize(GetSize() + num); + if (bit) SetRange(GetSize()-num, GetSize()); + } + + /// Push given bit(s) onto the front of a vector. + /// @param bit value of bit to be pushed. + /// @param num number of bits to be pushed. + template + void Bits::PushFront(const bool bit, const size_t num) { + Resize(GetSize() + num); // Make room for the new bits. + SHIFT_SELF(-(int)num); // Move existing bits up by num positions (same direction Insert() uses). + if (bit) SetRange(0, num); // If new bits should be ones, make it so. + } + + /// Insert bit(s) into any index of vector using bit magic. + /// Blog post on implementation reasoning: https://devolab.org/?p=2249 + /// @param index location to insert bit(s). + /// @param val value of bit(s) to insert (default true) + /// @param num number of bits to insert, default 1. + template + void Bits::Insert(const size_t index, const bool val, const size_t num) { + Resize(GetSize() + num); // Adjust to new number of bits. + Bits low_bits(*this); // Copy current bits + SHIFT_SELF(-(int)num); // Shift the high bits into place. + Clear(0, index+num); // Reduce current to just high bits. + low_bits.Clear(index, GetSize()); // Reduce copy to just low bits. + if (val) SetRange(index, index+num); // If new bits should be ones, make it so. + OR_SELF(low_bits); // Put the low bits back in place. + } + + + /// Delete bits from any index in a vector. + /// @param index location to delete bit(s). + /// @param num number of bits to delete, default 1. + template + void Bits::Delete(const size_t index, const size_t num) { + emp_assert(index+num <= GetSize()); // Make sure bits to delete actually exist! + RawMove(index+num, GetSize(), index); // Shift positions AFTER delete into place. + Resize(GetSize() - num); // Crop off end bits. + } + + /// Return the position of the first one; return -1 if no ones in vector. + template + int Bits::FindOne() const { + const size_t NUM_FIELDS = _data.NumFields(); + size_t field_id = 0; + while (field_id < NUM_FIELDS && _data.bits[field_id]==0) field_id++; + return (field_id < NUM_FIELDS) ?
+ (int) (find_bit(_data.bits[field_id]) + (field_id * FIELD_BITS)) : -1; + } + + /// Return the position of the first zero; return -1 if no zeros in vector. + template + int Bits::FindZero() const { + const size_t NUM_FIELDS = _data.NumFields(); + size_t field_id = 0; + while (field_id < NUM_FIELDS && _data.bits[field_id]==FIELD_ALL) field_id++; + if (field_id >= NUM_FIELDS) return -1; + + // Unused bits past the end of the final field are kept at zero; don't report them as hits. + const size_t pos = find_bit(~_data.bits[field_id]) + (field_id * FIELD_BITS); + return (pos < GetSize()) ? (int) pos : -1; + } + + /// Return the position of the first one after start_pos; return -1 if no ones in vector. + /// You can loop through all 1-bit positions in "bits" with: + /// + /// for (int pos = bits.FindOne(); pos >= 0; pos = bits.FindOne(pos+1)) { ... } + + template + int Bits::FindOne(const size_t start_pos) const { + if (start_pos >= GetSize()) return -1; // If we are past the end, return fail. + size_t field_id = FieldID(start_pos); // What field do we start in? + const size_t field_pos = FieldPos(start_pos); // What position in that field? + + // If there's a hit in a partial first field, return it. + if (field_pos && (_data.bits[field_id] & ~(MaskField(field_pos)))) { + return (int) (find_bit(_data.bits[field_id] & ~(MaskField(field_pos))) + + field_id * FIELD_BITS); + } + + // Search other fields... + const size_t NUM_FIELDS = _data.NumFields(); + if (field_pos) field_id++; + while (field_id < NUM_FIELDS && _data.bits[field_id]==0) field_id++; + return (field_id < NUM_FIELDS) ? + (int) (find_bit(_data.bits[field_id]) + (field_id * FIELD_BITS)) : -1; + } + + /// Return the position of the first zero after start_pos; return -1 if no zeroes in vector. + /// You can loop through all 0-bit positions in "bits" with: + /// + /// for (int pos = bits.FindZero(); pos >= 0; pos = bits.FindZero(pos+1)) { ... } + + template + int Bits::FindZero(const size_t start_pos) const { + if (start_pos >= GetSize()) return -1; // If we are past the end, return fail. + size_t field_id = FieldID(start_pos); // What field do we start in?
+ const size_t field_pos = FieldPos(start_pos); // What position in that field? + + // If there's a hit in a partial first field, return it (unless it is past the last valid bit). + // Note: the field must be inverted BEFORE searching; inverting find_bit()'s result is meaningless. + if (field_pos && (~_data.bits[field_id] & ~(MaskField(field_pos)))) { + const size_t pos = find_bit(~_data.bits[field_id] & ~(MaskField(field_pos))) + + field_id * FIELD_BITS; + return (pos < GetSize()) ? (int) pos : -1; + } + + // Search other fields... + const size_t NUM_FIELDS = _data.NumFields(); + if (field_pos) field_id++; + while (field_id < NUM_FIELDS && _data.bits[field_id]==FIELD_ALL) field_id++; + if (field_id >= NUM_FIELDS) return -1; + + // Unused bits past the end of the final field are kept at zero; don't report them as hits. + const size_t pos = find_bit(~_data.bits[field_id]) + (field_id * FIELD_BITS); + return (pos < GetSize()) ? (int) pos : -1; + } + + /// Find the most-significant set-bit; return -1 if there are no ones (including an empty object). + template + int Bits::FindMaxOne() const { + // With no fields at all, NumFields()-1 would wrap around; there is nothing to search. + if (_data.NumFields() == 0) return -1; + + // Find the max field with a one. + size_t max_field = _data.NumFields() - 1; + while (max_field > 0 && _data.bits[max_field] == 0) max_field--; + + // If there are no ones, return -1. + if (_data.bits[max_field] == 0) return -1; + + const field_t field = _data.bits[max_field]; // Save a local copy of this field. + field_t mask = (field_t) -1; // Mask off the bits still under consideration. + size_t offset = 0; // Indicate where the mask should be applied. + size_t range = FIELD_BITS; // Indicate how many bits are in the mask. + + while (range > 1) { + // Cut the range in half and see if we need to adjust the offset. + range /= 2; // Cut range size in half + mask >>= range; // Cut the mask down. + + // Check the upper half of original range; if has a one shift new offset to there. + if (field & (mask << (offset + range))) offset += range; + } + + return (int) (max_field * FIELD_BITS + offset); + } + + /// Return the position of the first one and change it to a zero. Return -1 if no ones. + template + int Bits::PopOne() { + const int out_bit = FindOne(); + if (out_bit >= 0) Clear((size_t) out_bit); + return out_bit; + } + + /// Return positions of all ones.
+ template + emp::vector Bits::GetOnes() const { + emp::vector out_vals; + GetOnes(out_vals); + return out_vals; + } + + /// Return positions of all ones using a specified type. + template + template + emp::vector & Bits::GetOnes(emp::vector & out_vals) const { + // @CAO -- There are better ways to do this with bit tricks. + out_vals.resize(CountOnes()); + T cur_pos = 0; + for (T i = 0; i < GetSize(); i++) { + if (Get(i)) out_vals[cur_pos++] = i; + } + return out_vals; + } + + /// Find the length of the longest continuous series of ones. + template + size_t Bits::LongestSegmentOnes() const { + size_t length = 0; + Bits test_bits(*this); + while(test_bits.Any()){ + ++length; + test_bits.AND_SELF(test_bits<<1); + } + return length; + } + + /// Return one (first position, last position) entry for each run of consecutive ones. + template + emp::vector> Bits::GetRanges() const { + emp::vector> out_ranges; + int end_pos = 0; // Last position of the most recently recorded run. + // Resume each search just past the previous run so that a run is reported only once. + for (int start_pos = FindOne(); start_pos >= 0; start_pos = FindOne(end_pos+1)) { + end_pos = FindZero(start_pos); + end_pos = (end_pos == -1) ? GetSize() - 1 : end_pos - 1; + out_ranges.emplace_back(start_pos, end_pos); + } + return out_ranges; + } + + /// Return true if any ones are in common with another Bits object. + template + bool Bits::HasOverlap(const Bits & in) const { + const size_t num_fields = std::min(_data.NumFields(), in.NumFields()); + auto in_fields = in.FieldSpan(); + for (size_t i = 0; i < num_fields; ++i) { + // Short-circuit if we find any overlap.
+ if (_data.bits[i] & in_fields[i]) return true; + } + return false; + } + + + // ------------------------- Printing and string conversion ------------------------- + + /// Convert this Bits object to a vector string [0 index on left] + template + std::string Bits::ToString() const { + if constexpr (ZERO_LEFT) return ToArrayString(); + else return ToBinaryString(); + } + + /// Convert this Bits object to a vector string [0 index on left] + template + std::string Bits::ToArrayString() const { + std::string out_string; + out_string.reserve(GetSize()); + for (size_t i = 0; i < GetSize(); ++i) out_string.push_back(GetAsChar(i)); + return out_string; + } + + /// Convert this Bits object to a numerical string [0 index on right] + template + std::string Bits::ToBinaryString() const { + std::string out_string; + out_string.reserve(GetSize()); + for (size_t i = GetSize(); i > 0; --i) out_string.push_back(GetAsChar(i-1)); + return out_string; + } + + /// Convert this Bits object to a series of IDs + template + std::string Bits::ToIDString(const std::string & spacer) const { + std::stringstream ss; + PrintOneIDs(ss, spacer); + return ss.str(); + } + + /// Convert this Bits object to a series of IDs with ranges condensed. 
+ template + std::string Bits::ToRangeString(const std::string & spacer, + const std::string & ranger) const + { + std::stringstream ss; + PrintAsRange(ss, spacer, ranger); + return ss.str(); + } + + /// Print a space between each field (or other provided spacer) + template + void Bits::PrintFields(std::ostream & out, const std::string & spacer) const { + for (size_t i = GetSize()-1; i < GetSize(); i--) { + out << Get(i); + if (i && (i % FIELD_BITS == 0)) out << spacer; + } + } + + /// Print a space between each field (or other provided spacer) + template + void Bits::PrintDebug( + std::ostream & out, + const std::string & label + ) const { + if (label.size()) out << label << ":\n"; + for (size_t field = 0; field < _data.NumFields(); field++) { + for (size_t bit_id = 0; bit_id < FIELD_BITS; bit_id++) { + bool bit = (FIELD_1 << bit_id) & _data.bits[field]; + out << ( bit ? 1 : 0 ); + } + out << " : " << field << std::endl; + } + size_t end_pos = _data.NumEndBits(); + if (end_pos == 0) end_pos = FIELD_BITS; + for (size_t i = 0; i < end_pos; i++) out << " "; + out << "^" << std::endl; + } + + /// Print the positions of all one bits, spaces are the default separator. + template + void Bits::PrintOneIDs(std::ostream & out, const std::string & spacer) const { + bool started = false; + for (size_t i = 0; i < GetSize(); i++) { + if (Get(i)) { + if (started) out << spacer; + out << i; + started = true; + } + } + } + + /// Print the ones in a range format. 
E.g., 2-5,7,10-15 + template + void Bits::PrintAsRange(std::ostream & out, + const std::string & spacer, + const std::string & ranger) const + { + emp::vector ones = GetOnes(); + + for (size_t pos = 0; pos < ones.size(); pos++) { + if (pos) out << spacer; + + size_t start = ones[pos]; + while (pos+1 < ones.size() && ones[pos+1] == ones[pos]+1) pos++; + size_t end = ones[pos]; + + out << start; + if (start != end) out << ranger << end; + } + } + + + // ------------------------- Base Boolean-logic operations ------------------------- + + /// Perform a Boolean NOT with this Bits object, store result here, and return this object. + template + Bits & Bits::NOT_SELF() { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~_data.bits[i]; + return ClearExcessBits(); + } + + /// Perform a Boolean AND with this Bits object, store result here, and return this object. + template + Bits & Bits::AND_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = _data.bits[i] & bits2._data.bits[i]; + return *this; + } + + /// Perform a Boolean OR with this Bits object, store result here, and return this object. + template + Bits & Bits::OR_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = _data.bits[i] | bits2._data.bits[i]; + return *this; + } + + /// Perform a Boolean NAND with this Bits object, store result here, and return this object. + template + Bits & Bits::NAND_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~(_data.bits[i] & bits2._data.bits[i]); + return ClearExcessBits(); + } + + /// Perform a Boolean NOR with this Bits object, store result here, and return this object. 
+ template + Bits & Bits::NOR_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~(_data.bits[i] | bits2._data.bits[i]); + return ClearExcessBits(); + } + + /// Perform a Boolean XOR with this Bits object, store result here, and return this object. + template + Bits & Bits::XOR_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = _data.bits[i] ^ bits2._data.bits[i]; + return *this; + } + + /// Perform a Boolean EQU with this Bits object, store result here, and return this object. + template + Bits & Bits::EQU_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~(_data.bits[i] ^ bits2._data.bits[i]); + return ClearExcessBits(); + } + + /// Positive shifts go left and negative go right (0 does nothing); return result. + template + Bits Bits::SHIFT(const int shift_size) const { + Bits out_bits(*this); + if (shift_size > 0) out_bits.ShiftRight((size_t) shift_size); + else if (shift_size < 0) out_bits.ShiftLeft((size_t) -shift_size); + return out_bits; + } + + /// Positive shifts go left and negative go right; store result here, and return this object. + template + Bits & Bits::SHIFT_SELF(const int shift_size) { + if (shift_size > 0) ShiftRight((size_t) shift_size); + else if (shift_size < 0) ShiftLeft((size_t) -shift_size); + return *this; + } + + /// Reverse the order of bits in the bitset + template + Bits & Bits::REVERSE_SELF() { + auto bit_span = _data.AsSpan(); + + // Reverse order of whole fields + std::reverse( bit_span.begin(), bit_span.end() ); + + // Reverse the bits in each field. + for (auto & cur_field : bit_span) cur_field = emp::ReverseBits(cur_field); + + // Move the gap to the other side. + if (_data.NumEndBits()) ShiftRight(_data.EndGap(), true); + + return *this; + } + + /// Reverse order of bits in the bitset. 
+ template + Bits Bits::REVERSE() const { + Bits out_set(*this); + return out_set.REVERSE_SELF(); + } + + + /// Positive rotates go right and negative rotates go left (0 does nothing); + /// return result. + template + Bits Bits::ROTATE(const int rotate_size) const { + Bits out_set(*this); + if (rotate_size > 0) out_set.ROTR_SELF((field_t) rotate_size); + else if (rotate_size < 0) out_set.ROTL_SELF((field_t) (-rotate_size)); + return out_set; + } + + /// Positive rotates go right and negative rotates go left (0 does nothing); + /// store result here, and return this object. + template + Bits & Bits::ROTATE_SELF(const int rotate_size) { + if (rotate_size > 0) ROTR_SELF((field_t) rotate_size); + else if (rotate_size < 0) ROTL_SELF((field_t) -rotate_size); + return *this; + } + + + /// Addition of two Bitsets. + /// Wraps if it overflows. + /// Returns result. + template + Bits Bits::ADD(const Bits & set2) const{ + Bits out_set(*this); + return out_set.ADD_SELF(set2); + } + + /// Addition of two Bitsets. + /// Wraps if it overflows. + /// Returns this object. + template + Bits & Bits::ADD_SELF(const Bits & set2) { + bool carry = false; + + for (size_t i = 0; i < GetSize()/FIELD_BITS; ++i) { + field_t addend = set2._data.bits[i] + static_cast(carry); + carry = set2._data.bits[i] > addend; + + field_t sum = _data.bits[i] + addend; + carry |= _data.bits[i] > sum; + + _data.bits[i] = sum; + } + + if (_data.NumEndBits()) { + _data.bits[GetSize()/FIELD_BITS] = ( + _data.bits[GetSize()/FIELD_BITS] + + set2._data.bits[GetSize()/FIELD_BITS] + + static_cast(carry) + ) & _data.EndMask(); + } + + return *this; + } + + /// Subtraction of two Bitsets. + /// Wraps around if it underflows. + /// Returns result. + template + Bits Bits::SUB(const Bits & set2) const{ + Bits out_set(*this); + return out_set.SUB_SELF(set2); + } + + /// Subtraction of two Bitsets. + /// Wraps if it underflows. + /// Returns this object.
+ template + Bits & Bits::SUB_SELF(const Bits & set2){ + + bool carry = false; + + for (size_t i = 0; i < GetSize()/FIELD_BITS; ++i) { + field_t subtrahend = set2._data.bits[i] + static_cast(carry); + carry = set2._data.bits[i] > subtrahend; + carry |= _data.bits[i] < subtrahend; + _data.bits[i] -= subtrahend; + } + + if (_data.NumEndBits()) { + _data.bits[GetSize()/FIELD_BITS] = ( + _data.bits[GetSize()/FIELD_BITS] + - set2._data.bits[GetSize()/FIELD_BITS] + - static_cast(carry) + ) & _data.EndMask(); + } + + return *this; + } + + // Set up some aliases from common types of Bit strings. + // BitVector and BitArray function like vectors and arrays, which is to say that the zero + // index is on the left-hand side. BitSet and BitValue are treated like numerical + // representations, with the zero-position on the right-hand side. + + // using BitVector = Bits; + using BitVector = Bits; + using BitValue = Bits; + + template using BitArray = Bits, true>; + template using BitSet = Bits, false>; + template using StaticBitVector = Bits, true>; + template using StaticBitValue = Bits, false>; +} + + +// ---------------------- Implementations to work with standard library ---------------------- + +namespace std { + /// Hash function to allow Bits objects to be used with maps and sets (must be in std). + template + struct hash> { + std::size_t operator()(const emp::Bits & bits) const { + return bits.Hash(); + } + }; +} + +#endif // #ifndef EMP_BITS_BITS_HPP_INCLUDE diff --git a/include/emp/bits/Bits_Data.hpp b/include/emp/bits/Bits_Data.hpp new file mode 100644 index 0000000000..d109c3e6da --- /dev/null +++ b/include/emp/bits/Bits_Data.hpp @@ -0,0 +1,459 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022-23. + * + * @file Bits_Data.hpp + * @brief Helper class to handle memory management for Bits objects. 
+ * @note Status: BETA + * + * Bits_Data handles the actual bits inside of the Bits class. Bits itself provides many tools + * to operate on that data. + */ + +#ifndef EMP_BITS_BITS_DATA_HPP_INCLUDE +#define EMP_BITS_BITS_DATA_HPP_INCLUDE + + +#include +#include + +#include "../base/array.hpp" +#include "../base/assert.hpp" +#include "../base/Ptr.hpp" +#include "../math/math.hpp" + +#include "bitset_utils.hpp" + +namespace emp { + + // BitsMode specifies how a Bits object can change the number of bits in itself. + // FIXED is locked at the base size an cannot change and is stored in static memory. + // CAPPED must be the base size or lower, but requires size tracking. + // DYNAMIC defaults to base size, but can be changed; requires indirect memory and allocation. + // WATERMARK is like DYNAMIC, but never reallocates memory when shrinking active size. + enum class BitsMode { FIXED, CAPPED, DYNAMIC, WATERMARK }; + + namespace internal { + + // ------------------------------------------------------------------------------------ + // SIZE TRACKING + // ------------------------------------------------------------------------------------ + + /// Dynamic size is stored here to work with, but not the actual bits. + template + struct Bits_Data_Size_Var { + using field_t = bits_field_t; + + size_t num_bits; ///< Total number of bits are we using + + constexpr void SetSize(size_t new_size) { num_bits = new_size; } + + [[nodiscard]] constexpr size_t NumBits() const noexcept { return num_bits; } + + // Number of bits locked in at compile time. + [[nodiscard]] static constexpr size_t NumCTBits() noexcept { return 0; } + + /// Number of bits used in partial field at the end; 0 = perfect fit. + [[nodiscard]] constexpr size_t NumEndBits() const noexcept { + return num_bits & (NUM_FIELD_BITS - 1); + } + + /// EXTRA bits leftover in the gap at the end + [[nodiscard]] constexpr size_t EndGap() const noexcept { + return NumEndBits() ? 
(NUM_FIELD_BITS - NumEndBits()) : 0; + } + + /// Mask to cut off all of the final bits. + [[nodiscard]] constexpr field_t EndMask() const noexcept { + return MaskLow(NumEndBits()); + } + + /// How many fields do we need for the current set of bits? + [[nodiscard]] constexpr size_t NumFields() const noexcept { + return num_bits ? (1 + ((num_bits - 1) / NUM_FIELD_BITS)) : 0; + } + + /// ID of the last occupied field + [[nodiscard]] constexpr size_t LastField() const noexcept { + return NumFields() - 1; + } + + /// Number of bytes needed for the current set of bits + [[nodiscard]] constexpr size_t NumBytes() const noexcept { + return num_bits ? (1 + ((num_bits - 1) >> 3)) : 0; + } + + /// How many bytes are allocated? + [[nodiscard]] constexpr size_t TotalBytes() const noexcept { + return NumFields() * sizeof(field_t); + } + + Bits_Data_Size_Var(size_t in_size=DEFAULT_SIZE) : num_bits(in_size) { } + Bits_Data_Size_Var(const Bits_Data_Size_Var &) = default; + + template + void serialize(Archive & ar) { ar(num_bits); } + + [[nodiscard]] constexpr bool OK() const { return true; } // Nothing to check yet. + }; + + /// If we have a fixed number of bits, we know size at compile time. + template + struct Bits_Data_Size_Fixed { + using field_t = bits_field_t; + static constexpr size_t DEFAULT_SIZE = NUM_BITS; + + constexpr void SetSize(size_t new_size) { + emp_assert(new_size == NUM_BITS, "Cannot change to new_size"); + } + + [[nodiscard]] constexpr size_t NumBits() const noexcept { return NUM_BITS; } + + // Number of bits locked in at compile time. + [[nodiscard]] static constexpr size_t NumCTBits() noexcept { return NUM_BITS; } + + /// Number of bits used in partial field at the end; 0 if perfect fit. + [[nodiscard]] constexpr size_t NumEndBits() const noexcept { + return NUM_BITS & (NUM_FIELD_BITS - 1); + } + + /// How many EXTRA bits are leftover in the gap at the end? 
+ [[nodiscard]] constexpr size_t EndGap() const noexcept { + return (NUM_FIELD_BITS - NumEndBits()) % NUM_FIELD_BITS; + } + + /// A mask to cut off all of the final bits. + [[nodiscard]] constexpr field_t EndMask() const noexcept { + return MaskLow(NumEndBits()); + } + + /// How many felids do we need for the current set of bits? + [[nodiscard]] constexpr size_t NumFields() const noexcept { + return NUM_BITS ? (1 + ((NUM_BITS - 1) / NUM_FIELD_BITS)) : 0; + } + + /// What is the ID of the last occupied field? + [[nodiscard]] constexpr size_t LastField() const noexcept { return NumFields() - 1; } + + /// How many bytes are used for the current set of bits? (rounded up!) + [[nodiscard]] constexpr size_t NumBytes() const noexcept { + return NUM_BITS ? (1 + ((NUM_BITS - 1) >> 3)) : 0; + } + + /// How many bytes are allocated? (rounded up!) + [[nodiscard]] constexpr size_t TotalBytes() const noexcept { + return NumFields() * sizeof(field_t); + } + + Bits_Data_Size_Fixed([[maybe_unused]] size_t in_size=NUM_BITS) { + emp_assert(in_size <= NUM_BITS, in_size, NUM_BITS); + } + Bits_Data_Size_Fixed(const Bits_Data_Size_Fixed &) = default; + + template + void serialize(Archive & /* ar */) { /* Nothing to do here. */ } + + [[nodiscard]] constexpr bool OK() const { return true; } // Nothing to check yet. + }; + + + // ------------------------------------------------------------------------------------ + // RAW MEMORY MANAGEMENT + // ------------------------------------------------------------------------------------ + + /// Data & functions for Bits types with fixed memory (size may be dynamic, capped by CAPACITY) + template + struct Bits_Data_Mem_Static_Base : public BASE_T { + using base_size_t = BASE_T; + using field_t = bits_field_t; + static constexpr size_t MAX_FIELDS = (1 + ((CAPACITY - 1) / NUM_FIELD_BITS)); + + emp::array bits; ///< Fields to hold the actual bit values. 
+ + Bits_Data_Mem_Static_Base() = default; + Bits_Data_Mem_Static_Base(size_t num_bits) : BASE_T(num_bits) + { + emp_assert(num_bits <= CAPACITY, num_bits, CAPACITY); + } + Bits_Data_Mem_Static_Base(const Bits_Data_Mem_Static_Base &) = default; + Bits_Data_Mem_Static_Base(Bits_Data_Mem_Static_Base &&) = default; + + Bits_Data_Mem_Static_Base & operator=(const Bits_Data_Mem_Static_Base &) = default; + Bits_Data_Mem_Static_Base & operator=(Bits_Data_Mem_Static_Base &&) = default; + + // --- Helper functions -- + + [[nodiscard]] Ptr FieldPtr() { return bits.data(); } + [[nodiscard]] Ptr FieldPtr() const { return bits.data(); } + + /// Change the tracked size; storage is static, so no memory is moved. + /// @param new_size The number of bits the data now needs to hold. + /// @param preserve_data Should we clean up bits that fall outside of the new size? + void RawResize(const size_t new_size, const bool preserve_data=false) { + const size_t old_num_fields = BASE_T::NumFields(); + BASE_T::SetSize(new_size); + if (preserve_data) { + // Clear out any extra bits in the (possibly partial) last field. + if (BASE_T::NumEndBits()) bits[BASE_T::LastField()] &= BASE_T::EndMask(); + // Zero every field that is no longer in use, even when the new size ends on a field boundary. + for (size_t id = BASE_T::NumFields(); id < old_num_fields; ++id) bits[id] = 0; + } + } + + [[nodiscard]] auto AsSpan() { return std::span(bits.data(), MAX_FIELDS); } + [[nodiscard]] auto AsSpan() const { return std::span(bits.data(), MAX_FIELDS); } + + [[nodiscard]] bool OK() const { return true; } // Nothing to check yet. + + template + void serialize(Archive & ar) { + BASE_T::serialize(ar); // Save size info.
+ for (size_t i=0; i < BASE_T::NumFields(); ++i) { ar(bits[i]); } + } + + }; + + template + using Bits_Data_Mem_Static = + Bits_Data_Mem_Static_Base< Bits_Data_Size_Var, CAPACITY >; + + template + using Bits_Data_Mem_Fixed = + Bits_Data_Mem_Static_Base< Bits_Data_Size_Fixed, CAPACITY >; + + /// Data & functions for Bits types with dynamic memory (size is tracked elsewhere) + template + struct Bits_Data_Mem_Dynamic : public Bits_Data_Size_Var + { + using base_t = Bits_Data_Size_Var; + using base_size_t = base_t; + using field_t = bits_field_t; + + Ptr bits; ///< Pointer to array with the status of each bit + + Bits_Data_Mem_Dynamic(size_t num_bits=DEFAULT_SIZE) : base_t(num_bits), bits(nullptr) + { + if (num_bits) bits = NewArrayPtr(NumBitFields(num_bits)); + } + Bits_Data_Mem_Dynamic(const Bits_Data_Mem_Dynamic & in) : base_t(), bits(nullptr) { Copy(in); } + Bits_Data_Mem_Dynamic(Bits_Data_Mem_Dynamic && in) : bits(nullptr) { Move(std::move(in)); } + ~Bits_Data_Mem_Dynamic() { if (bits) bits.DeleteArray(); } + + Bits_Data_Mem_Dynamic & operator=(const Bits_Data_Mem_Dynamic & in) { Copy(in); return *this; } + Bits_Data_Mem_Dynamic & operator=(Bits_Data_Mem_Dynamic && in) { Move(std::move(in)); return *this; } + + // --- Helper functions -- + + [[nodiscard]] Ptr FieldPtr() { return bits; } + [[nodiscard]] Ptr FieldPtr() const { return bits; } + + void MakeEmpty() { + base_t::SetSize(0); + if (bits) bits.DeleteArray(); + bits = nullptr; + } + + void RawResize(const size_t new_size, const bool preserve_data=false) { + if (new_size == 0) { return MakeEmpty(); } + + // See if number of bit fields needs to change. 
+ const size_t num_old_fields = base_t::NumFields(); + const size_t num_new_fields = NumBitFields(new_size); + + if (num_old_fields != num_new_fields) { + auto new_bits = NewArrayPtr(num_new_fields); + if (num_old_fields) { + if (preserve_data) { + size_t copy_count = std::min(num_old_fields, num_new_fields); + emp::CopyMemory(bits, new_bits, copy_count); + } + bits.DeleteArray(); // Delete old memory + } + bits = new_bits; // Use new memory + if (preserve_data) { + // Zero out any newly added fields. + for (size_t i = num_old_fields; i < num_new_fields; ++i) bits[i] = 0; + } + } + + base_t::SetSize(new_size); + + // Clear out any extra bits in the last field. + if (preserve_data && base_t::NumEndBits()) { + bits[base_t::LastField()] &= base_t::EndMask(); + } + } + + // Resize to match the input (old contents are not preserved), then copy over its fields. + void Copy(const Bits_Data_Mem_Dynamic & in) { + RawResize(in.NumBits()); + for (size_t i = 0; i < base_t::NumFields(); ++i) bits[i] = in.bits[i]; + } + + // Take over the memory from the input, leaving it as a valid, empty object. + void Move(Bits_Data_Mem_Dynamic && in) { + if (this == &in) return; // Self-move: deleting our bits here would destroy the data. + base_t::SetSize(in.NumBits()); + if (bits) bits.DeleteArray(); // Clear out old bits. + bits = in.bits; // Move over the bits. + in.bits = nullptr; // Clear them out of the original. + in.SetSize(0); // No memory means no bits; keeps the original consistent with OK(). + } + + [[nodiscard]] auto AsSpan() { return std::span(bits.Raw(), base_t::NumFields()); } + [[nodiscard]] auto AsSpan() const { return std::span(bits.Raw(), base_t::NumFields()); } + + template + void save(Archive & ar) { + base_t::serialize(ar); // Save size info. + for (size_t i=0; i < base_t::NumFields(); ++i) { + ar(bits[i]); + } + } + + template + void load(Archive & ar) { + base_t::serialize(ar); + if (bits) bits.DeleteArray(); // Delete old memory if needed + bits = NewArrayPtr(base_t::NumFields()); + for (size_t i=0; i < base_t::NumFields(); ++i) { + ar(bits[i]); + } + } + + bool OK() const { + // Do some checking on the bits array ptr to make sure it's valid. + if (bits) { + #ifdef EMP_TRACK_MEM + emp_assert(bits.DebugIsArray()); // Must be marked as an array.
+ emp_assert(bits.OK()); // Pointer must be okay. + #endif + } + else { emp_assert(base_t::num_bits == 0); } // If bits is null, num_bits should be zero. + return true; + } + }; + + /// Data & functions for Bits types with dynamic memory (size is tracked elsewhere) + template + struct Bits_Data_Mem_Watermark : public Bits_Data_Mem_Dynamic + { + using this_t = Bits_Data_Mem_Watermark; + using base_t = Bits_Data_Mem_Dynamic; + using field_t = bits_field_t; + using base_t::bits; ///< Pointer to array with the status of each bit + size_t field_capacity = 0; ///< How many fields is the watermark up to? + + Bits_Data_Mem_Watermark(size_t num_bits=DEFAULT_SIZE) : base_t(num_bits) + { + field_capacity = base_t::NumFields(); + } + Bits_Data_Mem_Watermark(const this_t & in) : base_t(0) { Copy(in); } + Bits_Data_Mem_Watermark(this_t && in) : base_t(0) { Move(std::move(in)); } + ~Bits_Data_Mem_Watermark() { /* cleanup in base class */ } + + Bits_Data_Mem_Watermark & operator=(const this_t & in) { Copy(in); return *this; } + Bits_Data_Mem_Watermark & operator=(this_t && in) { Move(std::move(in)); return *this; } + + // --- Helper functions -- + + /// Resize to have at least the specified number of fields. + /// @param new_size The number of bits the new data needs to hold. + /// @param preserve_data Should we keep existing bits and zero out new bits? + void RawResize(const size_t new_size, const bool preserve_data=false) { + // See if number of bit fields needs to change. + const size_t num_old_fields = base_t::NumFields(); + const size_t num_new_fields = NumBitFields(new_size); + + // If we need more fields than are currently available, reallocate memory. + if (num_new_fields > field_capacity) { + auto new_bits = NewArrayPtr(num_new_fields); + if (field_capacity) { // If we already had some allocated fields... + // If needed, copy over previous memory. 
+ if (preserve_data) emp::CopyMemory(bits, new_bits, field_capacity); + bits.DeleteArray(); // Delete old memory + } + field_capacity = num_new_fields; + bits = new_bits; // Use new memory + } + + base_t::SetSize(new_size); + + if (preserve_data) { + // Clear any new (or previously unused) fields. + for (size_t i = num_old_fields; i < num_new_fields; ++i) bits[i] = 0; + + // Clear out any extra end bits. + if (base_t::NumEndBits()) bits[base_t::LastField()] &= base_t::EndMask(); + } + } + + void Copy(const Bits_Data_Mem_Watermark & in) { // Same as base class, but call THIS RawResize(). + RawResize(in.NumBits()); + for (size_t i = 0; i < base_t::NumFields(); ++i) bits[i] = in.bits[i]; + } + + // Take over the memory AND capacity from the input, leaving it as a valid, empty object. + void Move(Bits_Data_Mem_Watermark && in) { + if (this == &in) return; // Self-move: nothing to transfer. + base_t::Move(std::move(in)); + field_capacity = in.field_capacity; + in.field_capacity = 0; // Input no longer owns memory; force it to allocate on its next resize. + in.SetSize(0); // ...and make sure it does not claim bits that it cannot back. + } + + template + void save(Archive & ar) { base_t::save(ar); } // Base class handles saving. + + template + void load(Archive & ar) { + base_t::load(ar); + field_capacity = base_t::NumFields(); // Use loaded size as capacity. + } + + bool OK() const { + emp_assert(field_capacity >= base_t::NumFields()); + return base_t::OK(); + } + }; + + + + /// Internal data for the Bits class to separate static vs. dynamic.
+ template + struct Bits_Data : public BASE_T + { + using field_t = bits_field_t; + + Bits_Data() = default; + Bits_Data(size_t num_bits) : BASE_T(num_bits) { } + Bits_Data(const Bits_Data & in) = default; + Bits_Data(Bits_Data && in) = default; + + Bits_Data & operator=(const Bits_Data &) = default; + Bits_Data & operator=(Bits_Data &&) = default; + + [[nodiscard]] emp::Ptr BytePtr() { + return BASE_T::FieldPtr().template ReinterpretCast(); + } + [[nodiscard]] emp::Ptr BytePtr() const { + return BASE_T::FieldPtr().template ReinterpretCast(); + } + + [[nodiscard]] auto AsByteSpan() const { return std::as_bytes( BASE_T::AsSpan() ); } + + [[nodiscard]] bool OK() const { + bool result = BASE_T::OK(); + + // If there are end bits, make sure that everything past the last one is clear. + if (BASE_T::NumEndBits()) { + // Make sure final bits are zeroed out. + const field_t excess_bits = + BASE_T::bits[BASE_T::LastField()] & ~MaskLow(BASE_T::NumEndBits()); + result &= !excess_bits; + } + + return result; + } + + }; + } + + using Bits_WatermarkData = internal::Bits_Data< internal::Bits_Data_Mem_Watermark<0> >; + using Bits_DynamicData = internal::Bits_Data< internal::Bits_Data_Mem_Dynamic<0> >; + template + using Bits_FixedData = internal::Bits_Data< internal::Bits_Data_Mem_Fixed >; + template + using Bits_StaticData = internal::Bits_Data< internal::Bits_Data_Mem_Static >; +} + +#endif // #ifndef EMP_BITS_BITS_DATA_HPP_INCLUDE diff --git a/include/emp/bits/bitset_utils.hpp b/include/emp/bits/bitset_utils.hpp index 1e8d473fac..83a9641a58 100644 --- a/include/emp/bits/bitset_utils.hpp +++ b/include/emp/bits/bitset_utils.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2020. + * date: 2016-2023. 
*/ /** * @file @@ -12,11 +12,46 @@ #ifndef EMP_BITS_BITSET_UTILS_HPP_INCLUDE #define EMP_BITS_BITSET_UTILS_HPP_INCLUDE -#include +#include +#include // uint8_t, uint16_t, etc. +#include #include +#include + +#include "../base/Ptr.hpp" namespace emp { + /// @brief Use size_t as the default bits field type. + using bits_field_t = size_t; + + /// @brief Track the number of bits in a single bit field. + static constexpr size_t NUM_FIELD_BITS = sizeof(bits_field_t)*8; + + /// @brief Convert a bit count to the number of fields needed to store them. + [[nodiscard]] static constexpr size_t NumBitFields(size_t num_bits) noexcept { + return num_bits ? (1 + ((num_bits - 1) / NUM_FIELD_BITS)) : 0; + } + + /// @brief Convert a single bit field to a string. + /// @param field A single bit field to convert to a string. + [[nodiscard]] static std::string BitFieldToString(bits_field_t field) { + std::stringstream ss; + ss << '[' << std::hex << field << ']'; + return ss.str(); + } + + /// @brief Convert a series of bit fields to a string. + /// @param bits Pointer to the first bit field; `count` consecutive fields starting there are converted. + [[nodiscard]] static std::string BitFieldsToString(emp::Ptr bits, size_t count) { + std::stringstream ss; + for (size_t i = 0; i < count; ++i) { + if (i) ss << ' '; + ss << BitFieldToString(bits[i]); + } + return ss.str(); + } + /// Create a series of a specified number of ones (at compile time) in a uint. template constexpr uint32_t UIntMaskFirst() { return (UIntMaskFirst() << 1) | 1; } @@ -37,58 +72,77 @@ namespace emp { 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; - /// Count the number of bits in a 64-bit unsigned integer.
- inline constexpr size_t count_bits(uint64_t val) { - return - ByteCount[ val >> 56 ] + - ByteCount[ (val >> 48) & 0xFF ] + - ByteCount[ (val >> 40) & 0xFF ] + - ByteCount[ (val >> 32) & 0xFF ] + - ByteCount[ (val >> 24) & 0xFF ] + - ByteCount[ (val >> 16) & 0xFF ] + - ByteCount[ (val >> 8) & 0xFF ] + - ByteCount[ val & 0xFF ]; - } - - // /// Count the number of bits in a 32-bit unsigned integer. - // inline constexpr size_t count_bits(uint32_t val) { - // return - // ByteCount[ val >> 24 ] + - // ByteCount[ (val >> 16) & 0xFF ] + - // ByteCount[ (val >> 8) & 0xFF ] + - // ByteCount[ val & 0xFF ]; - // } + /// Count the number of bits in an unsigned integer. + template + [[nodiscard]] inline constexpr size_t count_bits(T val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + constexpr size_t num_bytes = sizeof(T); + static_assert(num_bytes <= 8, "count_bits() requires 8 or fewer bytes."); + + size_t out_ones = ByteCount[ val & 0xFF ]; + if constexpr (num_bytes > 1) { + out_ones += ByteCount[ (val >> 8) & 0xFF ]; + } + if constexpr (num_bytes > 2) { + out_ones += ByteCount[ (val >> 24) & 0xFF ] + + ByteCount[ (val >> 16) & 0xFF ]; + } + if constexpr (num_bytes > 4) { + out_ones += ByteCount[ val >> 56 ] + + ByteCount[ (val >> 48) & 0xFF ] + + ByteCount[ (val >> 40) & 0xFF ] + + ByteCount[ (val >> 32) & 0xFF ]; + } + return out_ones; + } + + + /// Return the position of the first one bit + template + [[nodiscard]] inline constexpr size_t find_bit(const T val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + return count_bits( (~val) & (val-1) ); + } /// Return the position of the first one bit template - inline constexpr size_t find_bit(T val) { return count_bits( (~val) & (val-1) ); } + [[nodiscard]] inline constexpr size_t find_last_bit(T val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." 
); + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + if constexpr (sizeof(T) > 1) val |= val >> 8; + if constexpr (sizeof(T) > 2) val |= val >> 16; + if constexpr (sizeof(T) > 4) val |= val >> 32; + return count_bits(val) - 1; + } /// Return the position of the first one bit AND REMOVE IT. template inline size_t pop_bit(T & val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); const size_t pos = find_bit(val); - val &= ~(1 << pos); + val &= ~(T{1} << pos); // Use a T-typed one; shifting an int 1 misbehaves at bit 31 and above. return pos; } - /// A compile-time bit counter. - template - static constexpr int CountOnes(TYPE x) { return x == 0 ? 0 : (CountOnes(x/2) + (x&1)); } - /// Quick bit-mask generator for low bits. - template - static constexpr TYPE MaskLow(std::size_t num_bits) { + template + [[nodiscard]] static constexpr TYPE MaskLow(std::size_t num_bits) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); return (num_bits == 8*sizeof(TYPE)) ? ((TYPE)-1) : ((((TYPE)1) << num_bits) - 1); } /// Quick bit-mask generator for high bits. - template - static constexpr TYPE MaskHigh(std::size_t num_bits) { + template + [[nodiscard]] static constexpr TYPE MaskHigh(std::size_t num_bits) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); return MaskLow(num_bits) << (8*sizeof(TYPE)-num_bits); } - template - static constexpr TYPE MaskUsed(TYPE val) { + template + [[nodiscard]] static constexpr TYPE MaskUsed(TYPE val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); size_t shift = 1; TYPE last = 0; while (val != last) { // While the shift is making progress... @@ -100,6 +154,111 @@ namespace emp { return val; } + template + [[nodiscard]] constexpr T ReverseBits(T in) { + constexpr size_t num_bytes = sizeof(T); + + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." 
); + static_assert( num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8, + "ReverseBits() currently requires 1, 2, 4, or 8-byte values." ); + + if constexpr (num_bytes == 1) { + in = static_cast( (in & 0xF0) >> 4 | (in & 0x0F) << 4 ); + in = static_cast( (in & 0xCC) >> 2 | (in & 0x33) << 2 ); + in = static_cast( (in & 0xAA) >> 1 | (in & 0x55) << 1 ); + } + else if constexpr (num_bytes == 2) { + in = static_cast( (in & 0xFF00) >> 8 | (in & 0x00FF) << 8 ); + in = static_cast( (in & 0xF0F0) >> 4 | (in & 0x0F0F) << 4 ); + in = static_cast( (in & 0xCCCC) >> 2 | (in & 0x3333) << 2 ); + in = static_cast( (in & 0xAAAA) >> 1 | (in & 0x5555) << 1 ); + } + else if constexpr (num_bytes == 4) { + in = static_cast( (in & 0xFFFF0000) >> 16 | (in & 0x0000FFFF) << 16 ); + in = static_cast( (in & 0xFF00FF00) >> 8 | (in & 0x00FF00FF) << 8 ); + in = static_cast( (in & 0xF0F0F0F0) >> 4 | (in & 0x0F0F0F0F) << 4 ); + in = static_cast( (in & 0xCCCCCCCC) >> 2 | (in & 0x33333333) << 2 ); + in = static_cast( (in & 0xAAAAAAAA) >> 1 | (in & 0x55555555) << 1 ); + } + else /* if constexpr (num_bytes == 8) */ { + in = static_cast( (in & 0xFFFFFFFF00000000) >> 32 | (in & 0x00000000FFFFFFFF) << 32 ); + in = static_cast( (in & 0xFFFF0000FFFF0000) >> 16 | (in & 0x0000FFFF0000FFFF) << 16 ); + in = static_cast( (in & 0xFF00FF00FF00FF00) >> 8 | (in & 0x00FF00FF00FF00FF) << 8 ); + in = static_cast( (in & 0xF0F0F0F0F0F0F0F0) >> 4 | (in & 0x0F0F0F0F0F0F0F0F) << 4 ); + in = static_cast( (in & 0xCCCCCCCCCCCCCCCC) >> 2 | (in & 0x3333333333333333) << 2 ); + in = static_cast( (in & 0xAAAAAAAAAAAAAAAA) >> 1 | (in & 0x5555555555555555) << 1 ); + } + + return in; + } + + // Rotate all bits to the left (looping around) in a provided field. + template + [[nodiscard]] constexpr T RotateBitsLeft( + T in, + size_t rotate_size = 1 + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." 
); + constexpr size_t FIELD_BITS = sizeof(T) * 8; + rotate_size %= FIELD_BITS; // Make sure rotate is in range. + return (in << rotate_size) | + (in >> ((FIELD_BITS - rotate_size) % FIELD_BITS)); // '%' maps a zero rotation to a zero shift; shifting by the full width is undefined. + } + + // Rotate lowest "bit_count" bits to the left (looping around) in a provided field. + template + [[nodiscard]] constexpr T RotateBitsLeft( + T in, + size_t rotate_size, + size_t bit_count + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + [[maybe_unused]] constexpr size_t FIELD_BITS = sizeof(T) * 8; + emp_assert(bit_count <= FIELD_BITS, "Cannot have more bits than can fit in field."); + rotate_size %= bit_count; // Make sure rotate is in range. + const T out = (in << rotate_size) | (in >> ((bit_count - rotate_size) % FIELD_BITS)); // '%' avoids a full-width shift when rotate_size == 0 and bit_count == FIELD_BITS. + return out & MaskLow(bit_count); // Zero out excess bits. + } + + // Rotate all bits to the right (looping around) in a provided field. + template + [[nodiscard]] constexpr T RotateBitsRight( + T in, + size_t rotate_size = 1 + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + constexpr size_t FIELD_BITS = sizeof(T) * 8; + rotate_size %= FIELD_BITS; // Make sure rotate is in range. + return (in >> rotate_size) | + (in << ((FIELD_BITS - rotate_size) % FIELD_BITS)); // '%' maps a zero rotation to a zero shift; shifting by the full width is undefined. + } + + // Rotate lowest "bit_count" bits to the right (looping around) in a provided field. + template + [[nodiscard]] constexpr T RotateBitsRight( + T in, + size_t rotate_size, + size_t bit_count + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + [[maybe_unused]] constexpr size_t FIELD_BITS = sizeof(T) * 8; + emp_assert(bit_count <= FIELD_BITS, "Cannot have more bits than can fit in field."); + rotate_size %= bit_count; // Make sure rotate is in range. + const T out = (in >> rotate_size) | (in << ((bit_count - rotate_size) % FIELD_BITS)); // '%' avoids a full-width shift when rotate_size == 0 and bit_count == FIELD_BITS. + return out & MaskLow(bit_count); // Zero out excess bits. + } + + /// Count the number of bits ('0' or '1') found in a string.
+ static size_t CountBits(const std::string & bitstring) { + return static_cast( + std::count_if( + bitstring.begin(), + bitstring.end(), + [](char i) { return i == '0' || i == '1'; } + ) + ); + } + /* // Returns the position of the first set (one) bit or a -1 if none exist. template diff --git a/include/emp/compiler/DFA.hpp b/include/emp/compiler/DFA.hpp index 672cc3cea9..83becd0841 100644 --- a/include/emp/compiler/DFA.hpp +++ b/include/emp/compiler/DFA.hpp @@ -29,13 +29,17 @@ namespace emp { private: emp::vector< emp::array > transitions; emp::vector< STOP_TYPE > is_stop; // 0=not stop; other values for STOP return value. + + using this_t = tDFA; public: tDFA(size_t num_states=0) : transitions(num_states), is_stop(num_states, 0) { for (auto & t : transitions) t.fill(-1); } - tDFA(const tDFA &) = default; + tDFA(const this_t &) = default; + tDFA(this_t &&) = default; ~tDFA() { ; } - tDFA & operator=(const tDFA &) = default; + this_t & operator=(const this_t &) = default; + this_t & operator=(this_t &&) = default; using stop_t = STOP_TYPE; diff --git a/include/emp/compiler/Lexer.hpp b/include/emp/compiler/Lexer.hpp index 65f8133593..8ba9070f1a 100644 --- a/include/emp/compiler/Lexer.hpp +++ b/include/emp/compiler/Lexer.hpp @@ -1,18 +1,38 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2019. + * date: 2016-2023. */ /** * @file * @brief A general-purpose, fast lexer. - * @note Status: ALPHA + * @note Status: BETA + * + * Build a lexer that can convert input strings or streams into a series of provided tokens. + * + * Use AddToken(name, regex) to list out the relevant tokens. + * 'name' is the unique name for this token. + * 'regex' is the regular expression that describes this token. + * It will return a unique ID associated with this lexeme. 
+ * + * IgnoreToken(name, regex) uses the same arguments, but is used for tokens that + * should be skipped over. + * + * Names and IDs can be recovered later using GetTokenID(name) and GetTokenName(id). + * + * Tokens can be retrieved either one at a time with Process(string) or Process(stream), + * which will return the next (non-ignored) token, removing it from the input. + * + * Alternatively, an entire series of tokens can be processed with Tokenize(). + * + * Finally, GetLexeme() can be used to retrieve the lexeme from the most recent token found. */ #ifndef EMP_COMPILER_LEXER_HPP_INCLUDE #define EMP_COMPILER_LEXER_HPP_INCLUDE #include +#include #include #include #include @@ -35,9 +55,12 @@ namespace emp { bool save_lexeme; ///< Preserve the lexeme for this token? bool save_token; ///< Keep token at all? (Whitespace and comments are often discarded). + // Default constructor produces an error token. + TokenInfo() : name(""), desc("Unable to parse input!"), regex(""), + id(-1), save_lexeme(true), save_token(true) { } TokenInfo(const std::string & _name, const std::string & _regex, int _id, bool _save_l=true, bool _save_t=true, const std::string & _desc="") - : name(_name), desc(_desc), regex(_regex), id(_id), save_lexeme(_save_l), save_token(_save_t) { ; } + : name(_name), desc(_desc), regex(_regex), id(_id), save_lexeme(_save_l), save_token(_save_t) { } TokenInfo(const TokenInfo &) = default; TokenInfo(TokenInfo &&) = default; TokenInfo & operator=(const TokenInfo &) = default; @@ -56,52 +79,140 @@ namespace emp { /// Information about a token instance from an input stream. struct Token { - int token_id; ///< Which type of token is this? + int id; ///< Which type of token is this? std::string lexeme; ///< Sequence matched by this token (or empty if not saved) size_t line_id; ///< Which line did this token start on? 
- Token(int id, const std::string & str="", size_t _line=0) - : token_id(id), lexeme(str), line_id(_line) { ; } + Token(int _id, const std::string & str="", size_t _line=0) + : id(_id), lexeme(str), line_id(_line) { ; } Token(const Token &) = default; Token(Token &&) = default; Token & operator=(const Token &) = default; Token & operator=(Token &&) = default; /// Token will automatically convert to its ID if used as an int. - operator int() const { return token_id; } + operator int() const { return id; } /// Token will automatically convert to its matched sequence (lexeme) is used as a string. operator const std::string &() const { return lexeme; } }; + class TokenStream { + private: + std::string name = ""; + emp::vector tokens; + + public: + TokenStream(const std::string & in_name) : name(in_name) { } + TokenStream(const TokenStream &) = default; + TokenStream(TokenStream &&) = default; + TokenStream(const emp::vector & in_tokens, const std::string & in_name) + : name(in_name), tokens(in_tokens) { } + + TokenStream & operator=(const TokenStream &) = default; + TokenStream & operator=(TokenStream &&) = default; + + class Iterator { + private: + emp::Ptr ts; + size_t pos; + + public: + Iterator(const Iterator &) = default; + Iterator(const TokenStream & in_ts, size_t in_pos) : ts(&in_ts), pos(in_pos) { } + Iterator & operator=(const Iterator &) = default; + + const TokenStream & GetTokenStream() const { return *ts; } + size_t GetIndex() const { return pos; } + emp::Ptr ToPtr() const { return ts->GetPtr(pos); } + + Token operator*() const { return ts->tokens[pos]; } + const Token * operator->() const { return &(ts->tokens[pos]); } + + bool operator==(const Iterator & in) const { return ToPtr() == in.ToPtr(); } + bool operator!=(const Iterator & in) const { return ToPtr() != in.ToPtr(); } + bool operator< (const Iterator & in) const { return ToPtr() < in.ToPtr(); } + bool operator<=(const Iterator & in) const { return ToPtr() <= in.ToPtr(); } + bool operator> (const 
Iterator & in) const { return ToPtr() > in.ToPtr(); } + bool operator>=(const Iterator & in) const { return ToPtr() >= in.ToPtr(); } + + Iterator & operator++() { ++pos; return *this; } + Iterator operator++(int) { Iterator old(*this); ++pos; return old; } + Iterator & operator--() { --pos; return *this; } + Iterator operator--(int) { Iterator old(*this); --pos; return old; } + + bool IsValid() const { return pos < ts->size(); } + bool AtEnd() const { return pos == ts->size(); } + + operator bool() const { return IsValid(); } + }; + + size_t size() const { return tokens.size(); } + const Token & Get(size_t pos) const { return tokens[pos]; } + emp::Ptr GetPtr(size_t pos) const { return &(tokens.data()[pos]); } + const std::string & GetName() const { return name; } + Iterator begin() const { return Iterator(*this, 0); } + Iterator end() const { return Iterator(*this, tokens.size()); } + const Token & back() const { return tokens.back(); } + + void push_back(const Token & in) { tokens.push_back(in); } + + void Print(std::ostream & os=std::cout) const { + for (auto x : tokens) { + os << " [" << x.lexeme << "]"; + } + os << std::endl; + } + }; + + /// A lexer with a set of token types (and associated regular expressions) class Lexer { private: static constexpr int MAX_ID = 255; ///< IDs count down so that first ones have priority. static constexpr int ERROR_ID = -1; ///< Code for unknown token ID. - emp::vector token_set; ///< List of all active tokens. + emp::vector token_set; ///< List of all active tokens types. emp::map token_map; ///< Map of token names to id. int cur_token_id = MAX_ID; ///< Which ID should the next new token get? mutable bool generate_lexer = false; ///< Do we need to regenerate the lexer? mutable DFA lexer_dfa; ///< Table driven lexer implementation. - std::string lexeme; ///< Current state of lexeme being generated. + mutable std::string lexeme; ///< Current state of lexeme being generated. 
- const TokenInfo ERROR_TOKEN{"", "", ERROR_ID, true, true, "Unable to parse input!"}; + static const TokenInfo & ERROR_TOKEN() { + static const TokenInfo token; + return token; + } public: - Lexer() { ; } - ~Lexer() { ; } + Lexer() = default; + Lexer(const Lexer &) = default; + Lexer(Lexer &&) = default; + ~Lexer() = default; + + Lexer & operator=(const Lexer &) = default; + Lexer & operator=(Lexer &&) = default; /// How many types of tokens can be identified in this Lexer? size_t GetNumTokens() const { return token_set.size(); } + void Reset() { + token_set.resize(0); + token_map.clear(); + cur_token_id = MAX_ID; + generate_lexer = false; + } + bool TokenOK(int id) const { return id >= 0 && id < cur_token_id; } /// Add a new token, specified by a name and the regex used to identify it. /// Note that token ids count down with highest IDs having priority. - int AddToken(const std::string & name, const std::string & regex, - bool save_lexeme=true, bool save_token=true, const std::string & desc="") { + int AddToken(const std::string & name, + const std::string & regex, + bool save_lexeme = true, + bool save_token = true, + const std::string & desc = "") + { int id = cur_token_id--; // Grab the next available token id. generate_lexer = true; // Indicate the the lexer DFA needs to be rebuilt. token_set.emplace_back( name, regex, id, save_lexeme, save_token, desc ); @@ -127,7 +238,7 @@ namespace emp { /// Get the full information about a token (you provide the id) const TokenInfo & GetTokenInfo(int id) const { - if (id > MAX_ID || id <= cur_token_id) return ERROR_TOKEN; + if (id > MAX_ID || id <= cur_token_id) return ERROR_TOKEN(); return token_set[(size_t)(MAX_ID - id)]; } @@ -161,7 +272,7 @@ namespace emp { /// longest one we can find.) Every time we do hit a valid lexeme, store it as the current /// "best" and keep going. Once we hit a point where no other valid lexemes are possible, /// stop and return the best we've found so far. 
- Token Process(std::istream & is) { + Token Process(std::istream & is) const { // If we still need to generate the DFA for the lexer, do so. if (generate_lexer) Generate(); @@ -174,7 +285,7 @@ namespace emp { lexeme.resize(0); // Keep looking as long as: - // 1: We may still be able to contine the current lexeme. + // 1: We may still be able to continue the current lexeme. // 2: We have not entered an invalid state. // 3: Our input stream has more symbols. while (cur_stop >= 0 && cur_state >= 0 && is) { @@ -202,8 +313,8 @@ namespace emp { return { best_stop, lexeme }; } - /// Shortcut to process a string rather than a stream. - Token Process(std::string & in_str) { + /// Shortcut to process a string rather than a stream, chopping off one token each time. + Token Process(std::string & in_str) const { std::stringstream ss; ss << in_str; auto out_val = Process(ss); @@ -211,8 +322,16 @@ namespace emp { return out_val; } + /// Shortcut to just get a single token. + Token ToToken(std::string_view in_str) const { + std::stringstream ss; + ss << in_str; + auto out_val = Process(ss); + return out_val; + } + /// Turn an input stream of text into a vector of tokens. - emp::vector Tokenize(std::istream & is) { + TokenStream Tokenize(std::istream & is, const std::string & name="in_stream") const { emp::vector out_tokens; size_t cur_line = 1; emp::Token token = Process(is); @@ -222,28 +341,30 @@ namespace emp { if (GetSaveToken(token)) out_tokens.push_back(token); token = Process(is); } - return out_tokens; + return TokenStream{out_tokens, name}; } /// Turn an input string into a vector of tokens. - emp::vector Tokenize(const std::string & str) { + TokenStream Tokenize(std::string_view str, const std::string & name="in_view") const { std::stringstream ss; ss << str; - return Tokenize(ss); + return Tokenize(ss, name); } /// Turn a vector of strings into a vector of tokens. 
- emp::vector Tokenize(const emp::vector & str_v) { + TokenStream Tokenize(const emp::vector & str_v, + const std::string & name="in_string vector") const + { std::stringstream ss; for (auto & str : str_v) { - ss << str; + ss << str << '\n'; } - return Tokenize(ss); + return Tokenize(ss, name); } /// Get the lexeme associated with the last token identified. - const std::string & GetLexeme() { return lexeme; } + const std::string & GetLexeme() const { return lexeme; } /// Print the full information about this lexer (for debugging) void Print(std::ostream & os=std::cout) const { @@ -253,7 +374,7 @@ namespace emp { } /// Try out the lexer on a string and demonstrate how it's tokenized. - void DebugString(std::string test_string) { + void DebugString(std::string test_string) const { std::stringstream ss; ss << test_string; diff --git a/include/emp/compiler/RegEx.hpp b/include/emp/compiler/RegEx.hpp index 8ae9a172ac..dc800332b5 100644 --- a/include/emp/compiler/RegEx.hpp +++ b/include/emp/compiler/RegEx.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2019. + * date: 2016-2022. */ /** * @file @@ -30,8 +30,11 @@ * static DFA to_DFA(const RegEx & regex); * * - * @todo Need to implement ^ and $ (beginning and end of line) - * @todo Need to implement {n}, {n,} and {n,m} (exactly n, at least n, and n-m copies, respecitvely) + * @todo Implement ^ and $ (beginning and end of line) + * @todo Implement {n}, {n,} and {n,m} (exactly n, at least n, and n-m copies, respectively) + * @todo Implement \d (for digits), \s (for whitespace), etc. + * @todo Consider a separator (maybe backtick?) to divide up a regex expression; + * the result can be returned by each section as a vector of strings. */ #ifndef EMP_COMPILER_REGEX_HPP_INCLUDE @@ -207,9 +210,11 @@ namespace emp { // If blocks are nested, merge them into a single block. 
if (nodes[i]->AsBlock()) { auto old_node = nodes[i]->AsBlock(); // Save the old node for merging. - nodes.erase(nodes.begin() + (int) i); // Remove block from nodes. - nodes.insert(nodes.begin() + (int) i, old_node->nodes.begin(), old_node->nodes.end()); - old_node->nodes.resize(0); // Don't recurse delete since nodes were moved! + nodes.erase(nodes.begin() + (int) i); // Remove block from nodes. + if (old_node->nodes.size()) { + nodes.insert(nodes.begin() + (int) i, old_node->nodes.begin(), old_node->nodes.end()); + old_node->nodes.resize(0); // Don't recurse delete since nodes were moved! + } old_node.Delete(); i--; modify = true; @@ -347,7 +352,7 @@ namespace emp { case '-': case '\\': case ']': - case '[': + case '[': // technically doesn't need to be escaped, but allowed. case '^': break; default: @@ -457,7 +462,7 @@ namespace emp { /// Process the input regex into a tree representaion. Ptr Process(Ptr cur_block=nullptr) { - emp_assert(pos >= 0 && pos < regex.size(), pos, regex.size()); + emp_assert(pos < regex.size(), pos, regex.size()); // If caller does not provide current block, create one (and return it.) if (cur_block==nullptr) cur_block = NewPtr(); diff --git a/include/emp/compiler/regex_utils.hpp b/include/emp/compiler/regex_utils.hpp new file mode 100644 index 0000000000..7e92b7109d --- /dev/null +++ b/include/emp/compiler/regex_utils.hpp @@ -0,0 +1,47 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2016-2021. + * + * @file regex_utils.hpp + * @brief Helper functions for building regular expressions. 
+ * @note Status: BETA + */ + +#ifndef EMP_COMPILER_REGEX_UTILS_HPP_INCLUDE +#define EMP_COMPILER_REGEX_UTILS_HPP_INCLUDE + +#include + +#include "RegEx.hpp" + +namespace emp { + + using namespace std::string_literals; + + /// Build a regex string for text enclosed by 'open' and 'close', with further open/close pairs allowed inside it up to 'depth' levels; if 'stop_at_newline' is set, the enclosed text may not contain '\n' or '\r'. + inline std::string regex_nested(char open='(', + char close=')', + size_t depth=0, + bool stop_at_newline=true + ) + { + // Setup open and close as literal strings. + std::string open_re = emp::to_string('"', open, '"'); + std::string close_re = emp::to_string('"', close, '"'); + + // Base version has open_re and close_re at either end. + const std::string no_parens = "[^"s + open_re + close_re + (stop_at_newline ? "\n\r]*" : "]*"); + std::string matched = open_re + no_parens + close_re; + // Each pass wraps the previous pattern in one more open/close level (assign; do not redeclare). + for (size_t level = 0; level < depth; level++) { + const std::string multi = no_parens + "("s + matched + no_parens + ")*"s; + matched = open_re + multi + close_re; + } + + return matched; + } + +} + +#endif // #ifndef EMP_COMPILER_REGEX_UTILS_HPP_INCLUDE diff --git a/include/emp/config/ArgManager.hpp b/include/emp/config/ArgManager.hpp index e23c0eec56..e0da716708 100644 --- a/include/emp/config/ArgManager.hpp +++ b/include/emp/config/ArgManager.hpp @@ -189,8 +189,8 @@ namespace emp { } else if (args[i].size() == 2) { // in POSIX, -- means treat subsequent words as literals // so we remove the -- and stop deflagging subsequent words - res.erase(std::next(std::begin(res),i)); - args.erase(std::next(std::begin(args),i)); + res.erase(std::next(std::begin(res),(int) i)); + args.erase(std::next(std::begin(args),(int) i)); break; } // " ", -, ---, ----, etc. left in place and treated as non-flags @@ -302,17 +302,14 @@ namespace emp { ); // store the argument pack + bool is_special = command == "_positional" + || command == "_unknown" + || command == "_invalid"; res.insert({ command, pack_t( - std::next( - std::begin(args), - command == "_positional" - || command == "_unknown" - || command == "_invalid" - ? i : i+1 - ), - j+1 < args.size() ? 
std::next(std::begin(args), j+1) : std::end(args) + std::next( std::begin(args), (int) (is_special ? i : i+1) ), + j+1 < args.size() ? std::next(std::begin(args), (int) j+1) : std::end(args) ) }); i = j; diff --git a/include/emp/config/FlagManager.hpp b/include/emp/config/FlagManager.hpp new file mode 100644 index 0000000000..cdc51d95dc --- /dev/null +++ b/include/emp/config/FlagManager.hpp @@ -0,0 +1,172 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2023. + * + * @file FlagManager.hpp + * @brief This file contains tools for dealing with command-line flags (from argv and argc). + * @note Status: ALPHA + * + * The FlagManager class will take command line arguments (either in its constructor or with + * the AddFlags() function) and process them appropriately. + * + * For setup, the user must run AddOption with the function to call. Functions to call can take + * zero, one, or two Strings as arguments OR they can take a vector of Strings and the range of + * allowed arguments should be specified. When Process() is run, the appropriate function will + * be called on each and any invalid arguments will trigger an error. + * + * Flags are expected to begin with a '-' and non-flags are expected to NOT begin with a '-'. + * + * If a single dash is followed by multiple characters, each will be processed independently. + * So, "-abc" will be the same as "-a -b -c". + * + * Extra command line arguments will be saved as a vector of strings called "extras" and must + * be processed manually. They can be retrieved with GetExtras(). + * + * + * @todo: Make variable numbers of flag arguments work.
+ */ + +#ifndef EMP_CONFIG_FLAGMANAGER_HPP_INCLUDE +#define EMP_CONFIG_FLAGMANAGER_HPP_INCLUDE + +#include +#include +#include +#include + +#include "../base/vector.hpp" +#include "../datastructs/map_utils.hpp" +#include "../tools/String.hpp" + +namespace emp { + + class FlagManager { + private: + emp::vector args; + emp::vector extras; + + struct FlagInfo { + String desc; + size_t min_args = 0; + size_t max_args = 0; + std::function &)> fun; + char shortcut = '\0'; + }; + + std::map flag_options; + std::map shortcuts; + + public: + FlagManager() { } + FlagManager(int argc, char* argv[]) { AddFlags(argc, argv); } + + constexpr static size_t npos = static_cast(-1); + + [[nodiscard]] String & operator[](size_t pos) { return args[pos]; } + [[nodiscard]] const String & operator[](size_t pos) const { return args[pos]; } + + emp::vector GetExtras() const { return extras; } + + [[nodiscard]] size_t Find(String pattern) const { + for (size_t i = 0; i < args.size(); ++i) if (args[i] == pattern) return i; + return npos; + } + + [[nodiscard]] bool Has(String pattern) const { return Find(pattern) != npos; } + + // Return true/false if a specific argument is present and REMOVE IT. 
+ bool Use(String pattern) { + size_t pos = Find(pattern); + if (pos == npos) return false; + args.erase(args.begin() + pos); + return true; + } + + void AddOption(String name, String desc="") { + flag_options[name] = FlagInfo{desc, 0, 0, [](const emp::vector &){} }; + } + void AddOption(String name, std::function fun, String desc="") { + flag_options[name] = FlagInfo{desc, 0,0, [fun](const emp::vector &){fun();}}; + } + void AddOption(String name, std::function fun, String desc="") { + flag_options[name] = FlagInfo{desc, 1,1, [fun](const emp::vector & in){fun(in[0]);}}; + } + void AddOption(String name, std::function fun, String desc="") { + flag_options[name] = FlagInfo{desc, 2,2, [fun](const emp::vector & in){fun(in[0],in[1]);}}; + } + void AddOption(String name, std::function &)> fun, + size_t min_args=0, size_t max_args=npos, String desc="") { + flag_options[name] = FlagInfo{desc, min_args,max_args, fun}; + } + + // Allow an option to have a single-letter flag (e.g. "-h" is short for "--help") + template + void AddOption(char shortcut, String name, FUN_T fun, String desc="") { + AddOption(name, fun, desc); + shortcuts[shortcut] = name; + flag_options[name].shortcut = shortcut; + } + + void AddFlags(int argc, char* argv[]) { + for (size_t i = 0; i < (size_t) argc; i++) { + args.push_back(argv[i]); + } + } + + // Process an argument associated with a particular name; return num additional args used. + size_t ProcessArg(String name, size_t cur_pos=0) { + if (!emp::Has(flag_options, name)) { emp::notify::Error("Unknown flag '", name , "'."); } + auto option = flag_options[name]; + emp::vector flag_args; + for (size_t i = 1; i <= option.min_args; ++i) { + flag_args.push_back(args[cur_pos+i]); + } + option.fun(flag_args); + return option.min_args; + } + + // Process an argument associated with a particular character; return num additional args used. 
+ size_t ProcessArg(char c, size_t cur_pos=0) { + if (!emp::Has(shortcuts, c)) { emp::notify::Error("Unknown flag '-", c , "'."); } + return ProcessArg(shortcuts[c], cur_pos); + } + + // Process the argument at a given position. Return number of additional args consumed. + size_t ProcessFlagSet(String name, size_t cur_pos=0) { + size_t offset = 0; + for (size_t i = 1; i < name.size(); ++i) { + offset += ProcessArg(name[i], cur_pos+offset); + } + return offset; + } + + // Process all of the flag data that we have. + void Process() { + for (size_t i = 1; i < args.size(); ++i) { + String & arg = args[i]; + if (arg[0] == '-') { // We have a flag! + if (arg.size() > 1 && arg[1] == '-') i += ProcessArg(arg, i); + else i += ProcessFlagSet(arg, i); + } + else extras.push_back(arg); + } + } + + void PrintOptions(std::ostream & os=std::cout) const { + for (const auto & [name, options] : flag_options) { + os << " " << name; + if (options.shortcut) { + os << " (or '-" << options.shortcut << "')"; + } + if (options.desc.size()) { + os << " : " << options.desc; + } + os << "\n"; + } + } + }; + +} + +#endif // #ifndef EMP_CONFIG_FLAGMANAGER_HPP_INCLUDE diff --git a/include/emp/control/Action.hpp b/include/emp/control/Action.hpp index b2484c6ce8..25896e5591 100644 --- a/include/emp/control/Action.hpp +++ b/include/emp/control/Action.hpp @@ -9,7 +9,7 @@ * @note Status: Beta * * @todo Create an ActionDefaults class that can take fewer args than expected and fill in rest. - * @todo Allow for named arguments to facilite intepreted functions. + * @todo Allow for named arguments to facilitate interpreted functions. */ #ifndef EMP_CONTROL_ACTION_HPP_INCLUDE @@ -24,8 +24,7 @@ namespace emp { /// BaseActions abstract functions and allow for signals to be setup at runtime; they can be /// called with types specified in the call. /// - /// Actions can be a bit heavyweight, but can easily be converted to more lightweight - /// std:function objects. 
+ /// Actions can be a bit heavyweight, but can easily be converted to std::function objects. class ActionBase { protected: @@ -61,11 +60,11 @@ namespace emp { size_t GetArgCount() const { return ARG_COUNT; } }; - /// The declaration for Action has any template types; the only definined specilizations require + /// The declaration for Action has any template types; the only defined specializations require /// a function type to be specified (with void and non-void return type variants.) template class Action; - /// This Action class specialization takes a function with a void return tyime and builds it off + /// This Action class specialization takes a function with a void return type and builds it off /// of the action base classes. template class Action : public ActionSize { @@ -97,7 +96,7 @@ namespace emp { }; - /// This Action class specialization takes a function with any non-void return tyime and builds it + /// This Action class specialization takes a function with any non-void return type and builds it /// off of the action base classes. template class Action : public ActionSize { diff --git a/include/emp/control/Signal.hpp b/include/emp/control/Signal.hpp index cdde127bdb..012c331893 100644 --- a/include/emp/control/Signal.hpp +++ b/include/emp/control/Signal.hpp @@ -107,7 +107,7 @@ namespace emp { std::string name; ///< What is the unique name of this signal? uint32_t signal_id; ///< What is the unique ID of this signal? - uint32_t next_link_id; ///< What ID shouild the next link have? + uint32_t next_link_id; ///< What ID should the next link have? std::map link_key_map; ///< Map unique link keys to link index for actions. emp::vector managers; ///< What manager is handling this signal? man_t * prime_manager; ///< Which manager leads deletion? 
(nullptr for self) @@ -115,7 +115,7 @@ namespace emp { // Helper Functions SignalKey NextSignalKey() { return SignalKey(signal_id,++next_link_id); } - // SignalBase should only be constructable from derrived classes. + // SignalBase should only be constructable from derived classes. SignalBase(const std::string & n, internal::SignalManager_Base * manager=nullptr) : name(n), signal_id(0), next_link_id(0), link_key_map(), managers(), prime_manager(nullptr) { @@ -366,7 +366,7 @@ namespace emp { #ifndef DOXYGEN_SHOULD_SKIP_THIS template inline void SignalBase::BaseTrigger(ARGS... args) { - // Make sure this base class is really of the correct derrived type (but do so in an + // Make sure this base class is really of the correct derived type (but do so in an // assert since triggers may be called frequently and should be fast!) emp_assert(dynamic_cast< Signal * >(this)); ((Signal *) this)->Trigger(args...); diff --git a/include/emp/data/AnnotatedType.hpp b/include/emp/data/AnnotatedType.hpp new file mode 100644 index 0000000000..a095e36813 --- /dev/null +++ b/include/emp/data/AnnotatedType.hpp @@ -0,0 +1,90 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021. + * + * @file AnnotatedType.hpp + * @brief A base class to provide a DataMap and accessors to derived classes. + * @note Status: ALPHA + * + */ + +#ifndef EMP_DATA_ANNOTATEDTYPE_HPP_INCLUDE +#define EMP_DATA_ANNOTATEDTYPE_HPP_INCLUDE + +#include "../base/assert.hpp" +#include "../meta/TypeID.hpp" +#include "../tools/string_utils.hpp" + +#include "DataMap.hpp" + +namespace emp { + + /// A generic base class implementing the use of dynamic traits via DataMaps. + class AnnotatedType { + private: + emp::DataMap data_map; ///< Dynamic variables assigned to this class. 
+ + public: + emp::DataMap & GetDataMap() { return data_map; } + const emp::DataMap & GetDataMap() const { return data_map; } + + void SetDataMap(const emp::DataMap & in_dm) { data_map = in_dm; } + + emp::DataLayout & GetDataLayout() { return data_map.GetLayout(); } + const emp::DataLayout & GetDataLayout() const { return data_map.GetLayout(); } + + bool HasTraitID(size_t id) const { return data_map.HasID(id); } + bool HasTrait(const std::string & name) const { return data_map.HasName(name); } + template + bool TestTraitType(size_t id) const { return data_map.IsType(id); } + template + bool TestTraitType(const std::string & name) const { return data_map.IsType(name); } + + size_t GetTraitID(const std::string & name) const { return data_map.GetID(name); } + + template + auto & GetTrait(KEY_T && key) { + return data_map.Get(std::forward(key)); + } + + template + auto GetTrait(KEY_T && key, size_t count) { + return data_map.Get(std::forward(key), count); + } + + template + const auto & GetTrait(KEY_T && key) const { + return data_map.Get(std::forward(key)); + } + + template + auto GetTrait(KEY_T && key, size_t count) const { + return data_map.Get(std::forward(key), count); + } + + template + T & SetTrait(size_t id, const T & val) { return data_map.Set(id, val); } + + template + T & SetTrait(const std::string & name, const T & val) { return data_map.Set(name, val); } + + emp::TypeID GetTraitType(size_t id) const { return data_map.GetType(id); } + emp::TypeID GetTraitType(const std::string & name) const { return data_map.GetType(name); } + + double GetTraitAsDouble(size_t id) const { return data_map.GetAsDouble(id); } + + double GetTraitAsDouble(size_t trait_id, emp::TypeID type_id) const { + return data_map.GetAsDouble(trait_id, type_id); + } + + std::string GetTraitAsString(size_t id) const { return data_map.GetAsString(id); } + + std::string GetTraitAsString(size_t trait_id, emp::TypeID type_id, size_t count=1) const { + return data_map.GetAsString(trait_id, type_id,
count); + } + }; + +} + +#endif // #ifndef EMP_DATA_ANNOTATEDTYPE_HPP_INCLUDE diff --git a/include/emp/data/DataFile.hpp b/include/emp/data/DataFile.hpp index f00db3b448..6a1ecc307a 100644 --- a/include/emp/data/DataFile.hpp +++ b/include/emp/data/DataFile.hpp @@ -11,6 +11,7 @@ #ifndef EMP_DATA_DATAFILE_HPP_INCLUDE #define EMP_DATA_DATAFILE_HPP_INCLUDE +#include #include #include #include @@ -142,7 +143,7 @@ namespace emp { /// Print a header containing comments describing all of the columns virtual void PrintHeaderComment(const std::string & cstart = "# ") { for (size_t i = 0; i < keys.size(); i++) { - *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")" << std::endl; + *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")\n"; } os->flush(); } @@ -618,10 +619,10 @@ namespace emp { /// Print a header containing comments describing all of the columns void PrintHeaderComment(const std::string & cstart = "# ") override { for (size_t i = 0; i < keys.size(); i++) { - *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")" << std::endl; + *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")\n"; } for (size_t i = 0; i < container_keys.size(); i++) { - *os << cstart << i+keys.size() << ": " << container_descs[i] << " (" << container_keys[i] << ")" << std::endl; + *os << cstart << i+keys.size() << ": " << container_descs[i] << " (" << container_keys[i] << ")\n"; } os->flush(); diff --git a/include/emp/data/DataLayout.hpp b/include/emp/data/DataLayout.hpp index 93f03bc313..661a29c418 100644 --- a/include/emp/data/DataLayout.hpp +++ b/include/emp/data/DataLayout.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2019. 
+ * date: 2019-2022 */ /** * @file @@ -19,6 +19,7 @@ #include "../base/assert.hpp" #include "../base/vector.hpp" #include "../datastructs/map_utils.hpp" +#include "../math/constants.hpp" #include "../meta/TypeID.hpp" #include "MemoryImage.hpp" @@ -34,6 +35,7 @@ namespace emp { std::string name; ///< Name of this setting. std::string desc; ///< Full description of this setting. std::string notes; ///< Any additional notes about this setting. + size_t count; ///< Number of objects in this entry. bool is_log; ///< Is this setting a current value or a log of all values? }; @@ -76,13 +78,55 @@ namespace emp { /// Determine if we have an ID. bool HasID(size_t id) const { return emp::Has(setting_map, id); } - /// Detemine if we have the correct type of a specific variable ID. + /// Determine if we have the correct type of a specific variable ID. template bool IsType(size_t id) const { - emp_assert(Has(setting_map, id), id); + emp_assert(emp::Has(setting_map, id), id); return setting_map.find(id)->second.type == emp::GetTypeID(); } + // Verify type, position, AND count. + template + bool Has(size_t id, size_t count=1) const { + auto it = setting_map.find(id); + return it != setting_map.end() && + it->second.type == emp::GetTypeID() && + it->second.count == count; + } + + // Verify name, position, AND count. + template + bool Has(const std::string & name, size_t count=1) const { + auto it = id_map.find(name); + return (it != id_map.end()) && Has(it->second, count); + } + + template + std::string DiagnoseHas(KEY_T key, size_t count=1) const { + size_t id = 0; + if constexpr (std::is_arithmetic()) { + id = key; + } else { // key is name. 
+ auto it = id_map.find(key); + if (it == id_map.end()) return emp::to_string("Unknown trait name '", key, "'"); + id = it->second; + } + + auto setting_it = setting_map.find(id); + if (setting_it == setting_map.end()) { + if (id == emp::MAX_SIZE_T) return emp::to_string("Unknown ID ", id, " (aka -1)"); + return emp::to_string("Unknown ID ", id); + } + if (setting_it->second.type != emp::GetTypeID()) { + return emp::to_string("Checking for type as ", emp::GetTypeID(), + ", but recorded as ", setting_it->second.type); + } + if (setting_it->second.count != count) { + return emp::to_string("Checking for count of ", count, + ", but recorded as ", setting_it->second.count); + } + return emp::to_string("Has<", emp::GetTypeID(), ">(", key, ",", count, ") should be true."); + } /// Return the number of bytes in the default image. size_t GetImageSize() const { return image_size; } @@ -99,31 +143,44 @@ namespace emp { return setting_map.find(id)->second.type; } + // What is the count associated with a given entry. + size_t GetCount(size_t id) const { + emp_assert(HasID(id), id); + return setting_map.find(id)->second.count; + } + + /// Determine if an entry is some form of numeric type. + bool IsNumeric(size_t id) const { + return GetType(id).IsArithmetic(); + } + + bool IsNumeric(const std::string & name) const { + return IsNumeric(GetID(name)); + } + /// Prevent this layout from being modified. void Lock() { is_locked = true; } /// Add a new variable with a specified type, name and value. template - size_t Add(MemoryImage & base_memory, - const std::string & name, - const T & default_value, - const std::string & desc="", - const std::string & notes="") { - emp_assert(!HasName(name), name); // Make sure this doesn't already exist. - emp_assert(is_locked == false); // Cannot add to a locked layout.
- - // std::cout << "\nL: Adding var '" << name - // << "' of type " << emp::GetTypeID() - // << " to DataMap with " << id_map.size() << " elements" - // << " totalling " << image_size << " bytes." - // << std::endl; - - // Analyze the size of the new object and where it will go. + size_t Add(MemoryImage & base_memory, // Memory to store prototype objects. + const std::string & name, // Lookup name for this variable. + const T & default_value, // Initial value for each object in this entry. + const std::string & desc="", // Description associated with this variable + const std::string & notes="", // Additional information. + const size_t count = 1 // Number of values to store with this entry. + ) { + emp_assert(!HasName(name), name); // Make sure this doesn't already exist. + emp_assert(count >= 1); // Must add at least one instance of an object. + emp_assert(is_locked == false); // Cannot add to a locked layout. + + // Analyze the size of the new object(s) and where it will go. constexpr const size_t obj_size = sizeof(T); + const size_t entry_size = obj_size * count; const size_t pos = image_size; // Create a new image with enough room for the new object and move the old data over. - MemoryImage new_memory(image_size + obj_size); + MemoryImage new_memory(image_size + entry_size); MoveImageContents(base_memory, new_memory); // Now that the data is moved, cleanup the old image and put the new one in place. @@ -131,18 +188,22 @@ namespace emp { // Setup this new object. image_size = base_memory.GetSize(); - base_memory.Construct(pos, default_value); + for (size_t i = 0; i < count; ++i) { + base_memory.Construct(pos + i*obj_size, default_value); + } base_memory.init_to = image_size; // Store the information about this object. id_map[name] = pos; - setting_map[pos] = { emp::GetTypeID(), name, desc, notes, false }; + setting_map[pos] = { emp::GetTypeID(), name, desc, notes, count, false }; // Store copy constructor if needed. 
if (std::is_trivially_copyable() == false) { copy_constructors.push_back( - [pos](const MemoryImage & from_image, MemoryImage & to_image) { - to_image.CopyObj(pos, from_image); + [pos,count](const MemoryImage & from_image, MemoryImage & to_image) { + for (size_t i = 0; i < count; ++i) { + to_image.CopyObj(pos + i*sizeof(T), from_image); + } } ); } @@ -150,15 +211,21 @@ namespace emp { // Store destructor if needed. if (std::is_trivially_destructible() == false) { destructors.push_back( - [pos](MemoryImage & image) { image.Destruct(pos); } + [pos,count](MemoryImage & image) { + for (size_t i = 0; i < count; ++i) { + image.Destruct(pos + i*sizeof(T)); + } + } ); } // Store move constructor if needed. if (std::is_trivially_destructible() == false) { move_constructors.push_back( - [pos](MemoryImage & from_image, MemoryImage & to_image) { - to_image.MoveObj(pos, from_image); + [pos,count](MemoryImage & from_image, MemoryImage & to_image) { + for (size_t i = 0; i < count; ++i) { + to_image.MoveObj(pos + i*sizeof(T), from_image); + } } ); } @@ -178,7 +245,7 @@ namespace emp { image.init_to = 0; } - /// Destruct and delete all memomry assocated with this DataMap. + /// Destruct and delete all memory associated in the provided image. void ClearImage(MemoryImage & image) const { // If this memory image is already clear, stop. if (image.GetSize() == 0) return; diff --git a/include/emp/data/DataLog.hpp b/include/emp/data/DataLog.hpp index 38eb0ab371..05e56f70af 100644 --- a/include/emp/data/DataLog.hpp +++ b/include/emp/data/DataLog.hpp @@ -8,6 +8,8 @@ * @brief Tools for processing a single set of data. * @note Status: ALPHA * + * A DataLog takes in a continuous series of data and allows for easy analysis, both by + * performing calculations on those values and by outputting ascii histograms, etc. 
*/ #ifndef EMP_DATA_DATALOG_HPP_INCLUDE diff --git a/include/emp/data/DataManager.hpp b/include/emp/data/DataManager.hpp index 7fa440c5c1..fa0b2bc03c 100644 --- a/include/emp/data/DataManager.hpp +++ b/include/emp/data/DataManager.hpp @@ -38,22 +38,12 @@ namespace emp { } // so we can use range-based for loops - auto begin() -> decltype(std::begin(node_map)) { - return std::begin(node_map); - } - // so we can use range-based for loops - auto end() -> decltype(std::end(node_map)) { - return std::end(node_map); - } + auto begin() { return std::begin(node_map); } + auto end() { return std::end(node_map); } // so we can use range-based for loops with const - auto begin() const -> const decltype(std::begin(node_map)) { - return std::begin(node_map); - } - // so we can use range-based for loops with const - auto end() const -> const decltype(std::end(node_map)) { - return std::end(node_map); - } + auto begin() const { return std::begin(node_map); } + auto end() const { return std::end(node_map); } /// @returns the number of DataNodes in this DataManager size_t GetSize() const { return node_map.size(); } @@ -89,10 +79,10 @@ namespace emp { } /// @returns a reference to the node with the specified name - /// Throws an error if there is no node with that name in this manager /// @param name the name of the DataNode node_t & Get(const std::string & name) { - emp_assert(Has(node_map, name), name, emp::to_string(Keys(node_map))); + emp_assert(Has(node_map, name), name); + emp_assert(node_map[name] != nullptr); return *(node_map[name]); } diff --git a/include/emp/data/DataMap.hpp b/include/emp/data/DataMap.hpp index cb3705fcc4..baa41ae205 100644 --- a/include/emp/data/DataMap.hpp +++ b/include/emp/data/DataMap.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2018-2021. + * date: 2018-2022. 
*/ /** * @file @@ -47,17 +47,17 @@ * to be stored elsewhere (presumably in the memory image, but possibly in the layout.) * 5. The memory is a LOG of values, not a single value. This allows for quick identification * of when something special needs to be done. - * 6-8. Limited type information (8 types that can be handled more effectively?) + * 6-8. Limited type information (7 types that can be handled more effectively?) * * - We should be able to keep a series of values, not just a single one. This can be done with * a series of new functions: - * AddLog() instead of AddVar() when new veriable is created. - * Get() should still work for latest value. Ideally keep lates in first position. + * AddLog() instead of AddVar() when new variable is created. + * Get() should still work for latest value. Ideally keep latest in first position. * Change non-const Get() to GetRef() which cannot be used for a log. - * Add GetAve() function for logs as well as GetLog() for the full vector. + * Add GetAve() function for logs as well as GetLog() for the full series (as std::span?). * * - Settings for all entries should have more information on how they are dealt with, such as if - * they should be included in output an how. Perhaps a system of tags for dynamic use? + * they should be included in output and how. Perhaps a system of tags for dynamic use? * * - After everything else is working, build a LocalDataMap that locks in the size at * compile time, providing more localized memory. Otherwise DataMap as a whole can be built @@ -68,25 +68,29 @@ * * - A user should be able to override copy constructors (though probably not move constructors * or destructors?). Then the copy process can be more customizable, for example having some - * settings retrun to the default value or be further processed. It's also possible to have + * settings return to the default value or be further processed. 
It's also possible to have * multiple types of copies, so if we indicate a "Copy birth" we get the above, but if we * indicate a "Copy clone" or "Copy inject" we do something different. We also probably need * to allow for multiple parents... * * - An OptimizeLayout() function that can reorder entries so that they are somehow more sensible? + * Does DataMap need to worry about memory alignment? * - * - A MemoryImage factory to speed up allocation, deallocation if we're using the same size + * - A MemoryImage factory to speed up allocation and deallocation if we're using the same size * images repeatedly. * * - Some way of grouping memory across DataMaps so that a particular entry for many maps has all - * of its instances consecutive in memory? This seems really tricky to pull of, but if we can + * of its instances consecutive in memory? This seems really tricky to pull off, but if we can * do it, the improvement in cache performance could be dramatic. + * + * - Rename DataLayout and MemoryImage to DataMap_Layout and DataMap_Memory? */ #ifndef EMP_DATA_DATAMAP_HPP_INCLUDE #define EMP_DATA_DATAMAP_HPP_INCLUDE #include // For std::memcpy +#include #include #include @@ -96,6 +100,7 @@ #include "../tools/string_utils.hpp" #include "DataLayout.hpp" +#include "Datum.hpp" #include "MemoryImage.hpp" namespace emp { @@ -108,6 +113,8 @@ namespace emp { DataMap(emp::Ptr in_layout_ptr, size_t in_size) : memory(in_size), layout_ptr(in_layout_ptr) { ; } + // -- Helper functions -- + /// If the current layout is shared, make a copy of it. void MakeLayoutUnique() { // Make sure we have a layout, even if empty. @@ -119,6 +126,7 @@ namespace emp { layout_ptr.New(*layout_ptr); } } + public: DataMap() : layout_ptr(nullptr) { ; } DataMap(const DataMap & in_map) : layout_ptr(in_map.layout_ptr) { @@ -133,44 +141,20 @@ namespace emp { } // Copy Operator... 
- DataMap & operator=(const DataMap & in_map) { - // If we have a layout pointer, use it to clear our memory image and update it if needed. - if (layout_ptr) { - layout_ptr->ClearImage(memory); - - // If layout pointer doesn't match the new one, shift over. - if (layout_ptr != in_map.layout_ptr) { - layout_ptr->DecMaps(); // Remove self from counter. - if (layout_ptr->GetNumMaps() == 0) layout_ptr.Delete(); // Delete layout if now unused. - layout_ptr = in_map.layout_ptr; // Shift to new layout. - if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. - } - } - - // Otherwise we DON'T have a layout pointer, so setup the new one. - else { - layout_ptr = in_map.layout_ptr; // Shift to new layout. - if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. - } - - // Now that we know we have a good layout, copy over the image. - layout_ptr->CopyImage(in_map.memory, memory); - - return *this; - } + DataMap & operator=(const DataMap & in_map); ~DataMap() { /// If we have a layout pointer, clean up! if (!layout_ptr.IsNull()) { - // Clean up the current MemoryImage. - layout_ptr->ClearImage(memory); - - // Clean up the DataLayout - layout_ptr->DecMaps(); + layout_ptr->ClearImage(memory); // Clean up the current MemoryImage. + layout_ptr->DecMaps(); // Clean up the DataLayout if (layout_ptr->GetNumMaps() == 0) layout_ptr.Delete(); } } + // Built-in types. + using key_type = std::string; + /// Determine how many Bytes large this image is. size_t GetSize() const { return memory.GetSize(); } @@ -198,19 +182,26 @@ namespace emp { return layout_ptr->IsType(GetID(name)); } + /// Verify settings + template + bool Has(ARGS &&... args) const { + emp_assert(layout_ptr); + return layout_ptr->Has(std::forward(args)...); + } + /// Retrieve a variable by its type and position. 
template T & Get(size_t id) { - emp_assert(HasID(id), "Can only get IDs the are available in DataMap.", id, GetSize()); - emp_assert(IsType(id)); + emp_assert(Has(id), "Can only get IDs/types that match DataMap in type and count.", + id, GetSize(), layout_ptr->DiagnoseHas(id)); return memory.Get(id); } /// Retrieve a const variable by its type and position. template const T & Get(size_t id) const { - emp_assert(HasID(id), id, GetSize()); - emp_assert(IsType(id)); + emp_assert(Has(id), "Can only get IDs/types that match DataMap in type and count.", + id, GetSize(), layout_ptr->DiagnoseHas(id)); return memory.Get(id); } @@ -218,20 +209,51 @@ namespace emp { /// Retrieve a variable by its type and name. (Slower!) template T & Get(const std::string & name) { - emp_assert(HasName(name), name); - emp_assert(IsType(name), "DataMap::Get() must be provided the correct type.", - name, GetType(name), emp::GetTypeID()); + emp_assert(Has(name), "Can only get name/types that match DataMap in type and count.", + name, GetSize(), layout_ptr->DiagnoseHas(name)); return memory.Get(GetID(name)); } /// Retrieve a const variable by its type and name. (Slower!) 
template const T & Get(const std::string & name) const { - emp_assert(HasName(name)); - emp_assert(IsType(name), name, GetType(name), emp::GetTypeID()); + emp_assert(Has(name), "Can only get name/types that match DataMap in type and count.", + name, GetSize(), layout_ptr->DiagnoseHas(name)); return memory.Get(GetID(name)); } + // Retrieve a set of variables by id (as an std::span) + template + std::span Get(size_t id, size_t count) { + emp_assert(Has(id, count), "Can only get name/types that match DataMap.", + id, count, GetSize(), layout_ptr->DiagnoseHas(id,count)); + return memory.Get(id, count); + } + + // Retrieve a const set of variables by id (as an std::span) + template + std::span Get(size_t id, size_t count) const { + emp_assert(Has(id, count), "Can only get name/types that match DataMap.", + id, GetSize(), layout_ptr->DiagnoseHas(id,count)); + return memory.Get(id, count); + } + + // Retrieve a set of variables by name (as an std::span) + template + std::span Get(const std::string & name, size_t count) { + emp_assert(HasName(name), "Cannot get names not stored in DataMap.", + name, layout_ptr->DiagnoseHas(name, count)); + return Get(GetID(name), count); + } + + // Retrieve a const set of variables by name (as an std::span) + template + std::span Get(const std::string & name, size_t count) const { + emp_assert(HasName(name), "Cannot get names not stored in DataMap.", + name, layout_ptr->DiagnoseHas(name, count)); + return Get(GetID(name), count); + } + /// Set a variable by ID. template T & Set(size_t id, const T & value) { return (Get(id) = value); @@ -254,6 +276,9 @@ namespace emp { return layout_ptr->GetType(GetID(name)); } + bool IsNumeric(size_t id) const { return GetType(id).IsArithmetic(); } + bool IsNumeric(const std::string & name) const { return IsNumeric(GetID(name)); } + /// Get the memory at the target position, assume it is the provided type, and convert the /// value found there to double. 
double GetAsDouble(size_t id, TypeID type_id) const { @@ -270,16 +295,25 @@ namespace emp { /// Get the memory at the target position, assume it is the provided type, and convert the /// value found there to string. - std::string GetAsString(size_t id, TypeID type_id) const { + std::string GetAsString(size_t id, TypeID type_id, size_t count=1) const { emp_assert(HasID(id), "Can only Get IDs that are available in DataMap.", id, GetSize()); emp_assert(type_id == layout_ptr->GetType(id)); - return type_id.ToString(memory.GetPtr(id)); + emp_assert(count == layout_ptr->GetCount(id)); + if (count == 1) return type_id.ToString(memory.GetPtr(id)); + else { + size_t obj_size = type_id.GetSize(); + std::stringstream ss; + for (size_t i = 0; i < count; ++i) { + ss << '[' << type_id.ToString(memory.GetPtr(id+i*obj_size)) << ']'; + } + return ss.str(); + } } /// Get the memory at the target position, lookup it's type, and convert the value to string. std::string GetAsString(size_t id) const { emp_assert(HasID(id), "Can only get IDs the are available in DataMap.", id, GetSize()); - return GetAsString(id, layout_ptr->GetType(id)); + return GetAsString(id, layout_ptr->GetType(id), layout_ptr->GetCount(id)); } /// Add a new variable with a specified type, name and value. @@ -287,9 +321,17 @@ namespace emp { size_t AddVar(const std::string & name, const T & default_value, const std::string & desc="", - const std::string & notes="") { + const std::string & notes="", + size_t count=1) { + MakeLayoutUnique(); // If the current layout is shared, first make a copy of it. + return layout_ptr->Add(memory, name, default_value, desc, notes, count); + } + + /// Add a new variable with just a specified type and name; must be able to default. + template + size_t AddVar(const std::string & name) { MakeLayoutUnique(); // If the current layout is shared, first make a copy of it.
- return layout_ptr->Add(memory, name, default_value, desc, notes); + return layout_ptr->Add(memory, name, T{}, "", "", 1); } /// Test if this DataMap uses the specified layout. @@ -297,6 +339,9 @@ namespace emp { return layout_ptr == &in_layout; } + /// Test if this DataMap has ANY layout. + bool HasLayout() const { return layout_ptr; } + /// Test if this DataMap is using the identical layout as another DataMap. bool SameLayout(const emp::DataMap & in_dm) const { return layout_ptr == in_dm.layout_ptr; @@ -304,7 +349,10 @@ namespace emp { } /// Get the DataLayout so that it can be used elsewhere. - const emp::DataLayout & GetLayout() { return *layout_ptr; } + emp::DataLayout & GetLayout() { return *layout_ptr; } + + /// Get the DataLayout so that it can be used elsewhere. + const emp::DataLayout & GetLayout() const { return *layout_ptr; } /// Test if this layout is locked (i.e., it cannot be changed.) bool IsLocked() const { return layout_ptr && layout_ptr->IsLocked(); } @@ -314,8 +362,83 @@ namespace emp { MakeLayoutUnique(); layout_ptr->Lock(); } + + + ///////////////////////////////////////////////////////////////// + // Tools for working with DataMaps.... + + + /// Return a function that takes in a data map and (efficiently) returns a Datum using the + /// specified entry. + static std::function + MakeDatumAccessor(const emp::DataLayout & layout, size_t id) { + // This must be a DataLayout entry name. + emp_assert(layout.HasID(id), "DatumAccessor pointing to invalid id", id); + emp_assert(layout.GetCount(id) == 1, + "DatumAccessors must have a count of 1 for proper conversion.", + layout.GetCount(id)); + TypeID type_id = layout.GetType(id); + + // Return an appropriate accessor for this value. 
+ if (type_id.IsType()) { // Explicit STRING + return [id](const emp::DataMap & dm){ + return emp::Datum(dm.Get(id)); + }; + } + else if (type_id.IsType()) { // Explicit DOUBLE + return [id](const emp::DataMap & dm){ + return emp::Datum(dm.Get(id)); + }; + } + else if (type_id.IsArithmetic()) { // Other NUMERIC type + return [id,type_id](const emp::DataMap & dm){ + return emp::Datum(type_id.ToDouble(dm.memory.GetPtr(id))); + }; + } + else { // Resort to STRING + return [id,type_id](const emp::DataMap & dm){ + return emp::Datum(type_id.ToString(dm.memory.GetPtr(id))); + }; + } + } + + /// Return a function that takes in a data map and (efficiently) returns a Datum using the + /// specified name. + static auto MakeDatumAccessor(const emp::DataLayout & layout, const std::string & name) { + emp_assert(layout.HasName(name), "DatumAccessor not pointing to valid name", name); + return MakeDatumAccessor(layout, layout.GetID(name)); + } }; + + // Copy Operator... + inline DataMap & DataMap::operator=(const DataMap & in_map) { if (this == &in_map) return *this; // Self-assignment would clear our own image before copying it. + // If we have a layout pointer, use it to clear our memory image and update it if needed. + if (layout_ptr) { + layout_ptr->ClearImage(memory); + + // If layout pointer doesn't match the new one, shift over. + if (layout_ptr != in_map.layout_ptr) { + layout_ptr->DecMaps(); // Remove self from counter. + if (layout_ptr->GetNumMaps() == 0) layout_ptr.Delete(); // Delete layout if now unused. + layout_ptr = in_map.layout_ptr; // Shift to new layout. + if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. + } + } + + // Otherwise we DON'T have a layout pointer, so setup the new one. + else { + layout_ptr = in_map.layout_ptr; // Shift to new layout. + if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. + } + + // Now that we know we have a good layout, copy over the image.
+ if (layout_ptr) layout_ptr->CopyImage(in_map.memory, memory); + + return *this; + } + + } #endif // #ifndef EMP_DATA_DATAMAP_HPP_INCLUDE diff --git a/include/emp/data/DataNode.hpp b/include/emp/data/DataNode.hpp index d1807565e8..7fb9bf1413 100644 --- a/include/emp/data/DataNode.hpp +++ b/include/emp/data/DataNode.hpp @@ -130,7 +130,10 @@ namespace emp { /// Calculate the median of observed values double GetMedian() const {emp_assert(false, "Calculating median requires a DataNode with the Log modifier"); return 0;} /// Calculate a percentile of observed values - double GetPercentile(const double pct) const {emp_assert(false, "Calculating percentile requires a DataNode with the Log modifier"); return 0;} + double GetPercentile(const double /*pct*/) const { + emp_assert(false, "Calculating percentile requires a DataNode with the Log modifier"); + return 0; + } const std::string & GetName() const { return emp::empty_string(); } const std::string & GetDescription() const { return emp::empty_string(); } @@ -145,7 +148,7 @@ namespace emp { emp_assert(false, "Invalid call for DataNode config."); } - void AddDatum(const VAL_TYPE & val) { val_count++; } + void AddDatum(const VAL_TYPE & /*val*/) { val_count++; } void Reset() { val_count = 0; } diff --git a/include/emp/data/Datum.hpp b/include/emp/data/Datum.hpp new file mode 100644 index 0000000000..703d48c699 --- /dev/null +++ b/include/emp/data/Datum.hpp @@ -0,0 +1,200 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2023. + * + * @file Datum.hpp + * @brief A single piece of data, either a value or a string. + * @note Status: ALPHA + * + * DEVELOPER NOTES: + * - For now, using unions, but this creates complications with non-trivial constructor/destructor + * for strings, so could try shifting over.
+ */ + +#ifndef EMP_DATA_DATUM_HPP_INCLUDE +#define EMP_DATA_DATUM_HPP_INCLUDE + +#include + +#include "../base/assert.hpp" +#include "../base/notify.hpp" +#include "../math/math.hpp" + +namespace emp { + + class Datum { + private: + union { + double num; + std::string str; + }; + bool is_num = true; + + void InitString() { new (&str) std::string; is_num = false; } + template + void InitString(T && in) { new (&str) std::string(std::forward(in)); is_num = false; } + void FreeString() { if (!is_num) str.~basic_string(); } + public: + Datum() : num(0.0), is_num(true) { } + Datum(double in) : num(in), is_num(true) { } + Datum(const std::string & in) { InitString(in); } + Datum(std::string && in) { InitString(in); } + Datum(const char * in) { InitString(in); } + Datum(const Datum & in) { + if (in.is_num) num = in.num; + else InitString(in.str); + } + Datum(Datum && in) { + if (in.is_num) num = in.num; + else InitString(std::move(in.str)); + } + ~Datum() { FreeString(); } + + bool IsDouble() const { return is_num; } ///< Is this natively stored as a double? + bool IsString() const { return !is_num; } ///< Is this natively stored as a string? + + /// If we know Datum is a Double, we can request its native form. + double & NativeDouble() { emp_assert(is_num); return num; } + double NativeDouble() const { emp_assert(is_num); return num; } + + /// If we know Datum is a String, we can request its native form. + std::string & NativeString() { emp_assert(!is_num); return str; } + const std::string & NativeString() const { emp_assert(!is_num); return str; } + + double AsDouble() const { + if (is_num) return num; + // Make sure we have a value here; otherwise provide a warning and return 0.0. + if (str.size() > 0 && + (std::isdigit(str[0]) || (str[0] == '-' && str.size() > 1 && std::isdigit(str[1])))) { + return std::stod(str); + } + + // Otherwise this string is invalid. 
+ emp::notify::Warning("Cannot convert string '", str, "' to double."); + return 0.0; + } + + std::string AsString() const { + if (!is_num) return str; + std::stringstream ss; + ss << num; + return ss.str(); + //return std::to_string(num); + } + + operator double() const { return AsDouble(); } + operator std::string() const { return AsString(); } + + Datum & SetDouble(double in) { // If this were previously a string, clean it up! + FreeString(); // If there was previously a string, make sure to free it. + is_num = true; + num = in; + return *this; + } + + Datum & SetString(const std::string & in) { + if (is_num) InitString(in); // Convert to string. + else str = in; // Already a string. + return *this; + } + + Datum & Set(const Datum & in) { + if (in.is_num) return SetDouble(in.num); + else return SetString(in.str); + } + + Datum & operator=(double in) { return SetDouble(in); } + Datum & operator=(const std::string & in) { return SetString(in); } + Datum & operator=(const char * in) { return SetString(in); } + Datum & operator=(const Datum & in) { return Set(in); } + + // Unary operators + Datum operator+() const { return AsDouble(); } + Datum operator-() const { return -AsDouble(); } + Datum operator!() const { return AsDouble() == 0.0; } + + // Comparison operators + int CompareNumber(double rhs) const { + const double val = AsDouble(); + return (val == rhs) ? 0 : ((val < rhs) ? -1 : 1); + } + + int CompareString(const std::string & rhs) const { + if (is_num) { + const std::string val = std::to_string(num); + return (val == rhs) ? 0 : ((val < rhs) ? -1 : 1); + } + return (str == rhs) ? 0 : ((str < rhs) ? -1 : 1); + } + + int Compare(double rhs) const { return CompareNumber(rhs); } + int Compare(const std::string & rhs) const { return CompareString(rhs); } + int Compare(const char * rhs) const { return CompareString(rhs); } + int Compare(const Datum & rhs) const { return (rhs.is_num) ? 
CompareNumber(rhs) : CompareString(rhs); } + + template bool operator==(const T & rhs) const { return Compare(rhs) == 0; } + template bool operator!=(const T & rhs) const { return Compare(rhs) != 0; } + template bool operator< (const T & rhs) const { return Compare(rhs) == -1; } + template bool operator>=(const T & rhs) const { return Compare(rhs) != -1; } + template bool operator> (const T & rhs) const { return Compare(rhs) == 1; } + template bool operator<=(const T & rhs) const { return Compare(rhs) != 1; } + + // Binary Operators + + Datum operator+(double in) const { + if (IsDouble()) return NativeDouble() + in; + return NativeString() + std::to_string(in); + } + Datum operator*(double in) const { + if (IsDouble()) return NativeDouble() * in; + std::string out_string; + const size_t count = static_cast(in); + out_string.reserve(NativeString().size() * count); + for (size_t i = 0; i < count; ++i) out_string += NativeString(); + return out_string; + } + Datum operator-(double in) const { return AsDouble() - in; } + Datum operator/(double in) const { return AsDouble() / in; } + Datum operator%(double in) const { return emp::Mod(AsDouble(), in); } + + Datum operator+(const Datum & in) const { + if (IsDouble()) return NativeDouble() + in.AsDouble(); + return NativeString() + in.AsString(); + } + Datum operator*(const Datum & in) const { + if (IsDouble()) return NativeDouble() * in.AsDouble(); + std::string out_string; + size_t count = static_cast(in.AsDouble()); + out_string.reserve(NativeString().size() * count); + for (size_t i = 0; i < count; i++) out_string += NativeString(); + return out_string; + } + Datum operator-(const Datum & in) const { return AsDouble() - in.AsDouble(); } + Datum operator/(const Datum & in) const { return AsDouble() / in.AsDouble(); } + Datum operator%(const Datum & in) const { return emp::Mod(AsDouble(), in.AsDouble()); } + + template + Datum operator+=(T && in) { return *this = operator+(std::forward(in)); } + template + Datum 
operator-=(T && in) { return *this = operator-(std::forward(in)); } + template + Datum operator*=(T && in) { return *this = operator*(std::forward(in)); } + template + Datum operator/=(T && in) { return *this = operator/(std::forward(in)); } + template + Datum operator%=(T && in) { return *this = operator%(std::forward(in)); } + }; + + std::ostream & operator<<(std::ostream & out, const emp::Datum & d) { + out << d.AsString(); + return out; + } + +} + +emp::Datum operator%(double value1, emp::Datum value2) { + return emp::Mod(value1, value2.AsDouble()); +} + +#endif // #ifndef EMP_DATA_DATUM_HPP_INCLUDE diff --git a/include/emp/data/MemoryImage.hpp b/include/emp/data/MemoryImage.hpp index e973821f52..044a3f7c0c 100644 --- a/include/emp/data/MemoryImage.hpp +++ b/include/emp/data/MemoryImage.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2019 + * date: 2019-2022 */ /** * @file @@ -15,6 +15,7 @@ #include // For std::memcpy #include // For placement new +#include #include #include "../base/assert.hpp" @@ -29,7 +30,7 @@ namespace emp { private: emp::Ptr image = nullptr; ///< Current memory image. size_t size = 0; ///< Size of current image. - size_t init_to = 0; ///< How far if the current image initialized? + size_t init_to = 0; ///< How far of the current image is initialized? // Setup all of the uninitialized memory to be non-zero. void Fuzz() { @@ -78,6 +79,16 @@ namespace emp { return *GetPtr(pos); } + /// Get proper spans to sets of same-type objects represented in this image. 
+ template std::span Get(size_t pos, size_t count) { + emp_assert(pos < GetInitSize(), "Only get a span from initialized memory."); + return std::span( GetPtr(pos).Raw(), count ); + } + template std::span Get(size_t pos, size_t count) const { + emp_assert(pos < GetInitSize(), "Only get a span from initialized memory."); + return std::span( GetPtr(pos).Raw(), count ); + } + /// Change the size of this memory. Assume all cleanup and setup is done elsewhere. void RawResize(size_t new_size) { // If the size is already good, stop here. @@ -93,7 +104,7 @@ namespace emp { } /// Copy all of the bytes directly from another memory image. Size manipulation must be - /// done beforehand to ensure sufficient space is availabe. + /// done beforehand to ensure sufficient space is available. void RawCopy(const MemoryImage & from_memory) { emp_assert(GetSize() >= from_memory.GetSize()); if (from_memory.GetSize() == 0) return; // Nothing to copy! diff --git a/include/emp/data/README.md b/include/emp/data/README.md new file mode 100644 index 0000000000..b851d57b7c --- /dev/null +++ b/include/emp/data/README.md @@ -0,0 +1,62 @@ +# Data Management Tools + +This directory contains a set of tools for managing more or less generic data. + + +## Individual pieces of data + +* Datum.hpp - emp::Datum holds a single value, which can be a string or a float. It shifts + between these types as needed. + + +## Tracking series of Data + +* DataNode.hpp - Manage a stream of data of a specific type; can specify at compile time how + data should be handled (tracking averages, modes, entropy, etc.) Can also be made to + pull data when needed. + +* DataInterface.hpp - A generic interface to DataNodes to make them easy to manage collectively. + +* DataManager.hpp - Manages a collection of DataNodes that all have the same settings. + +* DataFile.hpp - A collection of DataNodes that automatically output desired information to + an output file.
+ +* DataLog.hpp - Manage a series of data, tracking calculations and printing histograms. + + +## Tracking arbitrary named data + +* DataMap.hpp - Links variable names to arbitrary type values, these are stored in a single + memory block for locality and easy group copying. + +* DataLayout.hpp - Keeps track of information associated with each variable in a data map, + including location, type, description, etc. + +* MemoryImage.hpp - Block of memory managed using a given DataLayout. + +* AnnotatedType.hpp - Base class for objects that have a linked DataMap. + +* VarMap.hpp - Similar to DataMap, but types are stored with variables and not consecutive + in memory. + +* DataMapParser.hpp - A parser to take an equation based on variables in a DataLayout that + will produce a lambda. If a DataMap is passed into the lambda the equation will be + calculated and the result returned. + +* Trait.hpp - ? + + +## To add? + +DataFrame - rows are entries, columns are types, stored by column for fast calculation. +DataColumn = vector from DataFrame with type information. +DataRow - Same interface as DataMap; refers to associated DataFrame. + +DataTracker - Handles all of the functionality of DataNode, DataLog, etc., but more dynamic + using lambdas to deal with values as needed. + + +## To modify? + +Datum - should be able to do uint64_t? diff --git a/include/emp/data/SimpleParser.hpp b/include/emp/data/SimpleParser.hpp new file mode 100644 index 0000000000..eb242fec90 --- /dev/null +++ b/include/emp/data/SimpleParser.hpp @@ -0,0 +1,626 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022. + * + * @file SimpleParser.hpp + * @brief Parser to convert function descriptions to lambdas using maps for variable lookup. 
+ * @note Status: ALPHA + * + * A fully functional parser that will convert a string-description of a function to a C++ + * lambda. A map-typed object should be passed in to provide values associated with variables. + * Allowed map types include std::map, std::unordered_map, + * emp::DataMap, and (soon) derivations from emp::AnnotatedType. For standard maps, T must be + * convertable to emp::Datum. + * + * Developer TODO: + * - Setup operator RegEx to be built dynamically + * - Setup LVALUES as a type, and allow assignment + * - Allow types other than Datum (string and double)? + */ + +#ifndef EMP_DATA_SIMPLEPARSER_HPP_INCLUDE +#define EMP_DATA_SIMPLEPARSER_HPP_INCLUDE + +#include +#include +#include +#include + +#include "../base/notify.hpp" +#include "../compiler/Lexer.hpp" +#include "../compiler/regex_utils.hpp" +#include "../data/Datum.hpp" +#include "../datastructs/ra_map.hpp" +#include "../math/Random.hpp" +#include "../meta/meta.hpp" + +#include "AnnotatedType.hpp" +#include "DataMap.hpp" + +namespace emp { + + class SimpleParser { + private: + + template + struct ValueType { + using fun_t = std::function; + enum type_t { ERROR=0, VALUE, FUNCTION }; + + type_t type; + emp::Datum value; + fun_t fun; + + ValueType() : type(ERROR) {} + ValueType(const ValueType &) = default; + ValueType(double in_val) : type(VALUE), value(in_val) { } + ValueType(std::string in_val) : type(VALUE), value(in_val) { } + ValueType(emp::Datum in_val) : type(VALUE), value(in_val) { } + ValueType(fun_t in_fun) : type(FUNCTION), fun(in_fun) { } + + ValueType & operator=(const ValueType &) = default; + ValueType & operator=(emp::Datum in_val) { type = VALUE; value = in_val; return *this; } + ValueType & operator=(double in_val) { type = VALUE; value = in_val; return *this; } + ValueType & operator=(const std::string & in_val) { type = VALUE; value = in_val; return *this; } + ValueType & operator=(fun_t in_fun) { type = FUNCTION; fun = in_fun; return *this; } + + fun_t AsFunction() { + if 
+ (type==FUNCTION) return fun; + else return [v=value](ARG_T){ return v; }; + } + }; + + template + struct SymbolTable { + using arg_t = const MAP_T &; + using fun_t = std::function; + using value_t = ValueType; + + SymbolTable() { } + SymbolTable(arg_t) { } + + static_assert( std::is_same(), + "Any map type used by the parser must have a key type of std::string"); + + static fun_t MakeDatumAccessor(const std::string & name) { + return [name](arg_t symbol_vals){ + auto val_it = symbol_vals.find(name); + emp_assert(val_it != symbol_vals.end()); + return emp::Datum(val_it->second); + }; + } + + /// By default, let the value handle its own conversion to a function. + auto AsFunction(ValueType & val) const { return val.AsFunction(); } + }; + + template + struct SymbolTable, DUMMY_T> { + using map_t = emp::ra_map; + using arg_t = const map_t &; + using fun_t = std::function; + using value_t = ValueType; + + const typename map_t::layout_t & layout; + + SymbolTable(const emp::ra_map & in_map) + : layout(in_map.GetLayout()) { } + + fun_t MakeDatumAccessor(const std::string & name) const { + emp_assert(layout.find(name) != layout.end()); + size_t id = layout.find(name)->second; + #ifdef NDEBUG + return [id](arg_t symbol_vals){ + #else + return [id,name](arg_t symbol_vals){ // Keep name in debug mode to check id. + emp_assert(symbol_vals.GetID(name) == id); + #endif + return emp::Datum(symbol_vals.AtID(id)); + }; + } + + /// By default, let the value handle its own conversion to a function. + auto AsFunction(ValueType & val) const { + // @CAO: Could check layout correctness in debug mode. + return val.AsFunction(); + } + }; + + /// Specialty implementation for DataLayouts.
+ template + struct SymbolTable { + using arg_t = const emp::DataMap &; + using fun_t = std::function; + using value_t = ValueType; + + const emp::DataLayout & layout; + + SymbolTable(const emp::DataLayout & in_layout) : layout(in_layout) { } + + auto MakeDatumAccessor(const std::string & name) const { + return emp::DataMap::MakeDatumAccessor(layout, name); + } + + auto AsFunction(ValueType & val) const { + #ifdef NDEBUG + return val.AsFunction(); + #else + // If we are in debug mode, add wrapper to ensure DataMap with has correct layout. + return [fun=val.AsFunction(),layout_ptr=&layout](arg_t dm) { + emp_assert(dm.HasLayout(*layout_ptr)); + return fun(dm); + }; + #endif + } + + }; + + /// Special DataMap implementation that just converts to underlying layout. + template + struct SymbolTable : public SymbolTable { + SymbolTable(const emp::DataMap & dm) : SymbolTable(dm.GetLayout()) { } + }; + + + using pos_t = emp::TokenStream::Iterator; + + static constexpr const bool verbose = false; + + class MapLexer : public emp::Lexer { + private: + int token_identifier; ///< Token id for identifiers + int token_number; ///< Token id for literal numbers + int token_string; ///< Token id for literal strings + int token_char; ///< Token id for literal characters + int token_external; ///< Token id for an external value that was passed in + int token_symbol; ///< Token id for other symbols + + public: + MapLexer() { + // Whitespace and comments should always be dismissed (top priority) + IgnoreToken("Whitespace", "[ \t\n\r]+"); + IgnoreToken("//-Comments", "//.*"); + IgnoreToken("/*...*/-Comments", "/[*]([^*]|([*]+[^*/]))*[*]+/"); + + // Meaningful tokens have next priority. + + // An identifier must begin with a letter, underscore, or dot, and followed by + // more of the same OR numbers or brackets. 
+ token_identifier = AddToken("Identifier", "[a-zA-Z_.][a-zA-Z0-9_.[\\]]*"); + + // A literal number must begin with a digit; it can have any number of digits in it and + // optionally a decimal point. + token_number = AddToken("Literal Number", "[0-9]+(\\.[0-9]+)?"); + + // A string must begin and end with a quote and can have an escaped quote in the middle. + token_string = AddToken("Literal String", "\\\"([^\"\\\\]|\\\\.)*\\\""); + + // A literal char must begin and end with a single quote. It will always be treated as + // its ascii value. + token_char = AddToken("Literal Character", "'([^'\n\\\\]|\\\\.)+'"); + + // An external value that was passed in will be a dollar sign ('$') followed by the + // position of the value to be used (e.g., '$3'). + token_external = AddToken("External Value", "[$][0-9]+"); + + // Symbols should have least priority. They include any solitary character not listed + // above, or pre-specified multi-character groups. + token_symbol = AddToken("Symbol", ".|\"==\"|\"!=\"|\"<=\"|\">=\"|\"~==\"|\"~!=\"|\"~<\"|\"~>\"|\"~<=\"|\"~>=\"|\"&&\"|\"||\"|\"**\"|\"%%\""); + } + + bool IsID(const emp::Token & token) const noexcept { return token.id == token_identifier; } + bool IsNumber(const emp::Token & token) const noexcept { return token.id == token_number; } + bool IsString(const emp::Token & token) const noexcept { return token.id == token_string; } + bool IsChar(const emp::Token & token) const noexcept { return token.id == token_char; } + bool IsExternal(const emp::Token & token) const noexcept { return token.id == token_external; } + bool IsSymbol(const emp::Token & token) const noexcept { return token.id == token_symbol; } + }; + + struct BinaryOperator { + using fun_t = std::function; + size_t prec; + fun_t fun; + void Set(size_t in_prec, fun_t in_fun) { prec = in_prec; fun = in_fun; } + }; + + struct Function { + using fun0_t = std::function; + using fun1_t = std::function; + using fun2_t = std::function; + using fun3_t = std::function; 
+ + size_t num_args = 0; + fun0_t fun0; fun1_t fun1; fun2_t fun2; fun3_t fun3; + + void Set0(fun0_t in_fun) { num_args = 0; fun0 = in_fun; } + void Set1(fun1_t in_fun) { num_args = 1; fun1 = in_fun; } + void Set2(fun2_t in_fun) { num_args = 2; fun2 = in_fun; } + void Set3(fun3_t in_fun) { num_args = 3; fun3 = in_fun; } + }; + + // --------- MEMBER VARIABLES ----------- + MapLexer lexer; + + // Operators and functions that should be used when parsing. + std::unordered_map> unary_ops; + std::unordered_map binary_ops; + std::unordered_map functions; + emp::vector external_vals; + + // The set of data map entries accessed when the last function was parsed. + std::set var_names; + + // Track the number of errors and the function to call when errors occur. + template + size_t ParseError(Ts &&... args) { + emp::notify::Exception("SimpleParser::PARSE_ERROR", emp::to_string(args...), this); + return 1; + } + + public: + SimpleParser(bool use_defaults=true) { + if (use_defaults) { + AddDefaultOperators(); + AddDefaultFunctions(); + } + } + + /// Construct with a random number generator to automatically include random functions. + SimpleParser(bool use_defaults, emp::Random & random) : SimpleParser(use_defaults) + { AddRandomFunctions(random); } + + /// Get the set of variable names that the most recently generated function used. + const std::set & GetNamesUsed() const { return var_names; } + + /// Get the set of names used in the provided equation. 
+ const std::set & GetNamesUsed(const std::string & expression) { + var_names.clear(); + emp::TokenStream tokens = lexer.Tokenize(expression, std::string("Expression: ") + expression); + for (emp::Token token : tokens) { + if (lexer.IsID(token) && !emp::Has(functions, token.lexeme)) { + var_names.insert(token.lexeme); + } + } + return var_names; + } + + + /// Add a unary operator + void AddOp(const std::string & op, std::function fun) { + unary_ops[op] = fun; + } + + /// Add a binary operator + void AddOp(const std::string & op, size_t prec, + std::function fun) { + binary_ops[op].Set(prec, fun); + } + + + static int ApproxCompare(double x, double y) { + static constexpr double APPROX_FRACTION = 8192.0; + double margin = y / APPROX_FRACTION; + if (x < y - margin) return -1; + if (x > y + margin) return 1; + return 0; + } + + void AddDefaultOperators() { + // Setup the unary operators for the parser. + AddOp("+", [](emp::Datum x) { return x; }); + AddOp("-", [](emp::Datum x) { return -x; }); + AddOp("!", [](emp::Datum x) { return !x; }); + + + // Setup the default binary operators for the parser. + size_t prec = 0; // Precedence level of each operator... 
+ AddOp("||", ++prec, [](emp::Datum x, emp::Datum y){ return (x!=0.0)||(y!=0.0); } ); + AddOp("&&", ++prec, [](emp::Datum x, emp::Datum y){ return (x!=0.0)&&(y!=0.0); } ); + AddOp("==", ++prec, [](emp::Datum x, emp::Datum y){ return x == y; } ); + AddOp("!=", prec, [](emp::Datum x, emp::Datum y){ return x != y; } ); + AddOp("~==", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) == 0; } ); + AddOp("~!=", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) != 0; } ); + AddOp("<", ++prec, [](emp::Datum x, emp::Datum y){ return x < y; } ); + AddOp("<=", prec, [](emp::Datum x, emp::Datum y){ return x <= y; } ); + AddOp(">", prec, [](emp::Datum x, emp::Datum y){ return x > y; } ); + AddOp(">=", prec, [](emp::Datum x, emp::Datum y){ return x >= y; } ); + AddOp("~<", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) == -1; } ); + AddOp("~<=", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) != 1; } ); + AddOp("~>", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) == 1; } ); + AddOp("~>=", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) != -1; } ); + AddOp("+", ++prec, [](emp::Datum x, emp::Datum y){ return x + y; } ); + AddOp("-", prec, [](emp::Datum x, emp::Datum y){ return x - y; } ); + AddOp("*", ++prec, [](emp::Datum x, emp::Datum y){ return x * y; } ); + AddOp("/", prec, [](emp::Datum x, emp::Datum y){ return x / y; } ); + AddOp("%", prec, [](emp::Datum x, emp::Datum y){ return emp::Mod(x, y); } ); + AddOp("**", ++prec, [](emp::Datum x, emp::Datum y){ return emp::Pow(x, y); } ); + AddOp("%%", prec, [](emp::Datum x, emp::Datum y){ return emp::Log(x, y); } ); + } + + void AddDefaultFunctions() { + // Setup the default functions. 
+ functions["ABS"].Set1( [](emp::Datum x){ return std::abs(x); } ); + functions["EXP"].Set1( [](emp::Datum x){ return emp::Pow(emp::E, x); } ); + functions["LOG"].Set1( [](emp::Datum x){ return std::log(x); } ); + functions["LOG2"].Set1( [](emp::Datum x){ return std::log2(x); } ); + functions["LOG10"].Set1( [](emp::Datum x){ return std::log10(x); } ); + + functions["SQRT"].Set1( [](emp::Datum x){ return std::sqrt(x); } ); + functions["CBRT"].Set1( [](emp::Datum x){ return std::cbrt(x); } ); + + functions["SIN"].Set1( [](emp::Datum x){ return std::sin(x); } ); + functions["COS"].Set1( [](emp::Datum x){ return std::cos(x); } ); + functions["TAN"].Set1( [](emp::Datum x){ return std::tan(x); } ); + functions["ASIN"].Set1( [](emp::Datum x){ return std::asin(x); } ); + functions["ACOS"].Set1( [](emp::Datum x){ return std::acos(x); } ); + functions["ATAN"].Set1( [](emp::Datum x){ return std::atan(x); } ); + functions["SINH"].Set1( [](emp::Datum x){ return std::sinh(x); } ); + functions["COSH"].Set1( [](emp::Datum x){ return std::cosh(x); } ); + functions["TANH"].Set1( [](emp::Datum x){ return std::tanh(x); } ); + functions["ASINH"].Set1( [](emp::Datum x){ return std::asinh(x); } ); + functions["ACOSH"].Set1( [](emp::Datum x){ return std::acosh(x); } ); + functions["ATANH"].Set1( [](emp::Datum x){ return std::atanh(x); } ); + + functions["CEIL"].Set1( [](emp::Datum x){ return std::ceil(x); } ); + functions["FLOOR"].Set1( [](emp::Datum x){ return std::floor(x); } ); + functions["ROUND"].Set1( [](emp::Datum x){ return std::round(x); } ); + + functions["ISINF"].Set1( [](emp::Datum x){ return std::isinf(x); } ); + functions["ISNAN"].Set1( [](emp::Datum x){ return std::isnan(x); } ); + + // Default 2-input functions + functions["HYPOT"].Set2( [](emp::Datum x, emp::Datum y){ return std::hypot(x,y); } ); + functions["EXP"].Set2( [](emp::Datum x, emp::Datum y){ return emp::Pow(x,y); } ); + functions["LOG"].Set2( [](emp::Datum x, emp::Datum y){ return emp::Log(x,y); } ); + 
functions["MIN"].Set2( [](emp::Datum x, emp::Datum y){ return (xy) ? x : y; } ); + functions["POW"].Set2( [](emp::Datum x, emp::Datum y){ return emp::Pow(x,y); } ); + + // Default 3-input functions. + functions["IF"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (x!=0.0) ? y : z; + } ); + functions["CLAMP"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (xz) ? z : x; + } ); + functions["TO_SCALE"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (z-y)*x+y; + } ); + functions["FROM_SCALE"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (x-y) / (z-y); + } ); + } + + void AddRandomFunctions(Random & random) { + functions["RAND"].Set0( [&random](){ return random.GetDouble(); } ); + functions["RAND"].Set1( [&random](emp::Datum x){ return random.GetDouble(x); } ); + functions["RAND"].Set2( [&random](emp::Datum x, emp::Datum y){ return random.GetDouble(x,y); } ); + } + + /// Helpers for parsing. + template + typename SYMBOLS_T::value_t ParseValue(const SYMBOLS_T & symbols, pos_t & pos) { + if constexpr (verbose) { + std::cout << "ParseValue at position " << pos.GetIndex() << " : " << pos->lexeme << std::endl; + } + + using arg_t = typename SYMBOLS_T::arg_t; + using fun_t = typename SYMBOLS_T::fun_t; + using value_t = typename SYMBOLS_T::value_t; + + // Deal with any unary operators... 
+ if (emp::Has(unary_ops, pos->lexeme)) { + if constexpr (verbose) std::cout << "Found UNARY OP: " << pos->lexeme << std::endl; + auto op = unary_ops[pos->lexeme]; + ++pos; + value_t val = ParseValue(symbols, pos); + if (val.type == value_t::VALUE) { return op(val.value); } + else { + return static_cast( + [fun=val.fun,op](arg_t arg){ return op(fun(arg)); } + ); + } + } + + // If we have parentheses, process the contents + if (pos->lexeme == "(") { + if constexpr (verbose) std::cout << "Found: OPEN PAREN" << std::endl; + ++pos; + value_t val = ParseMath(symbols, pos); + if (pos->lexeme != ")") return ParseError("Expected ')', but found '", pos->lexeme, "'."); + ++pos; + return val; + } + + // If this is a value, set it and return. + if (lexer.IsNumber(*pos)) { + double result = emp::from_string(pos->lexeme); + ++pos; + return result; + } + + // Similar for an external value + if (lexer.IsExternal(*pos)) { + size_t id = emp::from_string(pos->lexeme.substr(1)); + ++pos; + if (id >= external_vals.size()) { + ParseError("Invalid access into external variable (\"$", id, "\"): Does not exist."); + } + return external_vals[id]; + } + + // Otherwise it should be an identifier! + const std::string & name = pos->lexeme; + ++pos; + + // If it is followed by a parenthesis, it should be a function. + const bool is_fun = (pos.IsValid() && pos->lexeme == "("); + + if (is_fun) { + if (!emp::Has(functions, name)) return ParseError("Call to unknown function '", name,"'."); + ++pos; + emp::vector args; + while(pos->lexeme != ")") { + args.push_back(ParseMath(symbols, pos)); + if (pos->lexeme == ",") ++pos; + } + ++pos; + + // Now build the function based on its argument count.
+ fun_t out_fun; + switch (args.size()) { + case 0: + if (!functions[name].fun0) ParseError("Function '", name, "' requires arguments."); + out_fun = [fun=functions[name].fun0](arg_t /*sym_arg*/) { return fun(); }; + break; + case 1: + if (!functions[name].fun1) ParseError("Function '", name, "' cannot have 1 arguments."); + out_fun = [fun=functions[name].fun1,arg0=args[0].AsFunction()](arg_t sym_arg) { + return fun(arg0(sym_arg)); + }; + break; + case 2: + if (!functions[name].fun2) ParseError("Function '", name, "' cannot have 2 arguments."); + out_fun = [fun=functions[name].fun2, + arg0=args[0].AsFunction(), + arg1=args[1].AsFunction()](arg_t sym_arg) { + return fun(arg0(sym_arg), arg1(sym_arg)); + }; + break; + case 3: + if (!functions[name].fun3) ParseError("Function '", name, "' cannot have 3 arguments."); + out_fun = [fun=functions[name].fun3, + arg0=args[0].AsFunction(), + arg1=args[1].AsFunction(), + arg2=args[2].AsFunction()](arg_t sym_arg) { + return fun(arg0(sym_arg), arg1(sym_arg), arg2(sym_arg)); + }; + break; + default: + ParseError("Too many arguments (", args.size(), ") for function '", name, "'."); + } + return out_fun; + } + + var_names.insert(name); // Store this name in the list of those used. + return symbols.MakeDatumAccessor(name); // Return an accessor for this name. + } + + template + typename SYMBOLS_T::value_t ParseMath(const SYMBOLS_T & symbols, pos_t & pos, size_t prec_limit=0) { + using value_t = typename SYMBOLS_T::value_t; + using arg_t = typename SYMBOLS_T::arg_t; + value_t val1 = ParseValue(symbols, pos); + + if constexpr (verbose) { + if (pos.IsValid()) { + std::cout << "ParseMath at " << pos.GetIndex() << " : " << pos->lexeme << std::endl; + } else std::cout << "PROCESSED!" << std::endl; + } + + while (pos.IsValid() && pos->lexeme != ")" && pos->lexeme != ",") { + if constexpr (verbose) { std::cout << "...Scanning for op... [" << pos->lexeme << "]" << std::endl; } + + // If we have an operator, act on it! 
+ if (Has(binary_ops, pos->lexeme)) { + const BinaryOperator & op = binary_ops[pos->lexeme]; + if (prec_limit >= op.prec) return val1; // Precedence not allowed; return current value. + ++pos; + value_t val2 = ParseMath(symbols, pos, op.prec); + if (val1.type == value_t::VALUE) { + if (val2.type == value_t::VALUE) { val1 = op.fun(val1.value, val2.value); } + else { + val1 = [val1_num=val1.value,val2_fun=val2.fun,op_fun=op.fun](arg_t symbol_vals){ + return op_fun(val1_num, val2_fun(symbol_vals)); + }; + } + } else { + if (val2.type == value_t::VALUE) { + val1 = [val1_fun=val1.fun,val2_num=val2.value,op_fun=op.fun](arg_t symbol_vals){ + return op_fun(val1_fun(symbol_vals), val2_num); + }; + } else { + val1 = [val1_fun=val1.fun,val2_fun=val2.fun,op_fun=op.fun](arg_t symbol_vals){ + return op_fun(val1_fun(symbol_vals), val2_fun(symbol_vals)); + }; + } + } + } + + else ParseError("Operator '", pos->lexeme, "' NOT found!"); + } + + // @CAO Make sure there's not an illegal lexeme here. + + return val1; + } + + /// Take a set of variables and use them to replace $0, $1, etc. in any function. + template + void SetupStaticValues(T1 arg1, Ts... args) { + // If we have a vector of incoming values, make sure it is valid and then just pass it along. + if constexpr (sizeof...(Ts) == 0 && emp::is_emp_vector()) { + using value_t = typename T1::value_type; + static_assert(std::is_same(), + "If BuildMathFunction is provided a vector, it must contain only emp::Datum."); + external_vals = arg1; + return; + } + + else { + // Otherwise convert all args to emp::Datum. + external_vals = emp::vector{ + static_cast(arg1), + static_cast(args)... + }; + } + } + + /// If there are no input args, just clear external values. + void SetupStaticValues() { external_vals.resize(0); } + + /// Parse a function description that will take a map and return the results.
+ /// For example, if the string "foo * 2 + bar" is passed in, a function will be returned + /// that takes a map (of the proper type) loads in the values of "foo" and "bar", and + /// returns the result of the above equation. + + template + auto BuildMathFunction( + const MAP_T & symbol_map, ///< The map or layout to use, specifying variables. + const std::string & expression, ///< The primary expression to convert. + EXTRA_Ts... extra_args ///< Extra value arguments (accessed as $1, $2, etc.) + ) { + // If we have incoming values, store them appropriately. + SetupStaticValues(extra_args...); + + using value_t = typename SymbolTable::value_t; + SymbolTable symbol_table(symbol_map); + + // Tokenize the expression. + emp::TokenStream tokens = lexer.Tokenize(expression, std::string("Expression: ") + expression); + if constexpr (verbose) tokens.Print(); + var_names.clear(); // Reset the names used from data map. + pos_t pos = tokens.begin(); + value_t val = ParseMath(symbol_table, pos); + + // Return the value as a function. + return symbol_table.AsFunction(val); + } + + + /// Generate a temporary math function and immediately run it on the provided arguments. + /// @param symbol_map The map containing the required variables. + /// @param expression The mathematical expression to be run on the data map. + /// @param extras Any extra values to fill in a $0, $1, etc. + template + emp::Datum RunMathFunction(const MAP_T & symbol_map, ARG_Ts... 
args) { + auto fun = BuildMathFunction(symbol_map, std::forward(args)...); + return fun(symbol_map); + } + + }; + +} + +#endif // #ifndef EMP_DATA_SIMPLEPARSER_HPP_INCLUDE diff --git a/include/emp/data/Trait.hpp b/include/emp/data/Trait.hpp index 6ebfe2f2da..8acaab5aa0 100644 --- a/include/emp/data/Trait.hpp +++ b/include/emp/data/Trait.hpp @@ -85,7 +85,7 @@ namespace emp { void SetMax(value_t max) { range.SetUpper(max); } value_t Eval(target_t & target) const { return fun(target); } - value_t EvalLimit(target_t & target) const { return range.Limit(fun(target)); } + value_t EvalLimit(target_t & target) const { return range.Clamp(fun(target)); } std::string EvalString(target_t & target) const { return std::to_string(EvalLimit(target)); } double EvalValue(target_t & target) const { return (double) EvalLimit(target); } diff --git a/include/emp/datastructs/IndexMap.hpp b/include/emp/datastructs/IndexMap.hpp index bdcc776ced..a19c78c42a 100644 --- a/include/emp/datastructs/IndexMap.hpp +++ b/include/emp/datastructs/IndexMap.hpp @@ -1,15 +1,23 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2015-2018 + * date: 2015-2022 */ /** * @file * @brief A simple class to weight items differently within a container and return the correct index. * @note Status: BETA * + * An IndexMap is a container where each item has a specified weight (specified as a double). + * The total weight of the container determines the max index point. When indexing into the + * container, each item is represented by a range of values equal to it's weight. Randomly + * indexing into the container will provide either item with a probability proportional to its + * weight. + * + * In this regular IndexMap, all items are kept in order (so the map starts at 0, then 1, then + * 2, etc.) If order is not required, UnorderedIndexMap is slightly faster. 
+ * * @todo Convert to a template that acts as a glorified vector, simplifying random selection? - * @todo Should operator[] index by element count or by weight? * @todo Make Raw*() function private. */ @@ -44,13 +52,14 @@ namespace emp { /// Which ID is the right child of the ID provided? size_t RightID(size_t id) const { return 2*id + 2; } - /// Sift through the nodes to find the where index zero maps to. + /// Sift through the nodes to find where index zero maps to. size_t CalcZeroOffset() const { size_t id = 0; while (id < num_items - 1) id = LeftID(id); return id - (num_items - 1); } + /// Convert an item ID to the internal position where it's stored. size_t ToInternalID(size_t id) const { return (id + zero_offset) % num_items + num_items-1; } @@ -59,10 +68,34 @@ namespace emp { return (id + _offset) % _items + _items-1; } + /// Convert and internal position to the item ID to which it refers. size_t ToExternalID(size_t id) const { return (id + 1 - zero_offset) % num_items; } + // Collect the weight at the specified index of the array (no conversions) + double RawWeight(size_t id) const { return weights[id]; } + + // Collect the probability at the specified index of the array (no conversions) + double RawProb(size_t id) const { ResolveRefresh(); return weights[id] / weights[0]; } + + /// Adjust the weight associated with a particular index in the map. + /// @param id is the identification number of the item whose weight is being adjusted. + /// @param new_weight is the new weight for that entry. + void RawAdjust(size_t id, const double new_weight) { + // Update this node. + const double weight_diff = new_weight - weights[id]; // Track change size for tree weights. + weights[id] = new_weight; // Update THIS item weight + + if (needs_refresh) return; // If we already need a refresh don't update tree weights! + + // Update tree to root. 
+ while (id > 0) { + id = ParentID(id); + weights[id] += weight_diff; + } + } + /// A Proxy class so that an index can be treated as an l-value. class Proxy { private: @@ -97,7 +130,10 @@ namespace emp { } IndexMap(size_t _items, double init_weight) : num_items(_items), zero_offset(CalcZeroOffset()), needs_refresh(true) - , weights(num_items, init_weight) { ; } + , weights(num_items*2-1, 0.0) + { + if (init_weight != 0.0) AdjustAll(init_weight); + } IndexMap(const IndexMap &) = default; IndexMap(IndexMap &&) = default; ~IndexMap() = default; @@ -111,11 +147,9 @@ namespace emp { double GetWeight() const { ResolveRefresh(); return weights[0]; } /// What is the current weight of the specified index? - double RawWeight(size_t id) const { return weights[id]; } double GetWeight(size_t id) const { return RawWeight(ToInternalID(id)); } /// What is the probability of the specified index being selected? - double RawProb(size_t id) const { ResolveRefresh(); return weights[id] / weights[0]; } double GetProb(size_t id) const { return RawProb(ToInternalID(id)); } /// Change the number of indices in the map. @@ -164,23 +198,6 @@ namespace emp { Clear(); } - /// Adjust the weight associated with a particular index in the map. - /// @param id is the identification number of the item whose weight is being adjusted. - /// @param new_weight is the new weight for that entry. - void RawAdjust(size_t id, const double new_weight) { - // Update this node. - const double weight_diff = new_weight - weights[id]; // Track change size for tree weights. - weights[id] = new_weight; // Update THIS item weight - - if (needs_refresh) return; // If we already need a refresh don't update tree weights! - - // Update tree to root. - while (id > 0) { - id = ParentID(id); - weights[id] += weight_diff; - } - } - void Adjust(size_t id, const double new_weight) { RawAdjust(ToInternalID(id), new_weight); } /// Adjust all index weights to the set provided. 
diff --git a/include/emp/datastructs/IndexSet.hpp b/include/emp/datastructs/IndexSet.hpp new file mode 100644 index 0000000000..24ee45275a --- /dev/null +++ b/include/emp/datastructs/IndexSet.hpp @@ -0,0 +1,528 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2023. + * + * @file IndexSet.hpp + * @brief Collection of indices, ideally optimized for memory size. + * @note Status: ALPHA + */ + +#ifndef EMP_DATASTRUCTS_INDEXSET_HPP_INCLUDE +#define EMP_DATASTRUCTS_INDEXSET_HPP_INCLUDE + +#include "../base/Ptr.hpp" +#include "../bits/BitVector.hpp" +#include "../math/constants.hpp" + +namespace emp { + + /// Index range is a simple pair of values indicating the start and end of a series of indices. + class IndexRange { + size_t start = 0; // First value in this range. + size_t end = 0; // First value after start NOT in this range; zero for empty range. + + public: + IndexRange() = default; + IndexRange(size_t val) : start(val), end(val+1) { } + IndexRange(size_t _start, size_t _end) : start(_start), end(_end) { } + IndexRange(const IndexRange &) = default; + + IndexRange & operator=(const IndexRange &) = default; + + auto operator<=>(const IndexRange &) const = default; + + size_t GetStart() const { return start; } + size_t GetEnd() const { return end; } + size_t GetSize() const { return end-start; } + + void SetStart(size_t in) { start = in; } + void SetEnd(size_t in) { end = in; } + + bool Has(size_t val) const { return val >= start && val < end; } + bool Has(IndexRange in) const { return in.start >= start && in.end < end; } + + /// Will identify if two ranges are next to each other or overlapping. 
+ bool IsConnected(IndexRange in) const { + return (in.start >= start && in.start <= end || + start >= in.start && start <= in.end); + } + + /// Grow this range (default, by one) + void Grow(size_t count=1) { end += count; } + + /// Insert a value into a range if valid; return false if not. + bool Insert(size_t val) { + if (val == end) { end++; return true; } + if (val == start - 1) { start--; return true; } + return Has(val); + } + + /// Extend the current range with a new one. Must be perfectly adjacent! + bool Append(IndexRange in) { + if (end == in.start) { end = in.end; return true; } + return false; + } + + /// @brief Expand this range to encompass a provided value. + /// @param val Value to expand through. + /// @return Whether the range has changed due to this expansion. + bool Expand(size_t val) { + if (val < start) start = val; + else if (val > end) end = val; + else return false; + return true; + } + + /// @brief Expand this range to encompass all provided values. + /// @param vals Values to expand through + /// @return Whether the range has changed due to this expansion. + template + bool Expand(size_t val1, size_t val2, Ts... args) { + return Expand(val1) + Expand(val2, args...); + } + + + /// Merge this range with another. Must be adjacent or overlap! + bool Merge(IndexRange in) { + if (!IsConnected(in)) return false; + return Expand(in.start) + Expand(in.end); // Use + to avoid short-circuiting. + } + }; + + /// IndexRanges is a class to maintain a series of ranges of indexes. The ranges will + /// always be kept sorted and non-adjacent (i.e., there will always be at least one index + /// missing between two ranges). + class IndexRangeSet { + emp::vector range_set; + + // Helper function to find the id of an IndexRange that a value belongs in or can extend; + // returns next-higher index if none fit perfectly. + // @CAO - consider doing a binary search. 
+ size_t _FindRange(size_t val) const { + for (size_t id = 0; id < range_set.size(); ++id) { + if (id <= range_set[id].GetEnd()) return id; + } + return range_set.size(); + } + + // Helper function to grow a range by one, possibly merging it with the next range. + void _GrowRange(size_t id) { + emp_assert(id < range_set.size()); + range_set[id].Grow(); + + // Test if we need to merge with the next range. + if (id+1 < range_set.size() && range_set[id].GetEnd() == range_set[id+1].GetStart()) { + range_set[id].SetEnd(range_set[id+1].GetEnd()); + range_set.erase(range_set.begin()+id+1); // Delete next range (now merged in) + } + } + + public: + IndexRangeSet() = default; + IndexRangeSet(const IndexRangeSet &) = default; + IndexRangeSet(IndexRangeSet &&) = default; + + IndexRangeSet & operator=(const IndexRangeSet &) = default; + IndexRangeSet & operator=(IndexRangeSet &&) = default; + + bool Has(size_t val) const { + size_t id = _FindRange(val); + if (id >= range_set.size()) return false; + return range_set[id].Has(val); + } + + size_t GetStart() const { + return range_set.size() ? range_set[0].GetStart() : emp::MAX_SIZE_T; + } + size_t GetEnd() const { + return range_set.size() ? range_set.back().GetEnd() : 0; + } + + size_t GetNumRanges() const { return range_set.size(); } + + /// @brief Calculate the total combined size of all ranges. + size_t GetSize() const { + size_t total = 0; + for (const auto & x : range_set) total += x.GetSize(); + return total; + } + + // Return all of the internal ranges. + const emp::vector & GetRanges() & { return range_set; } + + /// @brief Add a new value that belongs at the end of the sets. + /// @param val Value to add + /// @return Did the append work? If it's not at the end, returns false. 
+ bool Append(size_t val) { + if (range_set.size() == 0 || val > GetEnd()) range_set.emplace_back(val); // New Range + else if (val == GetEnd()) range_set.back().SetEnd(val+1); // Extend range + else return false; // Not at end + + return true; + } + + /// @brief Add an entire range that belongs at the end of the sets. + /// @param val Range to add + /// @return Did the append work? If it's not at the end, returns false. + bool Append(IndexRange in) { + // Are we adding on a new range? + if (range_set.size() == 0 || in.GetStart() > GetEnd()) { + range_set.emplace_back(in); + } + + // Are we extending an existing range? + else if (in.GetEnd() > GetEnd()) { + // Are we encompassing ALL existing ranges? + if (in.GetStart() <= range_set[0].GetStart()) { + range_set.resize(1); + range_set[0] = in; + } + + // Otherwise find the start and convert from there. + else { + const size_t start_id = _FindRange(in.GetStart()); + range_set[start_id].SetEnd(in.GetEnd()); + } + } + + else return false; // Not at end + + return true; + } + + /// @brief Insert a value into this range set + /// @param val Value to insert. + /// @return Was there a change due to this insertion (or was it already there?) + bool Insert(size_t val) { + // Are we inserting a new range onto the end? + if (Append(val)) return true; + + // Do we already have the value? + size_t id = _FindRange(val); + if (range_set[id].Has(val)) return false; + + // Are we extending the range (and possibly merging)? + else if (range_set[id].GetEnd() == val) _GrowRange(id); + + // Are we extending the beginning of the next range? + else if (range_set[id].GetStart() == val+1) range_set[id].Insert(val); + + // Otherwise we must insert an entirely new range. + else range_set.emplace(id, val); + + return true; + } + + /// @brief Insert a whole range into this set, merging other ranges as needed. + /// @param in New range to include. + /// @return Was there a change due to this insertion (or were they already there?) 
+ bool Insert(IndexRange in) { + // If the new range goes past the end, Append will take care of it. + if (Append(in)) return true; + + size_t start_id = _FindRange(in.GetStart()); + size_t end_id = _FindRange(in.GetEnd()); + emp_assert(start_id <= end_id); + + // If both are in the same range id, either insert a new range or modify an existing one. + if (start_id == end_id) { + // If the end of the new range is before the start of the found range, insert the new one! + if (in.GetEnd() < range_set[start_id].GetStart() - 1) { + range_set.insert(range_set.begin() + start_id, in); + } + + // Otherwise try to merge it into the existing range (will return false if already there) + else return range_set[start_id].Merge(in); + } + + // We are across multiple ranges. Collapse into first! + else { + if (in.GetEnd()+1 < range_set[end_id].GetStart()) --end_id; // Don't include end id. + range_set[start_id].Expand(in.GetStart(), in.GetEnd(), range_set[end_id].GetEnd()); + range_set.erase(range_set.begin()+start_id+1, range_set.begin()+end_id+1); + } + + return true; + } + + /// @brief Remove a single value from this index range. + /// @param val Value to remove + /// @return Did the range change due to this removal? + bool Remove(size_t val) { + if (!Has(val)) return false; + size_t id = _FindRange(val); + IndexRange & cur_range = range_set[id]; + if (cur_range.GetSize() == 1) range_set.erase(range_set.begin()+id); + else if (cur_range.GetStart() == val) cur_range.SetStart(cur_range.GetStart()+1); + else if (cur_range.GetEnd()-1 == val) cur_range.SetEnd(cur_range.GetEnd()-1); + else { + // Need to split the range. + range_set.insert(range_set.begin()+id+1, IndexRange{val+1,cur_range.GetEnd()}); + cur_range.SetEnd(val); + } + } + }; + + /// @brief A class to maintain a set of indices with a bit vector to represent them. + class IndexBits { + emp::BitVector bits; + size_t offset = 0; // Always a multiple of 64. + + // Figure out the best offset for a given value. 
+ size_t _CalcOffset(size_t val) const { + return (val >> 6) << 6; + } + + /// @brief Increase the range of valid values + /// @param val Value to make sure can be set. + void _ExpandRange(size_t val) { + if (bits.GetSize() == 0) { // Must setup bits + offset = _CalcOffset(val); + bits.Resize(64); + } + else if (val < offset) { // Value is before offset... + const size_t new_offset = _CalcOffset(val); + bits.PushFront(offset - new_offset); + offset = new_offset; + } + else if (bits.GetSize() <= val-offset) { // Value is out of range... + bits.Resize(_CalcOffset(val) + 64 - offset); + } + } + public: + IndexBits() = default; + IndexBits(const IndexBits &) = default; + IndexBits(IndexBits &&) = default; + IndexBits(size_t min_val, size_t max_val) : offset(_CalcOffset(min_val)) { + bits.Resize(_CalcOffset(max_val) + 64 - offset); + } + + IndexBits & operator=(const IndexBits &) = default; + IndexBits & operator=(IndexBits &&) = default; + + bool Has(size_t val) const { return (val < offset) ? false : bits[val-offset]; } + size_t GetStart() const { return static_cast(bits.FindOne()) + offset; } + size_t GetEnd() const { return static_cast(bits.FindMaxOne()) + offset; } + size_t GetNumRanges() const { + return (bits & ~(bits >> 1)).CountOnes(); + } + size_t GetSize() const { return bits.CountOnes(); } + bool Insert(size_t val) { + _ExpandRange(val); // Make sure there is room for the new value. + bits.Set(val-offset); + } + bool Insert(IndexRange in) { + _ExpandRange(in.GetStart()); + _ExpandRange(in.GetEnd()); + bits.SetRange(in.GetStart()-offset, in.GetEnd()-offset); + } + bool Remove(size_t val) { + bits.Clear(val - offset); + } + }; + + /// @brief IndexSet maintains a collection of indices that can be easily manipulated. + /// It will try to adjust representation to maintain speed and memory efficiency + class IndexSet { + private: + // For zero to three entries, it will maintain values directly. 
+ // For more than four entries it will use either bits or ranges based on how + // packed the values are into ranges. + enum class index_t { NONE=0, VALS1, VALS2, VALS3, RANGES, BITS }; + struct _Index_Vals { size_t id1; size_t id2; size_t id3; }; // Few values + + union { + _Index_Vals vals; + IndexRangeSet ranges; + IndexBits bits; + }; + index_t type = index_t::NONE; + + // --- Helper functions --- + + /// Free whatever type we currently have. + void _ReleaseUnion() { + if (type == index_t::BITS) bits.~IndexBits(); + else if (type == index_t::RANGES) ranges.~IndexRangeSet(); + } + + /// Convert the internal representation to use bits. + void _ToBits() { + emp_assert(type != index_t::NONE, "Cannot start IndexSet as type BITS"); + if (type == index_t::BITS) return; // Already bits! + + IndexBits new_bits(GetMin(), GetMax()); + + switch (type) { + case index_t::VALS3: new_bits.Insert(vals.id3); [[fallthrough]]; + case index_t::VALS2: new_bits.Insert(vals.id2); [[fallthrough]]; + case index_t::VALS1: new_bits.Insert(vals.id1); + break; + case index_t::RANGES: + for (const auto & range : ranges.GetRanges()) { + new_bits.Insert(range); + } + break; + } + + _ReleaseUnion(); + new (&bits) IndexBits(std::move(new_bits)); + } + + /// Convert the internal representation to use a ranges. + void _ToRanges() { + emp_assert(type != index_t::NONE, "Cannot start IndexSet as type RANGES"); + if (type == index_t::RANGES) return; // Already ranges format! 
+ + IndexRangeSet new_ranges; + + switch (type) { + case index_t::VALS3: new_ranges.Insert(vals.id3); [[fallthrough]]; + case index_t::VALS2: new_ranges.Insert(vals.id2); [[fallthrough]]; + case index_t::VALS1: new_ranges.Insert(vals.id1); + break; + case index_t::BITS: + for (const auto & range : ranges.GetRanges()) { + new_ranges.Insert(range); + } + break; + } + + _ReleaseUnion(); + new (&bits) IndexBits(std::move(new_bits)); + + } + + public: + static constexpr const size_t npos = static_cast(-1); + + IndexSet() = default; + ~IndexSet() { + switch (type) { + case index_t::ARRAY: + ids.array.ids.DeleteArray(); + break; + case index_t::BITS: + ids.bits.bits.DeleteArray(); + break; + } + } + + size_t GetSize() const { + switch (type) { + case index_t::NONE: return 0; + case index_t::VALS1: return 1; + case index_t::VALS2: return 2; + case index_t::VALS3: return 3; + case index_t::RANGE: return ids.range.end - ids.range.start; + case index_t::ARRAY: return ids.array.num_ids; + case index_t::BITS: + size_t count = 0; + for (size_t i = 0; i < ids.bits.num_fields; ++i) { + count += emp::count_bits(ids.bits.bits[i]); + } + return count; + } + } + + bool Has(size_t id) const { + switch (type) { + case index_t::NONE: return false; + case index_t::VALS1: return ids.vals.id1 == id; + case index_t::VALS2: return ids.vals.id1 == id || ids.vals.id2 == id; + case index_t::VALS3: return ids.vals.id1 == id || ids.vals.id2 == id || ids.vals.id3 == id; + case index_t::RANGE: return id >= ids.range.start && id < ids.range.end; + case index_t::ARRAY: return ids.array.num_ids; + case index_t::BITS: { + if (id < ids.bits.offset) return false; + id = id - ids.bits.offset; + const size_t field = id / NUM_FIELD_BITS; + if (field >= ids.bits.num_fields) return false; + const size_t shift = id % NUM_FIELD_BITS; + return (ids.bits.bits[field] >> shift) & 1; + } + } + } + + size_t GetMin() const { + switch (type) { + case index_t::NONE: return npos; + case index_t::VALS1: return 
ids.vals.id1; + case index_t::VALS2: return ids.vals.id1; + case index_t::VALS3: return ids.vals.id1; + case index_t::RANGE: return ids.range.start; + case index_t::ARRAY: return ids.array.ids[0]; + case index_t::BITS: + return emp::find_bit(ids.bits.bits[0]) + ids.bits.offset; + } + } + + size_t GetMax() const { + switch (type) { + case index_t::NONE: return npos; + case index_t::VALS1: return ids.vals.id1; + case index_t::VALS2: return ids.vals.id2; + case index_t::VALS3: return ids.vals.id3; + case index_t::RANGE: return ids.range.end - 1; + case index_t::ARRAY: return ids.array.ids[ids.array.num_ids-1]; + case index_t::BITS: { + const size_t field_id = ids.bits.num_fields - 1; + const size_t offset = field_id * NUM_FIELD_BITS + ids.bits.offset; + return emp::find_last_bit(ids.bits.bits[field_id]) + offset; + } + } + } + + // Are all of the indicies one after the next? + bool IsConsecutive() const { + switch (type) { + case index_t::NONE: return true; + case index_t::VALS1: return true; + case index_t::VALS2: return ids.vals.id2 == ids.vals.id1+1; + case index_t::VALS3: return ids.vals.id3 == ids.vals.id2+1 && ids.vals.id2 == ids.vals.id1+1; + case index_t::RANGE: return true; + case index_t::ARRAY: + case index_t::BITS: + return GetSize() == GetMax() - GetMin() + 1; + } + } + + void Set(size_t id) { + switch (type) { + case index_t::NONE: ids.vals.id1 = id; type=index_t::VALS1; break; + case index_t::VALS1: if (!Has(id)) { ids.vals.id2 = id; type=index_t::VALS2; } break; + case index_t::VALS2: if (!Has(id)) { ids.vals.id3 = id; type=index_t::VALS3; } break; + case index_t::VALS3: + if (!Has(id)) { + // If current values are consecutive, try to make a range. 
+ if (IsConsecutive()) { + if (id == ids.vals.id1 - 1) { + size_t max_val = ids.vals.id3; + ids.range.start = id; + ids.range.end = max_val+1; + break; + } else if (id == ids.vals.id3 + 1) { + size_t min_val = ids.vals.id1; + ids.range.start = min_val; + ids.range.end = id+1; + break; + } + } + + // If we made it here, values are not consecutive. + // For now: ASSUME we shift to BITS. + const size_t min_val = std::min(id, ids.vals.id1); + const size_t max_val = std::max(id, ids.vals.id3); + const size_t num_bits = max_val - min_val + 1; + const size_t num_fields = (num_bits / NUM_FIELD_BITS + 1) * 2; + } + case index_t::RANGE: return ids.range.end - ids.range.start; + case index_t::ARRAY: return ids.array.num_ids; + case index_t::BITS: return ids.bits.num_ids; + } + } + }; +} + +#endif // #ifndef EMP_DATASTRUCTS_INDEXSET_HPP_INCLUDE diff --git a/include/emp/datastructs/QueueCache.hpp b/include/emp/datastructs/QueueCache.hpp index 6eb484a143..cfb0ab65e6 100644 --- a/include/emp/datastructs/QueueCache.hpp +++ b/include/emp/datastructs/QueueCache.hpp @@ -1,12 +1,13 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2020 + * date: 2020-23 */ /** * @file * @brief A simple implementation of a Least-Recently Used Cache. - * It orders elements by access time and removes the stalest ones in case maximum capacity is reached. + * It orders elements by access time and removes the stalest ones in case + * maximum capacity is reached. 
*/ #ifndef EMP_DATASTRUCTS_QUEUECACHE_HPP_INCLUDE @@ -22,6 +23,8 @@ #include "../base/assert.hpp" +#include "map_utils.hpp" + namespace emp { template < class Key, @@ -75,8 +78,8 @@ // Delete given iterator from cache // @param it cache_map iterator to element to be deleted from cache void Delete(const typename cache_map_t::iterator it) { - cache_map.erase(it); cache_list.erase(it->second); + cache_map.erase(it); } public: @@ -107,9 +110,8 @@ /// Delete element from cache. /// @param key Key to delete from cache void Delete(const Key& key) { - Delete( - cache_map.find(key) - ); + emp_assert(emp::Has(cache_map, key)); + Delete(cache_map.find(key)); } /// Does cache contain key? @@ -124,17 +126,15 @@ /// @param val Value of element to store /// @return Iterator to newly-added element in cache queue typename cache_list_t::iterator Put(const Key& key, const Value& val) { - // try to find element in map + // If the element is already in the cache, delete it. const auto found = cache_map.find(key); if (found != cache_map.end()) { Delete(found); } - // put element into our cache - cache_list.emplace_front(key, val); - // add pointer to this element to our map - cache_map.emplace(key, cache_list.begin()); - // make sure we don't have more elements than our capacity - Shrink(); + + cache_list.emplace_front(key, val); // Put element into the cache + cache_map.emplace(key, cache_list.begin()); // Add element pointer to map + Shrink(); // Reduce if we are over capacity return cache_list.begin(); } diff --git a/include/emp/datastructs/UnorderedIndexMap.hpp b/include/emp/datastructs/UnorderedIndexMap.hpp index bc33297d59..b503598e14 100644 --- a/include/emp/datastructs/UnorderedIndexMap.hpp +++ b/include/emp/datastructs/UnorderedIndexMap.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2015-2021. + * date: 2015-2022. 
*/ /** * @file @@ -47,7 +47,7 @@ namespace emp { class Proxy { private: UnorderedIndexMap & index_map; ///< Which index map is this proxy from? - size_t id; ///< Which id does it represent? + size_t id; ///< Which id does it represent? public: Proxy(UnorderedIndexMap & _im, size_t _id) : index_map(_im), id(_id) { ; } operator double() const { return index_map.RawWeight(id); } @@ -68,11 +68,21 @@ namespace emp { } public: - /// Construct an UnorderedIndexMap where num_items is the maximum number of items that can be placed - /// into the data structure. All item weights default to zero. + /// Construct an UnorderedIndexMap where num_items is the maximum number of items that + /// can be placed into the data structure. All item weights default to zero. UnorderedIndexMap(size_t _items=0, double init_weight=0.0) - : num_items(_items), num_nodes(_items-1), needs_refresh(_items && (init_weight > 0.0)), weights(0) + : num_items(_items), num_nodes(_items-1), + needs_refresh(_items && (init_weight > 0.0)), weights(0) { if (_items > 0) weights.resize(_items*2-1, init_weight); } + /// Construct an UnorderedIndexMap with a specified initial set of weights. + UnorderedIndexMap(const emp::vector & in_weights) + : num_items(in_weights.size()), num_nodes(num_items-1), needs_refresh(true) + , weights(num_items*2 - 1) + { + emp_assert(num_items > 0, "UnorderedIndexMaps should not be initialized with empty weights"); + for (size_t i = 0; i < num_items; i++) weights[i + num_nodes] = in_weights[i]; + } + UnorderedIndexMap(const UnorderedIndexMap &) = default; UnorderedIndexMap(UnorderedIndexMap &&) = default; ~UnorderedIndexMap() = default; @@ -156,7 +166,7 @@ namespace emp { void Adjust(size_t id, const double new_weight) { RawAdjust(id + num_nodes, new_weight); } - /// Adjust all index weights to the set provided. 
+ /// Adjust all index & new_weights) { num_items = new_weights.size(); num_nodes = num_items - 1; @@ -167,7 +177,7 @@ namespace emp { needs_refresh = true; } - /// Adjust all index weights to the set provided. + /// Adjust all index weights to the single weight provided. void AdjustAll(double new_weight) { for (size_t i = 0; i < num_items; i++) weights[i + num_nodes] = new_weight; needs_refresh = true; @@ -217,7 +227,7 @@ namespace emp { } /// Indicate that we need to adjust weights before relying on them in the future; this will - /// prevent refreshes from occuring immediately and is useful when many updates to weights are + /// prevent refreshes from occurring immediately and is useful when many updates to weights are /// likely to be done before any are accessed again. void DeferRefresh() { needs_refresh = true; diff --git a/include/emp/datastructs/hash_utils.hpp b/include/emp/datastructs/hash_utils.hpp index 1bdf191680..8e66f461e8 100644 --- a/include/emp/datastructs/hash_utils.hpp +++ b/include/emp/datastructs/hash_utils.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2019-2021. + * date: 2019-2022. */ /** * @file @@ -16,11 +16,11 @@ #include #include #include +#include #include #include #include "../base/Ptr.hpp" -#include "../polyfill/span.hpp" namespace emp { @@ -108,7 +108,7 @@ namespace emp { // helper functions for murmur hash #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace internal { - constexpr inline uint64_t rotate(const size_t x, const size_t r) noexcept { + constexpr inline uint64_t rotate(const uint64_t x, const uint64_t r) noexcept { return (x << r) | (x >> (64 - r)); } constexpr inline void fmix64(uint64_t& k) noexcept { @@ -129,13 +129,13 @@ namespace emp { /// @param key Span of bytes to hash. /// @param seed Optional seed. /// @return Hash of key. 
- constexpr inline size_t murmur_hash( + constexpr inline uint64_t murmur_hash( const std::span key, - const size_t seed = 0 + const uint64_t seed = 0 ) noexcept { // define constants - const size_t numbytes = key.size(); - const size_t nblocks = numbytes / 16; + const uint64_t numbytes = key.size(); + const uint64_t nblocks = numbytes / 16; const uint64_t c1 = 0x87c37b91114253d5LLU; const uint64_t c2 = 0x4cf5ad432745937fLLU; diff --git a/include/emp/datastructs/map_utils.hpp b/include/emp/datastructs/map_utils.hpp index f93bf798ac..9f97841274 100644 --- a/include/emp/datastructs/map_utils.hpp +++ b/include/emp/datastructs/map_utils.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2017 + * date: 2016-2023 */ /** * @file @@ -12,7 +12,10 @@ #ifndef EMP_DATASTRUCTS_MAP_UTILS_HPP_INCLUDE #define EMP_DATASTRUCTS_MAP_UTILS_HPP_INCLUDE +#include #include +#include +#include #include #include "../base/map.hpp" @@ -20,23 +23,73 @@ namespace emp { + template + std::string MapToString(const MAP_T & in_map) { + std::stringstream ss; + bool use_comma = false; + for (const auto & [key, value] : in_map) { + if (use_comma) ss << ","; + ss << "{" << key << ":" << value << "}"; + use_comma = true; + } + return ss.str(); + } + /// Take any map type, and run find to determine if a key is present. template inline bool Has( const MAP_T & in_map, const KEY_T & key ) { return in_map.find(key) != in_map.end(); } + // Check to see if any of the elements in a map satisfy a function. + template + bool AnyOf(const std::map & c, FUN_T fun) { + // If the provided function takes just the element type, that's all we should give it. + if constexpr (std::is_invocable_r()) { + return std::any_of(c.begin(), c.end(), [fun](auto x){ return fun(x.second); }); + } + + // Otherwise provide both key and element. 
+ else { + return std::any_of(c.begin(), c.end(), [fun](auto x){ return fun(x.first, x.second); }); + } + } + + // Check to see if all of the elements in a map satisfy a function. + template + bool AllOf(const std::map & c, FUN_T fun) { + // If the provided function takes just the element type, that's all we should give it. + if constexpr (std::is_invocable_r()) { + return std::all_of(c.begin(), c.end(), [fun](auto x){ return fun(x.second); }); + } + + // Otherwise provide both key and element. + else { + return std::all_of(c.begin(), c.end(), [fun](auto x){ return fun(x.first, x.second); }); + } + } + + // Check to see if none of the elements in a map satisfy a function. + template + bool NoneOf(const std::map & c, FUN_T fun) { + // If the provided function takes just the element type, that's all we should give it. + if constexpr (std::is_invocable_r()) { + return std::none_of(c.begin(), c.end(), [fun](auto x){ return fun(x.second); }); + } + + // Otherwise provide both key and element. + else { + return std::none_of(c.begin(), c.end(), [fun](auto x){ return fun(x.first, x.second); }); + } + } template - inline auto Keys( const MAP_T & in_map) -> emp::vectorfirst)>::type> { - using KEY_T = typename std::remove_constfirst)>::type; - emp::vector keys; + inline auto Keys( const MAP_T & in_map) { + emp::vector keys; for (auto it : in_map) { keys.push_back(it.first); } - return keys; - } @@ -59,6 +112,15 @@ namespace emp { return val_it->second; } + /// Take any map and element, run find() member function, and return a reference to + /// the result found; trip assert if the result is not present.
+ template + inline const auto & GetConstRef( const MAP_T & in_map, const KEY_T & key) { + auto val_it = in_map.find(key); + emp_assert(val_it != in_map.end()); + return val_it->second; + } + // The following two functions are from: // http://stackoverflow.com/questions/5056645/sorting-stdmap-using-value diff --git a/include/emp/datastructs/ra_map.hpp b/include/emp/datastructs/ra_map.hpp new file mode 100644 index 0000000000..86bff50254 --- /dev/null +++ b/include/emp/datastructs/ra_map.hpp @@ -0,0 +1,173 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file ra_map.hpp + * @brief This file defines a Random Access Map template. + * @note Status: ALPHA + * + * A random access map allows for simple traversal by index and a guarantee that a value at a + * given index will always be at that index unless any map element is deleted. This allows + * storage of indices for maps with a fixed layout, resulting in easy access. + */ + +#ifndef EMP_DATASTRUCTS_RA_MAP_HPP_INCLUDE +#define EMP_DATASTRUCTS_RA_MAP_HPP_INCLUDE + +#include + +#include "../base/unordered_map.hpp" +#include "../base/vector.hpp" +#include "../math/constants.hpp" + +namespace emp { + + /// This class uses a combination of a hashtable (std::unordered_map) and emp::vector to + /// insert, look up, and delete values in constant time, while still being able to + /// step through all values (albeit in an arbitrary order). + /// + /// @note The arbitrary order of values may change if any values are deleted.
+ + template , + typename KeyEqual = std::equal_to, + typename Allocator = std::allocator< std::pair > + > + class ra_map { + public: + using key_type = KEY_T; + using mapped_type = T; + using value_type = std::pair; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename std::allocator_traits::pointer; + using const_pointer = typename std::allocator_traits::const_pointer; + + using layout_t = emp::unordered_map; + + private: + layout_t id_map; ///< Map to find keys in vector. + emp::vector vals; ///< Vector of all values. + + using this_t = ra_map; + public: + ra_map() = default; + ra_map(const ra_map &) = default; + ra_map(ra_map &&) = default; + this_t & operator=(const ra_map &) = default; + this_t & operator=(ra_map &&) = default; + + // -- Iterators -- + auto begin() { return vals.begin(); } + auto cbegin() const { return vals.cbegin(); } + auto end() { return vals.end(); } + auto cend() const { return vals.cend(); } + + // -- Capacity -- + size_t size() const { return vals.size(); } ///< Number of entries in map. + bool empty() const { return size() == 0; } ///< Are there NO values in map? + size_t max_size() const { return id_map.max_size(); } ///< Max system limit on size. + + // -- Modifiers -- + void clear() { id_map.clear(); vals.resize(0); } ///< Remove all values from container. + + /// Insert a new value into container by copy; return position. + size_t insert(const value_type & v) { + auto pos_it = id_map.find(v.first); + if (pos_it != id_map.end()) return pos_it->second; // Already in map. + const size_t pos = vals.size(); + id_map[v.first] = pos; + vals.emplace_back(v); + return pos; + } + + /// Insert a new value into container by move; return position. 
+ size_t insert(value_type && v) { + auto pos_it = id_map.find(v.first); + if (pos_it != id_map.end()) return pos_it->second; // Already in map. + const size_t pos = vals.size(); + id_map[v.first] = pos; + vals.emplace_back(std::move(v)); + return pos; + } + + /// Construct a new value in place in the container; return position. + template + size_t emplace(Ts &&... args) { + const size_t new_pos = vals.size(); + vals.emplace_back(std::forward(args)...); + auto old_pos_it = id_map.find(vals.back().first); + if (old_pos_it != id_map.end()) { + vals.resize(vals.size()-1); // Destroy newly created instance. + return old_pos_it->second; // Return old position in map. + } + id_map[vals.back().first] = new_pos; // Save new position for later lookup. + return new_pos; // And return it. + } + + /// Erase a specific value from the container. + bool erase(const KEY_T & key) { + if (!count(key)) return false; // Not in map. + + // Find out where key is in id_map and clear it. + const size_t pos = id_map[key]; + id_map.erase(key); + + // Move the former last value to the now-empty spot. + const size_t last_pos = vals.size() - 1; + if (pos != last_pos) { + const_cast(vals[pos].first) = vals[last_pos].first; + vals[pos].second = vals[last_pos].second; + id_map[vals[pos].first] = pos; + } + vals.resize(last_pos); + return true; + } + + + size_t count(const KEY_T & key) const { return id_map.count(key); } /// Is value included? (0 or 1). + + /// Index into the ra_map by key.
+ T & operator[](key_type key) { + auto key_it = id_map.find(key); + if (key_it == id_map.end()) { + return NewEntry(key); + } + return vals[key_it->second].second; + } + + // --- Empirical only commands --- + + const layout_t & GetLayout() const { return id_map; } + + T & NewEntry(key_type key) { + emp_assert(id_map.find(key) == id_map.end(), "ra_map::NewEntry must be an unused key!", key); + const size_t pos = vals.size(); + id_map[key] = pos; + vals.emplace_back(); + return vals.back().second; + } + + bool Has(key_type key) const { return id_map.find(key) != id_map.end(); } + + size_t GetID(key_type key) const { + auto key_it = id_map.find(key); + return (key_it == id_map.end()) ? emp::MAX_SIZE_T : key_it->second; + } + + key_type & KeyAtID(size_t id) { return vals[id]->first; } + + T & AtID(size_t id) { return vals[id]->second; } + const T & AtID(size_t id) const { return vals[id].second; } + }; + +} + +#endif // #ifndef EMP_DATASTRUCTS_RA_MAP_HPP_INCLUDE diff --git a/include/emp/datastructs/span_utils.hpp b/include/emp/datastructs/span_utils.hpp new file mode 100644 index 0000000000..6130086de6 --- /dev/null +++ b/include/emp/datastructs/span_utils.hpp @@ -0,0 +1,57 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file span_utils.hpp + * @brief A set of simple functions to manipulate std::span + * @note Status: BETA + * + */ + +#ifndef EMP_DATASTRUCTS_SPAN_UTILS_HPP_INCLUDE +#define EMP_DATASTRUCTS_SPAN_UTILS_HPP_INCLUDE + +#include +#include + +#include "../base/array.hpp" +#include "../base/vector.hpp" + +namespace emp { + + /// Print the contents of a span. + template + void Print(const std::span & v, std::ostream & os=std::cout, const std::string & spacer=" ") { + for (size_t id = 0; id < v.size(); id++) { + if (id) os << spacer; // Put a space before second element and beyond. 
+ os << emp::to_string(v[id]); + } + } + + /// Convert an emp::array to an equivalent span + template + auto to_span(emp::array a) { return std::span(a); } + + /// Convert an emp::vector to an equivalent span + template + auto to_span(emp::vector v) { return std::span(v); } +} + +namespace std { + // A generic streaming function for spans. + template + std::ostream & operator<<(std::ostream & out, std::span s) { + emp::Print(s, out); + return out; + } + + template + std::istream & operator>>(std::istream & is, std::span s) { + for (T & x : s) is >> x; + return is; + } + +} + +#endif // #ifndef EMP_DATASTRUCTS_SPAN_UTILS_HPP_INCLUDE diff --git a/include/emp/datastructs/tuple_struct.hpp b/include/emp/datastructs/tuple_struct.hpp index c9cb546511..793f5397b6 100644 --- a/include/emp/datastructs/tuple_struct.hpp +++ b/include/emp/datastructs/tuple_struct.hpp @@ -8,7 +8,7 @@ * @brief These macros will build a tuple and accessors to that tuple's members inside of a * class definintion. * - * Status: ALPHA + * @note Status: ALPHA * * "But WHY???" you ask. Let me explain: Keeping a tuple allows us to easily track the * members in the stuct or class, and makes possible powerful types of reflection diff --git a/include/emp/datastructs/tuple_utils.hpp b/include/emp/datastructs/tuple_utils.hpp index 7fe5f1a39b..8d93543c8c 100644 --- a/include/emp/datastructs/tuple_utils.hpp +++ b/include/emp/datastructs/tuple_utils.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Functions to simplify the use of std::tuple - * Status: RELEASE + * @note Status: RELEASE */ #ifndef EMP_DATASTRUCTS_TUPLE_UTILS_HPP_INCLUDE diff --git a/include/emp/datastructs/vector_utils.hpp b/include/emp/datastructs/vector_utils.hpp index 9046b58f83..5b2cf7175a 100644 --- a/include/emp/datastructs/vector_utils.hpp +++ b/include/emp/datastructs/vector_utils.hpp @@ -29,6 +29,21 @@ namespace emp { + /// Remove and return the first element of a vector. 
+ template + T PopFront(emp::vector & v) { + emp_assert(v.size()); + T out = v[0]; + v.erase(v.begin()); + return out; + } + + /// Insert a value at a specified position in a vector. + template + void InsertAt(emp::vector & v, size_t id, T value) { + v.insert(v.begin()+id, value); + } + #ifndef DOXYGEN_SHOULD_SKIP_THIS /// Base case for Append; we just have a single vector with nothing to append. template @@ -64,13 +79,19 @@ namespace emp { /// Convert a map to a vector. template - emp::vector ToVector(const std::map & in_map, T default_val=T()) { + emp::vector ToVector( + const std::map & in_map, + T default_val=T(), + INDEX_T index_cap=32768 + ) { INDEX_T max_index = in_map.back().second; if (max_index < 0) max_index = 0; // In case all entries are negative... + if (max_index >= index_cap) max_index=index_cap-1; emp::vector out_vec; out_vec.resize(max_index+1, default_val); for (auto [index, val] : in_map) { - if (index < 0) continue; // Skip entries that can't go into a vector... + if (index < 0) continue; // Skip entries that can't go into a vector... + if (index >= index_cap) break; // Stop when we've hit the upper limit on vector size. out_vec[index] = val; } return out_vec; @@ -78,10 +99,14 @@ namespace emp { /// Convert an unordered map to a vector. template - emp::vector ToVector(const std::unordered_map & in_map, T default_val=T()) { + emp::vector ToVector( + const std::unordered_map & in_map, + T default_val=T(), + INDEX_T index_cap=32768 + ) { emp::vector out_vec; for (auto [index, val] : in_map) { - if (index < 0) continue; // Skip entries that can't go into a vector... + if (index < 0 || index >= index_cap) continue; // Skip entries that can't go into a vector... if (((size_t) index) >= out_vec.size()) out_vec.resize(index+1, default_val); out_vec[index] = val; } @@ -126,6 +151,29 @@ namespace emp { return true; } + /// Remove value at an index. 
+ template + void RemoveAt(emp::vector & v, size_t id) { + v.erase(v.begin() + id); + } + + /// Remove values starting at an index. + template + void RemoveAt(emp::vector & v, size_t id, size_t count) { + if (!count) return; + v.erase(v.begin() + id, v.begin() + id + count); + } + + /// Return a new vector containing the same elements as @param v + /// with any duplicate elements removed. + /// Not guaranteed to preserve order + template + emp::vector RemoveDuplicates(const emp::vector & v) { + std::set temp_set(v.begin(), v.end()); + emp::vector new_vec(temp_set.begin(), temp_set.end()); + return new_vec; + } + /// Return whether a value exists in a vector template bool Has(const emp::vector & v, const T & val) { @@ -297,16 +345,6 @@ namespace emp { return numbers; } - /// Return a new vector containing the same elements as @param v - /// with any duplicate elements removed. - /// Not guaranteed to preserve order - template - emp::vector RemoveDuplicates(const emp::vector & v) { - std::set temp_set(v.begin(), v.end()); - emp::vector new_vec(temp_set.begin(), temp_set.end()); - return new_vec; - } - /// Build a vector with a range of values from min to max at the provided step size. template static inline emp::vector BuildRange(T min, T max, T step=1) { diff --git a/include/emp/debug/alert.hpp b/include/emp/debug/alert.hpp index 3dc5e5c4a1..000ada51c0 100644 --- a/include/emp/debug/alert.hpp +++ b/include/emp/debug/alert.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Define an Alert function that goes to std::cerr in c++ or to Alert() in Javascript. 
- * Status: RELEASE + * @note Status: RELEASE */ #ifndef EMP_DEBUG_ALERT_HPP_INCLUDE diff --git a/include/emp/debug/debug.hpp b/include/emp/debug/debug.hpp index 10a734022b..362c8692eb 100644 --- a/include/emp/debug/debug.hpp +++ b/include/emp/debug/debug.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2015-2017 + * date: 2015-2022 */ /** * @file @@ -21,7 +21,7 @@ namespace emp { - /// BlockRelease() will halt compilation if NDEBUG is on and EMP_NO_BLOCK is off. + /// BlockRelease(true) will halt compilation if NDEBUG is on and EMP_NO_BLOCK is off. /// It is useful to include alongside debug code that you want to remember to remove when you /// are done debugging; it is automatically included with the emp_debug() function below. /// If you want to intentionally compile in release mode, make sure to define EMP_NO_BLOCK. @@ -29,17 +29,22 @@ namespace emp { #ifdef EMP_NO_BLOCK #define BlockRelease(BLOCK) #else - #define BlockRelease(BLOCK) static_assert(!BLOCK, "Release blocked due to debug material.") + #define BlockRelease(BLOCK) \\ + std::cerr << "Release block at " << __FILE___ << ", line " << __LINE__ << std::endl;\\ + static_assert(!BLOCK, "Release blocked due to debug material.") #endif #else #define BlockRelease(BLOCK) #endif /// The EMP_DEBUG macro executes its contents in debug mode, but otherwise ignores them. + /// test_debug() can be used inside of an if-constexpr for code you want only in debug mode. #ifdef NDEBUG -#define EMP_DEBUG(...) + #define EMP_DEBUG(...) + constexpr bool test_debug() { return false; } #else -#define EMP_DEBUG(...) __VA_ARGS__ + #define EMP_DEBUG(...) 
__VA_ARGS__ + constexpr bool test_debug() { return true; } #endif template @@ -48,19 +53,10 @@ namespace emp { std::cerr << std::endl; } - /// emp_debug() will print its contents as a message in debug mode and BLOCK release mode until it's removed. - #define emp_debug(...) BlockRelease(true); emp::emp_debug_print(__VA_ARGS__); - - /// Depricated() prints its contents exactly once to notify a user of a depricated function. - static void Depricated(const std::string & name, const std::string & desc="") { - static std::set name_set; - if (name_set.count(name) == 0) { - std::cerr << "Deprication WARNING: " << name << std::endl; - if (desc != "") std::cerr << desc << std::endl; - name_set.insert(name); - } - } - + /// emp_debug() will print its contents as a message in debug mode and BLOCK release mode until + /// it is removed. It's a useful tool for printing "Ping1", "Ping2", etc., without forgetting to + /// remove them. + #define emp_debug(...) { BlockRelease(true); emp::emp_debug_print(__VA_ARGS__); } } #endif // #ifndef EMP_DEBUG_DEBUG_HPP_INCLUDE diff --git a/include/emp/functional/AnyFunction.hpp b/include/emp/functional/AnyFunction.hpp index 37383fb51c..52b82a451f 100644 --- a/include/emp/functional/AnyFunction.hpp +++ b/include/emp/functional/AnyFunction.hpp @@ -52,6 +52,8 @@ namespace emp { /// Determine if this BaseFunction can be converted into a derived emp::Function template bool ConvertOK(); + + virtual emp::Ptr Clone() = 0; }; @@ -82,6 +84,10 @@ namespace emp { /// Get the std::function to be called. const fun_t & GetFunction() const { return fun; } + + emp::Ptr Clone() override{ + return emp::NewPtr>(fun); + } }; @@ -90,6 +96,7 @@ namespace emp { private: emp::Ptr fun = nullptr; + private: /// Helper to build a proper derived function. template auto MakePtr( T in_fun ) { @@ -103,6 +110,29 @@ namespace emp { // By default, build an empty function.
AnyFunction() { ; } + AnyFunction(const AnyFunction& other){ // copy constructor + fun = other.CloneFunc(); + } + + AnyFunction(AnyFunction&& other) noexcept{ // move constructor + fun = other.CloneFunc(); + other.fun.Delete(); + other.fun = nullptr; + } + + AnyFunction& operator=(const AnyFunction& other){ // copy assignment + Clear(); + fun = other.CloneFunc(); + return *this; + } + + AnyFunction& operator=(AnyFunction&& other) noexcept{ // move assignment + Clear(); + fun = other.CloneFunc(); + other.Clear(); + return *this; + } + /// If an argument is provided, set the function. template AnyFunction(T in_fun) { @@ -113,6 +143,10 @@ namespace emp { void Clear() { if (fun) fun.Delete(); fun = nullptr; } size_t NumArgs() const { return fun ? fun->NumArgs() : 0; } + emp::Ptr CloneFunc() const{ + if(fun == nullptr) return nullptr; + return fun->Clone(); + } operator bool() { return (bool) fun; } diff --git a/include/emp/games/Mancala.hpp b/include/emp/games/Mancala.hpp index 797b31a511..241596cb4b 100644 --- a/include/emp/games/Mancala.hpp +++ b/include/emp/games/Mancala.hpp @@ -30,8 +30,9 @@ namespace emp { side_t boardA; // Current board state for side A. side_t boardB; // Current board state for side B. + size_t turn_count; // How many turns has this game been played? bool over = false; // Has the game ended? - size_t is_A_turn; // Which player goes next? + bool is_A_turn; // Which player goes next? 
void TestOver() { bool side_A_empty = true; @@ -48,7 +49,7 @@ namespace emp { public: using move_t = size_t; - Mancala(bool A_first=true) : boardA(), boardB(), over(false), is_A_turn(true) { + Mancala(bool A_first=true) : boardA(), boardB(), turn_count(0), over(false), is_A_turn(true) { Reset(A_first); } ~Mancala() { ; } @@ -56,6 +57,7 @@ namespace emp { void Reset(bool A_first=true) { for (size_t i = 0; i < 6; i++) { boardA[i] = 4; boardB[i] = 4; } boardA[6] = boardB[6] = 0; + turn_count = 0; over = false; is_A_turn = A_first; } @@ -96,12 +98,14 @@ namespace emp { // Returns bool indicating whether player can go again bool DoMove(move_t cell) { - emp_assert(cell < 6); // You cannot choose a cell out of bounds. + emp_assert(cell < 6); // Make sure move is not out of bounds. - side_t & cur_board = GetCurSide(); + turn_count++; // Maintain count of moves. + + side_t & cur_board = GetCurSide(); // Load in board view based on current player. side_t & other_board = GetOtherSide(); - emp_assert(cur_board[cell] != 0); // You cannot choose an empty cell. + emp_assert(cur_board[cell] != 0); // Make sure move is not an empty pit. 
size_t stone_count = cur_board[cell]; size_t cur_cell = cell; diff --git a/include/emp/games/Othello8.hpp b/include/emp/games/Othello8.hpp index da7e69c945..d9b641e5de 100644 --- a/include/emp/games/Othello8.hpp +++ b/include/emp/games/Othello8.hpp @@ -52,11 +52,11 @@ namespace emp { constexpr Index(size_t x, size_t y) : pos() { Set(x,y); } constexpr Index(const Index & _in) : pos(_in.pos) { emp_assert(pos <= NUM_CELLS); } - operator size_t() const { return pos; } - size_t x() const { return pos & 7; } - size_t y() const { return pos >> 3; } - void Set(size_t x, size_t y) { pos = (x> 3; } + constexpr void Set(size_t x, size_t y) { pos = (x struct AvidaCPU_InstLib : public InstLib { using hardware_t = HARDWARE_T; diff --git a/include/emp/hardware/AvidaGP.hpp b/include/emp/hardware/AvidaGP.hpp index 42f4a811ef..549d15085b 100644 --- a/include/emp/hardware/AvidaGP.hpp +++ b/include/emp/hardware/AvidaGP.hpp @@ -58,7 +58,7 @@ namespace emp { using stack_t = emp::vector; using arg_set_t = emp::array; - struct Instruction { + struct Instruction : public inst_lib_t::InstructionBase { size_t id; arg_set_t args; @@ -70,7 +70,7 @@ namespace emp { Instruction & operator=(const Instruction &) = default; Instruction & operator=(Instruction &&) = default; bool operator<(const Instruction & in) const { - return std::tie(id, args) < std::tie(in.id, in.args); + return (id == in.id) ? 
(args < in.args) : (id < in.id); } bool operator==(const Instruction & in) const { return id == in.id && args == in.args; } bool operator!=(const Instruction & in) const { return !(*this == in); } @@ -80,6 +80,10 @@ namespace emp { void Set(size_t _id, size_t _a0=0, size_t _a1=0, size_t _a2=0) { id = _id; args[0] = _a0; args[1] = _a1; args[2] = _a2; } + + size_t GetIndex() const override{ + return id; + } }; struct ScopeInfo { diff --git a/include/emp/hardware/EventDrivenGP.hpp b/include/emp/hardware/EventDrivenGP.hpp index e17572ef47..1931bf5904 100644 --- a/include/emp/hardware/EventDrivenGP.hpp +++ b/include/emp/hardware/EventDrivenGP.hpp @@ -365,6 +365,7 @@ namespace emp { CEREAL_NVP(id) ); } + size_t GetIndex() const{ return id; } }; diff --git a/include/emp/hardware/InstLib.hpp b/include/emp/hardware/InstLib.hpp index 5c2e17a76f..c0e8116f2e 100644 --- a/include/emp/hardware/InstLib.hpp +++ b/include/emp/hardware/InstLib.hpp @@ -24,6 +24,7 @@ namespace emp { + /// ScopeType is used for scopes that we need to do something special at the end. /// Eg: LOOP needs to go back to beginning of loop; FUNCTION needs to return to call. enum class ScopeType { NONE=0, ROOT, BASIC, LOOP, FUNCTION }; @@ -42,7 +43,14 @@ namespace emp { using fun_t = std::function; using inst_properties_t = std::unordered_set; + struct InstructionBase{ + virtual ~InstructionBase() {;} + virtual size_t GetIndex() const = 0; + }; + struct InstDef { + size_t index; + size_t id; std::string name; ///< Name of this instruction. fun_t fun_call; ///< Function to call when executing. size_t num_args; ///< Number of args needed by function. @@ -52,11 +60,11 @@ namespace emp { inst_properties_t properties; ///< Are there any generic properties associated with this inst def? char symbol; ///< Unique symbol for this instruction. 
- InstDef(const std::string & _n, fun_t _fun, size_t _args, const std::string & _d, - ScopeType _s_type, size_t _s_arg, + InstDef(size_t _idx, size_t _id, const std::string & _n, fun_t _fun, size_t _args, + const std::string & _d, ScopeType _s_type, size_t _s_arg, const inst_properties_t & _properties = inst_properties_t(), char _sym='?') - : name(_n), fun_call(_fun), num_args(_args), desc(_d) + : index(_idx), id(_id), name(_n), fun_call(_fun), num_args(_args), desc(_d) , scope_type(_s_type), scope_arg(_s_arg), properties(_properties), symbol(_sym) { ; } InstDef(const InstDef &) = default; }; @@ -65,6 +73,7 @@ namespace emp { emp::vector inst_lib; ///< Full definitions for instructions. emp::vector inst_funs; ///< Map of instruction IDs to their functions. std::map name_map; ///< How do names link to instructions? + std::map id_map; ///< How do identifiers link to instructions? std::map arg_map; ///< How are different arguments named? /// Symbols to use when representing individual instructions (80). @@ -73,39 +82,43 @@ namespace emp { emp::array symbol_map; ///< Map of symbols back to instruction IDs. public: - InstLib() : inst_lib(), inst_funs(), name_map(), arg_map() { ; } ///< Default Constructor + InstLib() : inst_lib(), inst_funs(), name_map(), id_map(), arg_map() { ; } ///< Default Constructor InstLib(const InstLib &) = delete; ///< Copy Constructor InstLib(InstLib &&) = delete; ///< Move Constructor - ~InstLib() { ; } ///< Destructor + virtual ~InstLib() { ; } ///< Destructor - InstLib & operator=(const InstLib &) = default; ///< Copy Operator - InstLib & operator=(InstLib &&) = default; ///< Move Operator + InstLib & operator=(const InstLib &) = default; ///< Copy Operator + InstLib & operator=(InstLib &&) = default; ///< Move Operator /// Return the name associated with the specified instruction ID. 
- const std::string & GetName(size_t id) const { return inst_lib[id].name; } + const std::string & GetName(size_t idx) const { return inst_lib[idx].name; } /// Return the function associated with the specified instruction ID. - const fun_t & GetFunction(size_t id) const { return inst_lib[id].fun_call; } + const fun_t & GetFunction(size_t idx) const { return inst_lib[idx].fun_call; } /// Return the number of arguments expected for the specified instruction ID. - size_t GetNumArgs(size_t id) const { return inst_lib[id].num_args; } + size_t GetNumArgs(size_t idx) const { return inst_lib[idx].num_args; } - /// Return the provided description for the provided instruction ID. - const std::string & GetDesc(size_t id) const { return inst_lib[id].desc; } + /// Return the provided description for the provided instruction ID. + const std::string & GetDesc(size_t idx) const { return inst_lib[idx].desc; } /// What type of scope does this instruction state? ScopeType::NONE is default. - ScopeType GetScopeType(size_t id) const { return inst_lib[id].scope_type; } + ScopeType GetScopeType(size_t idx) const { return inst_lib[idx].scope_type; } - /// If this instruction alters scope, identify which argument does so. - size_t GetScopeArg(size_t id) const { return inst_lib[id].scope_arg; } + /// If this instruction alters scope, identify which argument does so. + size_t GetScopeArg(size_t idx) const { return inst_lib[idx].scope_arg; } - /// Return the set of properties for the provided instruction ID. - const inst_properties_t & GetProperties(size_t id) const { return inst_lib[id].properties; } + /// Return the set of properties for the provided instruction ID. + const inst_properties_t & GetProperties(size_t idx) const { + return inst_lib[idx].properties; + } - char GetSymbol(size_t id) const { return inst_lib[id].symbol; } + char GetSymbol(size_t idx) const { return inst_lib[idx].symbol; } /// Does the given instruction ID have the given property value?
- bool HasProperty(size_t id, std::string property) const { return inst_lib[id].properties.count(property); } + bool HasProperty(size_t idx, std::string property) const { + return inst_lib[idx].properties.count(property); + } /// Get the number of instructions in this set. size_t GetSize() const { return inst_lib.size(); } @@ -116,18 +129,37 @@ namespace emp { return Has(name_map, name); } + size_t GetID(const size_t idx) const { + return inst_lib[idx].id; + } /// Return the ID of the instruction that has the specified name. size_t GetID(const std::string & name) const { emp_assert(Has(name_map, name), name); - return Find(name_map, name, (size_t) -1); + return inst_lib[Find(name_map, name, (size_t) -1)].id; } /// Return the ID of the instruction associated with the specified symbol. - size_t GetID(char symbol) { + size_t GetIDFromSymbol(char symbol) const { emp_assert(symbol > 0); return symbol_map[(size_t) symbol]; } + /// Return the index of the instruction that has the specified name. + size_t GetIndex(const std::string & name) const { + emp_assert(Has(name_map, name), name); + return Find(name_map, name, (size_t) -1); + } + /// Return the index of the instruction that has the specified ID. + size_t GetIndex(const size_t id) const { + emp_assert(Has(id_map, id), id); + return Find(id_map, id, (size_t) -1); + } + + size_t GetIndexFromSymbol(char symbol) const { + size_t id = GetIDFromSymbol(symbol); + return GetIndex(id); + } + /// Return the argument value associated with the provided keyword. arg_t GetArg(const std::string & name) { emp_assert(Has(arg_map, name)); @@ -148,14 +180,20 @@ namespace emp { const std::string & desc="", ScopeType scope_type=ScopeType::NONE, size_t scope_arg=(size_t) -1, - const inst_properties_t & inst_properties=inst_properties_t()) + const inst_properties_t & inst_properties=inst_properties_t(), + int _id = -1) { - const size_t id = inst_lib.size(); + const size_t idx = inst_lib.size(); + const size_t id = (_id >= 0) ? 
_id : inst_lib.size(); + emp_assert(!Has(id_map, id), "ID is already in use!", id); const char symbol = (id < symbol_defaults.size()) ? symbol_defaults[id] : '+'; - inst_lib.emplace_back(name, fun_call, num_args, desc, scope_type, scope_arg, inst_properties, symbol); + inst_lib.emplace_back(idx, id, name, fun_call, num_args, desc, scope_type, scope_arg, + inst_properties, symbol); inst_funs.emplace_back(fun_call); - name_map[name] = id; + name_map[name] = idx; + id_map[id] = idx; symbol_map[(size_t) symbol] = id; + std::cout << "Registered instruction: " << name << " index: " << idx << "; id: " << id << "; symbol: " << symbol << std::endl; } /// Specify a keyword and arg value. @@ -165,18 +203,17 @@ namespace emp { } /// Process a specified instruction in the provided hardware. - void ProcessInst(hardware_t & hw, const inst_t & inst) const { - inst_funs[inst.id](hw, inst); + virtual void ProcessInst(hardware_t & hw, const inst_t & inst) const { + inst_funs[inst.GetIndex()](hw, inst); } /// Process a specified instruction on hardware that can be converted to the correct type. template void ProcessInst(emp::Ptr hw, const inst_t & inst) const { emp_assert( dynamic_cast(hw.Raw()) ); - inst_funs[inst.id](*(hw.template Cast()), inst); + inst_funs[inst.GetIndex()](*(hw.template Cast()), inst); } - /// Write out a full genome to the provided ostream. void WriteGenome(const genome_t & genome, std::ostream & os=std::cout) const { for (const inst_t & inst : genome) { @@ -192,9 +229,10 @@ namespace emp { /// Read the instruction in the provided info and append it to the provided genome. 
void ReadInst(genome_t & genome, std::string info) const { std::string name = emp::string_pop_word(info); - size_t id = GetID(name); - genome.emplace_back(id); - size_t num_args = GetNumArgs(id); + size_t idx = GetIndex(name); + size_t id = GetID(idx); + genome.emplace_back(idx, id); + size_t num_args = GetNumArgs(idx); for (size_t i = 0; i < num_args; i++) { std::string arg_name = emp::string_pop_word(info); // @CAO: Should check to make sure arg name is real. diff --git a/include/emp/hardware/VirtualCPU.hpp b/include/emp/hardware/VirtualCPU.hpp new file mode 100644 index 0000000000..d2f3d52326 --- /dev/null +++ b/include/emp/hardware/VirtualCPU.hpp @@ -0,0 +1,847 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022. + * + * @file VirtualCPU.hpp + * @brief A simple virtual CPU styled after the original and extended Avidian architectures. + * + * @TODO + * - Expanded heads? + * - expanded_nop_args useful? + * - Consider changing default return value for search functions + * - Consider switching to (or adding an optional mode) where nops are only curated + * as-needed instead of all at once + * + */ + +#ifndef EMP_HARDWARE_VIRTUALCPU_HPP_INCLUDE +#define EMP_HARDWARE_VIRTUALCPU_HPP_INCLUDE + +#include +#include +#include + +#include "../base/array.hpp" +#include "../base/Ptr.hpp" +#include "../base/unordered_map.hpp" +#include "../base/vector.hpp" +#include "../datastructs/map_utils.hpp" +#include "../datastructs/vector_utils.hpp" +#include "../io/File.hpp" +#include "../math/Random.hpp" +#include "../tools/string_utils.hpp" + +#include "Genome.hpp" +#include "VirtualCPU_InstLib.hpp" + +namespace emp{ + /// \brief A simple virtual CPU styled after those seen in Avida + /// + /// This class represents a single virtual CPU following a genome of assembly-level + /// instructions. 
+ /// By default, each CPU features four heads, two stacks, multiple registers, and + /// a circular genome. + /// Both the original and extended architectures are supported. + template + class VirtualCPU{ + public: + static constexpr size_t NUM_STACKS = 2; ///< Number of stacks in this CPU (currently 2) + static constexpr size_t MAX_NOPS = 23; ///< Maximum number of nop instructions supported + struct Instruction; + + using derived_t = DERIVED; + using data_t = int32_t; + using inst_t = Instruction; + using inst_lib_t = VirtualCPU_InstLib; + using genome_t = Genome; + using nop_vec_t = emp::vector; + using stack_t = emp::vector; + + /// \brief Representation of a single instruction in the CPU's genome + /// + /// Only contains the necessary information for which instruction is being represented + /// as well as any data it needs in the genome. + /// Does NOT contain the actual logic of the instruction, nor the name. + /// These are handled by the instruction library itself. + struct Instruction : public inst_lib_t::InstructionBase { + size_t idx; /// Index of the instruction in the instruction library + size_t id; /// Identifier for the instruction that gives the user + /// flexibility over the instruction (e.g., what symbol + /// it should use in a string representation) + emp::vector nop_vec; /// Representation of the contiguous sequence of NOP + /// instructions following this instruction in the genome + bool has_been_executed = false; /// Has this instruction been executed? + bool has_been_copied = false; // Has this instruction been copied to an offspring? 
+ + Instruction() = delete; + Instruction(size_t _idx, size_t _id=0, emp::vector _nop_vec = {}) + : idx(_idx), id(_id), nop_vec(_nop_vec) { ; } + Instruction(const Instruction &) = default; + Instruction(Instruction &&) = default; + + Instruction & operator=(const Instruction &) = default; + Instruction & operator=(Instruction &&) = default; + bool operator<(const Instruction & in) const { + return id < in.id; + } + bool operator==(const Instruction & in) const { return id == in.id; } + bool operator!=(const Instruction & in) const { return !(*this == in); } + bool operator>(const Instruction & in) const { return in < *this; } + bool operator>=(const Instruction & in) const { return !(*this < in); } + bool operator<=(const Instruction & in) const { return !(in < *this); } + + void Set(size_t _idx, size_t _id, emp::vector _nop_vec = {}) + { idx = _idx; id = _id; nop_vec=_nop_vec;} + + size_t GetIndex() const override { return idx; } + }; + + + protected: + size_t num_regs = 0; ///< Number of registers found in this CPU + size_t num_nops = 0; ///< Number of NOP instructions found in this CPU's library + + public: + //////// FLAGS + bool are_nops_counted = false; ///< Flag detailing if the number of NOP instructions + ///< in the CPU's library have been counted + bool are_regs_expanded = false; ///< Flag signaling if the number of registers have + ///< been expanded to accommodate the number of NOP + ///< instructions in the library + bool nops_need_curated = true; ///< Flag signaling that NOP instructions need curated + bool expanded_nop_args = false; ///< Flag signaling that CPU is used the expanded + + //////// CPU COMPONENTS + emp::vector regs; ///< Vector of registers + std::unordered_map inputs; ///< Map of all available inputs + ///< (position -> value) + std::unordered_map outputs; ///< Map of all outputs (position -> value) + emp::array stacks; ///< Array of stacks for this CPU + size_t inst_ptr; ///< Instruction pointer, signifies next + ///< instruction to 
be executed + size_t flow_head; ///< Flow head, used for moving heads and + ///< values + size_t read_head; ///< Read head, signals what instruction to + ///< copy next + size_t write_head; ///< Write head, signals where to copy next + ///< instruction + size_t cooldown_timer = 0; ///< Do not process inst if value > 0. + ///< Decrease this value instead + //////// HELPER CONSTRUCTS + emp::unordered_map nop_id_map;/**< NOP inst id -> Nop index + (e.g., NopA -> 0, NopB -> 1, + NopE -> 5) */ + emp::vector label_idx_vec; ///< Vector of LABEL instructions indices in genome + //////// GENOME + genome_t genome; ///< Preserved copy of genome from organism creation/birth + ///< that should not change in any way + genome_t genome_working; ///< Working copy of genome that can mutate, resize, and change + //////// BOOKKEEPING + size_t active_stack_idx = 0; ///< Index of CPU's active stack + emp::vector copied_inst_id_vec; /**< Vector of instructions that have been + copied */ + size_t num_insts_executed = 0; ///< Number of instructions that have been executed + + + //////// CONSTRUCTORS / DESTRUCTOR + /// Create a new VirtualCPU with the same genome (and thus instruction library) + VirtualCPU(const genome_t & in_genome) + : regs(), inputs(), outputs(), + inst_ptr(0), flow_head(0), read_head(0), write_head(0), + genome(in_genome), genome_working(in_genome) { + Initialize(); + ResetHardware(); + } + /// Create a default VirtualCPU (no genome sequence, default instruction set) + VirtualCPU() : + VirtualCPU(genome_t(inst_lib_t::DefaultInstLib())) { + Initialize(); + ResetHardware(); + } + /// Create a perfect copy of passed VirtualCPU + VirtualCPU(const VirtualCPU &) = default; + /// Default move constructor + VirtualCPU(VirtualCPU &&) = default; + /// Default destructor + virtual ~VirtualCPU() { ; } + + + //////// GETTERS + /// Return size of original genome + size_t GetGenomeSize() const { return genome.GetSize(); } + /// Return size of working genome + size_t 
GetWorkingGenomeSize() const { return genome_working.GetSize(); }
+    /// Return the number of registers in the CPU
+    size_t GetNumRegs() const { return num_regs; }
+    /// Return the number of NOP instructions found in the CPU's instruction library
+    size_t GetNumNops() const { return num_nops; }
+    /// Return the outputs of the CPU
+    const std::unordered_map & GetOutputs() const { return outputs; }
+    /// Return a pointer to the CPU's instruction library
+    Ptr GetInstLib() const { return genome.GetInstLib(); }
+    /// Return how many positions of the working genome hold an instruction flagged as
+    /// executed (each position counts once, no matter how often it has run)
+    size_t GetNumInstsExecuted() const{
+      size_t count = 0;
+      // Bind by const reference: copying an instruction would also copy its nop vector.
+      for (const auto & inst : genome_working) {
+        if (inst.has_been_executed) count++;
+      }
+      return count;
+    }
+    /// Return how many positions of the working genome hold an instruction flagged as
+    /// copied (each position counts once)
+    size_t GetNumInstsCopied() const{
+      size_t count = 0;
+      // Bind by const reference: copying an instruction would also copy its nop vector.
+      for (const auto & inst : genome_working) {
+        if (inst.has_been_copied) count++;
+      }
+      return count;
+    }
+
+
+
+    //////// SETTERS
+    /// Copies passed vector into input map
+    void SetInputs(const emp::vector & vals) {
+      inputs = emp::ToUMap(vals);
+    }
+
+
+    //////// GENOME & INSTRUCTION MANIPULATION
+    /// Load instructions from input stream
+    ///
+    /// The current genome is cleared first. Comments ("//" and "#" styles) and empty
+    /// lines are removed, then every remaining line is handed to PushInst.
+    /// @param input Stream holding one instruction per line
+    /// @return true if at least one instruction line was found; false (after reporting
+    ///         an error) if nothing was left to load
+    bool Load(std::istream & input) {
+      ClearGenome();
+      File file(input);
+      file.RemoveComments("//");  // Remove all C++ style comments
+      file.RemoveComments("#");   // Remove all bash/Python/R style comments
+      file.CompressWhitespace();  // Trim down remaining whitespace.
+      file.RemoveEmpty();
+      if (file.GetNumLines() == 0) {
+        emp::notify::Error("VirtualCPU trying to load a genome from an empty stream!");
+        return false;  // Nothing was loaded, so signal failure the way Load(filename) does
+      }
+      file.Apply( [this](std::string & info) { PushInst(info); } );
+      nops_need_curated = true;
+      return true;
+    }
+
+    /// Load instructions from file
+    /// @param filename Path of the genome file to read
+    /// @return Result of loading the opened stream; false (after reporting an error) if
+    ///         the file could not be opened
+    bool Load(const std::string & filename) {
+      std::ifstream is(filename);
+      if (is.is_open()) return Load(is);
+      emp::notify::Error("VirtualCPU genome file is either empty or missing: ", filename);
+      return false;
+    }
+
+    /// Load genome from a string of characters
+    ///
+    /// The current genome is cleared, then each character is pushed as an instruction
+    /// symbol, in order.
+    bool LoadFromChars(const std::string & new_genome){
+      ClearGenome();
+      for (const char symbol : new_genome) {
+        PushInst(symbol);
+      }
+      nops_need_curated = true;
+      return true;
+    }
+
+    /// Add a new instruction to the end of the genome, by index in the instruction library
+    void PushInst(size_t idx) {
+      const size_t id = GetInstLib()->GetID(idx);
+      genome.emplace_back(idx, id);
+      genome_working.emplace_back(idx, id);
+      nops_need_curated = true;
+    }
+
+    /// Redirect literal ints to PushInst(size_t) overload.
+    void PushInst(int idx) { PushInst(static_cast(idx)); }
+
+    /// Add a new instruction to the end of the genome, by the instruction's symbol/char
+    void PushInst(char c) { PushInst( GetInstLib()->GetIndexFromSymbol(c) ); }
+
+    /// Add a new instruction to the end of the genome, by name
+    void PushInst(const std::string & name) {
+      PushInst(GetInstLib()->GetIndex(name));
+      nops_need_curated = true;
+    }
+
+    /// Add a specified new instruction to the end of the genome
+    void PushInst(const inst_t & inst) {
+      genome.emplace_back(inst);
+      genome_working.emplace_back(inst);
+      nops_need_curated = true;
+    }
+
+    /// Add multiple copies of a specified instruction to the end of the genome
+    /// @param inst  Instruction to append to both the original and the working genome
+    /// @param count Number of copies to append
+    void PushInst(const inst_t & inst, size_t count) {
+      genome.reserve(genome.size() + count);
+      for (size_t i = 0; i < count; i++) genome.emplace_back(inst);
+      // Size the working genome from its OWN length; `genome` has already grown by `count`
+      // at this point, so using it here would reserve the wrong amount.
+      genome_working.reserve(genome_working.size() + count);
+      for (size_t i = 0; i < count; i++) genome_working.emplace_back(inst);
+      nops_need_curated = true;
+    }
+
+    /// Return the default instruction: the library entry looked up by ID 0
+    inst_t GetDefaultInst() const{
+      return inst_t(GetInstLib()->GetIndex(0), 0);
+    }
+
+    /// Add one or more default instructions to the end of the genome
+    void PushDefaultInst(size_t count=1) {
+      PushInst( GetDefaultInst(), count );
+      nops_need_curated = true;
+    }
+
+    /// Return a random instruction from the instruction library
+    ///
+    /// A library index is drawn with rand.GetUInt(library size) and the ID is then read
+    /// from that entry. Drawing the index (rather than the ID) keeps the result valid when
+    /// instructions were registered with IDs that do not match their index.
+    inst_t GetRandomInst(Random & rand) {
+      const size_t idx = rand.GetUInt(GetInstLib()->GetSize());
+      const size_t id = GetInstLib()->GetID(idx);
+      return inst_t(idx, id);
+    }
+
+    /// Overwrite the instruction at the given genome index with passed instruction
+    void SetInst(size_t pos, const inst_t & inst) {
+      genome[pos] = inst;
+      genome_working[pos] = inst;
+      nops_need_curated = true;
+    }
+
+    /// Overwrite the instruction at the given genome index with a random instruction
+    void
RandomizeInst(size_t pos, Random & rand) { + SetInst(pos, GetRandomInst(rand) ); + nops_need_curated = true; + } + + /// Add a random instruction from the instruction library to the end of the genome + void PushRandomInst(Random & random, const size_t count=1) { + for (size_t i = 0; i < count; i++) { + PushInst(GetRandomInst(random)); + } + nops_need_curated = true; + } + + /// Insert the given instruction at the specified genome position + void InsertInst(const inst_t& inst, const size_t idx) { + genome.emplace(genome.begin() + idx, inst); + genome_working.emplace(genome_working.begin() + idx, inst); + nops_need_curated = true; + } + + /// Inserts a random instruction at the given genome position + void InsertRandomInst(const size_t idx, emp::Random& random) { + InsertInst(GetRandomInst(random), idx); + } + + /// Remove the instruction at the specified genome position + void RemoveInst(const size_t idx) { + genome.erase(genome.begin() + idx); + genome_working.erase(genome_working.begin() + idx); + nops_need_curated = true; + } + /// Increase the cooldown by some value, so instructions cannot be processed for longer + void IncreaseCooldown(size_t val){ + cooldown_timer += val; + } + /// Decrease the cooldown by some value, so instructions can be processed sooner + void DecreaseCooldown(size_t val){ + if(cooldown_timer >= val) cooldown_timer -= val; + else cooldown_timer = 0; + } + /// Reset the cooldown timer + void ResetCooldown(){ + cooldown_timer = 0; + } + + + + //////// HEAD MANIPULATION + + void ResetIP() { inst_ptr = 0; } ///< Move instruction pointer to beginning of the genome. + void ResetRH() { read_head = 0; } ///< Move read head to beginning of the genome. + void ResetWH() { write_head = 0; } ///< Move write head to beginning of the genome. + void ResetFH() { flow_head = 0; } ///< Move flow head to beginning of the genome. 
+
+    /// Advance the instruction pointer so many steps and wrap around the end of the genome
+    /// (an empty working genome leaves the pointer at 0)
+    void AdvanceIP(size_t steps=1) {
+      inst_ptr += steps;
+      inst_ptr = (genome_working.size() > 0 ? inst_ptr % genome_working.size() : 0);
+    }
+    /// Advance the read head so many steps and wrap around the end of the genome
+    /// (an empty working genome leaves the head at 0)
+    void AdvanceRH(size_t steps=1) {
+      read_head += steps;
+      read_head = (genome_working.size() > 0 ? read_head % genome_working.size() : 0);
+    }
+    /// Advance the write head so many steps and wrap around the end of the genome
+    /// (an empty working genome leaves the head at 0)
+    void AdvanceWH(size_t steps=1) {
+      write_head += steps;
+      write_head = (genome_working.size() > 0 ? write_head % genome_working.size() : 0);
+    }
+    /// Advance the flow head so many steps and wrap around the end of the genome
+    /// (an empty working genome leaves the head at 0)
+    void AdvanceFH(size_t steps=1) {
+      flow_head += steps;
+      flow_head = (genome_working.size() > 0 ? flow_head % genome_working.size() : 0);
+    }
+    /// Set the instruction pointer to the genome index, wrap around the end of the genome
+    ///
+    /// With an empty working genome the pointer is set to 0; the size check avoids a
+    /// modulo by zero.
+    void SetIP(size_t pos) {
+      const size_t genome_size = genome_working.size();
+      inst_ptr = (genome_size > 0 ? pos % genome_size : 0);
+    }
+    /// Set the read head to the genome index, wrap around the end of the genome
+    ///
+    /// With an empty working genome the head is set to 0; the size check avoids a
+    /// modulo by zero.
+    void SetRH(size_t pos) {
+      const size_t genome_size = genome_working.size();
+      read_head = (genome_size > 0 ? pos % genome_size : 0);
+    }
+    /// Set the write head to the genome index, wrap around the end of the genome
+    ///
+    /// With an empty working genome the head is set to 0; the size check avoids a
+    /// modulo by zero.
+    void SetWH(size_t pos) {
+      const size_t genome_size = genome_working.size();
+      write_head = (genome_size > 0 ? pos % genome_size : 0);
+    }
+    /// Set the flow head to the genome index, wrap around the end of the genome
+    ///
+    /// With an empty working genome the head is set to 0; the size check avoids a
+    /// modulo by zero.
+    void SetFH(size_t pos) {
+      const size_t genome_size = genome_working.size();
+      flow_head = (genome_size > 0 ? pos % genome_size : 0);
+    }
+    /// Set the specified head (which can wrap) to the beginning of the genome.
+    /// Head indices are taken modulo 4: 0 = instruction pointer, 1 = read head,
+    /// 2 = write head, 3 = flow head.
+    void ResetModdedHead(size_t head_idx) {
+      switch (head_idx % 4) {
+        case 0: SetIP(0); break;
+        case 1: SetRH(0); break;
+        case 2: SetWH(0); break;
+        case 3: SetFH(0); break;
+      }
+    }
+    /// Set the specified head (which can wrap) to the given genome position,
+    /// wrap around the end of
the genome + void SetModdedHead(size_t head_idx, size_t pos) { + size_t modded_idx = head_idx % 4; + if (modded_idx == 0) SetIP(pos); + else if (modded_idx == 1) SetRH(pos); + else if (modded_idx == 2) SetWH(pos); + else if (modded_idx == 3) SetFH(pos); + } + /// Advance the specified head (which can wrap) the given number of instructions, + /// wrap around the end of the genome + void AdvanceModdedHead(size_t head_idx, size_t steps=1) { + size_t modded_idx = head_idx % 4; + if (modded_idx == 0) AdvanceIP(steps); + else if (modded_idx == 1) AdvanceRH(steps); + else if (modded_idx == 2) AdvanceWH(steps); + else if (modded_idx == 3) AdvanceFH(steps); + } + /// Return the head POSITION of the specified head (can wrap) + size_t GetModdedHead(size_t head_idx) { + size_t modded_idx = head_idx % 4; + if (modded_idx == 0) return inst_ptr; + else if (modded_idx == 1) return read_head; + else if (modded_idx == 2) return write_head; + else if (modded_idx == 3) return flow_head; + return inst_ptr; + } + + + //////// HARDWARE MANIPULATION + /// Initializes the CPU by counting the number of NOP instructions in the instruction + /// library and expanding the number of registers to match + void Initialize() { + CountNops(); + ExpandRegisters(); + ResetHardware(); + } + /// Reset all heads + void ResetHeads() { + ResetIP(); + ResetRH(); + ResetWH(); + ResetFH(); + ResetCooldown(); + } + /// Reset all inputs and outputs + void ResetIO() { + inputs.clear(); + outputs.clear(); + } + /// Reset all memory/data + void ResetMemory() { + // Initialize registers to their position. So Reg0 = 0 and Reg11 = 11. 
+      for (size_t i = 0; i < num_regs; i++) {
+        regs[i] = (data_t) i;
+      }
+      for (size_t i = 0; i < NUM_STACKS; ++i) {
+        stacks[i].resize(0);
+      }
+      active_stack_idx = 0;
+    }
+    /// Reset all bookkeeping variables
+    void ResetBookkeeping() {
+      copied_inst_id_vec.clear();
+      num_insts_executed = 0;
+    }
+    /// Reset the working genome back to the original genome
+    void ResetWorkingGenome() {
+      genome_working = genome;
+      label_idx_vec.clear();
+      nops_need_curated = true;
+    }
+    /// Reset just the CPU hardware, but keep the original genome
+    void ResetHardware() {
+      ResetHeads();
+      ResetMemory();
+      ResetIO();
+      ResetBookkeeping();
+    }
+    /// Clear the main genome of the organism and reset all hardware
+    void ClearGenome() {
+      genome.resize(0,0);         // Clear out genome
+      genome_working.resize(0,0); // Clear out working genome
+      label_idx_vec.clear();      // No labels if genome is empty
+      nops_need_curated = true;
+      ResetHardware();            // Reset the full hardware
+    }
+    /// Compile NOP instructions in genome into useful nop vectors for each instruction,
+    /// and records the position of all LABEL instructions
+    ///
+    /// Every nop vector of the working genome is rebuilt from scratch, so the function
+    /// can be called again after the working genome has changed. Label positions are
+    /// stored in ascending order with one entry per LABEL instruction; nothing is
+    /// recorded when the library has no instruction named "Label".
+    void CurateNops() {
+      const size_t genome_size = genome_working.size();
+      if (genome_size == 0) {
+        label_idx_vec.clear();  // An empty genome cannot contain labels
+        nops_need_curated = false;
+        return;
+      }
+      const bool label_inst_present = GetInstLib()->IsInst("Label");
+      const size_t label_inst_id = label_inst_present ? GetInstLib()->GetID("Label") : 0;
+
+      if (!are_nops_counted) CountNops();
+      label_idx_vec.clear();
+      // Start by filling the nop vector of the last instruction. The scan begins at the
+      // front of the genome, so the vector holds the run of nops that opens the genome.
+      // Clear it first; otherwise each re-curation would append the same run again.
+      const size_t last_idx = genome_size - 1;
+      genome_working[last_idx].nop_vec.clear();
+      for (size_t inst_idx = 0; inst_idx < last_idx; ++inst_idx) {
+        if (!emp::Has(nop_id_map, genome_working[inst_idx].id)) break;
+        genome_working[last_idx].nop_vec.push_back(
+            nop_id_map[genome_working[inst_idx].id]);
+      }
+      // Now iterate backward over the genome, filling in each instruction's nop vector
+      // Example, our genome looks like xyzabc where only a, b, and c are nops
+      // If we are on index 2 (z), we see it is followed by a nop.
+      // Thus, we copy the next instruction into the nop vector [a]
+      // Then we copy THAT instruction's nop vector, too: [a,b,c]
+      // By going in reverse order, all following instructions already have a nop vec
+      for (auto it = genome_working.rbegin() + 1; it != genome_working.rend(); ++it) {
+        if (emp::Has(nop_id_map, (it - 1)->id)) {
+          it->nop_vec.resize( (it - 1)->nop_vec.size() + 1 );
+          it->nop_vec[0] = nop_id_map[(it - 1)->id];
+          std::copy(
+            (it - 1)->nop_vec.begin(),
+            (it - 1)->nop_vec.end(),
+            it->nop_vec.begin() + 1);
+        }
+        else it->nop_vec.clear();  // Not followed by a nop: drop any stale sequence
+      }
+      // Record label positions in one ascending pass, and only when a Label instruction
+      // exists. Without that check the placeholder ID 0 would match unrelated
+      // instructions. A single pass also keeps a trailing label from being listed twice.
+      if (label_inst_present) {
+        for (size_t inst_idx = 0; inst_idx < genome_size; ++inst_idx) {
+          if (genome_working[inst_idx].id == label_inst_id)  // Record pos if inst is label
+            label_idx_vec.push_back(inst_idx);
+        }
+      }
+      nops_need_curated = false;
+    }
+    /// Determine the number of sequential NOP instructions in the instruction library
+    ///
+    /// Starts at NopA and continues from there. Any missing instructions force count to
+    /// stop. Last possible NOP instruction is NopW, as NopX is a special case in Avida.
+    void CountNops() {
+      num_nops = 0;
+      nop_id_map.clear();
+      are_nops_counted = true;
+      for (size_t idx = 0; idx < MAX_NOPS ; ++idx) { // Stop before X!
+        std::string nop_name = (std::string)"Nop" + (char)('A' + idx);
+        if (GetInstLib()->IsInst(nop_name)) {
+          num_nops++;
+          size_t id = GetInstLib()->GetID(nop_name);
+          nop_id_map[id] = idx;
+        }
+        else return;
+      }
+    }
+    /// Expand the CPU's registers to match the number of NOP instructions in the
+    /// instruction library
+    void ExpandRegisters() {
+      if (!are_nops_counted) CountNops();
+      are_regs_expanded = true;
+      num_regs = num_nops;
+      regs.resize(num_regs);
+    }
+
+    //////// NOP SEQUENCE METHODS
+    /// For a given NOP instruction (as an index), return its complement index
+    ///
+    /// The complement is the next nop index, wrapping to 0 after the last one.
+    /// @param idx Nop index to complement
+    /// @return idx + 1, or 0 when idx is the last nop index or beyond
+    size_t GetComplementNop(size_t idx) {
+      // Written as `idx + 1 >= num_nops` so that num_nops == 0 cannot wrap around the
+      // way the unsigned expression `num_nops - 1` would.
+      if (idx + 1 >= num_nops) return 0;
+      return idx + 1;
+    }
+    /// For a vector of NOP instructions (as indices), return a vector of complement indices
+    /// in the same order
+    nop_vec_t GetComplementNopSequence(const nop_vec_t& nop_vec) {
+      nop_vec_t res_vec;
+      res_vec.reserve(nop_vec.size());  // Result has exactly one entry per input nop
+      for (size_t nop : nop_vec) {
+        res_vec.push_back(GetComplementNop(nop));
+      }
+      return res_vec;
+    }
+    /// Check if a vector of NOP instructions is the same as the START of another vector
+    /// @param search_vec  Sequence that must appear as a prefix
+    /// @param compare_vec Sequence whose beginning is examined
+    /// @return true only if search_vec is non-empty and matches the first
+    ///         search_vec.size() entries of compare_vec
+    bool CompareNopSequences(const nop_vec_t& search_vec, const nop_vec_t& compare_vec) {
+      if (search_vec.size() > compare_vec.size()) return false;
+      if (search_vec.size() == 0 || compare_vec.size() == 0) return false;
+      for (size_t idx = 0; idx < search_vec.size(); ++idx) {
+        if (search_vec[idx] != compare_vec[idx]) return false;
+      }
+      return true;
+    }
+    /// Check if the given vector of NOP instructions (as indices) were the last
+    /// instructions to be copied by the CPU
+    /// @param label Sequence to compare against the tail of the copied-instruction record
+    /// @return true only if label is non-empty and equals the most recent label.size()
+    ///         entries of copied_inst_id_vec, in order
+    bool CheckIfLastCopied(const nop_vec_t& label) {
+      const size_t label_size = label.size();
+      if (label_size > copied_inst_id_vec.size()) return false;
+      if (label_size == 0) return false;
+      // Walk both sequences from their last element toward the front. The size check
+      // above guarantees the copied record has at least label_size entries.
+      auto copied_it = copied_inst_id_vec.rbegin();
+      for (size_t offset = 0; offset < label_size; ++offset, ++copied_it) {
+        if (*copied_it != label[label_size - 1 - offset]) return false;
+      }
+      return true;
+    }
+    /// Search up the genome (backward) for a sequence of
NOP instructions following a LABEL + /// instruction that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + size_t FindLabel_Reverse(bool start_local) { + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + size_t start_label_vec_idx = label_idx_vec.size() - 1; + if (start_local) { + bool start_found = false; + for (size_t offset = 0; offset < label_idx_vec.size(); ++offset) { + if (label_idx_vec[label_idx_vec.size() - offset - 1] < inst_ptr) { + start_label_vec_idx = label_idx_vec.size() - offset - 1; + start_found = true; + break; + } + } + if (!start_found) start_label_vec_idx = label_idx_vec.size() - 1; + } + for (size_t offset = 0; offset < label_idx_vec.size(); ++offset) { + const size_t idx = + label_idx_vec[ + (start_label_vec_idx - offset + label_idx_vec.size()) % label_idx_vec.size() + ]; + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search the genome for a sequence of NOP instructions following a LABEL + /// instruction that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + /// @param reverse If true, traverse the genome backward. 
If false, traverse forward + size_t FindLabel(bool start_local, bool reverse = false) { + if (reverse) return FindLabel_Reverse(start_local); + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + size_t start_label_vec_idx = 0; + if (start_local) { + bool start_found = false; + for (; start_label_vec_idx < label_idx_vec.size(); ++start_label_vec_idx) { + if (label_idx_vec[start_label_vec_idx] > inst_ptr) { + start_found = true; + break; + } + } + if (!start_found) start_label_vec_idx = 0; + } + for (size_t offset = 0; offset < label_idx_vec.size(); ++offset) { + const size_t idx = label_idx_vec[(start_label_vec_idx + offset) % label_idx_vec.size()]; + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the given NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_idx Position in the genome to start the search + size_t FindNopSequence_Reverse(const nop_vec_t& search_vec, size_t start_idx) { + for (size_t offset = 1; offset < genome_working.size() + 1; ++offset) { + const size_t idx = (start_idx - offset + genome_working.size()) % genome_working.size(); + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the given NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_local If true, search from instruction pointer. 
If false, search from + /// start of the genome + size_t FindNopSequence_Reverse(const nop_vec_t& search_vec, bool start_local) { + size_t start_idx = 0; + if (start_local && inst_ptr != 0) start_idx = inst_ptr; + return FindNopSequence_Reverse(search_vec, start_idx); + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + size_t FindNopSequence_Reverse(bool start_local) { + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + return FindNopSequence_Reverse(search_vec, start_local); + } + /// Search the genome for a sequence of NOP instructions that match the given + /// NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_idx Position in the genome to start the search + size_t FindNopSequence(const nop_vec_t& search_vec, size_t start_idx, + bool reverse = false) { + if (reverse) return FindNopSequence_Reverse(search_vec, start_idx); + for (size_t offset = 1; offset < genome_working.size() + 1; ++offset) { + const size_t idx = (start_idx + offset) % genome_working.size(); + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search the genome for a sequence of NOP instructions that match the given + /// NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + /// @param reverse If true, traverse the genome backward. 
If false, traverse forward + size_t FindNopSequence(const nop_vec_t& search_vec, bool start_local, + bool reverse = false) { + size_t start_idx = genome_working.size() - 1; + if (start_local) start_idx = inst_ptr; + return FindNopSequence(search_vec, start_idx, reverse); + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + /// @param reverse If true, traverse the genome backward. If false, traverse forward + size_t FindNopSequence(bool start_local, bool reverse = false) { + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + return FindNopSequence(search_vec, start_local, reverse); + } + + + //////// STACK MANIPULATION + /// Push the value in the specified register on top of the active stack + void StackPush(size_t reg_idx) { + stacks[active_stack_idx].push_back(regs[reg_idx]); + } + /// Remove the value from the top of the active stack and store it in the + /// specified register + void StackPop(size_t reg_idx) { + if (stacks[active_stack_idx].size()) { + regs[reg_idx] = *stacks[active_stack_idx].rbegin(); + stacks[active_stack_idx].pop_back(); + } + } + /// Swap which stack is active + void StackSwap() { + active_stack_idx++; + if (active_stack_idx >= NUM_STACKS) active_stack_idx = 0; + } + /// Fetch the nth value of the specified stack + data_t GetStackVal(size_t stack_idx, size_t val_idx) { + emp_assert(stack_idx < NUM_STACKS); + emp_assert(val_idx < stacks[stack_idx].size()); + size_t reverse_idx = stacks[stack_idx].size() - val_idx - 1; + return stacks[stack_idx][reverse_idx]; + } + + + //////// PROCESSING + /// Process the next instruction pointed to be the instruction pointer + void SingleProcess(bool verbose = true) { + if(cooldown_timer > 0){ + DecreaseCooldown(1); + num_insts_executed++; + } + else{ + 
emp_assert(genome_working.GetSize() > 0); // A genome must exist to be processed. + if(!are_regs_expanded) ExpandRegisters(); + if(nops_need_curated) CurateNops(); + if(verbose){ + GetInstLib()->GetName(genome_working[inst_ptr].idx); + PrintDetails(); + } + genome_working[inst_ptr].has_been_executed = true; + GetInstLib()->ProcessInst(ToPtr(this), genome_working[inst_ptr]); + AdvanceIP(); + num_insts_executed++; + } + } + /// Process the next SERIES of instructions, directed by the instruction pointer. + void Process(size_t num_inst = 1, bool verbose = true) { + for (size_t i = 0; i < num_inst; i++) SingleProcess(verbose); + } + + + //////// STATE -> STRING FUNCTIONS + /// Return the working genome in string form. + /// + /// Each instruction is represented by a single character, dictated by the + /// instruction's ID. + std::string GetWorkingGenomeString() const{ + std::stringstream sstr; + sstr << "[" << genome_working.size() << "]"; + for (size_t idx = 0; idx < genome_working.size(); idx++) { + unsigned char c = 'a' + genome_working[idx].id; + if (genome_working[idx].id > 25) c = 'A' + genome_working[idx].id - 26; + sstr << c; + } + return sstr.str(); + } + /// Return the original genome in string form. + /// + /// Each instruction is represented by a single character, dictated by the + /// instruction's ID. + std::string GetGenomeString() const{ + std::stringstream sstr; + sstr << "[" << genome.size() << "]"; + for (size_t idx = 0; idx < genome.size(); idx++) { + unsigned char c = 'a' + genome[idx].id; + if (genome[idx].id > 25) c = 'A' + genome[idx].id - 26; + sstr << c; + } + return sstr.str(); + } + /// Return the original genome in string form, without the genome length. + /// + /// Each instruction is represented by a single character, dictated by the + /// instruction's ID. 
+ std::string GetRawGenomeString() const{ + std::stringstream sstr; + for(size_t idx = 0; idx < genome.size(); idx++){ + unsigned char c = 'a' + genome[idx].id; + if(genome[idx].id > 25) c = 'A' + genome[idx].id - 26; + sstr << c; + } + return sstr.str(); + } + /// Output the state of the CPU's heads and registers to the specified output stream + void PrintDetails(std::ostream& os = std::cout) { + os << "IP: " << inst_ptr; + os << " RH: " << read_head; + os << " WH: " << write_head; + os << " FH: " << flow_head; + os << "(nops: " << num_nops << "; regs: " << num_regs << ")" << std::endl; + for (size_t reg_idx = 0; reg_idx < regs.size(); ++reg_idx) { + os << "[" << reg_idx << "] " << regs[reg_idx] << std::endl; + } + } + + }; // End VirtualCPU class +} // End namespace + + + +#endif // #ifndef EMP_HARDWARE_VIRTUALCPU_HPP_INCLUDE diff --git a/include/emp/hardware/VirtualCPU_InstLib.hpp b/include/emp/hardware/VirtualCPU_InstLib.hpp new file mode 100644 index 0000000000..74d02b79a5 --- /dev/null +++ b/include/emp/hardware/VirtualCPU_InstLib.hpp @@ -0,0 +1,295 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022 + * + * @file VirtualCPU_InstLib.hpp + * @brief A specialized version of InstLib to handle VirtualCPU instructions. + */ + +#ifndef EMP_HARDWARE_VIRTUALCPU_INSTLIB_HPP_INCLUDE +#define EMP_HARDWARE_VIRTUALCPU_INSTLIB_HPP_INCLUDE + +#include "../base/error.hpp" +#include "../math/math.hpp" + +#include "InstLib.hpp" + +namespace emp { + + /// \brief A pure-virtual class that defines a series of instructions for VirtualCPU_Base or any of its derived classes. 
+ template + struct VirtualCPU_InstLib : public InstLib { + using hardware_t = HARDWARE_T; + using inst_lib_t = InstLib; + using arg_t = ARG_T; + using this_t = VirtualCPU_InstLib; + using inst_t = typename hardware_t::inst_t; + using nop_vec_t = typename hardware_t::nop_vec_t; + + // Instructions + static void Inst_NopA(hardware_t & /*hw*/, const inst_t & /*inst*/) { ; } + static void Inst_NopB(hardware_t & /*hw*/, const inst_t & /*inst*/) { ; } + static void Inst_NopC(hardware_t & /*hw*/, const inst_t & /*inst*/) { ; } + static void Inst_Inc(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + ++hw.regs[idx]; + } + static void Inst_Dec(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + --hw.regs[idx]; + } + static void Inst_If_Not_Equal(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_op_1 = inst.nop_vec.size() < 1 ? 1 : inst.nop_vec[0]; + size_t idx_op_2 = inst.nop_vec.size() < 2 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[1]; + if(hw.regs[idx_op_1] == hw.regs[idx_op_2]) + hw.AdvanceIP(1); + hw.AdvanceIP(inst.nop_vec.size()); + } + else{ + size_t idx_1 = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx_1); + if(hw.regs[idx_1] == hw.regs[idx_2]) + hw.AdvanceIP(1); + if(inst.nop_vec.size()) hw.AdvanceIP(1); + } + } + static void Inst_If_Less(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_op_1 = inst.nop_vec.size() < 1 ? 1 : inst.nop_vec[0]; + size_t idx_op_2 = inst.nop_vec.size() < 2 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[1]; + if(hw.regs[idx_op_1] >= hw.regs[idx_op_2]) + hw.AdvanceIP(1); + hw.AdvanceIP(inst.nop_vec.size()); + } + else{ + size_t idx_1 = inst.nop_vec.empty() ? 
1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx_1); + if(hw.regs[idx_1] >= hw.regs[idx_2]) + hw.AdvanceIP(1); + if(inst.nop_vec.size()) hw.AdvanceIP(1); + } + } + static void Inst_Pop(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.StackPop(idx); + } + static void Inst_Push(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.StackPush(idx); + } + static void Inst_Swap_Stack(hardware_t & hw, const inst_t & /*inst*/) { + hw.StackSwap(); + } + static void Inst_Shift_Right(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.regs[idx] >>= 1; + } + static void Inst_Shift_Left(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.regs[idx] <<= 1; + } + static void Inst_Add(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_res = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_op_1 = inst.nop_vec.size() < 2 ? idx_res : inst.nop_vec[1]; + size_t idx_op_2 = inst.nop_vec.size() < 3 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[2]; + hw.regs[idx_res] = hw.regs[idx_op_1] + hw.regs[idx_op_2]; + } + else{ + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx); + hw.regs[idx] = hw.regs[idx] + hw.regs[idx_2]; + } + } + static void Inst_Sub(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_res = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_op_1 = inst.nop_vec.size() < 2 ? idx_res : inst.nop_vec[1]; + size_t idx_op_2 = inst.nop_vec.size() < 3 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[2]; + hw.regs[idx_res] = hw.regs[idx_op_1] - hw.regs[idx_op_2]; + } + else{ + size_t idx = inst.nop_vec.empty() ? 
1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx); + hw.regs[idx] = hw.regs[idx] - hw.regs[idx_2]; + } + } + static void Inst_Nand(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_res = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_op_1 = inst.nop_vec.size() < 2 ? idx_res : inst.nop_vec[1]; + size_t idx_op_2 = inst.nop_vec.size() < 3 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[2]; + hw.regs[idx_res] = ~(hw.regs[idx_op_1] & hw.regs[idx_op_2]); + } + else{ + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx); + // NAND the focal register with its complement; both operands must be read unmodified. + hw.regs[idx] = ~(hw.regs[idx] & hw.regs[idx_2]); + } + } + static void Inst_IO(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + std::cout << "Output: " << hw.regs[idx] << std::endl; + // TODO: Handle input + } + static void Inst_H_Alloc(hardware_t & hw, const inst_t & /*inst*/) { + hw.genome_working.resize(hw.genome.size() * 2, hw.GetDefaultInst()); + hw.regs[0] = hw.genome.size(); + } + static void Inst_H_Divide(hardware_t & hw, const inst_t & /*inst*/) { + if(hw.read_head >= hw.genome.size()){ + hw.genome_working.resize(hw.read_head, 0); + hw.ResetHardware(); + hw.inst_ptr = hw.genome.size() - 1; + std::cout << "Divide!"
<< std::endl; + } + } + static void Inst_H_Copy(hardware_t & hw, const inst_t & /*inst*/) { + hw.genome_working[hw.write_head] = hw.genome_working[hw.read_head]; + hw.copied_inst_id_vec.push_back(hw.genome_working[hw.write_head].id); + hw.read_head++; + while(hw.read_head >= hw.genome_working.size()) hw.read_head -= hw.genome_working.size(); + hw.write_head++; + while(hw.write_head >= hw.genome_working.size()) hw.write_head -= hw.genome_working.size(); + // TODO: Mutation + } + static void Inst_H_Search(hardware_t & hw, const inst_t & inst) { + size_t res = hw.FindNopSequence(hw.GetComplementNopSequence(inst.nop_vec), hw.inst_ptr); + if(inst.nop_vec.size() == 0 || res == hw.inst_ptr){ + hw.regs[1] = 0; + hw.regs[2] = 0; + hw.SetFH(hw.inst_ptr + 1); + } + else{ + hw.regs[1] = res > hw.inst_ptr ? res - hw.inst_ptr : res + hw.genome_working.size() - hw.inst_ptr; // Forward distance from IP to match, wrapping past the genome end; compare before subtracting (indices are unsigned). + hw.regs[2] = inst.nop_vec.size(); + hw.SetFH(res + inst.nop_vec.size() + 1); + } + } + static void Inst_Mov_Head(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t dest_idx = hw.flow_head; + if(inst.nop_vec.size() >= 2) dest_idx = hw.GetModdedHead(inst.nop_vec[1]); + if(!inst.nop_vec.empty()) hw.SetModdedHead(inst.nop_vec[0], dest_idx); + else hw.SetIP(dest_idx); + } + else{ + if(!inst.nop_vec.empty()){ + // IP is a special case because it auto advances!
+ if(inst.nop_vec[0] % 4 == 0) hw.SetIP(hw.flow_head - 1); + else hw.SetModdedHead(inst.nop_vec[0], hw.flow_head); + } + else hw.SetIP(hw.flow_head - 1); + } + } + static void Inst_Jmp_Head(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t jump_dist = hw.regs[1]; + if(inst.nop_vec.size() >= 2) jump_dist = hw.regs[inst.nop_vec[1]]; + if(!inst.nop_vec.empty()) hw.AdvanceModdedHead(inst.nop_vec[0], jump_dist); + else hw.AdvanceIP(jump_dist); + } + else{ + if(!inst.nop_vec.empty()) hw.AdvanceModdedHead(inst.nop_vec[0], hw.regs[2]); + else hw.AdvanceIP(hw.regs[2]); + } + } + static void Inst_Get_Head(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t head_val = inst.nop_vec.empty() ? hw.inst_ptr : hw.GetModdedHead(inst.nop_vec[0]); + if(inst.nop_vec.size() < 2) hw.regs[2] = head_val; + else hw.regs[inst.nop_vec[1]] = head_val; + } + else{ + if(inst.nop_vec.empty()) hw.regs[2] = hw.inst_ptr; + else hw.regs[2] = hw.GetModdedHead(inst.nop_vec[0]); + } + } + static void Inst_If_Label(hardware_t & hw, const inst_t & inst) { + hw.AdvanceIP(inst.nop_vec.size()); + if(!hw.CheckIfLastCopied(hw.GetComplementNopSequence(inst.nop_vec))) hw.AdvanceIP(); + } + static void Inst_Set_Flow(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 
2 : inst.nop_vec[0]; + hw.SetFH(hw.regs[idx]); + } + + /// Maintain and return a singleton of default instructions + static const this_t & DefaultInstLib() { + static this_t inst_lib; + if (inst_lib.GetSize() == 0) { + inst_lib.AddInst("NopA", Inst_NopA, 0, "No-operation A"); + inst_lib.AddInst("NopB", Inst_NopB, 0, "No-operation B"); + inst_lib.AddInst("NopC", Inst_NopC, 0, "No-operation C"); + inst_lib.AddInst("IfNEq", Inst_If_Not_Equal, 1, + "Skip next inst unless register values match"); + inst_lib.AddInst("IfLess", Inst_If_Less, 1, + "Skip next inst unless focal register is less than its complement"); + inst_lib.AddInst("Inc", Inst_Inc, 1, "Increment value in reg Arg1"); + inst_lib.AddInst("Dec", Inst_Dec, 1, "Decrement value in reg Arg1"); + inst_lib.AddInst("Pop", Inst_Pop, 1, "Pop value from active stack into register"); + inst_lib.AddInst("Push", Inst_Push, 1, "Add register's value to active stack"); + inst_lib.AddInst("Swap-Stk", Inst_Swap_Stack, 1, "Swap which stack is active"); + inst_lib.AddInst("ShiftR", Inst_Shift_Right, 1, "Shift register value right by one bit"); + inst_lib.AddInst("ShiftL", Inst_Shift_Left, 1, "Shift register value left by one bit"); + inst_lib.AddInst("Add", Inst_Add, 1, + "Add values in registers B and C, then store result in given register"); + inst_lib.AddInst("Sub", Inst_Sub, 1, + "Sub values in registers B and C, then store result in given register"); + inst_lib.AddInst("Nand", Inst_Nand, 1, + "NAND values in registers B and C, then store result in given register"); + inst_lib.AddInst("IO", Inst_IO, 1, + "Output value in given register and then place new input in that register"); + inst_lib.AddInst("HAlloc", Inst_H_Alloc, 1, "Allocate memory for offspring"); + inst_lib.AddInst("HDivide", Inst_H_Divide, 1, "Attempt to split offspring"); + inst_lib.AddInst("HCopy", Inst_H_Copy, 1, "Copy instruction from read head to write head"); + inst_lib.AddInst("HSearch", Inst_H_Search, 1, "Search for label complement"); + 
inst_lib.AddInst("MovHead", Inst_Mov_Head, 1, "Move a given head to a position"); + inst_lib.AddInst("JmpHead", Inst_Jmp_Head, 1, "Move a given head by a relative amount"); + inst_lib.AddInst("GetHead", Inst_Get_Head, 1, "Get location of head"); + inst_lib.AddInst("IfLabel", Inst_If_Label, 1, + "Execute next instruction if label was the last thing copied"); + inst_lib.AddInst("SetFlow", Inst_Set_Flow, 1, "Set flow head to register value"); + /* + inst_lib.AddInst("Dec", Inst_Dec, 1, "Decrement value in reg Arg1"); + inst_lib.AddInst("Not", Inst_Not, 1, "Logically toggle value in reg Arg1"); + inst_lib.AddInst("SetReg", Inst_SetReg, 2, "Set reg Arg1 to numerical value Arg2"); + inst_lib.AddInst("Add", Inst_Add, 3, "regs: Arg3 = Arg1 + Arg2"); + inst_lib.AddInst("Sub", Inst_Sub, 3, "regs: Arg3 = Arg1 - Arg2"); + inst_lib.AddInst("Mult", Inst_Mult, 3, "regs: Arg3 = Arg1 * Arg2"); + inst_lib.AddInst("Div", Inst_Div, 3, "regs: Arg3 = Arg1 / Arg2"); + inst_lib.AddInst("Mod", Inst_Mod, 3, "regs: Arg3 = Arg1 % Arg2"); + inst_lib.AddInst("TestEqu", Inst_TestEqu, 3, "regs: Arg3 = (Arg1 == Arg2)"); + inst_lib.AddInst("TestNEqu", Inst_TestNEqu, 3, "regs: Arg3 = (Arg1 != Arg2)"); + inst_lib.AddInst("TestLess", Inst_TestLess, 3, "regs: Arg3 = (Arg1 < Arg2)"); + inst_lib.AddInst("If", Inst_If, 2, "If reg Arg1 != 0, scope -> Arg2; else skip scope", ScopeType::BASIC, 1); + inst_lib.AddInst("While", Inst_While, 2, "Until reg Arg1 != 0, repeat scope Arg2; else skip", ScopeType::LOOP, 1); + inst_lib.AddInst("Countdown", Inst_Countdown, 2, "Countdown reg Arg1 to zero; scope to Arg2", ScopeType::LOOP, 1); + inst_lib.AddInst("Break", Inst_Break, 1, "Break out of scope Arg1"); + inst_lib.AddInst("Scope", Inst_Scope, 1, "Enter scope Arg1", ScopeType::BASIC, 0); + inst_lib.AddInst("Define", Inst_Define, 2, "Build function Arg1 in scope Arg2", ScopeType::FUNCTION, 1); + inst_lib.AddInst("Call", Inst_Call, 1, "Call previously defined function Arg1"); + inst_lib.AddInst("Push", Inst_Push, 2,
"Push reg Arg1 onto stack Arg2"); + inst_lib.AddInst("Pop", Inst_Pop, 2, "Pop stack Arg1 into reg Arg2"); + inst_lib.AddInst("Input", Inst_Input, 2, "Pull next value from input Arg1 into reg Arg2"); + inst_lib.AddInst("Output", Inst_Output, 2, "Push reg Arg1 into output Arg2"); + inst_lib.AddInst("CopyVal", Inst_CopyVal, 2, "Copy reg Arg1 into reg Arg2"); + inst_lib.AddInst("ScopeReg", Inst_ScopeReg, 1, "Backup reg Arg1; restore at end of scope"); + */ + + //for (size_t i = 0; i < hardware_t::NUM_REGS; i++) { + // inst_lib.AddArg(to_string((int)i), i); // Args can be called by value + // inst_lib.AddArg(to_string("Reg", 'A'+(char)i), i); // ...or as a register. + //} + } + + return inst_lib; + } + }; + +} + +#endif // #ifndef EMP_HARDWARE_VIRTUALCPU_INSTLIB_HPP_INCLUDE diff --git a/include/emp/hardware/signalgp_utils.hpp b/include/emp/hardware/signalgp_utils.hpp index b7a309b706..5a2a39eb89 100644 --- a/include/emp/hardware/signalgp_utils.hpp +++ b/include/emp/hardware/signalgp_utils.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2018 + * date: 2018-2022 */ /** * @file @@ -668,7 +668,7 @@ namespace emp { fun_t new_fun(program[fID].GetAffinity()); size_t expected_func_len = program[fID].GetSize(); // Compute number and location of insertions. 
- const uint32_t num_ins = rnd.GetRandBinomial(program[fID].GetSize(), INST_INS__PER_INST()); + const uint32_t num_ins = rnd.GetBinomial(program[fID].GetSize(), INST_INS__PER_INST()); emp::vector ins_locs; if (num_ins > 0) { ins_locs = emp::RandomUIntVector(rnd, num_ins, 0, program[fID].GetSize()); diff --git a/include/emp/in_progress/AST.hpp b/include/emp/in_progress/AST.hpp new file mode 100644 index 0000000000..3badb83b98 --- /dev/null +++ b/include/emp/in_progress/AST.hpp @@ -0,0 +1,59 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021. + * + * @file AST.hpp + * @brief Tools to build an Abstract Syntax Tree. + * @note Status: ALPHA + */ + +#ifndef EMP_IN_PROGRESS_AST_HPP_INCLUDE +#define EMP_IN_PROGRESS_AST_HPP_INCLUDE + +#include + +#include "../base/assert.hpp" +#include "../base/error.hpp" +#include "../meta/TypeID.hpp" + +namespace emp { + + template + struct AST { + + // Base class for all AST nodes. + class Node { + protected: + emp::Ptr parent; + public: + Node(emp::Ptr in_parent=nullptr) : parent(in_parent) { } + virtual ~Node() { } + + emp::Ptr GetParent() { return parent; } + void SetParent(emp::Ptr in_parent) { parent = in_parent; } + + virtual std::string GetName() const = 0; + virtual emp::TypeID GetType() const = 0; + + virtual bool IsLeaf() const { return false; } + virtual bool IsInternal() const { return false; } + + virtual size_t GetNumChildren() const { return 0; } + virtual emp::Ptr GetChild(size_t /* id */) { + emp_error("Calling GetChild on AST::Node with no children."); + return nullptr; + } + + // virtual emp::Ptr GetScope() { return parent ?
parent->GetScope() : nullptr; } + // virtual entry_ptr_t Process() = 0; + virtual std::function AsMathFunction() = 0; + + virtual void Write(std::ostream & /* os */=std::cout, + const std::string & /* offset */="") const { } + }; + + }; +} + +#endif // #ifndef EMP_IN_PROGRESS_AST_HPP_INCLUDE diff --git a/include/emp/in_progress/Ptr-overload-fix.hpp b/include/emp/in_progress/Ptr-overload-fix.hpp new file mode 100644 index 0000000000..1812dd97d9 --- /dev/null +++ b/include/emp/in_progress/Ptr-overload-fix.hpp @@ -0,0 +1,1067 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2016-2022. + * + * @file Ptr-overload-fix.hpp + * @brief A wrapper for pointers that does careful memory tracking (but only in debug mode). + * @note Status: BETA + * + * Ptr objects behave as normal pointers under most conditions. However, if a program is + * compiled with EMP_TRACK_MEM set, then these pointers perform extra tests to ensure that + * they point to valid memory and that memory is freed before pointers are released. + * + * If you want to prevent pointers to pointers (a common source of errors, but MAY be done + * intentionally) you can define EMP_NO_PTR_TO_PTR + * + * If you trip an assert, you can re-do the run and track a specific pointer by defining + * EMP_ABORT_PTR_NEW or EMP_ABORT_PTR_DELETE to the ID of the pointer in question. + * + * For example: -DEMP_ABORT_PTR_NEW=1691 + * + * This will allow you to track the pointer more easily in a debugger. + * + * @todo Track information about emp::vector and emp::array objects to make sure we don't + * point directly into them? (A resize() could make such pointers invalid!) Or better, warn + * if vector memory could have moved.
+ * @todo Get working with threads + */ + +#ifndef EMP_IN_PROGRESS_PTR_OVERLOAD_FIX_HPP_INCLUDE +#define EMP_IN_PROGRESS_PTR_OVERLOAD_FIX_HPP_INCLUDE + +#include +#include + +#include "assert.hpp" +#include "vector.hpp" + +namespace emp { + + // ------------ Pre-declare some helper types and functions -------------- + + template class Ptr; + + + template + inline void FillMemory(emp::Ptr mem_ptr, const size_t num_bytes, T fill_value); + + /// Fill an array by repeatedly calling the provided fill functions. + template + inline void FillMemoryFunction(emp::Ptr mem_ptr, const size_t num_bytes, T fill_fun); + + #ifndef DOXYGEN_SHOULD_SKIP_THIS + namespace internal { + /// An anonymous log2 calculator for hashing below. + static constexpr size_t Log2(size_t x) { return x <= 1 ? 0 : (Log2(x/2) + 1); } + + static bool ptr_debug = false; + } + #endif // DOXYGEN_SHOULD_SKIP_THIS + inline void SetPtrDebug(bool _d = true) { internal::ptr_debug = _d; } + inline bool GetPtrDebug() { return internal::ptr_debug; } + + enum class PtrStatus { DELETED=0, ACTIVE, ARRAY }; + + class PtrInfo { + private: + const void * ptr; ///< Which pointer are we keeping data on? + int count; ///< How many of this pointer do we have? + PtrStatus status; ///< Has this pointer been deleted? (i.e., if so, don't access it!) + size_t array_bytes; ///< How big is the array pointed to (in bytes)? 
+ + public: + PtrInfo(const void * _ptr) : ptr(_ptr), count(1), status(PtrStatus::ACTIVE), array_bytes(0) { + if (internal::ptr_debug) std::cout << "Created info for pointer: " << ptr << std::endl; + } + PtrInfo(const void * _ptr, size_t _array_bytes) + : ptr(_ptr), count(1), status(PtrStatus::ARRAY), array_bytes(_array_bytes) + { + emp_assert(_array_bytes >= 1); + if (internal::ptr_debug) { + std::cout << "Created info for array pointer (bytes=" << array_bytes << "): " + << ptr << std::endl; + } + } + PtrInfo(const PtrInfo &) = default; + PtrInfo(PtrInfo &&) = default; + PtrInfo & operator=(const PtrInfo &) & = default; + PtrInfo & operator=(PtrInfo &&) & = default; + + ~PtrInfo() { + if (internal::ptr_debug) std::cout << "Deleted info for pointer " << ptr << std::endl; + } + + /// What pointer does this one hold information about? + const void * GetPtr() const noexcept { return ptr; } + + /// How many Ptr objects point to the associated position? + int GetCount() const noexcept { return count; } + + /// If this ptr is to an array, how many bytes large is the array (may be different from size!) + size_t GetArrayBytes() const noexcept { return array_bytes; } + + /// Is this pointer currently valid to access? + bool IsActive() const noexcept { return (bool) status; } + + /// Is this pointer pointing to an array? + bool IsArray() const noexcept { return status == PtrStatus::ARRAY; } + + /// Denote that this pointer is an array. + void SetArray(size_t bytes) noexcept { array_bytes = bytes; status = PtrStatus::ARRAY; } + + /// Add one more pointer. + void Inc([[maybe_unused]] const size_t id) { + if (internal::ptr_debug) std::cout << "Inc info for pointer " << ptr << std::endl; + emp_assert(status != PtrStatus::DELETED, "Incrementing deleted pointer!", id); + count++; + } + + /// Remove a pointer. 
+ void Dec([[maybe_unused]] const size_t id) { + if (internal::ptr_debug) std::cout << "Dec info for pointer " << ptr << std::endl; + + // Make sure that we have more than one copy, -or- we've already deleted this pointer + emp_assert(count > 1 || status == PtrStatus::DELETED, "Removing last reference to owned Ptr!", id); + count--; + } + + /// Indicate that the associated position has been deleted. + void MarkDeleted() { + if (internal::ptr_debug) std::cout << "Marked deleted for pointer " << ptr << std::endl; + status = PtrStatus::DELETED; + } + + /// Debug utility to determine if everything looks okay with this pointer's information. + bool OK() const noexcept { + if (ptr == nullptr) return false; // Should not have info for a null pointer. + if (status == PtrStatus::ARRAY) { + if (array_bytes == 0) return false; // Arrays cannot be size 0. + if (count == 0) return false; // Active arrays must have pointers to them. + } + if (status == PtrStatus::ACTIVE) { + if (array_bytes > 0) return false; // non-arrays must be array size 0. + if (count == 0) return false; // Active pointers must have references to them. + } + return true; + } + }; + + + /// Facilitate tracking of all Ptr objects in this run. + class PtrTracker { + private: + std::unordered_map ptr_id; ///< Associate raw pointers with unique IDs + emp::vector id_info; ///< Associate IDs with pointer information. + static constexpr size_t UNTRACKED_ID = (size_t) -1; + + // Make PtrTracker a singleton. + PtrTracker() : ptr_id(), id_info() { + std::cout << "EMP_TRACK_MEM: Pointer tracking is active!\n"; + } + PtrTracker(const PtrTracker &) = delete; + PtrTracker(PtrTracker &&) = delete; + PtrTracker & operator=(const PtrTracker &) = delete; + PtrTracker & operator=(PtrTracker &&) = delete; + + public: + ~PtrTracker() { + // Track stats about pointer record. + size_t total = 0; + size_t remain = 0; + emp::vector undeleted_info; + + // Scan through live pointers and make sure all have been deleted. 
+ for (const auto & info : id_info) { + total++; + if (info.GetCount()) remain++; + + if (info.IsActive()) { + undeleted_info.push_back(info); + } + } + + if (undeleted_info.size()) { + std::cerr << undeleted_info.size() << " undeleted pointers at end of execution.\n"; + for (size_t i = 0; i < undeleted_info.size() && i < 10; ++i) { + const auto & info = undeleted_info[i]; + std::cerr << " PTR=" << info.GetPtr() + << " count=" << info.GetCount() + << " active=" << info.IsActive() + << " id=" << ptr_id[info.GetPtr()] + << std::endl; + } + if (undeleted_info.size() > 10) { + std::cerr << " ..." << std::endl; + } + abort(); + } + + std::cout << "EMP_TRACK_MEM: No memory leaks found!\n " + << total << " pointers found; "; + if (remain) { + std::cout << remain << " still exist with a non-null value (but have been properly deleted)"; + } else std::cout << "all have been cleaned up fully."; + std::cout << std::endl; + } + + /// Treat this class as a singleton with a single Get() method to retrieve it. + static PtrTracker & Get() { static PtrTracker tracker; return tracker; } + + /// Get the info associated with an existing pointer. + PtrInfo & GetInfo(const void * ptr) { return id_info[ptr_id[ptr]]; } + PtrInfo & GetInfo(size_t id) { return id_info[id]; } + + /// Determine if a pointer is being tracked. + bool HasPtr(const void * ptr) const { + if (internal::ptr_debug) std::cout << "HasPtr: " << ptr << std::endl; + return ptr_id.find(ptr) != ptr_id.end(); + } + + /// Retrieve the ID associated with a pointer. + size_t GetCurID(const void * ptr) { emp_assert(HasPtr(ptr)); return ptr_id[ptr]; } + + /// Lookup how many pointers are being tracked. + size_t GetNumIDs() const { return id_info.size(); } + + /// How big is an array associated with an ID? + size_t GetArrayBytes(size_t id) const { return id_info[id].GetArrayBytes(); } + + /// Check if an ID is for a pointer that has been deleted. 
+ bool IsDeleted(size_t id) const { + if (id == UNTRACKED_ID) return false; // Not tracked, so not deleted. + if (internal::ptr_debug) std::cout << "IsDeleted: " << id << std::endl; + return !id_info[id].IsActive(); + } + + /// Is a pointer active and ready to be used? + bool IsActive(const void * ptr) { + if (internal::ptr_debug) std::cout << "IsActive: " << ptr << std::endl; + if (ptr_id.find(ptr) == ptr_id.end()) return false; // Not in database. + return GetInfo(ptr).IsActive(); + } + + /// Is a pointer id associated with a pointer that's active and ready to be used? + bool IsActiveID(size_t id) { + if (id == UNTRACKED_ID) return false; + if (id >= id_info.size()) return false; + return id_info[id].IsActive(); + } + + /// Is an ID associated with an array? + bool IsArrayID(size_t id) { + if (internal::ptr_debug) std::cout << "IsArrayID: " << id << std::endl; + if (id == UNTRACKED_ID) return false; + if (id >= id_info.size()) return false; + return id_info[id].IsArray(); + } + + /// How many Ptr objects are associated with an ID? + int GetIDCount(size_t id) const { + if (internal::ptr_debug) std::cout << "Count: " << id << std::endl; + return id_info[id].GetCount(); + } + + /// This pointer was just created as a Ptr! + size_t New(const void * ptr) { + emp_assert(ptr); // Cannot track a null pointer. + size_t id = id_info.size(); +#ifdef EMP_ABORT_PTR_NEW + if (id == EMP_ABORT_PTR_NEW) { + std::cerr << "Aborting at creation of Ptr id " << id << std::endl; + abort(); + } +#endif + if (internal::ptr_debug) std::cout << "New: " << id << " (" << ptr << ")" << std::endl; + // Make sure pointer is not already stored -OR- has been deleted (since re-use is possible). + emp_assert(!HasPtr(ptr) || IsDeleted(GetCurID(ptr)), id); + id_info.emplace_back(ptr); + ptr_id[ptr] = id; + return id; + } + + /// This pointer was just created as a Ptr ARRAY! + size_t NewArray(const void * ptr, size_t array_bytes) { + size_t id = New(ptr); // Build the new pointer. 
+ if (internal::ptr_debug) std::cout << " ...Array of size " << array_bytes << std::endl; + id_info[id].SetArray(array_bytes); + return id; + } + + /// Increment the number of Pointers associated with an ID + void IncID(size_t id) { + if (id == UNTRACKED_ID) return; // Not tracked! + if (internal::ptr_debug) std::cout << "Inc: " << id << std::endl; + id_info[id].Inc(id); + } + + /// Decrement the number of Pointers associated with an ID + void DecID(size_t id) { + if (id == UNTRACKED_ID) return; // Not tracked! + auto & info = id_info[id]; + if (internal::ptr_debug) std::cout << "Dec: " << id << "(" << info.GetPtr() << ")" << std::endl; + emp_assert(info.GetCount() > 0, "Decrementing Ptr, but already zero!", + id, info.GetPtr(), info.IsActive()); + info.Dec(id); + } + + /// Mark the pointers associated with this ID as deleted. + void MarkDeleted(size_t id) { +#ifdef EMP_ABORT_PTR_DELETE + if (id == EMP_ABORT_PTR_DELETE) { + std::cerr << "Aborting at deletion of Ptr id " << id << std::endl; + abort(); + } +#endif + if (internal::ptr_debug) std::cout << "Delete: " << id << std::endl; + emp_assert(id_info[id].IsActive(), "Deleting same emp::Ptr a second time!", id); + id_info[id].MarkDeleted(); + } + }; + + +////////////////////////////////// +// +// --- Ptr implementation --- +// +////////////////////////////////// + +#ifdef EMP_TRACK_MEM + + namespace { + // Debug information provided for each pointer type. + struct PtrDebug { + size_t current; + size_t total; + PtrDebug() : current(0), total(0) { ; } + void AddPtr() { current++; total++; } + void RemovePtr() { current--; } + }; + } + + /// Base class with common functionality (that should not exist in void pointers) + template + class BasePtr { + public: + TYPE * ptr; ///< The raw pointer associated with this Ptr object. + size_t id; ///< A unique ID for this pointer type. 
+ + static constexpr size_t UNTRACKED_ID = (size_t) -1; + + BasePtr(TYPE * in_ptr, size_t in_id) : ptr(in_ptr), id(in_id) { + #ifdef EMP_NO_PTR_TO_PTR + emp_assert(!std::is_pointer_v, "Pointers to pointers are disallowed!"); + #endif + } + + static PtrTracker & Tracker() { return PtrTracker::Get(); } // Single tracker for al Ptr types + + /// Dereference a pointer. + [[nodiscard]] TYPE & operator*() const { + // Make sure a pointer is active and non-null before we dereference it. + emp_assert(Tracker().IsDeleted(id) == false /*, typeid(TYPE).name() */, id); + emp_assert(ptr != nullptr, "Do not dereference a null pointer!"); + return *ptr; + } + + /// Follow a pointer. + TYPE * operator->() const { + // Make sure a pointer is active before we follow it. + emp_assert(Tracker().IsDeleted(id) == false /*, typeid(TYPE).name() */, id); + emp_assert(ptr != nullptr, "Do not follow a null pointer!"); + return ptr; + } + + /// Indexing into array + TYPE & operator[](size_t pos) const { + emp_assert(Tracker().IsDeleted(id) == false /*, typeid(TYPE).name() */, id); + emp_assert(id == UNTRACKED_ID || Tracker().IsArrayID(id), "Only arrays can be indexed into.", id); + emp_assert(id == UNTRACKED_ID || Tracker().GetArrayBytes(id) > (pos*sizeof(TYPE)), + "Indexing out of range.", id, ptr, pos, sizeof(TYPE), Tracker().GetArrayBytes(id)); + emp_assert(ptr != nullptr, "Do not follow a null pointer!"); + return ptr[pos]; + } + + }; + + + /// Base class with functionality only needed in void pointers. + template <> + class BasePtr { + public: + void * ptr; ///< The raw pointer associated with this Ptr object. + size_t id; ///< A unique ID for this pointer type. + + BasePtr(void * in_ptr, size_t in_id) : ptr(in_ptr), id(in_id) { } + static PtrTracker & Tracker() { return PtrTracker::Get(); } // Single tracker for al Ptr types + }; + + /// Base class with functionality only needed in void pointers. 
+ template <> + class BasePtr { + public: + const void * ptr; ///< The raw pointer associated with this Ptr object. + size_t id; ///< A unique ID for this pointer type. + + BasePtr(const void * in_ptr, size_t in_id) : ptr(in_ptr), id(in_id) { } + static PtrTracker & Tracker() { return PtrTracker::Get(); } // Single tracker for al Ptr types + }; + + /// Main Ptr class DEBUG definition. + template + class Ptr : public BasePtr { + public: + using BasePtr::ptr; + using BasePtr::id; + using BasePtr::Tracker; + + using element_type = TYPE; ///< Type being pointed at. + + static constexpr size_t UNTRACKED_ID = (size_t) -1; + + static PtrDebug & DebugInfo() { static PtrDebug info; return info; } // Debug info for each type + + /// Construct a null Ptr by default. + Ptr() : BasePtr(nullptr, UNTRACKED_ID) { + if (internal::ptr_debug) { + std::cout << "null construct." << std::endl; + } + } + + /// Construct using copy constructor + Ptr(const Ptr & _in) : BasePtr(_in.ptr, _in.id) { + if (internal::ptr_debug) std::cout << "copy construct: " << ptr << std::endl; + Tracker().IncID(id); + } + + /// Construct from a raw pointer of compatable type. + template + Ptr(std::enable_if_t,T2*> in_ptr, + bool track=false) + : BasePtr(in_ptr, UNTRACKED_ID) + { + if (internal::ptr_debug) { + std::cout << "raw construct: " << ((void *) ptr) << ". track=" << track << std::endl; + } + + // If this pointer is already active, link to it. + if (Tracker().IsActive(ptr)) { + id = Tracker().GetCurID(ptr); + Tracker().IncID(id); + } + // If we are not already tracking this pointer, but should be, add it. + else if (track) { + id = Tracker().New(ptr); + DebugInfo().AddPtr(); + } + } + + /// Construct from a raw pointer of compatible ARRAY type. + template + Ptr(std::enable_if_t,T2*> _ptr, + size_t array_size, + bool track) + : BasePtr(_ptr, UNTRACKED_ID) + { + const size_t array_bytes = array_size * sizeof(T2); + if (internal::ptr_debug) std::cout << "raw ARRAY construct: " << ptr + << ". 
size=" << array_size << "(" << array_bytes + << " bytes); track=" << track << std::endl; + + // If this pointer is already active, link to it. + if (Tracker().IsActive(ptr)) { + id = Tracker().GetCurID(ptr); + Tracker().IncID(id); + emp_assert(Tracker().GetArrayBytes(id) == array_bytes); // Make sure pointer is consistent. + } + // If we are not already tracking this pointer, but should be, add it. + else if (track) { + id = Tracker().NewArray(ptr, array_bytes); + DebugInfo().AddPtr(); + } + } + + /// Construct from another Ptr<> object of compatible type. + template + Ptr(std::enable_if_t, Ptr > _in) + : BasePtr(_in.Raw(), _in.GetID()) + { + if (internal::ptr_debug) std::cout << "inexact copy construct: " << ptr << std::endl; + Tracker().IncID(id); + } + + /// Construct from nullptr. + Ptr(std::nullptr_t) : Ptr() { + if (internal::ptr_debug) std::cout << "null construct 2." << std::endl; + } + + /// Destructor. + ~Ptr() { + if (internal::ptr_debug) { + std::cout << "destructing Ptr instance "; + if (ptr) std::cout << id << " (" << ((void *) ptr) << ")\n"; + else std::cout << "(nullptr)\n"; + } + Tracker().DecID(id); + } + + /// Is this Ptr currently nullptr? + [[nodiscard]] bool IsNull() const { return ptr == nullptr; } + + /// Convert this Ptr to a raw pointer that isn't going to be tracked. + [[nodiscard]] TYPE * Raw() const { + emp_assert(Tracker().IsDeleted(id) == false, "Do not convert deleted Ptr to raw.", id); + return ptr; + } + + /// Convert this Ptr to a raw pointer of a position in an array. + [[nodiscard]] TYPE * Raw(size_t pos) const { + emp_assert(Tracker().IsDeleted(id) == false, "Do not convert deleted Ptr to array raw.", id); + return &(ptr[pos]); + } + + /// Cast this Ptr to a different type. + template + [[nodiscard]] Ptr Cast() const { + emp_assert(Tracker().IsDeleted(id) == false, "Do not cast deleted pointers.", id); + return (T2*) ptr; + } + + /// Change constness of this Ptr's target; throw an assert of the cast fails. 
+ template + [[nodiscard]] Ptr ConstCast() const { + emp_assert(Tracker().IsDeleted(id) == false, "Do not cast deleted pointers.", id); + emp_assert( (std::is_same< std::remove_const_t , std::remove_const_t >()) ); + return const_cast(ptr); + } + + /// Dynamically cast this Ptr to another type; throw an assert of the cast fails. + template + [[nodiscard]] Ptr DynamicCast() const { + emp_assert(Tracker().IsDeleted(id) == false, "Do not cast deleted pointers.", id); + return dynamic_cast(ptr); + } + + /// Reinterpret this Ptr to another type; throw an assert of the cast fails. + template + [[nodiscard]] Ptr ReinterpretCast() const { + emp_assert(Tracker().IsDeleted(id) == false, "Do not cast deleted pointers.", id); + #ifdef EMP_NO_PTR_TO_PTR + emp_assert(!std::is_pointer_v, "Reinterpreting as pointers to pointers is disallowed!"); + #endif + return reinterpret_cast(ptr); + } + + /// Get the unique ID associated with this pointer. + size_t GetID() const { return id; } + + /// Reallocate this Ptr to a newly allocated value using arguments passed in. + template + void New(T &&... args) { + Tracker().DecID(id); // Remove a pointer to any old memory... + + ptr = new TYPE(std::forward(args)...); // Special new that uses allocated space. + // ptr = (TYPE*) malloc (sizeof(TYPE)); // Build a new raw pointer. + // emp_emscripten_assert(ptr); // No exceptions in emscripten; assert alloc! + // ptr = new (ptr) TYPE(std::forward(args)...); // Special new that uses allocated space. + + if (internal::ptr_debug) std::cout << "Ptr::New() : " << ptr << std::endl; + id = Tracker().New(ptr); // And track it! + DebugInfo().AddPtr(); + } + + /// Reallocate this Ptr to a newly allocated array using the size passed in. + // template + // void NewArray(size_t array_size, Ts &&... args) { + void NewArray(size_t array_size) { + Tracker().DecID(id); // Remove a pointer to any old memory... + + // @CAO: This next portion of code is allocating an array of the appropriate type. 
+ // We are currently using "new", but should shift over to malloc since new throws an + // exception when there's a problem, which will trigger an abort in Emscripten mode. + // We'd rather be able to identify a more specific problem. + ptr = new TYPE[array_size]; // Build a new raw pointer to an array. + // ptr = (TYPE*) malloc (array_size * sizeof(TYPE)); // Build a new raw pointer. + // emp_emscripten_assert(ptr, array_size); // No exceptions in emscripten; assert alloc! + // for (size_t i = 0; i < array_size; i++) { + // new (ptr + i*sizeof(TYPE)) TYPE(args...); + // } + + if (internal::ptr_debug) std::cout << "Ptr::NewArray() : " << ptr << std::endl; + id = Tracker().NewArray(ptr, array_size * sizeof(TYPE)); // And track it! + DebugInfo().AddPtr(); + } + + /// Delete this pointer (must NOT be an array). + void Delete() { + emp_assert(ptr, "Trying to delete null Ptr."); + emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not responsible for."); + emp_assert(Tracker().IsArrayID(id) == false, id, "Trying to delete array pointer as non-array."); + emp_assert(Tracker().IsActive(ptr), id, "Trying to delete inactive pointer (already deleted!)"); + if (internal::ptr_debug) std::cout << "Ptr::Delete() : " << ptr << std::endl; + delete ptr; + Tracker().MarkDeleted(id); + DebugInfo().RemovePtr(); + } + + /// Delete this pointer to an array (must be an array). + void DeleteArray() { + emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not responsible for."); + emp_assert(ptr, "Trying to delete null Ptr."); + emp_assert(Tracker().IsArrayID(id), id, "Trying to delete non-array pointer as array."); + emp_assert(Tracker().IsActive(ptr), id, "Trying to delete inactive pointer (already deleted!)"); + if (internal::ptr_debug) std::cout << "Ptr::DeleteArray() : " << ptr << std::endl; + delete [] ptr; + Tracker().MarkDeleted(id); + DebugInfo().RemovePtr(); + } + + /// Convert this pointer to a hash value. 
+    /// Hash this Ptr by address; low bits that are shared by all objects of this size
+    /// are shifted away so that consecutive objects land in different buckets.
+    size_t Hash() const noexcept {
+      // Chop off useless bits of pointer...
+      static constexpr size_t shift = internal::Log2(1 + sizeof(TYPE));
+      return (size_t)(ptr) >> shift;
+    }
+    struct hash_t { size_t operator()(const Ptr<TYPE> & t) const noexcept { return t.Hash(); } };
+
+    /// Copy assignment; reference counts in the tracker are only touched when the
+    /// source and destination actually differ.
+    Ptr<TYPE> & operator=(const Ptr<TYPE> & _in) & {
+      if (internal::ptr_debug) {
+        std::cout << "copy assignment from id " << _in.id << " to id " << id
+                  << std::endl;
+      }
+      emp_assert(Tracker().IsDeleted(_in.id) == false, _in.id, "Do not copy deleted pointers.");
+      if (id != _in.id || ptr != _in.ptr) { // Assignments only need to happen if ptrs are different.
+        if (internal::ptr_debug) std::cout << "...pointers differ -- copying!" << std::endl;
+        Tracker().DecID(id);
+        ptr = _in.ptr;
+        id = _in.id;
+        Tracker().IncID(id);
+      } else {
+        if (internal::ptr_debug) std::cout << "...pointers same -- no copying!" << std::endl;
+      }
+      return *this;
+    }
+
+    /// Assign to a raw pointer of the correct type; if this is already tracked, hook in
+    /// correctly, otherwise don't track.
+    template <typename T2>
+    Ptr<TYPE> & operator=(std::enable_if_t<std::is_convertible_v<T2*,TYPE*>,T2*> _in) & {
+      if (internal::ptr_debug) std::cout << "raw assignment" << std::endl;
+
+      Tracker().DecID(id);    // Decrement references to former pointer at this position.
+      ptr = _in;              // Update to new pointer.
+
+      // If this pointer is already active, link to it.
+      if (Tracker().IsActive(ptr)) {
+        id = Tracker().GetCurID(ptr);
+        Tracker().IncID(id);
+      }
+      // Otherwise, since this ptr was passed in as a raw pointer, we do not manage it.
+      else {
+        id = UNTRACKED_ID;
+      }
+
+      return *this;
+    }
+
+    /// Assign to a convertible Ptr
+    template <typename T2>
+    Ptr<TYPE> & operator=(std::enable_if_t<std::is_convertible_v<T2*,TYPE*>, Ptr<T2> > _in) & {
+      if (internal::ptr_debug) std::cout << "convert-copy assignment" << std::endl;
+      emp_assert(Tracker().IsDeleted(_in.id) == false, _in.id, "Do not copy deleted pointers.");
+      Tracker().DecID(id);
+      ptr = _in.Raw();
+      id = _in.GetID();
+      Tracker().IncID(id);
+      return *this;
+    }
+
+    /// Auto-cast to raw pointer type.
+    operator TYPE *() {
+      // Make sure a pointer is active before we convert it.
+      emp_assert(Tracker().IsDeleted(id) == false /*, typeid(TYPE).name() */, id);
+
+      // We should not automatically convert managed pointers to raw pointers; use .Raw()
+      emp_assert(id != UNTRACKED_ID /*, typeid(TYPE).name() */, id,
+                 "Use Raw() to convert to an untracked Ptr");
+      return ptr;
+    }
+
+    /// Does this pointer exist?
+    operator bool() { return ptr != nullptr; }
+
+    /// Does this const pointer exist?
+    operator bool() const { return ptr != nullptr; }
+
+    // Comparisons work against either another Ptr<TYPE> (compare stored addresses) or
+    // anything directly comparable with a raw TYPE *.
+    template <typename T> bool operator==(const T & in_ptr) const {
+      if constexpr (std::is_same<T, Ptr<TYPE>>()) { return ptr == in_ptr.ptr; }
+      else { return ptr == in_ptr; }
+    }
+    template <typename T> bool operator!=(const T & in_ptr) const { return !operator==(in_ptr); }
+
+    template <typename T> bool operator<(const T & in_ptr) const {
+      if constexpr (std::is_same<T, Ptr<TYPE>>()) { return ptr < in_ptr.ptr; }
+      else { return ptr < in_ptr; }
+    }
+    template <typename T> bool operator>(const T & in_ptr) const {
+      if constexpr (std::is_same<T, Ptr<TYPE>>()) { return ptr > in_ptr.ptr; }
+      else { return ptr > in_ptr; }
+    }
+    template <typename T> bool operator<=(const T & in_ptr) const {
+      if constexpr (std::is_same<T, Ptr<TYPE>>()) { return ptr <= in_ptr.ptr; }
+      else { return ptr <= in_ptr; }
+    }
+    template <typename T> bool operator>=(const T & in_ptr) const {
+      if constexpr (std::is_same<T, Ptr<TYPE>>()) { return ptr >= in_ptr.ptr; }
+      else { return ptr >= in_ptr; }
+    }
+
+    [[nodiscard]] Ptr<TYPE> operator+(int value) const { return ptr + value; }
+    [[nodiscard]] Ptr<TYPE> operator-(int value) const { return ptr - value; }
+    [[nodiscard]] Ptr<TYPE> operator+(size_t value) const { return ptr + value; }
+    [[nodiscard]] Ptr<TYPE> operator-(size_t value) const { return ptr - value; }
+
+    /// Fill an array by repeatedly calling the provided function.
+    /// Untracked pointers are allowed through unchecked; tracked ones must be arrays
+    /// with at least num_bytes of space.
+    template <typename T>
+    void FillMemoryFunction(const size_t num_bytes, T fill_fun) {
+      // Make sure a pointer is active before we write to it.
+      emp_assert(Tracker().IsDeleted(id) == false /*, typeid(TYPE).name() */, id);
+      emp_assert(id == UNTRACKED_ID || Tracker().IsArrayID(id), "Only arrays can fill memory.", id);
+      emp_assert(id == UNTRACKED_ID || Tracker().GetArrayBytes(id) >= num_bytes,
+                 "Overfilling memory.", id, ptr, sizeof(TYPE), Tracker().GetArrayBytes(id));
+      emp_assert(ptr != nullptr, "Do not follow a null pointer!");
+
+      emp::FillMemoryFunction(*this, num_bytes, fill_fun);
+    }
+
+    /// Fill an array with the provided fill_value.
+    /// If fill_value is a function, repeatedly call function.
+    template <typename T>
+    void FillMemory(const size_t num_bytes, T fill_value) {
+      // Make sure a pointer is active before we write to it.
+      emp_assert(Tracker().IsDeleted(id) == false /*, typeid(TYPE).name() */, id);
+      // Test for UNTRACKED_ID first so the tracker is never queried with an invalid ID
+      // (same checks, in the same order, as FillMemoryFunction above).
+      emp_assert(id == UNTRACKED_ID || Tracker().IsArrayID(id), "Only arrays can fill memory.", id);
+      emp_assert(id == UNTRACKED_ID || Tracker().GetArrayBytes(id) >= num_bytes,
+                 "Overfilling memory.", id, ptr, sizeof(TYPE), Tracker().GetArrayBytes(id));
+      emp_assert(ptr != nullptr, "Do not follow a null pointer!");
+
+      emp::FillMemory(*this, num_bytes, fill_value);
+    }
+
+    /// Some debug testing functions
+    int DebugGetCount() const { return Tracker().GetIDCount(id); }
+    bool DebugIsArray() const { return Tracker().IsArrayID(id); }
+    size_t DebugGetArrayBytes() const { return Tracker().GetArrayBytes(id); }
+    bool DebugIsActive() const { return Tracker().IsActiveID(id); }
+
+    /// Check that this Ptr and the tracker agree with each other.
+    bool OK() const {
+      // Untracked ID's should not have pointers in the Tracker.
+      if (id == UNTRACKED_ID) return !Tracker().HasPtr(ptr);
+
+      // Make sure this pointer is linked to the correct info.
+      if (Tracker().GetInfo(id).GetPtr() != ptr) return false;
+
+      // And make sure that info itself is okay.
+      return Tracker().GetInfo(id).OK();
+    }
+
+    // Prevent use of new and delete on Ptr
+    // static void* operator new(std::size_t) noexcept {
+    //   emp_assert(false, "No Ptr::operator new; use emp::NewPtr for clarity.");
+    //   return nullptr;
+    // }
+    // static void* operator new[](std::size_t sz) noexcept {
+    //   emp_assert(false, "No Ptr::operator new[]; use emp::NewPtrArray for clarity.");
+    //   return nullptr;
+    // }
+    //
+    // static void operator delete(void* ptr, std::size_t sz) {
+    //   emp_assert(false, "No Ptr::operator delete; use Delete() member function for clarity.");
+    // }
+    // static void operator delete[](void* ptr, std::size_t sz) {
+    //   emp_assert(false, "No Ptr::operator delete[]; use DeleteArray() member function for clarity.");
+    // }
+
+  };
+
+#else // EMP_TRACK_MEM off...
+
+
+  /// Release-mode base: a bare pointer plus the dereferencing operators.
+  template <typename TYPE>
+  class BasePtr {
+  protected:
+    TYPE * ptr;                 ///< The raw pointer associated with this Ptr object.
+
+  public:
+    BasePtr(TYPE * in_ptr) : ptr(in_ptr) { }
+
+    // Dereference a pointer.
+    [[nodiscard]] TYPE & operator*() const { return *ptr; }
+
+    // Follow a pointer.
+    TYPE * operator->() const { return ptr; }
+
+    // Should implement operator->* to follow a pointer to a member function.
+    // For an example, see:
+    //  https://stackoverflow.com/questions/27634036/overloading-operator-in-c
+
+    // Indexing into array
+    TYPE & operator[](size_t pos) const { return ptr[pos]; }
+  };
+
+  /// Base class with functionality only needed in void pointers.
+  template <> class BasePtr<void> {
+  protected: void * ptr;                 ///< The raw pointer associated with this Ptr object.
+  public: BasePtr(void * in_ptr) : ptr(in_ptr) { }
+  };
+
+  template <> class BasePtr<const void> {
+  protected: const void * ptr;                 ///< The raw pointer associated with this Ptr object.
+  public: BasePtr(const void * in_ptr) : ptr(in_ptr) { }
+  };
+
+  /// Main Ptr class, release definition: a thin wrapper with no tracking at all.
+  template <typename TYPE>
+  class Ptr : public BasePtr<TYPE> {
+  private:
+    using BasePtr<TYPE>::ptr;
+
+  public:
+    using element_type = TYPE;
+
+    /// Default constructor
+    Ptr() : BasePtr<TYPE>(nullptr) {}
+
+    /// Copy constructor
+    Ptr(const Ptr<TYPE> & _in) : BasePtr<TYPE>(_in.ptr) {}
+
+    /// Construct from raw ptr
+    template <typename T2>
+    Ptr(std::enable_if_t<std::is_convertible_v<T2*,TYPE*>,T2*> in_ptr, bool=false)
+      : BasePtr<TYPE>(in_ptr) {}
+    //Ptr(TYPE * in_ptr, bool=false) : BasePtr<TYPE>(in_ptr) {}
+
+    /// Construct from array
+    template <typename T2>
+    Ptr(std::enable_if_t<std::is_convertible_v<T2*,TYPE*>,T2*> _ptr, size_t, bool)
+      : BasePtr<TYPE>(_ptr) {}
+
+    /// From compatible Ptr
+    template <typename T2>
+    Ptr(Ptr<T2> _in, std::enable_if_t<std::is_convertible_v<T2*,TYPE*>,bool> = true)
+      : BasePtr<TYPE>(_in.Raw()) {}
+
+    /// From nullptr
+    Ptr(std::nullptr_t) : Ptr() {}
+
+    /// Destructor
+    ~Ptr() { ; }
+
+    [[nodiscard]] bool IsNull() const { return ptr == nullptr; }
+    [[nodiscard]] TYPE * Raw() const { return ptr; }
+    [[nodiscard]] TYPE * Raw(size_t pos) const { return &(ptr[pos]); }
+    template <typename T2> Ptr<T2> Cast() const { return (T2*) ptr; }
+    template <typename T2> Ptr<T2> ConstCast() const { return const_cast<T2*>(ptr); }
+    template <typename T2> Ptr<T2> DynamicCast() const { return dynamic_cast<T2*>(ptr); }
+    template <typename T2> Ptr<T2> ReinterpretCast() const { return reinterpret_cast<T2*>(ptr); }
+
+    template <typename... T>
+    void New(T &&... args) { ptr = new TYPE(std::forward<T>(args)...); }  // New raw pointer.
+    void NewArray(size_t array_size) { ptr = new TYPE[array_size]; }
+    void Delete() { delete ptr; }
+    void DeleteArray() { delete [] ptr; }
+
+    size_t Hash() const noexcept {
+      static constexpr size_t shift = internal::Log2(1 + sizeof(TYPE));  // Chop off useless bits...
+      return (size_t)(ptr) >> shift;
+    }
+    struct hash_t { size_t operator()(const Ptr<TYPE> & t) const noexcept { return t.Hash(); } };
+
+    // Copy assignments
+    Ptr<TYPE> & operator=(const Ptr<TYPE> & _in) & { ptr = _in.ptr; return *this; }
+
+    // Assign to compatible Ptr or raw (non-managed) pointer.
+ template Ptr & operator=(std::enable_if_t,T2*> _in) & { ptr = _in; return *this; } + template Ptr & operator=(std::enable_if_t, Ptr > _in) & { ptr = _in.Raw(); return *this; } + + // Auto-cast to raw pointer type. + operator TYPE *() { return ptr; } + + operator bool() { return ptr != nullptr; } + operator bool() const { return ptr != nullptr; } + + template bool operator==(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr == in_ptr.ptr; } + else { return ptr == in_ptr; } + } + template bool operator!=(const T & in_ptr) const { return !operator==(in_ptr); } + + template bool operator<(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr < in_ptr.ptr; } + else { return ptr < in_ptr; } + } + template bool operator>(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr > in_ptr.ptr; } + else { return ptr > in_ptr; } + } + template bool operator<=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr <= in_ptr.ptr; } + else { return ptr <= in_ptr; } + } + template bool operator>=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr >= in_ptr.ptr; } + else { return ptr >= in_ptr; } + } + + [[nodiscard]] Ptr operator+(int value) const { return ptr + value; } + [[nodiscard]] Ptr operator-(int value) const { return ptr - value; } + [[nodiscard]] Ptr operator+(size_t value) const { return ptr + value; } + [[nodiscard]] Ptr operator-(size_t value) const { return ptr - value; } + + // Extra functionality (not in raw pointers) + + /// Fill an array with the provided fill_value. + /// If fill_value is a function, repeatedly call function. + template + void FillMemoryFunction(const size_t num_bytes, T fill_fun) { + emp::FillMemoryFunction(*this, num_bytes, fill_fun); + } + + /// Fill an array with the provided fill_value. + /// If fill_value is a function, repeatedly call function. 
+ template + void FillMemory(const size_t num_bytes, T fill_value) { + emp::FillMemory(*this, num_bytes, fill_value); + } + + // Stubs for debug-related functions when outside debug mode. + int DebugGetCount() const { return -1; } + bool DebugIsArray() const { emp_assert(false); return false; } + size_t DebugGetArrayBytes() const { return 0; } + bool DebugIsActive() const { return true; } + bool OK() const { return true; } + }; + +#endif // #ifdef EMP_TRACK_MEM + + // IO + template + std::ostream & operator<<(std::ostream & out, const emp::Ptr & ptr) { + out << ptr.Raw(); + return out; + } + + // @CAO: Reading a pointer from a stream seems like a terrible idea in most situations, but I + // can imagine limited circumstances where it would be needed. + template + std::istream & operator>>(std::istream & is, emp::Ptr & ptr) { + T * val; + is >> val; + ptr = val; + return is; + } + + /// Convert a T* to a Ptr. By default, don't track. + template + [[nodiscard]] Ptr ToPtr(T * _in, bool own=false) { return Ptr(_in, own); } + + /// Convert a T* to a Ptr that we DO track. + template + [[nodiscard]] Ptr TrackPtr(T * _in, bool own=true) { return Ptr(_in, own); } + + /// Create a new Ptr of the target type; use the args in the constructor. + template + [[nodiscard]] Ptr NewPtr(ARGS &&... args) { + auto ptr = new T(std::forward(args)...); + // auto ptr = (T*) malloc (sizeof(T)); // Build a new raw pointer. + // emp_assert(ptr); // No exceptions in emscripten; assert alloc! + // new (ptr) T(std::forward(args)...); // Special new that uses allocated space. + return Ptr(ptr, true); + } + + /// Copy an object pointed to and return a Ptr to the copy. + template + [[nodiscard]] Ptr CopyPtr(Ptr in) { return NewPtr(*in); } + + /// Copy a vector of objects pointed to; return a vector of Ptrs to the new copies. 
+  template <typename T>
+  [[nodiscard]] emp::vector<Ptr<T>> CopyPtrs(const emp::vector<Ptr<T>> & in) {
+    emp::vector<Ptr<T>> out_ptrs(in.size());
+    for (size_t i = 0; i < in.size(); i++) out_ptrs[i] = CopyPtr(in[i]);
+    return out_ptrs;
+  }
+
+  /// Copy a vector of objects pointed to by using their Clone() member function; return vector.
+  template <typename T>
+  [[nodiscard]] emp::vector<Ptr<T>> ClonePtrs(const emp::vector<Ptr<T>> & in) {
+    emp::vector<Ptr<T>> out_ptrs(in.size());
+    for (size_t i = 0; i < in.size(); i++) out_ptrs[i] = in[i]->Clone();
+    return out_ptrs;
+  }
+
+  /// Create a pointer to an array of objects.
+  template <typename T>
+  [[nodiscard]] Ptr<T> NewArrayPtr(size_t array_size) {
+    auto ptr = new T[array_size];                     // Build a new raw pointer.
+    // const size_t alloc_size = array_size * sizeof(T);
+    // auto ptr = (T*) malloc (alloc_size);
+    emp_assert(ptr, array_size);                      // No exceptions in emscripten; assert alloc!
+    // for (size_t i = 0; i < array_size; i++) {      // Loop through all array elements.
+    //   new (ptr + i*sizeof(T)) T(args...);          //    ...and initialize them.
+    // }
+    return Ptr<T>(ptr, array_size, true);
+  }
+
+  /// Fill an array with the provided fill_value.
+  /// If fill_value is a function, repeatedly call function.
+  /// @param mem_ptr    Start of the memory to fill.
+  /// @param num_bytes  Number of bytes to write; need not be a multiple of sizeof(T).
+  /// @param fill_value Either a value whose bytes are tiled across the memory, or a
+  ///                   nullary callable whose return values are.
+  template <typename T>
+  void FillMemory(emp::Ptr<unsigned char> mem_ptr, const size_t num_bytes, T fill_value) {
+    // If the fill value is a function, call that function for each memory position.
+    // This must be the ONLY thing done for callables: falling through would overwrite
+    // the freshly generated data with the raw bytes of the callable object itself.
+    if constexpr (std::is_invocable_v<T>) {
+      FillMemoryFunction(mem_ptr, num_bytes, std::forward<T>(fill_value));
+    } else {
+      constexpr size_t FILL_SIZE = sizeof(T);
+
+      const size_t leftover = num_bytes % FILL_SIZE;
+      const size_t limit = num_bytes - leftover;
+      unsigned char * dest = mem_ptr.Raw();
+
+      // Fill out random bytes in groups of FILL_SIZE.
+      for (size_t byte = 0; byte < limit; byte += FILL_SIZE) {
+        std::memcpy(dest+byte, &fill_value, FILL_SIZE);
+      }
+
+      // If we don't have a multiple of FILL_SIZE, fill in part of the remaining.
+      if (leftover) std::memcpy(dest+limit, &fill_value, leftover);
+    }
+  }
+
+  /// Fill an array by repeatedly calling the provided fill functions.
+  /// Each call supplies sizeof(return type) bytes; a final partial group uses only the
+  /// leading bytes of one extra call.
+  template <typename T>
+  void FillMemoryFunction(emp::Ptr<unsigned char> mem_ptr, const size_t num_bytes, T fill_fun) {
+    static_assert(std::is_invocable_v<T>, "FillMemoryFunction requires an invocable fill_fun.");
+    using return_t = decltype(fill_fun());
+    constexpr size_t FILL_SIZE = sizeof(return_t);
+
+    const size_t leftover = num_bytes % FILL_SIZE;
+    const size_t limit = num_bytes - leftover;
+    unsigned char * dest = mem_ptr.Raw();
+
+    // Fill out random bytes in groups of FILL_SIZE.
+    return_t fill_value;
+    for (size_t byte = 0; byte < limit; byte += FILL_SIZE) {
+      fill_value = fill_fun();
+      std::memcpy(dest+byte, &fill_value, FILL_SIZE);
+    }
+
+    // If we don't have a multiple of FILL_SIZE, fill in part of the remaining.
+    if (leftover) {
+      fill_value = fill_fun();
+      std::memcpy(dest+limit, &fill_value, leftover);
+    }
+  }
+
+} // namespace emp
+
+#endif // #ifndef EMP_IN_PROGRESS_PTR_OVERLOAD_FIX_HPP_INCLUDE
diff --git a/include/emp/in_progress/SimpleLexer.hpp b/include/emp/in_progress/SimpleLexer.hpp
new file mode 100644
index 0000000000..ef24db6565
--- /dev/null
+++ b/include/emp/in_progress/SimpleLexer.hpp
@@ -0,0 +1,87 @@
+/**
+ *  @note This file is part of Empirical, https://github.com/devosoft/Empirical
+ *  @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md
+ *  @date 2021.
+ *
+ *  @file SimpleLexer.hpp
+ *  @brief Lexer with common functionality already filled in.
+ * @note Status: ALPHA + */ + +#ifndef EMP_IN_PROGRESS_SIMPLELEXER_HPP_INCLUDE +#define EMP_IN_PROGRESS_SIMPLELEXER_HPP_INCLUDE + +#include +#include + +#include "../base/error.hpp" +#include "../compiler/Lexer.hpp" + +namespace emp { + + using namespace std::string_literals; + + class SimpleLexer : public emp::Lexer { + private: + int token_identifier; ///< Token id for identifiers + int token_number; ///< Token id for literal numbers + int token_string; ///< Token id for literal strings + int token_char; ///< Token id for literal characters + int token_external; ///< Token id for strings to be evaluated externally. + int token_symbol; ///< Token id for other symbols + + public: + SimpleLexer() { + // Whitespace and comments should always be dismissed (top priority) + IgnoreToken("Whitespace", "[ \t\n\r]+"); + IgnoreToken("//-Comments", "//.*"); + IgnoreToken("/*...*/-Comments", "/[*]([^*]|([*]+[^*/]))*[*]+/"); + + // Meaningful tokens have next priority. + + // An indentifier must begin with a letter, underscore, or dot, and followed by + // more of the same OR numbers or brackets. + token_identifier = AddToken("Identifier", "[a-zA-Z_.][a-zA-Z0-9_.[\\]]*"); + + // A literal number must begin with a digit; it can have any number of digits in it and + // optionally a decimal point. + token_number = AddToken("Literal Number", "[0-9]+(\\.[0-9]+)?"); + + // A string must begin and end with a quote and can have an escaped quote in the middle. + token_string = AddToken("Literal String", "\\\"([^\"\\\\]|\\\\.)*\\\""); + + // A literal char must begin and end with a single quote. It will always be treated as + // its ascii value. + token_char = AddToken("Literal Character", "'([^'\n\\\\]|\\\\.)+'"); + + // Setup a RegEx that can detect up to 4-deep nested parentheses. 
+ const std::string no_parens = "[^()\n\r]*"; + const std::string open = "\"(\""; + const std::string close = "\")\""; + const std::string matched_parens = open + no_parens + close; + const std::string multi_parens = no_parens + "("s + matched_parens + no_parens + ")*"s; + const std::string nested_parens2 = open + multi_parens + close; + const std::string multi_nested2 = no_parens + "("s + nested_parens2 + no_parens + ")*"s; + const std::string nested_parens3 = open + multi_nested2 + close; + const std::string multi_nested3 = no_parens + "("s + nested_parens3 + no_parens + ")*"s; + const std::string nested_parens4 = open + multi_nested3 + close; + const std::string multi_nested4 = no_parens + "("s + nested_parens4 + no_parens + ")*"s; + + // An external value should be evaluated in a provided function. If no such function + // exists, using it will be an error. + token_external = AddToken("External Evaluation", "\"$(\""s + multi_nested4 + "\")\""s); + + // Symbols should have least priority. They include any solitary character not listed + // above, or pre-specified multi-character groups. 
+ token_symbol = AddToken("Symbol", ".|\"==\"|\"!=\"|\"<=\"|\">=\"|\"&&\"|\"||\"|\"**\"|\"%%\""); + } + + bool IsID(const emp::Token token) const noexcept { return token.token_id == token_identifier; } + bool IsNumber(const emp::Token token) const noexcept { return token.token_id == token_number; } + bool IsString(const emp::Token token) const noexcept { return token.token_id == token_string; } + bool IsChar(const emp::Token token) const noexcept { return token.token_id == token_char; } + bool IsSymbol(const emp::Token token) const noexcept { return token.token_id == token_symbol; } + }; +} + +#endif // #ifndef EMP_IN_PROGRESS_SIMPLELEXER_HPP_INCLUDE diff --git a/include/emp/in_progress/SimpleParser.hpp b/include/emp/in_progress/SimpleParser.hpp new file mode 100644 index 0000000000..d88a9f9048 --- /dev/null +++ b/include/emp/in_progress/SimpleParser.hpp @@ -0,0 +1,352 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021. + * + * @file SimpleParser.hpp + * @brief Common praser functionality with custom plugins for variables and functions. + * @note Status: ALPHA + * + * Developer TODO: + * - Make ${ ... } actually work + * - Setup operator RegEx to be built dynamically + * - Allow new operators to be added externally + * - Setup LVALUES as a type, and allow assignment + * - Add in a type system (String, double, vectors, etc.) 
+ */
+
+#ifndef EMP_IN_PROGRESS_SIMPLEPARSER_HPP_INCLUDE
+#define EMP_IN_PROGRESS_SIMPLEPARSER_HPP_INCLUDE
+
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+
+#include "../base/error.hpp"
+#include "../compiler/Lexer.hpp"
+
+#include "SimpleLexer.hpp"
+
+namespace emp {
+  /// Parses math expressions over DataMap entries into callable functions.
+  /// Constant sub-expressions are folded while parsing; anything touching a DataMap
+  /// entry becomes a function evaluated later against a DataMap.
+  class SimpleParser {
+
+    using pos_t = emp::TokenStream::Iterator;
+
+    bool verbose = false;   ///< Print parsing progress to std::cout?
+
+    using value_fun_t = std::function<double(emp::DataMap &)>;
+
+    /// Result of parsing a (sub-)expression: an error, a constant, or a function.
+    struct ValueType {
+      enum type_t { ERROR=0, VALUE, FUNCTION };
+
+      type_t type;
+      double value = 0.0;   // Always initialized so AsFun() never captures garbage.
+      value_fun_t fun;
+
+      ValueType() : type(ERROR) {}
+      ValueType(const ValueType &) = default;
+      ValueType(double in_val) : type(VALUE), value(in_val) { }
+      ValueType(value_fun_t in_fun) : type(FUNCTION), fun(in_fun) { }
+
+      ValueType & operator=(const ValueType &) = default;
+      ValueType & operator=(double in_val) { type = VALUE; value = in_val; return *this; }
+      ValueType & operator=(value_fun_t in_fun) { type = FUNCTION; fun = in_fun; return *this; }
+
+      /// Get this value as a function; constants are wrapped in a function returning them.
+      value_fun_t AsFun() {
+        if (type==FUNCTION) return fun; else return [v=value](emp::DataMap &){ return v; };
+      }
+    };
+
+    struct BinaryOperator {
+      using fun_t = std::function<double(double,double)>;
+      size_t prec;    ///< Precedence; higher values bind more tightly.
+      fun_t fun;
+      void Set(size_t in_prec, fun_t in_fun) { prec = in_prec; fun = in_fun; }
+    };
+
+    /// A named function; one implementation may be stored per argument count (0 to 3).
+    struct Function {
+      using fun0_t = std::function<double()>;
+      using fun1_t = std::function<double(double)>;
+      using fun2_t = std::function<double(double,double)>;
+      using fun3_t = std::function<double(double,double,double)>;
+
+      size_t num_args = 0;
+      fun0_t fun0; fun1_t fun1; fun2_t fun2; fun3_t fun3;
+
+      void Set0(fun0_t in_fun) { num_args = 0; fun0 = in_fun; }
+      void Set1(fun1_t in_fun) { num_args = 1; fun1 = in_fun; }
+      void Set2(fun2_t in_fun) { num_args = 2; fun2 = in_fun; }
+      void Set3(fun3_t in_fun) { num_args = 3; fun3 = in_fun; }
+    };
+
+    // --------- MEMBER VARIABLES -----------
+    SimpleLexer lexer;
+    std::unordered_map<std::string, std::function<double(double)>> unary_ops;
+    std::unordered_map<std::string, BinaryOperator> binary_ops;
+    std::unordered_map<std::string, Function> functions;
+    size_t error_count = 0;
+
+    using error_fun_t = std::function<void(const std::string &)>;
+    error_fun_t error_fun =
+      [](const std::string & msg){ std::cerr << "ERROR: " << msg << std::endl; };
+
+    /// Report an error (message is the concatenation of all args) and return an
+    /// ERROR ValueType so callers can simply `return AddError(...)`.
+    template <typename... Ts>
+    ValueType AddError(Ts &&... args) {
+      error_fun( emp::to_string(args...) );
+      ++error_count;
+      return ValueType();
+    }
+
+  public:
+    SimpleParser(bool use_defaults=true) {
+      if (use_defaults) {
+        AddDefaultOperators();
+        AddDefaultFunctions();
+      }
+    }
+
+    bool HasErrors() const { return error_count > 0; }
+    size_t NumErrors() const { return error_count; }
+
+    error_fun_t GetErrorFun() const { return error_fun; }
+    void SetErrorFun(error_fun_t in_fun) { error_fun = in_fun; }
+
+    // Add a unary operator
+    void AddOp(const std::string & op, std::function<double(double)> fun) {
+      unary_ops[op] = fun;
+    }
+
+    void AddDefaultOperators() {
+      // Setup the unary operators for the parser.
+      AddOp("+", [](double x) { return x; });
+      AddOp("-", [](double x) { return -x; });
+      AddOp("!", [](double x) { return (double) (x==0.0); });
+
+      // Setup the default binary operators for the parser.
+      size_t prec = 0;  // Precedence level of each operator...
+      binary_ops["||"].Set( ++prec, [](double x, double y){ return (x!=0.0)||(y!=0.0); } );
+      binary_ops["&&"].Set( ++prec, [](double x, double y){ return (x!=0.0)&&(y!=0.0); } );
+      binary_ops["=="].Set( ++prec, [](double x, double y){ return x == y; } );
+      binary_ops["!="].Set(   prec, [](double x, double y){ return x != y; } );
+      binary_ops["<"] .Set( ++prec, [](double x, double y){ return x < y; } );
+      binary_ops["<="].Set(   prec, [](double x, double y){ return x <= y; } );
+      binary_ops[">"] .Set(   prec, [](double x, double y){ return x > y; } );
+      binary_ops[">="].Set(   prec, [](double x, double y){ return x >= y; } );
+      binary_ops["+"] .Set( ++prec, [](double x, double y){ return x + y; } );
+      binary_ops["-"] .Set(   prec, [](double x, double y){ return x - y; } );
+      binary_ops["*"] .Set( ++prec, [](double x, double y){ return x * y; } );
+      binary_ops["/"] .Set(   prec, [](double x, double y){ return x / y; } );
+      binary_ops["%"] .Set(   prec, [](double x, double y){ return emp::Mod(x, y); } );
+      binary_ops["**"].Set( ++prec, [](double x, double y){ return emp::Pow(x, y); } );
+      binary_ops["%%"].Set(   prec, [](double x, double y){ return emp::Log(x, y); } );
+    }
+
+    void AddDefaultFunctions() {
+      // Setup the default functions.
+      functions["ABS"].Set1( [](double x){ return std::abs(x); } );
+      functions["EXP"].Set1( [](double x){ return emp::Pow(emp::E, x); } );
+      functions["LOG"].Set1( [](double x){ return std::log(x); } );
+      functions["LOG2"].Set1( [](double x){ return std::log2(x); } );
+      functions["LOG10"].Set1( [](double x){ return std::log10(x); } );
+
+      functions["SQRT"].Set1( [](double x){ return std::sqrt(x); } );
+      functions["CBRT"].Set1( [](double x){ return std::cbrt(x); } );
+
+      functions["SIN"].Set1( [](double x){ return std::sin(x); } );
+      functions["COS"].Set1( [](double x){ return std::cos(x); } );
+      functions["TAN"].Set1( [](double x){ return std::tan(x); } );
+      functions["ASIN"].Set1( [](double x){ return std::asin(x); } );
+      functions["ACOS"].Set1( [](double x){ return std::acos(x); } );
+      functions["ATAN"].Set1( [](double x){ return std::atan(x); } );
+      functions["SINH"].Set1( [](double x){ return std::sinh(x); } );
+      functions["COSH"].Set1( [](double x){ return std::cosh(x); } );
+      functions["TANH"].Set1( [](double x){ return std::tanh(x); } );
+      functions["ASINH"].Set1( [](double x){ return std::asinh(x); } );
+      functions["ACOSH"].Set1( [](double x){ return std::acosh(x); } );
+      functions["ATANH"].Set1( [](double x){ return std::atanh(x); } );
+
+      functions["CEIL"].Set1( [](double x){ return std::ceil(x); } );
+      functions["FLOOR"].Set1( [](double x){ return std::floor(x); } );
+      functions["ROUND"].Set1( [](double x){ return std::round(x); } );
+
+      functions["ISINF"].Set1( [](double x){ return std::isinf(x); } );
+      functions["ISNAN"].Set1( [](double x){ return std::isnan(x); } );
+
+      // Default 2-input functions
+      functions["HYPOT"].Set2( [](double x, double y){ return std::hypot(x,y); } );
+      functions["EXP"].Set2( [](double x, double y){ return emp::Pow(x,y); } );
+      functions["LOG"].Set2( [](double x, double y){ return emp::Log(x,y); } );
+      functions["MIN"].Set2( [](double x, double y){ return (x<y) ? x : y; } );
+      functions["MAX"].Set2( [](double x, double y){ return (x>y) ? x : y; } );
+      functions["POW"].Set2( [](double x, double y){ return emp::Pow(x,y); } );
+
+      // Default 3-input functions.
+      functions["IF"].Set3( [](double x, double y, double z){ return (x!=0.0) ? y : z; } );
+      functions["CLAMP"].Set3( [](double x, double y, double z){ return (x<y) ? y : (x>z) ? z : x; } );
+      functions["TO_SCALE"].Set3( [](double x, double y, double z){ return (z-y)*x+y; } );
+      functions["FROM_SCALE"].Set3( [](double x, double y, double z){ return (x-y) / (z-y); } );
+    }
+
+    /// Helpers for parsing.
+
+    /// Parse one value starting at pos: a unary-operator application, a parenthesized
+    /// expression, a literal number, a function call, or a DataMap entry name.
+    /// pos is advanced past everything consumed.  On failure the error is reported
+    /// through AddError() and an ERROR ValueType is returned.
+    ValueType ParseValue(const DataMap & dm, pos_t & pos) {
+      // Never dereference a position that is past the end of the input.
+      if (!pos.IsValid()) return AddError("Expected a value, but reached the end of the expression.");
+
+      if (verbose) {
+        std::cout << "ParseValue at position " << pos.GetIndex() << " : " << pos->lexeme << std::endl;
+      }
+
+      // Deal with any unary operators...
+      if (emp::Has(unary_ops, pos->lexeme)) {
+        if (verbose) std::cout << "Found UNARY OP: " << pos->lexeme << std::endl;
+        auto op = unary_ops[pos->lexeme];
+        ++pos;
+        ValueType val = ParseValue(dm, pos);
+        if (val.type == ValueType::ERROR) return val;   // Don't wrap an empty function.
+        if (val.type == ValueType::VALUE) { return op(val.value); }
+        return (value_fun_t) [fun=val.fun,op](emp::DataMap & dm){ return op(fun(dm)); };
+      }
+
+      // If we have parentheses, process the contents
+      if (pos->lexeme == "(") {
+        if (verbose) std::cout << "Found: OPEN PAREN" << std::endl;
+        ++pos;
+        ValueType val = ParseMath(dm, pos);
+        if (!pos.IsValid()) return AddError("Expected ')', but reached the end of the expression.");
+        if (pos->lexeme != ")") return AddError("Expected ')', but found '", pos->lexeme, "'.");
+        ++pos;
+        return val;
+      }
+
+      // If this is a value, set it and return.
+      if (lexer.IsNumber(*pos)) {
+        double result = emp::from_string<double>(pos->lexeme);
+        ++pos;
+        return result;
+      }
+
+      // Otherwise it should be an identifier!
+      const std::string & name = pos->lexeme;
+      ++pos;
+
+      // If it is followed by a parenthesis, it should be a function.
+      const bool is_fun = (pos.IsValid() && pos->lexeme == "(");
+
+      if (is_fun) {
+        if (!emp::Has(functions, name)) return AddError("Call to unknown function '", name,"'.");
+        ++pos;
+        emp::vector<ValueType> args;
+        while (pos.IsValid() && pos->lexeme != ")") {
+          args.push_back(ParseMath(dm, pos));
+          if (args.back().type == ValueType::ERROR) return args.back();  // Already reported.
+          if (pos.IsValid() && pos->lexeme == ",") ++pos;
+        }
+        if (!pos.IsValid()) return AddError("Unterminated argument list in call to '", name, "'.");
+        ++pos;
+
+        // Now build the function based on its argument count.
+        const Function & fun_info = functions[name];
+        switch (args.size()) {
+        case 0:
+          if (!fun_info.fun0) return AddError("Function '", name, "' requires arguments.");
+          return (value_fun_t) [fun=fun_info.fun0](emp::DataMap &) { return fun(); };
+        case 1:
+          if (!fun_info.fun1) return AddError("Function '", name, "' cannot have 1 arguments.");
+          return (value_fun_t) [fun=fun_info.fun1,arg0=args[0].AsFun()](emp::DataMap & dm) {
+            return fun(arg0(dm));
+          };
+        case 2:
+          if (!fun_info.fun2) return AddError("Function '", name, "' cannot have 2 arguments.");
+          return (value_fun_t) [fun=fun_info.fun2,arg0=args[0].AsFun(),arg1=args[1].AsFun()](emp::DataMap & dm) {
+            return fun(arg0(dm), arg1(dm));
+          };
+        case 3:
+          if (!fun_info.fun3) return AddError("Function '", name, "' cannot have 3 arguments.");
+          return (value_fun_t) [fun=fun_info.fun3,arg0=args[0].AsFun(),arg1=args[1].AsFun(),arg2=args[2].AsFun()](emp::DataMap & dm) {
+            return fun(arg0(dm), arg1(dm), arg2(dm));
+          };
+        default:
+          return AddError("Too many arguments for function '", name, "'.");
+        }
+      }
+
+      // This must be a DataMap entry name.
+      if (!dm.HasName(name)) return AddError("Unknown data map entry '", name, "'.");
+      size_t id = dm.GetID(name);
+      return (value_fun_t) [id](emp::DataMap & dm){ return dm.GetAsDouble(id); };
+    }
+
+    /// Parse a chain of values joined by binary operators, starting at pos.
+    /// Only operators with precedence above prec_limit are consumed; parsing also stops
+    /// at ')' or ',' or the end of input.  Errors are reported through AddError() and
+    /// returned as an ERROR ValueType.
+    ValueType ParseMath(const DataMap & dm, pos_t & pos, size_t prec_limit=0) {
+      ValueType val1 = ParseValue(dm, pos);
+      if (val1.type == ValueType::ERROR) return val1;
+
+      if (verbose) {
+        if (pos.IsValid()) {
+          std::cout << "ParseMath at " << pos.GetIndex() << " : " << pos->lexeme << std::endl;
+        } else std::cout << "PROCESSED!" << std::endl;
+      }
+
+      while (pos.IsValid() && pos->lexeme != ")" && pos->lexeme != ",") {
+        if (verbose) { std::cout << "...Scanning for op... [" << pos->lexeme << "]" << std::endl; }
+
+        // Anything found here must be a binary operator.  Stop on failure: the position
+        // has not advanced, so continuing the loop would never terminate.
+        if (!Has(binary_ops, pos->lexeme)) {
+          return AddError("Operator '", pos->lexeme, "' NOT found!");
+        }
+
+        // If we have an operator, act on it!
+        const BinaryOperator & op = binary_ops[pos->lexeme];
+        if (prec_limit >= op.prec) return val1; // Precedence not allowed; return current value.
+        ++pos;
+        ValueType val2 = ParseMath(dm, pos, op.prec);
+        if (val2.type == ValueType::ERROR) return val2;
+
+        if (val1.type == ValueType::VALUE) {
+          if (val2.type == ValueType::VALUE) { val1 = op.fun(val1.value, val2.value); }
+          else {
+            val1 = (value_fun_t) [val1_num=val1.value,val2_fun=val2.fun,op_fun=op.fun](emp::DataMap & dm){
+              return op_fun(val1_num, val2_fun(dm));
+            };
+          }
+        } else {
+          if (val2.type == ValueType::VALUE) {
+            val1 = (value_fun_t) [val1_fun=val1.fun,val2_num=val2.value,op_fun=op.fun](emp::DataMap & dm){
+              return op_fun(val1_fun(dm), val2_num);
+            };
+          } else {
+            val1 = (value_fun_t) [val1_fun=val1.fun,val2_fun=val2.fun,op_fun=op.fun](emp::DataMap & dm){
+              return op_fun(val1_fun(dm), val2_fun(dm));
+            };
+          }
+        }
+      }
+
+      // @CAO Make sure there's not a illegal lexeme here.
+
+      return val1;
+    }
+
+    /// Parse a function description that will take a DataMap and return the results.
+    /// For example, if the string "foo * 2 + bar" is passed in, a function will be returned
+    /// that takes a datamap (of the example type) loads in the values of "foo" and "bar", and
+    /// returns the result of the above equation.
+
+    value_fun_t BuildMathFunction(const DataMap & dm, const std::string & expression) {
+      emp::TokenStream tokens = lexer.Tokenize(expression);
+      if (verbose) tokens.Print();
+      pos_t pos = tokens.begin();
+      ValueType val = ParseMath(dm, pos);
+
+      // If this value is fixed, turn it into a function.
+ if (val.type == ValueType::VALUE) { + return [out=val.value](emp::DataMap &){ return out; }; + } + + // Otherwise return the function produced. + #ifdef NDEBUG + return val.fun; + #else + // If we are in debug mode, save the original datamap and double-check compatibility. + return [fun=val.fun,&orig_layout=dm.GetLayout()](emp::DataMap & dm){ + emp_assert(dm.HasLayout(orig_layout)); + return fun(dm); + }; + #endif + } + + }; +} + +#endif // #ifndef EMP_IN_PROGRESS_SIMPLEPARSER_HPP_INCLUDE diff --git a/include/emp/in_progress/TrackedPtr.hpp b/include/emp/in_progress/TrackedPtr.hpp new file mode 100644 index 0000000000..4d47778143 --- /dev/null +++ b/include/emp/in_progress/TrackedPtr.hpp @@ -0,0 +1,56 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file TrackedPtr.hpp + * @brief Similar to Ptr, but memory is tracked and managed elsewhere, such as smart pointers. 
+ * @note Status: ALPHA + * + */ + +#ifndef EMP_IN_PROGRESS_TRACKEDPTR_HPP_INCLUDE +#define EMP_IN_PROGRESS_TRACKEDPTR_HPP_INCLUDE + +#include "Ptr.hpp" + +namespace emp { + + class PtrManager { + public: + virtual void IncCount() = 0; + }; + + template + class TrackedPtr : public Ptr { + private: + using BasePtr::ptr; + + public: + using element_type = TYPE; + + /// Default constructor + TrackedPtr() : Ptr(nullptr) {} + + /// Copy constructor + TrackedPtr(const TrackedPtr & _in) : Ptr(_in) {} + + /// Construct from raw ptr + template Ptr(T2 * in_ptr, bool=false) : BasePtr(in_ptr) {} + + /// Construct from array + template Ptr(T2 * _ptr, size_t, bool) : BasePtr(_ptr) {} + + /// From compatible Ptr + template Ptr(Ptr _in) : BasePtr(_in.Raw()) {} + + /// From nullptr + Ptr(std::nullptr_t) : Ptr() {} + + /// Destructor + ~Ptr() { ; } + + +} + +#endif // #ifndef EMP_IN_PROGRESS_TRACKEDPTR_HPP_INCLUDE diff --git a/include/emp/in_progress/constexpr/ce_array.hpp b/include/emp/in_progress/constexpr/ce_array.hpp index 0f03903d4d..bf7e88003e 100644 --- a/include/emp/in_progress/constexpr/ce_array.hpp +++ b/include/emp/in_progress/constexpr/ce_array.hpp @@ -7,7 +7,7 @@ * @file * @brief ce_array defines a limited array object for use within a constexpr class or function. * - * STATUS: ALPHA + * @note STATUS: ALPHA * */ diff --git a/include/emp/in_progress/constexpr/ce_random.hpp b/include/emp/in_progress/constexpr/ce_random.hpp index 2e5f6f56c5..ae5b95c93e 100644 --- a/include/emp/in_progress/constexpr/ce_random.hpp +++ b/include/emp/in_progress/constexpr/ce_random.hpp @@ -7,7 +7,7 @@ * @file * @brief A versatile and non-patterned pseudo-random-number generator. 
* - * Status: DESIGN + * @note Status: DESIGN * * Constructor: * Random(int _seed=-1) diff --git a/include/emp/in_progress/constexpr/ce_string.hpp b/include/emp/in_progress/constexpr/ce_string.hpp index c47f4c9508..3583f98d28 100644 --- a/include/emp/in_progress/constexpr/ce_string.hpp +++ b/include/emp/in_progress/constexpr/ce_string.hpp @@ -7,7 +7,7 @@ * @file * @brief ce_string defines a limited string object for use within a constexpr class or function. * - * Status: DESIGN. + * @note Status: DESIGN. */ #ifndef EMP_IN_PROGRESS_CONSTEXPR_CE_STRING_HPP_INCLUDE diff --git a/include/emp/io/ContiguousStream.hpp b/include/emp/io/ContiguousStream.hpp index ab14fd42b0..14cff228c5 100644 --- a/include/emp/io/ContiguousStream.hpp +++ b/include/emp/io/ContiguousStream.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Useful for streaming data to contiguous memory. - * Status: RELEASE + * @note Status: RELEASE */ #ifndef EMP_IO_CONTIGUOUSSTREAM_HPP_INCLUDE diff --git a/include/emp/io/File.hpp b/include/emp/io/File.hpp index faf3f642aa..70345f4106 100644 --- a/include/emp/io/File.hpp +++ b/include/emp/io/File.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2018-2020. + * date: 2018-2022. */ /** * @file @@ -9,7 +9,6 @@ * @note Status: BETA * * @todo We need to modify this code so that File can work with Emscripten. - * Alternatively, we might want to have a more flexible file class that wraps this one. * */ @@ -25,19 +24,22 @@ #include #include "../base/vector.hpp" +#include "../meta/FunInfo.hpp" +#include "../tools/String.hpp" #include "../tools/string_utils.hpp" namespace emp { - /// A class to maintin files for loading, writing, storing, and easy access to components. + /// A class to maintain files for loading, writing, storing, and easy access to components. 
class File { protected: - emp::vector lines; + emp::vector lines; + String file_error = ""; public: File() : lines() { ; } File(std::istream & input) : lines() { Load(input); } - File(const std::string & filename) : lines() { Load(filename); } + File(const String & filename) : lines() { Load(filename); } File(const File &) = default; File(File &&) = default; ~File() { ; } @@ -64,31 +66,41 @@ namespace emp { size_t size() const { return lines.size(); } /// Return entire text of the file - emp::vector GetAllLines() {return lines;} + emp::vector GetAllLines() {return lines;} /// Index into a specific line in this file. - std::string & operator[](size_t pos) { return lines[pos]; } + String & operator[](size_t pos) { return lines[pos]; } /// Const index into a specific line in this file. - const std::string & operator[](size_t pos) const { return lines[pos]; } + const String & operator[](size_t pos) const { return lines[pos]; } /// Return the first line in the file. - std::string & front() { return lines.front(); } + String & front() { return lines.front(); } /// Return a const reference to to the first line in the file. - const std::string & front() const { return lines.front(); } + const String & front() const { return lines.front(); } /// Return the last line in the file. - std::string & back() { return lines.back(); } + String & back() { return lines.back(); } /// Return a const reference to the last line in the file. - const std::string & back() const { return lines.back(); } + const String & back() const { return lines.back(); } + + // Was there an error working with this file? + bool HasError() const { return file_error.size(); } + + // Text of error. + const String & GetError() const { return file_error; } + + // Remove any errors. + void ClearError() { file_error.resize(0); } /// Append a new line to the end of the file. 
- File & Append(const std::string & line) { lines.emplace_back(line); return *this; } + File & Append(const String & line) { lines.emplace_back(line); return *this; } /// Append a vector of lines to the end of the file. - File & Append(const emp::vector & in_lines) { + template + File & Append(const emp::vector & in_lines) { size_t start_size = lines.size(); lines.resize(start_size + in_lines.size()); for (size_t pos = 0; pos < in_lines.size(); pos++) { @@ -112,57 +124,62 @@ namespace emp { } /// Extract first line from file - auto operator>>(std::string &out) { - out = size() ? front() : out; - lines.erase(begin()); + auto operator>>(std::string & out) { + if (size()) { + out = front(); + lines.erase(begin()); + } } /// Test if two files are identical. - bool operator==(const File in) { return lines == in.lines; } + bool operator==(const File & in) const { return lines == in.lines; } /// Test if two files are different. - bool operator!=(const File in) { return lines != in.lines; } + bool operator!=(const File & in) const { return lines != in.lines; } - /// Load a line from an input stream into a file. - File & LoadLine(std::istream & input) { + /// Load a line from an input stream into a file; return whether load was successful. + bool LoadLine(std::istream & input) { lines.emplace_back(""); - std::getline(input, lines.back()); + if (!std::getline(input, lines.back())) { + lines.pop_back(); + return false; + } // If the input file is DOS formatted, make sure to remove the \r at the end of each line. if (lines.back().size() && lines.back().back() == '\r') lines.back().pop_back(); - return *this; + return true; } /// Load an entire input stream into a file. File & Load(std::istream & input) { - while (!input.eof()) { - LoadLine(input); - } + while (LoadLine(input)); return *this; } /// Load a file from disk using the provided name. 
/// If file does not exist, this is a nop - File & Load(const std::string & filename) { + File & Load(const String & filename) { std::ifstream file(filename); if (file.is_open()) { Load(file); file.close(); + } else { + file_error.Set("File '", filename, "' failed to open."); } return *this; } /// Write this file to a provided output stream. File & Write(std::ostream & output) { - for (std::string & cur_line : lines) { + for (String & cur_line : lines) { output << cur_line << '\n'; } return *this; } /// Write this file to a file of the provided name. - File & Write(const std::string & filename) { + File & Write(const String & filename) { std::ofstream file(filename); Write(file); file.close(); @@ -170,16 +187,16 @@ namespace emp { } /// Test if a substring exists on ANY line of a file. - bool Contains(const std::string & pattern) const { - for (const std::string & line : lines) { - if (line.find(pattern) != std::string::npos) return true; + bool Contains(const String & pattern) const { + for (const String & line : lines) { + if (line.find(pattern) != String::npos) return true; } return false; } /// Convert this file into an std::set of lines (loses line ordering). - std::set AsSet() const { - std::set line_set; + std::set AsSet() const { + std::set line_set; for (size_t i = 0; i < lines.size(); i++) { line_set.insert(lines[i]); } @@ -187,17 +204,25 @@ namespace emp { } /// Apply a string manipulation function to all lines in the file. - File & Apply(const std::function & fun) { - for (std::string & cur_line : lines) { - fun(cur_line); + template + File & Apply(FUN_T fun) { + for (String & cur_line : lines) { + // If the function returns a string, assume that's what we're supposed to use. + // Otherwise assume that the string gets modified. + using return_t = typename FunInfo::return_t; + if constexpr ( std::is_same() ) { + cur_line = fun(cur_line); + } else { + fun(cur_line); + } } return *this; } /// Purge all lines that don't the criterion function. 
File & KeepIf(const std::function & fun) { - emp::vector new_lines; - for (std::string & cur_line : lines) { + File & KeepIf(const std::function & fun) { + emp::vector new_lines; + for (String & cur_line : lines) { if (fun(cur_line)) new_lines.emplace_back(cur_line); } std::swap(lines, new_lines); @@ -205,39 +230,53 @@ namespace emp { } /// Keep only strings that contain a specific substring. - File & KeepIfContains(const std::string & pattern) { + File & KeepIfContains(const String & pattern) { return KeepIf( - [&pattern](const std::string & line){ return line.find(pattern) != std::string::npos; } + [&pattern](const String & line){ return line.find(pattern) != String::npos; } ); } /// Remove all strings that contain a specific substring. - File & RemoveIfContains(const std::string & pattern) { + File & RemoveIfContains(const String & pattern) { return KeepIf( - [&pattern](const std::string & line){ return line.find(pattern) == std::string::npos; } + [&pattern](const String & line){ return line.find(pattern) == String::npos; } + ); + } + + /// Keep only strings that begin with a specific prefix. + File & KeepIfBegins(const String & prefix) { + return KeepIf( + [&prefix](const String & line){ return line.find(prefix) == 0; } + ); + } + + /// Remove all strings that begin with a specific prefix. + File & RemoveIfBegins(const String & prefix) { + return KeepIf( + [&prefix](const String & line){ return line.Find(prefix) != 0; } ); } /// Remove all lines that are empty strings. File & RemoveEmpty() { - return KeepIf( [](const std::string & str){ return (bool) str.size(); } ); + return KeepIf( [](const String & str){ return (bool) str.size(); } ); } /// Any time multiple whitespaces are next to each other, collapse to a single WS char. /// Prefer '\n' if in whitespace collapsed, otherwise use ' '. 
File & CompressWhitespace() { - Apply(compress_whitespace); + Apply([](String & in){ in.Compress(); }); RemoveEmpty(); return *this; } /// Delete all whitespace; by default keep newlines. File & RemoveWhitespace(bool keep_newlines=true) { - Apply(remove_whitespace); + Apply([](String & in){ in.RemoveWhitespace(); }); RemoveEmpty(); if (!keep_newlines) { - std::string all_lines; - for (const std::string & cur_line : lines){ + String all_lines; + for (const String & cur_line : lines){ all_lines += cur_line; } lines.resize(1); @@ -247,21 +286,23 @@ namespace emp { } /// A technique to remove all comments in a file. - File & RemoveComments(const std::string & marker) { - Apply( [marker](std::string & str) { - size_t pos = str.find(marker); - if (pos !=std::string::npos) str.resize( pos ); + File & RemoveComments(const String & marker, bool skip_quotes=true) { + Apply( [marker,skip_quotes](String & str) { + size_t pos = str.Find(marker, 0, skip_quotes); + if (pos !=String::npos) str.resize( pos ); } ); return *this; } /// Allow remove comments to also be specified with a single character. - File & RemoveComments(char marker) { return RemoveComments(emp::to_string(marker)); } + File & RemoveComments(char marker, bool skip_quotes=true) { + return RemoveComments(emp::MakeString(marker), skip_quotes); + } - /// Run a function on each line of a file and return the restults as a vector. + /// Run a function on each line of a file and return the results as a vector. /// Note: Function is allowed to modify string. template - emp::vector Process(const std::function & fun) { + emp::vector Process(const std::function & fun) { emp::vector results(lines.size()); for (size_t i = 0; i < lines.size(); i++) { results[i] = fun(lines[i]); @@ -269,43 +310,65 @@ namespace emp { return results; } + /// Get a series of lines. 
+ emp::vector Read(size_t start, size_t end) const { + if (end > lines.size()) end = lines.size(); + auto start_it = lines.begin()+static_cast(start); + auto end_it = lines.begin()+static_cast(end); + return emp::vector(start_it, end_it); + } + + /// Get a series of lines until a line meets a certain condition. + emp::vector ReadUntil(size_t start, auto test_fun) const { + size_t end = start; + while (end < lines.size() && !test_fun(lines[end])) ++end; + return Read(start, end); + } + + /// Get a series of lines while each successive line continues to meet a certain condition. + emp::vector ReadWhile(size_t start, auto test_fun) const { + size_t end = start; + while (end < lines.size() && test_fun(lines[end])) ++end; // Test one line at a time, mirroring ReadUntil. + return Read(start, end); + } + /// Remove the first column from the file, returning it as a vector of strings. - emp::vector ExtractCol(char delim=',') { - return Process( [delim](std::string & line){ - return string_pop(line, delim); + emp::vector ExtractCol(char delim=',') { + return Process( [delim](String & line){ + return line.Pop(delim); }); } /// Remove the first column from the file, returning it as a vector of a specified type. template emp::vector ExtractColAs(char delim=',') { - return Process( [delim](std::string & line){ - return emp::from_string(string_pop(line, delim)); + return Process( [delim](String & line){ + return line.Pop(delim).As(); }); } /// Convert a row of a file to a vector of string views. - emp::vector ViewRowSlices(size_t row_id, char delim=',') { - return view_slices(lines[row_id], delim); + emp::vector ViewRowSlices(size_t row_id, String delim=",") { + return lines[row_id].ViewSlices(delim); } /// Remove the first row from the file, returning it as a vector of strings. - emp::vector ExtractRow(char delim=',') { + emp::vector ExtractRow(String delim=",") { // Identify the data as string_views emp::vector sv_row = ViewRowSlices(0, delim); // Build the array to return and copy strings into it. 
- emp::vector out_row(sv_row.size()); + emp::vector out_row(sv_row.size()); for (size_t i=0; i < sv_row.size(); i++) out_row[i] = sv_row[i]; - // Remove the row to be extrated and return the result. + // Remove the row to be extracted and return the result. lines.erase(begin()); return out_row; } /// Remove the first row from the file, returning it as a vector of a specified type. template - emp::vector ExtractRowAs(char delim=',') { + emp::vector ExtractRowAs(String delim=",") { // Identify the data as string_views emp::vector sv_row = ViewRowSlices(0, delim); @@ -313,18 +376,25 @@ namespace emp { emp::vector out_row(sv_row.size()); for (size_t i=0; i < sv_row.size(); i++) out_row[i] = from_string(sv_row[i]); - // Remove the row to be extrated and return the result. + // Remove the row to be extracted and return the result. lines.erase(begin()); return out_row; } + emp::vector< emp::vector > ToCSV(String delim=",") const { + emp::vector< emp::vector > out_csv(lines.size()); + for (size_t row_id = 0; row_id < lines.size(); row_id++) { + out_csv[row_id] = lines[row_id].Slice(delim); + } + return out_csv; + } + template - emp::vector< emp::vector > ToData(char delim=',') { + emp::vector< emp::vector > ToData(String delim=",") const { emp::vector< emp::vector > out_data(lines.size()); - emp::vector sv_row; for (size_t row_id = 0; row_id < lines.size(); row_id++) { - view_slices(lines[row_id], sv_row, delim); + auto sv_row = lines[row_id].ViewSlices(delim); out_data[row_id].resize(sv_row.size()); for (size_t i=0; i < sv_row.size(); i++) { out_data[row_id][i] = from_string(sv_row[i]); @@ -334,6 +404,59 @@ namespace emp { return out_data; } + + // A File::Scan object allows a user to easily step through a File. 
+ class Scan { + private: + const File & file; + size_t line = 0; + + public: + Scan(const File & in, size_t start=0) : file(in), line(start) { } + Scan(const Scan & in) = default; + + const File & GetFile() const { return file; } + size_t GetLine() const { return line; } + + bool AtStart() const { return line == 0; } + bool AtEnd() const { return line >= file.size(); } + operator bool() const { return !AtEnd(); } + + void Set(size_t in_line) { line = in_line; } + void Reset() { line = 0; } + void SetEnd() { line = file.size(); } + + // Get the very next line and advance; returns an empty string once the scan is at the end. + const String & Read() { + if (line >= file.size()) return String::Empty(); + return file[line++]; + } + + // Get a block of lines. + emp::vector ReadTo(size_t end) { + emp_assert(end >= line); + if (end > file.size()) end = file.size(); + size_t start = line; + line = end; + return file.Read(start, end); + } + + // Get a block of lines, ending when a condition is met. + emp::vector ReadUntil(auto test_fun) { + auto out = file.ReadUntil(line, test_fun); + line += out.size(); + return out; + } + + // Get a block of lines for as long as a condition is met. + emp::vector ReadWhile(auto test_fun) { + auto out = file.ReadWhile(line, test_fun); + line += out.size(); + return out; + } + }; + + Scan StartScan(size_t start=0) const { return Scan(*this, start); } }; } diff --git a/include/emp/io/MemoryIStream.hpp b/include/emp/io/MemoryIStream.hpp index 912023a5b5..8912961c9b 100644 --- a/include/emp/io/MemoryIStream.hpp +++ b/include/emp/io/MemoryIStream.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Useful for streaming data from contiguous memory. 
- * Status: RELEASE + * @note Status: RELEASE */ #ifndef EMP_IO_MEMORYISTREAM_HPP_INCLUDE diff --git a/include/emp/io/StreamManager.hpp b/include/emp/io/StreamManager.hpp index d92f3a000f..b7b98af5be 100644 --- a/include/emp/io/StreamManager.hpp +++ b/include/emp/io/StreamManager.hpp @@ -37,7 +37,7 @@ namespace emp { protected: - // Helper under error conditions. 
+ // Helper, especially under error conditions. static std::iostream & GetDefaultStream() { static std::stringstream default_stream; return default_stream; @@ -137,6 +137,7 @@ namespace emp { if constexpr (ACCESS == Access::INPUT) ptr = NewPtr(name); else if constexpr (ACCESS == Access::OUTPUT) ptr = NewPtr(name); else if constexpr (ACCESS == Access::IO) ptr = NewPtr(name); + else emp_error("Unknown access type for file creation in StreamManager."); } // Build string streams. @@ -306,12 +307,18 @@ namespace emp { std::istream & GetInputStream(const std::string & name) { - if (!HasInputStream(name)) return AddInputStream(name); + if (!HasInputStream(name)) { // If we don't have this input stream, add it! + emp_assert(!Has(name)); // Make sure we don't have this stream at all! + return AddInputStream(name); + } return streams[name]->GetInputStream(); } std::ostream & GetOutputStream(const std::string & name) { - if (!HasOutputStream(name)) return AddOutputStream(name); + if (!HasOutputStream(name)) { // If we don't have this output stream, add it! + emp_assert(!Has(name)); // Make sure we don't have this stream at all! + return AddOutputStream(name); + } return streams[name]->GetOutputStream(); } diff --git a/include/emp/matching/matchbin_metrics.hpp b/include/emp/matching/matchbin_metrics.hpp index 98d8d902b3..70cfde0aeb 100644 --- a/include/emp/matching/matchbin_metrics.hpp +++ b/include/emp/matching/matchbin_metrics.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2019-2021. + * date: 2019-2022. 
*/ /** * @file @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -39,7 +40,6 @@ #include "../datastructs/tuple_utils.hpp" #include "../math/Distribution.hpp" #include "../math/math.hpp" -#include "../polyfill/span.hpp" #include "../tools/string_utils.hpp" namespace emp { @@ -256,7 +256,7 @@ namespace emp { } inline static double calculate(const query_t& a, const tag_t& b) { - return (b - a).GetValue() / emp::BitSet::GetNumStates(); + return (b - a).GetValue() / emp::Pow2(Width); } }; @@ -283,7 +283,7 @@ namespace emp { } inline static double calculate(const query_t& a, const tag_t& b) { - constexpr double max_dist = emp::BitSet::GetNumStates(); + constexpr double max_dist = emp::Pow2(Width); return (b >= a ? (b - a).GetValue() : max_dist) / max_dist; } @@ -314,7 +314,7 @@ namespace emp { } inline static double calculate(const query_t& a, const tag_t& b) { - constexpr double max_dist = emp::BitSet::GetNumStates() / 2.0; + constexpr double max_dist = emp::Pow2(Width) / 2.0; return std::min(a - b, b - a).GetValue() / max_dist; } @@ -344,7 +344,7 @@ namespace emp { } inline static double calculate(const query_t& a, const tag_t& b) { - return (a > b ? a - b : b - a).GetValue() / emp::BitSet::GetNumStates(); + return (a > b ? 
a - b : b - a).GetValue() / emp::Pow2(Width); } }; @@ -716,7 +716,7 @@ namespace emp { for (size_t i = 0; i < metric_width; ++ i) { best = std::min(Metric::calculate(dup, b), best); - dup.template ROTL_SELF<1>(); + dup.ROTATE_SELF(-1); } return best; @@ -1178,22 +1178,22 @@ namespace emp { : public BaseMetric< emp::BitSet< std::tuple_size::value - * DimMetric::query_t::value_type::GetSize() + * DimMetric::query_t::value_type::GetCTSize() >, emp::BitSet< std::tuple_size::value - * DimMetric::tag_t::value_type::GetSize() + * DimMetric::tag_t::value_type::GetCTSize() > > { using query_t = emp::BitSet< std::tuple_size::value - * DimMetric::query_t::value_type::GetSize() + * DimMetric::query_t::value_type::GetCTSize() >; using tag_t = emp::BitSet< std::tuple_size::value - * DimMetric::tag_t::value_type::GetSize() + * DimMetric::tag_t::value_type::GetCTSize() >; inline static DimMetric metric{}; @@ -1216,8 +1216,8 @@ namespace emp { typename DimMetric::tag_t arr_b; for (size_t d = 0; d < metric.dim(); ++d) { - arr_a[d].Import(a, d * DimMetric::query_t::value_type::GetSize()); - arr_b[d].Import(b, d * DimMetric::tag_t::value_type::GetSize()); + arr_a[d].Import(a, d * DimMetric::query_t::value_type::GetCTSize()); + arr_b[d].Import(b, d * DimMetric::tag_t::value_type::GetCTSize()); } return DimMetric::calculate(arr_a, arr_b); diff --git a/include/emp/math/CombinedBinomialDistribution.hpp b/include/emp/math/CombinedBinomialDistribution.hpp new file mode 100644 index 0000000000..a8b5786d71 --- /dev/null +++ b/include/emp/math/CombinedBinomialDistribution.hpp @@ -0,0 +1,85 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2018-2022. + * + * @file CombinedBinomialDistribution.hpp + * @brief A means of quickly generating binomial random variables while only storing a small number of distributions. 
+ * @note Status: ALPHA + * + * Quick check for theory: https://math.stackexchange.com/questions/1176385/sum-of-two-independent-binomial-variables + * + * If we want to generate binomial random variables of various trial counts (n's) using the + * Distribution class, we'd have to create a new Distribution for each unique trial count. + * + * This class leverages the fact that B(n, p) + B(m, p) = B(n + m, p) to calculate binomial + * draws with arbitrary trial counts without storing N distributions. + * By storing distributions for powers of 2, we only store log_2(N) distributions. + * + * Developer Notes: + * - We should come up with a more informative name for the file/class + */ + +#ifndef EMP_MATH_COMBINEDBINOMIALDISTRIBUTION_HPP_INCLUDE +#define EMP_MATH_COMBINEDBINOMIALDISTRIBUTION_HPP_INCLUDE + +#include "./Distribution.hpp" + +namespace emp{ + /// \brief A collection of distributions that allows for pulls from a binomial distribution with arbitrary N while only storing log_2(N) distributions + class CombinedBinomialDistribution{ + protected: + emp::vector distribution_vec; /**< The collection of binomial distributions + used to construct any N */ + double p; ///< The success probability of a single Bernoulli trial + size_t cur_max_power; /**< The maximum power of two currently supported by our + distributions */ + + /// Fetch the exponent of the smallest power of two that is greater than or equal to n + size_t GetMaxPower(size_t n) const { + size_t power = 0; + for(size_t val = 1; val < n; val <<= 1, ++power){ ; } + return power; + } + + public: + CombinedBinomialDistribution() : p(0), cur_max_power(0){ ; } + CombinedBinomialDistribution(double _p, size_t _starting_n) : p(_p), cur_max_power(0){ + Expand(_starting_n); + } + + /// Sample a binomial distribution with n events (missing distributions are built on demand) + size_t PickRandom(size_t n, Random & random){ + size_t local_max_power = GetMaxPower(n); + size_t result = 0; + if(local_max_power >= 
distribution_vec.size()) Expand(n); // Also covers an empty (default-constructed or freshly Setup) set. + for(size_t power = 0; power <= local_max_power; 
++power){ + if( (n & (1ull << power)) != 0){ + result += distribution_vec[power].PickRandom(random); + } + } + return result; + } + + /// Reset the distribution with a new probability, p, and a starting n value + void Setup(double _p, size_t _n){ + distribution_vec.clear(); + cur_max_power = 0; + p = _p; + if(_n > (1ull << cur_max_power)) Expand(_n); + } + + /// Create more distributions to handle the given value of n + void Expand(size_t max_n){ + cur_max_power = GetMaxPower(max_n); + for(size_t power = distribution_vec.size(); power <= cur_max_power; ++power){ + distribution_vec.emplace_back(p, 1ull << power); + } + } + + /// Fetch the current maximum power handled by this combined distribution + size_t GetCurMaxPower(){ return cur_max_power; } + }; +} + +#endif // #ifndef EMP_MATH_COMBINEDBINOMIALDISTRIBUTION_HPP_INCLUDE diff --git a/include/emp/math/Distribution.hpp b/include/emp/math/Distribution.hpp index 9ddde7e2f5..73a8d935af 100644 --- a/include/emp/math/Distribution.hpp +++ b/include/emp/math/Distribution.hpp @@ -9,9 +9,9 @@ * @note Status: ALPHA * * A Distribution is a pre-calculated set of probabilities to quickly pick a whole-number result. - * These should be used when either we need to draw from the same distribution many time (and hence - * the extra time to pre-calculate it is amortized away) -or- in functions that we want to call with - * a range of distributions that we may not know ahead of time. + * These should be used when either we need to draw from the same distribution many time (and + * hence the extra time to pre-calculate it is amortized away) -or- in functions that we want to + * call with a range of distributions that we may not know ahead of time. * * Currently, we have: * @@ -20,7 +20,7 @@ * NegativeBinomial - How many attempts to reach N successes, with p probability per attempt? * * - * Developor Notes: + * Developer Notes: * - We should setup an offset in the base Distribution class to ignore "impossible" low values. 
* */ @@ -51,6 +51,7 @@ namespace emp { return weights.Index( in_value * GetTotalProb() ); } + /// Pick a random item using this distribution. size_t PickRandom(Random & random) const { emp_assert(weights.GetSize() > 0, "Distribution can only pick a random entry if it has at least one entry!"); return weights.Index( random.GetDouble(GetTotalProb()) ); @@ -126,7 +127,7 @@ namespace emp { }; - /// How many attempts to reach N successes, assumming p probability per attempt? + /// How many attempts to reach N successes, assuming p probability per attempt? class NegativeBinomial : public Distribution { private: double p = 0.0; diff --git a/include/emp/math/DistributionSet.hpp b/include/emp/math/DistributionSet.hpp new file mode 100644 index 0000000000..5fdcb0c188 --- /dev/null +++ b/include/emp/math/DistributionSet.hpp @@ -0,0 +1,50 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file DistributionSet.hpp + * @brief Management of pre-calculated distributions with different input values. + * @note Status: ALPHA + * + * A DistributionSet manages a set of pre-calculated distributions. When input values are + * provided, the correct distribution is identified, and the associated value is drawn. + * + */ + +#ifndef EMP_MATH_DISTRIBUTIONSET_HPP_INCLUDE +#define EMP_MATH_DISTRIBUTIONSET_HPP_INCLUDE + +#include "Distribution.hpp" + +#include +#include + +#include "Distribution.hpp" + +#include "../datastructs/tuple_utils.hpp" + +namespace emp { + + /// @param DIST_T Type of distribution being used. + /// @param Ts Types of parameters to choose the set based on. + template + class DistributionSet { + private: + /// Map parameters to pre-calculated distributions. + unordered_map< std::tuple, DIST_T, emp::TupleHash> dist_map; + + public: + size_t PickRandom(Random & random, Ts... 
args) { + auto arg_tup = std::make_tuple(args...); // Build the tuple to use as a key. + auto [it, success] = dist_map.emplace(arg_tup, DIST_T(args...)); + return it->second.PickRandom(random); + } + }; + + using BinomialSet = emp::DistributionSet; + using NegativeBinomialSet = emp::DistributionSet; + +} + +#endif // #ifndef EMP_MATH_DISTRIBUTIONSET_HPP_INCLUDE diff --git a/include/emp/math/Random.hpp b/include/emp/math/Random.hpp index 0ce324fb90..95d6540321 100644 --- a/include/emp/math/Random.hpp +++ b/include/emp/math/Random.hpp @@ -6,7 +6,7 @@ /** * @file * @brief A versatile and non-patterned pseudo-random-number generator. - * Status: RELEASE + * @note Status: RELEASE */ #ifndef EMP_MATH_RANDOM_HPP_INCLUDE @@ -76,6 +76,9 @@ namespace emp { /// Starts a new sequence of pseudo random numbers. A negative seed means that the random /// number generator gets its seed from the current system time and the process memory. void ResetSeed(const int64_t seed) noexcept { + value = 0; + expRV = 0.0; + // If the provided seed is <= 0, choose a unique seed based on time and memory location. if (seed <= 0) { uint64_t seed_time = (uint64_t) time(NULL); @@ -178,7 +181,7 @@ namespace emp { inline uint64_t GetUInt64(const uint64_t max) noexcept { if (max <= RAND_CAP) return (uint64_t) GetUInt(max); // Don't need extra precision. - size_t mask = emp::MaskUsed(max); // Create a mask for just the bits we need. + uint64_t mask = emp::MaskUsed(max); // Create a mask for just the bits we need. uint64_t val = GetUInt64() & mask; // Grab a value using just the current bits. while (val >= max) val = GetUInt64() & mask; // Grab new values until we find a valid one. @@ -393,7 +396,7 @@ namespace emp { // Distributions // /// Generate a random variable drawn from a unit normal distribution. 
- double GetRandNormal() noexcept { + double GetNormal() noexcept { // Draw from a Unit Normal Dist // Using Rejection Method and saving of initial exponential random variable double expRV2; @@ -410,18 +413,18 @@ namespace emp { /// @return A random variable drawn from a normal distribution. /// @param mean Center of distribution. /// @param std Standard deviation of distribution. - inline double GetRandNormal(const double mean, const double std) { return mean + GetRandNormal() * std; } + inline double GetNormal(const double mean, const double std) { return mean + GetNormal() * std; } /// Generate a random variable drawn from a Poisson distribution. - inline uint32_t GetRandPoisson(const double n, const double p) { + inline uint32_t GetPoisson(const double n, const double p) { emp_assert(p >= 0.0 && p <= 1.0, p); - // Optimizes for speed and calculability using symmetry of the distribution - if (p > .5) return (uint32_t) n - GetRandPoisson(n * (1 - p)); - else return GetRandPoisson(n * p); + // Optimizes for speed and calculability using symetry of the distribution + if (p > .5) return (uint32_t) n - GetPoisson(n * (1 - p)); + else return GetPoisson(n * p); } /// Generate a random variable drawn from a Poisson distribution. - inline uint32_t GetRandPoisson(const double mean) { + inline uint32_t GetPoisson(const double mean) { // Draw from a Poisson Dist with mean; if cannot calculate, return UINT_MAX. // Uses Rejection Method const double a = exp(-mean); @@ -440,7 +443,7 @@ namespace emp { /// This function is exact, but slow. 
/// @see Random::GetApproxRandBinomial /// @see emp::Binomial in source/tools/Distribution.h - inline uint32_t GetRandBinomial(const double n, const double p) { // Exact + inline uint32_t GetBinomial(const double n, const double p) { // Exact emp_assert(p >= 0.0 && p <= 1.0, p); emp_assert(n >= 0.0, n); // Actually try n Bernoulli events, each with probability p @@ -449,17 +452,18 @@ namespace emp { return k; } - inline uint32_t GetRandGeometric(double p){ - emp_assert(p >= 0 && p <= 1, "Probabilities must be between 0 and 1"); - // TODO: When we have warnings, add one for passing a really small number to - // this function. Alternatively, make this function not ludicrously slow with small numbers. - // Looks like return floor(ln(GetDouble())/ln(1-p)) might be sufficient? - if (p == 0) { - return std::numeric_limits::infinity(); - } - uint32_t result = 1; - while (!P(p)) { result++;} - return result; + /// Generate a random variable drawn from an exponential distribution. + inline double GetExponential(double p) { + emp_assert(p > 0.0 && p <= 1.0, p); + // if (p == 0.0) return std::numeric_limits::infinity(); + if (p == 1.0) return 0.0; + return std::log(GetDouble()) / std::log(1.0 - p); + } + + /// Generate a random variable drawn from a geometric distribution. 
+ inline uint32_t GetGeometric(double p) { + emp_assert(p > 0.0 && p <= 1.0, p); + return static_cast( GetExponential(p) ) + 1; } }; diff --git a/include/emp/math/Range.hpp b/include/emp/math/Range.hpp index c816a53e30..07934bd93b 100644 --- a/include/emp/math/Range.hpp +++ b/include/emp/math/Range.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2019 + * date: 2016-2023 */ /** * @file @@ -14,70 +14,214 @@ #include #include +#include #include "../base/assert.hpp" #include "../base/vector.hpp" +#include "../tools/String.hpp" namespace emp { /// A range of values from a lower limit to and upper limit, of any provided type. - template + template class Range { private: - T lower = std::numeric_limits::min(); ///< Beginning of range, inclusive. - T upper = std::numeric_limits::max(); ///< End of range, inclusive. + T lower = std::numeric_limits::lowest(); ///< Beginning of range, inclusive. 
+ T upper = std::numeric_limits::max(); ///< End of range, (included if INCLUDE_UPPER) + using this_t = Range; public: + static constexpr bool is_integral = std::is_integral(); + Range() = default; - Range(T _l, T _u) : lower(_l), upper(_u) { emp_assert(_l < _u); } + Range(T val) : lower(val), upper(val) { + if constexpr (!INCLUDE_UPPER) upper += GetEpsilon(); + } + Range(T _l, T _u) : lower(_l), upper(_u) { emp_assert(_l <= _u, _l, _u); } + Range(const Range &) = default; + + Range & operator=(const Range&) = default; + bool operator==(const Range& _in) const = default; + bool operator!=(const Range& _in) const = default; T GetLower() const { return lower; } T GetUpper() const { return upper; } - - size_t CalcBin(T value, size_t num_bins) const { - return (size_t) (((double) (value - lower)) / ((double) (upper - lower)) * (double) num_bins); + T GetEpsilon() const { + if constexpr (is_integral) return 1; + else return upper * std::numeric_limits::epsilon(); + } + T GetMaxValue() const { // What is the maximum included value? 
+ if constexpr (INCLUDE_UPPER) return upper; + else return upper - GetEpsilon(); + } + T GetSize() const { return upper - lower + (INCLUDE_UPPER && is_integral); } + [[nodiscard]] static constexpr T MaxLimit() { return std::numeric_limits::max(); } + [[nodiscard]] static constexpr T MinLimit() { return std::numeric_limits::lowest(); } + + emp::String ToString() const { + if constexpr (INCLUDE_UPPER) { + return emp::MakeString('[', lower, ',', upper, ']'); + } else { + return emp::MakeString('[', lower, ',', upper, ')'); + } } - - Range & operator=(const Range&) = default; - bool operator==(const Range& _in) const { return lower==_in.lower && upper==_in.upper; } - bool operator!=(const Range& _in) const { return lower!=_in.lower || upper!=_in.upper; } void SetLower(T l) { lower = l; } void SetUpper(T u) { upper = u; } void Set(T _l, T _u) { emp_assert(_l < _u); lower = _l; upper = _u; } + void ShiftDown(T shift) { + emp_assert(shift > 0); + emp_assert(lower <= upper, lower, upper); + // Guard against underflow + upper = (MinLimit() + shift < upper) ? (upper - shift) : MinLimit(); + lower = (MinLimit() + shift < lower) ? (lower - shift) : MinLimit(); + } + void ShiftUp(T shift) { + emp_assert(shift > 0); + emp_assert(lower <= upper, lower, upper); + // Guard against overflow + upper = (MaxLimit() - shift > upper) ? (upper + shift) : MaxLimit(); + lower = (MaxLimit() - shift > lower) ? (lower + shift) : MaxLimit(); + } + void Shift(T shift) { + if (shift > 0) ShiftUp(shift); + else ShiftDown(-shift); + } - void SetMaxLower() { lower = std::numeric_limits::min(); } + void SetMinLower() { lower = std::numeric_limits::min(); } void SetMaxUpper() { upper = std::numeric_limits::max(); } - /// Determine if a provided value is in the range. - bool Valid(T value) const { return value >= lower && value <= upper; } + void Grow(T amount=1) { + if (amount > 0) upper += amount; + else lower += amount; + } + + // Flexible lower/upper accessor that can get and set. 
+ T & Lower() { return lower; } + T & Upper() { return upper; } + + const T & Lower() const noexcept { return lower; } + const T & Upper() const noexcept { return upper; } + + /// Determine if a provided value is in the range INCLUSIVE of the endpoints. + bool Has(T value) const { + return (value >= lower && value < upper) || (INCLUDE_UPPER && value == upper); + } + [[deprecated("Renamed to Has()")]] + bool Valid(T value) const { return Has(value); } + + bool HasRange(this_t in_range) { + return Has(in_range.lower) && Has(in_range.upper); + } + + /// Will identify if two ranges are next to each other or overlapping. + bool IsConnected(this_t in) const { + return (in.lower >= lower && in.lower <= upper) || + (lower >= in.lower && lower <= in.upper); + } + + /// Determine if there is overlap between two range. + /// Similar to IsConnected, but cannot be merely adjacent. + bool HasOverlap(this_t in) const { + return (in.lower >= lower && in.lower < upper) || + (lower >= in.lower && lower < in.upper); + } + + /// Determine the amount of overlap between two range. + T CalcOverlap(this_t in) const { + const T combo_upper = std::min(upper, in.upper); + const T combo_lower = std::max(lower, in.lower); + return (combo_upper > combo_lower) ? (combo_upper - combo_lower) : T{}; + } + + /// @brief Expand this range to encompass a provided value. + /// @param val Value to expand through. + /// @return Whether the range has changed due to this expansion. + bool Expand(T val) { + if (val < lower) lower = val; + else if (val > upper) { + upper = val; + if constexpr (INCLUDE_UPPER) upper += GetEpsilon(); + } else return false; + return true; + } + + /// @brief Expand this range to encompass all provided values. + /// @return Whether the range has changed due to this expansion. + template + bool Expand(T val1, T val2, Ts... args) { + return Expand(val1) + Expand(val2, args...); // Use + to avoid short-circuiting. + } + + /// Merge this range with another. 
Must be adjacent or overlap (return false if not!) + bool Merge(this_t in) { + if (!IsConnected(in)) return false; + Expand(in.lower, in.upper); + return true; + } + + /// Add a specified value to the end of a range (or return false if failed). + bool Append(T val) { + emp_assert(is_integral, "Only integral ranges can call Append() with a single value."); + if (val != upper + INCLUDE_UPPER) return false; + upper++; + return true; + } /// Force a value into range - T Limit(T _in) const { return (_in < lower) ? lower : ((_in > upper) ? upper : _in); } + T Clamp(T _in) const { + return (_in < lower) ? lower : ((_in >= upper) ? GetMaxValue() : _in); + } + [[deprecated("Renamed to Clamp()")]] + T LimitValue(T _in) const { return Clamp(_in); } + + double ToFraction(T _in) const { + emp_assert(GetSize() != 0); + return static_cast(_in - lower) / static_cast(GetSize()); + } + T FromFraction(double frac) const { return frac * GetSize() + lower; } + + // Adjust the upper or lower if provided value is more limiting. + void LimitLower(T in) { if (in > lower) lower = in; } + void LimitUpper(T in) { if (in < upper) upper = in; } + + size_t CalcBin(T value, size_t num_bins) const { + if (upper == lower) return 0; + return (size_t) (((double) (value - lower)) / ((double) (upper - lower)) * (double) num_bins); + } /// Produce a vector that spreads values evenly across the range. - emp::vector Spread(size_t s) const { + emp::vector Spread(const size_t s) const { emp_assert(s >= 1); + if (s == 1) return emp::vector(1,lower/2+upper/2); // On point is in the middle of the range. emp::vector out(s); out[0] = lower; - if (s > 1) { - T range = upper - lower; - for (size_t i = 1; i < s; i++) { - out[i] = lower + (T) ((((double) i) / (double)(s-1)) * range); - } + T range = upper - lower; + for (size_t i = 1; i < s; i++) { + double frac = static_cast(i)/static_cast(s-1); + out[i] = lower + static_cast(frac * range); } return out; } }; /// Build a new range with auto-detected type. 
- template Range MakeRange(T _l, T _u) { return Range(_l,_u); } + template + Range MakeRange(T _l, T _u) { + return Range(_l,_u); + } /// Build a new range of type int. - inline Range IntRange(int _l, int _u) { return Range(_l,_u); } + template + inline Range IntRange(int _l, int _u) { + return Range(_l,_u); + } /// Build a new range of type double. - inline Range DRange(double _l, double _u) { return Range(_l,_u); } + template + inline Range DRange(double _l, double _u) { + return Range(_l,_u); + } } #endif // #ifndef EMP_MATH_RANGE_HPP_INCLUDE diff --git a/include/emp/math/RangeSet.hpp b/include/emp/math/RangeSet.hpp new file mode 100644 index 0000000000..98ed956da0 --- /dev/null +++ b/include/emp/math/RangeSet.hpp @@ -0,0 +1,499 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2023 + * + * @file RangeSet.hpp + * @brief A collection of ranges that can be operated on collectively. + * @note Status: BETA + */ + +#ifndef EMP_MATH_RANGESET_HPP_INCLUDE +#define EMP_MATH_RANGESET_HPP_INCLUDE + +#include + +#include "../base/vector.hpp" +#include "../datastructs/vector_utils.hpp" + +#include "Range.hpp" + +namespace emp { + + /// RangeSet maintains a collection of ranges. The ranges are exclusive of the endpoint + /// and kept sorted and non-adjacent (i.e., there is a gap between successive ranges). + template + class RangeSet { + public: + using range_t = emp::Range; + using this_t = RangeSet; + + private: + emp::vector range_set; + + // Helper function to find the id of an Range that a value belongs in or can extend; + // returns next-higher index (where a new range would be place) if none fit perfectly. 
+ // size_t _FindRange(size_t val) const { + // for (size_t id = 0; id < range_set.size(); ++id) { + // if (id <= range_set[id].GetEnd()) return id; + // } + // return range_set.size(); + // } + size_t _FindRange(T value) const { + auto it = std::lower_bound( + range_set.begin(), + range_set.end(), + value, + [](const range_t & range, T value) { return range.GetUpper() < value; } + ); + return it - range_set.begin(); + }; + + void _InsertRange(size_t id, range_t range) { emp::InsertAt(range_set, id, range); } + void _RemoveRange(size_t id) { emp::RemoveAt(range_set, id); } + void _RemoveRanges(size_t id, size_t count) { emp::RemoveAt(range_set, id, count); } + + // Helper function to remove empty ranges at the beginning. + void _PruneEmptyFront() { + size_t count = 0; + while (count < range_set.size() && range_set[count].GetSize() == 0) ++count; + if (count) _RemoveRanges(0, count); + } + + // Helper function to remove empty ranges at the end. + void _PruneEmptyBack() { + size_t count = 0; + while (count < range_set.size() && + range_set[range_set.size()-count-1].GetSize() == 0) ++count; + if (count) range_set.resize(range_set.size() - count); + } + + // Helper function to increase the side of a range, possibly merging it with the next range. + void _CleanupMerge(size_t id) { + while (id+1 < range_set.size() && range_set[id].Merge(range_set[id+1])) { + _RemoveRange(id+1); // Delete next range (merged in to current) + } + emp_assert(OK()); + } + + // Helper function to convert a string into a RangeSet. + // Two formats are available + // bitstring: 010001110101111 + // RangeSet: [1,2),[5,8),[9,10),[11,15) + // In the bitstring format, if the final character is '+', all additional positions are + // assumed to be 1. In the RangeSet format, if the first or last element is a '*', it is + // assumed to be the limit for the type; also commas are optional. + // A star by itself is a full RangeSet. 
+ void _FromString(emp::String in) { + if (in.size() == 0) { Clear(); } + else if (in[0] == '*') { + emp::notify::TestError(in.size() > 1, "Star indicates a full range, but must be by itself."); + SetAll(); + } + else if (in[0] == '0' || in[0] == '1') { + Clear(); + for (size_t i=0; i < in.size(); ++i) { + if (in[i] != '0') Insert((T)i); + } + } + else if (in[0] == '[') { + while (in.size()) { + emp::String segment = in.Pop(')'); + segment.PopIf(','); + emp::notify::TestError(!segment.PopIf('['), "Each segment of a RangeSet must begin with '['"); + T start = segment.PopIf('*') ? MinLimit() : segment.PopLiteral(); + emp::notify::TestError(!segment.PopIf(',') && !segment.PopIf('-'), + "Each segment of a RangeSet must be separated by ',' or '-'"); + T end = segment.PopIf('*') ? MaxLimit() : segment.PopLiteral(); + InsertRange(start, end); + } + } + } + + public: + static constexpr bool is_integral = std::is_integral(); + + RangeSet() = default; + explicit RangeSet( range_t start_range) { Insert(start_range); } + RangeSet(T start, T end) { InsertRange(start, end); } + RangeSet(const RangeSet &) = default; + RangeSet(RangeSet &&) = default; + RangeSet(const std::string & bitstring) { + emp_assert(is_integral, "RangeSets can be represented as strings only if they are integral."); + for (size_t i=0; i < bitstring.size(); ++i) { + if (bitstring[i] == '1') Insert((T)i); + } + } + + RangeSet & operator=(const RangeSet &) = default; + RangeSet & operator=(RangeSet &&) = default; + RangeSet & operator=(const std::string & bitstring) { + emp_assert(is_integral, "RangeSets can be represented as strings only if they are integral."); + Clear(); + for (size_t i=0; i < bitstring.size(); ++i) { + if (bitstring[i] == '1') Insert((T)i); + } + return *this; + } + + [[nodiscard]] bool operator<=>(const RangeSet &) const = default; + + [[nodiscard]] bool Has(T val) const { + const size_t id = _FindRange(val); + return (id < range_set.size()) ? 
range_set[id].Has(val) : false; + } + [[nodiscard]] bool HasRange(range_t range) const { + const size_t id = _FindRange(range.Lower()); + return (id < range_set.size()) ? range_set[id].HasRange(range) : false; + } + [[nodiscard]] bool IsEmpty() const { return !range_set.size(); } + [[nodiscard]] static constexpr T MaxLimit() { return std::numeric_limits::max(); } + [[nodiscard]] static constexpr T MinLimit() { return std::numeric_limits::lowest(); } + + /// @return Overall start of all ranges (or max value if no ranges exist.) + [[nodiscard]] T GetStart() const { return IsEmpty() ? MaxLimit() : range_set[0].Lower(); } + + /// @return Overall end of all ranges (or min value if no ranges exist.) + [[nodiscard]] T GetEnd() const { return IsEmpty() ? MinLimit() : range_set.back().Upper(); } + + [[nodiscard]] size_t GetNumRanges() const { return range_set.size(); } + + /// @brief Calculate the total combined size of all ranges. + [[nodiscard]] T GetSize() const { + T total = 0; + for (const auto & x : range_set) total += x.GetSize(); + return total; + } + + /// Present this set of ranges as a string. + [[nodiscard]] emp::String ToString() const { + emp::String out; + for (size_t i = 0; i < range_set.size(); ++i) { + if (i) out += ','; + out += range_set[i].ToString(); + } + return out; + } + + // Return all of the internal ranges (can only be called on l-values) + [[nodiscard]] const emp::vector & GetRanges() const & { return range_set; } + + // Calculate the size of the overlap with a provided range. + [[nodiscard]] bool HasOverlap(range_t range) const { + size_t low_id = _FindRange(range.GetLower()); + if (low_id >= range_set.size()) return false; // Entirely after ranges. + if (range_set[low_id].HasOverlap(range)) return true; // Overlaps at beginning. + return low_id+1 < range_set.size() && range_set[low_id+1].HasOverlap(range); + } + + // Calculate the size of the overlap with a provided range. 
+ [[nodiscard]] T CalcOverlap(range_t range) const { + size_t low_id = _FindRange(range.GetLower()); + size_t up_id = _FindRange(range.GetUpper()); + T result = range_set[low_id].CalcOverlap(range); + if (low_id < up_id) { + for (size_t id=low_id+1; id < up_id; id++) result += range_set[id].GetSize(); + result += range_set[up_id].CalcOverlap(range); + } + return result; + } + + /// @brief Remove all ranges in the set. + RangeSet & Clear() { range_set.resize(0); return *this; } + + /// @brief Set a single range that includes all value. + RangeSet & SetAll() { InsertRange(MinLimit(), MaxLimit()); return *this; } + + /// @brief Shift all ranges by a fixed amount. + /// @param shift How much should the range be shifted by? + RangeSet & Shift(T shift) { + if (shift > 0) ShiftUp(shift); + else if (shift < 0) ShiftDown(shift); + return *this; + } + + RangeSet & ShiftUp(T shift) { + for (auto & range : range_set) range.ShiftUp(shift); + _PruneEmptyBack(); + return *this; + } + + RangeSet & ShiftDown(T shift) { + for (auto & range : range_set) range.ShiftDown(shift); + _PruneEmptyFront(); + return *this; + } + + + [[nodiscard]] this_t CalcShift(T shift) const { + this_t out(*this); + return out.Shift(shift); + } + + [[nodiscard]] this_t CalcShiftDown(T shift) const { + this_t out(*this); + return out.ShiftDown(shift); + } + + [[nodiscard]] this_t CalcShiftUp(T shift) const { + this_t out(*this); + return out.ShiftUp(shift); + } + + /// @brief Insert a value into this range set + /// @param val Value to insert. + /// @return This RangeSet after insertion. + RangeSet & Insert(T val) { + emp_assert(is_integral, "Only integral ranges can call Insert() with a single value."); + + // If empty or beyond the end, append a new range. 
+ if (range_set.size() == 0 || val > GetEnd()) { + range_set.emplace_back(val); + } + + else { + const size_t id = _FindRange(val); + emp_assert(id < range_set.size(), id, range_set.size()); + range_t & range = range_set[id]; + + if (range.Has(val)) return *this; // Already has the value! + else if (range.Append(val)) _CleanupMerge(id); // Extending 'upper' on range + else if (range.GetLower() == val+1) range.Lower()--; // Extending 'lower' on range + else range_set.emplace(range_set.begin()+id, val); // Inserting NEW range. + } + + return *this; + } + + /// @brief Insert a whole range into this set, merging other ranges if needed. + /// @param in New range to include. + /// @return This RangeSet after insertion. + RangeSet & Insert(range_t in) { + const size_t start_id = _FindRange(in.GetLower()); + + // Are we adding a whole new range to the end? + if (start_id == range_set.size()) range_set.push_back(in); + + // Is it already included in the found range? No change! + else if (range_set[start_id].HasRange(in)) return *this; + + // Should we merge in with an existing range? + else if (range_set[start_id].IsConnected(in)) { + range_set[start_id].Merge(in); + _CleanupMerge(start_id); + } + + // Otherwise insert as a new range. + else _InsertRange(start_id, in); + + return *this; + } + + /// @brief Merge an entire range set into this one. + /// @param in_set Range set to add in. + /// @return This RangeSet after insertion. + /// @note Can be optimized to handle big set mergers more efficiently! + RangeSet & Insert(const this_t & in_set) { + for (const range_t & range : in_set.GetRanges()) Insert(range); + return *this; + } + + /// @brief Insert a range into this set, specifying the start and end points. + /// @param start Beginning of new range to include. + /// @param stop Ending of new range to include (range is not inclusive of stop) + /// @return This RangeSet after insertion. 
+ RangeSet & InsertRange(T start, T stop) { return Insert(range_t{start, stop}); } + + /// @brief Remove a single value from this RangeSet. + /// @param val Value to remove + /// @return This RangeSet after removal. + RangeSet & Remove(T val) { + emp_assert(is_integral, "Only integral ranges can call Remove() with a single value."); + + if (!Has(val)) return *this; // Nothing to remove. + + const size_t id = _FindRange(val); + range_t & range = range_set[id]; + if (range.GetSize() == 1) _RemoveRange(id); // Remove whole range + else if (range.GetLower() == val) range.Lower()++; // Inc lower end + else if (range.GetUpper()-1 == val) range.Upper()--; // Dec upper end + else { // Split a range! + _InsertRange(id+1, range_t{val+1,range.GetUpper()}); + range_set[id].SetUpper(val); + } + return *this; + } + + /// @brief Remove all ranges (or partial range) less than a target value. + /// @param val New floor for ranges. + /// @return This RangeSet after removal. + RangeSet & RemoveTo(T val) { + if (val <= GetStart()) return *this; // Nothing to remove. + size_t id = _FindRange(val); + if (val == range_set[id].GetUpper()) ++id; + _RemoveRanges(0, id); // Remove everything before the new start. + if (range_set.size() && range_set[0].Lower() < val) range_set[0].SetLower(val); + return *this; + } + + /// @brief Remove all ranges (or partial range) greater than a target value. + /// @param val New cap for ranges. + /// @return This RangeSet after removal. + RangeSet & RemoveFrom(T val) { + if (val >= GetEnd()) return *this; // Nothing to remove. + size_t id = _FindRange(val); + if (val > range_set[id].GetLower()) ++id; // Include current range if needed. + range_set.resize(id); // Remove everything past new end. + if (GetEnd() > val) range_set.back().SetUpper(val); + return *this; + } + + /// @brief Remove a whole Range from this RangeSet. + /// @param rm_range Range to remove + /// @return This RangeSet after removal. 
+ RangeSet & Remove(range_t rm_range) { + if (!HasOverlap(rm_range)) return *this; + if (rm_range.Lower() <= GetStart()) return RemoveTo(rm_range.Upper()); + if (rm_range.Upper() >= GetEnd()) return RemoveFrom(rm_range.Lower()); + + // Must be removing from the middle. + size_t start_id = _FindRange(rm_range.Lower()); + range_t & start_range = range_set[start_id]; + + // Fully internal to a single Range? Split it! + if (start_range.Lower() < rm_range.Lower() && start_range.Upper() > rm_range.Upper()) { + _InsertRange(start_id+1, range_t{rm_range.Upper(), start_range.Upper()}); + range_set[start_id].SetUpper(rm_range.Lower()); + return *this; + } + + // Deal with beginning of removal - cut it down if needed, and move on to next range. + if (rm_range.Lower() > start_range.Lower()) { + start_range.Upper() = rm_range.Lower(); + ++start_id; + } + + // Deal with end of removal. + size_t end_id = _FindRange(rm_range.Upper()); + if (rm_range.Upper() >= range_set[end_id].Upper()) end_id++; + else range_set[end_id].Lower() = std::max(range_set[end_id].Lower(), rm_range.Upper()); + + // Remove middle. + _RemoveRanges(start_id, end_id - start_id); + + return *this; + } + + /// @brief Remove all ranges in an entire range set from this one. + /// @param in_set Range set to remove. + /// @return This RangeSet after removal. + /// @note Can be optimized to handle big sets more efficiently! + RangeSet & Remove(const this_t & in_set) { + for (const range_t & range : in_set.GetRanges()) Remove(range); + return *this; + } + + + RangeSet & RemoveRange(T start, T stop) { return Remove(range_t{start, stop}); } + + /// @brief Remove everything outside of the provided range. + RangeSet & KeepOnly(T start, T stop) { + emp_assert(start < stop); + RemoveTo(start); + return RemoveFrom(stop); + } + + /// @brief Remove everything outside of the provided range. 
+ RangeSet & KeepOnly(range_t keep_range) { + return KeepOnly(keep_range.GetLower(), keep_range.GetUpper()); + } + + /// @brief Remove everything outside of the provided set of ranges. + RangeSet & KeepOnly(const this_t & in_set) { return Remove(~in_set); } + + + // Some more advanced functions. + + /// @brief Calculate the inverted range set, swapping included and excluded values. + /// @return The inverted RangeSet. + [[nodiscard]] this_t CalcInverse() const { + emp_assert(OK()); + // If this is an empty set, return a full set. + if (range_set.size() == 0) return this_t(MinLimit(), MaxLimit()); + + // Determine if we need to extend the the limits on each side. + const bool add_begin = (GetStart() != MinLimit()); + const bool add_end = (GetEnd() != MaxLimit()); + this_t out; + out.range_set.reserve(range_set.size() + add_begin + add_end - 1); + if (add_begin) out.range_set.emplace_back(MinLimit(),GetStart()); + for (size_t i = 1; i < range_set.size(); ++i) { + out.range_set.emplace_back(range_set[i-1].Upper(), range_set[i].Lower()); + } + if (add_end) out.range_set.emplace_back(GetEnd(), MaxLimit()); + emp_assert(out.OK()); + return out; + } + + this_t & Invert() { *this = CalcInverse(); return *this; } + + // Simple operators: + [[nodiscard]] this_t operator~() const { return CalcInverse(); } + [[nodiscard]] this_t operator|(const this_t & in) const { + emp_assert(in.OK()); + this_t out(*this); + return out.Insert(in); + } + [[nodiscard]] this_t operator&(const this_t & in) const { + emp_assert(in.OK()); + this_t out(*this); + return out.Remove(~in); + } + [[nodiscard]] this_t operator^(const this_t & in) { + emp_assert(in.OK()); + return (*this | in) & ~(*this & in); + } + [[nodiscard]] this_t operator<<(const T shift) const { return CalcShiftUp(shift); } + [[nodiscard]] this_t operator>>(const T shift) const { return CalcShiftDown(shift); } + [[nodiscard]] bool operator[](T val) const { return Has(val); } + + this_t & operator|=(const this_t & in) { Insert(in); 
return *this; } + this_t & operator&=(const this_t & in) { Remove(~in); return *this; } + this_t & operator^=(const this_t & in) { emp_assert(in.OK()); *this = *this^in; return *this; } + this_t & operator<<=(const T shift) { ShiftUp(shift); return *this; } + this_t & operator>>=(const T shift) { ShiftDown(shift); return *this; } + + explicit operator bool() const { return range_set.size(); } + + + /// @brief Overload ostream operator to return Print. + friend std::ostream& operator<<(std::ostream &out, const this_t & range) { + out << range.ToString(); + return out; + } + + /// @brief Check for internal errors in this RangeSet. + bool OK() const { + // Check each range individually. + for (const auto & range : range_set) { + if (range.GetLower() > range.GetUpper()) { + emp::notify::Message("RangeSet::OK() Failed due to invalid range: ", range.ToString()); + return false; + } + } + + // Make sure ranges are in order and have gaps between them. + for (size_t i = 1; i < range_set.size(); ++i) { + if (range_set[i-1].GetUpper() >= range_set[i].GetLower()) { + emp::notify::Message("RangeSet::OK() Failed at range ", i, " of ", range_set.size(), + ". Ranges are: ", ToString()); + return false; + } + } + return true; + } + }; + +} + +#endif // #ifndef EMP_MATH_RANGESET_HPP_INCLUDE diff --git a/include/emp/math/constants.hpp b/include/emp/math/constants.hpp index 35f87bc568..fba2785022 100644 --- a/include/emp/math/constants.hpp +++ b/include/emp/math/constants.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Commonly used constant values. - * Status: RELEASE + * @note Status: RELEASE */ #ifndef EMP_MATH_CONSTANTS_HPP_INCLUDE @@ -30,6 +30,8 @@ namespace emp { constexpr const int32_t MIN_INT = -2147483648; ///< (- 2^31) + constexpr const size_t MAX_SIZE_T = static_cast(-1); + /// Determine the maximum value for any type. // @CAO: Prevent inf to get more realistic numbers for double/float? 
template diff --git a/include/emp/math/math.hpp b/include/emp/math/math.hpp index 0f9acd92ed..012f1691fe 100644 --- a/include/emp/math/math.hpp +++ b/include/emp/math/math.hpp @@ -272,7 +272,7 @@ namespace emp { // exclude clang versions with compiler bug https://reviews.llvm.org/D35190 #if defined(__clang__) && __clang_major__>=9 || defined(__GNUC__) && !defined(__clang__) // if base is not known at compile time, use std::pow which is faster - if ( !__builtin_constant_p( base ) ) return std::pow(base, exp); + if ( !__builtin_constant_p( base ) ) return static_cast(std::pow(base, exp)); // otherwise, use constexpr-friendly implementations else #endif diff --git a/include/emp/math/random_utils.hpp b/include/emp/math/random_utils.hpp index dc2423f5f1..3472b748da 100644 --- a/include/emp/math/random_utils.hpp +++ b/include/emp/math/random_utils.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Helper functions for emp::Random for common random tasks. - * Status: RELEASE + * @note Status: RELEASE */ #ifndef EMP_MATH_RANDOM_UTILS_HPP_INCLUDE @@ -22,6 +22,12 @@ namespace emp { + /// Choose a random element from an indexable container. + template + inline auto SelectRandom(Random & random, const T & container) { + return container[random.GetUInt(container.size())]; + } + /// Randomly reorder all of the elements in a vector. /// If max_count is provided, just make sure that the first max_count entries are randomly /// drawn from entire vector. 
@@ -29,6 +35,7 @@ namespace emp { template inline void Shuffle(Random & random, emp::vector & v, size_t max_count) { + emp_assert(max_count <= v.size()); for (size_t i = 0; i < max_count; i++) { const size_t pos = random.GetUInt(i, v.size()); if (pos == i) continue; @@ -39,6 +46,17 @@ namespace emp { template inline void Shuffle(Random & random, emp::vector & v) { Shuffle(random, v, v.size()); } + template + inline void ShuffleRange(Random & random, emp::vector & v, size_t first, size_t last) + { + emp_assert(first <= last); + emp_assert(last <= v.size()); + for (size_t i = first; i < last; i++) { + const size_t pos = random.GetUInt(i, last); + if (pos == i) continue; + std::swap(v[i], v[pos]); + } + } /// Return an emp::vector numbered 0 through size-1 in a random order. diff --git a/include/emp/math/sequence_utils.hpp b/include/emp/math/sequence_utils.hpp index 6bd6d96c2b..b6184e6cf6 100644 --- a/include/emp/math/sequence_utils.hpp +++ b/include/emp/math/sequence_utils.hpp @@ -9,7 +9,7 @@ * @note Status: BETA * * A set of functions for analyzing sequences, including distance metrics (Hamming and - * Edit/Levenschtein) and alignment. + * Edit/Levenshtein) and alignment. */ #ifndef EMP_MATH_SEQUENCE_UTILS_HPP_INCLUDE @@ -17,12 +17,51 @@ #include +#include "../base/notify.hpp" #include "../base/vector.hpp" +#include "../tools/string_utils.hpp" #include "math.hpp" namespace emp { + /// Generate a sequence from a string. + /// Format: "entry1,entry2,entry3" etc. + /// Entries can be single values (Eg: "72") or ranges using start[:step]:stop format + /// (Eg: "0:100" or "3:5:33"). + + template + emp::vector ToSequence(std::string sequence_str) { + // Clean up input sequence and slice by commas. + emp::remove_whitespace(sequence_str); + emp::vector seq_slices = emp::slice(sequence_str, ','); + emp::vector out; + + // Convert each slice into a value or range of values. 
+ emp::vector range_slices; + for (const std::string & slice : seq_slices) { + emp::slice(slice, range_slices, ':'); + T start = emp::from_string(range_slices[0]); + T step = static_cast(1); + T stop = start + static_cast(1); + + if (range_slices.size() == 2) stop = emp::from_string(range_slices[1]); + else if (range_slices.size() == 3) { + step = emp::from_string(range_slices[1]); + stop = emp::from_string(range_slices[2]); + } + else if (range_slices.size() > 3) { + emp::notify::Exception("math::sequence_utils::ToSequence::invalid_range", + "emp::ToSequence() provided with range with too many ':'", + slice); + } + + for (T i = start; i < stop; i += step) out.push_back(i); + } + + return out; + } + // --- Distance functions for any array-type objects --- /// Hamming distance is a simple count of substitutions needed to convert one array to another. @@ -108,7 +147,7 @@ namespace emp { emp::vector prev_row(size1); // The previous row we calculated emp::vector > edit_info(size2, emp::vector(size1)); - // Initialize the previous row to record the differece from nothing. + // Initialize the previous row to record the difference from nothing. for (size_t i = 0; i < size1; i++) { prev_row[i] = i + 1; edit_info[0][i] = 'i'; diff --git a/include/emp/meta/ConceptWrapper.hpp b/include/emp/meta/ConceptWrapper.hpp index 5940466f2f..d0ea5f8d8b 100644 --- a/include/emp/meta/ConceptWrapper.hpp +++ b/include/emp/meta/ConceptWrapper.hpp @@ -30,7 +30,7 @@ * * REQUIRED_OVERLOAD_FUN ( FUNCTION_NAME, ERROR_MESSAGE, RETURN_TYPE, ARG1_TYPES, OTHER_ARGS... ) * Setup a set of overloaded member functions called FUNCTION_NAME that varies the first - * parameter (and may have additional paramters with fixed types. ARG1_TYPES must be an + * parameter (and may have additional parameters with fixed types. ARG1_TYPES must be an * emp::TypePack that includes the full set of types to be used for the first parameter. * Zero or more additional parameters may be included in OTHER_ARGS. 
The wrapped class must * already define the full set of overloaded functions by the correct name and with the correct @@ -91,7 +91,7 @@ * OPTIONAL_VAR ( VAR_NAME, DEFAULT_VALUE, TYPE ) * Setup a member variable called VAR_NAME. If it already exists in the wrapped class, use * that version. If it does not already exist, create it with the provided TYPE and set it to - * the DEFAULT_VALUE prodided. + * the DEFAULT_VALUE provided. * */ @@ -109,7 +109,7 @@ #define EMP_BUILD_CONCEPT( WRAPPER_NAME, BASE_NAME, ... ) \ - /* Do error-checkig on the inputs! */ \ + /* Do error-checking on the inputs! */ \ EMP_WRAP_EACH(EMP_BUILD_CONCEPT__ERROR_CHECK, __VA_ARGS__) \ /* Build the interface class. */ \ class BASE_NAME { \ @@ -140,8 +140,14 @@ #define EMP_BUILD_CONCEPT__EC_PROTECTED(...) /* PROTECTED okay */ #define EMP_BUILD_CONCEPT__EC_PUBLIC(...) /* PUBLIC okay */ -#define EMP_BUILD_CONCEPT__CHECK_EMPTY(A, CMD) EMP_GET_ARG_2( EMP_BUILD_CONCEPT__SPACER ## A, \ - static_assert(false, "\n\n \033[1;31mInvalid EMP_BUILD_CONCEPT.\033[0m May be invalid command or missing comma in:\n \033[1;32m" #CMD "\033[0m;\n\n"); ) +#define EMP_BUILD_CONCEPT__CHECK_EMPTY(A, CMD) \ + EMP_GET_ARG_2( EMP_BUILD_CONCEPT__SPACER ## A, \ + static_assert(false, \ + "\n\n \033[1;31mInvalid EMP_BUILD_CONCEPT.\033" \ + "[0m May be invalid command or missing comma in:\n" \ + " \033[1;32m" #CMD "\033[0m;\n\n" \ + ); \ + ) #define EMP_BUILD_CONCEPT__SPACER ~, /* EMPTY! */ #define EMP_BUILD_CONCEPT__ERROR @@ -155,7 +161,7 @@ #define EMP_BUILD_CONCEPT__BASE_REQUIRED_FUN(NAME, X, RETURN_T, ...) virtual RETURN_T NAME( __VA_ARGS__ ) = 0; #define EMP_BUILD_CONCEPT__BASE_OPTIONAL_FUN(NAME, X, RETURN_T, ...) virtual RETURN_T NAME( __VA_ARGS__ ) = 0; -// Since you cannot have virtual tempalated functions, we need to do a bit of work in the bast class. +// Since you cannot have virtual templated functions, we need to do a bit of work in the base class. 
// ARGS are: FUNCTION_NAME, ERROR_MESSAGE, RETURN_TYPE, ARG1_TYPES, OTHER_ARGS... #define EMP_BUILD_CONCEPT__BASE_REQUIRED_OVERLOAD_FUN(NAME, X, RETURN_TYPE, ...) \ static_assert(emp::is_TypePack() == true, \ @@ -195,7 +201,7 @@ #define EMP_BUILD_CONCEPT__REQUIRED_FUN_impl(FUN_NAME, ERROR, NUM_ARGS, RETURN_T, ...) \ protected: \ /* Determine return type if we try to call this function in the base class. \ - It should be undefined if the member functon does not exist! */ \ + It should be undefined if the member function does not exist! */ \ template \ using return_t_ ## FUN_NAME = \ EMP_IF( NUM_ARGS, \ @@ -241,7 +247,7 @@ #define EMP_BUILD_CONCEPT__OPTIONAL_impl(FUN_NAME, DEFAULT, NUM_ARGS, RETURN_T, ...) \ protected: \ /* Determine return type if we try to call this function in the base class. \ - It should be undefined if the member functon does not exist! */ \ + It should be undefined if the member function does not exist! */ \ template \ using return_t_ ## FUN_NAME = \ EMP_IF( NUM_ARGS, \ @@ -278,7 +284,7 @@ #define EMP_BUILD_CONCEPT__PROCESS_REQUIRED_OVERLOAD_FUN(FUN_NAME, ERROR_MESSAGE, RETURN_T, ...) \ protected: \ /* Determine return type if we try to call this function in the base class. \ - It should be undefined if the member functon does not exist! */ \ + It should be undefined if the member function does not exist! */ \ template \ using return_t_ ## FUN_NAME = \ decltype( std::declval().FUN_NAME( std::declval() ) ); \ diff --git a/include/emp/meta/FunInfo.hpp b/include/emp/meta/FunInfo.hpp new file mode 100644 index 0000000000..e0ff0c5938 --- /dev/null +++ b/include/emp/meta/FunInfo.hpp @@ -0,0 +1,256 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021 + * + * @file FunInfo.hpp + * @brief Wrap a function to provide more information about it. 
+ * @note Status: ALPHA + * + * FunInfo will collect information about a provided function and facilitate + * manipulations. + * + * + * Developer Notes: + * - Will not currently handle return by reference! + */ + +#ifndef EMP_META_FUNINFO_HPP_INCLUDE +#define EMP_META_FUNINFO_HPP_INCLUDE + +#include + +#include "TypePack.hpp" +#include "ValPack.hpp" + +namespace emp { + + // A generic base class that expands anything with operator() + template + struct FunInfo : public FunInfo< decltype(&T::operator()) > {}; + + // Specialization for functions; redirect to function-object specialization. + template + struct FunInfo + : public FunInfo< std::function > {}; + + // Specialization for functions; redirect to function-object specialization. + template + struct FunInfo + : public FunInfo< std::function > {}; + + + // Specialization for function objects with AT LEAST ONE parameter... + template + struct FunInfo { + private: + // template struct is_templated_converter : std::false_type{}; + // template + // struct is_templated_converter().template operator()(0))>> : std::true_type{}; + + /// Helper function to lock an argument at a designated position in a function. + template + static auto BindAt_impl(CLASS_T fun, BOUND_T && bound, + TypePack, TypePack) { + // If the function needs a reference for the parameter, send the supplied value through. + if constexpr (std::is_reference()) { + return [fun, &bound](BEFORE_Ts &&... before_args, AFTER_Ts &&... after_args) { + return fun(std::forward(before_args)..., + std::forward(bound), + std::forward(after_args)...); + }; + } + // Otherwise, a copy is fine. + else { + return [fun, bound](BEFORE_Ts &&... before_args, AFTER_Ts &&... 
after_args) { + return fun(std::forward(before_args)..., + bound, + std::forward(after_args)...); + }; + } + } + + + public: + using fun_t = RETURN_T(PARAM1_T, PARAM_Ts...); + using return_t = RETURN_T; + using params_t = TypePack; + + template + using arg_t = typename params_t::template get; + + static constexpr size_t num_args = 1 + sizeof...(PARAM_Ts); + + /// Test if this function can be called with a particular set of arguments. + template + static constexpr bool InvocableWith(ARG1 &&, ARG_Ts &&...) { + return std::is_invocable(); + } + + /// Test if this function can be called with a particular set of argument TYPEs. + template + static constexpr bool InvocableWith() { + return std::is_invocable(); + } + + /// Change a function's return type using a converter function. + template + static auto ChangeReturnType(FUN_T fun, CONVERTER_T convert_fun) + { + return [fun=fun, c=convert_fun](PARAM1_T && arg1, PARAM_Ts &&... args) { + return c( fun(std::forward(arg1), std::forward(args)...) ); + }; + } + + /// Change a function's arguments using a fixed converter function. + template + static auto ChangeParameterTypes(FUN_T fun, CONVERTER_T convert_fun) + { + return [fun=fun, c=convert_fun](NEW_T arg1, decoy_t... args) { + return fun(c(arg1), c(args)...); + }; + } + + /// Convert a function's arguments using a dynamic (templated) lambda function. + template + static auto ConvertParameterTypes(FUN_T fun, CONVERTER_T convert_lambda) + { + // If the converter can take two arguments, assume the second is for type. + if constexpr ( std::is_invocable()) { + return [fun=fun, c=convert_lambda](NEW_T arg1, decoy_t... args) { + return fun(c(arg1, std::decay_t{}), + c(args, std::decay_t{})...); + }; + } + + // Otherwise assume that we are using a templated lambda (or similar object) + else { + return [fun=fun, c=convert_lambda](NEW_T arg1, decoy_t... 
args) { + return fun(c.template operator()(arg1), + c.template operator()(args)...); + }; + } + } + + /// Lock in a specified argument of a function. + template + static auto BindAt(CLASS_T fun, T && bound) { + using before_pack = typename params_t::template shrink; + using after_pack = typename params_t::template popN; + return BindAt_impl(fun, std::forward(bound), before_pack(), after_pack()); + } + + /// Lock in multiple function arguments. + template + static auto Bind(CLASS_T fun, T1 && bound1, Ts &&... bound) { + static_assert(emp::ValPack::IsSorted() && emp::ValPack::IsUnique(), + "FunInfo::Bind must be given unique, sorted indicies."); + static_assert(sizeof...(IDs) == sizeof...(Ts), + "FunInfo::Bind must have exactly one ID per bound value."); + + // Bind all LATER positions first, if there are any. + if constexpr (sizeof...(IDs) > 0) { + auto new_fun = Bind(fun, std::forward(bound)...); + return FunInfo::template BindAt(new_fun, bound1); + } + + // Otherwise just bind THIS position. + else return FunInfo::template BindAt(fun, bound1); + } + }; + + // Specialization for function objects with NO parameters... + template + struct FunInfo + { + using fun_t = RETURN_T(); + using return_t = RETURN_T; + using params_t = TypePack<>; + + static constexpr size_t num_args = 0; + + /// Test if this function can be called with a particular set of arguments. + template + static constexpr bool InvocableWith(ARG_Ts...) { return sizeof...(ARG_Ts) == 0; } + + /// Change a function's return type using a converter function. + template + static auto ChangeReturnType(FUN_T fun, CONVERTER_T convert_fun) + { + return [fun=fun, c=convert_fun]() { + return c(fun()); + }; + } + + /// Change a function's arguments using a converter function. + template + static auto ChangeParameterTypes(FUN_T fun, CONVERTER_T /*convert_fun*/) + { + // No parameters, so no changes to make. + return fun; + } + + /// Convert a function's arguments using a dynamic (templated) lambda function. 
+ template + static auto ConvertParameterTypes(FUN_T fun, CONVERTER_T /*convert_lambda*/) + { + // No parameters, so no conversions to make. + return fun; + } + + }; + + + // === Stand-alone helper functions === + + /// Change a function's return type using a converter function. + template + static auto ChangeReturnType(FUN_T fun, CONVERTER_T convert_fun) + { + return FunInfo::ChangeReturnType(fun, convert_fun); + } + + /// Change a function's arguments using a simple converter function. + template + static auto ChangeParameterTypes(FUN_T fun, CONVERTER_T convert_fun) + { + return FunInfo::template ChangeParameterTypes(fun, convert_fun); + } + + /// Convert a function's arguments using a templated lambda. + /// @note: Will not work until C++20!! + template + static auto ConvertParameterTypes(FUN_T fun, CONVERTER_T convert_fun) + { + return FunInfo::template ConvertParameterTypes(fun, convert_fun); + } + + /// Convert both return type AND parameter type. + /// Convert a function's arguments using a templated lambda. + template + static auto ChangeTypes(FUN_T fun, R_CONVERTER_T ret_convert_fun, P_CONVERTER_T param_convert_fun) + { + auto partial = FunInfo::template ChangeParameterTypes(fun, param_convert_fun); + return FunInfo::ChangeReturnType(partial, ret_convert_fun); + } + + /// Lock in a specified argument of a function. + template + auto BindAt(FUN_T fun, BOUND_T && bound) { + return FunInfo::template BindAt(fun, std::forward(bound)); + } + + /// Lock in the first argument of a function. + template + auto BindFirst(FUN_T fun, BOUND_T && bound) { + return FunInfo::template BindAt<0>(fun, std::forward(bound)); + } + + /// Lock in a series of specified arguments to a function. + template + auto Bind(FUN_T fun, Ts &&... 
bound) { + return FunInfo::template Bind(fun, std::forward(bound)...); + } +} + +#endif // #ifndef EMP_META_FUNINFO_HPP_INCLUDE diff --git a/include/emp/meta/TypeID.hpp b/include/emp/meta/TypeID.hpp index 2a4b967646..3712921b27 100644 --- a/include/emp/meta/TypeID.hpp +++ b/include/emp/meta/TypeID.hpp @@ -1,15 +1,66 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2021 + * date: 2016-2022 */ /** * @file * @brief TypeID provides an easy way to convert types to strings. * + * TypeID provides an easy way to compare types, analyze them, and convert to strings. + * All TypeID objects are consistent within a type, and are ordinal and hashable. + * + * To get the unique type information for type T use: + * TypeID t = emp::GetTypeID<T>(); + * + * To make TypeID work more effectively with your custom class, implement the static member + * function EMPGetTypeName() which returns a string with its full name (including namespace). + * static std::string EMPGetTypeName() { return "myns::MyClass"; } + * + * MEMBER FUNCTIONS: + * + * std::string GetName() - Return a human readable (ideally) version of type's name. + * void SetName(in_name) - Set the name that should be used henceforth for this type. + * size_t GetSize() - Return number of bytes used by this type. + * + * -- TYPE TESTS -- + * bool IsAbstract() - Is this type a pure-virtual class? + * bool IsArithmetic() - Is this type numeric? + * bool IsArray() - Does this type represent a sequence of objects in memory? + * bool IsClass() - Is this type a non-union class? + * bool IsConst() - Are the contents of this type prevented from changing? + * bool IsEmpty() - Does this type have no contents? + * bool IsObject() - Is this type ANY object type? + * bool IsPointer() - Is this type a pointer? + * bool IsReference() - Is this type a reference? + * bool IsTrivial() - Is this type trivial? 
+ * bool IsVoid() - Is this the type "void"? + * bool IsVolatile() - Is this type volatile qualified? + * bool IsTypePack() - Is this type an emp::TypePack? + * + * -- COMPARISON TESTS -- + * bool IsType() - Is this type the specified type T? + * bool IsTypeIn() - Is this type one of the listed types? + * + * -- TYPE CONVERSIONS -- + * TypeID GetDecayTypeID() - Remove all qualifications (const, reference, etc.) + * TypeID GetElementTypeID() - Return type that makes up this type (i.e. for arrays) + * TypeID GetRemoveConstTypeID() - Remove const-ness of this type, if any. + * TypeID GetRemoveCVTypeID() - Remove constness and volatility of this type. + * TypeID GetRemoveExtentTypeID() - Flatten one level of a multi-dimensional array. + * TypeID GetRemoveAllExtentsTypeID() - Flatten multi-dimensional arrays. + * TypeID GetRemovePointerTypeID() - If this is a pointer, change to type pointed to. + * TypeID GetRemoveReferenceTypeID() - If this is a reference, change to type referred to. + * TypeID GetRemoveVolatileTypeID() - Remove volatility of this type, if any + * + * -- VALUE CONVERSIONS -- + * double ToDouble(pointer) - Convert pointed-to object (of this type) to a double. + * std::string ToString(pointer) - Convert pointed-to object (of this type) to a std::string. + * bool FromDouble(value, pointer) - Use double value to set pointed-to object (of this type) + * bool FromString(string, pointer) - Use string value to set pointed-to object (of this type) + * * Developer notes: * * Fill out defaults for remaining standard library classes (as possible) - * * If a class has a static TypeID_GetName() defined, use that for the name. * * If a type is a template, give access to parameter types. * * If a type is a function, give access to parameter types. 
*/ @@ -71,6 +122,7 @@ namespace emp { virtual bool IsTrivial() const { return false; } virtual bool IsVoid() const { return false; } virtual bool IsVolatile() const { return false; } + virtual bool IsFunction() const { return false; } virtual bool IsTypePack() const { return false; } @@ -122,6 +174,7 @@ namespace emp { bool IsTrivial() const override { return std::is_trivial(); } bool IsVoid() const override { return std::is_same(); } bool IsVolatile() const override { return std::is_volatile(); } + bool IsFunction() const override { return std::is_function(); } bool IsTypePack() const override { return emp::is_TypePack(); } @@ -173,6 +226,7 @@ namespace emp { size_t GetSize() const override { if constexpr (std::is_void()) return 0; + else if constexpr (std::is_function()) return 0; else return sizeof(T); } @@ -184,9 +238,9 @@ namespace emp { return ptr.ReinterpretCast()->ToDouble(); } - // If this type is convertable to a double, cast the pointer to the correct type, de-reference it, + // If this type is convertible to a double, cast the pointer to the correct type, de-reference it, // and then return the conversion. Otherwise return NaN - if constexpr (std::is_convertible::value) { + else if constexpr (std::is_convertible::value) { return (double) *ptr.ReinterpretCast(); } else return std::nan(""); @@ -223,7 +277,8 @@ namespace emp { return "[N/A]"; } - bool FromDouble(double value, const emp::Ptr ptr) const override { + bool FromDouble([[maybe_unused]] double value, + [[maybe_unused]] const emp::Ptr ptr) const override { using base_t = std::decay_t; // If this variable has a built-in FromDouble() trait, use it! @@ -231,7 +286,7 @@ namespace emp { return ptr.ReinterpretCast()->FromDouble(value); } - // If this type is convertable to a double, cast the pointer to the correct type, de-reference it, + // If this type is convertible to a double, cast the pointer to the correct type, de-reference it, // and then return the conversion. 
Otherwise return NaN if constexpr (std::is_convertible::value) { *ptr.ReinterpretCast() = (base_t) value; @@ -387,6 +442,11 @@ namespace emp { return internal::TypePackIDs_impl::GetIDs(); } + // Determine if a type has a static EMPGetTypeName() member function. + template struct HasEMPGetTypeName : std::false_type { }; + template + struct HasEMPGetTypeName> : std::true_type{}; + /// Build the information for a single TypeID. template static emp::Ptr BuildInfo() { @@ -395,7 +455,12 @@ namespace emp { TypeID type_id(&info); info.init = true; - info.name = typeid(T).name(); + + if constexpr (HasEMPGetTypeName()) { + info.name = T::EMPGetTypeName(); + } else { + info.name = typeid(T).name(); + } // Now, fix the name if we can be more precise about it. if constexpr (std::is_const()) { @@ -438,10 +503,16 @@ namespace emp { /// Setup a bunch of standard type names to be more readable. void SetupTypeNames() { - - // Built-in types. GetTypeID().SetName("void"); + // Probably replaced later, but good to have for systems where it's not. + GetTypeID().SetName("size_t"); + GetTypeID().SetName("long"); + GetTypeID().SetName("long long"); + GetTypeID().SetName("unsigned long"); + GetTypeID().SetName("unsigned long long"); + + // Main built-in types. GetTypeID().SetName("bool"); GetTypeID().SetName("double"); GetTypeID().SetName("float"); diff --git a/include/emp/meta/TypePack.hpp b/include/emp/meta/TypePack.hpp index 9594c01749..c5dcef61a4 100644 --- a/include/emp/meta/TypePack.hpp +++ b/include/emp/meta/TypePack.hpp @@ -7,10 +7,10 @@ * @file * @brief A set of types that can be manipulated at compile time (good for metaprogramming) * - * TypePacks are static structues that provide a large set of mechanisms to access and adjust + * TypePacks are static structures that provide a large set of mechanisms to access and adjust * the included types. * - * To create a typepack, just pass in zero or more types into the TypePack template. 
+ * To create a TypePack, just pass in zero or more types into the TypePack template. * * using my_pack = emp::TypePack; * @@ -294,7 +294,7 @@ namespace emp { /// Rearrange types in TypePack into reverse order. using reverse = typename pop::reverse::template push_back; - /// Rotate types through typepack by the specified number of steps. + /// Rotate types through TypePack by the specified number of steps. using rotate = typename pop::template push_back; /// Set the type at the specified position to the new type provided. Return as new TypePack. diff --git a/include/emp/meta/ValPack.hpp b/include/emp/meta/ValPack.hpp index 83ff623ef9..060e1be0c5 100644 --- a/include/emp/meta/ValPack.hpp +++ b/include/emp/meta/ValPack.hpp @@ -28,11 +28,13 @@ namespace emp { // Anonymous implementations of ValPack interface. #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace internal { + // Helper. DONE arg starts as true, but set to false when sequence finished. template struct vp_range { static constexpr auto NEXT = START + STEP; using type = typename vp_range<(NEXT >= END), NEXT, END, STEP, VALS..., START>::type; }; + // Specialization for when DONE is true. template struct vp_range { using type = ValPack; @@ -202,6 +204,9 @@ namespace emp { /// Find the overall maximum value in an ValPack. constexpr static auto Max() { return pop::Max(V1); } + /// Determine if the pack is sorted. + constexpr static bool IsSorted() { return V1 <= Min() && pop::IsSorted(); } + /// Use each value in an ValPack as an index and return results as a tuple. 
template constexpr static auto ApplyIndex(T && container) { @@ -253,6 +258,8 @@ namespace emp { template constexpr static auto Max(T floor) { return floor; } + constexpr static bool IsSorted() { return true; } + static std::string ToString() { return ""; } static void PrintVals(std::ostream & /* os */=std::cout) { ; } diff --git a/include/emp/meta/macro_math.hpp b/include/emp/meta/macro_math.hpp index 3d43e822be..e0cc530c5d 100644 --- a/include/emp/meta/macro_math.hpp +++ b/include/emp/meta/macro_math.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Macros to build a pre-processor calculator system. - * Status: RELEASE + * @note Status: RELEASE * * Working macros include: * EMP_INC(A) : converts to result of A+1 diff --git a/include/emp/meta/macros.hpp b/include/emp/meta/macros.hpp index ea1fbafe43..e18730bf8f 100644 --- a/include/emp/meta/macros.hpp +++ b/include/emp/meta/macros.hpp @@ -6,7 +6,7 @@ /** * @file * @brief Generally useful macros that can perform cools tricks. - * Status: RELEASE + * @note Status: RELEASE * * * Generally useful macros that can perform cools tricks. As with all macros, use only @@ -90,7 +90,7 @@ /// The below values allow you to have EMP_FAKE_ARG or EMP_FAKE_2ARG as a single argument. /// If you prepend it with EMP_CONVERT it will trigger a conversion. If you prepend anything -/// else similarly, it wil NOT triggera a conversion (and stay a single argument) +/// else similarly, it will NOT trigger a conversion (and stay a single argument) #define EMP_CONVERT_ARG_EMP_FAKE_ARG(A) A #define EMP_CONVERT_ARG_EMP_FAKE_2ARG(A) ~, A @@ -204,7 +204,7 @@ /// @cond MACROS // S = Size of each pack // N = Number of packs -// P = Pack representatio of number of packs +// P = Pack representation of number of packs #define EMP_ARGS_TO_PACKS_impl(S, N, ...) EMP_ARGS_TO_PACKS_implB(S, EMP_DEC_TO_PACK(N), __VA_ARGS) #define EMP_ARGS_TO_PACKS_implB(S, P, ...) 
@CAO diff --git a/include/emp/meta/meta.hpp b/include/emp/meta/meta.hpp index 0c3e017da4..ccd23cc122 100644 --- a/include/emp/meta/meta.hpp +++ b/include/emp/meta/meta.hpp @@ -31,10 +31,59 @@ namespace emp { /// Effectively create a function (via constructor) where all args are computed, then ignored. struct run_and_ignore { template run_and_ignore(T&&...) {} }; + template struct type_index; + + template <> struct type_index<> { + using t1 = void; using t2 = void; using t3 = void; using t4 = void; + }; + + template struct type_index { + using t1 = T1; using t2 = void; using t3 = void; using t4 = void; + }; + + template struct type_index { + using t1 = T1; using t2 = T2; using t3 = void; using t4 = void; + }; + + template struct type_index { + using t1 = T1; using t2 = T2; using t3 = T3; using t4 = void; + }; + + template + struct type_index { + using t1 = T1; using t2 = T2; using t3 = T3; using t4 = T4; + }; + + /// Trim off a specific type position from a pack. - template using first_type = T1; - template using second_type = T2; - template using third_type = T3; + template using first_type = typename type_index::t1; + template using second_type = typename type_index::t2; + template using third_type = typename type_index::t3; + template using fourth_type = typename type_index::t4; + + // Index into a template parameter pack to grab a specific type. + #ifndef DOXYGEN_SHOULD_SKIP_THIS + namespace internal { + template + struct pack_id_impl { using type = typename pack_id_impl::type; }; + + template struct pack_id_impl<0,T,Ts...> { using type = T; }; + } + #endif // DOXYGEN_SHOULD_SKIP_THIS + + /// Pick a specific position from a type pack. + template + using pack_id = typename internal::pack_id_impl::type; + + /// Trim off the last type from a pack. + template using last_type = pack_id; + + /// A struct declaration with no definition to show a type name in a compile time error. 
+ template struct ShowType; + + /// A false type that does NOT resolve in unexecuted if-constexpr branches. + /// By Brian Bi; from: https://stackoverflow.com/questions/69501472/best-way-to-trigger-a-compile-time-error-if-no-if-constexprs-succeed + template struct dependent_false : std::false_type {}; /// Create a placeholder template to substitute for a real type. template struct PlaceholderType; @@ -78,23 +127,6 @@ namespace emp { return out_v; } - // Index into a template parameter pack to grab a specific type. - #ifndef DOXYGEN_SHOULD_SKIP_THIS - namespace internal { - template - struct pack_id_impl { using type = typename pack_id_impl::type; }; - - template - struct pack_id_impl<0,T,Ts...> { using type = T; }; - } - - template - using pack_id = typename internal::pack_id_impl::type; - #endif // DOXYGEN_SHOULD_SKIP_THIS - - // Trim off the last type from a pack. - template using last_type = pack_id; - // Trick to call a function using each entry in a parameter pack. #define EMP_EXPAND_PPACK(PPACK) ::emp::run_and_ignore{ 0, ((PPACK), void(), 0)... } diff --git a/include/emp/meta/type_traits.hpp b/include/emp/meta/type_traits.hpp index b7fe7471ed..4ea71db94a 100644 --- a/include/emp/meta/type_traits.hpp +++ b/include/emp/meta/type_traits.hpp @@ -1,7 +1,7 @@ /* * This file is part of Empirical, https://github.com/devosoft/Empirical * Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * date: 2016-2021. + * date: 2016-2022. */ /** * @file @@ -12,8 +12,9 @@ #define EMP_META_TYPE_TRAITS_HPP_INCLUDE -#include +#include // uint8_t, uint16_t, etc. #include +#include #include #include #include @@ -25,6 +26,7 @@ #include "meta.hpp" + namespace emp { // Predeclarations used below. @@ -99,7 +101,7 @@ namespace emp { template struct remove_std_function_type> { using type = T; }; template using remove_std_function_t = typename remove_std_function_type::type; - // Collect the reference type for any container. 
+ // Collect the reference type for any standard container. template struct element_type { using type = T; }; template

Word" + << "ExpectedWords" + << "MaximumWords" + << "Information
" << word.word << "" + << "" << word.ave_options + << "" << word.max_options + << "" << word.entropy + << "