diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index bf3e370cd6..4fc38573d0 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -7,6 +7,7 @@ on: - '**' schedule: - cron: '0 0 * * 0' + workflow_dispatch: jobs: tidy: name: Enforce Tidyness @@ -143,7 +144,7 @@ jobs: submodules: 'recursive' - run: | make -C doc html coverage | ./doc/headtail.sh - python doc/parse_documentation_coverage.py doc/_build/doc-coverage.json >> doc-coverage.json + python doc/parse_documentation_coverage.py doc/_build/doc-coverage.json >> doc/doc-coverage.json - uses: sylvanld/action-storage@v1 if: github.ref == 'refs/heads/master' with: diff --git a/.gitignore b/.gitignore index d4668f7843..0901369ed0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *~ +*.csv *.debug *.dSYM *.gcov @@ -17,6 +18,8 @@ *.dat *.exe *tmp.* +*-bak.* +*-bak2.* */env/* */3nv/* @@ -24,9 +27,6 @@ .DS_Store a.out a.out.js -fitness.csv -population.csv -systematics.csv incoming/ tmp/ @@ -88,7 +88,6 @@ examples/*/* !examples/*/Makefile !examples/timing/BENCHMARKS -tests/*.csv tests/StatsConfig.cfg tests/web/*.js tests/web/package.json diff --git a/.gitmodules b/.gitmodules index b6dbc3bf5d..8a533ca029 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,10 +10,6 @@ path = third-party/cereal url = https://github.com/mmore500/cereal.git shallow = true -[submodule "third-party/span-lite"] - path = third-party/span-lite - url = https://github.com/martinmoene/span-lite.git - shallow = true [submodule "third-party/robin-hood-hashing"] path = third-party/robin-hood-hashing url = https://github.com/martinus/robin-hood-hashing.git diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 3a0195bf2e..675e23b0d1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -2,22 +2,21 @@ # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details -#Required +# Required version: 2 +# Set the version of Python and other tools you might need build: 
os: ubuntu-22.04 tools: - python: "3.7" + python: "3.10" + # Build documentation in the docs/ directory with Sphinx sphinx: - configuration: doc/conf.py - -# Optionally build your docs in additional formats such as PDF -formats: [] + configuration: doc/conf.py -# Optionally set the version of Python and requirements required to build your docs +# Optionally declare the Python requirements required to build your docs python: - install: - - requirements: doc/requirements.txt + install: + - requirements: doc/requirements.txt diff --git a/Dockerfile b/Dockerfile index a06368ac46..6879625359 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Pull base image. -FROM ubuntu:bionic-20210416 +FROM ubuntu:focal-20230412 COPY . /opt/Empirical @@ -8,6 +8,7 @@ SHELL ["/bin/bash", "-c"] # Prevent interactive time zone config. # adapted from https://askubuntu.com/a/1013396 ENV DEBIAN_FRONTEND=noninteractive +ENV SPHINXBUILD="python3.10 -m sphinx" RUN \ echo 'Acquire::http::Timeout "60";' >> "/etc/apt/apt.conf.d/99timeout" \ @@ -31,22 +32,21 @@ RUN \ && \ rm -rf /var/lib/apt/lists/* \ && \ - find /etc/apt -type f -name '*.list' -exec sed -i 's/\(^deb.*-backports.*\)/#\1/; s/\(^deb.*-updates.*\)/#\1/; s/\(^deb.*-proposed.*\)/#\1/; s/\(^deb.*-security.*\)/#\1/' {} + \ - && \ apt-get update -y \ && \ - apt-get install -y software-properties-common=0.96.24.32.1 \ + apt-get install -y software-properties-common \ && \ add-apt-repository -y ppa:ubuntu-toolchain-r/test \ && \ + add-apt-repository -y ppa:deadsnakes/ppa \ + && \ apt-get update -y \ && \ apt-get install --no-install-recommends --allow-downgrades -y \ - dpkg-dev \ - libc6=2.27-3ubuntu1 \ - libc6-dev \ - libc6-dbg \ build-essential \ + dpkg-dev \ + g++-11 \ + libc6 \ xvfb \ x11vnc \ x11-xkb-utils \ @@ -60,21 +60,14 @@ RUN \ libnss3 \ lsb-release \ xdg-utils \ - g++-8=8-20180414-1ubuntu2 \ - gcc-8-base=8-20180414-1ubuntu2 \ - cpp-8=8-20180414-1ubuntu2 \ - gcc-8=8-20180414-1ubuntu2 \ - gcc-8-base=8-20180414-1ubuntu2 \ - 
libgcc-8-dev \ - libstdc++-8-dev \ cmake \ - python-virtualenv \ - python-pip-whl \ - python-pip \ - python-setuptools \ + python3-distutils \ python3-setuptools \ python3-virtualenv \ python3-pip \ + 'python3\.10' \ + 'python3\.10-distutils' \ + 'python3\.10-venv' \ nodejs \ npm \ tar \ @@ -84,7 +77,7 @@ RUN \ doxygen \ curl \ perl \ - perl-base=5.26.1-6 \ + perl-base \ git \ htop \ man \ @@ -162,12 +155,14 @@ ENV DISPLAY :99 RUN echo 'kernel.unprivileged_userns_clone=1' > /etc/sysctl.d/userns.conf RUN \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 90 \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 90 \ && \ npm install -g n \ && \ n 14.17 \ && \ + hash -r \ + && \ export python="/usr/bin/python3" \ && \ npm install source-map \ @@ -175,12 +170,32 @@ RUN \ echo "finalized set up dependency versions" RUN \ - pip install wheel==0.30.0 \ + curl -sS https://bootstrap.pypa.io/get-pip.py | python3 \ + && \ + curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \ + && \ + pip install --upgrade --force-reinstall pip virtualenv \ + && \ + pip3 install --upgrade --force-reinstall pip virtualenv \ + && \ + python3.10 -m pip install --upgrade --force-reinstall pip virtualenv \ && \ - pip3 install wheel==0.30.0 \ + python3 -m pip install --upgrade --force-reinstall pip virtualenv \ + && \ + pip install wheel==0.30.0 six==1.16.0 \ + && \ + pip3 install wheel==0.30.0 six==1.16.0 \ + && \ + python3.10 -m pip install wheel==0.30.0 six==1.16.0 \ + && \ + python3 -m pip install wheel==0.30.0 six==1.16.0 \ && \ pip3 install -r /opt/Empirical/doc/requirements.txt \ && \ + python3.10 -m pip install -r /opt/Empirical/doc/requirements.txt \ + && \ + python3 -m pip install -r /opt/Empirical/doc/requirements.txt \ + && \ echo "installed documentation build requirements" RUN \ @@ -190,13 +205,19 @@ RUN \ && \ git submodule init \ && \ - git submodule update -f \ + echo "nameserver 8.8.8.8" > /etc/resolv.conf \ + && \ + n=0; until [ $n -ge 3 ]; do 
git submodule update -f && break || ((n++)); sleep 5; done; if [ $n -eq 3 ]; then echo "Update failed after 3 attempts."; else echo "Update successful!"; fi \ && \ echo "initialized submodules" RUN \ cd /opt/Empirical \ && \ + curl -sS https://bootstrap.pypa.io/get-pip.py | python3 \ + && \ + python3 -m pip install virtualenv \ + && \ make install-test-dependencies \ && \ echo "installed test dependencies" diff --git a/demos/MAP-Elites-Arm/Makefile b/demos/MAP-Elites-Arm/Makefile index 52b547329f..866d9093ed 100644 --- a/demos/MAP-Elites-Arm/Makefile +++ b/demos/MAP-Elites-Arm/Makefile @@ -3,7 +3,7 @@ PROJECT := MAP-Elites-Arm EMP_DIR := ../../include # Flags to use regardless of compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ @@ -12,7 +12,7 @@ CFLAGS_nat_debug := -g -DEMP_TRACK_MEM -Wnon-virtual-dtor -Wcast-align -Woverloa # Emscripten compiler information CXX_web := emcc -OFLAGS_web_all := -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1 #--embed-file configs +OFLAGS_web_all := -s "EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1 #--embed-file configs OFLAGS_web := -Oz -DNDEBUG # OFLAGS_web_debug := -g4 -Oz -pedantic -Wno-dollar-in-identifier-extension -s ASSERTIONS=2 OFLAGS_web_debug := -g4 -pedantic -Wno-dollar-in-identifier-extension -s ASSERTIONS=2 diff --git a/demos/NK/Makefile b/demos/NK/Makefile index 85cdf9efb6..5a1f1fdae3 100644 --- a/demos/NK/Makefile +++ b/demos/NK/Makefile @@ -3,7 +3,7 @@ PROJECT := NK EMP_DIR := ../../include # Flags to use regardless of 
compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ diff --git a/demos/SelectionAnalyze/Makefile b/demos/SelectionAnalyze/Makefile index 39543cfd70..3a81bbc910 100644 --- a/demos/SelectionAnalyze/Makefile +++ b/demos/SelectionAnalyze/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/demos/SpatialCoop2017/Makefile b/demos/SpatialCoop2017/Makefile index dced09e09b..1b61d0e8e9 100644 --- a/demos/SpatialCoop2017/Makefile +++ b/demos/SpatialCoop2017/Makefile @@ -3,7 +3,7 @@ PROJECT := SimplePDWorld EMP_DIR := ../../include # Flags to use regardless of compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ diff --git a/demos/Sudoku/Makefile b/demos/Sudoku/Makefile index ff3f1e6658..7d0df6543a 100644 --- a/demos/Sudoku/Makefile +++ b/demos/Sudoku/Makefile @@ -1,5 +1,5 @@ # Flags to use regardless of compiler -CFLAGS_all := -std=c++17 -Wall -Wno-unused-function -I../../include/ +CFLAGS_all := -std=c++20 -Wall -Wno-unused-function -I../../include/ # Emscripten compiler information CXX_web := emcc diff --git a/demos/utils/graphs/web/Makefile b/demos/utils/graphs/web/Makefile index 4725b29149..19ec718967 100644 --- a/demos/utils/graphs/web/Makefile +++ b/demos/utils/graphs/web/Makefile @@ -1,7 +1,7 @@ CXX_web := emcc # OFLAGS_web := -g4 -Wall OFLAGS_web := -oz -DNDEBUG -CFLAGS_web := -std=c++17 $(OFLAGS_web) -s EXPORTED_FUNCTIONS="['_empLoadString']" -I../../../../include/ +CFLAGS_web := -std=c++20 $(OFLAGS_web) -s EXPORTED_FUNCTIONS="['_empLoadString']" -I../../../../include/ default: web diff --git 
a/demos/utils/words/Wordle-simple.cpp b/demos/utils/words/Wordle-simple.cpp new file mode 100644 index 0000000000..be8c470f91 --- /dev/null +++ b/demos/utils/words/Wordle-simple.cpp @@ -0,0 +1,422 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Wordle-simple.cpp + * + * This version of Wordle is a bit simpler than it should be; it does not handle double letters + * correctly. + */ + +#include +#include +#include +#include +#include + +#include "../../../include/emp/base/Ptr.hpp" +#include "../../../include/emp/base/vector.hpp" +#include "../../../include/emp/bits/BitSet.hpp" +#include "../../../include/emp/bits/BitVector.hpp" +#include "../../../include/emp/config/command_line.hpp" +#include "../../../include/emp/datastructs/map_utils.hpp" +#include "../../../include/emp/datastructs/vector_utils.hpp" +#include "../../../include/emp/io/File.hpp" +#include "../../../include/emp/tools/string_utils.hpp" + +enum class Result { NOWHERE=0, ELSEWHERE, HERE }; + +/// A collection of results for a whole word. 
+struct ResultSet { + emp::vector results; + + static const emp::vector & PlaceValues(const size_t num_results) { + static emp::vector place_values; + if (place_values.size() == 0) { + place_values.resize(num_results); + size_t value = 1; + for (size_t i = 0; i < num_results; ++i) { + place_values[i] = value; + value *= 3; + } + } + return place_values; + } + + ResultSet(const emp::vector & in) : results(in) { } + ResultSet(size_t size, size_t id) : results(size) { + emp::vector place_values = PlaceValues(results.size()); + for (size_t i = results.size()-1; i < results.size(); --i) { + if (id > place_values[i]) { + size_t value = id / place_values[i]; + results[i] = (Result) value; + id -= value * place_values[i]; + } + } + } + ResultSet(const ResultSet &) = default; + + size_t ToID() { + emp::vector place_values = PlaceValues(results.size()); + size_t id = 0; + for (size_t i = 0; i < results.size(); ++i) { + id += place_values[i] * (size_t) results[i]; + } + return id; + } +}; + +// A clue is a given letter, position, and result +struct Clue { + emp::BitVector words; // IDs of words consistent with this clue. +}; + +// All of the clues for a given position. +struct PositionClues { + std::array nowhere; + std::array elsewhere; + std::array here; + + void SetNumWords(size_t num_words) { + for (auto & x : nowhere) x.words.resize(num_words); + for (auto & x : elsewhere) x.words.resize(num_words); + for (auto & x : here) x.words.resize(num_words); + } +}; + +// Trying to build a full tree of solutions... +struct SolveState { + emp::BitVector words; +}; + +struct WordData { + std::string word; + emp::BitSet<26> letters; + size_t max_options = 0; // Maximum number of word options after used as a guess. + double ave_options = 0.0; // Average number of options after used as a guess. + double entropy = 0.0; // What is the entropy (and thus information gained) for this choice? 
+ bool is_active = false; + + WordData(const std::string & in_word) : word(in_word) { + for (char x : word) letters.Set(x - 'a'); + } +}; + +class WordSet { +private: + size_t word_length; + emp::vector words; + emp::vector clues; // A PositionClues object for each position. + std::unordered_map pos_map; // Map of words to their position ids. + emp::BitVector start_options; // Current options. + size_t start_count; // Count of start options (cached) + + bool verbose = true; + + // Get the ID (0-26) associated with a letter. + size_t ID(char letter) { + emp_assert(letter >= 'a' && letter <= 'z'); + return static_cast(letter - 'a'); + } + + char LET(size_t id) { + emp_assert(id < 26); + return (char) (id + 'a'); + } + +public: + WordSet(size_t length=5) : word_length(length) { } + + void AddWord(std::string & in_word) { + size_t id = words.size(); + pos_map[in_word] = id; + words.emplace_back(in_word); + } + + void Load(std::istream & is, std::ostream & os) { + // Load in all of the words. + std::string in_word; + size_t wrong_size_count = 0; + size_t invalid_char_count = 0; + size_t dup_count = 0; + while (is) { + is >> in_word; + // Only keep words of the correct size and all lowercase. + if (in_word.size() != word_length) { wrong_size_count++; continue; } + if (!emp::is_lower(in_word)) { invalid_char_count++; continue; } + if (emp::Has(pos_map, in_word)) { dup_count++; continue; } + AddWord(in_word); + } + + if (wrong_size_count) { + std::cerr << "Warning: eliminated " << wrong_size_count << " words of the wrong size." + << std::endl; + } + if (invalid_char_count) { + std::cerr << "Warning: eliminated " << invalid_char_count << " words with invalid characters." + << std::endl; + } + if (dup_count) { + std::cerr << "Warning: eliminated " << dup_count << " words that were duplicates." + << std::endl; + } + + if (verbose) std::cerr << "Loaded " << words.size() << " valid words." 
<< std::endl; + } + + void ResetOptions() { + start_count = words.size(); + start_options.resize(start_count); + start_options.SetAll(); + } + + // Once the words are loaded, Preprocess will collect info. + void Preprocess() { + // Setup all clue info to know the number of words. + clues.resize(word_length); + for (auto & x : clues) x.SetNumWords(words.size()); + + // Loop through each word, indicating which clues it is consistent with. + for (size_t word_id = 0; word_id < words.size(); ++word_id) { + const std::string & word = words[word_id].word; + + // Figure out which letters are in this word. + emp::BitSet<26> letters = words[word_id].letters; + + // Now figure out what clues it is consistent with. + for (size_t pos=0; pos < word.size(); ++pos) { + const char cur_letter = word[pos]; + // Incorrect letter for alternatives at this position. + for (size_t letter_id = 0; letter_id < 26; ++letter_id) { + if (letter_id == ID(cur_letter)) { // Letter is HERE. + clues[pos].here[letter_id].words.Set(word_id); + } else if (letters.Has(letter_id)) { // Letter is ELSEWHERE + clues[pos].elsewhere[letter_id].words.Set(word_id); + } else { // Letter is NOT IN WORD + clues[pos].nowhere[letter_id].words.Set(word_id); + } + } + } + } + + ResetOptions(); + } + + /// Limit starting options based on a specific clue. 
+ void AddClue(size_t pos, char letter, Result result) { + size_t let_id = ID(letter); + if (result == Result::NOWHERE) { + start_options &= clues[pos].nowhere[let_id].words; + } else if (result == Result::ELSEWHERE) { + start_options &= clues[pos].elsewhere[let_id].words; + } else { + start_options &= clues[pos].here[let_id].words; + } + start_count = start_options.CountOnes(); + } + + void AddClue(std::string word, std::string result) { + for (size_t i = 0; i < word.size(); ++i) { + if (result[i] == 'N') AddClue(i, word[i], Result::NOWHERE); + else if (result[i] == 'E') AddClue(i, word[i], Result::ELSEWHERE); + else if (result[i] == 'H') AddClue(i, word[i], Result::HERE); + } + } + + emp::BitVector AnalyzeGuess(const std::string & guess, const WordData & answer) { + // Loop through all possible answers to see how much a word cuts down choices. + emp::BitVector options(start_options); + + for (size_t pos = 0; pos < word_length; ++pos) { + const size_t guess_letter = ID(guess[pos]); + if (guess[pos] == answer.word[pos]) { // CORRECT GUESS FOR POSITION! + options &= clues[pos].here[guess_letter].words; + } else if (answer.letters.Has(guess_letter)) { // WRONG POSITION + options &= clues[pos].elsewhere[guess_letter].words; + } else { // WRONG CHARACTER + options &= clues[pos].nowhere[guess_letter].words; + } + } + + return options; + } + + // Slow way to manually call on specific words; brute-force find the entires for each. + emp::BitVector AnalyzeGuess(const std::string & guess, const std::string & answer) { + if (!emp::Has(pos_map, answer)) std::cerr << "UNKNOWN WORD: " << answer << std::endl; + return AnalyzeGuess(guess, words[pos_map[answer]]); + } + + void AnalyzeGuess(WordData & guess) { + size_t max_options = 0; + size_t total_options = 0; + double entropy = 0.0; + + // Scan through all possible answers... 
+ for (WordData & answer : words) { + size_t options = AnalyzeGuess(guess.word, answer).CountOnes(); + if (options > max_options) max_options = options; + total_options += options; + const double p = static_cast(options) / static_cast(start_count); + entropy -= p * std::log2(p); + } + guess.max_options = max_options; + guess.ave_options = static_cast(total_options) / static_cast(words.size()); + guess.entropy = entropy; + } + + void Analyze() { + // for (int id = start_options.FindOne(); id >= 0; id = start_options.FindOne(id+1)) { + for (size_t id = 0; id < words.size(); ++id) { + AnalyzeGuess(words[id]); + } + } + + /// Also analyze non-word guesses. + void AnalyzeAll() { + std::string guess(word_length, 'a'); + size_t best_max_options = 10000; + double best_ave_options = 10000.0; + double best_entropy = 0.0; + std::string best_max_options_word = ""; + std::string best_ave_options_word = ""; + std::string best_entropy_word = ""; + + size_t silent_count = 0; // Keep a count of how many loops since out last output. + while (true) { + size_t max_options = 0; + size_t total_options = 0; + double entropy = 0.0; + + // Scan through all possible answers... 
+ for (WordData & answer : words) { + size_t options = AnalyzeGuess(guess, answer).CountOnes(); + if (options > max_options) max_options = options; + total_options += options; + const double p = static_cast(options) / static_cast(start_count); + entropy -= p * std::log2(p); + } + double ave_options = static_cast(total_options) / static_cast(words.size()); + + ++silent_count; + if (max_options < best_max_options) { + best_max_options = max_options; + best_max_options_word = guess; + std::cout << "New best MAX options: " << guess << " : " << max_options << std::endl; + silent_count = 0; + } + if (ave_options < best_ave_options) { + best_ave_options = ave_options; + best_ave_options_word = guess; + std::cout << "New best AVE options: " << guess << " : " << ave_options << std::endl; + silent_count = 0; + } + if (entropy > best_entropy) { + best_entropy = entropy; + best_entropy_word = guess; + std::cout << "New best ENTROPY: " << guess << " : " << entropy << std::endl; + silent_count = 0; + } + if (silent_count >= 10000) { + std::cout << "...processing... ('" << guess << "')" << std::endl; + silent_count = 0; + } + + // Now move on to the next word... + size_t inc_pos = word_length - 1; // find the first non-z letter. + while (inc_pos < word_length && guess[inc_pos] == 'z') { + guess[inc_pos] = 'a'; + --inc_pos; + } + if (inc_pos == word_length) break; + ++guess[inc_pos]; + } + } + + /// Print all of the words with a given set of IDs. + void PrintWords(const emp::BitVector & word_ids) { + size_t count = 0; + for (int id = word_ids.FindOne(); id >= 0; id = word_ids.FindOne(id+1)) { + if (count) std::cout << ","; + std::cout << words[id].word; + ++count; + } + std::cout << " (" << count << " words found)" << std::endl; + } + + /// Print all of the results, sorted by max number of options. 
+ void PrintResults() { + for (size_t i = 0; i < words.size(); ++i) { + words[i].is_active = start_options.Has(i); + } + emp::Sort(words, [](const WordData & w1, const WordData & w2){ + if (w1.is_active == w2.is_active) { + return w1.max_options < w2.max_options; + } + return w2.is_active; + }); + for (auto & word : words) { + std::cout << word.word + << ", " << word.max_options + << ", " << word.ave_options + << ", " << word.is_active + << std::endl; + } + } +}; + +int main(int argc, char* argv[]) +{ + emp::vector args = emp::cl::args_to_strings(argc, argv); + + if (args.size() > 3) { + std::cerr << "May provide am input filename (with the word list to use) and output filename (for results)" + << std::endl; + exit(1); + } + + WordSet word_set(5); + + if (args.size() == 1) word_set.Load(std::cin, std::cout); + else { + std::ifstream in_file{args[1]}; + if (args.size() == 2) word_set.Load(in_file, std::cout); + else { + std::ofstream out_file{args[2]}; + word_set.Load(in_file, out_file); + } + } + + word_set.Preprocess(); + + //word_set.AddClue("aloes", "NNNEN"); + word_set.AddClue("rates", "NENEN"); + // word_set.AddClue("login", "ENNEN"); + // word_set.AddClue("dimly", "NHNHH"); + // word_set.AddClue("finch", "NNNNN"); + + /* + word_set.AddClue(0,'a',Result::NOWHERE); + word_set.AddClue(1,'l',Result::NOWHERE); + word_set.AddClue(2,'o',Result::NOWHERE); + word_set.AddClue(3,'e',Result::ELSEWHERE); + word_set.AddClue(4,'s',Result::NOWHERE); + + word_set.AddClue(0,'d',Result::NOWHERE); + word_set.AddClue(1,'i',Result::ELSEWHERE); + word_set.AddClue(2,'r',Result::NOWHERE); + word_set.AddClue(3,'t',Result::NOWHERE); + word_set.AddClue(4,'y',Result::NOWHERE); + + word_set.AddClue(0,'h',Result::NOWHERE); + word_set.AddClue(1,'e',Result::NOWHERE); + word_set.AddClue(2,'n',Result::NOWHERE); + word_set.AddClue(3,'g',Result::NOWHERE); + word_set.AddClue(4,'e',Result::HERE); + */ + + word_set.Analyze(); + word_set.PrintResults(); +// word_set.AnalyzeAll(); +} diff --git 
a/demos/utils/words/Wordle/Makefile b/demos/utils/words/Wordle/Makefile new file mode 100644 index 0000000000..71a2bb9d57 --- /dev/null +++ b/demos/utils/words/Wordle/Makefile @@ -0,0 +1,69 @@ +EMP_DIR := ../../../../include + +# Flags to use regardless of compiler +CFLAGS_all := -Wall -Wextra -Wno-unused-function -I$(EMP_DIR)/ +CFLAGS_version := -std=c++20 + +# Compiler information (Emscripten for web builds, g++ for native builds) +CXX_web := emcc +CXX_native := g++ + +OFLAGS_native_opt := -O3 -DNDEBUG +OFLAGS_native_debug := -g -pedantic -DEMP_TRACK_MEM -Wnon-virtual-dtor -Wcast-align +OFLAGS_native_grumpy := -g -pedantic -DEMP_TRACK_MEM -Wnon-virtual-dtor -Wcast-align -Wconversion -Weffc++ + +OFLAGS_web_opt := -Os -DNDEBUG -s TOTAL_MEMORY=67108864 +OFLAGS_web_debug := -g4 -pedantic -Wno-dollar-in-identifier-extension -s TOTAL_MEMORY=67108864 -s ASSERTIONS=2 -s DEMANGLE_SUPPORT=1 # -s SAFE_HEAP=1 + +CFLAGS_native_opt := $(CFLAGS_all) $(OFLAGS_native_opt) +CFLAGS_native_debug := $(CFLAGS_all) $(OFLAGS_native_debug) +CFLAGS_native_grumpy := $(CFLAGS_all) $(OFLAGS_native_grumpy) +# Web builds link Empirical's JS glue, which lives under $(EMP_DIR)/emp/web/. +CFLAGS_web_debug := $(CFLAGS_all) $(OFLAGS_web_debug) --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 +CFLAGS_web_opt := $(CFLAGS_all) $(OFLAGS_web_opt) --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 +#CFLAGS_web := $(CFLAGS_all) $(OFLAGS_web) --js-library $(EMP_DIR)/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1 + +TARGETS := Wordle + +default: native + +CXX := $(CXX_native) +CFLAGS := $(CFLAGS_native_opt) + +debug: CFLAGS := $(CFLAGS_native_debug) +debug: all + +grumpy: CFLAGS := $(CFLAGS_native_grumpy) +grumpy: all + +web: CXX := $(CXX_web) +web: CFLAGS := $(CFLAGS_web_opt) +web: all + +web-debug: CXX := $(CXX_web) +web-debug: CFLAGS := $(CFLAGS_web_debug) +web-debug: all + +native: all + +all: $(TARGETS) +
+$(TARGETS): % : %.cpp + $(CXX) $(CFLAGS_version) $(CFLAGS) $< -o $@ + +$(JS_TARGETS): %.js : %.cpp + $(CXX_web) $(CFLAGS_web) $< -o $@ + +debug-%: $*.cpp + $(CXX) $(CFLAGS_version) $(CFLAGS_native_debug) $< -o $@ + +clean: + rm -rf debug-* *~ *.dSYM $(TARGETS) +# rm -rf debug-* *~ *.dSYM $(JS_TARGETS) + +new: clean +new: native + +# Debugging information +#print-%: ; @echo $*=$($*) +print-%: ; @echo '$(subst ','\'',$*=$($*))' diff --git a/demos/utils/words/Wordle/Result.hpp b/demos/utils/words/Wordle/Result.hpp new file mode 100644 index 0000000000..efce04ed72 --- /dev/null +++ b/demos/utils/words/Wordle/Result.hpp @@ -0,0 +1,164 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Result.hpp + * + */ + +#ifndef DEMOS_UTILS_WORDS_WORDLE_RESULT_HPP_INCLUDE +#define DEMOS_UTILS_WORDS_WORDLE_RESULT_HPP_INCLUDE + +#include + +#include "emp/base/array.hpp" +#include "emp/base/error.hpp" +#include "emp/bits/BitVector.hpp" +#include "emp/math/math.hpp" + +template +class Result { +public: + enum PositionResult { NOWHERE, ELSEWHERE, HERE }; + static constexpr size_t NUM_IDS = emp::Pow(3, WORD_SIZE); + +private: + using results_t = emp::array; + + results_t results; + size_t id; + + /// Return a result array where each index is an associated (unique) possible result set. + static const results_t & LookupResult(size_t result_id) { + static emp::array result_array; + static bool init = false; + + // If this is our first time requsting the result array, generate it. 
+ if (!init) { + init = true; + for (size_t id = 0; id < NUM_IDS; ++id) { + size_t tmp_id = id; + for (size_t pos = WORD_SIZE-1; pos < WORD_SIZE; --pos) { + const size_t magnitude = emp::Pow(3, pos); + const size_t cur_result = tmp_id / magnitude; + result_array[id][pos] = static_cast(cur_result); + tmp_id -= cur_result * magnitude; + } + } + } + + return result_array[result_id]; + } + + /// Assume that we have results, calculate the associated ID. + void CalcID() { + size_t base = 1; + id = 0; + for (PositionResult r : results) { id += static_cast(r) * base; base *= 3; } + } + + /// Assume that we have an ID, lookup the correct results. + void CalcResults() { results = LookupResult(id); } + + /// Convert a results string of 'N's, 'E's, and 'H's into per-position results (the ID is NOT updated here). + void FromString(const std::string & result_str) { + emp_assert(result_str.size() == WORD_SIZE); + for (size_t i=0; i < WORD_SIZE; ++i) { + switch (result_str[i]) { + case 'N': case 'n': results[i] = NOWHERE; break; + case 'E': case 'e': results[i] = ELSEWHERE; break; + case 'H': case 'h': results[i] = HERE; break; + default: + emp_error("Invalid character in result string", result_str[i]); + }; + } + } + +public: + /// Create a result by id. + Result(size_t _id) : id(_id) { CalcResults(); } + + /// Create a result by a result array. + Result(const results_t & _results) : results(_results) { CalcID(); } + + /// Create a result by a result string; the ID must be calculated once the results are parsed. + Result(const std::string & result_str) { FromString(result_str); CalcID(); } + + /// Create a result by a guess and answer pair. + Result(const std::string & guess, const std::string & answer) { + emp_assert(guess.size() == WORD_SIZE); + emp_assert(answer.size() == WORD_SIZE); + emp::BitVector used(answer.size()); + // Test perfect matches. + for (size_t i = 0; i < guess.size(); ++i) { + if (guess[i] == answer[i]) { results[i] = HERE; used.Set(i); } + } + // Test offset matches.
+ for (size_t i = 0; i < guess.size(); ++i) { + if (guess[i] == answer[i]) continue; // already matched. + bool found = false; + for (size_t j = 0; j < answer.size(); ++j) { // seek a match elsewhere in answer! + if (!used.Has(j) && guess[i] == answer[j]) { + results[i] = ELSEWHERE; // found letter elsewhere! + used.Set(j); // make sure this letter is noted as used. + found = true; + break; // move on to next letter; we found this one. + } + } + if (!found) results[i] = NOWHERE; + } + CalcID(); // Now that we know the symbols, figure out the ID. + } + + Result(const Result & result) = default; + Result(Result && result) = default; + /// Assign from a result string: parse it, recalculate the ID, and return this object. + Result & operator=(const std::string & result_str) { FromString(result_str); CalcID(); return *this; } + Result & operator=(const Result & result) = default; + Result & operator=(Result && result) = default; + + bool operator==(const Result & in) const { return id == in.id; } + bool operator!=(const Result & in) const { return id != in.id; } + bool operator< (const Result & in) const { return id < in.id; } + bool operator<=(const Result & in) const { return id <= in.id; } + bool operator> (const Result & in) const { return id > in.id; } + bool operator>=(const Result & in) const { return id >= in.id; } + + size_t GetID() const { return id; } + size_t GetSize() const { return WORD_SIZE; } + size_t size() const { return WORD_SIZE; } + + PositionResult operator[](size_t id) const { return results[id]; } + + // Test if this result is valid for the given word. + bool IsValid(const std::string & word) const { + // Disallow letters marked "NOWHERE" that are subsequently marked "ELSEWHERE" + // (other order is okay).
+ for (size_t pos = 0; pos < WORD_SIZE-1; ++pos) { + if (results[pos] == NOWHERE) { + for (size_t pos2 = pos+1; pos2 < WORD_SIZE; ++pos2) { + if (results[pos2] == ELSEWHERE && word[pos] == word[pos2]) return false; + } + } + } + + return true; + } + + std::string ToString( + const std::string & here="H", + const std::string & elsewhere="E", + const std::string & nowhere="N" + ) const { + std::string out; // = emp::to_string(id, "-"); + for (auto x : results) { + if (x == HERE) out += here; + else if (x == ELSEWHERE) out += elsewhere; + else if (x == NOWHERE) out += nowhere; + } + return out; + } +}; + +#endif // #ifndef DEMOS_UTILS_WORDS_WORDLE_RESULT_HPP_INCLUDE diff --git a/demos/utils/words/Wordle/Wordle.cpp b/demos/utils/words/Wordle/Wordle.cpp new file mode 100644 index 0000000000..541817c273 --- /dev/null +++ b/demos/utils/words/Wordle/Wordle.cpp @@ -0,0 +1,579 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Wordle.cpp + */ + +#include +#include +#include +#include +#include + +#include "emp/base/Ptr.hpp" +#include "emp/base/vector.hpp" +#include "emp/bits/BitSet.hpp" +#include "emp/bits/BitVector.hpp" +#include "emp/config/command_line.hpp" +#include "emp/datastructs/map_utils.hpp" +#include "emp/datastructs/vector_utils.hpp" +#include "emp/io/File.hpp" +#include "emp/tools/string_utils.hpp" + +#include "Result.hpp" + + +template +class WordSet { +private: + static constexpr size_t MAX_LETTER_REPEAT = 4; + using word_list_t = emp::BitVector; + using result_t = Result; + + // Get the ID (0-26) associated with a letter. + static size_t ToID(char letter) { + emp_assert(letter >= 'a' && letter <= 'z'); + return static_cast(letter - 'a'); + } + + static char ToLetter(size_t id) { + emp_assert(id < 26); + return static_cast(id + 'a'); + } + + // All of the clues for a given position. 
+ struct PositionClues { + size_t pos; + std::array here; // Is a given letter at this position? + + void SetNumWords(size_t num_words) { + for (auto & x : here) x.resize(num_words); + } + }; + + // All of the clues for zero or more instances of a given letter. + struct LetterClues { + size_t letter; // [0-25] + std::array at_least; ///< Are there at least x instances of letter? (0 is meaningless) + std::array exactly; ///< Are there exactly x instances of letter? + + void SetNumWords(size_t num_words) { + for (auto & x : at_least) x.resize(num_words); + for (auto & x : exactly) x.resize(num_words); + } + }; + + struct WordData { + std::string word; + // Pre=processed data + emp::BitSet<26> letters; // What letters are in this word? + emp::BitSet<26> multi_letters; // What letters are in this word more than once? + std::array next_words; + + // Collected data + size_t max_options = 0; // Maximum number of word options after used as a guess. + double ave_options = 0.0; // Average number of options after used as a guess. + double entropy = 0.0; // What is the entropy (and thus information gained) for this choice? + + WordData(const std::string & in_word) : word(in_word) { + for (char x : word) { + size_t let_id = ToID(x); + if (letters.Has(let_id)) multi_letters.Set(let_id); + else letters.Set(let_id); + } + } + }; + + emp::vector words; ///< Data about all words in this Wordle + emp::array pos_clues; ///< A PositionClues object for each position. + emp::array let_clues; ///< Clues based off the number of letters. + std::unordered_map pos_map; ///< Map of words to their position ids. + word_list_t start_options; ///< Current options. + size_t start_count; ///< Count of start options (cached) + + std::istream & is; + std::ostream & os; + + bool verbose = true; + +public: + WordSet(std::istream & _is, std::ostream & _os) : is(_is), os(_os) { } + + /// Include a single word into this WordSet. 
+ void AddWord(std::string & in_word) { + size_t id = words.size(); // Set a unique ID for this word. + pos_map[in_word] = id; // Keep track of the ID for this word. + words.emplace_back(in_word); // Setup the word data. + } + + /// Load a whole series for words (from a file) into this WordSet + void Load() { + // Load in all of the words. + std::string in_word; + size_t wrong_size_count = 0; + size_t invalid_char_count = 0; + size_t dup_count = 0; + while (is) { + is >> in_word; + // Only keep words of the correct size and all lowercase. + if (in_word.size() != WORD_SIZE) { wrong_size_count++; continue; } + if (!emp::is_lower(in_word)) { invalid_char_count++; continue; } + if (emp::Has(pos_map, in_word)) { dup_count++; continue; } + AddWord(in_word); + } + + if (wrong_size_count) { + std::cerr << "Warning: eliminated " << wrong_size_count << " words of the wrong size." + << std::endl; + } + if (invalid_char_count) { + std::cerr << "Warning: eliminated " << invalid_char_count << " words with invalid characters." + << std::endl; + } + if (dup_count) { + std::cerr << "Warning: eliminated " << dup_count << " words that were duplicates." + << std::endl; + } + + if (verbose) std::cerr << "Loaded " << words.size() << " valid words." << std::endl; + } + + /// Clear out all prior guess information. + void ResetOptions() { + start_count = words.size(); + start_options.resize(start_count); + start_options.SetAll(); + } + + // Limit the current options based on a single guess and its result. + + word_list_t EvalGuess(const std::string & guess, const result_t & result) { + emp_assert(guess.size() == WORD_SIZE); + emp_assert(result.size() == WORD_SIZE); + + emp::array letter_counts; + std::fill(letter_counts.begin(), letter_counts.end(), 0); + emp::BitSet<26> letter_fail; + word_list_t word_options = start_options; + + // First add letter clues and collect letter information. 
+ for (size_t i = 0; i < WORD_SIZE; ++i) { + const size_t cur_letter = ToID(guess[i]); + if (result[i] == result_t::HERE) { + word_options &= pos_clues[i].here[cur_letter]; + ++letter_counts[cur_letter]; + } else if (result[i] == result_t::ELSEWHERE) { + word_options &= ~pos_clues[i].here[cur_letter]; + ++letter_counts[cur_letter]; + } else { // Must be 'N' + word_options &= ~pos_clues[i].here[cur_letter]; + letter_fail.Set(cur_letter); + } + } + + // Next add letter clues. + for (size_t letter_id = 0; letter_id < 26; ++letter_id) { + const size_t let_count = letter_counts[letter_id]; + if (let_count) { + word_options &= let_clues[letter_id].at_least[let_count]; + } + if (letter_fail.Has(letter_id)) { + word_options &= let_clues[letter_id].exactly[let_count]; + } + } + + return word_options; + } + + + void AnalyzeGuess(WordData & guess, const word_list_t & cur_words) { + size_t max_options = 0; + size_t total_options = 0; + size_t option_count = 0; + double entropy = 0.0; + const double word_count = static_cast(words.size()); + + // Scan through all of the possible result IDs. + for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + word_list_t next_options = guess.next_words[result_id] & cur_words; + size_t num_options = next_options.CountOnes(); + if (num_options > max_options) max_options = num_options; + total_options += num_options * num_options; + option_count++; + double p = static_cast(num_options) / word_count; + if (p > 0.0) entropy -= p * std::log2(p); + } + + guess.max_options = max_options; + guess.ave_options = static_cast(total_options) / static_cast(words.size()); + guess.entropy = entropy; + } + + + /// Once the words are loaded, Preprocess will collect info. + void Preprocess() { + std::cout << "Beginning pre-process phase..." << std::endl; + + // Setup all position clue info to know the number of words. 
+ for (size_t i=0; i < WORD_SIZE; ++i) { + pos_clues[i].pos = i; + pos_clues[i].SetNumWords(words.size()); + } + + // Setup all letter clue information + for (size_t let=0; let < 26; let++) { + let_clues[let].letter = let; + let_clues[let].SetNumWords(words.size()); + } + + // Counters for number of letters. + emp::array letter_counts; + + // Loop through each word, indicating which clues it is consistent with. + for (size_t word_id = 0; word_id < words.size(); ++word_id) { + const std::string & word = words[word_id].word; + + // Figure out which letters are in this word. + std::fill(letter_counts.begin(), letter_counts.end(), 0); // Reset counters to zero. + for (const char letter : word) ++letter_counts[ToID(letter)]; // Count letters. + + // Setup the LETTER clues that word is consistent with. + for (size_t letter_id = 0; letter_id < 26; ++letter_id) { + const size_t cur_count = letter_counts[letter_id]; + let_clues[letter_id].exactly[cur_count].Set(word_id); + for (uint8_t count = 0; count <= cur_count; ++count) { + let_clues[letter_id].at_least[count].Set(word_id); + } + } + + // Now figure out what POSITION clues it is consistent with. + for (size_t pos=0; pos < word.size(); ++pos) { + const size_t cur_letter = ToID(word[pos]); + pos_clues[pos].here[cur_letter].Set(word_id); + } + } + + std::cout << "...clues are initialized..." << std::endl; + + ResetOptions(); + + // Loop through words one more time, filling out result lists and collecting data. + size_t word_count = 0; + const size_t step = words.size() / 100; + for (auto & word_info : words) { + if (++word_count % step == 0) { + std::cout << "."; + std::cout.flush(); + } + for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + Result result(result_id); + if (!result.IsValid(word_info.word)) continue; + word_info.next_words[result_id] = EvalGuess(word_info.word, result_id); + } + AnalyzeGuess(word_info, start_options); + } + + std::cout << "...words are analyzed..." 
<< std::endl; + } + + // /// Also analyze non-word guesses. + // void AnalyzeAll() { + // std::string guess(WORD_SIZE, 'a'); + // size_t best_max_options = 10000; + // double best_ave_options = 10000.0; + // double best_entropy = 0.0; + // std::string best_max_options_word = ""; + // std::string best_ave_options_word = ""; + // std::string best_entropy_word = ""; + + // size_t silent_count = 0; // Keep a count of how many loops since out last output. + // while (true) { + // size_t max_options = 0; + // size_t total_options = 0; + // double entropy = 0.0; + + // // Scan through all possible answers... + // for (WordData & answer : words) { + // size_t options = AnalyzeGuess(guess, answer).CountOnes(); + // if (options > max_options) max_options = options; + // total_options += options; + // const double p = static_cast(options) / static_cast(start_count); + // entropy -= p * std::log2(p); + // } + // double ave_options = static_cast(total_options) / static_cast(words.size()); + + // ++silent_count; + // if (max_options < best_max_options) { + // best_max_options = max_options; + // best_max_options_word = guess; + // std::cout << "New best MAX options: " << guess << " : " << max_options << std::endl; + // silent_count = 0; + // } + // if (ave_options < best_ave_options) { + // best_ave_options = ave_options; + // best_ave_options_word = guess; + // std::cout << "New best AVE options: " << guess << " : " << ave_options << std::endl; + // silent_count = 0; + // } + // if (entropy > best_entropy) { + // best_entropy = entropy; + // best_entropy_word = guess; + // std::cout << "New best ENTROPY: " << guess << " : " << entropy << std::endl; + // silent_count = 0; + // } + // if (silent_count >= 10000) { + // std::cout << "...processing... ('" << guess << "')" << std::endl; + // silent_count = 0; + // } + + // // Now move on to the next word... + // size_t inc_pos = WORD_SIZE - 1; // find the first non-z letter. 
+ // while (inc_pos < WORD_SIZE && guess[inc_pos] == 'z') { + // guess[inc_pos] = 'a'; + // --inc_pos; + // } + // if (inc_pos == WORD_SIZE) break; + // ++guess[inc_pos]; + // } + // } + + /// Print all of the words with a given set of IDs. + void PrintWords(const word_list_t & word_ids, size_t max_count=(size_t)-1) const { + std::cout << "(" << word_ids.CountOnes() << " words) "; + size_t count = 0; + for (int id = word_ids.FindOne(); id >= 0; id = word_ids.FindOne(id+1)) { + if (count) std::cout << ","; + std::cout << words[id].word; + if (++count == max_count) { + if (id > 0) std::cout << " ..."; + break; + } + } + // std::cout << " (" << word_is.CountOnes() << " words)" << std::endl; + } + + void PrintPosClues(size_t pos) const { + const PositionClues & clue = pos_clues[pos]; + std::cout << "Position " << pos << ":\n"; + for (uint8_t i = 0; i < 26; ++i) { + std::cout << " '" << clue.let << "' : "; + PrintWords(clue.here[i], 10); + std::cout << std::endl; + } + } + + void PrintLetterClues(char letter) const { + const LetterClues & clue = let_clues[ToID(letter)]; + std::cout << "Letter '" << clue.letter << "':\n"; + for (size_t i = 0; i <= MAX_LETTER_REPEAT; ++i) { + std::cout << "EXACTLY " << i << ": "; + PrintWords(clue.exactly[i], 20); + std::cout << std::endl; + } + for (size_t i = 0; i <= MAX_LETTER_REPEAT; ++i) { + std::cout << "AT LEAST " << i << ": "; + PrintWords(clue.at_least[i], 20); + std::cout << std::endl; + } + } + + void PrintWordData(const WordData & word) const { + std::cout << "WORD: " << word.word << std::endl; + std::cout << "Letters: " << word.letters << std::endl; + std::cout << "Multi: " << word.multi_letters << std::endl; + std::cout << "MAX Opts: " << word.max_options << std::endl; + std::cout << "AVE Opts: " << word.ave_options << std::endl; + std::cout << "Entropy: " << word.entropy << std::endl; + std::cout << std::endl; + + size_t total_count = 0; + for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + result_t 
result(result_id); + word_list_t result_words = word.next_words[result_id]; + std::cout << result_id << " - " << result.ToString() << " "; + PrintWords(result_words, 10); + total_count += result_words.CountOnes(); + std::cout << std::endl; + } + std::cout << "Total Count: " << total_count << std::endl; + } + + void PrintWordData(size_t id) const { PrintWordData(words[id]); } + void PrintWordData(const std::string & word) { + PrintWordData(words[pos_map[word]]); + } + + // Reorder words. NOTE: This is destructive to all word_list data! + void SortWords(const std::string & sort_type="max") { + using wd_t = const WordData &; + if (sort_type == "max") { + emp::Sort(words, [](wd_t w1, wd_t w2){ + if (w1.max_options == w2.max_options) return w1.ave_options < w2.ave_options; // tiebreak + return w1.max_options < w2.max_options; + } ); + } else if (sort_type == "ave") { + emp::Sort(words, [](wd_t w1, wd_t w2){ + if (w1.ave_options == w2.ave_options) return w1.max_options < w2.max_options; // tiebreak + return w1.ave_options < w2.ave_options; + } ); + } else if (sort_type == "entropy") { + emp::Sort(words, [](wd_t w1, wd_t w2){ return w1.entropy > w2.entropy; } ); + } else if (sort_type == "word") { + emp::Sort(words, [](wd_t w1, wd_t w2){ return w1.word < w2.word; } ); + } + for (size_t i = 0; i < words.size(); i++) { pos_map[words[i].word] = i; } // Update ID tracking. + } + + /// Print all of the results, sorted by max number of options. + void PrintResults() { + SortWords(); + for (auto & word : words) { + std::cout << word.word + << ", " << word.max_options + << ", " << word.ave_options + << ", " << word.entropy + << std::endl; + } + } + + /// Print out all words as HTML. 
+ void PrintHTMLWord(const WordData & word) const { + std::string filename = emp::to_string("web/words/", word.word, ".html"); + std::ofstream of(filename); + + // const std::string black("⬛"); + static const std::string white("⬜"); + static const std::string green("🟩"); + static const std::string yellow("🟨"); + + of << "\n\n\n Wordle Analysis: '" + << word.word << "'\n\n\n"; + + of << "

Wordle Analysis: " << word.word << "

\n\n"; + of << "Worst case words remaining: " << word.max_options << "
\n"; + of << "Expected words remaining: " << word.ave_options << "
\n"; + of << "Information provided: " << word.entropy << "
\n

\n"; + + // Loop through all possible results. + // for (size_t result_id = 0; result_id < result_t::NUM_IDS; ++result_id) { + for (size_t result_id = result_t::NUM_IDS-1; result_id < result_t::NUM_IDS; --result_id) { + result_t result(result_id); + word_list_t result_words = word.next_words[result_id]; + + of << result.ToString(green, yellow, white) << " (" << result_words.CountOnes() << " words) : "; + + for (int id = result_words.FindOne(); id >= 0; id = result_words.FindOne(id+1)) { + of << "" << words[id].word << " "; + } + + of << "
\n"; + } + + + of << "\n\n"; + + os << "Printed file '" << filename << "'." << std::endl; + } + + void PrintHTMLWordID(int id) const { PrintHTMLWord(words[(size_t) id]); } + void PrintHTMLWord(const std::string & word) { + PrintHTMLWord(words[pos_map[word]]); + } + + void PrintHTMLIndex(const std::string & order) { + SortWords(order); + std::string filename = emp::to_string("web/index-", order, ".html"); + std::ofstream of(filename); + + of << "\n\n\n Wordle Analysis: INDEX" + "\n\n\n" + "

Analysis of Wordle Guesses

\n" + "

\nWhen a guess is made in a game of Wordle, the results limit the set of words for the answer." + " A more useful guess will limit the remaining possibilities to be as small as possible." + " But the question remains: Which word should we choose first?" + " Here are some analyses to help make that decision.\n" + "

\nBelow are a list of 5-letter words " + "(from here)" + " with data on each. The columns are:
\n" + "\n" + " \n" + " \n" + "
ExpectedWords:" + " The average number of possible words if this were your first guess. (smaller is better!)
MaximumWords:" + " The largest possible number of words remaining after this guess. (smaller is better!)
Information:" + " The number of bits of information this guess provides about the final answer. (larger is better!)

\n" + "Click on any column to sort by it. " + "Click on any word to see the exact breakdown of how possible first guesses limit future options.\n" + "

\n"; + + of << "\n"; + for (const auto & word : words) { + of << "\n"; + } + } + + void PrintHTML() { + size_t count = 0; + std::cout << "Printing HTML files..." << std::endl; + size_t step = words.size() / 100; + for (auto & word : words) { + if (count % step == 0) { std::cout << "."; std::cout.flush(); } + PrintHTMLWord(word); + } + PrintHTMLIndex("ave"); + PrintHTMLIndex("entropy"); + PrintHTMLIndex("max"); + PrintHTMLIndex("word"); + } + +}; + +int main(int argc, char* argv[]) +{ + emp::vector args = emp::cl::args_to_strings(argc, argv); + + if (args.size() > 3) { + std::cerr << "May provide am input filename (with the word list to use) and output filename (for results)" + << std::endl; + exit(1); + } + + emp::Ptr is_ptr = &std::cin; + if (args.size() > 1) is_ptr = emp::NewPtr(args[1]); + + emp::Ptr os_ptr = &std::cout; + if (args.size() > 2) os_ptr = emp::NewPtr(args[2]); + + WordSet<5> word_set(*is_ptr, *os_ptr); + word_set.Load(); + word_set.SortWords("word"); + + word_set.Preprocess(); + // word_set.AddClue(0,'a',result_t::ELSEWHERE); + // word_set.AddClue(1,'l',result_t::ELSEWHERE); + // word_set.AddClue(2,'o',result_t::NOWHERE); + // word_set.AddClue(3,'e',result_t::NOWHERE); + // word_set.AddClue(4,'s',result_t::NOWHERE); + + // word_set.PrintLetterClues('x'); + // word_set.PrintPosClues(0); + // word_set.PrintWordData(0); + // word_set.PrintWordData("aloes"); + // word_set.PrintResults(); + // word_set.AnalyzeAll(); + // word_set.PrintHTMLWordID(0); + // word_set.PrintHTMLWord("aloes"); + word_set.PrintHTML(); + + if (args.size() > 1) is_ptr.Delete(); + if (args.size() > 2) os_ptr.Delete(); +} diff --git a/doc/QuickStartGuides/3-WebTools.md b/doc/QuickStartGuides/3-WebTools.md index 6a4bf8577c..4cb630c074 100644 --- a/doc/QuickStartGuides/3-WebTools.md +++ b/doc/QuickStartGuides/3-WebTools.md @@ -78,13 +78,13 @@ do is compile. The provided Makefile can be run by typing `make Example.js`. 
This will trigger: ```shell -emcc -std=c++17 -Wall -Wno-unused-function -I../../include/emp/ -Os -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 Example.cc -o Example.js +emcc -std=c++20 -Wall -Wno-unused-function -I../../include/emp/ -Os -s "EXTRA_EXPORTED_RUNTIME_METHODS=['ccall', 'cwrap']" -s TOTAL_MEMORY=67108864 --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 Example.cc -o Example.js ``` - emscripten uses the `emcc` compiler (or `em++`, since we are using C++). -- `-std=c++17` : Empirical requires c++17. +- `-std=c++20` : Empirical requires c++20. - `-Wall -Wno-unused-function` : turn on all warnings by default except for unused functions, since not all library functions are diff --git a/doc/blogs/Binomial.md b/doc/blogs/Binomial.md new file mode 100644 index 0000000000..551b6d85bf --- /dev/null +++ b/doc/blogs/Binomial.md @@ -0,0 +1,43 @@ +# Drawing random values from non-uniform distributions: A binomial case study + +One common challenge in scientific computing is drawing from specific random distributions. +These can be time-consuming and hard to make accurate, especially when rare events are +important to include. + +There are many different mathematical distributions to consider. For any common distribution, +you should be able to find plenty of information about it on the internet. There are a +handful of specific questions to ask: + +1. Is there a simple, accurate conversion from a uniform [0.0, 1.0) distribution -- like those +produced by most random number generators -- to the distribution I need? + +2. If not, how close of an approximation can I get? Is it good enough? + +3. If not, am I going to be using the same parameters over and over such that I can do some +pre-processing to produce a fast result?
(For example, am I using a fair 6-sided die and so I +know each outcome always has a 1/6 chance of showing up?) + +4. If not, how much traditional optimization can I use in the brute-force calculation? + +Here, I am going to focus on *Binomial Distributions* and some other related distributions, +but the logic that I use is applicable elsewhere. + +As a reminder: + +A **Binomial Distribution** asks: If an event is going to occur with probability *p* and we test +for it *N* times, how many times will the event actually occur? *Example*: Each time a +programmer writes a line of code, there is a *p*=0.03 chance that she introduces a bug. How many +bugs does she create after *N*=100 lines of code? + +A **Negative Binomial Distribution** turns this around: If an event is going to occur with +probability *p*, how many times do we need to test for it before it actually occurs *N* times? +*Example*: Given a *p*=0.03 chance of introducing a bug with each line of code, how many lines +would a programmer need to write to reach *N*=10 bugs? + +A **Geometric Distribution** is a special case of the Negative Binomial Distribution where *N*=1. +*Example*: Given a *p*=0.03 chance of introducing a bug, how many lines can a programmer write +before introducing the next bug? + +A **Poisson Distribution** is a continuous-time analogue of a Binomial Distribution, used for measuring +the number of independent events that occur in a time period rather than during a specified +number of trials.
diff --git a/doc/conf.py b/doc/conf.py index 93843550ce..d53e238192 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # Empirical documentation build configuration file, created by @@ -22,6 +22,7 @@ import sphinx_rtd_theme import subprocess import sys +import textwrap # -- General configuration --------------------------------------------- @@ -60,7 +61,10 @@ # TIP: if using the sphinx-bootstrap-theme, you need # "treeViewIsBootstrap": True, "exhaleExecutesDoxygen": True, - "exhaleDoxygenStdin": "INPUT = ../include" + "exhaleDoxygenStdin": textwrap.dedent(""" + INPUT = ../include + EXCLUDE_SYMBOLS += internal + """) } # Tell sphinx what the primary language being documented is. diff --git a/doc/library/Evolve/evolve.md b/doc/library/Evolve/evolve.md new file mode 100644 index 0000000000..d0e2a65743 --- /dev/null +++ b/doc/library/Evolve/evolve.md @@ -0,0 +1,38 @@ +# Evolution tools + +## World + +```{eval-rst} +.. doxygenfile:: emp/Evolve/World.hpp + :project: Empirical + :no-link: +``` + +## Systematics Manager + +```{include} systematics.md +``` + +### Systematics API + +```{eval-rst} +.. doxygenfile:: emp/Evolve/Systematics.hpp + :project: Empirical + :no-link: +``` + +## NK + +```{eval-rst} +.. doxygenfile:: emp/Evolve/NK.hpp + :project: Empirical + :no-link: +``` + +## Selection + +```{eval-rst} +.. doxygenfile:: emp/Evolve/World_select.hpp + :project: Empirical + :no-link: +``` diff --git a/doc/library/Evolve/systematics.md b/doc/library/Evolve/systematics.md new file mode 100644 index 0000000000..354f5096c2 --- /dev/null +++ b/doc/library/Evolve/systematics.md @@ -0,0 +1,252 @@ + +The systematics manager tracks phylogenetic relationships among organisms within a digital +evolution system. For asexual systems, these relationships form a phylogenetic tree +(phylogeny). Systems with recombination (i.e. sexual reproduction systems) are not +yet supported.
One of the major benefits of doing *in silico* evolution experiments (instead of or in addition to laboratory or field experiments) is that they allow perfect measurement of quantities that can only be inferred in nature. One such property is the precise phylogeny (i.e. ancestry tree) of the population. + +![An example phylogeny](../images/phylogeny.jpg) + +At face value, measuring a phylogeny in *in silico* evolution may seem very straightforward: you just need to keep track of what gives birth to what. However, multiple aspects turn out to be non-trivial (see below). The Empirical systematics manager is designed to handle these challenges in a flexible way such that it can be easily plugged into any digital evolution system. It flexibly handles all aspects of recording phylogenies in *in silico* evolution. + +Note: A Python wrapper for the systematics manager exists in the form of the [Phylotrackpy library](https://phylotrackpy.readthedocs.io/en/latest/). + +### Features + +#### Flexible taxon definitions + +One of the central decisions when creating a phylogeny is choosing what the taxonomic units (i.e. the nodes in the tree) are. In a traditional phylogeny, these nodes are species. However, the concept of species is so murky that it is impossible to generically apply to computational evolution systems (we'd argue that it's questionable whether it could even be applied to biological data recorded at perfect temporal resolution, but that's a separate conversation). One alternative would be to make a phylogeny in which all nodes are individuals, but these trees are usually so large that they are impractical to work with. + +Increasingly, biologists have embraced the idea of building trees in which the taxonomic units are not species. Often, these are denoted by referring to them as an "X tree", where X is the taxonomic unit of interest. A traditional phylogeny, then, is a species tree.
This terminology is particularly common in cancer evolution research, in which species trees are often contrasted with "clone trees" or "gene trees", in which the taxonomic units are genotypes. + +We can generalize this concept - any phylogeny of individuals can be abstracted by lumping individuals together based on a shared feature (see figure). This feature could be something simple like a phenotypic or genotypic trait, or it could be something more complex. For example, to approximate something more like a traditional biological species concept, you could choose to define an individual as being a member of a new taxonomic unit if it fails to produce successful offspring when recombined with an individual prototypical of its parent species (although note that the stochasticity inherent in this definition could have some unexpected side effects). The broader the grouping, the smaller the phylogeny will be (e.g. a genotype tree will be larger than a phenotype tree). + +![Illustration of different ways taxonomic units could be defined](https://raw.githubusercontent.com/emilydolson/interpreting_the_tape_of_life/master/figs/dolson.lineage_metrics_cartoon.png) +(Figure from "Quantifying the tape of life: Ancestry-based metrics provide insights and intuition about evolutionary dynamics" published in the proceedings of [ALIFE 2018](http://2018.alife.org/)) + +So how does the systematics manager handle this problem? By giving you the power to define taxonomic groupings however you want! When you construct a `Systematics` object, you give it a function that it can use to determine the taxonomic unit of an organism. Later, when organisms are born, you will pass them to the `Systematics` object and it will run that function on them. If the result matches the result of calling that function on the new organism's parent, then the organism will be considered to be part of the same taxonomic unit (taxon) as its parent. 
If the results do not match, the new organism will be considered to be the start of a new taxon descended from the parent's taxon. + +Note that multiple taxa may evolve that are the "same" (i.e. running the function on organisms in each yields the same result); each unique evolutionary origin will be counted as a distinct taxon. For example, let's imagine we are building a phylogeny of real animals in nature and grouping them into taxa based on whether they spend more than 50% of their lives in water. Fish and whales would be parts of two different taxa. Even though they both live their whole lives in the water, there would be a "land" taxon in between them on the line of descent. + +Example: + +```cpp +#include "Systematics.hpp" + +// Assuming that the org_t class has a member variable called genotype that stores +// its genotype, this will create a phylogeny based on genotypes +// The org_t template parameter is the type of the organisms living in your world. +// The info_t template parameter is the type of the piece of information you will +// return to indicate which organisms count as the same taxon. +// e.g. here, info_t should be whatever the type of org.genotype is. +emp::Systematics<org_t, info_t> sys([](const org_t & org){return org.genotype;}); +``` + +#### Pruning + +Phylogenies can get very large. So large that they can cause your program to exceed its available memory. To combat this problem, phylogenies can be "pruned" so they only contain extant (i.e. not extinct) taxa and their ancestors. If the `store_outside` variable for a systematics object is set to `False` (the default), this pruning will happen automatically. If you truly want to keep track of every taxon that ever existed, you can do so by setting `store_outside` to `True`.
If you want to keep track of some historical data but can't afford the memory overhead of storing every taxon that ever existed, an intermediate option is to periodically print "snapshot" files containing all taxa currently in the phylogeny. + +#### Phylostatistics calculations + +Phylogenies are very information-dense data structures, but it can sometimes be hard to know how to usefully compare them. A variety of phylogenetic summary statistics (mostly based on topology) have been developed for the purpose of usefully making high-level comparisons. The systematics manager has many of these statistics built-in and can automatically output them. It can even keep running data (mean, variance, maximum, and minimum) on each statistic over time in a highly efficient format. + +Available statistics include: + +- Mean/max/min/sum/variance pairwise distance +- Colless-like index (a variant of the Colless index adjusted for trees with multifurcations) +- Sackin index +- Phylogenetic diversity + +#### Efficiency + +Tracking phylogenies can be computationally expensive. We have sought to keep the computational overhead as low as possible. + +We also provide the option to remove all taxa that died before a certain time point (the `remove_before` method). Use this with caution, as it will inhibit the use of many phylogenetic topology metrics. In extreme cases it may be necessary to keep your memory footprint sufficiently low, though. + +If you need substantially higher efficiency (in terms of time or memory) or are working in a distributed computing environment (where having a centralized phylogeny tracker can pose a large bottleneck), check out the [hstrat library](https://github.com/mmore500/hstrat), which lets you sacrifice some precision to achieve lower computational overhead. + +#### Flexible output options + +At any time, you can tell the systematics manager to print out the full contents of its current phylogeny in a "snapshot" file.
These files will be formatted according to the [Artificial Life Phylogeny Data Standard format](https://alife-data-standards.github.io/alife-data-standards/phylogeny.html). By default they will contain the following columns for each taxon: 1) unique ID, 2) ancestor list, 3) origin time, and 4) destruction time. However, you can add additional columns with the `add_snapshot_fun` method. + +You can also print information on a single lineage. + +### Useful background information + +There are certain quirks associated with real-time phylogenies that you might not be used to thinking about if you're used to dealing with reconstructed phylogenies. Many of these discrepancies are the result of the very different temporal resolutions on which these types of phylogenies are measured, and the fact that the taxonomic units we work with are often at a finer resolution than species. We document some here so that they don't catch you off guard: + +- **Multifurcations are real**: In phylogenetic reconstructions, there is usually an assumption that any multifurcation/polytomy (i.e. a node that has more than two child nodes) is an artifact of having insufficient data. In real-time phylogenies, however, we often observe multifurcations that we know for sure actually happened. +- **Not all extant taxa are leaf nodes**: In phylogenetic reconstructions, there is usually an assumption that all extant (i.e. still living) taxa are leaf nodes in the phylogeny (i.e. none of them are parents/offspring of each other; similar taxa are descended from a shared common ancestor). In real-time phylogenies it is entirely possible that one taxon gives birth to something that we have defined as a different taxon and then continues to coexist with that child taxon. +- **Not all nodes are branch points**: In phylogenetic reconstructions, we only attempt to infer where branch points (i.e. common ancestors of multiple taxa) occurred. 
We do not try to infer how many taxa existed on a line of descent between a branch point and an extant taxon. In real-time phylogenies we observe exactly how many taxa exist on this line of descent and we keep a record of them. In practice there are often a lot of them, depending on how you define your taxa. It is unclear whether we should include these non-branching nodes when calculating phylogenetic statistics (which is why the systematics manager lets you choose whether you want to). + +![An example of a full digital evolution phylogeny](images/FullPhylogeny.png) + +The above image represents an actual phylogeny measured from digital evolution. Each rectangle represents a different taxon. Its position along the x axis represents the span of time it existed for. Note that there are often sections along a single branch where multiple taxa coexisted for a period of time. Circles represent extant taxa at the end of this run. + +### Glossary + +Some terminology that might be useful in understanding the documentation (and especially the code base) for the systematics manager, particularly in light of the fact that different sub-fields of evolutionary biology tend to use different words in many of these contexts. + +- **Taxon**: a generic word for a specific taxonomic unit. We use "taxon" as a generic term to represent a node in a phylogeny. For example, species are a common type of taxon to use when depicting portions of the phylogeny of life on earth. However, sometimes people choose to use higher-order types of taxa (e.g. genus, family, order, class, etc.) when they are trying to depict a larger swath of the whole phylogeny. +- **Taxa**: Plural of taxon. +- **Multifurcation/polytomy**: A node in a phylogeny that has more than two child nodes. +- **Bifurcation**: A node in a phylogeny that has exactly two child nodes. +- **Non-branch node**: A node in a phylogeny with only one child node. +- **Leaf node**: A node in a phylogeny with no children. 
+- **Most Recent Common Ancestor (MRCA)**: The most recent node in a phylogeny that is a common ancestor of all nodes associated with extant taxa. If the phylogeny is pruned, there won't be any branch points before the MRCA (because any branches not leading to the MRCA would lead to taxa that are now extinct). +- **Coalescence events**: Occur when the most recent common ancestor changes (i.e. all descendants from one side of the deepest branch of the phylogeny have gone extinct). In the absence of diversity-preserving features coalescence events are expected to occur by chance with a frequency dependent on population size and spatial structure (but be careful of distributional assumptions). Observing coalescence less frequently than you would expect by chance can be an indication that ecological interactions are present (we have discussed this more [here](https://direct.mit.edu/artl/article/26/1/58/93272/Interpreting-the-Tape-of-Life-Ancestry-Based) and [here](https://direct.mit.edu/artl/article/25/1/50/2915/The-MODES-Toolbox-Measurements-of-Open-Ended)). + +### Quickstart + +#### Installation + +The Systematics manager is part of Empirical. Because Empirical is header-only, you can include whichever parts of it you want. To just use the Systematics manager, you just need to include the Systematics.hpp header. Note that the Systematics manager depends on the rest of Empirical, so you will need to download the entire library. 
Currently, we recommend using the mabe-systematics branch: + +```bash +git clone --recursive git@github.com:devosoft/Empirical.git +cd Empirical +git checkout mabe-systematics +``` + +Then in your C++ file: + +```cpp +#include "Evolve/Systematics.hpp" +``` + +To compile your code using the systematics manager, you will need to tell your compiler where to find Systematics.hpp with the `-I` flag (the below assumes you are compiling from the directory you cloned Empirical into; if you aren't, you will need to include the full path to Empirical): + +```bash +g++ -IEmpirical/include/emp my_source_file.cc +``` + +#### Usage + +##### Creating a systematics object + +The first step in tracking a phylogeny with the systematics manager is to make a systematics object. The most important decision to make at this point is how to define taxa in your phylogeny (for more information, see the "flexible taxon definition" section under "features"). You can do so by passing a function to the systematics constructor which takes an organism object and returns a string that specifies a taxon. + +For example, to build a phylogeny based on genotypes, you could do the following: + +```cpp +#include "Evolve/Systematics.hpp" + +struct MyOrg { + std::string genotype; +}; + +// The first template argument is the type of your organisms +// The second template argument is the type of the piece of information you +// are using to differentiate taxa (here it's a string because the genotype +// member variable of our organism struct is a string) +emp::Systematics<MyOrg, std::string> sys([](const MyOrg & org){return org.genotype;}); +``` + +There are a couple of other decisions that you also need to make at this point. The first is which set of taxa to store in the systematics manager. 
The defaults here are most likely what you want to use, but in case they aren't, the systematics manager can be told to store or not store the following sets of taxa: + +- **active**: the taxa that still currently have living members. You almost certainly want to store these (without them you don't really have a phylogeny), but can technically disable them by setting the `store_active` keyword argument in the constructor to false. +- **ancestors**: the taxa that are ancestors of active taxa. You almost certainly want to store these too (without them you don't really have a phylogeny), but can technically disable them by setting the `store_ancestors` keyword argument in the constructor to false. +- **outside**: the taxa that are not in either of the other two groups (i.e. taxa that have gone extinct and all of their ancestors have gone extinct). If you store these, your phylogeny will get very large very fast, so doing so is generally not recommended. It is occasionally useful, though, so you can enable storing these taxa by setting the `store_all` keyword argument in the constructor to true. + +The second decision is slightly trickier. Once you start adding organisms to the systematics manager, it will create `Taxon` objects associated with each one to keep track of which taxon it is part of. You will need to use these taxon objects when adding future organisms, to specify which taxon their parent was part of. If you have control over your organism class, it is likely that the easiest option is to add a `self.taxon` attribute and store the taxon there. However, if you cannot add arbitrary data to your organism class, keeping track of taxon objects can get annoying. For this reason, the systematics manager gives you the option of letting it manage them. To do so, it needs a way to map individuals to taxa (since it's possible there are duplicate taxa, simply running the organism to taxon function again won't work). 
It achieves this mapping by keeping track of each organism's position in the population. Thus, to have the systematics manager keep track of taxon objects itself, you must set the `store_pos` keyword argument in the constructor to true. You must also use the position-based versions of add_org and remove_org, and make sure to notify the systematics manager if any organism ever changes position during its lifetime for any reason. + +Once you have created the systematics object, you just need to do two things: 1) notify it when something is born, and 2) notify it when something dies. + +##### Notifying the systematics object of births + +You must notify the systematics manager of births using the `add_org` family of functions. These functions require that you provide the newly born organism as well as either the taxon object of its parent or the position of its parent (if the systematics manager is tracking positions). + +Example of tracking taxa as object attributes (assume we're building on our example above, and already have created a systematics manager called `sys`): + +```cpp +// Do whatever you would normally do to create your first organism +// Here, we're assuming we can just call a constructor called Organism() +MyOrg my_org; + +// Notify systematics manager of this organism's birth +// This is the first org, so it doesn't have a parent +// so we do not pass a second argument. +// add_org will return a pointer to this organism's taxon object, which we +// store for future reference +emp::Ptr<emp::Taxon<std::string> > taxon = sys.AddOrg(my_org); + +// Assume stuff happens here that leads to my_org having offspring +// Here, we'll pretend that our organism class has a Reproduce method that +// returns a new offspring organism. You should handle this however you +// normally would +MyOrg org_2 = my_org.Reproduce(); + +// Notify the systematics manager of org_2's birth. 
Since it has a parent, +// we pass the taxon of that parent in as the second argument +sys.AddOrg(org_2, taxon) + +``` + +An example of tracking positions is coming soon. For now, feel free to contact us with questions! + +#### Notifying the systematics object of deaths + +You must notify the systematics manager of deaths using the `remove_org` family of functions. + +As an example (again, building on the previous examples): + +```cpp +// Assume stuff happens that causes my_org to die + +// We notify the systematics manager that this has happened by calling remove_org +// Note that remove_org takes the taxon of the dead organism as an argument, not +// the organism itself +sys.remove_org(taxon) + +``` + +#### Taxon properties + +Taxon objects maintain the following information: + +- taxon ID# ``GetID()`` +- details of organisms in the taxon ``GetInfo()`` +- pointer to the parent group (will return a null pointer if the species was injected) ``GetParent()`` +- how many organisms currently exist in the group and how many total organisms have ever existed in the group ``GetNumOrgs()`` or ``GetTotOrgs()`` +- how many direct offspring groups exist from this group and how many total extant offspring that exist from this taxa ``GetTotalOffspring()`` +- how deep in the tree the node you are examining is ``GetDepth()`` +- when did this taxon first appear in the population ``GetOriginationTime()`` +- when did the taxon leave the population ``GetDestructionTime()`` + +#### Systematics manager properties + +A systematics manager object maintains the following information: + +- Are we tracking a synchronous population? ``GetTrackSynchronous()`` ``SetTrackSynchronous()`` +- Are we storing all taxa that are still alive in the population? ``GetStoreActive()`` ``SetStoreActive()`` +- Are we storing all taxa that are ancestors of the living organisms in the population? 
``GetStoreAncestors()`` ``SetStoreAncestors()`` +- Are we storing all taxa that have died out, as have all of their descendants? ``GetStoreOutside()`` ``SetStoreOutside()`` +- Are we storing any taxa types that have died out? ``GetArchive()`` ``SetArchive()`` +- Are we storing the positions of taxa? ``GetStorePosition()`` ``SetStorePosition()`` +- How many living organisms are currently being tracked? ``GetTotalOrgs()`` +- How many independent trees are being tracked? ``GetNumRoots()`` +- What ID will the next taxon have? ``GetNextID()`` +- What is the average phylogenetic depth of organisms in the population? ``GetAveDepth()`` +- To find the most recent common ancestor (MRCA) use ``GetMRCA()`` or ``GetMRCADepth()`` to find the distance to the MRCA. + +#### Phylogeny metrics + +Many different metrics can be used to quantify the topology of a phylogeny. For more information, see (Winters et al., 2013; Tucker et al., 2017). + +The Empirical systematics manager can calculate: + +- Phylogenetic diversity (Faith, 1992) +- Taxon Distinctiveness (From Vane-Wright et al., 1991) +- Evolutionary Distinctiveness (Isaac, 2007) (mean, sum, and variance) +- Mean pairwise distance (Webb and Losos, 2000), which is equivalent to Average Taxonomic Diversity (Warwick and Clark, 1998, Tucker et al., 2016) +- Sum pairwise distance +- Variance pairwise distance +- Out-degree distribution +- Average origination time +- Colless-like Index (Mir, 2018, PLoS One) +- Sackin Index (Sackin, 1972; reviewed in Shao, 1990) +- Depth of most recent common ancestor +- Phenotypic volatility (Dolson et al., 2019) +- Unique taxa on lineage (Dolson et al., 2019) +- Mutation count along lineage (Dolson et al., 2019) +- Tree size +- Maximum depth diff --git a/doc/library/images/FullPhylogeny.png b/doc/library/images/FullPhylogeny.png new file mode 100644 index 0000000000..af33ec5f47 Binary files /dev/null and b/doc/library/images/FullPhylogeny.png differ diff --git a/doc/library/images/phylogeny.jpg 
b/doc/library/images/phylogeny.jpg new file mode 100644 index 0000000000..3652ff4d6a Binary files /dev/null and b/doc/library/images/phylogeny.jpg differ diff --git a/doc/library/index.md b/doc/library/index.md index e072b1036f..642bfc67e9 100644 --- a/doc/library/index.md +++ b/doc/library/index.md @@ -11,6 +11,7 @@ compiler/compiler data/data datastructs/datastructs debug/debug +Evolve/evolve functional/functional io/io math/math diff --git a/doc/requirements.in b/doc/requirements.in index 20b7f4bd79..74a9e9b744 100644 --- a/doc/requirements.in +++ b/doc/requirements.in @@ -1,10 +1,8 @@ -sphinx==3.2.1 -exhale==0.2.3 -sphinx-rtd-theme==0.5.0 -coverxygen==1.5.0 -breathe==4.26.1 -myst-parser==0.12.9 -# @mmore500 2021-10 -# docutils 0.18.0 crashes docs build due to exception -# AttributeError: 'Values' object has no attribute 'section_self_link' -docutils==0.17.1 +sphinx==7.2.6 +sphinx-rtd-theme==2.0.0 +coverxygen==1.8.1 +breathe==4.35.0 +myst-parser==2.0.0 +sphinx-tippy==0.4.1 +sphinxcontrib-bibtex==2.6.1 +docutils==0.20.1 \ No newline at end of file diff --git a/doc/requirements.txt b/doc/requirements.txt index 8a720136f4..6d06c70c47 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,94 +1,114 @@ # -# This file is autogenerated by pip-compile -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # -# pip-compile requirements.in +# pip-compile /tmp/requirements.in # -alabaster==0.7.12 +alabaster==0.7.13 # via sphinx -attrs==20.3.0 - # via markdown-it-py -babel==2.9.1 +babel==2.13.1 # via sphinx -beautifulsoup4==4.10.0 - # via bs4 -breathe==4.26.1 - # via - # -r requirements.in - # exhale -bs4==0.0.1 - # via exhale -certifi==2021.10.8 +beautifulsoup4==4.12.2 + # via sphinx-tippy +breathe==4.35.0 + # via -r /tmp/requirements.in +certifi==2023.11.17 # via requests -charset-normalizer==2.0.7 +charset-normalizer==3.3.2 # via requests -coverxygen==1.5.0 - # via -r requirements.in -docutils==0.17.1 
+coverxygen==1.8.1 + # via -r /tmp/requirements.in +docutils==0.20.1 # via - # -r requirements.in + # -r /tmp/requirements.in # breathe # myst-parser + # pybtex-docutils # sphinx -exhale==0.2.3 - # via -r requirements.in -idna==3.3 + # sphinx-rtd-theme + # sphinxcontrib-bibtex +idna==3.6 # via requests -imagesize==1.2.0 +imagesize==1.4.1 # via sphinx -jinja2==3.0.2 - # via sphinx -lxml==4.6.3 - # via exhale -markdown-it-py==0.5.8 - # via myst-parser -markupsafe==2.0.1 +jinja2==3.1.2 + # via + # myst-parser + # sphinx + # sphinx-tippy +latexcodec==2.0.1 + # via pybtex +markdown-it-py==3.0.0 + # via + # mdit-py-plugins + # myst-parser +markupsafe==2.1.3 # via jinja2 -myst-parser==0.12.9 - # via -r requirements.in -packaging==21.0 - # via sphinx -pygments==2.10.0 - # via sphinx -pyparsing==3.0.2 - # via packaging -pytz==2021.3 - # via babel -pyyaml==6.0 +mdit-py-plugins==0.4.0 # via myst-parser -requests==2.26.0 +mdurl==0.1.2 + # via markdown-it-py +myst-parser==2.0.0 + # via -r /tmp/requirements.in +packaging==23.2 + # via sphinx +pybtex==0.24.0 + # via + # pybtex-docutils + # sphinxcontrib-bibtex +pybtex-docutils==1.0.3 + # via sphinxcontrib-bibtex +pygments==2.17.2 # via sphinx +pyyaml==6.0.1 + # via + # myst-parser + # pybtex +requests==2.31.0 + # via + # sphinx + # sphinx-tippy six==1.16.0 # via - # breathe - # exhale -snowballstemmer==2.1.0 + # latexcodec + # pybtex +snowballstemmer==2.2.0 # via sphinx -soupsieve==2.2.1 +soupsieve==2.5 # via beautifulsoup4 -sphinx-rtd-theme==0.5.0 - # via -r requirements.in -sphinx==3.2.1 +sphinx==7.2.6 # via - # -r requirements.in + # -r /tmp/requirements.in # breathe - # exhale # myst-parser # sphinx-rtd-theme -sphinxcontrib-applehelp==1.0.2 + # sphinx-tippy + # sphinxcontrib-applehelp + # sphinxcontrib-bibtex + # sphinxcontrib-devhelp + # sphinxcontrib-htmlhelp + # sphinxcontrib-jquery + # sphinxcontrib-qthelp + # sphinxcontrib-serializinghtml +sphinx-rtd-theme==2.0.0 + # via -r /tmp/requirements.in +sphinx-tippy==0.4.1 + # 
via -r /tmp/requirements.in +sphinxcontrib-applehelp==1.0.7 # via sphinx -sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-bibtex==2.6.1 + # via -r /tmp/requirements.in +sphinxcontrib-devhelp==1.0.5 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.0.4 # via sphinx +sphinxcontrib-jquery==4.1 + # via sphinx-rtd-theme sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==1.0.6 # via sphinx -sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-serializinghtml==1.1.9 # via sphinx -urllib3==1.26.7 +urllib3==2.1.0 # via requests - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/examples/Evolve/Makefile b/examples/Evolve/Makefile index ffda44e599..79e9682ea1 100644 --- a/examples/Evolve/Makefile +++ b/examples/Evolve/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/Evolve/ShrinkPop.cpp b/examples/Evolve/ShrinkPop.cpp index 34a28eca44..4be742563c 100644 --- a/examples/Evolve/ShrinkPop.cpp +++ b/examples/Evolve/ShrinkPop.cpp @@ -16,7 +16,7 @@ int main() { constexpr size_t POP_SIZE = 3600; - constexpr size_t GENS = 10000; + // constexpr size_t GENS = 10000; const size_t POP_SIDE = (size_t) std::sqrt(POP_SIZE); emp::Random random; diff --git a/examples/OLD/Empower/Makefile b/examples/OLD/Empower/Makefile index afbb1ca155..949dc971ef 100644 --- a/examples/OLD/Empower/Makefile +++ b/examples/OLD/Empower/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/ProjectTemplate/Makefile b/examples/ProjectTemplate/Makefile index dc041579d5..7ff537c25f 100644 --- 
a/examples/ProjectTemplate/Makefile +++ b/examples/ProjectTemplate/Makefile @@ -3,7 +3,7 @@ PROJECT := project_name EMP_DIR := ../../../Empirical/include # Flags to use regardless of compiler -CFLAGS_all := -Wall -Wno-unused-function -std=c++17 -I$(EMP_DIR)/ +CFLAGS_all := -Wall -Wno-unused-function -std=c++20 -I$(EMP_DIR)/ # Native compiler information CXX_nat := g++ diff --git a/examples/base/Makefile b/examples/base/Makefile index 0540edf6de..9223056b51 100644 --- a/examples/base/Makefile +++ b/examples/base/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/base/assert.cpp b/examples/base/assert.cpp index 44ab837b07..854133a227 100644 --- a/examples/base/assert.cpp +++ b/examples/base/assert.cpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2020 + * @date 2020-2022. * * @file assert.cpp * @brief Some examples code demonstrating use of the error system. 
@@ -11,7 +11,7 @@ int main() { - int x{ 42 }; + [[maybe_unused]] int x{ 42 }; emp_assert(x > 1, "This assert passes in debug mode!", x); diff --git a/examples/bits/BitVector.cpp b/examples/bits/BitVector.cpp index d57d8c2db5..bee2f6d069 100644 --- a/examples/bits/BitVector.cpp +++ b/examples/bits/BitVector.cpp @@ -45,6 +45,7 @@ int main() auto set5 = set3 & set4; total += set5.CountOnes(); } + std::cout << "Total = " << total << std::endl; std::clock_t emp_tot_time = std::clock() - emp_start_time; double time = 1000.0 * ((double) emp_tot_time) / (double) CLOCKS_PER_SEC; diff --git a/examples/bits/Makefile b/examples/bits/Makefile index 2fecbac619..58396ca87e 100644 --- a/examples/bits/Makefile +++ b/examples/bits/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/compiler/Makefile b/examples/compiler/Makefile index 035b3dd1a6..5c03062b0f 100644 --- a/examples/compiler/Makefile +++ b/examples/compiler/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/config/Makefile b/examples/config/Makefile index 837c3a6f7b..534041b3a4 100644 --- a/examples/config/Makefile +++ b/examples/config/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/control/Makefile b/examples/control/Makefile index f20b711572..ecdb48d9d2 100644 --- a/examples/control/Makefile +++ b/examples/control/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function 
-I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/data/Makefile b/examples/data/Makefile index 48766adf83..4c0366926b 100644 --- a/examples/data/Makefile +++ b/examples/data/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/datastructs/Makefile b/examples/datastructs/Makefile index a4ca0afc60..9d79199f4c 100644 --- a/examples/datastructs/Makefile +++ b/examples/datastructs/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/functional/Makefile b/examples/functional/Makefile index 72b888fa6a..cb3b2e3280 100644 --- a/examples/functional/Makefile +++ b/examples/functional/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/games/Makefile b/examples/games/Makefile index c4b7a366c8..0a54a5eaf9 100644 --- a/examples/games/Makefile +++ b/examples/games/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/geometry/Makefile b/examples/geometry/Makefile index d94514c4bb..2aaada367f 100644 --- a/examples/geometry/Makefile +++ b/examples/geometry/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := 
-std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/hardware/Makefile b/examples/hardware/Makefile index cb73cfa432..f20f3246cc 100644 --- a/examples/hardware/Makefile +++ b/examples/hardware/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/io/Makefile b/examples/io/Makefile index ab2d1f9620..12d64df3e5 100644 --- a/examples/io/Makefile +++ b/examples/io/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/math/CombinedBinomialDistribution.cpp b/examples/math/CombinedBinomialDistribution.cpp new file mode 100644 index 0000000000..f053aba8a6 --- /dev/null +++ b/examples/math/CombinedBinomialDistribution.cpp @@ -0,0 +1,35 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022-2022 + * + * @file CombinedBinomialDistribution.cpp + * @brief Some examples code for using emp::CombinedBinomialDistribution + */ + + +#include "emp/math/CombinedBinomialDistribution.hpp" +#include "emp/math/Random.hpp" + +int main(int argc, char* argv[]) +{ + if(argc != 4){ + std::cout << "Error! 
Expecting exactly three command line arguments: " + << "p n num_trials" << std::endl; + emp_assert(false); + } + double p = std::stod(argv[1]); + size_t n = std::stoi(argv[2]); + size_t num_trials = std::stoi(argv[3]); + + emp::Random random; + emp::CombinedBinomialDistribution distribution(p, 1); + + double mean = 0; + + for(size_t i = 0; i < num_trials; i++){ + mean += (double)distribution.PickRandom(n, random) / num_trials; + } + std::cout << "Mean after " << num_trials << " trials: " << mean << std::endl; + return 0; +} diff --git a/examples/math/Makefile b/examples/math/Makefile index 0273c03544..d582e906cd 100644 --- a/examples/math/Makefile +++ b/examples/math/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc @@ -19,7 +19,7 @@ CFLAGS_web_debug := $(CFLAGS_all) $(OFLAGS_web_debug) --js-library ../../include CFLAGS_web_opt := $(CFLAGS_all) $(OFLAGS_web_opt) --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s NO_EXIT_RUNTIME=1 #CFLAGS_web := $(CFLAGS_all) $(OFLAGS_web) --js-library ../../include/emp/web/library_emp.js -s EXPORTED_FUNCTIONS="['_main', '_empCppCallback']" -s DISABLE_EXCEPTION_CATCHING=1 -s NO_EXIT_RUNTIME=1 -TARGETS := combos constants Distribution info_theory math Random Range stats +TARGETS := combos constants Distribution info_theory math Random Range stats CombinedBinomialDistribution default: native diff --git a/examples/math/Random.cpp b/examples/math/Random.cpp index 45be7dffb1..7e6d626159 100644 --- a/examples/math/Random.cpp +++ b/examples/math/Random.cpp @@ -20,6 +20,8 @@ int main() std::cout << "Digits in random orders:" << std::endl; size_t num_samples = 10; + + std::cout << "Permutations: " << std::endl; for (size_t s = 0; s < num_samples; s++) { emp::vector permut = emp::GetPermutation(random, 
10); for (size_t i = 0; i < 10; i++) { @@ -27,4 +29,17 @@ int main() } std::cout << std::endl; } + + std::cout << "Exponentials: " << std::endl; + double p = 0.5; + std::cout << "p = " << p << std::endl; + for (size_t s = 0; s < num_samples; s++) std::cout << random.GetExponential(p) << std::endl; + + p = 0.1; + std::cout << "\np = " << p << std::endl; + for (size_t s = 0; s < num_samples; s++) std::cout << random.GetExponential(p) << std::endl; + + p = 0.9; + std::cout << "\np = " << p << std::endl; + for (size_t s = 0; s < num_samples; s++) std::cout << random.GetExponential(p) << std::endl; } diff --git a/examples/meta/Makefile b/examples/meta/Makefile index 8b534f319b..d162e47b45 100644 --- a/examples/meta/Makefile +++ b/examples/meta/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/prefab/Makefile b/examples/prefab/Makefile index e2d39568f5..37cff3b33f 100644 --- a/examples/prefab/Makefile +++ b/examples/prefab/Makefile @@ -7,7 +7,7 @@ # WebAssembly. 
# Flags to use regardless of compiler -CFLAGS_all := -std=c++17 -Wall -Wno-unused-function -I../../include/ +CFLAGS_all := -std=c++20 -Wall -Wno-unused-function -I../../include/ # Emscripten compiler information CXX_web := emcc diff --git a/examples/scholar/Makefile b/examples/scholar/Makefile index f32e235068..16e119f2df 100644 --- a/examples/scholar/Makefile +++ b/examples/scholar/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/testing/Makefile b/examples/testing/Makefile index 9cdb06c2db..09e5b3b8aa 100644 --- a/examples/testing/Makefile +++ b/examples/testing/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -pthread -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/timing/Binomial.cpp b/examples/timing/Binomial.cpp new file mode 100644 index 0000000000..f69cfa2e9e --- /dev/null +++ b/examples/timing/Binomial.cpp @@ -0,0 +1,198 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022 + * + * @file Binomial.cpp + * + */ + +#include +#include + +#include "../../include/emp/math/Distribution.hpp" +#include "../../include/emp/math/Random.hpp" +#include "../../include/emp/tools/string_utils.hpp" + +void TestGeometric(emp::Random & random, const double p, const size_t num_tests=1000000) { + std::cout << emp::ANSI_GreenBG() << emp::ANSI_Black() + << "---- Geometric Tests: p = " << p << " ----" + << emp::ANSI_Reset() + << std::endl; + + ////////- Pre-processed distribution + emp::NegativeBinomial dist(p, 1); + + std::clock_t start_time = std::clock(); + + double total = 0; + 
for (size_t i = 0; i < num_tests; i++) { + total += dist.PickRandom(random); + } + + std::clock_t tot_time = std::clock() - start_time; + double result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "Negative Binomial Distribution with p = " << p << " (and N=1)\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " dist size = " << dist.GetSize() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + + ////////- Random call (no pre-process) + start_time = std::clock(); + + total = 0; + for (size_t i = 0; i < num_tests; i++) { + total += random.GetGeometric(p); + } + + tot_time = std::clock() - start_time; + result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "random.GetGeometric(p) with p = " << p << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + +} + +void TestNegBinomial( + emp::Random & random, + const double p, + const size_t N, + const size_t num_tests=1000000) +{ + std::cout << emp::ANSI_BrightBlueBG() + << "---- Negative Binomial Tests: p = " << p << " ; N = " << N << " ----" + << emp::ANSI_Reset() + << std::endl; + + ////////- Pre-processed distribution + emp::NegativeBinomial dist(p, N); + + std::clock_t start_time = std::clock(); + + double total = 0; + for (size_t i = 0; i < num_tests; i++) { + total += dist.PickRandom(random); + } + + std::clock_t tot_time = std::clock() - start_time; + double result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "Negative Binomial Distribution with p = " << p << " and N = " << N << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." 
<< emp::ANSI_NoBold() << "\n" + << " dist size = " << dist.GetSize() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + + ////////- Random call (no pre-process) + start_time = std::clock(); + + total = 0; + for (size_t i = 0; i < num_tests; i++) { + for (size_t n = 0; n < N; ++n) { + total += random.GetGeometric(p); + } + } + + tot_time = std::clock() - start_time; + result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "N = " << N << " calls to random.GetGeometric(p) with p = " << p << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; +} + +void TestBinomial( + emp::Random & random, + const double p, + const size_t N, + const size_t num_tests=1000000) +{ + std::cout << emp::ANSI_MagentaBG() + << "---- Binomial Tests: p = " << p << " ; N = " << N << " ----" + << emp::ANSI_Reset() + << std::endl; + + ////////- Pre-processed distribution + emp::Binomial dist(p, N); + + std::clock_t start_time = std::clock(); + + double total = 0; + for (size_t i = 0; i < num_tests; i++) { + total += dist.PickRandom(random); + } + + std::clock_t tot_time = std::clock() - start_time; + double result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "Binomial Distribution with p = " << p << " and N = " << N << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." 
<< emp::ANSI_NoBold() << "\n" + << " dist size = " << dist.GetSize() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; + + ////////- Random call (no pre-process) + start_time = std::clock(); + + total = 0; + for (size_t i = 0; i < num_tests; i++) { + size_t pos = 0; + while( (pos += random.GetGeometric(p)) < N ) { + total++; + } + } + + tot_time = std::clock() - start_time; + result = ((double) tot_time) / (double) CLOCKS_PER_SEC; + + std::cout << "N = " << N << " calls to random.GetGeometric(p) with p = " << p << "\n" + << " time = " << emp::ANSI_Bold() << result << " seconds." << emp::ANSI_NoBold() << "\n" + << " average = " << (total / num_tests) << "\n" + << std::endl; +} + +int main() +{ + size_t num_tests = 1000000; + emp::Random random; + + TestGeometric(random, 0.9, num_tests); + TestGeometric(random, 0.5, num_tests); + TestGeometric(random, 0.1, num_tests); + TestGeometric(random, 0.01, num_tests); + TestGeometric(random, 0.001, num_tests); + TestGeometric(random, 0.0001, num_tests); + + TestNegBinomial(random, 0.9, 10, num_tests); + TestNegBinomial(random, 0.5, 10, num_tests); + TestNegBinomial(random, 0.1, 10, num_tests); + TestNegBinomial(random, 0.01, 10, num_tests); + TestNegBinomial(random, 0.001, 10, num_tests); + TestNegBinomial(random, 0.0001, 10, num_tests); + + TestNegBinomial(random, 0.9, 100, num_tests); + TestNegBinomial(random, 0.5, 100, num_tests); + TestNegBinomial(random, 0.1, 100, num_tests); + TestNegBinomial(random, 0.01, 100, num_tests); + TestNegBinomial(random, 0.001, 100, num_tests); + TestNegBinomial(random, 0.0001, 100, num_tests); + + TestBinomial(random, 0.9, 100, num_tests); + TestBinomial(random, 0.5, 100, num_tests); + TestBinomial(random, 0.1, 100, num_tests); + TestBinomial(random, 0.01, 100, num_tests); + TestBinomial(random, 0.001, 100, num_tests); + TestBinomial(random, 0.0001, 100, num_tests); + + TestBinomial(random, 0.9, 1000, num_tests); + TestBinomial(random, 0.5, 1000, num_tests); + 
TestBinomial(random, 0.1, 1000, num_tests); + TestBinomial(random, 0.01, 1000, num_tests); + TestBinomial(random, 0.001, 1000, num_tests); + TestBinomial(random, 0.0001, 1000, num_tests); +} diff --git a/examples/timing/Makefile b/examples/timing/Makefile index c1ae00ec39..f5a8deb74b 100644 --- a/examples/timing/Makefile +++ b/examples/timing/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/tools/Makefile b/examples/tools/Makefile index 60251f1d80..f714250b0f 100644 --- a/examples/tools/Makefile +++ b/examples/tools/Makefile @@ -1,6 +1,6 @@ # Flags to use regardless of compiler CFLAGS_all := -Wall -Wno-unused-function -I../../include/ -CFLAGS_version := -std=c++17 +CFLAGS_version := -std=c++20 # Emscripten compiler information CXX_web := emcc diff --git a/examples/web/Makefile b/examples/web/Makefile index ecc3c5871a..42404c97a7 100644 --- a/examples/web/Makefile +++ b/examples/web/Makefile @@ -7,7 +7,7 @@ # WebAssembly. 
# Flags to use regardless of compiler -CFLAGS_all := -std=c++17 -Wall -Wno-unused-function -I../../include/ +CFLAGS_all := -std=c++20 -Wall -Wno-unused-function -I../../include/ # Emscripten compiler information CXX_web := emcc diff --git a/examples/web/Sudoku.cpp b/examples/web/Sudoku.cpp index def1a8c3eb..66e29cfac3 100644 --- a/examples/web/Sudoku.cpp +++ b/examples/web/Sudoku.cpp @@ -83,7 +83,7 @@ class SudokuBoard : public UI::Div { for (size_t r = 0; r < 9; r++) { for (size_t c = 0; c < 9; c++) { auto cell = table.GetCell(r,c); - cell.On("mousedown", [cell,r,c]() mutable { + cell.On("mousedown", [cell/*,r,c*/]() mutable { // doc.Div("table_bg").SetBackground("red"); // cell.SetCSS("BackgroundColor", "grey"); cell.Clear(); diff --git a/examples/web/assert.cpp b/examples/web/assert.cpp index d023d395bc..a09aacc51e 100644 --- a/examples/web/assert.cpp +++ b/examples/web/assert.cpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2020 + * @date 2020-2022. * * @file assert.cpp */ @@ -11,7 +11,7 @@ int main() { - int x{ 42 }; + [[maybe_unused]] int x{ 42 }; emp_assert(x > 1, "This assert passes in debug mode!", x); diff --git a/include/emp/Evolve/Systematics.hpp b/include/emp/Evolve/Systematics.hpp index c7f220ba84..26728f0e83 100644 --- a/include/emp/Evolve/Systematics.hpp +++ b/include/emp/Evolve/Systematics.hpp @@ -1,32 +1,30 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2017-2018 + * @date 2017-2023 * * @file Systematics.hpp * @brief Track genotypes, species, clades, or lineages of organisms in a world. * - * - * @todo Technically, we don't need to keep the ancestors in a set in order to track a lineage... 
- * If we delete all of their descendants they should automaticaly be deleted. * @todo We should provide an option to back up systematics data to a file so that it doesn't all * need to be kept in memory, especially if we're only doing post-analysis. * @todo This inheritance system makes adding new systematics-related data tracking kind of a pain. - * Over time, this will probably become a maintainability problem. We can probably make the - * whole inheritance thing go away through judicious use of signals. - * @todo This does not currently handle situations where organisms change locations during their - * lifetimes gracefully. + * Over time, this will probably become a maintainability problem. We could make the inheritance + * go away and just use signals, but then the World could not maintain systematics managers. */ #ifndef EMP_EVOLVE_SYSTEMATICS_HPP_INCLUDE #define EMP_EVOLVE_SYSTEMATICS_HPP_INCLUDE - +#include +#include #include #include #include #include +#include #include +#include #include #include "../base/Ptr.hpp" @@ -36,6 +34,7 @@ #include "../data/DataNode.hpp" #include "../datastructs/map_utils.hpp" #include "../datastructs/set_utils.hpp" +#include "../io/File.hpp" #include "../math/info_theory.hpp" #include "../math/stats.hpp" #include "../tools/string_utils.hpp" @@ -45,6 +44,9 @@ namespace emp { + template + class Systematics; + /// The systematics manager allows an optional second template type that /// can store additional data about each taxon in the phylogeny. Here are /// some structs containing common pieces of additional data to track. @@ -68,34 +70,41 @@ namespace emp { fitness.Add(fit); } - const double GetFitness() const { + double GetFitness() const { return fitness.GetMean(); } }; + /// Track information related to the mutational landscape + /// Maps a string representing a type of mutation to a count representing + /// the number of that type of mutation that occurred to bring about this taxon. 
template - struct mut_landscape_info { /// Track information related to the mutational landscape - /// Maps a string representing a type of mutation to a count representing - /// the number of that type of mutation that occurred to bring about this taxon. + struct mut_landscape_info { using phen_t = PHEN_TYPE; using has_phen_t = std::true_type; using has_mutations_t = std::true_type; using has_fitness_t = std::true_type; // using has_phenotype_t = true; - std::unordered_map mut_counts; + std::unordered_map mut_counts = {}; /// The number of mutations of each type that occurred to make this taxon DataNode fitness; /// This taxon's fitness (for assessing deleterious mutational steps) PHEN_TYPE phenotype; /// This taxon's phenotype (for assessing phenotypic change) + /// @returns this taxon's phenotype const PHEN_TYPE & GetPhenotype() const { return phenotype; } - const double GetFitness() const { + /// @returns this taxon's fitness + double GetFitness() const { return fitness.GetMean(); } - void RecordMutation(std::unordered_map muts) { + /// Adds mutations to the list of mutations that occurred to make this taxon + /// @param muts can contain as many strings (types of mutation) as desired, each accompanied + /// by a number indicating how many of that mutation occurred + /// Example: {"point_mutation":2, "insertion":1} + void RecordMutation(std::unordered_map & muts) { for (auto mut : muts) { if (Has(mut_counts, mut.first)) { mut_counts[mut.first] += mut.second; @@ -105,10 +114,14 @@ namespace emp { } } + /// Record the fitness of this taxon + /// @param fit the fitness void RecordFitness(double fit) { fitness.Add(fit); } + /// Record the phenotype of this taxon + /// @param phen the phenotype void RecordPhenotype(PHEN_TYPE phen) { phenotype = phen; } @@ -124,6 +137,8 @@ namespace emp { /// track an evolutionary pathway) template class Taxon { + template + friend class Systematics; protected: using this_t = Taxon; using info_t = ORG_INFO; @@ -173,26 +188,40 @@ 
namespace emp { /// Get the number of living organisms currently associated with this Taxon. size_t GetNumOrgs() const { return num_orgs; } + /// Set the number of living organisms currently associated with this Taxon. + void SetNumOrgs(size_t n) { num_orgs = n; } + /// Get the total number of organisms that have ever lived associated with this Taxon size_t GetTotOrgs() const { return tot_orgs; } + /// Set the total number of organisms that have ever lived associated with this Taxon + void SetTotOrgs(size_t n) { tot_orgs = n; } + /// Get the number of taxa that were produced by organisms from this Taxon. size_t GetNumOff() const { return num_offspring; } /// Get the number of taxanomic steps since the ancestral organism was injected into the World. size_t GetDepth() const { return depth; } + /// Get data struct associated with this taxon data_t & GetData() {return data;} + /// Get data struct associated with this taxon const data_t & GetData() const {return data;} + /// Get pointers to this taxon's offspring std::set > GetOffspring() {return offspring;} + /// Set this taxon's data struct to the given value void SetData(data_t d) {data = d;} + /// @returns this taxon's origination time double GetOriginationTime() const {return origination_time;} + /// Set this taxon's origination time void SetOriginationTime(double time) {origination_time = time;} + /// @returns this taxon's destruction time double GetDestructionTime() const {return destruction_time;} + /// Sets this taxon's destruction time void SetDestructionTime(double time) {destruction_time = time;} /// Add a new organism to this Taxon. @@ -228,6 +257,7 @@ namespace emp { return num_orgs; } + /// Remove specified taxon from this taxon's offspring list void RemoveFromOffspring(Ptr offspring_tax) { offspring.erase(offspring_tax); } @@ -254,8 +284,8 @@ namespace emp { /// A base class for Systematics, maintaining information common to all systematics managers - /// and providing virtual functions. 
- + /// and providing virtual functions. You probably don't want to instantiate this. It just + /// exists so that you can make containers of Systematics managers of different types. template class SystematicsBase { protected: @@ -269,7 +299,7 @@ namespace emp { // Stats about active taxa... (totals are across orgs, not taxa) size_t org_count; ///< How many organisms are currently active? size_t total_depth; ///< Sum of taxa depths for calculating average. - size_t num_roots; ///< How many distint injected ancestors are currently in population? + size_t num_roots; ///< How many distinct injected ancestors are currently in population? int max_depth; ///< Depth of deepest taxon. -1 means needs to be recalculated size_t next_id; ///< What ID value should the next new taxon have? @@ -318,6 +348,9 @@ namespace emp { /// What is the average phylogenetic depth of organisms in the population? double GetAveDepth() const { return ((double) total_depth) / (double) org_count; } + /// @returns current update/time step + size_t GetUpdate() const {return curr_update;} + /// Are we tracking organisms evolving in synchronous generations? void SetTrackSynchronous(bool new_val) {track_synchronous = new_val; } @@ -336,14 +369,19 @@ namespace emp { /// Are we storing the location of taxa? void SetStorePosition(bool new_val) { store_position = new_val; } - // Returns a reference so that capturing it in a lambda to call on update - // is less confusing. It's possible we should change it to be consistent - // with GetFitnessDataNode, though. 
+ /// Sets the current update/time step + void SetUpdate(size_t ud) {curr_update = ud;} + + /// Add a data node to this systematics manager + /// @param name the name of the data node (so it can be found later) data_ptr_t AddDataNode(const std::string & name) { emp_assert(!data_nodes.HasNode(name)); return &(data_nodes.New(name)); } + /// Add a data node to this systematics manager + /// @param name the name of the data node (so it can be found later) + /// @param pull_set_fun a function to run when the data node is requested to pull data (returns vector of values) data_ptr_t AddDataNode(std::function()> pull_set_fun, const std::string & name) { emp_assert(!data_nodes.HasNode(name)); auto node = AddDataNode(name); @@ -351,6 +389,9 @@ namespace emp { return node; } + /// Add a data node to this systematics manager + /// @param name the name of the data node (so it can be found later) + /// @param pull_fun a function to run when the data node is requested to pull data (returns single value) data_ptr_t AddDataNode(std::function pull_fun, const std::string & name) { emp_assert(!data_nodes.HasNode(name)); auto node = AddDataNode(name); @@ -358,7 +399,7 @@ namespace emp { return node; } - + /// @returns a pointer to the data node with the specified name data_ptr_t GetDataNode(const std::string & name) { return &(data_nodes.Get(name)); } @@ -384,26 +425,32 @@ namespace emp { virtual emp::vector GetPairwiseDistances(bool branch_only) const = 0; virtual int SackinIndex() const = 0; virtual double CollessLikeIndex() const = 0; + virtual std::unordered_map GetOutDegreeDistribution() const = 0; + virtual double GetAverageOriginTime(bool) const = 0; virtual int GetMRCADepth() const = 0; - virtual void AddOrg(ORG && org, WorldPosition pos, int update) = 0; - virtual void AddOrg(ORG & org, WorldPosition pos, int update) = 0; - virtual bool RemoveOrg(WorldPosition pos, int time=-1) = 0; - virtual void RemoveOrgAfterRepro(WorldPosition pos, int time=-1) = 0; - // virtual bool 
RemoveNextOrg(WorldPosition pos, int time=-1) = 0; + virtual void AddOrg(ORG && org, WorldPosition pos) = 0; + virtual void AddOrg(ORG & org, WorldPosition pos) = 0; + virtual void AddOrg(ORG && org, WorldPosition pos, WorldPosition parent) = 0; + virtual void AddOrg(ORG & org, WorldPosition pos, WorldPosition parent) = 0; + virtual bool RemoveOrg(WorldPosition pos) = 0; + virtual void RemoveOrgAfterRepro(WorldPosition pos) = 0; virtual void PrintStatus(std::ostream & os) const = 0; virtual double CalcDiversity() const = 0; virtual void Update() = 0; - virtual void SetNextParent(int pos) = 0; - virtual void SetNextParent(WorldPosition & pos) = 0; + virtual void SetNextParent(WorldPosition pos) = 0; + virtual void SwapPositions(WorldPosition p1, WorldPosition p2) = 0; }; + // Forward-declare CollessStruct for use in calculating Colless metric + struct CollessStruct; + /// @brief A tool to track phylogenetic relationships among organisms. /// The systematics class tracks the relationships among all organisms based on the INFO_TYPE /// provided. If an offspring has the same value for INFO_TYPE as its parent, it is grouped into /// the same taxon. Otherwise a new Taxon is created and the old one is used as its parent in - /// the phylogeny. If the provided INFO_TYPE is the organsism's genome, a traditional phylogeny + /// the phylogeny. If the provided INFO_TYPE is the organism's genome, a traditional phylogeny /// is formed, with genotypes. If the organism's behavior/task set is used, then organisms are - /// grouped by phenotypes. If the organsims's position is used, the evolutionary path through + /// grouped by phenotypes. If the organism's position is used, the evolutionary path through /// space is tracked. Any other aspect of organisms can be tracked this way as well. 
template class Systematics : public SystematicsBase { @@ -416,9 +463,13 @@ namespace emp { using hash_t = typename Ptr::hash_t; using fun_calc_info_t = std::function; - fun_calc_info_t calc_info_fun; - Ptr next_parent; - Ptr most_recent; + fun_calc_info_t calc_info_fun; ///< Function that takes an organism and returns the unit being tracked by systematics + Ptr next_parent; ///< The taxon that has been marked as parent for next new org + Ptr most_recent; ///< The most-recently added taxon + bool num_orgs_wrong = false; ///< Keep track of whether we have loaded from a file that didn't + /// provide num_orgs + bool total_offspring_wrong = false; ///< Keep track of whether we have loaded from a file without + /// recalculating total offspring using parent_t::store_active; using parent_t::store_ancestors; @@ -441,28 +492,22 @@ namespace emp { using parent_t::GetNumOutside; using parent_t::GetTreeSize; using parent_t::GetNumTaxa; - // using parent_t::OnNew; - // using parent_t::OnPrune; using parent_t::GetPhylogeneticDiversity; - // using parent_t::GetTaxonDistinctiveness; - // using parent_t::GetEvolutionaryDistinctiveness; using parent_t::GetMeanPairwiseDistance; using parent_t::GetSumPairwiseDistance; using parent_t::GetVariancePairwiseDistance; using parent_t::GetPairwiseDistances; - // using parent_t::GetDistanceToRoot; - // using parent_t::GetBranchesToRoot; - // using parent_t::GetMRCA; + using parent_t::GetOutDegreeDistribution; + using parent_t::GetAverageOriginTime; using parent_t::GetMRCADepth; using parent_t::AddOrg; using parent_t::RemoveOrg; using parent_t::RemoveOrgAfterRepro; - // using parent_t::RemoveNextOrg; - // using parent_t::Parent; using parent_t::PrintStatus; - // using parent_t::PrintLineage; using parent_t::CalcDiversity; using parent_t::Update; + using parent_t::GetUpdate; + using parent_t::SetUpdate; using parent_t::SetNextParent; using parent_t::GetDataNode; @@ -476,11 +521,12 @@ namespace emp { using parent_t::AddMutationCountDataNode; 
using parent_t::GetMaxDepth; + /// Struct for keeping track of what information to print out in snapshot files struct SnapshotInfo { using snapshot_fun_t = std::function; - snapshot_fun_t fun; - std::string key; - std::string desc; + snapshot_fun_t fun; ///< Function for converting taxon to string containing desired data + std::string key; ///< Column name for data calculated with this function + std::string desc; ///< Description of data in this function SnapshotInfo(const snapshot_fun_t & _fun, const std::string & _key, const std::string & _desc="") : fun(_fun), @@ -489,32 +535,40 @@ namespace emp { { ; } }; - emp::vector user_snapshot_funs; + emp::vector user_snapshot_funs; ///< Collection of all desired snapshot file columns std::unordered_set< Ptr, hash_t > active_taxa; ///< A set of all living taxa. std::unordered_set< Ptr, hash_t > ancestor_taxa; ///< A set of all dead, ancestral taxa. std::unordered_set< Ptr, hash_t > outside_taxa; ///< A set of all dead taxa w/o descendants. - Ptr to_be_removed = nullptr; - int removal_time = -1; - int removal_pos = -1; + Ptr to_be_removed = nullptr; ///< Taxon to remove org from after next call to AddOrg + emp::WorldPosition removal_pos = {0, 0}; ///< Position of taxon to next be removed - emp::vector > taxon_locations; - emp::vector > next_taxon_locations; + emp::vector > > taxon_locations; ///< Positions in this vector indicate taxon positions in world - Signal, ORG & org)> on_new_sig; ///< Trigger when any organism is pruned from tree + Signal, ORG & org)> on_new_sig; ///< Trigger when a new taxon is created + Signal)> on_extinct_sig; ///< Trigger when a taxon goes extinct Signal)> on_prune_sig; ///< Trigger when any organism is pruned from tree mutable Ptr mrca; ///< Most recent common ancestor in the population. - /// Called wheneven a taxon has no organisms AND no descendants. + /// Called whenever a taxon has no organisms AND no descendants. void Prune(Ptr taxon); /// Called when an offspring taxa has been deleted. 
void RemoveOffspring(Ptr offspring, Ptr taxon); /// Called when there are no more living members of a taxon. There may be descendants. - void MarkExtinct(Ptr taxon, int time=-1); + void MarkExtinct(Ptr taxon); + + #ifndef DOXYGEN_SHOULD_SKIP_THIS + /// Helper function for RemoveBefore + /// @returns true if a taxon can safely be + /// removed by RemoveBefore + bool CanRemove(Ptr t, int ud); + // Helper for Colless function calculation + CollessStruct RecursiveCollessStep(Ptr curr) const; + #endif // DOXYGEN_SHOULD_SKIP_THIS @@ -522,6 +576,7 @@ namespace emp { /** * Contructor for Systematics; controls what information should be stored. + * @param calc_taxon A function that takes an organism and calculates what taxon it belongs to * @param store_active Should living organisms' taxa be tracked? (typically yes!) * @param store_ancestors Should ancestral organisms' taxa be maintained? (yes for lineages!) * @param store_all Should all dead taxa be maintained? (typically no; it gets BIG!) @@ -545,100 +600,266 @@ namespace emp { outside_taxa.clear(); } + // ===== Functions for modifying phylogeny/systematics manager internal state ==== - void Update() { - ++curr_update; - if (track_synchronous) { - - // Clear pending removal - if (to_be_removed != nullptr) { - RemoveOrg(to_be_removed, removal_time); - taxon_locations[removal_pos] = nullptr; - to_be_removed = nullptr; - removal_pos = -1; - } + /// Switch to next update/time step + /// Useful for keeping track of taxon survival times + /// and population positions in synchronous generation worlds. + void Update(); - std::swap(taxon_locations, next_taxon_locations); - next_taxon_locations.resize(0); + ///@{ + /// Add information about a new organism, including its stored info and parent's taxon; + /// If you would like the systematics manager to track taxon age, you can also supply + /// the update at which the taxon is being added. + /// return a pointer for the associated taxon. 
+ /// @returns a pointer for the associated taxon. + /// @param org a reference to the organism being added + /// @param pos the position of the organism being added + /// @param parent a pointer to the org's parent + void AddOrg(ORG && org, WorldPosition pos); + void AddOrg(ORG && org, WorldPosition pos, WorldPosition parent); + Ptr AddOrg(ORG && org, WorldPosition pos, Ptr parent); + Ptr AddOrg(ORG && org, Ptr parent=nullptr); + + void AddOrg(ORG & org, WorldPosition pos); + void AddOrg(ORG & org, WorldPosition pos, WorldPosition parent); + Ptr AddOrg(ORG & org, WorldPosition pos, Ptr parent); + Ptr AddOrg(ORG & org, Ptr parent=nullptr); + ///@} + + ///@{ + /// Remove an instance of an organism; track when it's gone. + /// @param pos the world position of the individual being removed + /// @param taxon a pointer to the taxon of the individual being removed + bool RemoveOrg(WorldPosition pos); + bool RemoveOrg(Ptr taxon); + ///@} + + ///@{ + /// Mark an instance of a taxon to be removed; track when it's gone. + /// This is a work-around to deal with steady state/non-synchronous + /// populations in which an organism might die as its offspring is born + /// (e.g. in a spatial world where the offspring replaces the parent). + /// If the bookkeeping is not handled correctly, we could accidentally + /// mark the taxon as extinct when it is actually continuing. + /// By using this method, the taxon won't be removed until after the + /// next org is added or the next time an org is marked for removal. 
+ /// @param pos the world position of the individual being removed + /// @param taxon a pointer to the taxon of the individual being removed + void RemoveOrgAfterRepro(WorldPosition pos); + void RemoveOrgAfterRepro(Ptr taxon); + ///@} + + + ///@{ + /// Tell systematics manager that the parent of the next taxon added + /// will be the one specified by this function (either at the specified + /// position or the one pointed to by the given pointer) + /// Works with version of AddOrg that only takes org, position, and + /// update. + /// Will be set to null after being assigned as the parent of a taxon + void SetNextParent(WorldPosition pos) { + emp_assert(pos.IsActive() || !pos.IsValid()); + if (!pos.IsValid()) { + next_parent = nullptr; + } else { + next_parent = taxon_locations[pos.GetPopID()][pos.GetIndex()]; } } + void SetNextParent(Ptr p) { + next_parent = p; + } + ///@} + + /// Set function used to calculate taxons from organisms void SetCalcInfoFun(fun_calc_info_t f) {calc_info_fun = f;} - // Currently using raw pointers because of a weird bug in emp::Ptr. Should switch when fixed. + /// Remove all taxa that 1) went extinct before the specified update/time step, + /// and 2) only have ancestors that went extinct before the specified update/time step. + /// Warning: this function invalidates most measurements you could make about tree topology. + /// It is useful in select situations where you need to store ancestors for some period of time, + /// but cannot computationally afford to store all ancestors for your entire run. 
+ void RemoveBefore(int ud); + + /// Run the given function on every active taxon (const version) + /// @param fun the function to run on each taxon + void ApplyToActiveTaxa(const std::function tax)> & fun) const { + std::for_each(active_taxa.begin(), active_taxa.end(), fun); + } + + /// Run the given function on every active taxon + /// @param fun the function to run on each taxon + void ApplyToActiveTaxa(const std::function tax)> & fun) { + std::for_each(active_taxa.begin(), active_taxa.end(), fun); + } + + /// Run the given function on every ancestor taxon (const version) + /// @param fun the function to run on each taxon + void ApplyToAncestorTaxa(const std::function tax)> & fun) const { + std::for_each(ancestor_taxa.begin(), ancestor_taxa.end(), fun); + } + + /// Run the given function on every ancestor taxon + /// @param fun the function to run on each taxon + void ApplyToAncestorTaxa(const std::function tax)> & fun) { + std::for_each(ancestor_taxa.begin(), ancestor_taxa.end(), fun); + } + + /// Run the given function on every outside taxon (const version) + /// @param fun the function to run on each taxon + void ApplyToOutsideTaxa(const std::function tax)> & fun) const { + std::for_each(outside_taxa.begin(), outside_taxa.end(), fun); + } + + /// Run the given function on every outside taxon + /// @param fun the function to run on each taxon + void ApplyToOutsideTaxa(const std::function tax)> & fun) { + std::for_each(outside_taxa.begin(), outside_taxa.end(), fun); + } + + /// Run given function on all taxa (const version) + /// @param fun the function to run on each taxon + void ApplyToAllTaxa(const std::function tax)> & fun) const { + ApplyToActiveTaxa(fun); + ApplyToAncestorTaxa(fun); + ApplyToOutsideTaxa(fun); + } + + /// Run given function on all taxa + /// @param fun the function to run on each taxon + void ApplyToAllTaxa(const std::function tax)> & fun) { + ApplyToActiveTaxa(fun); + ApplyToAncestorTaxa(fun); + ApplyToOutsideTaxa(fun); + } + + /// Run 
given function on all taxa and return result (const version) + /// @param fun the function to run on each taxon + /// @returns a vector containing the results of running the function on each taxon + template + emp::vector ApplyToAllTaxa(const std::function tax)> & fun) const { + emp::vector result; + // const auto all = {std::ranges::ref_view(active_taxa), std::ranges::ref_view(ancestor_taxa), + // std::ranges::ref_view(outside_taxa)}; + // for (emp::Ptr tax : all | std::views::join) { + // result.push_back(fun(tax)); + // } + for (emp::Ptr tax : active_taxa) { + result.push_back(fun(tax)); + } + for (emp::Ptr tax : ancestor_taxa) { + result.push_back(fun(tax)); + } + for (emp::Ptr tax : outside_taxa) { + result.push_back(fun(tax)); + } + return result; + } + + /// Run given function on all taxa and return result (non-const version) + /// @param fun the function to run on each taxon + /// @returns a vector containing the results of running the function on each taxon + template + emp::vector ApplyToAllTaxa(const std::function tax)> & fun) { + emp::vector result; + // const auto all = {std::ranges::ref_view(active_taxa), + // std::ranges::ref_view(ancestor_taxa), + // std::ranges::ref_view(outside_taxa)}; + // for (emp::Ptr tax : all | std::views::join) { + for (emp::Ptr tax : active_taxa) { + result.push_back(fun(tax)); + } + for (emp::Ptr tax : ancestor_taxa) { + result.push_back(fun(tax)); + } + for (emp::Ptr tax : outside_taxa) { + result.push_back(fun(tax)); + } + + return result; + } + + // ===== Functions for querying phylogeny/systematics manager internal state ==== + + // Currently using raw pointer because of a weird bug in emp::Ptr. Should switch when fixed. 
std::unordered_set< Ptr, hash_t > * GetActivePtr() { return &active_taxa; } + /// @returns set of active (extant/living) taxa const std::unordered_set< Ptr, hash_t > & GetActive() const { return active_taxa; } + /// @returns set of ancestor taxa (extinct, but have active descendants) const std::unordered_set< Ptr, hash_t > & GetAncestors() const { return ancestor_taxa; } + /// @returns set of outside taxa (extinct, with no active descendants) const std::unordered_set< Ptr, hash_t > & GetOutside() const { return outside_taxa; } - /// How many taxa are still active in the population? + /// @returns the number of taxa that are still active in the population size_t GetNumActive() const { return active_taxa.size(); } - /// How many taxa are ancestors of living organisms (but have died out themselves)? + /// @returns the number of taxa that are ancestors of living organisms (but have died out themselves) size_t GetNumAncestors() const { return ancestor_taxa.size(); } - /// How many taxa are stored that have died out, as have their descendents? + /// @returns the number of taxa that are stored that have died out, as have their descendants size_t GetNumOutside() const { return outside_taxa.size(); } - /// How many taxa are in the current phylogeny? + /// @returns the number of taxa that are in the current phylogeny size_t GetTreeSize() const { return GetNumActive() + GetNumAncestors(); } - /// How many taxa are stored in total? 
+ /// @returns the number of taxa that are stored in total size_t GetNumTaxa() const { return GetTreeSize() + GetNumOutside(); } - int GetMaxDepth() { - if (max_depth != -1) { - return max_depth; - } + /// @returns the phylogenetic depth (lineage length) of the taxon with + /// the longest lineage out of all active taxa + int GetMaxDepth(); - for (auto tax : active_taxa) { - int depth = tax->GetDepth(); - if (depth > max_depth) { - max_depth = depth; - } - } - return max_depth; + /// @returns the taxon that will be used as the parent + /// of the next taxon created via the version of AddOrg + /// that does not accept a parent + Ptr GetNextParent() { + return next_parent; } - void SetNextParent(WorldPosition & pos) { - emp_assert(pos.IsActive() || !pos.IsValid()); - if (!pos.IsValid()) { - next_parent = nullptr; - } else { - next_parent = taxon_locations[pos.GetIndex()]; - } + /// @returns the most recently created taxon + Ptr GetMostRecent() { + return most_recent; } - void SetNextParent(int pos) { - emp_assert(pos < (int)taxon_locations.size(), "Invalid parent", pos, taxon_locations.size()); - if (pos == -1) { - next_parent = nullptr; - } else { - emp_assert(pos >= 0, "Invalid parent", pos); - emp_assert(taxon_locations[pos], pos); - next_parent = taxon_locations[pos]; - } - } + /// @returns a pointer to the parent of a given taxon + Ptr Parent(Ptr taxon) const; - void SetNextParent(Ptr p) { - next_parent = p; + /// @returns true if there is a taxon at specified location + bool IsTaxonAt(WorldPosition id) { + emp_assert(id.GetPopID() < taxon_locations.size(), "Invalid population id", id, taxon_locations.size()); + emp_assert(id.GetIndex() < taxon_locations[id.GetPopID()].size(), "Invalid taxon location", id, taxon_locations[id.GetPopID()].size()); + return taxon_locations[id.GetPopID()][id.GetIndex()] != nullptr; } - Ptr GetNextParent() { - return next_parent; + /// @returns pointer to taxon at specified location + Ptr GetTaxonAt(WorldPosition id) { + 
emp_assert(id.GetPopID() < taxon_locations.size(), "Invalid population id", id, taxon_locations.size()); + emp_assert(id.GetIndex() < taxon_locations[id.GetPopID()].size(), "Invalid taxon location", id, taxon_locations[id.GetPopID()].size()); + return taxon_locations[id.GetPopID()][id.GetIndex()]; } - Ptr GetMostRecent() { - return most_recent; - } + // ===== Functions for adding actions to systematics manager signals ==== - SignalKey OnNew(std::function, ORG & org)> & fun) { return on_new_sig.AddAction(fun); } + /// Provide a function for Systematics to call each time a new taxon is created. + /// Trigger: New taxon is made + /// Argument: Pointer to taxon, reference to org taxon was created from + SignalKey OnNew(std::function t, ORG & org)> & fun) { return on_new_sig.AddAction(fun); } - /// Privide a function for Systematics to call each time a taxon is about to be pruned. + /// Provide a function for Systematics to call each time a taxon goes extinct. + /// Trigger: Taxon is going extinct + /// Argument: Pointer to taxon + SignalKey OnExtinct(std::function t)> & fun) { return on_extinct_sig.AddAction(fun); } + + /// Provide a function for Systematics to call each time a taxon is about to be pruned (removed from ancestors). /// Trigger: Taxon is about to be killed /// Argument: Pointer to taxon SignalKey OnPrune(std::function)> & fun) { return on_prune_sig.AddAction(fun); } + // ===== Functions for adding data nodes to systematics manager ==== + + /// Add data node that records evolutionary distinctiveness when requested to pull. + /// Used by AddPhylodiversityFile in World_output.hpp virtual data_ptr_t AddEvolutionaryDistinctivenessDataNode(const std::string & name = "evolutionary_distinctiveness") { auto node = AddDataNode(name); @@ -653,6 +874,8 @@ namespace emp { return node; } + /// Add data node that records pairwise distance when requested to pull. 
+ /// Used by AddPhylodiversityFile in World_output.hpp virtual data_ptr_t AddPairwiseDistanceDataNode(const std::string & name = "pairwise_distance") { auto node = AddDataNode(name); node->AddPullSet([this](){ @@ -661,6 +884,8 @@ namespace emp { return node; } + /// Add data node that records phylogenetic diversity when requested to pull. + /// Used by AddPhylodiversityFile in World_output.hpp virtual data_ptr_t AddPhylogeneticDiversityDataNode(const std::string & name = "phylogenetic_diversity") { auto node = AddDataNode(name); node->AddPull([this](){ @@ -669,133 +894,102 @@ namespace emp { return node; } - + /// Add data node that records counts of deleterious steps along + /// lineages in this systematics manager when requested to pull. + /// Used by AddLineageMutationFile in World_output.hpp virtual data_ptr_t AddDeleteriousStepDataNode(const std::string & name = "deleterious_steps") { - return AddDeleteriousStepDataNodeImpl(1, name); - } - - data_ptr_t AddDeleteriousStepDataNodeImpl(bool decoy, const std::string & name = "deleterious_steps") { - emp_assert(false, "Calculating deleterious steps requires suitable DATA_STRUCT"); - return AddDataNode(name); - } - - template - data_ptr_t - AddDeleteriousStepDataNodeImpl(typename std::enable_if::type decoy, const std::string & name = "deleterious_steps") { auto node = AddDataNode(name); - node->AddPullSet([this](){ - emp::vector result; - for (auto tax : active_taxa) { - result.push_back(CountDeleteriousSteps(tax)); - } - return result; - }); + if constexpr (!DATA_STRUCT::has_fitness_t::value) { + emp_assert(false && + "Error: Trying to track deleterious steps in Systematics manager that doesn't track fitness" && + "Please use a DATA_STRUCT type that supports fitness tracking."); + } else { + node->AddPullSet([this](){ + emp::vector result; + for (auto tax : active_taxa) { + result.push_back(CountDeleteriousSteps(tax)); + } + return result; + }); + } return node; } + /// Add data node that records phenotypic
volatility (changes in phenotype) along + /// lineages in this systematics manager when requested to pull. + /// Used by AddLineageMutationFile in World_output.hpp virtual data_ptr_t AddVolatilityDataNode(const std::string & name = "volatility") { - return AddVolatilityDataNodeImpl(1, name); - } - - data_ptr_t AddVolatilityDataNodeImpl(bool decoy, const std::string & name = "volatility") { - emp_assert(false, "Calculating taxon volatility requires suitable DATA_STRUCT"); - return AddDataNode(name); - } - - template - data_ptr_t - AddVolatilityDataNodeImpl(typename std::enable_if::type decoy, const std::string & name = "volatility") { auto node = AddDataNode(name); - node->AddPullSet([this](){ - emp::vector result; - for (auto tax : active_taxa) { - result.push_back(CountPhenotypeChanges(tax)); - } - return result; - }); + if constexpr (!DATA_STRUCT::has_phen_t::value) { + emp_assert(false && + "Error: Trying to track phenotypic volatility in Systematics manager that doesn't track fitness" && + "Please use a DATA_STRUCT type that supports phenotype tracking."); + } else { + node->AddPullSet([this](){ + emp::vector result; + for (auto tax : active_taxa) { + result.push_back(CountPhenotypeChanges(tax)); + } + return result; + }); + } return node; } + /// Add data node that records counts of unique taxa along + /// lineages in this systematics manager when requested to pull. 
+ /// Used by AddLineageMutationFile in World_output.hpp virtual data_ptr_t AddUniqueTaxaDataNode(const std::string & name = "unique_taxa") { - return AddUniqueTaxaDataNodeImpl(1, name); - } + auto node = AddDataNode(name); - data_ptr_t AddUniqueTaxaDataNodeImpl(bool decoy, const std::string & name = "unique_taxa") { - emp_assert(false, "Calculating unique taxa requires suitable DATA_STRUCT"); - return AddDataNode(name); - } + if constexpr (!DATA_STRUCT::has_phen_t::value) { + emp_assert(false && + "Error: Trying to track phenotypic volatility in Systematics manager that doesn't track fitness" && + "Please use a DATA_STRUCT type that supports phenotype tracking."); + } else { - template - data_ptr_t - AddUniqueTaxaDataNodeImpl(typename std::enable_if::type decoy, const std::string & name = "unique_taxa") { - auto node = AddDataNode(name); - node->AddPullSet([this](){ - emp::vector result; - for (auto tax : active_taxa) { - result.push_back(CountUniquePhenotypes(tax)); - } - return result; - }); + node->AddPullSet([this](){ + emp::vector result; + for (auto tax : active_taxa) { + result.push_back(CountUniquePhenotypes(tax)); + } + return result; + }); + } return node; } + /// Add data node that records counts of mutations of the specified type along + /// lineages in this systematics manager when requested to pull. 
+ /// Used by AddLineageMutationFile in World_output.hpp virtual data_ptr_t AddMutationCountDataNode(const std::string & name = "mutation_count", const std::string & mutation = "substitution") { - return AddMutationCountDataNodeImpl(1, name, mutation); - } - - data_ptr_t AddMutationCountDataNodeImpl(bool decoy, const std::string & name = "mutation_count", const std::string & mutation = "substitution") { - emp_assert(false, "Calculating mutation count requires suitable DATA_STRUCT"); - return AddDataNode(name); - } - - template - data_ptr_t - AddMutationCountDataNodeImpl(typename std::enable_if::type decoy, const std::string & name = "mutation_count", const std::string & mutation = "substitution") { auto node = AddDataNode(name); - node->AddPullSet([this,mutation](){ - emp::vector result; - for (auto tax : active_taxa) { - result.push_back(CountMuts(tax, mutation)); - } - return result; - }); + if constexpr (!DATA_STRUCT::has_mutations_t::value) { + emp_assert(false && + "Error: Trying to track phenotypic volatility in Systematics manager that doesn't track mutations" && + "Please use a DATA_STRUCT type that supports mutation tracking."); + } else { + node->AddPullSet([this,mutation](){ + emp::vector result; + for (auto tax : active_taxa) { + result.push_back(CountMuts(tax, mutation)); + } + return result; + }); + } return node; } - /// Add a new snapshot function. - /// When a snapshot of the systematics is taken, in addition to the default - /// set of functions, all user-added snapshot functions are run. Functions - /// take a reference to a taxon as input and return the string to be dumped - /// in the file at the given key. 
- void AddSnapshotFun(const std::function & fun, - const std::string & key, const std::string & desc="") { - user_snapshot_funs.emplace_back(fun, key, desc); - } - - bool IsTaxonAt(int id) { - emp_assert(id < (int) taxon_locations.size(), "Invalid taxon location", id, taxon_locations.size()); - return taxon_locations[id]; - } - - Ptr GetTaxonAt(int id) { - emp_assert(id < (int) taxon_locations.size(), "Invalid taxon location", id, taxon_locations.size()); - emp_assert(taxon_locations[id], "No taxon at specified location"); - return taxon_locations[id]; - } - Ptr GetNextTaxonAt(int id) { - emp_assert(id < (int)next_taxon_locations.size(), "Invalid taxon location"); - emp_assert(next_taxon_locations[id], "No taxon at specified location"); - return next_taxon_locations[id]; - } + // ===== Functions for calculating phylogeny topology metrics ==== /** From (Faith 1992, reviewed in Winters et al., 2013), phylogenetic diversity is * the sum of edges in the minimal spanning tree connected the taxa you're @@ -812,78 +1006,70 @@ namespace emp { int GetPhylogeneticDiversity() const { // As shown on page 5 of Faith 1992, when all branch lengths are equal the phylogenetic // diversity is the number of internal nodes plus the number of extant taxa - 1. + //int phylodiversity = ancestor_taxa.size() + active_taxa.size() -1; + return ancestor_taxa.size() + active_taxa.size() - 1; } - /** This is a metric of how distinct @param tax is from the rest of the population. + + /// @returns phylogenetic diversity if used without any arguments . + /// If you want to receive normalized data, you need to include the number of generations + /// your tree has (multiples of 10 from 10 to 100 are allowed) + /// you also need to specify a file with which to normalize your data. 
+ /// If value is outside of the values in the file, 100th percentile will be returned + int GetPhylogeneticDiversityNormalize(int generation = 0, std::string filename = "") const; + + + /** This is a metric of how distinct \c tax is from the rest of the population. * - * (From Vane-Wright et al., 1991; reviewed in Winter et al., 2013) - */ + * (From Vane-Wright et al., 1991; reviewed in Winter et al., 2013) */ double GetTaxonDistinctiveness(Ptr tax) const {return 1.0/GetDistanceToRoot(tax);} /** This metric (from Isaac, 2007; reviewed in Winter et al., 2013) measures how - * distinct @param tax is from the rest of the population, weighted for the amount of + * distinct \c tax is from the rest of the population, weighted for the amount of * unique evolutionary history that it represents. * - * To quantify length of evolutionary history, this method needs @param time: the current + * To quantify length of evolutionary history, this method needs \c time: the current * time, in whatever units time is being measured in when taxa are added to the systematics - * manager. Note that passing a time in the past will produce inacurate results (since we + * manager. Note that passing a time in the past will produce inaccurate results (since we * don't know what the state of the tree was at that time). * - * Assumes the tree is all connected. Will return -1 if this assumption isn't met. - */ - double GetEvolutionaryDistinctiveness(Ptr tax, double time) const { - - double depth = 0; // Length (in time units) of section we're currently exploring - double total = 0; // Count up scores for each section of tree - double divisor = tax->GetTotalOffspring() + 1; // Number of extant taxa this will split into (1 for current taxa, plus its offspring) + * Assumes the tree is all connected. Will return -1 if this assumption isn't met.*/ + double GetEvolutionaryDistinctiveness(Ptr tax, double time) const; - // We're stopping when we hit MRCA, so we need to make sure it's been calculated. 
- GetMRCA(); - if (tax == mrca) { - return 0; + /** @returns A vector of evolutionary distinctiveness of all active taxa + * @param time The time step at which the calculation is being done + */ + emp::vector GetAllEvolutionaryDistinctivenesses(double time) const { + emp::vector eds; + for (emp::Ptr tax : active_taxa) { + eds.push_back(GetEvolutionaryDistinctiveness(tax, time)); } + return eds; + } - // std::cout << "Initializing divisor to " << divisor << " Offspring: " << tax->GetTotalOffspring() << std::endl; - // std::cout << "MRCA ID: " << mrca->GetID() << " Tax ID: " << tax->GetID() << " time: " << time << " Orig: " << tax->GetOriginationTime() << std::endl; - - Ptr test_taxon = tax->GetParent(); - - emp_assert(time != -1 && "Invalid time - are you passing time to rg?", time); - emp_assert(time >= tax->GetOriginationTime() - && "GetEvolutionaryDistinctiveness received a time that is earlier than the taxon's origination time.", tax->GetOriginationTime(), time); - - while (test_taxon) { - - // emp_assert(test_taxon->GetOriginationTime() != -1 && - // "Invalid time - are you passing time to rg?", time); - - depth += time - test_taxon->GetOriginationTime(); - // std::cout << "Tax: " << test_taxon->GetID() << " depth: " << depth << " time: " << time << " Orig: " << test_taxon->GetOriginationTime() << " divisor: " << divisor << std::endl; - time = test_taxon->GetOriginationTime(); - if (test_taxon == mrca || !test_taxon) { - // Stop when everything has converged or when we hit the root. 
- // std::cout << (int)(test_taxon == mrca) << " depth: " << depth << " divisor: " << divisor << std::endl; - total += depth/divisor; - return total; - } else if (test_taxon->GetNumOrgs() > 0) { - // If this taxon is still alive we need to update the divisor - // std::cout << "Alive point" << " depth: " << depth << " divisor: " << divisor << std::endl; - total += depth/divisor; - depth = 0; - divisor = test_taxon->GetTotalOffspring() + 1; - } else if (test_taxon->GetNumOff() > 1) { - // This is a branch point. We need to add the things on the other branch to the divisor.. - // std::cout << "Branch point" << " depth: " << depth << " divisor: " << divisor << std::endl; - total += depth/divisor; - depth = 0; - divisor = test_taxon->GetTotalOffspring(); - } + /** @returns Mean evolutionary distinctiveness of all active taxa + * @param time The time step at which the calculation is being done + */ + double GetMeanEvolutionaryDistinctiveness(double time) const { + emp::vector eds = GetAllEvolutionaryDistinctivenesses(time); + return emp::Mean(eds); + } - test_taxon = test_taxon->GetParent(); - } + /** @returns Sum of evolutionary distinctiveness of all active taxa + * @param time The time step at which the calculation is being done + */ + double GetSumEvolutionaryDistinctiveness(double time) const { + emp::vector eds = GetAllEvolutionaryDistinctivenesses(time); + return emp::Sum(eds); + } - return -1; + /** @returns Variance of evolutionary distinctiveness of all active taxa + * @param time The time step at which the calculation is being done + */ + double GetVarianceEvolutionaryDistinctiveness(double time) const { + emp::vector eds = GetAllEvolutionaryDistinctivenesses(time); + return emp::Variance(eds); } /** Calculates mean pairwise distance between extant taxa (Webb and Losos, 2000). @@ -891,13 +1077,12 @@ namespace emp { * (for demonstration of equivalence see Tucker et al, 2016). 
This measurement tells * you about the amount of distinctness in the community as a whole. * - * @param branch_only only counts distance in terms of nodes that represent a branch - * between two extant taxa (potentially useful for comparison to biological data, where - * non-branching nodes generally cannot be inferred). - * * This measurement assumes that the tree is fully connected. Will return -1 * if this is not the case. - * */ + * + * @param branch_only only counts distance in terms of nodes that represent a branch + * between two extant taxa (potentially useful for comparison to biological data, where + * non-branching nodes generally cannot be inferred). */ double GetMeanPairwiseDistance(bool branch_only=false) const { emp::vector dists = GetPairwiseDistances(branch_only); return (double)Sum(dists)/dists.size(); @@ -906,13 +1091,12 @@ namespace emp { /** Calculates summed pairwise distance between extant taxa. Tucker et al 2017 points * out that this is a measure of phylogenetic richness. * - * @param branch_only only counts distance in terms of nodes that represent a branch - * between two extant taxa (potentially useful for comparison to biological data, where - * non-branching nodes generally cannot be inferred). - * * This measurement assumes that the tree is fully connected. Will return -1 * if this is not the case. - * */ + * + * @param branch_only only counts distance in terms of nodes that represent a branch + * between two extant taxa (potentially useful for comparison to biological data, where + * non-branching nodes generally cannot be inferred) */ double GetSumPairwiseDistance(bool branch_only=false) const { emp::vector v = GetPairwiseDistances(branch_only); return Sum(v); @@ -921,490 +1105,203 @@ namespace emp { /** Calculates variance of pairwise distance between extant taxa. Tucker et al 2017 points * out that this is a measure of phylogenetic regularity. 
* - * @param branch_only only counts distance in terms of nodes that represent a branch - * between two extant taxa (potentially useful for comparison to biological data, where - * non-branching nodes generally cannot be inferred). - * * This measurement assumes that the tree is fully connected. Will return -1 * if this is not the case. - * */ + * + * @param branch_only only counts distance in terms of nodes that represent a branch + * between two extant taxa (potentially useful for comparison to biological data, where + * non-branching nodes generally cannot be inferred). */ double GetVariancePairwiseDistance(bool branch_only=false) const { emp::vector v = GetPairwiseDistances(branch_only); return Variance(v); } - /** Calculates a vector of all pairwise distances between extant taxa. - * - * @param branch_only only counts distance in terms of nodes that represent a branch - * between two extant taxa (potentially useful for comparison to biological data, where - * non-branching nodes generally cannot be inferred). * * This method assumes that the tree is fully connected. Will return -1 * if this is not the case. - * */ - emp::vector GetPairwiseDistances(bool branch_only=false) const { - // The overarching approach here is to start with a bunch of pointers to all - // extant organisms (since that will include all leaves). Then we trace back up - // the tree, keeping track of distances. When things meet up, we calculate - // distances between the nodes on the sides that just met up. - - emp::vector dists; - - std::map< Ptr, emp::vector> > curr_pointers; - std::map< Ptr, emp::vector> > next_pointers; - - - for (Ptr tax : active_taxa) { - curr_pointers[tax] = emp::vector>({{0}}); - } + * + * @param branch_only only counts distance in terms of nodes that represent a branch + * between two extant taxa (potentially useful for comparison to biological data, where + * non-branching nodes generally cannot be inferred). 
* */ + emp::vector GetPairwiseDistances(bool branch_only=false) const; - // std::cout << "Starting curr_pointers size: " << curr_pointers.size() << std::endl; - - while (curr_pointers.size() > 0) { - for (auto & tax : curr_pointers) { - bool alive = tax.first->GetNumOrgs() > 0; - // std::cout << tax.first << " has " << to_string(tax.second) << "and is waiting for " << tax.first->GetNumOff() + int(alive) << std::endl; - if ( tax.second.size() < tax.first->GetNumOff() + int(alive)) { - if (Has(next_pointers, tax.first)) { - // In case an earlier iteration added this node to next_pointers - for (auto vec : tax.second) { - next_pointers[tax.first].push_back(vec); - } - } else { - next_pointers[tax.first] = curr_pointers[tax.first]; - } - continue; - } - emp_assert(tax.first->GetNumOff() + int(alive) == tax.second.size(), tax.first->GetNumOff(), alive, to_string(tax.second), tax.second.size()); - // Okay, things should have just met up. Let's compute the distances - // between everything that just met. + /** + * Returns a vector containing all taxa that were extant at \c time_point and + * were at that time the most recent ancestors of taxa that are now extant + * Example: Say the only current extant taxon is C, its lineage goes A -> B -> C, + * and B and C were both alive at the specified time_point. This function would + * only return B. If, however, there were another currently extant taxon that were + * descended directly from A, then this function would return both A and B. 
*/ + std::set> GetCanopyExtantRoots(int time_point = 0) const; - if (tax.second.size() > 1) { - for (size_t i = 0; i < tax.second.size(); i++ ) { - for (size_t j = i+1; j < tax.second.size(); j++) { - for (int disti : tax.second[i]) { - for (int distj : tax.second[j]) { - // std::cout << "Adding " << disti << " and " << distj << std::endl; - dists.push_back(disti+distj); - } - } - } - } - } - // std::cout << "dists " << to_string(dists) << std::endl; - // Increment distances and stick them in new vector - emp::vector new_dist_vec; - for (auto & vec : tax.second) { - for (int el : vec) { - new_dist_vec.push_back(el+1); - } - } + /** @returns the total number of ancestors between the given taxon and MRCA, if there is one. If + * there is no common ancestor, distance to the root of this tree is calculated instead. + * @param tax the taxon whose distance to root you want to calculate + * */ + int GetDistanceToRoot(Ptr tax) const ; - // std::cout << "new_dist_vec " << to_string(new_dist_vec) << std::endl; + /** Calculates the number of branching points leading to multiple extant taxa + * between the given taxon and the most-recent common ancestor (or the root of its subtree, + * if no MRCA exists). This is useful because a lot + * of stats for phylogenies are designed for phylogenies reconstructed from extant taxa. + * These phylogenies generally only contain branching points, rather than every ancestor + * along the way to the current taxon. + * @returns Number of branching points between tax and root + * @param tax taxon to calculate branches from */ + int GetBranchesToRoot(Ptr tax) const; - next_pointers.erase(tax.first); + /** @returns Sackin Index of this tree (Sackin, 1972; reviewed in Shao, 1990).
+ * Measures tree balance*/ + int SackinIndex() const { + int sackin = 0; + for (auto taxon : active_taxa) { + sackin += GetBranchesToRoot(taxon) + 1; // Sackin index counts root as branch + } + return sackin; + } - Ptr test_taxon = tax.first->GetParent(); - while (test_taxon && test_taxon->GetNumOff() == 1 && test_taxon->GetNumOrgs() == 0) { - if (!branch_only) { - for (size_t i = 0; i < new_dist_vec.size(); i++){ - new_dist_vec[i]++; - } - } - test_taxon = test_taxon->GetParent(); - } + /** Returns dictionary containing a histogram of node out degrees + * e.g. {1:4, 2:10, 3:4} means the tree has 4 unifurcations, + * 10 bifurcations, and 4 trifurcations + * **/ + std::unordered_map GetOutDegreeDistribution() const { + std::unordered_map dist; + ApplyToAllTaxa([&dist](emp::Ptr tax){emp::IncrementCounter(dist, tax->GetNumOff());}); + return dist; + } - if (!test_taxon) { - continue; - } else if (!Has(next_pointers, test_taxon)) { - next_pointers[test_taxon] = emp::vector >({new_dist_vec}); - } else { - next_pointers[test_taxon].push_back(new_dist_vec); - } + /** Get average origin time for whole phylogeny. + * If @param normalize is set to true, will apply normalization to make result + * comparable to what you would expect from a strictly bifurcating tree (as most + * reconstruction methods will produce). This normalization is achieved by multiplying + * each taxon's values by the number of offspring taxa it has minus one. 
+ */ + double GetAverageOriginTime(bool normalize=false) const { + double total = 0; + double count = 0; + // const auto all = {std::ranges::ref_view(active_taxa), + // std::ranges::ref_view(ancestor_taxa), + // std::ranges::ref_view(outside_taxa)}; + // for (emp::Ptr tax : all | std::views::join) { + for (emp::Ptr tax : active_taxa) { + double weight = 1; + if (normalize) { + weight = std::max(0, (int)tax->GetNumOff() - 1); } - curr_pointers = next_pointers; - next_pointers.clear(); - // std::cout << curr_pointers.size() << std::endl; + total += tax->GetOriginationTime() * weight; + count += weight; } - - if (dists.size() != (active_taxa.size()*(active_taxa.size()-1))/2) { - // The tree is not connected - // It's possible we should do something different here... - return dists; + for (emp::Ptr tax : ancestor_taxa) { + double weight = 1; + if (normalize) { + weight = std::max(0, (int)tax->GetNumOff() - 1); + } + total += tax->GetOriginationTime() * weight; + count += weight; + } + for (emp::Ptr tax : outside_taxa) { + double weight = 1; + if (normalize) { + weight = std::max(0, (int)tax->GetNumOff() - 1); + } + total += tax->GetOriginationTime() * weight; + count += weight; } - // std::cout << "Total: " << total << "Dists: " << dists.size() << std::endl; - - return dists; + if (count == 0) { + return 0; + } + return total/count; + } + /** Calculate Colless Index of this tree (Colless, 1982; reviewed in Shao, 1990). + * Measures tree balance. The standard Colless index only works for bifurcating trees, + * so this will be a Colless-like Index, as suggested in + * "Sound Colless-like balance indices for multifurcating trees" (Mir, 2018, PLoS One)*/ + double CollessLikeIndex() const { + GetMRCA(); + return RecursiveCollessStep(mrca).total; } + /// @returns a pointer to the Most-Recent Common Ancestor for the population. 
+ Ptr GetMRCA() const; - /** - * Returns a vector containing all taxa from @param time_point that were - * - * */ - std::set> GetCanopyExtantRoots(int time_point = 0) const { - // NOTE: This could be made faster by doing something similar to the pairwise distance - // function - - std::set< Ptr> result; - // std::cout << "starting " << time_point << std::endl; - for (Ptr tax : active_taxa) { - // std::cout << tax->GetInfo() << std::endl; - while (tax) { - // std::cout << tax->GetInfo() << " " << tax->GetOriginationTime() << " " << tax->GetDestructionTime() << std::endl; - if (tax->GetOriginationTime() <= time_point && tax->GetDestructionTime() > time_point ) { - result.insert(tax); - // std::cout << "inserting " << tax->GetInfo() << std::endl; - break; - } - tax = tax->GetParent(); - } - } - - return result; - - } - - - - /** Counts the total number of ancestors between @param tax and MRCA, if there is one. If - * there is no common ancestor, distance to the root of this tree is calculated instead. - */ - int GetDistanceToRoot(Ptr tax) const { - // Now, trace the line of descent, updating the candidate as we go. - GetMRCA(); - - int depth = 0; - Ptr test_taxon = tax->GetParent(); - while (test_taxon) { - depth++; - if (test_taxon == mrca || !test_taxon) { - return depth; - } - test_taxon = test_taxon->GetParent(); - } - return depth; - } - - /** Counts the number of branching points leading to multiple extant taxa - * between @param tax and the most-recent common ancestor (or the root of its subtree, - * if no MRCA exists). This is useful because a lot - * of stats for phylogenies are designed for phylogenies reconstructed from extant taxa. 
- * These phylogenies generally only contain branching points, rather than every ancestor - * along the way to the current taxon.*/ - int GetBranchesToRoot(Ptr tax) const { - GetMRCA(); - - int depth = 0; - Ptr test_taxon = tax->GetParent(); - while (test_taxon) { - if (test_taxon == mrca || !test_taxon) { - return depth; - } else if (test_taxon->GetNumOff() > 1) { - depth++; - } - test_taxon = test_taxon->GetParent(); - } - return depth; - } - - /** Calculate Sackin Index of this tree (Sackin, 1972; reviewed in Shao, 1990). - * Measures tree balance - */ - int SackinIndex() const { - int sackin = 0; - - for (auto taxon : active_taxa) { - sackin += GetBranchesToRoot(taxon) + 1; // Sackin index counts root as branch - } - - return sackin; - } - - - // Graph ToGraph() const { - - // std::map, int> ids; - // int next_id = 0; - - // for (Ptr tax : active_taxa) { - // ids[tax] = next_id; - // next_id++; - // } - - // for (Ptr tax : ancestor_taxa) { - // ids[tax] = next_id; - // next_id++; - // } - - // for (Ptr tax : outside_taxa) { - // ids[tax] = next_id; - // next_id++; - // } - - // Graph g(next_id); - - // for (Ptr tax : active_taxa) { - // if (tax->GetParent()) { - // g.AddEdge(ids[tax->GetParent()], ids[tax]); - // } - // } - - // for (Ptr tax : ancestor_taxa) { - // if (tax->GetParent()) { - // g.AddEdge(ids[tax->GetParent()], ids[tax]); - // } - // } - - // for (Ptr tax : outside_taxa) { - // if (tax->GetParent()) { - // g.AddEdge(ids[tax->GetParent()], ids[tax]); - // } - // } - - // return g; - // } - - // Graph ToMinimalGraph() const { - // std::map, int> ids; - // int next_id = 0; - - // for (Ptr tax : active_taxa) { - // if (tax->GetNumOff() == 1) { - // continue; - // } - // ids[tax] = next_id; - // next_id++; - // } - - // for (Ptr tax : ancestor_taxa) { - // if (tax->GetNumOff() == 1) { - // continue; - // } - // ids[tax] = next_id; - // next_id++; - // } - - // for (Ptr tax : outside_taxa) { - // if (tax->GetNumOff() == 1) { - // continue; - // } - // 
ids[tax] = next_id; - // next_id++; - // } - - // Graph g(next_id); - - // for (Ptr tax : active_taxa) { - // if (tax->GetNumOff() == 1) { - // continue; - // } - - // Ptr parent = tax->GetParent(); - // while (parent) { - // if (parent->GetNumOff() == 1) { - // parent = parent->GetParent(); - // } else { - // g.AddEdge(ids[parent], ids[tax]); - // } - // } - // } - - // for (Ptr tax : ancestor_taxa) { - // if (tax->GetNumOff() == 1) { - // continue; - // } - - // Ptr parent = tax->GetParent(); - // while (parent) { - // if (parent->GetNumOff() == 1) { - // parent = parent->GetParent(); - // } else { - // g.AddEdge(ids[parent], ids[tax]); - // } - // } - // } - - // for (Ptr tax : outside_taxa) { - // if (tax->GetNumOff() == 1) { - // continue; - // } - - // Ptr parent = tax->GetParent(); - // while (parent) { - // if (parent->GetNumOff() == 1) { - // parent = parent->GetParent(); - // } else { - // g.AddEdge(ids[parent], ids[tax]); - // } - // } - // } - - // return g; - // } - - struct CollessStruct { - double total = 0; - emp::vector ns; - }; - - CollessStruct RecursiveCollessStep(Ptr curr) const { - CollessStruct result; - - while (curr->GetNumOff() == 1) { - curr = *(curr->GetOffspring().begin()); - } - - if (curr->GetNumOff() == 0) { - result.ns.push_back(0); // Node itself is calculated at level above - return result; - } + /// @returns the depth of the Most-Recent Common Ancestor; return -1 for none. + int GetMRCADepth() const; - for (Ptr off : curr->GetOffspring()) { - // std::cout << "Recursing on ID: " << off->GetID() << " Offspring: " << off->GetTotalOffspring() << std::endl; + /// @returns a pointer to the Most-Recent Ancestor shared by two taxa. + Ptr GetSharedAncestor(Ptr t1, Ptr t2) const; - CollessStruct new_result = RecursiveCollessStep(off); - result.ns.push_back(Sum(new_result.ns) + log(off->GetOffspring().size() + exp(1))); - result.total += new_result.total; - } + /// @returns the genetic diversity of the population. 
+ double CalcDiversity() const; - // std::cout << "Evaluating: " << curr->GetID() << std::endl; + /// @returns vector containing the lineages of the specified taxon + emp::vector> GetLineage(Ptr tax) const { + emp::vector> lineage; + lineage.push_back(tax); - double med = Median(result.ns); - double sum_diffs = 0; - // std::cout << "Median: " << med << std::endl; - for (double n : result.ns) { - // std::cout << n << std::endl; - sum_diffs += std::abs(n-med); + while (tax) { + tax = Parent(tax); + lineage.push_back(tax); } - // std::cout << "Sumdiffs: " << sum_diffs << " n: " << result.ns.size() << " average: " << sum_diffs/result.ns.size() << std::endl; - result.total += sum_diffs/result.ns.size(); - return result; + return lineage; } - /** Calculate Colless Index of this tree (Colless, 1982; reviewed in Shao, 1990). - * Measures tree balance. The standard Colless index only works for bifurcating trees, - * so this will be a Colless-like Index, as suggested in - * "Sound Colless-like balance indices for multifurcating trees" (Mir, 2018, PLoS One) - */ - double CollessLikeIndex() const { + /// @returns vector containing the lineages of the specified taxon + /// up to and including the MRCA, but not past the MRCA + emp::vector> GetLineageToMRCA(Ptr tax) const { GetMRCA(); + emp::vector> lineage; + lineage.push_back(tax); - return RecursiveCollessStep(mrca).total; - } - - - - void RemoveBefore(int ud) { - - // @ELD: This would be such a nice way to do it - // but we can't because we need to notify offspring - // when their parents are un-tracked - // std::set> to_remove; - // for (Ptr tax : ancestor_taxa) { - // if (tax->GetDestructionTime() < ud) { - // to_remove.insert(tax); - // } - // } - - // for (Ptr tax : to_remove) { - // ancestor_taxa.erase(tax); - // tax.Delete(); - // } - - std::map, std::set>> to_remove; - - for (Ptr tax : active_taxa) { - Ptr curr = tax; - - while (curr && !CanRemove(curr->GetParent(), ud)) { - curr = curr->GetParent(); - } - - if (curr) 
{ - Ptr next = curr->GetParent(); - while (next) { - to_remove[next].insert(curr); - curr = next; - next = next->GetParent(); - } - } - } - // std::cout << "About to remove " << to_remove.size() << " orgs" << std::endl; - for (std::pair, std::set>> el : to_remove) { - emp_assert(el.first->GetDestructionTime() < ud, el.first->GetDestructionTime(), ud); - if (el.first->GetNumOff() == el.second.size()) { - // Everything is account for - for (auto tax : el.second) { - tax->NullifyParent(); - } - ancestor_taxa.erase(el.first); - el.first.Delete(); - } - } - - } - - bool CanRemove(Ptr t, int ud) { - if (!t) { - return false; + while (tax && tax != mrca) { + tax = Parent(tax); + lineage.push_back(tax); } - while (t) { - if (t->GetNumOrgs() > 0 || t->GetDestructionTime() >= ud) { - return false; - } - t = t->GetParent(); - } - return true; + return lineage; } - /// Request a pointer to the Most-Recent Common Ancestor for the population. - Ptr GetMRCA() const; - /// Request the depth of the Most-Recent Common Ancestor; return -1 for none. - int GetMRCADepth() const; - - /// Add information about a new organism, including its stored info and parent's taxon; - /// If you would like the systematics manager to track taxon age, you can also supply - /// the update at which the taxon is being added. - /// return a pointer for the associated taxon. - void AddOrg(ORG && org, WorldPosition pos, int update=-1); - Ptr AddOrg(ORG && org, WorldPosition pos, Ptr parent=nullptr, int update=-1); - Ptr AddOrg(ORG && org, Ptr parent=nullptr, int update=-1); - - void AddOrg(ORG & org, WorldPosition pos, int update=-1); - Ptr AddOrg(ORG & org, WorldPosition pos, Ptr parent=nullptr, int update=-1); - Ptr AddOrg(ORG & org, Ptr parent=nullptr, int update=-1); - - - /// Remove an instance of an organism; track when it's gone. 
- bool RemoveOrg(WorldPosition pos, int time=-1); - bool RemoveOrg(Ptr taxon, int time=-1); - - void RemoveOrgAfterRepro(WorldPosition pos, int time=-1); - void RemoveOrgAfterRepro(Ptr taxon, int time=-1); - - /// Remove org from next population (for use with synchronous generations) - // bool RemoveNextOrg(WorldPosition pos, int time=-1); - // bool RemoveNextOrg(Ptr taxon, int time=-1); - - /// Climb up a lineage... - Ptr Parent(Ptr taxon) const; + // ===== Output functions ==== /// Print details about the Systematics manager. + /// First prints setting, followed by all active, ancestor, and outside + /// taxa being stored. Format for taxa is + /// [ id | number of orgs in this taxon, number of offspring taxa of this taxon | parent taxon] + /// @param os output stream to print to void PrintStatus(std::ostream & os=std::cout) const; - /// Print whole lineage. + /// Print a whole lineage. Format: "Lineage:", followed by each taxon in the lineage, each on new line + /// @param taxon a pointer to the taxon to print the lineage of + /// @param os output stream to print to void PrintLineage(Ptr taxon, std::ostream & os=std::cout) const; + /// Add a new snapshot function. + /// When a snapshot of the systematics is taken, in addition to the default + /// set of functions, all user-added snapshot functions are run. Functions + /// take a reference to a taxon as input and return the string to be dumped + /// in the file at the given key. + void AddSnapshotFun(const std::function & fun, + const std::string & key, const std::string & desc="") { + user_snapshot_funs.emplace_back(fun, key, desc); + } + + /// Take a snapshot of current state of taxon phylogeny. + /// WARNING: Current, this function assumes one parent taxon per-taxon. + /// @param file_path the file to store the snapshot data in void Snapshot(const std::string & file_path) const; - /// Calculate the genetic diversity of the population. 
- double CalcDiversity() const; + void SwapPositions(WorldPosition p1, WorldPosition p2) { + emp::vector > & v1 = taxon_locations[p1.GetPopID()]; + emp::vector > & v2 = taxon_locations[p2.GetPopID()]; + std::swap(v1[p1.GetIndex()], v2[p2.GetIndex()]); + } + + void LoadFromFile(const std::string & file_path, const std::string & info_col = "info", + bool assume_leaves_extant=true, + bool adjust_total_offspring = true); }; @@ -1414,6 +1311,28 @@ namespace emp { // === === // ============================================================= + // ======= Functions for manipulating systematics manager internals + + template + void Systematics::Update() { + if (track_synchronous) { + + // Clear pending removal + if (to_be_removed != nullptr) { + RemoveOrg(to_be_removed); + taxon_locations[removal_pos.GetPopID()][removal_pos.GetIndex()] = nullptr; + to_be_removed = nullptr; + removal_pos = {0, 0}; + } + + // Assumes that synchronous worlds have two populations, with 0 + // being currently alive and 1 being the one being created + std::swap(taxon_locations[0], taxon_locations[1]); + taxon_locations[1].resize(0); + } + ++curr_update; + } + // Should be called wheneven a taxon has no organisms AND no descendants. template void Systematics::Prune(Ptr taxon) { @@ -1437,15 +1356,23 @@ namespace emp { // If the taxon is still active AND the is the current mrca AND now has only one offspring, // clear the MRCA for lazy re-evaluation later. - else if (taxon == mrca && taxon->GetNumOff() == 1) mrca = nullptr; + else if (taxon == mrca && taxon->GetNumOff() == 1) { + mrca = nullptr; + } } // Mark a taxon extinct if there are no more living members. There may be descendants. 
template - void Systematics::MarkExtinct(Ptr taxon, int time) { + void Systematics::MarkExtinct(Ptr taxon) { emp_assert(taxon); emp_assert(taxon->GetNumOrgs() == 0); + // Track destruction time + taxon->SetDestructionTime(curr_update); + + // Give other functions a chance to do stuff with taxon before extinction + on_extinct_sig.Trigger(taxon); + if (max_depth == (int)taxon->GetDepth()) { // We no longer know the max depth max_depth = -1; @@ -1465,105 +1392,92 @@ namespace emp { taxon.Delete(); return; } - // std::cout << "About to set destruction time " << time << std::endl; - // Only need to track destruction time if we're archiving taxa - taxon->SetDestructionTime(time); if (store_ancestors) { ancestor_taxa.insert(taxon); // Move taxon to ancestors... } + if (taxon == mrca && taxon->GetNumOff() <= 1) { + // If this taxon was mrca and has only one offspring, then the new + // mrca is somewhere farther down the chain. + // If this taxon was mrca and now has no offspring, something very + // strange has happened. + // Either way, we should mark mrca for lazy recalculation + mrca = nullptr; + } if (taxon->GetNumOff() == 0) Prune(taxon); // ...and prune from there if needed. } - - // Request a pointer to the Most-Recent Common Ancestor for the population. + // Add information about a new organism, including its stored info and parent's taxon; + // Can't return a pointer for the associated taxon because of obnoxious inheritance problems template - Ptr::taxon_t> Systematics::GetMRCA() const { - if (!mrca && num_roots == 1) { // Determine if we need to calculate the MRCA. - // First, find a candidate among the living taxa. Only taxa that have one offsrping - // can be on the line-of-descent to the MRCA, so anything else is a good start point. - // There must be at least one! Stop as soon as we find a candidate. 
- Ptr candidate(nullptr); - for (auto x : active_taxa) { - if (x->GetNumOff() != 1) { candidate = x; break; } - } - - // Now, trace the line of descent, updating the candidate as we go. - Ptr test_taxon = candidate->GetParent(); - while (test_taxon) { - emp_assert(test_taxon->GetNumOff() >= 1); - // If the test_taxon is dead, we only want to update candidate when we hit a new branch point - // If test_taxon is still alive, though, we always need to update it - if (test_taxon->GetNumOff() > 1 || test_taxon->GetNumOrgs() > 0) candidate = test_taxon; - test_taxon = test_taxon->GetParent(); - } - mrca = candidate; - } - return mrca; + // Ptr::taxon_t> + void Systematics::AddOrg(ORG & org, WorldPosition pos) { + emp_assert(store_position, "Trying to pass position to a systematics manager that can't use it"); + // emp_assert(next_parent, "Adding organism with no parent specified and no next_parent set"); + AddOrg(org, pos, next_parent); + next_parent = nullptr; } - // Request the depth of the Most-Recent Common Ancestor; return -1 for none. 
+ // Add information about a new organism, including its stored info and parent's taxon; + // Can't return a pointer for the associated taxon because of obnoxious inheritance problems template - int Systematics::GetMRCADepth() const { - GetMRCA(); - if (mrca) return (int) mrca->GetDepth(); - return -1; + // Ptr::taxon_t> + void Systematics::AddOrg(ORG && org, WorldPosition pos) { + emp_assert(store_position, "Trying to pass position to a systematics manager that can't use it"); + // emp_assert(next_parent, "Adding organism with no parent specified and no next_parent set"); + AddOrg(org, pos, next_parent); + next_parent = nullptr; } - - // Add information about a new organism, including its stored info and parent's taxon; // Can't return a pointer for the associated taxon because of obnoxious inheritance problems template // Ptr::taxon_t> - void Systematics::AddOrg(ORG & org, WorldPosition pos, int update) { + void Systematics::AddOrg(ORG & org, WorldPosition pos, WorldPosition parent) { emp_assert(store_position, "Trying to pass position to a systematics manager that can't use it"); - // emp_assert(next_parent, "Adding organism with no parent specified and no next_parent set"); - AddOrg(org, pos, next_parent, update); - next_parent = nullptr; + AddOrg(org, pos, taxon_locations[parent.GetPopID()][parent.GetIndex()]); } // Add information about a new organism, including its stored info and parent's taxon; // Can't return a pointer for the associated taxon because of obnoxious inheritance problems template // Ptr::taxon_t> - void Systematics::AddOrg(ORG && org, WorldPosition pos, int update) { + void Systematics::AddOrg(ORG && org, WorldPosition pos, WorldPosition parent) { emp_assert(store_position, "Trying to pass position to a systematics manager that can't use it"); - // emp_assert(next_parent, "Adding organism with no parent specified and no next_parent set"); - AddOrg(org, pos, next_parent, update); - next_parent = nullptr; + AddOrg(org, pos, 
taxon_locations[parent.GetPopID()][parent.GetIndex()]); } // Version for if you aren't tracking positions template Ptr::taxon_t> - Systematics::AddOrg(ORG & org, Ptr parent, int update) { - return AddOrg(org, -1, parent, update); + Systematics::AddOrg(ORG & org, Ptr parent) { + emp_assert(!store_position && + "Trying to add org to position-tracking systematics manager without position. Either specify a valid position or turn of position tracking for systematic manager.", store_position); + return AddOrg(org, WorldPosition::invalid_id, parent); } // Version for if you aren't tracking positions template Ptr::taxon_t> - Systematics::AddOrg(ORG && org, Ptr parent, int update) { + Systematics::AddOrg(ORG && org, Ptr parent) { emp_assert(!store_position && "Trying to add org to position-tracking systematics manager without position. Either specify a valid position or turn of position tracking for systematic manager.", store_position); - return AddOrg(org, WorldPosition::invalid_id, parent, update); + return AddOrg(org, WorldPosition::invalid_id, parent); } // Add information about a new organism, including its stored info and parent's taxon; // return a pointer for the associated taxon. template Ptr::taxon_t> - Systematics::AddOrg(ORG && org, WorldPosition pos, Ptr parent, int update) { - return AddOrg(org, pos, parent, update); + Systematics::AddOrg(ORG && org, WorldPosition pos, Ptr parent) { + return AddOrg(org, pos, parent); } // Add information about a new organism, including its stored info and parent's taxon; - // return a pointer for the associated taxon. template Ptr::taxon_t> - Systematics::AddOrg(ORG & org, WorldPosition pos, Ptr parent, int update) { + Systematics::AddOrg(ORG & org, WorldPosition pos, Ptr parent) { org_count++; // Keep count of how many organisms are being tracked. 
ORG_INFO info = calc_info_fun(org); @@ -1581,33 +1495,29 @@ namespace emp { if (max_depth != -1 && (int)cur_taxon->GetDepth() > max_depth) { max_depth = cur_taxon->GetDepth(); } - on_new_sig.Trigger(cur_taxon, org); + if (store_active) active_taxa.insert(cur_taxon); // Store new taxon. - if (parent) parent->AddOffspring(cur_taxon); // Track tree info. + if (parent) parent->AddOffspring(cur_taxon); // Track tree info. - cur_taxon->SetOriginationTime(update); + cur_taxon->SetOriginationTime(curr_update); + on_new_sig.Trigger(cur_taxon, org); } // std::cout << "about to store poisition" << std::endl; - if (store_position && pos.GetIndex() >= 0) { - if (pos.GetPopID()) { - if (pos.GetIndex() >= next_taxon_locations.size()) { - next_taxon_locations.resize(pos.GetIndex()+1); - } - next_taxon_locations[pos.GetIndex()] = cur_taxon; - - } else { - if (pos.GetIndex() >= taxon_locations.size()) { - taxon_locations.resize(pos.GetIndex()+1); - } - taxon_locations[pos.GetIndex()] = cur_taxon; + if (store_position) { + if (pos.GetPopID() >= taxon_locations.size()) { + taxon_locations.resize(pos.GetPopID()+1); + } + if (pos.GetIndex() >= taxon_locations[pos.GetPopID()].size()) { + taxon_locations[pos.GetPopID()].resize(pos.GetIndex()+1); } + taxon_locations[pos.GetPopID()][pos.GetIndex()] = cur_taxon; } cur_taxon->AddOrg(); // Record the current organism in its taxon. 
total_depth += cur_taxon->GetDepth(); // Track the total depth (for averaging) if (to_be_removed) { - RemoveOrg(to_be_removed, removal_time); + RemoveOrg(to_be_removed); to_be_removed = nullptr; } @@ -1616,57 +1526,52 @@ namespace emp { } template - void Systematics::RemoveOrgAfterRepro(WorldPosition pos, int time) { + void Systematics::RemoveOrgAfterRepro(WorldPosition pos) { emp_assert(store_position, "Trying to remove org based on position from systematics manager that doesn't track it."); - if (pos.GetIndex() >= taxon_locations.size() || !taxon_locations[pos.GetIndex()]) { + if (pos.GetPopID() >= taxon_locations.size() || + pos.GetIndex() >= taxon_locations[pos.GetPopID()].size() || + !taxon_locations[pos.GetPopID()][pos.GetIndex()]) { // There's not actually a taxon here return; } - RemoveOrgAfterRepro(taxon_locations[pos.GetIndex()], time); - removal_pos = pos.GetIndex(); + RemoveOrgAfterRepro(taxon_locations[pos.GetPopID()][pos.GetIndex()]); + removal_pos = pos; } template - void Systematics::RemoveOrgAfterRepro(Ptr taxon, int time) { + void Systematics::RemoveOrgAfterRepro(Ptr taxon) { if (to_be_removed != nullptr) { - RemoveOrg(to_be_removed, removal_time); - taxon_locations[removal_pos] = nullptr; + RemoveOrg(to_be_removed); + taxon_locations[removal_pos.GetPopID()][removal_pos.GetIndex()] = nullptr; to_be_removed = nullptr; - removal_pos = -1; + removal_pos = {0, 0}; } to_be_removed = taxon; - // std::cout << "Setting remove time to " << time << std::endl; - removal_time = time; } - // Remove an instance of an organism; track when it's gone. + // Remove an instance of a taxon; track when it's gone. 
template - bool Systematics::RemoveOrg(WorldPosition pos, int time) { + bool Systematics::RemoveOrg(WorldPosition pos) { emp_assert(store_position, "Trying to remove org based on position from systematics manager that doesn't track it."); + emp_assert(pos.GetPopID() < taxon_locations.size(), "Invalid population requested for removal", pos.GetPopID(), taxon_locations.size()); + emp_assert(pos.GetIndex() < taxon_locations[pos.GetPopID()].size(), "Invalid position requested for removal", pos.GetIndex(), taxon_locations[pos.GetPopID()].size()); - if (pos.GetPopID() == 0) { - emp_assert(pos.GetIndex() < taxon_locations.size(), "Invalid position requested for removal", pos.GetIndex(), taxon_locations.size()); - bool active = false; - if (taxon_locations[pos.GetIndex()]) { - //TODO: Figure out how this can ever not be true - active = RemoveOrg(taxon_locations[pos.GetIndex()], time); - } - taxon_locations[pos.GetIndex()] = nullptr; - return active; - } else { - emp_assert(pos.GetIndex() < next_taxon_locations.size(), "Invalid position requested for removal", pos.GetIndex(), taxon_locations.size()); - bool active = RemoveOrg(next_taxon_locations[pos.GetIndex()], time); - next_taxon_locations[pos.GetIndex()] = nullptr; - return active; + bool active = false; + if (taxon_locations[pos.GetPopID()][pos.GetIndex()]) { + //TODO: Figure out how this can ever not be true + active = RemoveOrg(taxon_locations[pos.GetPopID()][pos.GetIndex()]); } + taxon_locations[pos.GetPopID()][pos.GetIndex()] = nullptr; + return active; } - // Remove an instance of an organism; track when it's gone. + // Remove an instance of a taxon; track when it's gone. 
+ // @param taxon the taxon of which one instance is being removed template - bool Systematics::RemoveOrg(Ptr taxon, int time) { + bool Systematics::RemoveOrg(Ptr taxon) { emp_assert(taxon); // Update stats @@ -1675,12 +1580,58 @@ namespace emp { // emp_assert(Has(active_taxa, taxon)); const bool active = taxon->RemoveOrg(); - if (!active) MarkExtinct(taxon, time); + if (!active) MarkExtinct(taxon); return active; } - // Climb up a lineage... + // Remove all taxa that 1) went extinct before the specified update/time step, + // and 2) only have ancestors that went extinct before the specified update/time step. + // Warning: this function invalidates most measurements you could make about tree topology. + // It is useful in select situations where you need to store ancestors for some period of time, + // but cannot computationally afford to store all ancestors for your entire run. + template + void Systematics::RemoveBefore(int ud) { + + std::set> to_remove; + for (Ptr tax : ancestor_taxa) { + if (tax->GetDestructionTime() < ud && CanRemove(tax, ud)) { + to_remove.insert(tax); + } + } + + for (Ptr tax : to_remove) { + for (Ptr off : tax->GetOffspring()) { + off->NullifyParent(); + } + ancestor_taxa.erase(tax); + tax.Delete(); + } + + } + + #ifndef DOXYGEN_SHOULD_SKIP_THIS + /// Helper function for RemoveBefore + /// @returns true if a a taxon can safely be + /// removed by RemoveBefore + template + bool Systematics::CanRemove(Ptr t, int ud) { + if (!t) { + return false; + } + while (t) { + if (t->GetNumOrgs() > 0 || t->GetDestructionTime() >= ud) { + return false; + } + t = t->GetParent(); + } + return true; + } + #endif // #DOXYGEN_SHOULD_SKIP_THIS + + // ======= Functions for getting information from the systematics manager + + // @returns a pointer to the parent of a given taxon template Ptr::taxon_t> Systematics::Parent(Ptr taxon) const { emp_assert(taxon); @@ -1689,6 +1640,10 @@ namespace emp { } // Print details about the Systematics manager. 
+ // First prints setting, followed by all active, ancestor, and outside + // taxa being stored. Format for taxa is + // [ id | number of orgs in this taxon, number of offspring taxa of this taxon | parent taxon] + // @param os output stream to print to template void Systematics::PrintStatus(std::ostream & os) const { os << "Systematics Status:\n"; @@ -1721,7 +1676,6 @@ namespace emp { os << std::endl; } - // Print whole lineage. template void Systematics::PrintLineage(Ptr taxon, std::ostream & os) const { os << "Lineage:\n"; @@ -1731,8 +1685,6 @@ namespace emp { } } - /// Take a snapshot of current state of taxon phylogeny. - /// WARNING: Current, this function assumes one parent taxon per-taxon. template void Systematics::Snapshot(const std::string & file_path) const { emp::DataFile file(file_path); @@ -1747,7 +1699,7 @@ namespace emp { // - ancestor_list: ancestor list for taxon std::function get_ancestor_list = [&cur_taxon]() -> std::string { - if (cur_taxon->GetParent() == nullptr) { return "[NONE]"; } + if (cur_taxon->GetParent() == nullptr) { return "[\"NONE\"]"; } return "[" + to_string(cur_taxon->GetParent()->GetID()) + "]"; }; file.AddFun(get_ancestor_list, "ancestor_list", "Ancestor list for this taxon."); @@ -1810,31 +1762,600 @@ namespace emp { // Output header information. 
file.PrintHeaderKeys(); - // Update file w/active taxa information - for (auto tax : active_taxa) { + // Update file w/ taxa information + // const auto all = {std::ranges::ref_view(active_taxa), std::ranges::ref_view(ancestor_taxa), + // std::ranges::ref_view(outside_taxa)}; + for (emp::Ptr tax : active_taxa) { cur_taxon = tax; file.Update(); } - - // Update file w/ancestor taxa information - for (auto tax : ancestor_taxa) { + for (emp::Ptr tax : ancestor_taxa) { cur_taxon = tax; file.Update(); } - - // Update file w/outside taxa information - for (auto tax : outside_taxa) { + for (emp::Ptr tax : outside_taxa) { cur_taxon = tax; file.Update(); } } - // Calculate the genetic diversity of the population. + // ======= Measurements about the systematics manager + + // @returns the genetic diversity of the population. template double Systematics::CalcDiversity() const { + emp_assert(!num_orgs_wrong && "Error: calculating diversity from phylogeny missing org counts"); return emp::Entropy(active_taxa, [](Ptr x){ return x->GetNumOrgs(); }, (double) org_count); } + + // @returns a pointer to the Most-Recent Common Ancestor for the population or null pointer if there isn't one + template + Ptr::taxon_t> Systematics::GetMRCA() const { + if (!mrca && num_roots == 1) { // Determine if we need to calculate the MRCA. + // First, find a candidate among the living taxa. Only taxa that have one offsrping + // can be on the line-of-descent to the MRCA, so anything else is a good start point. + // There must be at least one! Stop as soon as we find a candidate. + Ptr candidate(nullptr); + for (auto x : active_taxa) { + if (x->GetNumOff() != 1) { candidate = x; break; } + } + + // Now, trace the line of descent, updating the candidate as we go. 
+ Ptr test_taxon = candidate->GetParent(); + while (test_taxon) { + emp_assert(test_taxon->GetNumOff() >= 1); + // If the test_taxon is dead, we only want to update candidate when we hit a new branch point + // If test_taxon is still alive, though, we always need to update it + if (test_taxon->GetNumOff() > 1 || test_taxon->GetNumOrgs() > 0) candidate = test_taxon; + test_taxon = test_taxon->GetParent(); + } + mrca = candidate; + } + return mrca; + } + + // @returns the depth of the Most-Recent Common Ancestor or -1 for none. + template + int Systematics::GetMRCADepth() const { + GetMRCA(); + if (mrca) return (int) mrca->GetDepth(); + return -1; + } + + template + Ptr::taxon_t> Systematics::GetSharedAncestor(Ptr t1, Ptr t2) const { + // Same taxon + if (t1 == t2) { + return t1; + } + + // If not same, we have to actually do work + emp::vector > lineage1 = GetLineageToMRCA(t1); + emp::vector > lineage2 = GetLineageToMRCA(t2); + + size_t l1 = lineage1.size() - 1; + size_t l2 = lineage2.size() - 1; + + emp_assert(lineage1[l1] == lineage2[l2], + "Both lineages should start with MRCA"); + + while (lineage1[l1] == lineage2[l2]) { + l1--; + l2--; + } + + return lineage1[l1+1]; + } + + #ifndef DOXYGEN_SHOULD_SKIP_THIS + // Helper for Colless function calculation + struct CollessStruct { + double total = 0; + emp::vector ns; + }; + + // Helper for Colless function calculation + template + CollessStruct Systematics::RecursiveCollessStep(Ptr curr) const { + CollessStruct result; + + while (curr->GetNumOff() == 1) { + curr = *(curr->GetOffspring().begin()); + } + + if (curr->GetNumOff() == 0) { + result.ns.push_back(0); // Node itself is calculated at level above + return result; + } + + for (Ptr off : curr->GetOffspring()) { + // std::cout << "Recursing on ID: " << off->GetID() << " Offspring: " << off->GetTotalOffspring() << std::endl; + + CollessStruct new_result = RecursiveCollessStep(off); + result.ns.push_back(Sum(new_result.ns) + log(off->GetOffspring().size() + 
exp(1))); + result.total += new_result.total; + } + + // std::cout << "Evaluating: " << curr->GetID() << std::endl; + + double med = Median(result.ns); + double sum_diffs = 0; + // std::cout << "Median: " << med << std::endl; + for (double n : result.ns) { + // std::cout << n << std::endl; + sum_diffs += std::abs(n-med); + } + // std::cout << "Sumdiffs: " << sum_diffs << " n: " << result.ns.size() << " average: " << sum_diffs/result.ns.size() << std::endl; + result.total += sum_diffs/result.ns.size(); + return result; + } + #endif // #DOXYGEN_SHOULD_SKIP_THIS + + template + emp::vector Systematics::GetPairwiseDistances(bool branch_only) const { + // The overarching approach here is to start with a bunch of pointers to all + // extant organisms (since that will include all leaves). Then we trace back up + // the tree, keeping track of distances. When things meet up, we calculate + // distances between the nodes on the sides that just met up. + + emp::vector dists; + + std::map< Ptr, emp::vector> > curr_pointers; + std::map< Ptr, emp::vector> > next_pointers; + + + for (Ptr tax : active_taxa) { + curr_pointers[tax] = emp::vector>({{0}}); + } + + // std::cout << "Starting curr_pointers size: " << curr_pointers.size() << std::endl; + + while (curr_pointers.size() > 0) { + for (auto & tax : curr_pointers) { + bool alive = tax.first->GetNumOrgs() > 0; + // std::cout << tax.first << " has " << to_string(tax.second) << "and is waiting for " << tax.first->GetNumOff() + int(alive) << std::endl; + if ( tax.second.size() < tax.first->GetNumOff() + int(alive)) { + if (Has(next_pointers, tax.first)) { + // In case an earlier iteration added this node to next_pointers + for (auto vec : tax.second) { + next_pointers[tax.first].push_back(vec); + } + } else { + next_pointers[tax.first] = curr_pointers[tax.first]; + } + continue; + } + emp_assert(tax.first->GetNumOff() + int(alive) == tax.second.size(), tax.first->GetNumOff(), alive, to_string(tax.second), tax.second.size()); + + // 
Okay, things should have just met up. Let's compute the distances + // between everything that just met. + + if (tax.second.size() > 1) { + + for (size_t i = 0; i < tax.second.size(); i++ ) { + for (size_t j = i+1; j < tax.second.size(); j++) { + for (int disti : tax.second[i]) { + for (int distj : tax.second[j]) { + // std::cout << "Adding " << disti << " and " << distj << std::endl; + dists.push_back(disti+distj); + } + } + } + } + } + // std::cout << "dists " << to_string(dists) << std::endl; + // Increment distances and stick them in new vector + emp::vector new_dist_vec; + for (auto & vec : tax.second) { + for (int el : vec) { + new_dist_vec.push_back(el+1); + } + } + + // std::cout << "new_dist_vec " << to_string(new_dist_vec) << std::endl; + + next_pointers.erase(tax.first); + + Ptr test_taxon = tax.first->GetParent(); + while (test_taxon && test_taxon->GetNumOff() == 1 && test_taxon->GetNumOrgs() == 0) { + if (!branch_only) { + for (size_t i = 0; i < new_dist_vec.size(); i++){ + new_dist_vec[i]++; + } + } + test_taxon = test_taxon->GetParent(); + } + + if (!test_taxon) { + continue; + } else if (!Has(next_pointers, test_taxon)) { + next_pointers[test_taxon] = emp::vector >({new_dist_vec}); + } else { + next_pointers[test_taxon].push_back(new_dist_vec); + } + } + curr_pointers = next_pointers; + next_pointers.clear(); + // std::cout << curr_pointers.size() << std::endl; + } + + if (dists.size() != (active_taxa.size()*(active_taxa.size()-1))/2) { + // The tree is not connected + // It's possible we should do something different here... 
+ return dists; + } + + // std::cout << "Total: " << total << "Dists: " << dists.size() << std::endl; + + return dists; + + } + + template + double Systematics::GetEvolutionaryDistinctiveness(Ptr tax, double time) const { + + // If we loaded this phylogeny from a file without calculating total offspring, + // we need to actually calculate it here + emp_assert(!total_offspring_wrong && "To calculate evolutionary distinctiveness on phylogeny loaded from file you must calculate total offspring."); + + double depth = 0; // Length (in time units) of section we're currently exploring + double total = 0; // Count up scores for each section of tree + double divisor = tax->GetTotalOffspring() + 1; // Number of extant taxa this will split into (1 for current taxa, plus its offspring) + + // We're stopping when we hit MRCA, so we need to make sure it's been calculated. + GetMRCA(); + if (tax == mrca) { + return 0; + } + + // std::cout << "Initializing divisor to " << divisor << " Offspring: " << tax->GetTotalOffspring() << std::endl; + // std::cout << "MRCA ID: " << mrca->GetID() << " Tax ID: " << tax->GetID() << " time: " << time << " Orig: " << tax->GetOriginationTime() << std::endl; + + Ptr test_taxon = tax->GetParent(); + + emp_assert(time != -1 && "Invalid time - are you passing time to rg?", time); + emp_assert(time >= tax->GetOriginationTime() + && "GetEvolutionaryDistinctiveness received a time that is earlier than the taxon's origination time.", tax->GetOriginationTime(), time); + + while (test_taxon) { + + // emp_assert(test_taxon->GetOriginationTime() != -1 && + // "Invalid time - are you passing time to rg?", time); + + depth += time - test_taxon->GetOriginationTime(); + // std::cout << "Tax: " << test_taxon->GetID() << " depth: " << depth << " time: " << time << " Orig: " << test_taxon->GetOriginationTime() << " divisor: " << divisor << std::endl; + time = test_taxon->GetOriginationTime(); + if (test_taxon == mrca || !test_taxon) { + // Stop when everything has 
converged or when we hit the root. + // std::cout << (int)(test_taxon == mrca) << " depth: " << depth << " divisor: " << divisor << std::endl; + total += depth/divisor; + return total; + } else if (test_taxon->GetNumOrgs() > 0) { + // If this taxon is still alive we need to update the divisor + // std::cout << "Alive point" << " depth: " << depth << " divisor: " << divisor << std::endl; + total += depth/divisor; + depth = 0; + divisor = test_taxon->GetTotalOffspring() + 1; + } else if (test_taxon->GetNumOff() > 1) { + // This is a branch point. We need to add the things on the other branch to the divisor.. + // std::cout << "Branch point" << " depth: " << depth << " divisor: " << divisor << std::endl; + total += depth/divisor; + depth = 0; + divisor = test_taxon->GetTotalOffspring(); + } + + test_taxon = test_taxon->GetParent(); + } + + return -1; + } + + template + int Systematics::GetBranchesToRoot(Ptr tax) const { + GetMRCA(); + + int depth = 0; + Ptr test_taxon = tax->GetParent(); + while (test_taxon) { + if (test_taxon == mrca || !test_taxon) { + return depth; + } else if (test_taxon->GetNumOff() > 1) { + depth++; + } + test_taxon = test_taxon->GetParent(); + } + return depth; + } + + template + int Systematics::GetDistanceToRoot(Ptr tax) const { + // Now, trace the line of descent, updating the candidate as we go. 
+ GetMRCA(); + + int depth = 0; + Ptr test_taxon = tax->GetParent(); + while (test_taxon) { + depth++; + if (test_taxon == mrca || !test_taxon) { + return depth; + } + test_taxon = test_taxon->GetParent(); + } + return depth; + } + + template + std::set::taxon_t>> Systematics::GetCanopyExtantRoots(int time_point) const { + // NOTE: This could be made faster by doing something similar to the pairwise distance + // function + using taxon_t = Systematics::taxon_t; + std::set< Ptr> result; + // std::cout << "starting " << time_point << std::endl; + for (Ptr tax : active_taxa) { + // std::cout << tax->GetInfo() << std::endl; + while (tax) { + // std::cout << tax->GetInfo() << " " << tax->GetOriginationTime() << " " << tax->GetDestructionTime() << std::endl; + if (tax->GetOriginationTime() <= time_point && tax->GetDestructionTime() > time_point ) { + result.insert(tax); + // std::cout << "inserting " << tax->GetInfo() << std::endl; + break; + } + tax = tax->GetParent(); + } + } + + return result; + + } + + template + int Systematics::GetPhylogeneticDiversityNormalize(int generation, std::string filename) const { + int gen_value = ((generation / 10) - 1); //indexes from 0, 100 generations would correspond to the 10th line in the csv + // bool percent_found = false; + int phylogenetic_diversity = ancestor_taxa.size() + active_taxa.size() - 1; + + if(filename == ""){ + //std::cout << "Phylogenetic Diversity is " << phylogenetic_diversity << std::endl; + return phylogenetic_diversity; + } else{ + + emp::File generation_percentiles(filename); //opens file + emp::vector< emp::vector >percentile_data = generation_percentiles.ToData(','); //turns file contents into vector + + for(int j = 0; j <= percentile_data[gen_value].size() - 2; j++){ //searches through vector for slot where phylo diversity fits + + if((percentile_data[gen_value][j] <= phylogenetic_diversity) && (percentile_data[gen_value][j + 1] > phylogenetic_diversity)){ + // std::cout << "phylogenetic diversity is in 
between: " << percentile_data[gen_value][j] << "and " << percentile_data[gen_value][j+1] << std::endl; + // std::cout << "The phylogenetic diversity value " << phylogenetic_diversity << " is in the " << j << " percentile, in the " << ((gen_value + 1)* 10) << " generation" << std::endl; + return j; + } + } + } + return 100; + } + + template + int Systematics::GetMaxDepth() { + if (max_depth != -1) { + return max_depth; + } + + for (auto tax : active_taxa) { + int depth = tax->GetDepth(); + if (depth > max_depth) { + max_depth = depth; + } + } + return max_depth; + } + + template + void Systematics::LoadFromFile(const std::string & file_path, + const std::string & info_col, + bool assume_leaves_extant, + bool adjust_total_offspring ) { + + // We can only load phylogenies from file if their info can be + // converted to this systematics object's ORG_INFO type (if you + // have a complex type, you can just use a string representation) + if constexpr (!emp::is_streamable::value) { + emp_assert(false && "Failed to load phylogeny from file. 
ORG_INFO template type cannot be created from string"); + return; + } + + // Load files + emp::File in_file(file_path); + in_file.RemoveWhitespace(); + emp::vector header = in_file.ExtractRow(); + + // Find column ids + auto id_pos_it = std::find(header.begin(), header.end(), "id"); + emp_assert(id_pos_it != header.end() && + "Input phylogeny file must be in ALife Phylogeny Data Standards format" && + "id column is missing"); + size_t id_pos = std::distance(header.begin(), id_pos_it); + + auto anc_pos_it = std::find(header.begin(), header.end(), "ancestor_list"); + emp_assert(anc_pos_it != header.end() && + "Input phylogeny file must be in ALife Phylogeny Data Standards format" && + "ancestor_list column is missing"); + size_t anc_pos = std::distance(header.begin(), anc_pos_it); + + auto origin_pos_it = std::find(header.begin(), header.end(), "origin_time"); + int origin_pos = -1; + if(origin_pos_it != header.end()){ + origin_pos = std::distance(header.begin(), origin_pos_it); + } + + auto destruction_pos_it = std::find(header.begin(), header.end(), "destruction_time"); + int destruction_pos = -1; + if (destruction_pos_it != header.end()) { + destruction_pos = std::distance(header.begin(), destruction_pos_it); + } + + auto num_orgs_pos_it = std::find(header.begin(), header.end(), "num_orgs"); + int num_orgs_pos = -1; + if (num_orgs_pos_it != header.end()) { + num_orgs_pos = std::distance(header.begin(), num_orgs_pos_it); + } + + auto tot_orgs_pos_it = std::find(header.begin(), header.end(), "tot_orgs"); + int tot_orgs_pos = -1; + if (tot_orgs_pos_it != header.end()) { + tot_orgs_pos = std::distance(header.begin(), tot_orgs_pos_it); + } + + auto info_pos_it = std::find(header.begin(), header.end(), info_col); + emp_assert(info_pos_it != header.end() && + "Input phylogeny file must be in ALife Phylogeny Data Standards format" && + "info column name supplied is not in file."); + size_t info_pos = std::distance(header.begin(), info_pos_it); + + // Keep track taxon 
objects + std::unordered_map > taxa; + // File is out of order, so we have to link up parents + // and offspring after the fact + std::unordered_map unlinked_parents; + // Keep track of roots + emp::vector> roots; + + // Read in each row and make a taxon for it + size_t num_lines = in_file.GetNumLines(); + for (size_t i = 0; i < num_lines; i++) { + emp::vector row = in_file.ViewRowSlices(i); + int id = emp::from_string(row[id_pos]); + + // Inf means this taxon is still alive + // or we don't know which taxa are alive + std::string destruction_time = "inf"; + if (destruction_pos != -1) { + destruction_time = row[destruction_pos]; + } else { + destruction_time = "unknown"; + } + + // Load ancestor list + ORG_INFO info = emp::from_string(row[info_pos]); + std::string ancestor_list_str = {row[anc_pos].begin(), row[anc_pos].end()}; + emp::remove_chars(ancestor_list_str, "[]\""); + emp::Ptr tax; + + // Make taxon (parent is nullptr for now) + tax.New(id, info); + if (destruction_time != "inf") { + ancestor_taxa.insert(tax); + } else { + active_taxa.insert(tax); + } + + // Fill in destruction and origin time if + // provided + if (origin_pos != -1 ){ + double origin_time = emp::from_string(row[origin_pos]); + tax->SetOriginationTime(origin_time); + } + if (destruction_time != "inf" && destruction_time != "unknown") { + tax->SetDestructionTime(emp::from_string(destruction_time)); + } + + // Fill in number of current and total orgs if provided + if (num_orgs_pos != -1) { + size_t num_orgs = emp::from_string(row[num_orgs_pos]); + tax->SetNumOrgs(num_orgs); + } + if (tot_orgs_pos != -1) { + size_t tot_orgs = emp::from_string(row[tot_orgs_pos]); + tax->SetTotOrgs(tot_orgs); + } + + // Store taxon pointer + taxa[id] = tax; + // Keep track of parents so we can link up later + if (emp::to_lower(ancestor_list_str) != "none") { + emp::vector ancestor_split = emp::slice(ancestor_list_str, ','); + emp::vector ancestor_list = emp::from_strings(ancestor_split); + unlinked_parents[id] = 
ancestor_list[0]; + } else { + // If no parent, this is a root + num_roots++; + roots.push_back(tax); + } + } + + // Link up parents and offspring + for (auto element : unlinked_parents) { + taxa[element.first]->parent = taxa[element.second]; + taxa[element.second]->offspring.insert(taxa[element.first]); + taxa[element.second]->num_offspring++; + } + + // Set up depth + emp::vector> leaves; + emp::vector> to_explore; + for (auto root : roots) { + root->depth = 0; + root->total_offspring = 0; + for (auto offspring : root->GetOffspring()) { + to_explore.push_back(offspring); + } + } + + // Step through all taxa and fix their + // bookkeeping + // Traversal starting at roots to ensure + // parent depth is correct when setting offspring + // depth + emp::Ptr curr; + while(!to_explore.empty()) { + curr = to_explore.back(); + to_explore.pop_back(); + curr->total_offspring = 0; + curr->depth = curr->GetParent()->depth + 1; + total_depth += curr->depth; + for (auto offspring : curr->GetOffspring()){ + to_explore.push_back(offspring); + } + if (curr->GetNumOff() == 0) { + leaves.push_back(curr); + } + } + + // If we're assuming that all leave are extant, + // move leaves to active taxa + if (assume_leaves_extant) { + for (auto leaf : leaves) { + if (!Has(active_taxa, leaf)) { + ancestor_taxa.erase(leaf); + active_taxa.insert(leaf); + } + } + } + + if (num_orgs_pos == -1) { + num_orgs_wrong = true; + for (auto tax : active_taxa) { + tax->SetNumOrgs(1); + tax->SetTotOrgs(1); + org_count++; + } + } + + // Adjust total offspring + if (adjust_total_offspring) { + for (auto tax : active_taxa) { + tax->parent->AddTotalOffspring(); + } + } else { + total_offspring_wrong = true; + } + + // Force stats to be recalculated + max_depth = -1; + mrca = nullptr; + + } + } #endif // #ifndef EMP_EVOLVE_SYSTEMATICS_HPP_INCLUDE diff --git a/include/emp/Evolve/SystematicsAnalysis.hpp b/include/emp/Evolve/SystematicsAnalysis.hpp index 4574207135..6d0d71e16c 100644 --- 
a/include/emp/Evolve/SystematicsAnalysis.hpp +++ b/include/emp/Evolve/SystematicsAnalysis.hpp @@ -19,24 +19,27 @@ namespace emp { + /// @returns the taxon with the highest fitness out of any active taxon + /// in the given systematics manager. + /// @param s the systematics manager to search in. Must have more than 0 active taxa. template Ptr FindDominant(systematics_t & s) { - double best = -999999; - Ptr best_tax = nullptr; + emp_assert(s.GetNumActive() > 0 && "Trying to call FindDominant on empty population"); + double best = (*(s.GetActive().begin()))->GetData().GetFitness(); + Ptr best_tax = (*(s.GetActive().begin())); for (Ptr tax : s.GetActive()) { double f = tax->GetData().GetFitness(); - if (f > best) { - best = f; - best_tax = tax; + if (f > best) { + best = f; + best_tax = tax; + } } - } - return best_tax; + return best_tax; } - /// Returns the total number of times a mutation of type @param type - /// that along @param taxon 's lineage. (Different from CountMuts in - /// that CountMuts sums them whereas CountMutSteps would count two - /// simultaneous mutations of the same type as one event) + /// Returns the total number of ancestor taxa in \c taxon 's lineage. + /// Requires that taxon is a member of a systematics manager that + /// has ancestor storing turned on template int LineageLength(Ptr taxon) { int count = 0; @@ -49,10 +52,16 @@ namespace emp { return count; } - /// Returns the total number of times a mutation of type @param type - /// that along @param taxon 's lineage. (Different from CountMuts in + /// Returns the total number of times a mutation of type \c type + /// occurred along \c taxon 's lineage. (Different from CountMuts in /// that CountMuts sums them whereas CountMutSteps would count two /// simultaneous mutations of the same type as one event) + /// @param type string corresponding to a type of mutation. + /// Must be in the mut_counts dictionary (i.e. 
the dictionary + /// passed in when \ref mut_landscape_info::RecordMutation was called) + /// @param taxon a pointer to a taxon to count mutation steps for. + /// Must have a DATA_TYPE that supports mutation tracking + /// (e.g. mut_landscape_info) template int CountMutSteps(Ptr taxon, std::string type="substitution") { int count = 0; diff --git a/include/emp/Evolve/World.hpp b/include/emp/Evolve/World.hpp index 72731006d8..8d1f331a81 100644 --- a/include/emp/Evolve/World.hpp +++ b/include/emp/Evolve/World.hpp @@ -14,8 +14,6 @@ * whether or not they also affect injected organisms. (Right now they always do!!) * @todo We should Specialize World so that ANOTHER world can be used as an ORG, with proper * delegation to facilitate demes, pools, islands, etc. - * @todo We should be able to have any number of systematics managers, based on various type_trait - * information a that we want to track. * @todo Add a signal for DoBirth() for when a birth fails. * @todo Add a signal for population Reset() (and possibly Clear?) * @todo Add a feature to maintain population sorted by each phenotypic trait. This will allow @@ -973,7 +971,7 @@ namespace emp { // Track the new systematics info for (Ptr > s : systematics) { - s->AddOrg(*new_org, pos, (int) update); + s->AddOrg(*new_org, pos); } SetupOrg(*new_org, pos, *random_ptr); @@ -997,7 +995,7 @@ namespace emp { } for (Ptr > s : systematics) { - s->RemoveOrgAfterRepro(pos, update); // Notify systematics about organism removal + s->RemoveOrgAfterRepro(pos); // Notify systematics about organism removal } } @@ -1490,6 +1488,13 @@ namespace emp { pop.resize(0); std::swap(pops[0], pops[1]); // Move next pop into place. + // Tell systematics manager to swap next population and population + // Needs to happen here so that you can refer to systematics in + // OnPlacement functions + for (Ptr> s : systematics) { + s->Update(); + } + // Update the active population. 
num_orgs = 0; for (size_t i = 0; i < pop.size(); i++) { @@ -1499,12 +1504,7 @@ namespace emp { } } - // 3. Handle systematics and any data files that need to be printed this update. - - // Tell systematics manager to swap next population and population - for (Ptr> s : systematics) { - s->Update(); - } + // 3. Handle any data files that need to be printed this update. for (auto file : files) file->Update(update); diff --git a/include/emp/Evolve/World_structure.hpp b/include/emp/Evolve/World_structure.hpp index 1f6730dd23..5183528a3e 100644 --- a/include/emp/Evolve/World_structure.hpp +++ b/include/emp/Evolve/World_structure.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2017-2018 + * @date 2017-2022. * * @file World_structure.hpp * @brief Functions for popular world structure methods. @@ -47,13 +47,15 @@ namespace emp { } WorldPosition(const WorldPosition &) = default; + WorldPosition & operator=(const WorldPosition &) = default; + uint32_t GetIndex() const { return index; } uint32_t GetPopID() const { return pop_id; } bool IsActive() const { return pop_id == 0; } bool IsValid() const { return index != invalid_id; } - WorldPosition & SetActive(bool _active=true) { pop_id = 0; return *this; } + WorldPosition & SetActive(bool /*_active*/=true) { pop_id = 0; return *this; } WorldPosition & SetPopID(size_t _id) { emp_assert(_id <= invalid_id); pop_id = (uint32_t) _id; return *this; } WorldPosition & SetIndex(size_t _id) { emp_assert(_id <= invalid_id); index = (uint32_t) _id; return *this; } WorldPosition & MarkInvalid() { index = invalid_id; pop_id = invalid_id; return *this; } @@ -96,7 +98,7 @@ namespace emp { }; /// Set the population to be a set of pools that are individually well mixed, but with limited - /// migtation. 
Arguments are the number of pools, the size of each pool, and whether the + /// migration. Arguments are the number of pools, the size of each pool, and whether the /// generations should be synchronous (true) or not (false, default). template void SetPools(World & world, size_t num_pools, @@ -107,7 +109,7 @@ namespace emp { // -- Setup functions -- // Inject in an empty pool -or- randomly if none empty - world.SetAddInjectFun( [&world,pool_size](Ptr new_org) { + world.SetAddInjectFun( [&world,pool_size](Ptr /*new_org*/) { for (size_t id = 0; id < world.GetSize(); id += pool_size) { if (world.IsOccupied(id) == false) return WorldPosition(id); } @@ -128,7 +130,7 @@ namespace emp { if (synchronous_gen) { // Place births in the next open spot in the new pool (or randomly if full!) - world.SetAddBirthFun( [&world,pool_size](Ptr new_org, WorldPosition parent_pos) { + world.SetAddBirthFun( [&world,pool_size]([[maybe_unused]] Ptr new_org, WorldPosition parent_pos) { emp_assert(new_org); // New organism must exist. const size_t parent_id = parent_pos.GetIndex(); const size_t pool_id = parent_id / pool_size; @@ -144,7 +146,7 @@ namespace emp { world.SetAttribute("SynchronousGen", "True"); } else { // Asynchronous: always go to a neighbor in current population. - world.SetAddBirthFun( [&world](Ptr new_org, WorldPosition parent_pos) { + world.SetAddBirthFun( [&world]([[maybe_unused]] Ptr new_org, WorldPosition parent_pos) { auto pos = world.GetRandomNeighborPos(parent_pos); return pos; // Place org in existing population. }); @@ -174,7 +176,7 @@ namespace emp { world.MarkSpaceStructured(false).MarkPhenoStructured(true); // -- Setup functions -- - // Inject into the appropriate positon based on phenotype. Note that an inject will fail + // Inject into the appropriate position based on phenotype. Note that an inject will fail // if a more fit organism is already in place; you must run clear first if you want to // ensure placement. 
world.SetAddInjectFun( [&world,traits,trait_counts](Ptr new_org) { @@ -272,7 +274,7 @@ namespace emp { emp::vector distance; ///< And what is their distance? World & world; ///< World object being tracked. - TraitSet traits; ///< Traits we are tryng to spread + TraitSet traits; ///< Traits we are trying to spread emp::vector min_vals; ///< Smallest value found for each trait. emp::vector max_vals; ///< Largest value found for each trait. emp::vector bin_width; ///< Largest value found for each trait. @@ -325,10 +327,10 @@ namespace emp { size_t bin_id = org_bins[refresh_id]; Refresh_AgainstBin(refresh_id, bin_id); - // Then check all neighbor bins. Ignoring diagnols for now since they could be expensive... + // Then check all neighbor bins. Ignoring diagonals for now since they could be expensive... // (though technically we need them...) size_t trait_offset = 1; - for (size_t trait_id = 0; trait_id < traits.GetSize(); trait_id++) { + for (size_t trait_id = start_id; trait_id < traits.GetSize(); trait_id++) { size_t prev_bin_id = bin_id - trait_offset; if (prev_bin_id < num_total_bins) { Refresh_AgainstBin(refresh_id, prev_bin_id); @@ -398,7 +400,7 @@ namespace emp { is_setup = false; } - /// Find the best organism to kill in the popualtion. In this case, find the two closest organisms + /// Find the best organism to kill in the population. In this case, find the two closest organisms /// and kill the one with the lower fitness. size_t FindKill() { if (!is_setup) Setup(); // The first time we run out of space and need to kill, setup structure! 
@@ -487,7 +489,7 @@ namespace emp { emp_assert(org_bins[i] < num_total_bins, i, org_bins[i], num_total_bins, world.GetNumOrgs()); } - size_t org_count = 0; + [[maybe_unused]] size_t org_count = 0; for (size_t i = 0; i < num_total_bins; i++) { org_count += bin_ids[i].size(); for (size_t org_id : bin_ids[i]) { @@ -516,10 +518,10 @@ namespace emp { world.OnPlacement( [info_ptr](size_t pos) mutable { info_ptr->Update(pos); } ); // -- Setup functions -- - // Inject into the appropriate positon based on phenotype. Note that an inject will fail + // Inject into the appropriate position based on phenotype. Note that an inject will fail // if a more fit organism is already in place; you must run clear first if you want to // ensure placement. - world.SetAddInjectFun( [&world, traits, world_size, info_ptr](Ptr new_org) { + world.SetAddInjectFun( [/*&world, traits,*/ world_size, info_ptr]([[maybe_unused]] Ptr new_org) { size_t pos = info_ptr->GetBirthPos(world_size); return WorldPosition(pos); }); @@ -529,7 +531,7 @@ namespace emp { world.SetGetNeighborFun( [](WorldPosition pos) { emp_assert(false); return pos; }); // Find the two closest organisms and kill the lower fit one. (Killing sparsely...) - // Must unsetup population for next birth to work. + // Must un-setup population for next birth to work. world.SetKillOrgFun( [&world, info_ptr](){ const size_t last_id = world.GetSize() - 1; world.Swap(info_ptr->FindKill(), last_id); @@ -540,7 +542,7 @@ namespace emp { }); // Birth is effectively the same as inject. 
- world.SetAddBirthFun( [&world, traits, world_size, info_ptr](Ptr new_org, WorldPosition parent_pos) { + world.SetAddBirthFun( [/*&world, traits,*/ world_size, info_ptr]([[maybe_unused]] Ptr new_org, WorldPosition parent_pos) { (void) parent_pos; size_t pos = info_ptr->GetBirthPos(world_size); return WorldPosition(pos); diff --git a/include/emp/base/Ptr.hpp b/include/emp/base/Ptr.hpp index 28580757c5..da9c6b082f 100644 --- a/include/emp/base/Ptr.hpp +++ b/include/emp/base/Ptr.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2020. + * @date 2016-2022. * * @file Ptr.hpp * @brief A wrapper for pointers that does careful memory tracking (but only in debug mode). @@ -15,8 +15,11 @@ * intentionally) you can define EMP_NO_PTR_TO_PTR * * If you trip an assert, you can re-do the run a track a specific pointer by defining - * EMP_ABORT_PTR_NEW or EMP_ABORT_PTR_DELETE to the ID of the pointer in question. This will - * allow you to track the pointer more easily in a debugger. + * EMP_ABORT_PTR_NEW or EMP_ABORT_PTR_DELETE to the ID of the pointer in question. + * + * For example: -DEMP_ABORT_PTR_NEW=1691 + * + * This will allow you to track the pointer more easily in a debugger. * * @todo Track information about emp::vector and emp::array objects to make sure we don't * point directly into them? (A resize() could make such pointers invalid!) 
Or better, warn @@ -83,8 +86,8 @@ namespace emp { } PtrInfo(const PtrInfo &) = default; PtrInfo(PtrInfo &&) = default; - PtrInfo & operator=(const PtrInfo &) = default; - PtrInfo & operator=(PtrInfo &&) = default; + PtrInfo & operator=(const PtrInfo &) & = default; + PtrInfo & operator=(PtrInfo &&) & = default; ~PtrInfo() { if (internal::ptr_debug) std::cout << "Deleted info for pointer " << ptr << std::endl; @@ -180,7 +183,7 @@ namespace emp { } if (undeleted_info.size()) { - std::cerr << undeleted_info.size() << " undeleted pointers at end of exectution.\n"; + std::cerr << undeleted_info.size() << " undeleted pointers at end of execution.\n"; for (size_t i = 0; i < undeleted_info.size() && i < 10; ++i) { const auto & info = undeleted_info[i]; std::cerr << " PTR=" << info.GetPtr() @@ -216,7 +219,7 @@ namespace emp { return ptr_id.find(ptr) != ptr_id.end(); } - /// Retrive the ID associated with a pointer. + /// Retrieve the ID associated with a pointer. size_t GetCurID(const void * ptr) { emp_assert(HasPtr(ptr)); return ptr_id[ptr]; } /// Lookup how many pointers are being tracked. @@ -271,7 +274,7 @@ namespace emp { } #endif if (internal::ptr_debug) std::cout << "New: " << id << " (" << ptr << ")" << std::endl; - // Make sure pointer is not already stored -OR- hase been deleted (since re-use is possible). + // Make sure pointer is not already stored -OR- has been deleted (since re-use is possible). emp_assert(!HasPtr(ptr) || IsDeleted(GetCurID(ptr)), id); id_info.emplace_back(ptr); ptr_id[ptr] = id; @@ -286,14 +289,14 @@ namespace emp { return id; } - /// Increment the nuber of Pointers associated with an ID + /// Increment the number of Pointers associated with an ID void IncID(size_t id) { if (id == UNTRACKED_ID) return; // Not tracked! 
if (internal::ptr_debug) std::cout << "Inc: " << id << std::endl; id_info[id].Inc(id); } - /// Decrement the nuber of Pointers associated with an ID + /// Decrement the number of Pointers associated with an ID void DecID(size_t id) { if (id == UNTRACKED_ID) return; // Not tracked! auto & info = id_info[id]; @@ -329,7 +332,7 @@ namespace emp { namespace { // @CAO: Build this for real! template - bool PtrIsConvertable(FROM * ptr) { (void) ptr; return true; } + bool PtrIsConvertible(FROM * ptr) { (void) ptr; return true; } // emp_assert( (std::is_same() || dynamic_cast(in_ptr)) ); // Debug information provided for each pointer type. @@ -437,12 +440,14 @@ namespace emp { Tracker().IncID(id); } - /// Construct from a raw pointer of campatable type. + /// Construct from a raw pointer of compatable type. template Ptr(T2 * in_ptr, bool track=false) : BasePtr(in_ptr, UNTRACKED_ID) { - if (internal::ptr_debug) std::cout << "raw construct: " << ptr << ". track=" << track << std::endl; - emp_assert( (PtrIsConvertable(in_ptr)) ); + if (internal::ptr_debug) { + std::cout << "raw construct: " << ((void *) ptr) << ". track=" << track << std::endl; + } + emp_assert( (PtrIsConvertible(in_ptr)) ); // If this pointer is already active, link to it. if (Tracker().IsActive(ptr)) { @@ -464,7 +469,7 @@ namespace emp { if (internal::ptr_debug) std::cout << "raw ARRAY construct: " << ptr << ". size=" << array_size << "(" << array_bytes << " bytes); track=" << track << std::endl; - emp_assert( (PtrIsConvertable(_ptr)) ); + emp_assert( (PtrIsConvertible(_ptr)) ); // If this pointer is already active, link to it. 
if (Tracker().IsActive(ptr)) { @@ -483,7 +488,7 @@ namespace emp { template Ptr(Ptr _in) : BasePtr(_in.Raw(), _in.GetID()) { if (internal::ptr_debug) std::cout << "inexact copy construct: " << ptr << std::endl; - emp_assert( (PtrIsConvertable(_in.Raw())), id ); + emp_assert( (PtrIsConvertible(_in.Raw())), id ); Tracker().IncID(id); } @@ -496,7 +501,7 @@ namespace emp { ~Ptr() { if (internal::ptr_debug) { std::cout << "destructing Ptr instance "; - if (ptr) std::cout << id << " (" << ptr << ")\n"; + if (ptr) std::cout << id << " (" << ((void *) ptr) << ")\n"; else std::cout << "(nullptr)\n"; } Tracker().DecID(id); @@ -592,7 +597,7 @@ namespace emp { /// Delete this pointer (must NOT be an array). void Delete() { emp_assert(ptr, "Trying to delete null Ptr."); - emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not resposible for."); + emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not responsible for."); emp_assert(Tracker().IsArrayID(id) == false, id, "Trying to delete array pointer as non-array."); emp_assert(Tracker().IsActive(ptr), id, "Trying to delete inactive pointer (already deleted!)"); if (internal::ptr_debug) std::cout << "Ptr::Delete() : " << ptr << std::endl; @@ -603,7 +608,7 @@ namespace emp { /// Delete this pointer to an array (must be an array). 
void DeleteArray() { - emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not resposible for."); + emp_assert(id < Tracker().GetNumIDs(), id, "Trying to delete Ptr that we are not responsible for."); emp_assert(ptr, "Trying to delete null Ptr."); emp_assert(Tracker().IsArrayID(id), id, "Trying to delete non-array pointer as array."); emp_assert(Tracker().IsActive(ptr), id, "Trying to delete inactive pointer (already deleted!)"); @@ -622,7 +627,7 @@ namespace emp { struct hash_t { size_t operator()(const Ptr & t) const noexcept { return t.Hash(); } }; /// Copy assignment - Ptr & operator=(const Ptr & _in) { + Ptr & operator=(const Ptr & _in) & { if (internal::ptr_debug) { std::cout << "copy assignment from id " << _in.id << " to id " << id << std::endl; @@ -643,9 +648,9 @@ namespace emp { /// Assign to a raw pointer of the correct type; if this is already tracked, hooked in /// correctly, otherwise don't track. template - Ptr & operator=(T2 * _in) { + Ptr & operator=(T2 * _in) & { if (internal::ptr_debug) std::cout << "raw assignment" << std::endl; - emp_assert( (PtrIsConvertable(_in)) ); + emp_assert( (PtrIsConvertible(_in)) ); Tracker().DecID(id); // Decrement references to former pointer at this position. ptr = _in; // Update to new pointer. @@ -663,11 +668,11 @@ namespace emp { return *this; } - /// Assign to a convertable Ptr + /// Assign to a convertible Ptr template - Ptr & operator=(Ptr _in) { + Ptr & operator=(Ptr _in) & { if (internal::ptr_debug) std::cout << "convert-copy assignment" << std::endl; - emp_assert( (PtrIsConvertable(_in.Raw())), _in.id ); + emp_assert( (PtrIsConvertible(_in.Raw())), _in.id ); emp_assert(Tracker().IsDeleted(_in.id) == false, _in.id, "Do not copy deleted pointers."); Tracker().DecID(id); ptr = _in.Raw(); @@ -693,42 +698,28 @@ namespace emp { /// Does this const pointer exist? operator bool() const { return ptr != nullptr; } - /// Does this Ptr point to the same memory position? 
- bool operator==(const Ptr & in_ptr) const { return ptr == in_ptr.ptr; } - - /// Does this Ptr point to different memory positions? - bool operator!=(const Ptr & in_ptr) const { return ptr != in_ptr.ptr; } - - /// Does this Ptr point to a memory position before another? - bool operator<(const Ptr & in_ptr) const { return ptr < in_ptr.ptr; } - - /// Does this Ptr point to a memory position before or equal to another? - bool operator<=(const Ptr & in_ptr) const { return ptr <= in_ptr.ptr; } - - /// Does this Ptr point to a memory position after another? - bool operator>(const Ptr & in_ptr) const { return ptr > in_ptr.ptr; } - - /// Does this Ptr point to a memory position after or equal to another? - bool operator>=(const Ptr & in_ptr) const { return ptr >= in_ptr.ptr; } - - - /// Does this Ptr point to the same memory position as a raw pointer? - bool operator==(const TYPE * in_ptr) const { return ptr == in_ptr; } - - /// Does this Ptr point to different memory positions as a raw pointer? - bool operator!=(const TYPE * in_ptr) const { return ptr != in_ptr; } - - /// Does this Ptr point to a memory position before a raw pointer? - bool operator<(const TYPE * in_ptr) const { return ptr < in_ptr; } - - /// Does this Ptr point to a memory position before or equal to a raw pointer? - bool operator<=(const TYPE * in_ptr) const { return ptr <= in_ptr; } - - /// Does this Ptr point to a memory position after a raw pointer? - bool operator>(const TYPE * in_ptr) const { return ptr > in_ptr; } + template bool operator==(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr == in_ptr.ptr; } + else { return ptr == in_ptr; } + } + template bool operator!=(const T & in_ptr) const { return !operator==(in_ptr); } - /// Does this Ptr point to a memory position after or equal to a raw pointer? 
- bool operator>=(const TYPE * in_ptr) const { return ptr >= in_ptr; } + template bool operator<(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr < in_ptr.ptr; } + else { return ptr < in_ptr; } + } + template bool operator>(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr > in_ptr.ptr; } + else { return ptr > in_ptr; } + } + template bool operator<=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr <= in_ptr.ptr; } + else { return ptr <= in_ptr; } + } + template bool operator>=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr >= in_ptr.ptr; } + else { return ptr >= in_ptr; } + } [[nodiscard]] Ptr operator+(int value) const { return ptr + value; } [[nodiscard]] Ptr operator-(int value) const { return ptr - value; } @@ -885,11 +876,11 @@ namespace emp { struct hash_t { size_t operator()(const Ptr & t) const noexcept { return t.Hash(); } }; // Copy assignments - Ptr & operator=(const Ptr & _in) { ptr = _in.ptr; return *this; } + Ptr & operator=(const Ptr & _in) & { ptr = _in.ptr; return *this; } // Assign to compatible Ptr or raw (non-managed) pointer. - template Ptr & operator=(T2 * _in) { ptr = _in; return *this; } - template Ptr & operator=(Ptr _in) { ptr = _in.Raw(); return *this; } + template Ptr & operator=(T2 * _in) & { ptr = _in; return *this; } + template Ptr & operator=(Ptr _in) & { ptr = _in.Raw(); return *this; } // Auto-cast to raw pointer type. 
operator TYPE *() { return ptr; } @@ -897,21 +888,28 @@ namespace emp { operator bool() { return ptr != nullptr; } operator bool() const { return ptr != nullptr; } - // Comparisons to other Ptr objects - bool operator==(const Ptr & in_ptr) const { return ptr == in_ptr.ptr; } - bool operator!=(const Ptr & in_ptr) const { return ptr != in_ptr.ptr; } - bool operator<(const Ptr & in_ptr) const { return ptr < in_ptr.ptr; } - bool operator<=(const Ptr & in_ptr) const { return ptr <= in_ptr.ptr; } - bool operator>(const Ptr & in_ptr) const { return ptr > in_ptr.ptr; } - bool operator>=(const Ptr & in_ptr) const { return ptr >= in_ptr.ptr; } - - // Comparisons to raw pointers. - bool operator==(const TYPE * in_ptr) const { return ptr == in_ptr; } - bool operator!=(const TYPE * in_ptr) const { return ptr != in_ptr; } - bool operator<(const TYPE * in_ptr) const { return ptr < in_ptr; } - bool operator<=(const TYPE * in_ptr) const { return ptr <= in_ptr; } - bool operator>(const TYPE * in_ptr) const { return ptr > in_ptr; } - bool operator>=(const TYPE * in_ptr) const { return ptr >= in_ptr; } + template bool operator==(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr == in_ptr.ptr; } + else { return ptr == in_ptr; } + } + template bool operator!=(const T & in_ptr) const { return !operator==(in_ptr); } + + template bool operator<(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr < in_ptr.ptr; } + else { return ptr < in_ptr; } + } + template bool operator>(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr > in_ptr.ptr; } + else { return ptr > in_ptr; } + } + template bool operator<=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr <= in_ptr.ptr; } + else { return ptr <= in_ptr; } + } + template bool operator>=(const T & in_ptr) const { + if constexpr (std::is_same>()) { return ptr >= in_ptr.ptr; } + else { return ptr >= in_ptr; } + } [[nodiscard]] Ptr operator+(int value) const { return 
ptr + value; } [[nodiscard]] Ptr operator-(int value) const { return ptr - value; } @@ -1015,7 +1013,7 @@ namespace emp { /// Fill an array with the provided fill_value. /// If fill_value is a function, repeatedly call function. template - void FillMemory(emp::Ptr mem_ptr, const size_t num_bytes, T fill_value) { + void FillMemory(emp::Ptr mem_ptr, const size_t num_bytes, T fill_value) { // If the fill value is a function, call that function for each memory position. if constexpr (std::is_invocable_v) { FillMemoryFunction(mem_ptr, num_bytes, std::forward(fill_value)); @@ -1061,6 +1059,19 @@ namespace emp { } } + /// Copy an array from the provided memory. + template + void CopyMemory( + emp::Ptr from_ptr, + emp::Ptr to_ptr, + const size_t num_items) + { + constexpr size_t FILL_CHUNK = sizeof(T); + const size_t num_bytes = num_items * FILL_CHUNK; + + std::memcpy(to_ptr.Raw(), from_ptr.Raw(), num_bytes); + } + } // namespace emp #endif // #ifndef EMP_BASE_PTR_HPP_INCLUDE diff --git a/include/emp/base/_emscripten_error_trigger.hpp b/include/emp/base/_emscripten_error_trigger.hpp index 202e45a3d6..a00e411b43 100644 --- a/include/emp/base/_emscripten_error_trigger.hpp +++ b/include/emp/base/_emscripten_error_trigger.hpp @@ -12,7 +12,7 @@ #ifndef EMP_BASE__EMSCRIPTEN_ERROR_TRIGGER_HPP_INCLUDE #define EMP_BASE__EMSCRIPTEN_ERROR_TRIGGER_HPP_INCLUDE - +#include #include #include diff --git a/include/emp/base/array.hpp b/include/emp/base/array.hpp index 57076dae3f..a7236bcd99 100644 --- a/include/emp/base/array.hpp +++ b/include/emp/base/array.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2021. + * @date 2016-2022. * * @file array.hpp * @brief A drop-in wrapper for std::array; adds on bounds checking in debug mode. 
@@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -36,130 +37,132 @@ namespace emp { namespace emp { + // Pre-declaration of array type. + template struct array; + + /// Setup an iterator wrapper to check validity. + template + struct array_iterator { + using this_t = array_iterator; + using array_t = ARRAY_T; + + ITERATOR_T it; + const array_t * arr_ptr { nullptr }; // Which array was iterator created from? + + array_iterator() { ; } + + array_iterator(ITERATOR_T _in, const array_t * _v) : it(_in), arr_ptr (_v) { ; } + array_iterator(const this_t &) = default; + array_iterator(this_t &&) = default; + ~array_iterator() { ; } + + // Debug tools to make sure this iterator is okay. + bool OK(bool begin_ok=true, bool end_ok=true) const { + if (arr_ptr == nullptr) return false; // Invalid array + if (it < arr_ptr->begin()) return false; // Iterator before array start. + if (it > arr_ptr->end()) return false; // Iterator after array end. + if (!begin_ok && it == arr_ptr->begin()) return false; // Iterator not allowed at start. + if (!end_ok && it == arr_ptr->end()) return false; // Iterator not allowed at end. + return true; + } + + this_t & operator=(const this_t &) = default; + this_t & operator=(this_t &&) = default; + + operator ITERATOR_T() { return it; } + operator const ITERATOR_T() const { return it; } + + auto & operator*() { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. + return *it; + } + const auto & operator*() const { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. + return *it; + } + + auto operator->() { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. + return it; + } + auto operator->() const { + emp_assert(OK(true, false)); // Ensure array is being pointed to properly. 
+ return it; + } + + this_t & operator++() { emp_assert(OK(true,false)); ++it; return *this; } + this_t operator++(int /*x*/) { emp_assert(OK(true,false)); return this_t(it++, arr_ptr); } + this_t & operator--() { emp_assert(OK(false,true)); --it; return *this; } + this_t operator--(int /*x*/) { emp_assert(OK(false,true)); return this_t(it--, arr_ptr); } + + this_t operator+(int in) { emp_assert(OK()); return this_t(it + in, arr_ptr); } + this_t operator-(int in) { emp_assert(OK()); return this_t(it - in, arr_ptr); } + ptrdiff_t operator-(const this_t & in) { emp_assert(OK()); return it - in.it; } + + this_t & operator+=(int in) { emp_assert(OK()); it += in; return *this; } + this_t & operator-=(int in) { emp_assert(OK()); it -= in; return *this; } + + auto & operator[](int index) { emp_assert(OK()); return it[index]; } + const auto & operator[](int index) const { emp_assert(OK()); return it[index]; } + }; + /// We are in debug mode, so emp::array has the same interface as std::array, but with extra /// bounds checking. Using vector as our base since it has the right pieces and is dynamic. - template - class array : public std::vector { - private: + template + struct array { + static constexpr size_t N = NUM_ELEMENTS; using this_t = emp::array; - using base_t = std::vector; - - public: - bool valid; - - /// Setup an iterator wrapper to make sure that they're valid. - template - struct iterator_wrapper : public ITERATOR_T { - using this_t = iterator_wrapper; - using wrapped_t = ITERATOR_T; - using vec_t = emp::array; - - /// What vector was this iterator created from? - const vec_t * v_ptr{ nullptr }; - - iterator_wrapper() { ; } - - iterator_wrapper(const ITERATOR_T & _in, const vec_t * _v) : ITERATOR_T(_in), v_ptr(_v) { ; } - iterator_wrapper(const this_t &) = default; - iterator_wrapper(this_t &&) = default; - ~iterator_wrapper() { ; } - - // Debug tools to make sure this iterator is okay. 
- bool OK(bool begin_ok=true, bool end_ok=true) const { - if (v_ptr == nullptr) return false; // Invalid vector - if (!v_ptr->valid) return false; // Vector has been deleted! - size_t pos = (size_t) (*this - v_ptr->begin()); - if (pos > v_ptr->size()) return false; // Iterator out of range. - if (!begin_ok && pos == 0) return false; // Iterator not allowed at beginning. - if (!end_ok && pos == v_ptr->size()) return false; // Iterator not allowed at end. - return true; - } - - this_t & operator=(const this_t &) = default; - this_t & operator=(this_t &&) = default; - - operator ITERATOR_T() { return *this; } - operator const ITERATOR_T() const { return *this; } - - auto & operator*() { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. - return wrapped_t::operator*(); - } - const auto & operator*() const { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. - return wrapped_t::operator*(); - } - - auto operator->() { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. - return wrapped_t::operator->(); - } - auto operator->() const { - emp_assert(OK(true, false)); // Ensure array is being pointed to properly. 
- return wrapped_t::operator->(); - } - - this_t & operator++() { emp_assert(OK(true,false)); wrapped_t::operator++(); return *this; } - this_t operator++(int x) { emp_assert(OK(true,false)); return this_t(wrapped_t::operator++(x), v_ptr); } - this_t & operator--() { emp_assert(OK(false,true)); wrapped_t::operator--(); return *this; } - this_t operator--(int x) { emp_assert(OK(false,true)); return this_t(wrapped_t::operator--(x), v_ptr); } - - auto operator+(int in) { emp_assert(OK()); return this_t(wrapped_t::operator+(in), v_ptr); } - auto operator-(int in) { emp_assert(OK()); return this_t(wrapped_t::operator-(in), v_ptr); } - auto operator-(const this_t & in) { emp_assert(OK()); return ((wrapped_t) *this) - (wrapped_t) in; } - - this_t & operator+=(int in) { emp_assert(OK()); wrapped_t::operator+=(in); return *this; } - this_t & operator-=(int in) { emp_assert(OK()); wrapped_t::operator-=(in); return *this; } - auto & operator[](int offset) { emp_assert(OK()); return wrapped_t::operator[](offset); } - }; - - using iterator = iterator_wrapper< typename base_t::iterator >; - using const_iterator = iterator_wrapper< typename base_t::const_iterator >; - using reverse_iterator = iterator_wrapper< typename base_t::reverse_iterator >; - using const_reverse_iterator = iterator_wrapper< typename base_t::const_reverse_iterator >; + + T _data[ N ? 
N : 1 ]; + + using iterator = array_iterator< T*, this_t >; + using const_iterator = array_iterator< const T *, this_t >; + using reverse_iterator = array_iterator< std::reverse_iterator, this_t >; + using const_reverse_iterator = array_iterator< std::reverse_iterator, this_t >; using value_type = T; - using size_type = typename base_t::size_type; - using reference = typename base_t::reference; - using const_reference = typename base_t::const_reference; + using size_type = std::size_t; + using reference = value_type&; + using const_reference = const value_type&; + + // -- No constructors, destructors, or assignment operators to preserve aggregate type. - array() : base_t(N), valid(true) {}; - array(const this_t & _in) : base_t(_in), valid(true) { emp_assert(_in.size() == N); }; - array(std::initializer_list in_list) : base_t(in_list), valid(true) { emp_assert(size() == N); } - template - array(InputIt first, InputIt last) : base_t(first, last), valid(true) { emp_assert(size() == N); } - ~array() { valid=false; } // No longer valid when array is deleted. + int operator<=>(const array &) const = default; + // Allow automatic conversion to regular array type. 
operator std::array() { std::array ar; - for (size_t i = 0; i < N; i++) ar[i] = base_t::operator[](i); + for (size_t i = 0; i < N; i++) ar[i] = _data[i]; return ar; } constexpr size_t size() const { return N; } - iterator begin() noexcept { return iterator(base_t::begin(), this); } - const_iterator begin() const noexcept { return const_iterator(base_t::begin(), this); } - iterator end() noexcept { return iterator(base_t::end(), this); } - const_iterator end() const noexcept { return const_iterator(base_t::end(), this); } + auto & data() { return _data; } + const auto & data() const { return _data; } - this_t & operator=(const this_t &) = default; + iterator begin() noexcept { return iterator(_data, this); } + const_iterator begin() const noexcept { return const_iterator(_data, this); } + iterator end() noexcept { return iterator(_data + N, this); } + const_iterator end() const noexcept { return const_iterator(_data + N, this); } T & operator[](size_t pos) { emp_assert(pos < N, pos, N); - return base_t::operator[](pos); + return _data[pos]; } const T & operator[](size_t pos) const { emp_assert(pos < N, pos, N); - return base_t::operator[](pos); + return _data[pos]; } - T & back() { emp_assert(N > 0); return base_t::back(); } - const T & back() const { emp_assert(N > 0); return base_t::back(); } - T & front() { emp_assert(N > 0); return base_t::front(); } - const T & front() const { emp_assert(N > 0); return base_t::front(); } + T & back() { emp_assert(N > 0); return _data[N-1]; } + const T & back() const { emp_assert(N > 0); return _data[N-1]; } + T & front() { emp_assert(N > 0); return _data[0]; } + const T & front() const { emp_assert(N > 0); return _data[0]; } - void fill(const T & val) { this->assign(N, val); } + void fill(const T & val) { + for (size_t i = 0; i < N; ++i) _data[i] = val; + } // Functions to make sure to throw an error on: @@ -171,21 +174,21 @@ namespace emp { void pop_back() { emp_assert(false, "invalid operation for array!"); } template - 
iterator insert(ARGS &&... args) { + iterator insert(ARGS &&... /* args */) { emp_assert(false, "invalid operation for array!"); - return iterator( base_t::insert(std::forward(args)...), this ); + return end(); } template - iterator erase(ARGS &&... args) { + iterator erase(ARGS &&... /* args */) { emp_assert(false, "invalid operation for array!"); - return iterator( base_t::erase(std::forward(args)...), this ); + return end(); } template - iterator emplace(ARGS &&... args) { + iterator emplace(ARGS &&... /* args */) { emp_assert(false, "invalid operation for array!"); - return iterator( base_t::emplace(std::forward(args)...), this ); + return end(); } template diff --git a/include/emp/base/assert.hpp b/include/emp/base/assert.hpp index 2a6a8f5e0c..67508bfe7b 100644 --- a/include/emp/base/assert.hpp +++ b/include/emp/base/assert.hpp @@ -43,6 +43,10 @@ // #define emp_assert(EXPR) ((void) sizeof(EXPR) ) // #define emp_assert(EXPR, ...) { constexpr bool __emp_assert_tmp = false && (EXPR); (void) __emp_assert_tmp; } + namespace emp { + static constexpr bool is_debug_mode = false; + } + #else /// Require a specified condition to be true. If it is false, immediately /// halt execution. Print also extra information on any variables or @@ -51,6 +55,10 @@ /// information will not be printed when compiling with MSVC. #define emp_assert(...) emp_always_assert(__VA_ARGS__) + namespace emp { + static constexpr bool is_debug_mode = true; + } + #endif diff --git a/include/emp/base/notify.hpp b/include/emp/base/notify.hpp new file mode 100644 index 0000000000..4af90f1a4e --- /dev/null +++ b/include/emp/base/notify.hpp @@ -0,0 +1,472 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022. + * + * @file notify.hpp + * @brief Tools to alert users of messages (including errors and warnings) in a consistent manner.
+ * @note Status: ALPHA + * + * + * There are a handful of notification types to consider: + * - Message: A simple notification. + * - Verbose: Optional messages that can be activated by category. + * - Warning: Something looks suspicious, but is not technically a problem (don't exit) + * - Error: Something has gone horribly wrong and is impossible to recover from (exit) + * - Exception: Something didn't go the way we expected, but we can still recover (exit if not handled) + * - Debug: A simple notification that should only be printed when NDEBUG is not set (don't exit) + * + * Messages default to "standard out"; all of the others default to "standard error". Handling of + * these notifications can all be overridden by either whole category or by specific tag. + * + * There are three possible recipients for all errors/warnings. + * - The end-user if the problem stems from inputs they provided to the executable. + * - The library user if the problem is due to misuse of library functionality. + * - The library developers if something that should be impossible occurs. + * + * The content of this file primarily targets the first group; developers should prefer asserts + * to ensure that supposedly "impossible" situations do not occur. + * + * NOTES: + * - Whenever possible, exceptions should be preferred. They are more specific than warnings + * and can be responded to rather than automatically halting execution like errors. + * - Warnings should always detail what should be done differently to suppress that warning.
+ * + */ + +#ifndef EMP_BASE_NOTIFY_HPP_INCLUDE +#define EMP_BASE_NOTIFY_HPP_INCLUDE + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vector.hpp" + +namespace emp { +namespace notify { + using id_t = std::string; + using message_t = std::string; + using except_data_t = std::any; + + using id_arg_t = const id_t &; + using message_arg_t = const message_t &; + using response_t = bool(id_arg_t, message_arg_t, except_data_t); + using exit_fun_t = std::function; + + /// Information about an exception that has occurred. + struct ExceptInfo { + id_t id = "__NONE__"; ///< Which exception was triggered? + message_t message = ""; ///< A detailed message of this exception. + except_data_t data; ///< Extra data needed to resolve this exception. + }; + + enum class Type { MESSAGE=0, DEBUG, WARNING, ERROR, EXCEPTION, NUM_TYPES }; + static constexpr size_t num_types = static_cast(Type::NUM_TYPES); + + /// Convert a type to a human-readable string. + static id_t TypeID(Type type) { + switch (type) { + case Type::MESSAGE: return "Message"; + case Type::DEBUG: return "Debug"; + case Type::WARNING: return "WARNING"; + case Type::ERROR: return "ERROR"; + case Type::EXCEPTION: return "EXCEPTION"; + default: return "Unknown"; + } + } + + /// Convert a type to a human-readable string in COLOR. 
+ static id_t ColorTypeID(Type type) { + const std::string green_text = "\033[32m"; + const std::string magenta_text = "\033[35m"; + const std::string red_text = "\033[31m"; + const std::string yellow_text = "\033[33m"; + const std::string normal_text = "\033[39m"; + const std::string bold_text = "\033[1m"; + const std::string no_bold_text = "\033[22m"; + switch (type) { + case Type::MESSAGE: return green_text + "Message" + normal_text; + case Type::DEBUG: return green_text + bold_text + "Debug" + no_bold_text + normal_text; + case Type::WARNING: return yellow_text + bold_text + "WARNING" + no_bold_text + normal_text; + case Type::ERROR: return red_text + bold_text + "ERROR" + no_bold_text + normal_text; + case Type::EXCEPTION: return magenta_text + bold_text + "EXCEPTION" + no_bold_text + normal_text; + default: return "Unknown"; + } + } + + // Maintain a specified collection of handlers. + class HandlerSet { + private: + using fun_t = std::function; + using fun_no_data_t = std::function; + using fun_msg_only_t = std::function; + emp::vector handlers; + bool exit_on_fail = false; + + public: + HandlerSet() {} + HandlerSet(const HandlerSet &) = default; + HandlerSet(HandlerSet &&) = default; + ~HandlerSet() { } + + bool GetExitOnFail() const { return exit_on_fail; } + HandlerSet & SetExitOnFail(bool _exit=true) { + exit_on_fail = _exit; + return *this; + } + + /// Trigger all handlers associated with a given ID. + bool Trigger(id_arg_t id, message_arg_t message, except_data_t except_data) { + // Run handlers from most recently added to oldest. + for (auto it = handlers.rbegin(); + it != handlers.rend(); + ++it) { + // Run until "true" result + bool result = (*it)(id, message, except_data); + if (result) return true; // Stop if any handler succeeded. + } + + return false; + } + + // Trigger without providing data. + bool Trigger(id_arg_t id, message_arg_t message) { + return Trigger(id, message, 0); + } + + // Trigger from a stored notification. 
+ bool Trigger(const ExceptInfo & info) { + return Trigger(info.id, info.message, info.data); + } + + // Add a function to this set. + HandlerSet & Add(fun_t in) { handlers.push_back(in); return *this; } + + // Add a function with no data. + HandlerSet & Add(fun_no_data_t in) { + handlers.push_back( + [fun=in](id_arg_t id, message_arg_t msg, except_data_t){ return fun(id,msg); } + ); + return *this; + } + + // Add a function with only a single message + HandlerSet & Add(fun_msg_only_t in) { + handlers.push_back( + [fun=in](id_arg_t, message_arg_t msg, except_data_t){ return fun(msg); } + ); + return *this; + } + + + // Clear all handlers in this set. + HandlerSet & Clear() { handlers.resize(0); return *this; } + + /// Replace all handlers with nothing (i.e., clear them) + void Replace() { Clear(); } + + /// Replace all handlers with the generic ones provided. + template + void Replace(fun_t in, FUN_Ts... extra) { + Replace(extra...); + Add(in); + } + }; + + /// Statically stored data about current notifications. + struct NotifyData { + // For each exception name we will keep a vector of handlers, appended to in the order + // that they arrive (most recent will be last) + std::unordered_map handler_map; // Map of all handlers to use for notifications. + std::unordered_map verbose_map; // Set of categories for verbose messages. + emp::vector exit_funs; // Set of handlers to run on exit. + emp::vector except_queue; // Unresolved exceptions after handlers have run + emp::vector pause_queue; // Unresolved notifications during pause + bool lethal_exceptions = true; // Should unresolved exceptions end the program? + bool is_paused = false; // When paused, save notifications until unpaused. + + HandlerSet & GetHandler(Type type) { return handler_map[TypeID(type)]; } + + NotifyData() { + // Setup the default handlers and exit rules.
+ GetHandler(Type::MESSAGE).Add( + [](id_arg_t, message_arg_t msg) { + std::cout << msg << std::endl; + return true; + } + ); + + GetHandler(Type::DEBUG).Add( +#ifdef NDEBUG + [](id_arg_t, message_arg_t){ return true; } +#else + [](id_arg_t, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::DEBUG); + std::cout << tag << ": " << msg << std::endl; + return true; + } +#endif + ); + + GetHandler(Type::WARNING).Add( + [](id_arg_t, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::WARNING); + std::cout << tag << ": " << msg << std::endl; + return true; // Only warning, do not exit. + } + ); + + GetHandler(Type::ERROR).Add( + [](id_arg_t, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::ERROR); + std::cout << tag << ": " << msg << std::endl; + return false; // Does not correct the problem, so exit. + } + ); + + GetHandler(Type::EXCEPTION).Add( + [](id_arg_t id, message_arg_t msg) { + const std::string tag = ColorTypeID(Type::EXCEPTION); + std::cerr << tag << " (" << id << "): " << msg << std::endl; + return false; // Does not correct the problem, so exit. + } + ); + GetHandler(Type::EXCEPTION).SetExitOnFail(); + + // The initial exit handler should actually exit, using the appropriate exit code. + exit_funs.push_back( [](int code){ exit(code); } ); + } + }; + + /// Central call to obtain NotifyData singleton. 
+ static NotifyData & GetData() { static NotifyData data; return data; } + [[maybe_unused]] static auto & MessageHandlers() { return GetData().GetHandler(Type::MESSAGE); } + [[maybe_unused]] static auto & DebugHandlers() { return GetData().GetHandler(Type::DEBUG); } + [[maybe_unused]] static auto & WarningHandlers() { return GetData().GetHandler(Type::WARNING); } + [[maybe_unused]] static auto & ErrorHandlers() { return GetData().GetHandler(Type::ERROR); } + + [[maybe_unused]] static void AddExitHandler(exit_fun_t fun) { GetData().exit_funs.push_back(fun); } + [[maybe_unused]] static void ClearExitHandlers() { GetData().exit_funs.resize(0); } + [[maybe_unused]] static void ReplaceExitHandlers() { ClearExitHandlers(); } + template + static void ReplaceExitHandlers(exit_fun_t fun, FUN_Ts... extras) { + ReplaceExitHandlers(extras...); + AddExitHandler(fun); + } + + /// Generic exit handler that calls all of the provided functions. + [[maybe_unused]] static void Exit(int exit_code) { + NotifyData & data = GetData(); + + // Run any cleanup functions. + for (auto it = data.exit_funs.rbegin(); it != data.exit_funs.rend(); ++it) { + (*it)(exit_code); + } + + // Exit for real. + exit(exit_code); + } + + /// Generic Notification where type must be specified. + template + static bool Notify(Type type, Ts... args) { + NotifyData & data = GetData(); + const id_t id = TypeID(type); + + // Setup the message in a string stream. + std::stringstream ss; + ((ss << std::forward(args)), ...); + + // If we are paused, save this notification for later. + if (data.is_paused) { + data.pause_queue.push_back(ExceptInfo{id, ss.str(), 0}); + return true; + } + + bool result = data.handler_map[id].Trigger(id, ss.str()); + + // And return the success result. + return result; + } + + [[maybe_unused]] static void Pause() { + NotifyData & data = GetData(); + data.is_paused = true; + } + + [[maybe_unused]] static void Unpause() { + NotifyData & data = GetData(); + + // Step through the notifications that have accrued.
+ for (size_t i = 0; i < data.pause_queue.size(); ++i) { + auto & notice = data.pause_queue[i]; + bool result = data.handler_map[notice.id].Trigger(notice); + if (!result) { // Failed; move to exception queue or exit if error. + if (notice.id == "ERROR") Exit(1); + data.except_queue.push_back(notice); + } + } + + data.pause_queue.resize(0); // Clear out the queue. + + data.is_paused = false; + } + + + /// Send out a regular notification. + template + static bool Message(Ts... args) { return Notify(Type::MESSAGE, std::forward(args)...); } + + /// Send out a DEBUG notification. + template + static bool Debug(Ts... args) { return Notify(Type::DEBUG, std::forward(args)...); } + + /// Send out a notification of a WARNING. + template + static bool Warning(Ts... args) { return Notify(Type::WARNING, std::forward(args)...); } + + /// Send out a notification of an ERROR. + template + static bool Error(Ts... args) { + bool success = Notify(Type::ERROR, std::forward(args)...); + if (!success) { +#ifdef NDEBUG + Exit(1); +#else + abort(); +#endif + } + return success; + } + + // Trigger a warning only if a specified condition is true. + template + static bool TestWarning(bool test, Ts... args) { + if (test) return Warning(std::forward(args)...); + return true; + } + + // Trigger an error only if a specified condition is true. + template + static bool TestError(bool test, Ts... args) { + if (test) return Error(std::forward(args)...); + return true; + } + + + /// Add a handler for a particular exception type. + template + static HandlerSet & AddHandler(id_arg_t id, FUN_T fun) { + return GetData().handler_map[id].Add(fun); + } + + /// Add a generic exception handler. + template + static HandlerSet & AddHandler(FUN_T fun) { + return GetData().handler_map["EXCEPTION"].Add(fun); + } + + /// Ignore exceptions of a specific type. 
+ [[maybe_unused]] static HandlerSet & Ignore(id_arg_t id) { + return AddHandler(id, [](id_arg_t, message_arg_t){ return true; }); + } + + /// Turn on a particular verbosity category. + [[maybe_unused]] static void SetVerbose(std::string id, bool make_active=true) { + GetData().verbose_map[id] = make_active; + } + + /// Send out a notification of a "verbose" message. + template + [[maybe_unused]] static bool Verbose(const std::string & id, Ts... args) { + NotifyData & data = GetData(); + + if (data.verbose_map[id]) { + return Notify(Type::MESSAGE, std::forward(args)...); + } + + return false; + } + + /// Send out a notification of an Exception. + [[maybe_unused]] static bool Exception(id_arg_t id, message_arg_t message="", except_data_t except_data=0) { + NotifyData & data = GetData(); + + if (data.is_paused) { + data.pause_queue.push_back(ExceptInfo{id, message, except_data}); + return true; + } + + // Retrieve any specialized exception handlers for this type of exception. + bool result = data.handler_map[id].Trigger(id, message, except_data); + + // If unresolved, see if we should quit; else use a generic exception handler. + if (!result) { + if (data.handler_map[id].GetExitOnFail()) Exit(1); + result = data.handler_map["EXCEPTION"].Trigger(id, message, except_data); + } + + // If still unresolved, either give up or save the exception for later analysis. + if (!result) { + if (data.handler_map["EXCEPTION"].GetExitOnFail()) Exit(1); + data.except_queue.push_back(ExceptInfo{id, message, except_data}); + } + + return result; + } + + /// Retrieve a vector of ALL unresolved exceptions. + [[maybe_unused]] static const emp::vector & GetExceptions() { return GetData().except_queue; } + + /// Retrieve the first unresolved exception with a given id.
+ [[maybe_unused]] static ExceptInfo GetException(id_arg_t id) { + for (ExceptInfo & x : GetData().except_queue) if (x.id == id) return x; + return ExceptInfo{}; + } + + /// Return a total count of how many unresolved exceptions are left. + [[maybe_unused]] static size_t CountExceptions() { return GetData().except_queue.size(); } + + /// Return a total count of how many unresolved exceptions have a given id. + [[maybe_unused]] static size_t CountExceptions(id_arg_t id) { + size_t count = 0; + for (ExceptInfo & x : GetData().except_queue) if (x.id == id) ++count; + return count; + } + + /// Identify whether there are ANY unresolved exceptions. + [[maybe_unused]] static bool HasExceptions() { return CountExceptions(); } + + /// Identify whether there are any unresolved exceptions with a given id. + [[maybe_unused]] static bool HasException(id_arg_t id) { + for (ExceptInfo & x : GetData().except_queue) if (x.id == id) return true; + return false; + } + + /// Remove all unresolved exceptions. + [[maybe_unused]] static void ClearExceptions() { GetData().except_queue.resize(0); } + + /// Remove first exception with a given id. + [[maybe_unused]] static void ClearException(id_arg_t id) { + auto & except_queue = GetData().except_queue; + for (size_t i = 0; i < except_queue.size(); ++i) { + if (except_queue[i].id == id) { + // If exception is NOT in the last position, move last position earlier and reduce size. 
+ if (i < except_queue.size() - 1) except_queue[i] = except_queue.back(); + except_queue.resize(except_queue.size() - 1); + return; + } + } + } + +} +} + + +#endif // #ifndef EMP_BASE_NOTIFY_HPP_INCLUDE diff --git a/include/emp/base/vector.hpp b/include/emp/base/vector.hpp index e4d71241c1..fdcca4c641 100644 --- a/include/emp/base/vector.hpp +++ b/include/emp/base/vector.hpp @@ -30,7 +30,7 @@ #ifdef EMP_NDEBUG -// Seemlessly translate emp::vector to std::vector +// Seamlessly translate emp::vector to std::vector namespace emp { template using vector = std::vector; } @@ -77,11 +77,23 @@ namespace emp { // Debug tools to make sure this iterator is okay. static std::string & ErrorCode() { static std::string code="No Errors Found."; return code; } + static std::string ErrorStart() { + std::string vec_type = std::string("vector<") + typeid(typename stdv_t::value_type).name() + ">"; + std::string it_type = typeid(ITERATOR_T).name(); + if constexpr (std::is_same()) it_type = "iterator"; + if constexpr (std::is_same()) it_type = "const_iterator"; + if constexpr (std::is_same()) it_type = "reverse_iterator"; + if constexpr (std::is_same()) it_type = "const_reverse_iterator"; + return std::string("Iterator (type = '") + vec_type + "::" + it_type + "') "; + } + + bool OK(bool begin_ok=true, bool end_ok=true, std::string op="") const { + std::string type_name = typeid(ITERATOR_T).name();; - bool OK(bool begin_ok=true, bool end_ok=true) const { if (v_ptr == nullptr) { ErrorCode() = "Invalid Vector! (set to nullptr)"; return false; } if (v_ptr->revision == 0) { ErrorCode() = "Vector deleted! 
(revision==0)"; return false; } - if (revision != v_ptr->revision) { ErrorCode() = "Vector has changed memeory!"; return false; } + if (revision != v_ptr->revision) { ErrorCode() = "Vector has changed memory!"; return false; } + int64_t pos = 0; if constexpr (std::is_same() || std::is_same()) { @@ -93,58 +105,63 @@ namespace emp { pos = *((ITERATOR_T *) this) - ((stdv_t *) v_ptr)->begin(); } if (pos < 0 || ((size_t) pos) > v_ptr->size()) { - ErrorCode() = "Iterator out of range."; - ErrorCode() += " size="; - ErrorCode() += std::to_string(v_ptr->size()); - ErrorCode() += " pos="; - ErrorCode() += std::to_string(pos); + ErrorCode() = ErrorStart() + "out of range." + + " size=" + std::to_string(v_ptr->size()) + " pos=" + std::to_string(pos); + return false; + } + if (!begin_ok && pos == 0) { + ErrorCode() = ErrorStart() + "not allowed at begin() for operation " + op + "."; + return false; + } + if (!end_ok && ((size_t) pos) == v_ptr->size()) { + ErrorCode() = ErrorStart() + "not allowed at end() for operation " + op + "."; return false; } - if (!begin_ok && pos == 0) { ErrorCode() = "Iterator not allowed at begin()."; return false; } - if (!end_ok && ((size_t) pos) == v_ptr->size()) { ErrorCode() = "Iterator not allowed at end()."; return false; } return true; } - this_t & operator=(const this_t &) = default; - this_t & operator=(this_t &&) = default; + this_t & operator=(const this_t &) & = default; + this_t & operator=(this_t &&) & = default; operator ITERATOR_T() { return *this; } operator const ITERATOR_T() const { return *this; } auto & operator*() { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. + emp_assert(OK(true, false, "dereference"), ErrorCode()); return wrapped_t::operator*(); } const auto & operator*() const { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. 
+ emp_assert(OK(true, false, "const dereference"), ErrorCode()); return wrapped_t::operator*(); } auto operator->() { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. +// emp_assert(OK(true, false, "->"), ErrorCode()); + emp_assert(OK(true, true, "->"), ErrorCode()); // Technically can use -> on end() for memory identification, just can't use result. return wrapped_t::operator->(); } auto operator->() const { - emp_assert(OK(true, false), ErrorCode()); // Ensure vector hasn't changed since making iterator. +// emp_assert(OK(true, false, "const ->"), ErrorCode()); + emp_assert(OK(true, true, "const ->"), ErrorCode()); // Technically can use -> on end() for memory identification, just can't use result. return wrapped_t::operator->(); } this_t & operator++() { - emp_assert(OK(true,false), ErrorCode()); + emp_assert(OK(true,false, "++ (pre)"), ErrorCode()); wrapped_t::operator++(); return *this; } this_t operator++(int x) { - emp_assert(OK(true,false), ErrorCode()); + emp_assert(OK(true,false, "++ (post)"), ErrorCode()); return this_t(wrapped_t::operator++(x), v_ptr); } this_t & operator--() { - emp_assert(OK(false,true), ErrorCode()); + emp_assert(OK(false,true, "-- (pre)"), ErrorCode()); wrapped_t::operator--(); return *this; } this_t operator--(int x) { - emp_assert(OK(false,true), ErrorCode()); + emp_assert(OK(false,true, "-- (post)"), ErrorCode()); return this_t(wrapped_t::operator--(x), v_ptr); } @@ -215,7 +232,7 @@ namespace emp { stdv_t::resize(new_size, val); revision++; } - this_t & operator=(const this_t &) = default; + this_t & operator=(const this_t &) & = default; T & operator[](size_t pos) { emp_assert(pos < stdv_t::size(), pos, stdv_t::size()); @@ -305,7 +322,7 @@ namespace emp { emp_assert(new_size < MAX_SIZE, new_size); stdv_t::resize(new_size, val); } - this_t & operator=(const this_t &) = default; + this_t & operator=(const this_t &) & = default; auto operator[](size_t pos) -> 
decltype(stdv_t::operator[](pos)) { emp_assert(pos < stdv_t::size(), pos, stdv_t::size()); diff --git a/include/emp/bits/BitArray.hpp b/include/emp/bits/BitArray.hpp index 66c7361217..847be6c0ae 100644 --- a/include/emp/bits/BitArray.hpp +++ b/include/emp/bits/BitArray.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "../base/assert.hpp" @@ -30,7 +31,6 @@ #include "../math/math.hpp" #include "../math/Random.hpp" #include "../meta/type_traits.hpp" -#include "../polyfill/span.hpp" #include "_bitset_helpers.hpp" #include "bitset_utils.hpp" @@ -169,17 +169,17 @@ namespace emp { /// Destructor. ~BitArray() = default; - /// Assignment operator (no separate move opperator since no resources to move...) - BitArray & operator=(const this_t & in_bits) noexcept { return Copy(in_bits.bits); } + /// Assignment operator (no separate move operator since no resources to move...) + BitArray & operator=(const this_t & in_bits) & noexcept { return Copy(in_bits.bits); } /// Assignment operator from a std::bitset. - BitArray & operator=(const std::bitset & bitset); + BitArray & operator=(const std::bitset & bitset) &; /// Assignment operator from a string of '0's and '1's. - BitArray & operator=(const std::string & bitstring); + BitArray & operator=(const std::string & bitstring) &; /// Assignment operator from a literal string of '0's and '1's. - BitArray & operator=(const char * bitstring) { return operator=(std::string(bitstring)); } + BitArray & operator=(const char * bitstring) & { return operator=(std::string(bitstring)); } /// Assignment from another BitArray of a different size. template @@ -189,7 +189,7 @@ namespace emp { template BitArray Export(size_t start_bit=0) const; - /// For debugging: make sure that there are no obvous problems with a BitArray object. + /// For debugging: make sure that there are no obvious problems with a BitArray object. bool OK() const; /// How many bits are in this BitArray? 
@@ -316,7 +316,7 @@ namespace emp { // ========= Access Groups of bits ========= // - /// Retrive the byte at the specified byte index. + /// Retrieve the byte at the specified byte index. [[nodiscard]] uint8_t GetByte(size_t index) const; /// Get a read-only view into the internal array used by BitArray. @@ -430,7 +430,7 @@ namespace emp { [[nodiscard]] int FindOne() const; /// Deprecated: Return the position of the first one; return -1 if no ones in vector. - [[deprecated("Renamed to more acurate FindOne()")]] + [[deprecated("Renamed to more accurate FindOne()")]] [[nodiscard]] int FindBit() const { return FindOne(); } /// Return the position of the first one after start_pos; return -1 if no ones in vector. @@ -441,7 +441,7 @@ namespace emp { [[nodiscard]] int FindOne(const size_t start_pos) const; /// Deprecated version of FindOne(). - [[deprecated("Renamed to more acurate FindOne(start_pos)")]] + [[deprecated("Renamed to more accurate FindOne(start_pos)")]] [[nodiscard]] int FindBit(const size_t start_pos) const; /// Find the most-significant set-bit. @@ -451,7 +451,7 @@ namespace emp { int PopOne(); /// Deprecated version of PopOne(). - [[deprecated("Renamed to more acurate PopOne()")]] + [[deprecated("Renamed to more accurate PopOne()")]] int PopBit() { return PopOne(); } /// Return positions of all ones. @@ -646,7 +646,7 @@ namespace emp { /// Compound operator plus... const BitArray & operator+=(const BitArray & ar2) { return ADD_SELF(ar2); } - /// Compoount operator minus... + /// Compound operator minus... const BitArray & operator-=(const BitArray & ar2) { return SUB_SELF(ar2); } /// STL COMPATABILITY @@ -686,7 +686,7 @@ namespace emp { const size_t start_pos = FieldPos(start); // Identify the start position WITHIN a bit field. const size_t stop_pos = FieldPos(stop); // Identify the stop position WITHIN a bit field. - size_t start_field = FieldID(start); // Ideftify WHICH bit field we're starting in. 
+ size_t start_field = FieldID(start); // Identify WHICH bit field we're starting in. const size_t stop_field = FieldID(stop-1); // Identify the last field where we actually make a change. // If the start field and stop field are the same, mask off the middle. @@ -814,7 +814,7 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // mask necessary to suprress shift count overflow warnings + // mask necessary to surpress shift count overflow warnings c &= FIELD_LOG2_MASK; n = (n<>( (-(c+FIELD_BITS-NUM_BITS)) & FIELD_LOG2_MASK )); @@ -825,7 +825,7 @@ namespace emp { ShiftRight(NUM_BITS - shift_size); OR_SELF(dup); } else { - // for big BitArrays, manual rotating is fater + // for big BitArrays, manual rotating is faster // note that we already modded shift_size by NUM_BITS // so there's no need to mod by FIELD_SIZE here @@ -900,7 +900,7 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // mask necessary to suprress shift count overflow warnings + // mask necessary to surpress shift count overflow warnings c &= FIELD_LOG2_MASK; n = (n>>c) | (n<<( (NUM_BITS-c) & FIELD_LOG2_MASK )); @@ -911,7 +911,7 @@ namespace emp { ShiftLeft(NUM_BITS - shift_size); OR_SELF(dup); } else { - // for big BitArrays, manual rotating is fater + // for big BitArrays, manual rotating is faster const field_t field_shift = (shift_size / FIELD_BITS) % NUM_FIELDS; const int bit_shift = shift_size % FIELD_BITS; @@ -999,7 +999,7 @@ namespace emp { /// Assignment operator from a std::bitset. template BitArray & - BitArray::operator=(const std::bitset & bitset) { + BitArray::operator=(const std::bitset & bitset) & { for (size_t i = 0; i < NUM_BITS; i++) Set(i, bitset[i]); return *this; } @@ -1007,7 +1007,7 @@ namespace emp { /// Assignment operator from a string of '0's and '1's. 
template BitArray & - BitArray::operator=(const std::string & bitstring) { + BitArray::operator=(const std::string & bitstring) & { emp_assert(bitstring.size() <= NUM_BITS); Clear(); if constexpr (ZERO_LEFT) { @@ -1028,7 +1028,7 @@ namespace emp { const size_t from_bit ) { // Only check for same-ness if the two types are the same. - if constexpr (FROM_BITS == NUM_BITS) emp_assert(&from_array != this); + if constexpr (FROM_BITS == NUM_BITS) { emp_assert(&from_array != this); } emp_assert(from_bit < FROM_BITS); @@ -1076,7 +1076,7 @@ namespace emp { return out_bits; } - /// For debugging: make sure that there are no obvous problems with a BitArray object. + /// For debugging: make sure that there are no obvious problems with a BitArray object. template bool BitArray::OK() const { // Make sure final bits are zeroed out. @@ -1091,7 +1091,7 @@ namespace emp { template bool BitArray::Get(size_t index) const { - emp_assert(index >= 0 && index < NUM_BITS); + emp_assert(index < NUM_BITS); const size_t field_id = FieldID(index); const size_t pos_id = FieldPos(index); return (bits[field_id] & (((field_t)1U) << pos_id)) != 0; @@ -1123,7 +1123,7 @@ namespace emp { /// Flip a single bit template BitArray & BitArray::Toggle(size_t index) { - emp_assert(index >= 0 && index < NUM_BITS); + emp_assert(index < NUM_BITS); const size_t field_id = FieldID(index); const size_t pos_id = FieldPos(index); const field_t pos_mask = FIELD_1 << pos_id; @@ -1848,7 +1848,7 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // mask necessary to suprress shift count overflow warnings + // mask necessary to surpress shift count overflow warnings c &= FIELD_LOG2_MASK; n = (n<>( (-(c+FIELD_BITS-NUM_BITS)) & FIELD_LOG2_MASK )); @@ -1930,7 +1930,7 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // mask necessary to suprress shift count overflow warnings + // mask necessary to surpress shift count overflow warnings c &= FIELD_LOG2_MASK; n = (n>>c) | (n<<( (NUM_BITS-c) & 
FIELD_LOG2_MASK )); diff --git a/include/emp/bits/BitVector.hpp b/include/emp/bits/BitVector.hpp index aff6817322..06ef9bc4e5 100644 --- a/include/emp/bits/BitVector.hpp +++ b/include/emp/bits/BitVector.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2021. + * @date 2016-2022. * * @file BitVector.hpp * @brief A drop-in replacement for std::vector, with additional bitwise logic features. @@ -18,7 +18,7 @@ * @todo For large BitVectors we can use a factory to preserve/adjust bit info. That should be * just as efficient than a reserve, but without the need to store extra in-class info. * @todo Implement append(), resize(), push_bit(), insert(), remove() - * @todo Think about how itertors should work for BitVector. It should probably go bit-by-bit, + * @todo Think about how iterators should work for BitVector. It should probably go bit-by-bit, * but there are very few circumstances where that would be useful. Going through the * positions of all ones would be more useful, but perhaps less intuitive. * @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "../base/assert.hpp" @@ -42,7 +43,6 @@ #include "../datastructs/hash_utils.hpp" #include "../math/math.hpp" #include "../math/Random.hpp" -#include "../polyfill/span.hpp" #include "_bitset_helpers.hpp" #include "bitset_utils.hpp" @@ -51,7 +51,7 @@ namespace emp { /// @brief A drop-in replacement for std::vector, but with extra bitwise logic features. /// - /// This class stores an arbirary number of bits in a set of "fields" (typically 32 bits or 64 + /// This class stores an arbitrary number of bits in a set of "fields" (typically 32 bits or 64 /// bits per field, depending on which should be faster.) Individual bits can be extracted, /// -or- bitwise logic (including more complex bit magic) can be used on the groups of bits. 
@@ -71,7 +71,7 @@ namespace emp { static constexpr size_t MAX_BITS = (size_t) -1; ///< Value larger than any bit ID. // Number of bits needed to specify position in a field + mask - static constexpr size_t FIELD_LOG2 = emp::Log2(FIELD_BITS); + static constexpr size_t FIELD_LOG2 = static_cast(emp::Log2(FIELD_BITS)); static constexpr field_t FIELD_LOG2_MASK = MaskLow(FIELD_LOG2); size_t num_bits; ///< Total number of bits are we using @@ -86,7 +86,7 @@ namespace emp { /// A mask to cut off all of the final bits. [[nodiscard]] field_t EndMask() const { return MaskLow(NumEndBits()); } - /// How many feilds do we need for the current set of bits? + /// How many felids do we need for the current set of bits? [[nodiscard]] size_t NumFields() const { return num_bits ? (1 + ((num_bits - 1) / FIELD_BITS)) : 0; } /// What is the ID of the last occupied field? @@ -192,20 +192,20 @@ namespace emp { ~BitVector(); /// Assignment operator. - BitVector & operator=(const BitVector & in); + BitVector & operator=(const BitVector & in) &; /// Move operator. - BitVector & operator=(BitVector && in); + BitVector & operator=(BitVector && in) &; /// Assignment operator from a std::bitset. template - BitVector & operator=(const std::bitset & bitset); + BitVector & operator=(const std::bitset & bitset) &; /// Assignment operator from a string of '0's and '1's. - BitVector & operator=(const std::string & bitstring); + BitVector & operator=(const std::string & bitstring) &; /// Assignment operator from a literal string of '0's and '1's. - BitVector & operator=(const char * bitstring) { return operator=(std::string(bitstring)); } + BitVector & operator=(const char * bitstring) & { return operator=(std::string(bitstring)); } /// Assignment from another BitVector without changing size. BitVector & Import( const BitVector & from_bv, const size_t from_bit=0 ); @@ -228,7 +228,7 @@ namespace emp { /// How many distinct values could be held in this BitVector? 
[[nodiscard]] double GetNumStates() const { return emp::Pow2(num_bits); } - /// Retrive the bit value from the specified index. + /// Retrieve the bit value from the specified index. [[nodiscard]] bool Get(size_t index) const; /// A safe version of Get() for indexing out of range. Useful for representing collections. @@ -240,9 +240,11 @@ namespace emp { /// Set all bits to 1. BitVector & SetAll(); - /// Set a range of bits to one: [start, stop) - BitVector & SetRange(size_t start, size_t stop) - { return ApplyRange([](field_t){ return FIELD_ALL; }, start, stop); } + /// Set a range of bits to value (default one): [start, stop) + BitVector & SetRange(size_t start, size_t stop, bool value=true) { + if (value) return ApplyRange([](field_t){ return FIELD_ALL; }, start, stop); + return Clear(start, stop); + } /// Set all bits to 0. BitVector & Clear(); @@ -252,7 +254,7 @@ namespace emp { /// Set bits to 0 in the range [start, stop) BitVector & Clear(const size_t start, const size_t stop) - { return ApplyRange([](field_t){ return 0; }, start, stop); } + { return ApplyRange([](field_t) -> size_t { return 0; }, start, stop); } /// Const index operator -- return the bit at the specified position. @@ -299,7 +301,7 @@ namespace emp { const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Set all bits randomly, with a given number of ones. - BitVector & ChooseRandom(Random & random, const int target_ones, + BitVector & ChooseRandom(Random & random, const size_t target_ones, const size_t start_pos=0, size_t stop_pos=MAX_BITS); /// Flip random bits with a given probability. @@ -345,7 +347,7 @@ namespace emp { // ========= Access Groups of bits ========= // - /// Retrive the byte at the specified byte index. + /// Retrieve the byte at the specified byte index. [[nodiscard]] uint8_t GetByte(size_t index) const; /// Get a read-only view into the internal array used by BitVector. 
@@ -484,7 +486,7 @@ namespace emp { [[nodiscard]] int FindOne() const; /// Deprecated: Return the position of the first one; return -1 if no ones in vector. - [[deprecated("Renamed to more acurate FindOne()")]] + [[deprecated("Renamed to more accurate FindOne()")]] [[nodiscard]] int FindBit() const { return FindOne(); } /// Return the position of the first one after start_pos; return -1 if no ones in vector. @@ -494,8 +496,13 @@ namespace emp { /// [[nodiscard]] int FindOne(const size_t start_pos) const; + /// Special version of FindOne takes int; most common way to call. + [[nodiscard]] int FindOne(int start_pos) const { + return FindOne(static_cast(start_pos)); + } + /// Deprecated version of FindOne(). - [[deprecated("Renamed to more acurate FindOne(start_pos)")]] + [[deprecated("Renamed to more accurate FindOne(start_pos)")]] [[nodiscard]] int FindBit(const size_t start_pos) const; /// Find the most-significant set-bit. @@ -505,12 +512,16 @@ namespace emp { int PopOne(); /// Deprecated version of PopOne(). - [[deprecated("Renamed to more acurate PopOne()")]] + [[deprecated("Renamed to more accurate PopOne()")]] int PopBit() { return PopOne(); } /// Return positions of all ones. [[nodiscard]] emp::vector GetOnes() const; + /// Collect positions of ones in the provided vector (allows id type choice) + template + emp::vector & GetOnes(emp::vector & out_vals) const; + /// Find the length of the longest continuous series of ones. [[nodiscard]] size_t LongestSegmentOnes() const; @@ -665,13 +676,22 @@ namespace emp { [[nodiscard]] inline BitVector operator~() const { return NOT(); } /// Operator bitwise AND... - [[nodiscard]] inline BitVector operator&(const BitVector & ar2) const { return AND(ar2); } + [[nodiscard]] inline BitVector operator&(const BitVector & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return AND(ar2); + } /// Operator bitwise OR... 
- [[nodiscard]] inline BitVector operator|(const BitVector & ar2) const { return OR(ar2); } + [[nodiscard]] inline BitVector operator|(const BitVector & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return OR(ar2); + } /// Operator bitwise XOR... - [[nodiscard]] inline BitVector operator^(const BitVector & ar2) const { return XOR(ar2); } + [[nodiscard]] inline BitVector operator^(const BitVector & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return XOR(ar2); + } /// Operator shift left... [[nodiscard]] inline BitVector operator<<(const size_t shift_size) const { return SHIFT(-(int)shift_size); } @@ -680,13 +700,19 @@ namespace emp { [[nodiscard]] inline BitVector operator>>(const size_t shift_size) const { return SHIFT((int)shift_size); } /// Compound operator bitwise AND... - BitVector & operator&=(const BitVector & ar2) { return AND_SELF(ar2); } + BitVector & operator&=(const BitVector & ar2) { + emp_assert(size() == ar2.size()); return AND_SELF(ar2); + } /// Compound operator bitwise OR... - BitVector & operator|=(const BitVector & ar2) { return OR_SELF(ar2); } + BitVector & operator|=(const BitVector & ar2) { + emp_assert(size() == ar2.size()); return OR_SELF(ar2); + } /// Compound operator bitwise XOR... - BitVector & operator^=(const BitVector & ar2) { return XOR_SELF(ar2); } + BitVector & operator^=(const BitVector & ar2) { + emp_assert(size() == ar2.size()); return XOR_SELF(ar2); + } /// Compound operator for shift left... 
BitVector & operator<<=(const size_t shift_size) { return SHIFT_SELF(-(int)shift_size); } @@ -699,10 +725,17 @@ namespace emp { [[nodiscard]] size_t size() const { return num_bits; } void resize(std::size_t new_size) { Resize(new_size); } + void push_back(bool value) { PushBack(value); } + [[nodiscard]] auto at(size_t pos) { return operator[](pos); } + [[nodiscard]] auto at(size_t pos) const { return operator[](pos); } + [[nodiscard]] auto front() { return at(0); } + [[nodiscard]] auto front() const { return at(0); } + [[nodiscard]] auto back() { return at(GetSize()-1); } + [[nodiscard]] auto back() const { return at(GetSize()-1); } [[nodiscard]] bool all() const { return All(); } [[nodiscard]] bool any() const { return Any(); } [[nodiscard]] bool none() const { return !Any(); } - size_t count() const { return CountOnes(); } + [[nodiscard]] size_t count() const { return CountOnes(); } BitVector & flip() { return Toggle(); } BitVector & flip(size_t pos) { return Toggle(pos); } BitVector & flip(size_t start, size_t end) { return Toggle(start, end); } @@ -758,7 +791,7 @@ namespace emp { emp_assert(stop <= num_bits, stop, num_bits); // Stop cannot be past the end of the bits const size_t start_pos = FieldPos(start); // Identify the start position WITHIN a bit field. const size_t stop_pos = FieldPos(stop); // Identify the stop position WITHIN a bit field. - size_t start_field = FieldID(start); // Ideftify WHICH bit field we're starting in. + size_t start_field = FieldID(start); // Identify WHICH bit field we're starting in. const size_t stop_field = FieldID(stop-1); // Identify the last field where we actually make a change. // If the start field and stop field are the same, mask off the middle. @@ -878,7 +911,7 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // Mask necessary to suprress shift count overflow warnings. + // Mask necessary to surpress shift count overflow warnings. 
c &= FIELD_LOG2_MASK; n = (n<>( (-(c+FIELD_BITS-num_bits)) & FIELD_LOG2_MASK )); } @@ -888,27 +921,28 @@ namespace emp { ShiftRight(num_bits - shift_size); OR_SELF(dup); } - else { // For big BitVectors, manual rotating is fater + else { // For big BitVectors, manual rotating is faster // Note: we already modded shift_size by num_bits, so no need to mod by FIELD_SIZE - const int field_shift = ( shift_size + EndGap() ) / FIELD_BITS; + const size_t field_shift = ( shift_size + EndGap() ) / FIELD_BITS; // If we field shift, we need to shift bits by (FIELD_BITS - NumEndBits()) // to account for the filler that gets pulled out of the middle - const int bit_shift = NumEndBits() && (shift_size + field_shift ? EndGap() : 0) % FIELD_BITS; - const int bit_overflow = FIELD_BITS - bit_shift; + const size_t field_gap = field_shift ? EndGap() : 0; + const size_t bit_shift = NumEndBits() && (shift_size + field_gap) % FIELD_BITS; + const size_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields auto field_span = FieldSpan(); std::rotate( field_span.rbegin(), - field_span.rbegin()+field_shift, + field_span.rbegin()+static_cast(field_shift), field_span.rend() ); // if necessary, shift filler bits out of the middle if (NumEndBits()) { - const int filler_idx = (LastField() + field_shift) % NUM_FIELDS; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + const size_t filler_idx = (LastField() + field_shift) % NUM_FIELDS; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); bits[i] >>= (FIELD_BITS - NumEndBits()); } @@ -924,7 +958,7 @@ namespace emp { bits[LastField()] ); - for (int i = LastField(); i > 0; --i) { + for (size_t i = LastField(); i > 0; --i) { bits[i] <<= bit_shift; bits[i] |= (bits[i-1] >> bit_overflow); } @@ -954,7 +988,7 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // mask necessary to suprress shift count overflow warnings + // mask necessary to 
surpress shift count overflow warnings c &= FIELD_LOG2_MASK; n = (n>>c) | (n<<( (num_bits-c) & FIELD_LOG2_MASK )); @@ -965,10 +999,10 @@ namespace emp { ShiftLeft(num_bits - shift_size); OR_SELF(dup); } else { - // for big BitVectors, manual rotating is fater + // for big BitVectors, manual rotating is faster const field_t field_shift = (shift_size / FIELD_BITS) % NUM_FIELDS; - const int bit_shift = shift_size % FIELD_BITS; + const size_t bit_shift = shift_size % FIELD_BITS; const field_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields @@ -981,8 +1015,8 @@ namespace emp { // if necessary, shift filler bits out of the middle if (NumEndBits()) { - const int filler_idx = LastField() - field_shift; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + const size_t filler_idx = LastField() - field_shift; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); bits[i] >>= (FIELD_BITS - NumEndBits()); } @@ -1132,7 +1166,7 @@ namespace emp { } /// Assignment operator. - BitVector & BitVector::operator=(const BitVector & in) { + BitVector & BitVector::operator=(const BitVector & in) & { emp_assert(in.OK()); if (&in == this) return *this; @@ -1152,7 +1186,7 @@ namespace emp { } /// Move operator. - BitVector & BitVector::operator=(BitVector && in) { + BitVector & BitVector::operator=(BitVector && in) & { emp_assert(&in != this); // in is an r-value, so this shouldn't be possible... if (bits) bits.DeleteArray(); // If we already have bits, get rid of them. num_bits = in.num_bits; // Update the number of bits... @@ -1165,7 +1199,7 @@ namespace emp { /// Assignment operator from a std::bitset. 
template - BitVector & BitVector::operator=(const std::bitset & bitset) { + BitVector & BitVector::operator=(const std::bitset & bitset) & { const size_t start_fields = NumFields(); num_bits = NUM_BITS; const size_t new_fields = NumFields(); @@ -1184,7 +1218,7 @@ namespace emp { } /// Assignment operator from a string of '0's and '1's. - BitVector & BitVector::operator=(const std::string & bitstring) { + BitVector & BitVector::operator=(const std::string & bitstring) & { const size_t start_fields = NumFields(); num_bits = bitstring.size(); const size_t new_fields = NumFields(); @@ -1248,7 +1282,7 @@ namespace emp { } // Otherwise bits is null; num_bits should be zero. - else emp_assert(num_bits == 0); + else { emp_assert(num_bits == 0); } return true; } @@ -1257,7 +1291,7 @@ namespace emp { // -------------------- Implementations of common accessors ------------------- - /// Retrive the bit value from the specified index. + /// Retrieve the bit value from the specified index. bool BitVector::Get(size_t index) const { emp_assert(index < num_bits, index, num_bits); const size_t field_id = FieldID(index); @@ -1373,7 +1407,7 @@ namespace emp { } /// Set all bits randomly, with a given number of them being on. - BitVector & BitVector::ChooseRandom(Random & random, const int target_ones, + BitVector & BitVector::ChooseRandom(Random & random, const size_t target_ones, const size_t start_pos, size_t stop_pos) { if (stop_pos == MAX_BITS) stop_pos = num_bits; @@ -1381,8 +1415,7 @@ namespace emp { emp_assert(stop_pos <= num_bits); const size_t target_size = stop_pos - start_pos; - emp_assert(target_ones >= 0); - emp_assert(target_ones <= (int) target_size); + emp_assert(target_ones <= target_size); // Approximate the probability of ones as a starting point. 
double p = ((double) target_ones) / (double) target_size; @@ -1548,7 +1581,7 @@ namespace emp { // ------------------------- Access Groups of bits ------------------------- - /// Retrive the byte at the specified byte index. + /// Retrieve the byte at the specified byte index. uint8_t BitVector::GetByte(size_t index) const { emp_assert(index < NumBytes(), index, NumBytes()); const size_t field_id = Byte2Field(index); @@ -1586,7 +1619,7 @@ namespace emp { if (max_one < 64) return (double) GetUInt64(0); // To grab the most significant field, figure out how much to shift it by. - const int shift_bits = max_one - 63; + const size_t shift_bits = static_cast(max_one) - 63; double out_value = (double) (*this >> shift_bits).GetUInt64(0); out_value *= emp::Pow2(shift_bits); @@ -1774,11 +1807,11 @@ namespace emp { /// Find the most-significant set-bit. int BitVector::FindMaxOne() const { // Find the max field with a one. - int max_field = NumFields() - 1; - while (max_field >= 0 && bits[max_field] == 0) max_field--; + size_t max_field = NumFields() - 1; + while (max_field > 0 && bits[max_field] == 0) max_field--; // If there are no ones, return -1. - if (max_field == -1) return -1; + if (bits[max_field] == 0) return -1; const field_t field = bits[max_field]; // Save a local copy of this field. field_t mask = (field_t) -1; // Mask off the bits still under consideration. @@ -1806,10 +1839,18 @@ namespace emp { /// Return positions of all ones. emp::vector BitVector::GetOnes() const { + emp::vector out_vals; + GetOnes(out_vals); + return out_vals; + } + + /// Return positions of all ones using a specified type. + template + emp::vector & BitVector::GetOnes(emp::vector & out_vals) const { // @CAO -- There are better ways to do this with bit tricks. 
- emp::vector out_vals(CountOnes()); - size_t cur_pos = 0; - for (size_t i = 0; i < num_bits; i++) { + out_vals.resize(CountOnes()); + T cur_pos = 0; + for (T i = 0; i < num_bits; i++) { if (Get(i)) out_vals[cur_pos++] = i; } return out_vals; @@ -2005,9 +2046,9 @@ namespace emp { // adapted from https://stackoverflow.com/questions/2602823/in-c-c-whats-the-simplest-way-to-reverse-the-order-of-bits-in-a-byte for (size_t i = 0; i < NumBytes(); ++i) { unsigned char & b = BytePtr()[i]; - b = (b & 0xF0) >> 4 | (b & 0x0F) << 4; - b = (b & 0xCC) >> 2 | (b & 0x33) << 2; - b = (b & 0xAA) >> 1 | (b & 0x55) << 1; + b = static_cast( (b & 0xF0) >> 4 | (b & 0x0F) << 4 ); + b = static_cast( (b & 0xCC) >> 2 | (b & 0x33) << 2 ); + b = static_cast( (b & 0xAA) >> 1 | (b & 0x55) << 1 ); } // shift out filler bits @@ -2057,7 +2098,7 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // mask necessary to suprress shift count overflow warnings + // mask necessary to surpress shift count overflow warnings c &= FIELD_LOG2_MASK; n = (n<>( (-(c+FIELD_BITS-num_bits)) & FIELD_LOG2_MASK )); @@ -2065,34 +2106,34 @@ namespace emp { // note that we already modded shift_size by num_bits // so there's no need to mod by FIELD_SIZE here - int field_shift = NumEndBits() ? ( + size_t field_shift = NumEndBits() ? ( (shift_size + FIELD_BITS - NumEndBits()) / FIELD_BITS ) : ( shift_size / FIELD_BITS ); // if we field shift, we need to shift bits by (FIELD_BITS - NumEndBits()) // more to account for the filler that gets pulled out of the middle - int bit_shift = NumEndBits() && field_shift ? ( + size_t bit_shift = NumEndBits() && field_shift ? 
( (shift_size + FIELD_BITS - NumEndBits()) % FIELD_BITS ) : ( shift_size % FIELD_BITS ); - int bit_overflow = FIELD_BITS - bit_shift; + size_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields if (field_shift) { auto field_span = FieldSpan(); std::rotate( field_span.rbegin(), - field_span.rbegin()+field_shift, + field_span.rbegin()+static_cast(field_shift), field_span.rend() ); } // if necessary, shift filler bits out of the middle if (NumEndBits()) { - const int filler_idx = (LAST_FIELD + field_shift) % NUM_FIELDS; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + const size_t filler_idx = (LAST_FIELD + field_shift) % NUM_FIELDS; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); bits[i] >>= (FIELD_BITS - NumEndBits()); } @@ -2108,7 +2149,7 @@ namespace emp { bits[LAST_FIELD] ); - for (int i = LAST_FIELD; i > 0; --i) { + for (size_t i = LAST_FIELD; i > 0; --i) { bits[i] <<= bit_shift; bits[i] |= (bits[i-1] >> bit_overflow); } @@ -2140,14 +2181,14 @@ namespace emp { field_t & n = bits[0]; size_t c = shift_size; - // mask necessary to suprress shift count overflow warnings + // mask necessary to surpress shift count overflow warnings c &= FIELD_LOG2_MASK; n = (n>>c) | (n<<( (num_bits-c) & FIELD_LOG2_MASK )); } else { field_t field_shift = (shift_size / FIELD_BITS) % NUM_FIELDS; - int bit_shift = shift_size % FIELD_BITS; + size_t bit_shift = shift_size % FIELD_BITS; field_t bit_overflow = FIELD_BITS - bit_shift; // if rotating more than field capacity, we need to rotate fields @@ -2162,8 +2203,8 @@ namespace emp { // if necessary, shift filler bits out of the middle if (NumEndBits()) { - int filler_idx = LAST_FIELD - field_shift; - for (int i = filler_idx + 1; i < (int)NUM_FIELDS; ++i) { + size_t filler_idx = LAST_FIELD - field_shift; + for (size_t i = filler_idx + 1; i < NUM_FIELDS; ++i) { bits[i-1] |= bits[i] << NumEndBits(); bits[i] >>= (FIELD_BITS - 
NumEndBits()); } diff --git a/include/emp/bits/Bits.hpp b/include/emp/bits/Bits.hpp new file mode 100644 index 0000000000..9fc1ac9730 --- /dev/null +++ b/include/emp/bits/Bits.hpp @@ -0,0 +1,2269 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file Bits.hpp + * @brief A generic bit-handler to replace vector, etc +additional bitwise logic features. + * @note Status: RELEASE + * + * The Bits template allows the user to recreate the functionality of std::vector, + * array, std::bitset, and other such bit-handling classes. + * + * This class stores an arbitrary number of bits in a set of "fields" (typically 32 bits or 64 + * bits per field, depending on which should be faster.) Individual bits can be extracted, + * -or- bitwise logic (including more complex bit magic) can be used on the groups of bits. + * + * The template parameters are: + * DATA_T : How is memory managed? + * ZERO_LEFT : Should the index of zero be the left-most bit? (right-most if false) + * + * Specializations are: + * BitVector : A replacement for std::vector (index 0 is on left) + * BitValue : Like BitVector, but index 0 is on the right + * StaticBitVector : Like Bitvector, but max size and fixed memory. + * StaticBitValue : Like BitValue, but max size and fixed memory. + * BitArray : A replacement for std::array (index 0 is on left) + * BitSet : A replacement for std::bitset (index 0 is on right) + * + * In the case of replacements, the aim was for identical functionality, but many additional + * features, especially associated with bitwise logic operations. + * + * @note Compile with -O3 and -msse4.2 for fast bit counting. + * + * + * @todo Most of the operators don't check to make sure that both Bit groups are the same size. + * We should create versions (Intersection() and Union()?) that adjust sizes if needed. 
+ * @todo Do small BitVector optimization. Currently we have number of bits (8 bytes) and a + * pointer to the memory for the bitset (another 8 bytes), but we could use those 16 bytes + * as 1 byte of size info followed by 15 bytes of bitset (120 bits!) + * @todo For large BitVectors we can use a factory to preserve/adjust bit info. That should be + * just as efficient than a reserve, but without the need to store extra in-class info. + * @todo Think about how iterators should work for Bit collections. It should probably go + * bit-by-bit, but there are very few circumstances where that would be useful. Going + * through the positions of all ones would be more useful, but perhaps less intuitive. + */ + +#ifndef EMP_BITS_BITS_HPP_INCLUDE +#define EMP_BITS_BITS_HPP_INCLUDE + + +#include +#include +#include +#include +#include +#include + +#include "../base/array.hpp" +#include "../base/assert.hpp" +#include "../base/error.hpp" +#include "../base/Ptr.hpp" +#include "../base/vector.hpp" +#include "../datastructs/hash_utils.hpp" +#include "../math/constants.hpp" +#include "../math/math.hpp" +#include "../math/Random.hpp" +#include "../meta/type_traits.hpp" + +#include "Bits_Data.hpp" +#include "_bitset_helpers.hpp" +#include "bitset_utils.hpp" + + +namespace emp { + + /// @brief A flexible base template to handle BitVector, BitArray, BitSet, & other combinations. + /// @tparam DATA_T How is this Bits object allowed to change size? + /// @tparam ZERO_LEFT Should the index of zero be the left-most bit? (right-most if false) + template + class Bits { + using this_t = Bits; + using field_t = bits_field_t; + + // All internal data (and base-level manipulators) for Bits. 
+ DATA_T _data; + + static constexpr size_t FIELD_BITS = NUM_FIELD_BITS; + + // Number of bits needed to specify position in a field + mask + static constexpr size_t FIELD_LOG2 = static_cast(emp::Log2(FIELD_BITS)); + static constexpr field_t FIELD_LOG2_MASK = MaskLow(FIELD_LOG2); + + static constexpr field_t FIELD_0 = (field_t) 0; ///< All bits in a field set to 0 + static constexpr field_t FIELD_1 = (field_t) 1; ///< Least significant bit set to 1 + static constexpr field_t FIELD_255 = (field_t) 255; ///< Least significant 8 bits set to 1 + static constexpr field_t FIELD_ALL = ~FIELD_0; ///< All bits in a field set to 1 + + // Identify the field that a specified bit is in. + [[nodiscard]] static constexpr size_t FieldID(const size_t index) { return index / FIELD_BITS; } + + // Identify the position within a field where a specified bit is. + [[nodiscard]] static constexpr size_t FieldPos(const size_t index) { return index & (FIELD_BITS-1); } + + // Identify which field a specified byte position would be in. + [[nodiscard]] static constexpr size_t Byte2Field(const size_t index) { return index / sizeof(field_t); } + + // Convert a byte position in Bits to a byte position in the target field. + [[nodiscard]] static constexpr size_t Byte2FieldPos(const size_t index) { return FieldPos(index * 8); } + + [[nodiscard]] constexpr field_t MaskField(size_t mask_size) const { + return MaskLow(mask_size); + } + [[nodiscard]] constexpr field_t MaskField(size_t mask_size, size_t offset) const { + return MaskLow(mask_size) << offset; + } + + // Assume that the size of the bits has already been adjusted to be the size of the one + // being copied and only the fields need to be copied over. + void RawCopy(const Ptr from, size_t copy_fields=emp::MAX_SIZE_T); + + // Shortcut for RawCopy if we are copying a whole other Bits object. 
+ template + void RawCopy(const Bits & in_bits) { + RawCopy(in_bits.FieldPtr(), in_bits.NumFields()); + } + + // Copy bits from one position in the genome to another; leave old positions unchanged. + constexpr void RawMove(const size_t from_start, const size_t from_stop, const size_t to); + + // Convert the bits to bytes (note that bits are NOT in order at the byte level!) + [[nodiscard]] emp::Ptr BytePtr() { return _data.BytePtr(); } + + // Convert the bits to const bytes array (note that bits are NOT in order at the byte level!) + [[nodiscard]] emp::Ptr BytePtr() const { return _data.BytePtr(); } + + // Any bits past the last "real" bit in the last field should be kept as zeros. + constexpr this_t & ClearExcessBits() { + if (_data.NumEndBits()) _data.bits[_data.LastField()] &= _data.EndMask(); + return *this; + } + + // Apply a transformation to each bit field in a specified range. + template + Bits & ApplyRange(const FUN_T & fun, size_t start, size_t stop); + + // Helper: call SHIFT with positive number + constexpr void ShiftLeft(const size_t shift_size); + + // Helper for calling SHIFT with negative number + // Raw indicates if we should keep bits that are technically out of range; may be needed if + // we are trying to shift bits back INTO range after another operation. + constexpr void ShiftRight(const size_t shift_size, bool raw=false); + + /// Helper: call ROTATE with negative number instead + constexpr void ROTL_SELF(const size_t shift_size_raw); + + /// Helper for calling ROTATE with positive number + constexpr void ROTR_SELF(const size_t shift_size_raw); + + public: + /// @brief Default constructor; will build the default number of bits (often 0, but not always) + /// @param init_val Initial value of all default bits. 
+ Bits(bool init_val=0) { if (init_val) SetAll(); else Clear(); } + + /// @brief Build a new Bits with specified bit count and initialization (default 0) + Bits(size_t in_num_bits, bool init_val=false); + + // Prevent ambiguous conversions... + /// @brief Anything not otherwise defined for first argument, convert to size_t. + template ::value, int>::type = 0> + Bits(T in_num_bits, bool init_val=false) : Bits(static_cast(in_num_bits), init_val) {} + + /// @brief Copy constructor of existing bits object. + Bits(const Bits & in) = default; + + /// @brief Constructor for other type of existing bits object. + template + Bits(const Bits & in); + + /// @brief Move constructor of existing bit field. + Bits(this_t && in) = default; + + /// @brief Constructor to generate a Bits from a std::bitset. + template + explicit Bits(const std::bitset & bitset); + + /// @brief Constructor to generate a Bits from a string of '0's and '1's. + Bits(const std::string & bitstring); + + /// @brief Constructor to generate a Bits from a literal string of '0's and '1's. + Bits(const char * bitstring) : Bits(std::string(bitstring)) {} + + /// @brief Constructor to generate a random set of bits in the default size. + /// @param random Random number generator to use. + Bits(Random & random); + + /// @brief Constructor to generate random Bits with provided prob of 1's, default size. + /// @param random Random number generator to use. + /// @param p1 Probability of a bit being a one. + Bits(Random & random, const double p1); + + /// @brief Constructor to generate random Bits with specified # of ones, default size. + /// @param random Random number generator to use. + /// @param target_ones Number of ones to include in the Bits. + Bits(Random & random, const size_t target_ones); + + /// @brief Constructor to generate random Bits with specified # of ones, default size. + /// @param random Random number generator to use. + /// @param target_ones Number of ones to include in the Bits. 
+ Bits(Random & random, const int target_ones) : Bits(random, (size_t) target_ones) { } + + /// @brief Constructor to generate a specified number of random Bits (with equal prob of 0 or 1). + Bits(size_t in_num_bits, Random & random); + + /// @brief Constructor to generate a random Bits with provided prob of 1's. + Bits(size_t in_num_bits, Random & random, const double p1); + + /// @brief Constructor to generate a random Bits with provided number of 1's. + Bits(size_t in_num_bits, Random & random, const size_t target_ones); + + /// @brief Constructor to generate a random Bits with provided number of 1's. + Bits(size_t in_num_bits, Random & random, const int target_ones) + : Bits(in_num_bits, random, (size_t) target_ones) { } + + /// @brief Initializer list constructor. + template Bits(const std::initializer_list l); + + /// @brief Copy, but with a resize. + template + Bits(const Bits & in, size_t new_size); + + /// @brief Destructor + ~Bits() = default; + + /// @brief Copy assignment operator. + Bits & operator=(const Bits & in) &; + + /// @brief Assignment operator for other Bits object + template + Bits & operator=(const Bits & in) &; + + /// @brief Move operator. + Bits & operator=(Bits && in) &; + + /// @brief Assignment operator from a std::bitset. + template + Bits & operator=(const std::bitset & bitset) &; + + /// @brief Assignment operator from a string of '0's and '1's. + Bits & operator=(const std::string & bitstring) &; + + /// @brief Assignment operator from a literal string of '0's and '1's. + Bits & operator=(const char * bitstring) & { return operator=(std::string(bitstring)); } + + /// @brief Assignment from another Bits object without changing size. + template + Bits & Import( + const Bits & from_bits, + const size_t from_start_pos=0, + size_t max_copy_bits=emp::MAX_SIZE_T + ); + + /// @brief Convert to a Bits of a different size. 
+ template > + [[nodiscard]] OUT_T Export(size_t out_size, size_t start_bit=0) const; + + /// @brief Convert to a BitArray of a different size. + template + [[nodiscard]] Bits,true> + ExportArray(size_t start_bit=0) const { + return Export< Bits,true> >(NUM_BITS, start_bit); + } + + /// @brief concatenate another Bits object on to the end of this one. + template + Bits & Append(const Bits & in_bits); + + // @brief Scan this bitvector to make sure that there are no internal problems. + [[nodiscard]] bool OK() const { return _data.OK(); } + + + // ========= Accessors ========= // + + /// @brief How many bits do we currently have? + [[nodiscard]] constexpr auto GetSize() const { return _data.NumBits(); } + + /// @brief How many bytes are in this Bits? (includes empty field space) + [[nodiscard]] constexpr auto GetNumBytes() const { return _data.NumBytes(); } + + /// @brief How many distinct values could be held in this Bits? + [[nodiscard]] constexpr double GetNumStates() const { return emp::Pow2(_data.NumBits()); } + + /// @brief Retrieve the bit value from the specified index. + [[nodiscard]] constexpr bool Get(size_t index) const; + + /// @brief A safe version of Get() for indexing out of range. Useful for representing collections. + [[nodiscard]] constexpr bool Has(size_t index) const { + return (index < _data.NumBits()) ? Get(index) : false; + } + + /// @brief Update the bit value at the specified index. + Bits & Set(size_t index, bool value=true); + + /// @brief Set all bits to 1. + Bits & SetAll(); + + /// @brief Set a range of bits to value (default one): [start, stop) + Bits & SetRange(size_t start, size_t stop, bool value=true) { + if (value) return ApplyRange([](field_t){ return FIELD_ALL; }, start, stop); + return Clear(start, stop); + } + + /// @brief Set all bits to 0. + Bits & Clear(); + + /// @brief Set specific bit to 0. 
+ Bits & Clear(size_t index) { return Set(index, false); } + + /// @brief Set bits to 0 in the range [start, stop) + Bits & Clear(const size_t start, const size_t stop) { + return ApplyRange([](field_t) -> size_t { return 0; }, start, std::min(stop,GetSize())); + } + + + /// @brief Const index operator -- return the bit at the specified position. + [[nodiscard]] bool operator[](size_t index) const { return Get(index); } + + /// @brief Index operator; return proxy to bit at specified position usable as an lvalue. + BitProxy operator[](size_t index) { return BitProxy(*this, index); } + + /// @brief Change every bit in the sequence. + Bits & Toggle() { return NOT_SELF(); } + + /// @brief Change a specified bit to the opposite value + Bits & Toggle(size_t index); + + /// @brief Flips all the bits in a range [start, end) + Bits & Toggle(size_t start, size_t stop) + { return ApplyRange([](field_t x){ return ~x; }, start, stop); } + + /// @brief Return true if ANY bits are set to 1, otherwise return false. + [[nodiscard]] bool Any() const; + + /// @brief Return true if NO bits are set to 1, otherwise return false. + [[nodiscard]] bool None() const { return !Any(); } + + /// @brief Return true if ALL bits are set to 1, otherwise return false. + // @CAO: Can speed up by not duplicating Bits; fields should be all 1, last should be mask. + [[nodiscard]] bool All() const { return (~(*this)).None(); } + + /// @brief Resize this Bits object to have the specified number of bits (if allowed) + Bits & Resize(size_t new_bits) { _data.RawResize(new_bits, true); return *this; } + + + // ========= Randomization functions ========= // + + /// @brief Set all bits randomly, with a 50% probability of being a 0 or 1. + Bits & Randomize(Random & random); + + /// @brief Set all bits randomly, with probability specified at compile time. 
+ template + Bits & RandomizeP(Random & random, const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Set all bits randomly, with a given probability of being a one. + Bits & Randomize(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Set all bits randomly, with a given number of ones. + Bits & ChooseRandom(Random & random, const size_t target_ones, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Flip random bits with a given probability. + Bits & FlipRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Set random bits with a given probability (does not check if already set.) + Bits & SetRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Unset random bits with a given probability (does not check if already zero.) + Bits & ClearRandom(Random & random, const double p, + const size_t start_pos=0, size_t stop_pos=MAX_SIZE_T); + + /// @brief Flip a specified number of random bits. + Bits & FlipRandomCount(Random & random, const size_t target_bits); + + /// @brief Set a specified number of random bits (does not check if already set.) + Bits & SetRandomCount(Random & random, const size_t target_bits); + + /// @brief Unset a specified number of random bits (does not check if already zero.) + Bits & ClearRandomCount(Random & random, const size_t target_bits); + + + // ========= Comparison Operators ========= // + + /// @brief Compare two bits objects, even with different template arguments. 
+ template + [[nodiscard]] bool operator==(const Bits & in) const; + template + [[nodiscard]] bool operator!=(const Bits & in) const { return !(*this == in); } + template + [[nodiscard]] bool operator< (const Bits & in) const; + template + [[nodiscard]] bool operator> (const Bits & in) const { return in < *this; } + template + [[nodiscard]] bool operator<=(const Bits & in) const { return !(in < *this); } + template + [[nodiscard]] bool operator>=(const Bits & in) const { return !(*this < in); } + + + // ========= Conversion Operators ========= // + + /// @brief Automatically convert Bits to other vector types. + template operator emp::vector(); + + /// @brief Casting a bit array to bool identifies if ANY bits are set to 1. + explicit operator bool() const { return Any(); } + + + // ========= Access Groups of bits ========= // + + /// @brief Retrieve the byte at the specified byte index. + [[nodiscard]] uint8_t GetByte(size_t index) const; + + /// @brief Get a read-only view into the internal array used by Bits. + /// @return Read-only span of Bits's bytes. + [[nodiscard]] auto GetBytes() const { return _data.AsByteSpan(); } + + /// @brief Return a span with all fields in order. + [[nodiscard]] std::span FieldSpan() { + return std::span(_data.FieldPtr().Raw(), _data.NumFields()); + } + + /// @brief Return a const span with all fields in order. + [[nodiscard]] std::span FieldSpan() const { + return std::span(_data.FieldPtr().Raw(), _data.NumFields()); + } + + [[nodiscard]] size_t NumFields() const { return _data.NumFields(); } + + /// @brief Return a pointer to the set of fields. + [[nodiscard]] auto FieldPtr() { return _data.FieldPtr(); } + + /// @brief Return a const pointer to the set of fields. + [[nodiscard]] auto FieldPtr() const { return _data.FieldPtr(); } + + /// @brief Get a read-only pointer to the internal array used by Bits. + /// (note that bits are NOT in order at the byte level!) + /// @return Read-only pointer to Bits' bytes. 
+ [[nodiscard]] emp::Ptr RawBytes() const { return BytePtr(); } + + /// @brief Update the byte at the specified byte index. + void SetByte(size_t index, uint8_t value); + + /// @brief Get overall base-2 value of this Bits, returning as a double. + [[nodiscard]] double GetValue() const; + + /// @brief Get specified type at a given index (in steps of that type size) + template + [[nodiscard]] T GetValueAtIndex(const size_t index) const; + + /// @brief Retrieve the 8-bit uint from the specified uint index. + [[nodiscard]] uint8_t GetUInt8(size_t index) const { return GetValueAtIndex(index); } + + /// @brief Retrieve the 16-bit uint from the specified uint index. + [[nodiscard]] uint16_t GetUInt16(size_t index) const { return GetValueAtIndex(index); } + + /// @brief Retrieve the 32-bit uint from the specified uint index. + [[nodiscard]] uint32_t GetUInt32(size_t index) const { return GetValueAtIndex(index); } + + /// @brief Retrieve the 64-bit uint from the specified uint index. + [[nodiscard]] uint64_t GetUInt64(size_t index) const { return GetValueAtIndex(index); } + + /// @brief By default, retrieve the 32-bit uint from the specified uint index. + [[nodiscard]] uint32_t GetUInt(size_t index) const { return GetUInt32(index); } + + + /// @brief Set specified type at a given index (in steps of that type size) + template Bits & SetValueAtIndex(const size_t index, T value); + + /// @brief Update the 8-bit uint at the specified uint index. + void SetUInt8(const size_t index, uint8_t value) { SetValueAtIndex(index, value); } + + /// @brief Update the 16-bit uint at the specified uint index. + void SetUInt16(const size_t index, uint16_t value) { SetValueAtIndex(index, value); } + + /// @brief Update the 32-bit uint at the specified uint index. + void SetUInt32(const size_t index, uint32_t value) { SetValueAtIndex(index, value); } + + /// @brief Update the 64-bit uint at the specified uint index. 
+ void SetUInt64(const size_t index, uint64_t value) { SetValueAtIndex(index, value); } + + /// @brief By default, update the 32-bit uint at the specified uint index. + void SetUInt(const size_t index, uint32_t value) { SetUInt32(index, value); } + + + /// @brief Get specified type starting at a given BIT position. + template + [[nodiscard]] T GetValueAtBit(const size_t index) const; + + /// @brief Retrieve the 8-bit uint starting at the specified bit position. + [[nodiscard]] uint8_t GetUInt8AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief Retrieve the 16-bit uint starting at the specified bit position. + [[nodiscard]] uint16_t GetUInt16AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief Retrieve the 32-bit uint starting at the specified bit position. + [[nodiscard]] uint32_t GetUInt32AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief Retrieve the 64-bit uint starting at the specified bit position. + [[nodiscard]] uint64_t GetUInt64AtBit(size_t index) const { return GetValueAtBit(index); } + + /// @brief By default, retrieve the 32-bit uint starting at the specified bit position. + [[nodiscard]] uint32_t GetUIntAtBit(size_t index) const { return GetUInt32AtBit(index); } + + + template Bits & SetValueAtBit(const size_t index, T value); + + /// @brief Update the 8-bit uint at the specified uint index. + void SetUInt8AtBit(const size_t index, uint8_t value) { SetValueAtBit(index, value); } + + /// @brief Update the 16-bit uint at the specified uint index. + void SetUInt16AtBit(const size_t index, uint16_t value) { SetValueAtBit(index, value); } + + /// @brief Update the 32-bit uint at the specified uint index. + void SetUInt32AtBit(const size_t index, uint32_t value) { SetValueAtBit(index, value); } + + /// @brief Update the 64-bit uint at the specified uint index. + void SetUInt64AtBit(const size_t index, uint64_t value) { SetValueAtBit(index, value); } + + /// @brief By default, update the 32-bit uint at the specified uint index. 
+ void SetUIntAtBit(const size_t index, uint32_t value) { SetUInt32AtBit(index, value); } + + + // ========= Other Analyses ========= // + + /// @brief A simple hash function for bit vectors. + [[nodiscard]] std::size_t Hash(size_t start_field=0) const; + + /// @brief Count the number of ones in Bits. + [[nodiscard]] constexpr size_t CountOnes() const; + + /// @brief Count the number of ones in a range within Bits. [start, end) + [[nodiscard]] constexpr size_t CountOnes(size_t start, size_t end) const; + + /// @brief Faster counting of ones for very sparse bit vectors. + [[nodiscard]] constexpr size_t CountOnes_Sparse() const; + + /// @brief Count the number of zeros in Bits. + [[nodiscard]] constexpr size_t CountZeros() const { return GetSize() - CountOnes(); } + + /// @brief Pop the last bit in the vector. + /// @return value of the popped bit. + bool PopBack(); + + /// @brief Push given bit(s) onto the back of a vector. + /// @param bit value of bit to be pushed. + /// @param num number of bits to be pushed. + void PushBack(const bool bit=true, const size_t num=1); + + /// @brief Insert bit(s) into any index of vector using bit magic. + /// Blog post on implementation reasoning: https://devolab.org/?p=2249 + /// @param index location to insert bit(s). + /// @param val value of bit(s) to insert. + /// @param num number of bits to insert, default 1. + void Insert(const size_t index, const bool val=true, const size_t num=1); + + /// @brief Delete bits from any index in a vector. + // TODO: consider a bit magic approach here. + /// @param index location to delete bit(s). + /// @param num number of bits to delete, default 1. + void Delete(const size_t index, const size_t num=1); + + /// @brief Return the position of the first one; return -1 if no ones in vector. + [[nodiscard]] int FindOne() const; + + /// Deprecated: Return the position of the first one; return -1 if no ones in vector. 
+ [[deprecated("Renamed to more accurate FindOne()")]] + [[nodiscard]] int FindBit() const { return FindOne(); } + + /// @brief Return the position of the first one after start_pos (or -1 if none) + /// You can loop through all 1-bit positions of Bits object "bits" with: + /// + /// for (int pos = bits.FindOne(); pos >= 0; pos = bits.FindOne(pos+1)) { ... } + /// + [[nodiscard]] int FindOne(const size_t start_pos) const; + + /// @brief Special version of FindOne takes int; most common way to call. + [[nodiscard]] int FindOne(int start_pos) const { + return FindOne(static_cast(start_pos)); + } + + /// Deprecated version of FindOne(). + [[deprecated("Renamed to more accurate FindOne(start_pos)")]] + [[nodiscard]] int FindBit(const size_t start_pos) const; + + /// @brief Find the most-significant set-bit. + [[nodiscard]] int FindMaxOne() const; + + /// @brief Return the position of the first one and change it to a zero. Return -1 if none. + int PopOne(); + + /// Deprecated version of PopOne(). + [[deprecated("Renamed to more accurate PopOne()")]] + int PopBit() { return PopOne(); } + + /// @brief Return vector of positions of all ones. + [[nodiscard]] emp::vector GetOnes() const; + + /// @brief Collect positions of ones in the provided vector (allows id type choice) + template + emp::vector & GetOnes(emp::vector & out_vals) const; + + /// @brief Find the length of the longest continuous series of ones. + [[nodiscard]] size_t LongestSegmentOnes() const; + + /// @brief Return true if any ones are in common with another Bits. + [[nodiscard]] bool HasOverlap(const Bits & in) const; + + + // ========= Print/String Functions ========= // + + /// @brief Convert a specified bit to a character. + [[nodiscard]] char GetAsChar(size_t id) const { return Get(id) ? 
'1' : '0'; } + + /// @brief Convert this Bits to a vector string [index 0 based on ZERO_LEFT] + [[nodiscard]] std::string ToString() const; + + /// @brief Convert this Bits to an array-based string [index 0 on left] + [[nodiscard]] std::string ToArrayString() const; + + /// @brief Convert this Bits to a numerical string [index 0 on right] + [[nodiscard]] std::string ToBinaryString() const; + + /// @brief Convert this Bits to a series of IDs + [[nodiscard]] std::string ToIDString(const std::string & spacer=" ") const; + + /// @brief Convert this Bits to a series of IDs with ranges condensed. + [[nodiscard]] std::string ToRangeString(const std::string & spacer=",", + const std::string & ranger="-") const; + + /// @brief Regular print function (from least significant bit to most) + void Print(std::ostream & out=std::cout) const { out << ToString(); } + + /// @brief Numerical print function (from most significant bit to least) + void PrintBinary(std::ostream & out=std::cout) const { out << ToBinaryString(); } + + /// @brief Print from smallest bit position to largest. + void PrintArray(std::ostream & out=std::cout) const { out << ToArrayString(); } + + /// @brief Print a space between each field (or other provided spacer) + void PrintFields(std::ostream & out=std::cout, const std::string & spacer=" ") const; + + /// @brief Print out details about the internals of Bits. + void PrintDebug(std::ostream & out=std::cout, const std::string & label="") const; + + /// @brief Print the positions of all one bits, spaces are the default separator. + void PrintOneIDs(std::ostream & out=std::cout, const std::string & spacer=" ") const; + + /// @brief Print the ones in a range format. E.g., 2-5,7,10-15 + void PrintAsRange(std::ostream & out=std::cout, + const std::string & spacer=",", + const std::string & ranger="-") const; + + /// @brief Overload ostream operator to return Print. 
+ friend std::ostream& operator<<(std::ostream &out, const Bits & bits) { + bits.Print(out); + return out; + } + + + // ========= Boolean Logic and Shifting Operations ========= // + + /// @brief Perform a Boolean NOT with this Bits, store result here, and return this object. + Bits & NOT_SELF(); + + /// @brief Perform a Boolean AND with this Bits, store result here, and return this object. + Bits & AND_SELF(const Bits & bits2); + + /// @brief Perform a Boolean OR with this Bits, store result here, and return this object. + Bits & OR_SELF(const Bits & bits2); + + /// @brief Perform a Boolean NAND with this Bits, store result here, and return this object. + Bits & NAND_SELF(const Bits & bits2); + + /// @brief Perform a Boolean NOR with this Bits, store result here, and return this object. + Bits & NOR_SELF(const Bits & bits2); + + /// @brief Perform a Boolean XOR with this Bits, store result here, and return this object. + Bits & XOR_SELF(const Bits & bits2); + + /// @brief Perform a Boolean EQU with this Bits, store result here, and return this object. + Bits & EQU_SELF(const Bits & bits2); + + + /// @brief Perform a Boolean NOT on this Bits and return the result. + [[nodiscard]] Bits NOT() const { return Bits(*this).NOT_SELF(); } + + /// @brief Perform a Boolean AND on this Bits and return the result. + [[nodiscard]] Bits AND(const Bits & bits2) const { return Bits(*this).AND_SELF(bits2); } + + /// @brief Perform a Boolean OR on this Bits and return the result. + [[nodiscard]] Bits OR(const Bits & bits2) const { return Bits(*this).OR_SELF(bits2); } + + /// @brief Perform a Boolean NAND on this Bits and return the result. + [[nodiscard]] Bits NAND(const Bits & bits2) const { return Bits(*this).NAND_SELF(bits2); } + + /// @brief Perform a Boolean NOR on this Bits and return the result. + [[nodiscard]] Bits NOR(const Bits & bits2) const { return Bits(*this).NOR_SELF(bits2); } + + /// @brief Perform a Boolean XOR on this Bits and return the result. 
+ [[nodiscard]] Bits XOR(const Bits & bits2) const { return Bits(*this).XOR_SELF(bits2); } + + /// @brief Perform a Boolean EQU on this Bits and return the result. + [[nodiscard]] Bits EQU(const Bits & bits2) const { return Bits(*this).EQU_SELF(bits2); } + + + /// @brief Positive shifts left and negative right (0 does nothing); return result. + [[nodiscard]] Bits SHIFT(const int shift_size) const; + + /// @brief Positive shifts left and negative right; store result here, and return *this. + Bits & SHIFT_SELF(const int shift_size); + + /// @brief Reverse the order of bits in the bitset + Bits & REVERSE_SELF(); + + /// @brief Reverse order of bits in the bitset. + [[nodiscard]] Bits REVERSE() const; + + /// @brief Positive rotates right and negative goes left; return result. + [[nodiscard]] Bits ROTATE(const int rotate_size) const; + + /// @brief Positive rotates right and negative goes left; store here, and return *this. + Bits & ROTATE_SELF(const int rotate_size); + + /// @brief Sums two Bits objects (following uint rules); returns result. + [[nodiscard]] Bits ADD(const Bits & set2) const; + + /// @brief Sums another Bits object onto this one (following uint rules); returns *this. + Bits & ADD_SELF(const Bits & set2); + + /// @brief Subtracts on Bits object from another (following uint rules); returns result. + [[nodiscard]] Bits SUB(const Bits & set2) const; + + /// @brief Subtracts another Bits object from this one (following uint rules); returns *this. + Bits & SUB_SELF(const Bits & set2); + + + /// @brief Operator bitwise NOT... + [[nodiscard]] inline Bits operator~() const { return NOT(); } + + /// @brief Operator bitwise AND... + [[nodiscard]] inline Bits operator&(const Bits & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return AND(ar2); + } + + /// @brief Operator bitwise OR... 
+ [[nodiscard]] inline Bits operator|(const Bits & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return OR(ar2); + } + + /// @brief Operator bitwise XOR... + [[nodiscard]] inline Bits operator^(const Bits & ar2) const { + emp_assert(size() == ar2.size(), size(), ar2.size()); + return XOR(ar2); + } + + /// @brief Operator shift left... + [[nodiscard]] inline Bits operator<<(const size_t shift_size) const { return SHIFT(-(int)shift_size); } + + /// @brief Operator shift right... + [[nodiscard]] inline Bits operator>>(const size_t shift_size) const { return SHIFT((int)shift_size); } + + /// @brief Compound operator bitwise AND... + Bits & operator&=(const Bits & ar2) { + emp_assert(size() == ar2.size()); return AND_SELF(ar2); + } + + /// @brief Compound operator bitwise OR... + Bits & operator|=(const Bits & ar2) { + emp_assert(size() == ar2.size()); return OR_SELF(ar2); + } + + /// @brief Compound operator bitwise XOR... + Bits & operator^=(const Bits & ar2) { + emp_assert(size() == ar2.size()); return XOR_SELF(ar2); + } + + /// @brief Compound operator for shift left... + Bits & operator<<=(const size_t shift_size) { return SHIFT_SELF(-(int)shift_size); } + + /// @brief Compound operator for shift right... + Bits & operator>>=(const size_t shift_size) { return SHIFT_SELF((int)shift_size); } + + /// @brief Operator plus... + [[nodiscard]] Bits operator+(const Bits & ar2) const { return ADD(ar2); } + + /// @brief Operator minus... + [[nodiscard]] Bits operator-(const Bits & ar2) const { return SUB(ar2); } + + /// @brief Compound operator plus; returns a mutable reference like the other compound operators. + Bits & operator+=(const Bits & ar2) { return ADD_SELF(ar2); } + + /// @brief Compound operator minus; returns a mutable reference like the other compound operators. + Bits & operator-=(const Bits & ar2) { return SUB_SELF(ar2); } + + + // ========= Cereal Compatibility ========= // + + /// @brief Setup this bits object so that it can be stored in an archive and re-loaded. 
+ template + void serialize(Archive & ar) { ar(_data); } + + + // ========= Standard Library Compatibility ========= // + // A set of functions to allow drop-in replacement with std::bitset. + // Non-const at()/front()/back() return the bit proxy BY VALUE: operator[] yields a temporary + // proxy, and a deduced `auto &` return type cannot bind to that temporary. + + [[nodiscard]] constexpr size_t size() const { return _data.NumBits(); } + [[nodiscard]] auto at(size_t pos) { return operator[](pos); } + [[nodiscard]] auto at(size_t pos) const { return operator[](pos); } + [[nodiscard]] auto front() { return at(0); } + [[nodiscard]] auto front() const { return at(0); } + [[nodiscard]] auto back() { return at(GetSize()-1); } + [[nodiscard]] auto back() const { return at(GetSize()-1); } + void resize(std::size_t new_size) { Resize(new_size); } + void push_back(const bool bit=true, const size_t num=1) { PushBack(bit, num); } + void pop_back() { resize(GetSize() - 1); } + [[nodiscard]] constexpr bool all() const { return All(); } + [[nodiscard]] constexpr bool any() const { return Any(); } + [[nodiscard]] constexpr bool none() const { return !Any(); } + [[nodiscard]] constexpr size_t count() const { return CountOnes(); } + Bits & flip() { return Toggle(); } + Bits & flip(size_t pos) { return Toggle(pos); } + Bits & flip(size_t start, size_t end) { return Toggle(start, end); } + void reset() { Clear(); } + void reset(size_t id) { Set(id, false); } + void set() { SetAll(); } + void set(size_t id) { Set(id); } + [[nodiscard]] bool test(size_t index) const { return Get(index); } + auto data() { return FieldSpan(); } + auto data() const { return FieldSpan(); } + }; + + + + // ------------------------ Implementations for Internal Functions ------------------------ + + template + void Bits:: + RawCopy(const Ptr from, size_t num_fields) + { + // If num_fields was not specified, set it to the max number of fields. 
+ if (num_fields == emp::MAX_SIZE_T) num_fields = _data.NumFields(); + + emp_assert(num_fields <= _data.NumFields(), "Trying to RawCopy() more fields than can fit."); + + for (size_t i = 0; i < num_fields; i++) _data.bits[i] = from[i]; + } + + // Move bits from one position in the genome to another; leave old positions unchanged. + // All positions are required to exist and memory must be available for the move. + // @CAO: Can speed up by focusing only on the moved fields (i.e., don't shift unused bits). + template + constexpr void Bits:: + RawMove(const size_t from_start, const size_t from_stop, const size_t to) + { + emp_assert(from_start <= from_stop); // Must move legal region. + emp_assert(from_stop <= _data.NumBits()); // Cannot move from past end. + emp_assert(to <= _data.NumBits()); // Must move to somewhere legal. + + const size_t move_size = from_stop - from_start; // How big is the chunk to move? + emp_assert(to + move_size <= _data.NumBits()); // Must fit in new position. + + // If nothing to copy OR already in place, stop right there. + if (move_size == 0 || from_start == to) return; + + const size_t to_stop = to + move_size; // Where is the end to move it to? + const int shift = (int) from_start - (int) to; // How far will the moved piece shift? + this_t move_bits(*this); // Place to hold moved bits. + move_bits.SHIFT_SELF(shift); // Put the moved bits in place. + Clear(to, to_stop); // Make room for the moved bits. + move_bits.Clear(0, to); // Clear everything BEFORE moved bits. + move_bits.Clear(to_stop, _data.NumBits()); // Clear everything AFTER moved bits. + OR_SELF(move_bits); // Merge bit strings together. + } + + template + template + Bits & Bits:: + ApplyRange(const FUN_T & fun, size_t start, size_t stop) + { + emp_assert(start <= stop, start, stop, _data.NumBits()); // Start cannot be after stop. + emp_assert(stop <= _data.NumBits(), stop, _data.NumBits()); // Stop must be in range. + + if (start == stop) return *this; // Empty range. 
+ + const size_t start_pos = FieldPos(start); // Start position WITHIN a bit field. + const size_t stop_pos = FieldPos(stop); // Stop position WITHIN a bit field. + size_t start_field = FieldID(start); // ID of bit field we're starting in. + const size_t stop_field = FieldID(stop); // ID of last field to actively scan. + + // If all bits are in the same field, mask off the middle. + if (start_field == FieldID(stop-1)) { + const size_t apply_bits = stop - start; // How many bits to change? + const field_t mask = MaskField(apply_bits, start_pos); // Target change bits with a mask. + field_t & target = _data.bits[start_field]; // Isolate the field to change. + target = (target & ~mask) | (fun(target) & mask); // Update targeted bits! + } + + // Otherwise mask the ends and fully modify the chunks in between. + else { + // If we're only using a portion of the start field, mask it and set up. + if (start_pos != 0) { + const size_t start_bits = FIELD_BITS - start_pos; // How many bits in start field? + const field_t mask = MaskField(start_bits, start_pos); // Target start bits with a mask. + field_t & target = _data.bits[start_field]; // Isolate the field to change. + target = (target & ~mask) | (fun(target) & mask); // Update targeted bits! + start_field++; // Move to the next field. + } + + // Middle fields + for (size_t cur_field = start_field; cur_field < stop_field; cur_field++) { + _data.bits[cur_field] = fun(_data.bits[cur_field]); + } + + // Set portions of stop field + if (stop_pos != 0) { + const field_t mask = MaskField(stop_pos); // Target end bits with a mask. + field_t & target = _data.bits[stop_field]; // Isolate the field to change. + target = (target & ~mask) | (fun(target) & mask); // Update targeted bits! + } + } + + return *this; + } + + template + constexpr void Bits::ShiftLeft(const size_t shift_size) { + // If we are shifting out of range, clear the bits and stop. 
+ if (shift_size >= GetSize()) { Clear(); return; } + + // If we have only a single field, this operation can be quick. + if (_data.NumFields() == 1) { + (_data.bits[0] <<= shift_size) &= _data.EndMask(); + return; + } + + const size_t field_shift = shift_size / FIELD_BITS; + const size_t bit_shift = shift_size % FIELD_BITS; + const size_t bit_overflow = FIELD_BITS - bit_shift; + + // Loop through each field, from L to R, and update it. + if (field_shift) { + for (size_t i = _data.LastField(); i >= field_shift; --i) { + _data.bits[i] = _data.bits[i - field_shift]; + } + for (size_t i = field_shift; i > 0; --i) _data.bits[i-1] = 0; + } + + // account for bit_shift + if (bit_shift) { + for (size_t i = _data.LastField() ; i > field_shift; --i) { + _data.bits[i] <<= bit_shift; + _data.bits[i] |= (_data.bits[i-1] >> bit_overflow); + } + // Handle final field (field_shift position) + _data.bits[field_shift] <<= bit_shift; + } + + // Mask out any bits that have left-shifted away + ClearExcessBits(); + } + + template + constexpr void Bits::ShiftRight(const size_t shift_size, bool raw) { + if (shift_size == 0) return; + + // If we are shifting out of range, clear the bits and stop. + if (!raw && shift_size >= GetSize()) { Clear(); return; } + + // If we have only a single field, this operation can be quick. + if (_data.NumFields() == 1) { + _data.bits[0] >>= shift_size; + return; + } + + const size_t field_shift = shift_size / FIELD_BITS; + const size_t bit_shift = shift_size % FIELD_BITS; + const size_t bit_overflow = FIELD_BITS - bit_shift; + const size_t NUM_FIELDS = _data.NumFields(); + const size_t field_shift2 = NUM_FIELDS - field_shift; + + // account for field_shift + if (field_shift) { + for (size_t i = 0; i < field_shift2; ++i) { + _data.bits[i] = _data.bits[i + field_shift]; + } + // Clear fields where bits were fully shifted out. 
+ for (size_t i = field_shift2; i < NUM_FIELDS; i++) _data.bits[i] = FIELD_0; + } + + // account for bit_shift + if (bit_shift) { + for (size_t i = 0; i < (field_shift2 - 1); ++i) { + _data.bits[i] >>= bit_shift; + _data.bits[i] |= (_data.bits[i+1] << bit_overflow); + } + _data.bits[field_shift2 - 1] >>= bit_shift; + } + } + + /// Helper: call ROTATE with negative number + template + constexpr void Bits::ROTL_SELF(const size_t shift_size_raw) { + if (GetSize() == 0) return; // Nothing to rotate if there are not bits. + const field_t shift_size = shift_size_raw % GetSize(); + + // Use different approaches based on number of bits. + if (_data.NumFields() == 1) { + _data.bits[0] = emp::RotateBitsLeft(_data.bits[0], shift_size, GetSize()); + } else { // For few bits, shifting L/R and OR-ing is faster. + this_t dup(*this); + dup.ShiftLeft(shift_size); + ShiftRight(GetSize() - shift_size); + OR_SELF(dup); + } + } + + + /// Helper for calling ROTATE with positive number + template + constexpr void Bits::ROTR_SELF(const size_t shift_size_raw) { + const size_t shift_size = shift_size_raw % GetSize(); + + // use different approaches based on number of bits + if (_data.NumFields() == 1) { + _data.bits[0] = emp::RotateBitsRight(_data.bits[0], shift_size, GetSize()); + } else { + this_t dup(*this); + dup.ShiftRight(shift_size); + ShiftLeft(GetSize() - shift_size); + OR_SELF(dup); + } + } + + + /////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////// + // ---------------------------------------------------------------------------------------- + // --------------------- Implementations of Public Member Functions ----------------------- + // ---------------------------------------------------------------------------------------- + /////////////////////////////////////////////////////////////////////////////////////////// + 
/////////////////////////////////////////////////////////////////////////////////////////// + + + // ------------------- Implementations of Constructors and Assignments -------------------- + + /// Build a new Bits object with specified bit count and initialization (default 0) + template + Bits::Bits(size_t _num_bits, bool init_val) : _data(_num_bits) { + if (init_val) SetAll(); else Clear(); + } + + /// Constructor from other type of Bits field. + template + template + Bits::Bits(const Bits & in) + : _data(in.GetSize()) + { + emp_assert(in.OK()); + RawCopy(in); + } + + // -- Move constructor in class; set to default -- + + /// Constructor to generate a Bits from a std::bitset. + template + template + Bits::Bits(const std::bitset & bitset) + : _data(NUM_BITS) + { + // Copy over the values. + Clear(); + for (size_t i = 0; i < NUM_BITS; ++i) Set(i, bitset[i]); + } + + /// Constructor to generate a Bits from a string of '0's and '1's. + template + Bits::Bits(const std::string & bitstring) + : _data(CountBits(bitstring)) + { + Clear(); + + size_t pos = 0; + for (char c : bitstring) { + if (c == '1') { + if constexpr (ZERO_LEFT) Set(pos); + else Set(GetSize() - pos - 1); + pos++; + } + if (c == '0') ++pos; // Leave position as zero and move to next pos. + } + } + + /// Constructor to generate a random set of bits in the default size. + template + Bits::Bits(Random & random) + { + emp_assert(GetSize() > 0, "Trying to construct a random series of bits, but with no bits!"); + Randomize(random); + ClearExcessBits(); + } + + /// Constructor to generate random Bits with provided prob of 1's, default size. + template + Bits::Bits(Random & random, const double p1) + { + emp_assert(GetSize() > 0, "Trying to construct a random series of bits, but with no bits!"); + emp_assert(p1 >= 0.0 && p1 <= 1.0, "Probability of ones out of range", p1); + Randomize(random, p1); + ClearExcessBits(); + } + + /// Constructor to generate random Bits with specified number of ones. 
+ template + Bits::Bits(Random & random, const size_t target_ones) + { + emp_assert(GetSize() > 0, "Trying to construct a random series of bits, but with no bits!"); + ChooseRandom(random, target_ones); + ClearExcessBits(); + } + + /// Constructor to generate a random Bits (with equal prob of 0 or 1). + template + Bits::Bits(size_t in_num_bits, Random & random) + : _data(in_num_bits) + { + Clear(); + Randomize(random); + } + + /// Constructor to generate a random Bits with provided prob of 1's. + template + Bits::Bits(size_t in_num_bits, Random & random, const double p1) + : _data(in_num_bits) + { + emp_assert(p1 >= 0.0 && p1 <= 1.0, "Probability of ones out of range", p1); + Clear(); + Randomize(random, p1); + } + + /// Constructor to generate a random Bits with provided number of 1's. + template + Bits::Bits(size_t in_num_bits, Random & random, const size_t target_ones) + : _data(in_num_bits) + { + Clear(); + ChooseRandom(random, target_ones); + } + + /// Initializer list constructor. + template + template + Bits::Bits(const std::initializer_list l) + : _data(l.size()) + { + Clear(); + size_t idx = 0; + if constexpr (ZERO_LEFT) { + for (auto i = std::begin(l); i != std::end(l); ++i) Set(idx++, *i); + } else { + for (auto i = std::rbegin(l); i != std::rend(l); ++i) Set(idx++, *i); + } + } + + /// Copy, but with a resize. + template + template + Bits:: + Bits(const Bits & in, size_t new_size) + : Bits(new_size) + { + emp_assert(in.OK()); + + // How many fields do we need to copy? + size_t copy_fields = std::min(_data.NumFields(), in.NumFields()); + + RawCopy(in.FieldPtr(), copy_fields); + } + + /// Copy assignment operator. + template + Bits & + Bits::operator=(const Bits & in) & + { + emp_assert(in.OK()); + if (&in != this) { + _data.RawResize(in.GetSize()); + RawCopy(in); + } + + return *this; + } + + /// Other Bits assignment operator. 
+ template + template + Bits & + Bits::operator=(const Bits & in) & + { + emp_assert(in.OK()); + Resize(in.GetSize()); + RawCopy(in); + + return *this; + } + + /// Move operator. + template + Bits & + Bits::operator=(Bits && in) & + { + emp_assert(&in != this); // Shouldn't be possible in an r-value + _data = std::move(in._data); // Shift move into _data objects. + return *this; + } + + /// Assignment operator from a std::bitset. + template + template + Bits & + Bits::operator=(const std::bitset & bitset) & + { + _data.RawResize(NUM_BITS); + for (size_t i = 0; i < NUM_BITS; i++) Set(i, bitset[i]); // Copy bits in. + return ClearExcessBits(); // Set excess bits to zeros. + } + + /// Assignment operator from a string of '0's and '1's. + template + Bits & + Bits::operator=(const std::string & bitstring) & + { + const size_t new_size = CountBits(bitstring); + _data.RawResize(new_size); + + Clear(); + + size_t pos = 0; + for (char c : bitstring) { + if (c == '1') { + if constexpr (ZERO_LEFT) Set(pos); + else Set(new_size - pos - 1); + pos++; + } + if (c == '0') ++pos; // Leave position as zero and move to next pos. + } + + return *this; + } + + + /// Assign from a Bits object of a different size, while keeping current size. + /// If there are too many bits being imported, extras are cut off. + /// If there are fewer bits, the remainder are zero'd out (up to max_copy_bits) + // @CAO: Can copy fields for a speedup. + template + template + Bits & + Bits::Import( + const Bits & from_bits, + const size_t from_start_pos, + size_t max_copy_bits) + { + emp_assert(from_start_pos < from_bits.GetSize()); + size_t bits_available = from_bits.GetSize() - from_start_pos; + + // Actual copied bits is limited by bits available to copy and bits in this object. 
+ size_t copy_size = emp::Min(bits_available, GetSize(), max_copy_bits); + + for (size_t i = 0; i < copy_size; ++i) { + Set(i, from_bits[i+from_start_pos]); + } + + // Any bits AFTER the ones copied, but before the max copy, should be zeroed out. + Clear(copy_size, max_copy_bits); + + return *this; + } + + /// Convert to a Bitset of a different size. + template + template + OUT_T Bits::Export(size_t out_size, size_t start_bit) const { + OUT_T out_bits(out_size); + out_bits.Import(*this, start_bit); + return out_bits; + } + + /// Concatenate another Bits object on to the end of this one. + template + template + Bits & Bits::Append( + const Bits & in_bits + ) { + this_t shift_copy(in_bits); + const size_t old_size = GetSize(); + const size_t new_size = old_size + in_bits.GetSize(); + Resize(new_size); + shift_copy.Resize(new_size); + shift_copy <<= old_size; + OR_SELF(shift_copy); + return *this; + } + + + // -------------------- Implementations of common accessors ------------------- + + /// Retrieve the bit value from the specified index. + template + constexpr bool Bits::Get(size_t index) const { + emp_assert(index < GetSize(), index, GetSize()); + const size_t field_id = FieldID(index); + const size_t pos_id = FieldPos(index); + return _data.bits[field_id] & (FIELD_1 << pos_id); + } + + /// Update the bit value at the specified index. + template + Bits & Bits::Set(size_t index, bool value) { + emp_assert(index < GetSize(), index, GetSize()); + const size_t field_id = FieldID(index); + const size_t pos_id = FieldPos(index); + const field_t pos_mask = FIELD_1 << pos_id; + + if (value) _data.bits[field_id] |= pos_mask; + else _data.bits[field_id] &= ~pos_mask; + + return *this; + } + + /// Set all bits to 1. + template + Bits & Bits::SetAll() { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = FIELD_ALL; + return ClearExcessBits(); + } + + /// Set all bits to 0. 
+ template + Bits & Bits::Clear() { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = FIELD_0; + return *this; + } + + /// Change a specified bit to the opposite value + template + Bits & Bits::Toggle(size_t index) { + emp_assert(index < GetSize(), index, GetSize()); + const size_t field_id = FieldID(index); + const size_t pos_id = FieldPos(index); + const field_t pos_mask = FIELD_1 << pos_id; + + _data.bits[field_id] ^= pos_mask; + + return *this; + } + + + // ------ @CAO CONTINUE HERE!!! ------ + + + template + bool Bits::Any() const { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) { + if (_data.bits[i]) return true; + } + return false; + } + + // ------------------------- Implementations Randomization functions ------------------------- + + /// Set all bits randomly, with a 50% probability of being a 0 or 1. + template + Bits & Bits::Randomize(Random & random) { + random.RandFill(BytePtr(), _data.NumBytes()); + return ClearExcessBits(); + } + + /// Set all bits randomly, with probability specified at compile time. + template + template + Bits & Bits::RandomizeP(Random & random, + const size_t start_pos, size_t stop_pos) { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos); + emp_assert(stop_pos <= GetSize()); + random.RandFillP

(BytePtr(), _data.NumBytes(), start_pos, stop_pos); + return *this; + } + + + /// Set all bits randomly, with a given probability of being on. + template + Bits & + Bits::Randomize(Random & random, const double p, + const size_t start_pos, size_t stop_pos) { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos, start_pos, stop_pos); + emp_assert(stop_pos <= GetSize(), stop_pos, GetSize()); + emp_assert(p >= 0.0 && p <= 1.0, p); + random.RandFill(BytePtr(), _data.NumBytes(), p, start_pos, stop_pos); + return *this; + } + + /// Set all bits randomly, with a given number of them being on. + template + Bits & + Bits::ChooseRandom(Random & random, const size_t target_ones, + const size_t start_pos, size_t stop_pos) { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos); + emp_assert(stop_pos <= GetSize()); + + const size_t target_size = stop_pos - start_pos; + emp_assert(target_ones <= target_size); + + // Approximate the probability of ones as a starting point. + double p = ((double) target_ones) / (double) target_size; + + // If we are not randomizing the whole sequence, we need to track the number of ones + // in the NON-randomized region to subtract off later. + size_t kept_ones = 0; + if (target_size != GetSize()) { + Clear(start_pos, stop_pos); + kept_ones = CountOnes(); + } + + // Try to find a shortcut if p allows.... 
+ // (These values are currently educated guesses) + if (p < 0.12) { if (target_size == GetSize()) Clear(start_pos, stop_pos); } + else if (p < 0.2) RandomizeP(random, start_pos, stop_pos); + else if (p < 0.35) RandomizeP(random, start_pos, stop_pos); + else if (p < 0.42) RandomizeP(random, start_pos, stop_pos); + else if (p < 0.58) RandomizeP(random, start_pos, stop_pos); + else if (p < 0.65) RandomizeP(random, start_pos, stop_pos); + else if (p < 0.8) RandomizeP(random, start_pos, stop_pos); + else if (p < 0.88) RandomizeP(random, start_pos, stop_pos); + else SetRange(start_pos, stop_pos); + + size_t cur_ones = CountOnes(start_pos, stop_pos) - kept_ones; + + // Do we need to add more ones? + while (cur_ones < (size_t) target_ones) { + size_t pos = random.GetUInt(start_pos, stop_pos); + auto bit = operator[](pos); + if (!bit) { + bit.Set(); + cur_ones++; + } + } + + // See if we have too many ones. + while (cur_ones > (size_t) target_ones) { + size_t pos = random.GetUInt(start_pos, stop_pos); + auto bit = operator[](pos); + if (bit) { + bit.Clear(); + cur_ones--; + } + } + + return *this; + } + + /// Flip random bits with a given probability. + // @CAO: Possibly faster to generate a sequence of bits and XORing with them. + template + Bits & + Bits::FlipRandom(Random & random, + const double p, + const size_t start_pos, + size_t stop_pos) + { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos); + emp_assert(stop_pos <= GetSize()); + emp_assert(p >= 0.0 && p <= 1.0, p); + + for (size_t i=start_pos; i < stop_pos; ++i) if (random.P(p)) Toggle(i); + + return *this; + } + + /// Set random bits with a given probability (does not check if already set.) 
+ template + Bits & Bits::SetRandom(Random & random, + const double p, + const size_t start_pos, + size_t stop_pos) + { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos); + emp_assert(stop_pos <= GetSize()); + emp_assert(p >= 0.0 && p <= 1.0, p); + + for (size_t i=start_pos; i < stop_pos; ++i) if (random.P(p)) Set(i); + + return *this; + } + + /// Unset random bits with a given probability (does not check if already zero.) + template + Bits & Bits::ClearRandom(Random & random, + const double p, + const size_t start_pos, + size_t stop_pos) + { + if (stop_pos == MAX_SIZE_T) stop_pos = GetSize(); + + emp_assert(start_pos <= stop_pos); + emp_assert(stop_pos <= GetSize()); + emp_assert(p >= 0.0 && p <= 1.0, p); + + for (size_t i=start_pos; i < stop_pos; ++i) if (random.P(p)) Clear(i); + + return *this; + } + + /// Flip a specified number of random bits. + template + Bits & Bits::FlipRandomCount( + Random & random, + const size_t target_bits + ) { + emp_assert(GetSize() <= GetSize()); + Bits choice(GetSize(), random, target_bits); + return XOR_SELF(choice); + } + + /// Set a specified number of random bits (does not check if already set.) + template + Bits & Bits::SetRandomCount( + Random & random, + const size_t target_bits + ) { + emp_assert(GetSize() <= GetSize()); + Bits choice(GetSize(), random, target_bits); + return OR_SELF(choice); + } + + /// Unset a specified number of random bits (does not check if already zero.) + template + Bits & Bits::ClearRandomCount( + Random & random, + const size_t target_bits + ) { + emp_assert(GetSize() <= GetSize()); + Bits choice(GetSize(), random, GetSize() - target_bits); + return AND_SELF(choice); + } + + + // ------------------------- Implementations of Comparison Operators ------------------------- + + /// Test if two bit vectors are identical. 
+ template + template + bool Bits::operator==(const Bits & in) const { + if (GetSize() != in.GetSize()) return false; + + const size_t NUM_FIELDS = _data.NumFields(); + auto in_fields = in.FieldSpan(); + for (size_t i = 0; i < NUM_FIELDS; ++i) { + if (_data.bits[i] != in_fields[i]) return false; + } + return true; + } + + /// Compare the would-be numerical values of two bit vectors. + template + template + bool Bits::operator<(const Bits & in) const { + if (GetSize() != in.GetSize()) return GetSize() < in.GetSize(); + + const size_t NUM_FIELDS = _data.NumFields(); + auto in_fields = in.FieldSpan(); + for (size_t i = NUM_FIELDS; i > 0; --i) { // Start loop at the largest field. + const size_t pos = i-1; + if (_data.bits[pos] == in_fields[pos]) continue; // If same, keep looking! + return (_data.bits[pos] < in_fields[pos]); // Otherwise, do comparison + } + return false; // Bit vectors are identical. + } + + /// Automatically convert Bits object to other vector types. + template + template + Bits::operator emp::vector() { + emp::vector out(GetSize()); + for (size_t i = 0; i < GetSize(); i++) { + out[i] = (T) Get(i); + } + return out; + } + + + // ------------------------- Access Groups of bits ------------------------- + + /// Retrieve the byte at the specified byte index. + template + uint8_t Bits::GetByte(size_t index) const { + emp_assert(index < _data.NumBytes(), index, _data.NumBytes()); + const size_t field_id = Byte2Field(index); + const size_t pos_id = Byte2FieldPos(index); + return (_data.bits[field_id] >> pos_id) & 255U; + } + + /// Update the byte at the specified byte index. 
+ template + void Bits::SetByte(size_t index, uint8_t value) { + emp_assert(index < _data.NumBytes(), index, _data.NumBytes()); + const size_t field_id = Byte2Field(index); + const size_t pos_id = Byte2FieldPos(index); + const field_t val_uint = value; + _data.bits[field_id] = (_data.bits[field_id] & ~(FIELD_255 << pos_id)) | (val_uint << pos_id); + } + + /// Get the overall value of this BitSet, using a uint encoding, but including all bits + /// and returning the value as a double. + template + double Bits::GetValue() const { + const int max_one = FindMaxOne(); + + // If there are no ones, this value must be 0. + if (max_one == -1) return 0.0; + + // If all ones are in the least-significant field, just return it. + if (max_one < 64) return (double) GetUInt64(0); + + // To grab the most significant field, figure out how much to shift it by. + const size_t shift_bits = static_cast(max_one) - 63; + double out_value = (double) (*this >> shift_bits).GetUInt64(0); + + out_value *= emp::Pow2(shift_bits); + + return out_value; + } + + /// Get specified type at a given index (in steps of that type size) + template + template + T Bits::GetValueAtIndex(const size_t index) const { + // For the moment, must fit inside bounds; eventually should pad with zeros. + emp_assert((index + 1) * sizeof(T) <= _data.TotalBytes()); + + T out_value; + std::memcpy( &out_value, BytePtr().Raw() + index * sizeof(T), sizeof(T) ); + return out_value; + } + + + /// Set specified type at a given index (in steps of that type size) + template + template + Bits & Bits::SetValueAtIndex(const size_t index, T in_value) { + // For the moment, must fit inside bounds; eventually should pad with zeros. + emp_assert((index + 1) * sizeof(T) <= _data.TotalBytes()); + std::memcpy( BytePtr().Raw() + index * sizeof(T), &in_value, sizeof(T) ); + return ClearExcessBits(); + } + + + /// Get the specified type starting from a given BIT position. 
+ template + template + T Bits::GetValueAtBit(const size_t index) const { + // For the moment, must fit inside bounds; eventually should pad with zeros. + emp_assert((index+7)/8 + sizeof(T) < _data.TotalBytes()); + + Bits out_bits(*this); + out_bits >>= index; + + return out_bits.template GetValueAtIndex(0); + } + + + /// Set the specified type starting from a given BIT position. + // @CAO: Can be optimized substantially, especially for long Bits objects. + template + template + Bits & Bits::SetValueAtBit(const size_t index, T value) { + // For the moment, must fit inside bounds; eventually should (?) pad with zeros. + emp_assert((index+7)/8 + sizeof(T) < _data.TotalBytes()); + constexpr size_t type_bits = sizeof(T) * 8; + + const size_t end_pos = Min(index+type_bits, GetSize()); + Clear(index, end_pos); // Clear out the bits where new value will go. + Bits in_bits(GetSize()); // Setup a bitset for the new bits. + in_bits.SetValueAtIndex(0, value); // Insert the new bits. + in_bits <<= index; // Shift new bits into place. + OR_SELF(in_bits); // Place new bits into current Bits object. + + return ClearExcessBits(); + } + + + // ------------------------- Other Analyses ------------------------- + + /// A simple hash function for bit vectors. + template + std::size_t Bits::Hash(size_t start_field) const { + static_assert(std::is_same_v, "Hash() requires fields to be size_t"); + + // If there are no fields left, hash on size one. + if (start_field == _data.NumFields()) return GetSize(); + + // If we have only one field left, combine it with size. + if (start_field == _data.NumFields()-1) return hash_combine(_data.bits[start_field], GetSize()); + + // Otherwise we have more than one field. Combine and recurse. 
+ size_t partial_hash = hash_combine(_data.bits[start_field], _data.bits[start_field+1]); + + return hash_combine(partial_hash, Hash(start_field+2)); + } + + // TODO: see https://arxiv.org/pdf/1611.07612.pdf for fast pop counts + /// Count the number of ones in Bits. + template + constexpr size_t Bits::CountOnes() const { + if (GetSize() == 0) return 0; + const field_t NUM_FIELDS = _data.NumFields(); + size_t bit_count = 0; + for (size_t i = 0; i < NUM_FIELDS; i++) { + // when compiling with -O3 and -msse4.2, this is the fastest population count method. + std::bitset std_bs(_data.bits[i]); + bit_count += std_bs.count(); + } + + emp_assert(bit_count <= GetSize()); + return bit_count; + } + + // TODO: Speed this up so that we don't need to copy out all of the bits. + /// Count the number of ones in a specified range of Bits. + template + constexpr size_t Bits::CountOnes(size_t start, size_t end) const { + emp_assert(start <= end); + emp_assert(end <= GetSize()); + if (start == end) return 0; + const size_t range_size = end-start; + return Export(range_size, start).CountOnes(); + } + + /// Faster counting of ones for very sparse bit vectors. + template + constexpr size_t Bits::CountOnes_Sparse() const { + size_t bit_count = 0; + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; ++i) { + field_t cur_field = _data.bits[i]; + while (cur_field) { + cur_field &= (cur_field-1); // Peel off a single 1. + bit_count++; // Increment the counter + } + } + return bit_count; + } + + /// Pop the last bit in the vector. + /// @return value of the popped bit. + template + bool Bits::PopBack() { + const bool val = Get(GetSize()-1); + Resize(GetSize() - 1); + return val; + } + + /// Push given bit(s) onto the back of a vector. + /// @param bit value of bit to be pushed. + /// @param num number of bits to be pushed. 
+ template + void Bits::PushBack(const bool bit, const size_t num) { + Resize(GetSize() + num); + if (bit) SetRange(GetSize()-num, GetSize()); + } + + /// Insert bit(s) into any index of vector using bit magic. + /// Blog post on implementation reasoning: https://devolab.org/?p=2249 + /// @param index location to insert bit(s). + /// @param val value of bit(s) to insert (default true) + /// @param num number of bits to insert, default 1. + template + void Bits::Insert(const size_t index, const bool val, const size_t num) { + Resize(GetSize() + num); // Adjust to new number of bits. + Bits low_bits(*this); // Copy current bits + SHIFT_SELF(-(int)num); // Shift the high bits into place. + Clear(0, index+num); // Reduce current to just high bits. + low_bits.Clear(index, GetSize()); // Reduce copy to just low bits. + if (val) SetRange(index, index+num); // If new bits should be ones, make it so. + OR_SELF(low_bits); // Put the low bits back in place. + } + + + /// Delete bits from any index in a vector. + /// @param index location to delete bit(s). + /// @param num number of bits to delete, default 1. + template + void Bits::Delete(const size_t index, const size_t num) { + emp_assert(index+num <= GetSize()); // Make sure bits to delete actually exist! + RawMove(index+num, GetSize(), index); // Shift positions AFTER delete into place. + Resize(GetSize() - num); // Crop off end bits. + } + + /// Return the position of the first one; return -1 if no ones in vector. + template + int Bits::FindOne() const { + const size_t NUM_FIELDS = _data.NumFields(); + size_t field_id = 0; + while (field_id < NUM_FIELDS && _data.bits[field_id]==0) field_id++; + return (field_id < NUM_FIELDS) ? + (int) (find_bit(_data.bits[field_id]) + (field_id * FIELD_BITS)) : -1; + } + + /// Return the position of the first one after start_pos; return -1 if no ones in vector. 
+ /// You can loop through all 1-bit positions in "bits" with: + /// + /// for (int pos = bits.FindOne(); pos >= 0; pos = bits.FindOne(pos+1)) { ... } + + template + int Bits::FindOne(const size_t start_pos) const { + if (start_pos >= GetSize()) return -1; // If we're past the end, return fail. + size_t field_id = FieldID(start_pos); // What field do we start in? + const size_t field_pos = FieldPos(start_pos); // What position in that field? + + // If there's a hit in a partial first field, return it. + if (field_pos && (_data.bits[field_id] & ~(MaskField(field_pos)))) { + return (int) (find_bit(_data.bits[field_id] & ~(MaskField(field_pos))) + + field_id * FIELD_BITS); + } + + // Search other fields... + const size_t NUM_FIELDS = _data.NumFields(); + if (field_pos) field_id++; + while (field_id < NUM_FIELDS && _data.bits[field_id]==0) field_id++; + return (field_id < NUM_FIELDS) ? + (int) (find_bit(_data.bits[field_id]) + (field_id * FIELD_BITS)) : -1; + } + + /// Find the most-significant set-bit. + template + int Bits::FindMaxOne() const { + // Find the max field with a one. + size_t max_field = _data.NumFields() - 1; + while (max_field > 0 && _data.bits[max_field] == 0) max_field--; + + // If there are no ones, return -1. + if (_data.bits[max_field] == 0) return -1; + + const field_t field = _data.bits[max_field]; // Save a local copy of this field. + field_t mask = (field_t) -1; // Mask off the bits still under consideration. + size_t offset = 0; // Indicate where the mask should be applied. + size_t range = FIELD_BITS; // Indicate how many bits are in the mask. + + while (range > 1) { + // Cut the range in half and see if we need to adjust the offset. + range /= 2; // Cut range size in half + mask >>= range; // Cut the mask down. + + // Check the upper half of original range; if has a one shift new offset to there. 
+ if (field & (mask << (offset + range))) offset += range; + } + + return (int) (max_field * FIELD_BITS + offset); + } + + /// Return the position of the first one and change it to a zero. Return -1 if no ones. + template + int Bits::PopOne() { + const int out_bit = FindOne(); + if (out_bit >= 0) Clear((size_t) out_bit); + return out_bit; + } + + /// Return positions of all ones. + template + emp::vector Bits::GetOnes() const { + emp::vector out_vals; + GetOnes(out_vals); + return out_vals; + } + + /// Return positions of all ones using a specified type. + template + template + emp::vector & Bits::GetOnes(emp::vector & out_vals) const { + // @CAO -- There are better ways to do this with bit tricks. + out_vals.resize(CountOnes()); + T cur_pos = 0; + for (T i = 0; i < GetSize(); i++) { + if (Get(i)) out_vals[cur_pos++] = i; + } + return out_vals; + } + + /// Find the length of the longest continuous series of ones. + template + size_t Bits::LongestSegmentOnes() const { + size_t length = 0; + Bits test_bits(*this); + while(test_bits.Any()){ + ++length; + test_bits.AND_SELF(test_bits<<1); + } + return length; + } + + /// Return true if any ones are in common with another Bits object. + template + bool Bits::HasOverlap(const Bits & in) const { + const size_t num_fields = std::min(_data.NumFields(), in.NumFields()); + auto in_fields = in.FieldSpan(); + for (size_t i = 0; i < num_fields; ++i) { + // Short-circuit if we find any overlap. 
+ if (_data.bits[i] & in_fields[i]) return true; + } + return false; + } + + + // ------------------------- Printing and string conversion ------------------------- + + /// Convert this Bits object to a vector string [0 index on left] + template + std::string Bits::ToString() const { + if constexpr (ZERO_LEFT) return ToArrayString(); + else return ToBinaryString(); + } + + /// Convert this Bits object to a vector string [0 index on left] + template + std::string Bits::ToArrayString() const { + std::string out_string; + out_string.reserve(GetSize()); + for (size_t i = 0; i < GetSize(); ++i) out_string.push_back(GetAsChar(i)); + return out_string; + } + + /// Convert this Bits object to a numerical string [0 index on right] + template + std::string Bits::ToBinaryString() const { + std::string out_string; + out_string.reserve(GetSize()); + for (size_t i = GetSize(); i > 0; --i) out_string.push_back(GetAsChar(i-1)); + return out_string; + } + + /// Convert this Bits object to a series of IDs + template + std::string Bits::ToIDString(const std::string & spacer) const { + std::stringstream ss; + PrintOneIDs(ss, spacer); + return ss.str(); + } + + /// Convert this Bits object to a series of IDs with ranges condensed. 
+ template + std::string Bits::ToRangeString(const std::string & spacer, + const std::string & ranger) const + { + std::stringstream ss; + PrintAsRange(ss, spacer, ranger); + return ss.str(); + } + + /// Print every bit from the highest index down to index 0, writing the spacer at each field boundary. + template + void Bits::PrintFields(std::ostream & out, const std::string & spacer) const { + for (size_t i = GetSize()-1; i < GetSize(); i--) { // Unsigned i wraps past zero, which ends the loop. + out << Get(i); + if (i && (i % FIELD_BITS == 0)) out << spacer; + } + } + + /// Debug dump: print the label (if any), then each field on its own line (lowest bit first, + /// followed by the field index), and finally a '^' after NumEndBits() spaces (FIELD_BITS if zero). + template + void Bits::PrintDebug( + std::ostream & out, + const std::string & label + ) const { + if (label.size()) out << label << ":\n"; + for (size_t field = 0; field < _data.NumFields(); field++) { + for (size_t bit_id = 0; bit_id < FIELD_BITS; bit_id++) { + bool bit = (FIELD_1 << bit_id) & _data.bits[field]; + out << ( bit ? 1 : 0 ); + } + out << " : " << field << std::endl; + } + size_t end_pos = _data.NumEndBits(); + if (end_pos == 0) end_pos = FIELD_BITS; + for (size_t i = 0; i < end_pos; i++) out << " "; + out << "^" << std::endl; + } + + /// Print the positions of all one bits, spaces are the default separator. + template + void Bits::PrintOneIDs(std::ostream & out, const std::string & spacer) const { + bool started = false; + for (size_t i = 0; i < GetSize(); i++) { + if (Get(i)) { + if (started) out << spacer; + out << i; + started = true; + } + } + } + + /// Print the ones in a range format.
E.g., 2-5,7,10-15 + template + void Bits::PrintAsRange(std::ostream & out, + const std::string & spacer, + const std::string & ranger) const + { + emp::vector ones = GetOnes(); + + for (size_t pos = 0; pos < ones.size(); pos++) { + if (pos) out << spacer; + + size_t start = ones[pos]; + while (pos+1 < ones.size() && ones[pos+1] == ones[pos]+1) pos++; + size_t end = ones[pos]; + + out << start; + if (start != end) out << ranger << end; + } + } + + + // ------------------------- Base Boolean-logic operations ------------------------- + + /// Perform a Boolean NOT with this Bits object, store result here, and return this object. + /// Note: ~ flips every bit of each field, unused end bits included; ClearExcessBits() is then + /// called (presumably to zero those bits again -- its definition is not in this part of the file). + template + Bits & Bits::NOT_SELF() { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~_data.bits[i]; + return ClearExcessBits(); + } + + /// Perform a Boolean AND with this Bits object, store result here, and return this object. + template + Bits & Bits::AND_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = _data.bits[i] & bits2._data.bits[i]; + return *this; + } + + /// Perform a Boolean OR with this Bits object, store result here, and return this object. + template + Bits & Bits::OR_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = _data.bits[i] | bits2._data.bits[i]; + return *this; + } + + /// Perform a Boolean NAND with this Bits object, store result here, and return this object. + template + Bits & Bits::NAND_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~(_data.bits[i] & bits2._data.bits[i]); + return ClearExcessBits(); + } + + /// Perform a Boolean NOR with this Bits object, store result here, and return this object.
+ template + Bits & Bits::NOR_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~(_data.bits[i] | bits2._data.bits[i]); + return ClearExcessBits(); + } + + /// Perform a Boolean XOR with this Bits object, store result here, and return this object. + template + Bits & Bits::XOR_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = _data.bits[i] ^ bits2._data.bits[i]; + return *this; + } + + /// Perform a Boolean EQU with this Bits object, store result here, and return this object. + template + Bits & Bits::EQU_SELF(const Bits & bits2) { + const size_t NUM_FIELDS = _data.NumFields(); + for (size_t i = 0; i < NUM_FIELDS; i++) _data.bits[i] = ~(_data.bits[i] ^ bits2._data.bits[i]); + return ClearExcessBits(); + } + + /// Positive shift_size applies ShiftRight() and negative applies ShiftLeft() (0 does nothing); return result. + template + Bits Bits::SHIFT(const int shift_size) const { + Bits out_bits(*this); + if (shift_size > 0) out_bits.ShiftRight((size_t) shift_size); + else if (shift_size < 0) out_bits.ShiftLeft((size_t) -shift_size); + return out_bits; + } + + /// Positive shift_size applies ShiftRight() and negative applies ShiftLeft(); store result here, and return this object. + template + Bits & Bits::SHIFT_SELF(const int shift_size) { + if (shift_size > 0) ShiftRight((size_t) shift_size); + else if (shift_size < 0) ShiftLeft((size_t) -shift_size); + return *this; + } + + /// Reverse the order of bits in the bitset + template + Bits & Bits::REVERSE_SELF() { + auto bit_span = _data.AsSpan(); + + // Reverse order of whole fields + std::reverse( bit_span.begin(), bit_span.end() ); + + // Reverse the bits in each field. + for (auto & cur_field : bit_span) cur_field = emp::ReverseBits(cur_field); + + // Move the gap to the other side (after reversal the unused end bits sit at the low end of field 0). + if (_data.NumEndBits()) ShiftRight(_data.EndGap(), true); + + return *this; + } + + /// Reverse order of bits in the bitset.
+ template + Bits Bits::REVERSE() const { + Bits out_set(*this); + return out_set.REVERSE_SELF(); + } + + + /// Positive rotates go right and negative rotates go left (0 does nothing); + /// return result. + template + Bits Bits::ROTATE(const int rotate_size) const { + Bits out_set(*this); + if (rotate_size > 0) out_set.ROTR_SELF((field_t) rotate_size); + else if (rotate_size < 0) out_set.ROTL_SELF((field_t) (-rotate_size)); + return out_set; + } + + /// Positive rotates go right and negative rotates go left (0 does nothing); + /// store result here, and return this object. + template + Bits & Bits::ROTATE_SELF(const int rotate_size) { + if (rotate_size > 0) ROTR_SELF((field_t) rotate_size); + else if (rotate_size < 0) ROTL_SELF((field_t) -rotate_size); + return *this; + } + + + /// Addition of two Bitsets. + /// Wraps if it overflows. + /// Returns result. + template + Bits Bits::ADD(const Bits & set2) const{ + Bits out_set(*this); + return out_set.ADD_SELF(set2); + } + + /// Addition of two Bitsets. + /// Wraps if it overflows. + /// Returns this object. + template + Bits & Bits::ADD_SELF(const Bits & set2) { + bool carry = false; + + for (size_t i = 0; i < GetSize()/FIELD_BITS; ++i) { // Full fields only; a partial last field is handled below. + field_t addend = set2._data.bits[i] + static_cast(carry); + carry = set2._data.bits[i] > addend; // Adding the carry wrapped around. + + field_t sum = _data.bits[i] + addend; + carry |= _data.bits[i] > sum; // The field sum wrapped around. + + _data.bits[i] = sum; + } + + if (_data.NumEndBits()) { // Partial last field: add, then mask off anything beyond the size. + _data.bits[GetSize()/FIELD_BITS] = ( + _data.bits[GetSize()/FIELD_BITS] + + set2._data.bits[GetSize()/FIELD_BITS] + + static_cast(carry) + ) & _data.EndMask(); + } + + return *this; + } + + /// Subtraction of two Bitsets. + /// Wraps around if it underflows. + /// Returns result. + template + Bits Bits::SUB(const Bits & set2) const{ + Bits out_set(*this); + return out_set.SUB_SELF(set2); + } + + /// Subtraction of two Bitsets. + /// Wraps if it underflows. + /// Returns this object.
+ template + Bits & Bits::SUB_SELF(const Bits & set2){ + + bool carry = false; + + for (size_t i = 0; i < GetSize()/FIELD_BITS; ++i) { + field_t subtrahend = set2._data.bits[i] + static_cast(carry); + carry = set2._data.bits[i] > subtrahend; + carry |= _data.bits[i] < subtrahend; + _data.bits[i] -= subtrahend; + } + + if (_data.NumEndBits()) { + _data.bits[GetSize()/FIELD_BITS] = ( + _data.bits[GetSize()/FIELD_BITS] + - set2._data.bits[GetSize()/FIELD_BITS] + - static_cast(carry) + ) & _data.EndMask(); + } + + return *this; + } + + // Set up some aliases for common types of Bit strings. + // BitVector, BitArray, and StaticBitVector function like vectors and arrays, which is to say + // that the zero index is on the left-hand side. BitSet, BitValue, and StaticBitValue are treated + // like numerical representations, with the zero-position on the right-hand side. + + // using BitVector = Bits; + using BitVector = Bits; + using BitValue = Bits; + + template using BitArray = Bits, true>; + template using BitSet = Bits, false>; + template using StaticBitVector = Bits, true>; + template using StaticBitValue = Bits, false>; +} + + +// ---------------------- Implementations to work with standard library ---------------------- + +namespace std { + /// Hash function to allow BitVector to be used with maps and sets (must be in std). + template <> + struct hash { + std::size_t operator()(const emp::BitVector & bits) const { + return bits.Hash(); + } + }; +} + +#endif // #ifndef EMP_BITS_BITS_HPP_INCLUDE diff --git a/include/emp/bits/Bits_Data.hpp b/include/emp/bits/Bits_Data.hpp new file mode 100644 index 0000000000..ac4c50a27a --- /dev/null +++ b/include/emp/bits/Bits_Data.hpp @@ -0,0 +1,458 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file Bits_Data.hpp + * @brief Helper class to handle memory management for Bits objects.
+ * @note Status: BETA + * + * Bits_Data handles the actual bits inside of the Bits class. Bits itself provides many tools + * to operate on that data. + */ + +#ifndef EMP_BITS_BITS_DATA_HPP_INCLUDE +#define EMP_BITS_BITS_DATA_HPP_INCLUDE + + +#include +#include + +#include "../base/array.hpp" +#include "../base/assert.hpp" +#include "../base/Ptr.hpp" +#include "../math/math.hpp" + +#include "bitset_utils.hpp" + +namespace emp { + + // BitsMode specifies how a Bits object can change the number of bits in itself. + // FIXED is locked at the base size and cannot change and is stored in static memory. + // CAPPED must be the base size or lower, but requires size tracking. + // DYNAMIC defaults to base size, but can be changed; requires indirect memory and allocation. + // WATERMARK is like DYNAMIC, but never reallocates memory when shrinking active size. + enum class BitsMode { FIXED, CAPPED, DYNAMIC, WATERMARK }; + + namespace internal { + + // ------------------------------------------------------------------------------------ + // SIZE TRACKING + // ------------------------------------------------------------------------------------ + + /// Dynamic size is stored here to work with, but not the actual bits. + template + struct Bits_Data_Size_Var { + using field_t = bits_field_t; + + size_t num_bits; ///< Total number of bits we are using + + constexpr void SetSize(size_t new_size) { num_bits = new_size; } + + [[nodiscard]] constexpr size_t NumBits() const noexcept { return num_bits; } + + /// Number of bits used in partial field at the end; 0 = perfect fit. + [[nodiscard]] constexpr size_t NumEndBits() const noexcept { + return num_bits & (NUM_FIELD_BITS - 1); + } + + /// EXTRA bits leftover in the gap at the end + [[nodiscard]] constexpr size_t EndGap() const noexcept { + return NumEndBits() ? (NUM_FIELD_BITS - NumEndBits()) : 0; + } + + /// Mask to cut off all of the final bits.
+ [[nodiscard]] constexpr field_t EndMask() const noexcept { + return MaskLow(NumEndBits()); + } + + /// How many fields do we need for the current set of bits? + [[nodiscard]] constexpr size_t NumFields() const noexcept { + return num_bits ? (1 + ((num_bits - 1) / NUM_FIELD_BITS)) : 0; + } + + /// ID of the last occupied field (only meaningful when num_bits > 0; wraps around otherwise) + [[nodiscard]] constexpr size_t LastField() const noexcept { + return NumFields() - 1; + } + + /// Number of bytes needed for the current set of bits + [[nodiscard]] constexpr size_t NumBytes() const noexcept { + return num_bits ? (1 + ((num_bits - 1) >> 3)) : 0; + } + + /// How many bytes are allocated? + [[nodiscard]] constexpr size_t TotalBytes() const noexcept { + return NumFields() * sizeof(field_t); + } + + Bits_Data_Size_Var(size_t in_size=DEFAULT_SIZE) : num_bits(in_size) { } + Bits_Data_Size_Var(const Bits_Data_Size_Var &) = default; + + template + void serialize(Archive & ar) { ar(num_bits); } + + [[nodiscard]] constexpr bool OK() const { return true; } // Nothing to check yet. + }; + + /// If we have a fixed number of bits, we know size at compile time. + template + struct Bits_Data_Size_Fixed { + using field_t = bits_field_t; + static constexpr size_t DEFAULT_SIZE = NUM_BITS; + + constexpr void SetSize(size_t new_size) { + emp_assert(new_size == NUM_BITS, "Cannot change to new_size"); + } + + [[nodiscard]] constexpr size_t NumBits() const noexcept { return NUM_BITS; } + + /// Number of bits used in partial field at the end; 0 if perfect fit. + [[nodiscard]] constexpr size_t NumEndBits() const noexcept { + return NUM_BITS & (NUM_FIELD_BITS - 1); + } + + /// How many EXTRA bits are leftover in the gap at the end? + [[nodiscard]] constexpr size_t EndGap() const noexcept { + return (NUM_FIELD_BITS - NumEndBits()) % NUM_FIELD_BITS; + } + + /// A mask to cut off all of the final bits.
+ [[nodiscard]] constexpr field_t EndMask() const noexcept { + return MaskLow(NumEndBits()); + } + + /// How many fields do we need for the current set of bits? + [[nodiscard]] constexpr size_t NumFields() const noexcept { + return NUM_BITS ? (1 + ((NUM_BITS - 1) / NUM_FIELD_BITS)) : 0; + } + + /// What is the ID of the last occupied field? + [[nodiscard]] constexpr size_t LastField() const noexcept { return NumFields() - 1; } + + /// How many bytes are used for the current set of bits? (rounded up!) + [[nodiscard]] constexpr size_t NumBytes() const noexcept { + return NUM_BITS ? (1 + ((NUM_BITS - 1) >> 3)) : 0; + } + + /// How many bytes are allocated? (rounded up!) + [[nodiscard]] constexpr size_t TotalBytes() const noexcept { + return NumFields() * sizeof(field_t); + } + + Bits_Data_Size_Fixed(size_t in_size=NUM_BITS) { + emp_assert(in_size <= NUM_BITS, in_size, NUM_BITS); + } + Bits_Data_Size_Fixed(const Bits_Data_Size_Fixed &) = default; + + template + void serialize(Archive & ar) { /* Nothing to do here. */ } + + [[nodiscard]] constexpr bool OK() const { return true; } // Nothing to check yet. + }; + + + // ------------------------------------------------------------------------------------ + // RAW MEMORY MANAGEMENT + // ------------------------------------------------------------------------------------ + + /// Data & functions for Bits types with fixed memory (size may be dynamic, capped by CAPACITY) + template + struct Bits_Data_Mem_Static_Base : public BASE_T { + using base_size_t = BASE_T; + using field_t = bits_field_t; + static constexpr size_t MAX_FIELDS = (1 + ((CAPACITY - 1) / NUM_FIELD_BITS)); + + emp::array bits; ///< Fields to hold the actual bit values.
+ + Bits_Data_Mem_Static_Base() = default; + Bits_Data_Mem_Static_Base(size_t num_bits) : BASE_T(num_bits) + { + emp_assert(num_bits <= CAPACITY, num_bits, CAPACITY); + } + Bits_Data_Mem_Static_Base(const Bits_Data_Mem_Static_Base &) = default; + Bits_Data_Mem_Static_Base(Bits_Data_Mem_Static_Base &&) = default; + + Bits_Data_Mem_Static_Base & operator=(const Bits_Data_Mem_Static_Base &) = default; + Bits_Data_Mem_Static_Base & operator=(Bits_Data_Mem_Static_Base &&) = default; + + // --- Helper functions -- + + [[nodiscard]] Ptr FieldPtr() { return bits.data(); } + [[nodiscard]] Ptr FieldPtr() const { return bits.data(); } + + /// Change the tracked size; the memory itself is static and never moves. + /// @param new_size The number of bits to track from now on. + /// @param preserve_data If true, clear every bit that falls outside the new size. + void RawResize(const size_t new_size, const bool preserve_data=false) { + const size_t old_num_fields = BASE_T::NumFields(); + BASE_T::SetSize(new_size); + if (preserve_data) { + // Clear bits past the new end inside a partial last field. + if (BASE_T::NumEndBits()) bits[BASE_T::LastField()] &= BASE_T::EndMask(); + // Zero all dropped fields, even when the new size is an exact multiple of the field size. + for (size_t id = BASE_T::NumFields(); id < old_num_fields; ++id) bits[id] = 0; + } + } + + #ifdef NDEBUG + [[nodiscard]] auto AsSpan() { return std::span(bits); } + [[nodiscard]] auto AsSpan() const { return std::span(bits); } + #else + [[nodiscard]] auto AsSpan() { return std::span(bits.data()); } + [[nodiscard]] auto AsSpan() const { return std::span(bits.data()); } + #endif + + [[nodiscard]] bool OK() const { return true; } // Nothing to check yet. + + template + void serialize(Archive & ar) { + BASE_T::serialize(ar); // Save size info.
+ for (size_t i=0; i < BASE_T::NumFields(); ++i) { ar(bits[i]); } + } + + }; + + template + using Bits_Data_Mem_Static = + Bits_Data_Mem_Static_Base< Bits_Data_Size_Var, CAPACITY >; + + template + using Bits_Data_Mem_Fixed = + Bits_Data_Mem_Static_Base< Bits_Data_Size_Fixed, CAPACITY >; + + /// Data & functions for Bits types with dynamic memory (size is tracked elsewhere) + template + struct Bits_Data_Mem_Dynamic : public Bits_Data_Size_Var + { + using base_t = Bits_Data_Size_Var; + using base_size_t = base_t; + using field_t = bits_field_t; + + Ptr bits; ///< Pointer to array with the status of each bit + + Bits_Data_Mem_Dynamic(size_t num_bits=DEFAULT_SIZE) : base_t(num_bits), bits(nullptr) + { + if (num_bits) bits = NewArrayPtr(NumBitFields(num_bits)); + } + // Copy/move construction starts from an empty state (size 0, no memory) so that Copy() and + // Move() never see a nonzero size without matching storage, whatever DEFAULT_SIZE is. + Bits_Data_Mem_Dynamic(const Bits_Data_Mem_Dynamic & in) : base_t(0), bits(nullptr) { Copy(in); } + Bits_Data_Mem_Dynamic(Bits_Data_Mem_Dynamic && in) : base_t(0), bits(nullptr) { Move(std::move(in)); } + ~Bits_Data_Mem_Dynamic() { if (bits) {bits.DeleteArray();} } + + Bits_Data_Mem_Dynamic & operator=(const Bits_Data_Mem_Dynamic & in) { Copy(in); return *this; } + Bits_Data_Mem_Dynamic & operator=(Bits_Data_Mem_Dynamic && in) { Move(std::move(in)); return *this; } + + // --- Helper functions -- + + [[nodiscard]] Ptr FieldPtr() { return bits; } + [[nodiscard]] Ptr FieldPtr() const { return bits; } + + void MakeEmpty() { + base_t::SetSize(0); + if (bits) bits.DeleteArray(); + bits = nullptr; + } + + void RawResize(const size_t new_size, const bool preserve_data=false) { + if (new_size == 0) { return MakeEmpty(); } + + // See if number of bit fields needs to change.
+ const size_t num_old_fields = base_t::NumFields(); + const size_t num_new_fields = NumBitFields(new_size); + + if (num_old_fields != num_new_fields) { + auto new_bits = NewArrayPtr(num_new_fields); + if (num_old_fields) { + if (preserve_data) { + size_t copy_count = std::min(num_old_fields, num_new_fields); + emp::CopyMemory(bits, new_bits, copy_count); + } + bits.DeleteArray(); // Delete old memory + } + bits = new_bits; // Use new memory + if (preserve_data) { + // Zero out any newly added fields. + for (size_t i = num_old_fields; i < num_new_fields; ++i) bits[i] = 0; + } + } + + base_t::SetSize(new_size); + + // Clear out any extra bits in the last field. + if (preserve_data && base_t::NumEndBits()) { + bits[base_t::LastField()] &= base_t::EndMask(); + } + } + + // Resize to match the source (old contents are not preserved), then copy every field. + void Copy(const Bits_Data_Mem_Dynamic & in) { + RawResize(in.NumBits()); + for (size_t i = 0; i < base_t::NumFields(); ++i) bits[i] = in.bits[i]; + } + + // Take over the memory of `in`, leaving it empty. + void Move(Bits_Data_Mem_Dynamic && in) { + if (this == &in) return; // Self-move: freeing our bits first would leave a dangling pointer. + base_t::SetSize(in.NumBits()); + if (bits) bits.DeleteArray(); // Clear out old bits. + bits = in.bits; // Move over the bits. + in.bits = nullptr; // Clear them out of the original. + in.SetSize(0); // Null bits must go with a zero size (see OK()). + } + + [[nodiscard]] auto AsSpan() { return std::span(bits.Raw(), base_t::NumFields()); } + [[nodiscard]] auto AsSpan() const { return std::span(bits.Raw(), base_t::NumFields()); } + + template + void save(Archive & ar) { + base_t::serialize(ar); // Save size info. 
+ for (size_t i=0; i < base_t::NumFields(); ++i) { + ar(bits[i]); + } + } + + template + void load(Archive & ar) { + base_t::serialize(ar); + if (bits) bits.DeleteArray(); // Delete old memory if needed + bits = NewArrayPtr(base_t::NumFields()); + for (size_t i=0; i < base_t::NumFields(); ++i) { + ar(bits[i]); + } + } + + bool OK() const { + // Do some checking on the bits array ptr to make sure it's valid. + if (bits) { + #ifdef EMP_TRACK_MEM + emp_assert(bits.DebugIsArray()); // Must be marked as an array.
+ emp_assert(bits.OK()); // Pointer must be okay. + #endif + } + else { emp_assert(base_t::num_bits == 0); } // If bits is null, num_bits should be zero. + return true; + } + }; + + /// Data & functions for Bits types with dynamic memory that is only ever grown: shrinking the + /// size keeps the allocation, and field_capacity records how many fields are allocated. + template + struct Bits_Data_Mem_Watermark : public Bits_Data_Mem_Dynamic + { + using this_t = Bits_Data_Mem_Watermark; + using base_t = Bits_Data_Mem_Dynamic; + using field_t = bits_field_t; + using base_t::bits; ///< Pointer to array with the status of each bit + size_t field_capacity = 0; ///< How many fields is the watermark up to? + + Bits_Data_Mem_Watermark(size_t num_bits=DEFAULT_SIZE) : base_t(num_bits) + { + field_capacity = base_t::NumFields(); + } + // Start the base empty: a default-constructed base would allocate DEFAULT_SIZE bits that + // field_capacity (still 0) does not know about, and RawResize() would then leak them. + Bits_Data_Mem_Watermark(const this_t & in) : base_t(0) { Copy(in); } + Bits_Data_Mem_Watermark(this_t && in) : base_t(0) { Move(std::move(in)); } + ~Bits_Data_Mem_Watermark() { /* cleanup in base class */ } + + Bits_Data_Mem_Watermark & operator=(const this_t & in) { Copy(in); return *this; } + Bits_Data_Mem_Watermark & operator=(this_t && in) { Move(std::move(in)); return *this; } + + // --- Helper functions -- + + /// Resize to have at least the specified number of fields. + /// @param new_size The number of bits the new data needs to hold. + /// @param preserve_data Should we keep existing bits and zero out new bits? + void RawResize(const size_t new_size, const bool preserve_data=false) { + // See if number of bit fields needs to change. + const size_t num_old_fields = base_t::NumFields(); + const size_t num_new_fields = NumBitFields(new_size); + + // If we need more fields than are currently available, reallocate memory. + if (num_new_fields > field_capacity) { + auto new_bits = NewArrayPtr(num_new_fields); + if (field_capacity) { // If we already had some allocated fields... + // If needed, copy over previous memory.
+ if (preserve_data) emp::CopyMemory(bits, new_bits, field_capacity); + bits.DeleteArray(); // Delete old memory + } + field_capacity = num_new_fields; + bits = new_bits; // Use new memory + } + + base_t::SetSize(new_size); + + if (preserve_data) { + // Clear any new (or previously unused) fields. + for (size_t i = num_old_fields; i < num_new_fields; ++i) bits[i] = 0; + + // Clear out any extra end bits. + if (base_t::NumEndBits()) bits[base_t::LastField()] &= base_t::EndMask(); + } + } + + void Copy(const Bits_Data_Mem_Watermark & in) { // Same as base class, but call THIS RawResize(). + RawResize(in.NumBits()); + for (size_t i = 0; i < base_t::NumFields(); ++i) bits[i] = in.bits[i]; + } + + /// Take over the memory and capacity of `in`, leaving it empty. + void Move(Bits_Data_Mem_Watermark && in) { + if (this == &in) return; // Self-move: nothing to transfer. + base_t::Move(std::move(in)); + field_capacity = in.field_capacity; + in.field_capacity = 0; // The source owns no fields any more. + } + + /// Release all memory and reset the watermark, so that a later RawResize() allocates again + /// instead of writing through the null pointer left by the base-class version. + void MakeEmpty() { + base_t::MakeEmpty(); + field_capacity = 0; + } + + template + void save(Archive & ar) { base_t::save(ar); } // Base class handles saving. + + template + void load(Archive & ar) { + base_t::load(ar); + field_capacity = base_t::NumFields(); // Use loaded size as capacity. + } + + bool OK() const { + emp_assert(field_capacity >= base_t::NumFields()); + return base_t::OK(); + } + }; + + + + /// Internal data for the Bits class to separate static vs. dynamic.
+ template + struct Bits_Data : public BASE_T + { + using field_t = bits_field_t; + + Bits_Data() = default; + Bits_Data(size_t num_bits) : BASE_T(num_bits) { } + Bits_Data(const Bits_Data & in) = default; + Bits_Data(Bits_Data && in) = default; + + Bits_Data & operator=(const Bits_Data &) = default; + Bits_Data & operator=(Bits_Data &&) = default; + + /// Pointer to the stored fields, reinterpreted for byte-level access. + [[nodiscard]] emp::Ptr BytePtr() { + return BASE_T::FieldPtr().template ReinterpretCast(); + } + [[nodiscard]] emp::Ptr BytePtr() const { + return BASE_T::FieldPtr().template ReinterpretCast(); + } + + /// Read-only view of the stored fields as a span of bytes. + [[nodiscard]] auto AsByteSpan() const { return std::as_bytes( BASE_T::AsSpan() ); } + + /// Check the base-class invariants and that no bit is set beyond the end of the last field. + [[nodiscard]] bool OK() const { + bool result = BASE_T::OK(); + + // If there are end bits, make sure that everything past the last one is clear. + if (BASE_T::NumEndBits()) { + // Make sure final bits are zeroed out. + const field_t excess_bits = + BASE_T::bits[BASE_T::LastField()] & ~MaskLow(BASE_T::NumEndBits()); + result &= !excess_bits; + } + + return result; + } + + }; + } + + using Bits_WatermarkData = internal::Bits_Data< internal::Bits_Data_Mem_Watermark<0> >; + using Bits_DynamicData = internal::Bits_Data< internal::Bits_Data_Mem_Dynamic<0> >; + template + using Bits_FixedData = internal::Bits_Data< internal::Bits_Data_Mem_Fixed >; + template + using Bits_StaticData = internal::Bits_Data< internal::Bits_Data_Mem_Static >; +} + +#endif // #ifndef EMP_BITS_BITS_DATA_HPP_INCLUDE diff --git a/include/emp/bits/bitset_utils.hpp b/include/emp/bits/bitset_utils.hpp index 17858e8a62..9f9db99283 100644 --- a/include/emp/bits/bitset_utils.hpp +++ b/include/emp/bits/bitset_utils.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2020. + * @date 2016-2022. * * @file bitset_utils.hpp * @brief A set of simple functions to manipulate bitsets.
@@ -12,10 +12,45 @@ #define EMP_BITS_BITSET_UTILS_HPP_INCLUDE #include +#include +#include #include +#include + +#include "../base/Ptr.hpp" namespace emp { + /// @brief Use size_t as the default bits field type. + using bits_field_t = size_t; + + /// @brief Track the number of bits in a single bit field. + static constexpr size_t NUM_FIELD_BITS = sizeof(bits_field_t)*8; + + /// @brief Convert a bit count to the number of fields needed to store them. + [[nodiscard]] constexpr size_t NumBitFields(size_t num_bits) noexcept { + return num_bits ? (1 + ((num_bits - 1) / NUM_FIELD_BITS)) : 0; + } + + /// @brief Convert a single bit field to a string (hex value in square brackets). + /// @param field A single bit field to convert to a string. + [[nodiscard]] inline std::string BitFieldToString(bits_field_t field) { + std::stringstream ss; + ss << '[' << std::hex << field << ']'; + return ss.str(); + } + + /// @brief Convert a series of `count` bit fields to a string, separated by spaces. + /// @param bits Pointer to the first bit field to convert. + [[nodiscard]] inline std::string BitFieldsToString(emp::Ptr bits, size_t count) { + std::stringstream ss; + for (size_t i = 0; i < count; ++i) { + if (i) ss << ' '; + ss << BitFieldToString(bits[i]); + } + return ss.str(); + } + /// Create a series of a specified number of ones (at compile time) in a uint. template constexpr uint32_t UIntMaskFirst() { return (UIntMaskFirst() << 1) | 1; } @@ -36,58 +71,64 @@ namespace emp { 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; - /// Count the number of bits in a 64-bit unsigned integer. - inline constexpr size_t count_bits(uint64_t val) { - return - ByteCount[ val >> 56 ] + - ByteCount[ (val >> 48) & 0xFF ] + - ByteCount[ (val >> 40) & 0xFF ] + - ByteCount[ (val >> 32) & 0xFF ] + - ByteCount[ (val >> 24) & 0xFF ] + - ByteCount[ (val >> 16) & 0xFF ] + - ByteCount[ (val >> 8) & 0xFF ] + - ByteCount[ val & 0xFF ]; - } - - // /// Count the number of bits in a 32-bit unsigned integer.
- // inline constexpr size_t count_bits(uint32_t val) { - // return - // ByteCount[ val >> 24 ] + - // ByteCount[ (val >> 16) & 0xFF ] + - // ByteCount[ (val >> 8) & 0xFF ] + - // ByteCount[ val & 0xFF ]; - // } + /// Count the number of bits in an unsigned integer. + template + [[nodiscard]] inline constexpr size_t count_bits(T val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + constexpr size_t num_bytes = sizeof(T); + static_assert(num_bytes <= 8, "count_bits() requires 8 or fewer bytes."); + + size_t out_ones = ByteCount[ val & 0xFF ]; + if constexpr (num_bytes > 1) { + out_ones += ByteCount[ (val >> 8) & 0xFF ]; + } + if constexpr (num_bytes > 2) { + out_ones += ByteCount[ (val >> 24) & 0xFF ] + + ByteCount[ (val >> 16) & 0xFF ]; + } + if constexpr (num_bytes > 4) { + out_ones += ByteCount[ val >> 56 ] + + ByteCount[ (val >> 48) & 0xFF ] + + ByteCount[ (val >> 40) & 0xFF ] + + ByteCount[ (val >> 32) & 0xFF ]; + } + return out_ones; + } + /// Return the position of the first one bit template - inline constexpr size_t find_bit(T val) { return count_bits( (~val) & (val-1) ); } + [[nodiscard]] inline constexpr size_t find_bit(T val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + // Cast back to T: for 1- and 2-byte types the expression is promoted to (signed) int. + return count_bits( static_cast<T>((~val) & (val-1)) ); + } /// Return the position of the first one bit AND REMOVE IT. template inline size_t pop_bit(T & val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); const size_t pos = find_bit(val); - val &= ~(1 << pos); + val &= ~(T{1} << pos); // Shift a T-wide one (an int 1 breaks for pos >= 31); val must be nonzero. return pos; } - /// A compile-time bit counter. - template - static constexpr int CountOnes(TYPE x) { return x == 0 ? 0 : (CountOnes(x/2) + (x&1)); } - /// Quick bit-mask generator for low bits. - template - static constexpr TYPE MaskLow(std::size_t num_bits) { + template + [[nodiscard]] static constexpr TYPE MaskLow(std::size_t num_bits) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values."
); return (num_bits == 8*sizeof(TYPE)) ? ((TYPE)-1) : ((((TYPE)1) << num_bits) - 1); } /// Quick bit-mask generator for high bits. - template - static constexpr TYPE MaskHigh(std::size_t num_bits) { + template + [[nodiscard]] static constexpr TYPE MaskHigh(std::size_t num_bits) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); return MaskLow(num_bits) << (8*sizeof(TYPE)-num_bits); } - template - static constexpr TYPE MaskUsed(TYPE val) { + template + [[nodiscard]] static constexpr TYPE MaskUsed(TYPE val) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); size_t shift = 1; TYPE last = 0; while (val != last) { // While the shift is making progress... @@ -99,6 +140,109 @@ namespace emp { return val; } + /// Reverse the order of all bits in an unsigned value by swapping ever smaller groups of bits. + template + [[nodiscard]] constexpr T ReverseBits(T in) { + constexpr size_t num_bytes = sizeof(T); + + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + static_assert( num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8, + "ReverseBits() currently requires 1, 2, 4, or 8-byte values."
); + + if constexpr (num_bytes == 1) { + in = static_cast( (in & 0xF0) >> 4 | (in & 0x0F) << 4 ); + in = static_cast( (in & 0xCC) >> 2 | (in & 0x33) << 2 ); + in = static_cast( (in & 0xAA) >> 1 | (in & 0x55) << 1 ); + } + else if constexpr (num_bytes == 2) { + in = static_cast( (in & 0xFF00) >> 8 | (in & 0x00FF) << 8 ); + in = static_cast( (in & 0xF0F0) >> 4 | (in & 0x0F0F) << 4 ); + in = static_cast( (in & 0xCCCC) >> 2 | (in & 0x3333) << 2 ); + in = static_cast( (in & 0xAAAA) >> 1 | (in & 0x5555) << 1 ); + } + else if constexpr (num_bytes == 4) { + in = static_cast( (in & 0xFFFF0000) >> 16 | (in & 0x0000FFFF) << 16 ); + in = static_cast( (in & 0xFF00FF00) >> 8 | (in & 0x00FF00FF) << 8 ); + in = static_cast( (in & 0xF0F0F0F0) >> 4 | (in & 0x0F0F0F0F) << 4 ); + in = static_cast( (in & 0xCCCCCCCC) >> 2 | (in & 0x33333333) << 2 ); + in = static_cast( (in & 0xAAAAAAAA) >> 1 | (in & 0x55555555) << 1 ); + } + else /* if constexpr (num_bytes == 8) */ { + in = static_cast( (in & 0xFFFFFFFF00000000) >> 32 | (in & 0x00000000FFFFFFFF) << 32 ); + in = static_cast( (in & 0xFFFF0000FFFF0000) >> 16 | (in & 0x0000FFFF0000FFFF) << 16 ); + in = static_cast( (in & 0xFF00FF00FF00FF00) >> 8 | (in & 0x00FF00FF00FF00FF) << 8 ); + in = static_cast( (in & 0xF0F0F0F0F0F0F0F0) >> 4 | (in & 0x0F0F0F0F0F0F0F0F) << 4 ); + in = static_cast( (in & 0xCCCCCCCCCCCCCCCC) >> 2 | (in & 0x3333333333333333) << 2 ); + in = static_cast( (in & 0xAAAAAAAAAAAAAAAA) >> 1 | (in & 0x5555555555555555) << 1 ); + } + + return in; + } + + // Rotate all bits to the left (looping around) in a provided field. + template + [[nodiscard]] constexpr T RotateBitsLeft( + T in, + size_t rotate_size = 1 + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + constexpr size_t FIELD_BITS = sizeof(T) * 8; + rotate_size %= FIELD_BITS; // Make sure rotate is in range.
+ // The extra modulo turns a zero rotate into a shift by 0; shifting by the full width is undefined. + return (in << rotate_size) | + (in >> ((FIELD_BITS - rotate_size) % FIELD_BITS)); + } + + // Rotate lowest "bit_count" bits to the left (looping around) in a provided field. + // bit_count must be nonzero (it is used as a modulus). + template + [[nodiscard]] constexpr T RotateBitsLeft( + T in, + size_t rotate_size, + size_t bit_count + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + constexpr size_t FIELD_BITS = sizeof(T) * 8; + emp_assert(bit_count <= FIELD_BITS, "Cannot have more bits than can fit in field."); + rotate_size %= bit_count; // Make sure rotate is in range. + // The extra modulo keeps a zero rotate from shifting by bit_count (undefined when it is the full width). + const T out = (in << rotate_size) | (in >> ((bit_count - rotate_size) % bit_count)); + return out & MaskLow(bit_count); // Zero out excess bits. + } + + // Rotate all bits to the right (looping around) in a provided field. + template + [[nodiscard]] constexpr T RotateBitsRight( + T in, + size_t rotate_size = 1 + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + constexpr size_t FIELD_BITS = sizeof(T) * 8; + rotate_size %= FIELD_BITS; // Make sure rotate is in range. + // The extra modulo turns a zero rotate into a shift by 0; shifting by the full width is undefined. + return (in >> rotate_size) | + (in << ((FIELD_BITS - rotate_size) % FIELD_BITS)); + } + + // Rotate lowest "bit_count" bits to the right (looping around) in a provided field. + // bit_count must be nonzero (it is used as a modulus). + template + [[nodiscard]] constexpr T RotateBitsRight( + T in, + size_t rotate_size, + size_t bit_count + ) { + static_assert( std::is_unsigned_v, "Bit manipulation requires unsigned values." ); + constexpr size_t FIELD_BITS = sizeof(T) * 8; + emp_assert(bit_count <= FIELD_BITS, "Cannot have more bits than can fit in field."); + rotate_size %= bit_count; // Make sure rotate is in range. + // The extra modulo keeps a zero rotate from shifting by bit_count (undefined when it is the full width). + const T out = (in >> rotate_size) | (in << ((bit_count - rotate_size) % bit_count)); + return out & MaskLow(bit_count); // Zero out excess bits. + } + + /// Count the number of bits ('0' or '1') found in a string.
+ size_t CountBits(const std::string & bitstring) { + return std::count_if( + bitstring.begin(), + bitstring.end(), + [](char i) { return i == '0' || i == '1'; } + ); + } + /* // Returns the position of the first set (one) bit or a -1 if none exist. template diff --git a/include/emp/compiler/DFA.hpp b/include/emp/compiler/DFA.hpp index 7c8f83ce1a..5f3d354093 100644 --- a/include/emp/compiler/DFA.hpp +++ b/include/emp/compiler/DFA.hpp @@ -28,13 +28,17 @@ namespace emp { private: emp::vector< emp::array > transitions; emp::vector< STOP_TYPE > is_stop; // 0=not stop; other values for STOP return value. + + using this_t = tDFA; public: tDFA(size_t num_states=0) : transitions(num_states), is_stop(num_states, 0) { for (auto & t : transitions) t.fill(-1); } - tDFA(const tDFA &) = default; + tDFA(const this_t &) = default; + tDFA(this_t &&) = default; ~tDFA() { ; } - tDFA & operator=(const tDFA &) = default; + this_t & operator=(const this_t &) = default; + this_t & operator=(this_t &&) = default; using stop_t = STOP_TYPE; diff --git a/include/emp/compiler/Lexer.hpp b/include/emp/compiler/Lexer.hpp index 0b9ed843da..6067ca77c1 100644 --- a/include/emp/compiler/Lexer.hpp +++ b/include/emp/compiler/Lexer.hpp @@ -1,17 +1,37 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2019. + * @date 2016-2022. * * @file Lexer.hpp * @brief A general-purpose, fast lexer. - * @note Status: ALPHA + * @note Status: BETA + * + * Build a lexer that can convert input strings or streams into a series of provided tokens. + * + * Use AddToken(name, regex) to list out the relevant tokens. + * 'name' is the unique name for this token. + * 'regex' is the regular expression that describes this token. + * It will return a unique ID associated with this lexeme. 
+ * + * IgnoreToken(name, regex) uses the same arguments, but is used for tokens that + * should be skipped over. + * + * Names and IDs can be recovered later using GetTokenID(name) and GetTokenName(id). + * + * Tokens can be retrieved either one at a time with Process(string) or Process(stream), + * which will return the next (non-ignored) token, removing it from the input. + * + * Alternatively, an entire series of tokens can be processed with Tokenize(). + * + * Finally, GetLexeme() can be used to retrieve the lexeme from the most recent token found. */ #ifndef EMP_COMPILER_LEXER_HPP_INCLUDE #define EMP_COMPILER_LEXER_HPP_INCLUDE #include +#include #include #include #include @@ -34,9 +54,12 @@ namespace emp { bool save_lexeme; ///< Preserve the lexeme for this token? bool save_token; ///< Keep token at all? (Whitespace and comments are often discarded). + // Default constructor produces an error token. + TokenInfo() : name(""), desc("Unable to parse input!"), regex(""), + id(-1), save_lexeme(true), save_token(true) { } TokenInfo(const std::string & _name, const std::string & _regex, int _id, bool _save_l=true, bool _save_t=true, const std::string & _desc="") - : name(_name), desc(_desc), regex(_regex), id(_id), save_lexeme(_save_l), save_token(_save_t) { ; } + : name(_name), desc(_desc), regex(_regex), id(_id), save_lexeme(_save_l), save_token(_save_t) { } TokenInfo(const TokenInfo &) = default; TokenInfo(TokenInfo &&) = default; TokenInfo & operator=(const TokenInfo &) = default; @@ -55,42 +78,119 @@ namespace emp { /// Information about a token instance from an input stream. struct Token { - int token_id; ///< Which type of token is this? + int id; ///< Which type of token is this? std::string lexeme; ///< Sequence matched by this token (or empty if not saved) size_t line_id; ///< Which line did this token start on? 
- Token(int id, const std::string & str="", size_t _line=0) - : token_id(id), lexeme(str), line_id(_line) { ; } + Token(int _id, const std::string & str="", size_t _line=0) + : id(_id), lexeme(str), line_id(_line) { ; } Token(const Token &) = default; Token(Token &&) = default; Token & operator=(const Token &) = default; Token & operator=(Token &&) = default; /// Token will automatically convert to its ID if used as an int. - operator int() const { return token_id; } + operator int() const { return id; } /// Token will automatically convert to its matched sequence (lexeme) is used as a string. operator const std::string &() const { return lexeme; } }; + class TokenStream { + private: + std::string name = ""; + emp::vector tokens; + + public: + TokenStream(const std::string & in_name) : name(in_name) { } + TokenStream(const TokenStream &) = default; + TokenStream(TokenStream &&) = default; + TokenStream(const emp::vector & in_tokens, const std::string & in_name) + : name(in_name), tokens(in_tokens) { } + + TokenStream & operator=(const TokenStream &) = default; + TokenStream & operator=(TokenStream &&) = default; + + class Iterator { + private: + emp::Ptr ts; + size_t pos; + + public: + Iterator(const Iterator &) = default; + Iterator(const TokenStream & in_ts, size_t in_pos) : ts(&in_ts), pos(in_pos) { } + Iterator & operator=(const Iterator &) = default; + + const TokenStream & GetTokenStream() const { return *ts; } + size_t GetIndex() const { return pos; } + emp::Ptr ToPtr() const { return ts->GetPtr(pos); } + + Token operator*() const { return ts->tokens[pos]; } + const Token * operator->() const { return &(ts->tokens[pos]); } + + bool operator==(const Iterator & in) const { return ToPtr() == in.ToPtr(); } + bool operator!=(const Iterator & in) const { return ToPtr() != in.ToPtr(); } + bool operator< (const Iterator & in) const { return ToPtr() < in.ToPtr(); } + bool operator<=(const Iterator & in) const { return ToPtr() <= in.ToPtr(); } + bool operator> (const 
Iterator & in) const { return ToPtr() > in.ToPtr(); } + bool operator>=(const Iterator & in) const { return ToPtr() >= in.ToPtr(); } + + Iterator & operator++() { ++pos; return *this; } + Iterator operator++(int) { Iterator old(*this); ++pos; return old; } + Iterator & operator--() { --pos; return *this; } + Iterator operator--(int) { Iterator old(*this); --pos; return old; } + + bool IsValid() const { return pos < ts->size(); } + bool AtEnd() const { return pos == ts->size(); } + + operator bool() const { return IsValid(); } + }; + + size_t size() const { return tokens.size(); } + const Token & Get(size_t pos) const { return tokens[pos]; } + emp::Ptr GetPtr(size_t pos) const { return &(tokens.data()[pos]); } + const std::string & GetName() const { return name; } + Iterator begin() const { return Iterator(*this, 0); } + Iterator end() const { return Iterator(*this, tokens.size()); } + const Token & back() const { return tokens.back(); } + + void push_back(const Token & in) { tokens.push_back(in); } + + void Print(std::ostream & os=std::cout) const { + for (auto x : tokens) { + os << " [" << x.lexeme << "]"; + } + os << std::endl; + } + }; + + /// A lexer with a set of token types (and associated regular expressions) class Lexer { private: static constexpr int MAX_ID = 255; ///< IDs count down so that first ones have priority. static constexpr int ERROR_ID = -1; ///< Code for unknown token ID. - emp::vector token_set; ///< List of all active tokens. + emp::vector token_set; ///< List of all active tokens types. emp::map token_map; ///< Map of token names to id. int cur_token_id = MAX_ID; ///< Which ID should the next new token get? mutable bool generate_lexer = false; ///< Do we need to regenerate the lexer? mutable DFA lexer_dfa; ///< Table driven lexer implementation. - std::string lexeme; ///< Current state of lexeme being generated. + mutable std::string lexeme; ///< Current state of lexeme being generated. 
- const TokenInfo ERROR_TOKEN{"", "", ERROR_ID, true, true, "Unable to parse input!"}; + static const TokenInfo & ERROR_TOKEN() { + static const TokenInfo token; + return token; + } public: - Lexer() { ; } - ~Lexer() { ; } + Lexer() = default; + Lexer(const Lexer &) = default; + Lexer(Lexer &&) = default; + ~Lexer() = default; + + Lexer & operator=(const Lexer &) = default; + Lexer & operator=(Lexer &&) = default; /// How many types of tokens can be identified in this Lexer? size_t GetNumTokens() const { return token_set.size(); } @@ -99,8 +199,12 @@ namespace emp { /// Add a new token, specified by a name and the regex used to identify it. /// Note that token ids count down with highest IDs having priority. - int AddToken(const std::string & name, const std::string & regex, - bool save_lexeme=true, bool save_token=true, const std::string & desc="") { + int AddToken(const std::string & name, + const std::string & regex, + bool save_lexeme = true, + bool save_token = true, + const std::string & desc = "") + { int id = cur_token_id--; // Grab the next available token id. generate_lexer = true; // Indicate the the lexer DFA needs to be rebuilt. token_set.emplace_back( name, regex, id, save_lexeme, save_token, desc ); @@ -126,7 +230,7 @@ namespace emp { /// Get the full information about a token (you provide the id) const TokenInfo & GetTokenInfo(int id) const { - if (id > MAX_ID || id <= cur_token_id) return ERROR_TOKEN; + if (id > MAX_ID || id <= cur_token_id) return ERROR_TOKEN(); return token_set[(size_t)(MAX_ID - id)]; } @@ -160,7 +264,7 @@ namespace emp { /// longest one we can find.) Every time we do hit a valid lexeme, store it as the current /// "best" and keep going. Once we hit a point where no other valid lexemes are possible, /// stop and return the best we've found so far. - Token Process(std::istream & is) { + Token Process(std::istream & is) const { // If we still need to generate the DFA for the lexer, do so. 
if (generate_lexer) Generate(); @@ -173,7 +277,7 @@ namespace emp { lexeme.resize(0); // Keep looking as long as: - // 1: We may still be able to contine the current lexeme. + // 1: We may still be able to continue the current lexeme. // 2: We have not entered an invalid state. // 3: Our input stream has more symbols. while (cur_stop >= 0 && cur_state >= 0 && is) { @@ -201,8 +305,8 @@ namespace emp { return { best_stop, lexeme }; } - /// Shortcut to process a string rather than a stream. - Token Process(std::string & in_str) { + /// Shortcut to process a string rather than a stream, chopping off one token each time. + Token Process(std::string & in_str) const { std::stringstream ss; ss << in_str; auto out_val = Process(ss); @@ -210,8 +314,16 @@ namespace emp { return out_val; } + /// Shortcut to just get a single token. + Token ToToken(std::string_view in_str) const { + std::stringstream ss; + ss << in_str; + auto out_val = Process(ss); + return out_val; + } + /// Turn an input stream of text into a vector of tokens. - emp::vector Tokenize(std::istream & is) { + TokenStream Tokenize(std::istream & is, const std::string & name="in_stream") const { emp::vector out_tokens; size_t cur_line = 1; emp::Token token = Process(is); @@ -221,28 +333,30 @@ namespace emp { if (GetSaveToken(token)) out_tokens.push_back(token); token = Process(is); } - return out_tokens; + return TokenStream{out_tokens, name}; } /// Turn an input string into a vector of tokens. - emp::vector Tokenize(const std::string & str) { + TokenStream Tokenize(std::string_view str, const std::string & name="in_view") const { std::stringstream ss; ss << str; - return Tokenize(ss); + return Tokenize(ss, name); } /// Turn a vector of strings into a vector of tokens. 
- emp::vector Tokenize(const emp::vector & str_v) { + TokenStream Tokenize(const emp::vector & str_v, + const std::string & name="in_string vector") const + { std::stringstream ss; for (auto & str : str_v) { - ss << str; + ss << str << '\n'; } - return Tokenize(ss); + return Tokenize(ss, name); } /// Get the lexeme associated with the last token identified. - const std::string & GetLexeme() { return lexeme; } + const std::string & GetLexeme() const { return lexeme; } /// Print the full information about this lexer (for debugging) void Print(std::ostream & os=std::cout) const { @@ -252,7 +366,7 @@ namespace emp { } /// Try out the lexer on a string and demonstrate how it's tokenized. - void DebugString(std::string test_string) { + void DebugString(std::string test_string) const { std::stringstream ss; ss << test_string; diff --git a/include/emp/compiler/RegEx.hpp b/include/emp/compiler/RegEx.hpp index b6e139afe4..49e56521b8 100644 --- a/include/emp/compiler/RegEx.hpp +++ b/include/emp/compiler/RegEx.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2019. + * @date 2016-2022. * * @file RegEx.hpp * @brief Basic regular expression handler. @@ -29,8 +29,11 @@ * static DFA to_DFA(const RegEx & regex); * * - * @todo Need to implement ^ and $ (beginning and end of line) - * @todo Need to implement {n}, {n,} and {n,m} (exactly n, at least n, and n-m copies, respecitvely) + * @todo Implement ^ and $ (beginning and end of line) + * @todo Implement {n}, {n,} and {n,m} (exactly n, at least n, and n-m copies, respectively) + * @todo Implement \d (for digits), \s (for whitespace), etc. + * @todo Consider a separator (maybe backtick?) to divide up a regex expression; + * the result can be returned by each section as a vector of strings. 
*/ #ifndef EMP_COMPILER_REGEX_HPP_INCLUDE @@ -206,9 +209,11 @@ namespace emp { // If blocks are nested, merge them into a single block. if (nodes[i]->AsBlock()) { auto old_node = nodes[i]->AsBlock(); // Save the old node for merging. - nodes.erase(nodes.begin() + (int) i); // Remove block from nodes. - nodes.insert(nodes.begin() + (int) i, old_node->nodes.begin(), old_node->nodes.end()); - old_node->nodes.resize(0); // Don't recurse delete since nodes were moved! + nodes.erase(nodes.begin() + (int) i); // Remove block from nodes. + if (old_node->nodes.size()) { + nodes.insert(nodes.begin() + (int) i, old_node->nodes.begin(), old_node->nodes.end()); + old_node->nodes.resize(0); // Don't recurse delete since nodes were moved! + } old_node.Delete(); i--; modify = true; @@ -346,7 +351,7 @@ namespace emp { case '-': case '\\': case ']': - case '[': + case '[': // technically doesn't need to be escaped, but allowed. case '^': break; default: @@ -456,7 +461,7 @@ namespace emp { /// Process the input regex into a tree representaion. Ptr Process(Ptr cur_block=nullptr) { - emp_assert(pos >= 0 && pos < regex.size(), pos, regex.size()); + emp_assert(pos < regex.size(), pos, regex.size()); // If caller does not provide current block, create one (and return it.) if (cur_block==nullptr) cur_block = NewPtr(); diff --git a/include/emp/compiler/regex_utils.hpp b/include/emp/compiler/regex_utils.hpp new file mode 100644 index 0000000000..7e92b7109d --- /dev/null +++ b/include/emp/compiler/regex_utils.hpp @@ -0,0 +1,47 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2016-2021. + * + * @file regex_utils.hpp + * @brief Helper functions for building regular expressions. 
+ * @note Status: BETA + */ + +#ifndef EMP_COMPILER_REGEX_UTILS_HPP_INCLUDE +#define EMP_COMPILER_REGEX_UTILS_HPP_INCLUDE + +#include + +#include "RegEx.hpp" + +namespace emp { + + using namespace std::string_literals; + + inline std::string regex_nested(char open='(', + char close=')', + size_t depth=0, + bool stop_at_newline=true + ) + { + // Setup open and close as literal strings. + std::string open_re = emp::to_string('"', open, '"'); + std::string close_re = emp::to_string('"', close, '"'); + + // Base version has open_re and close_re at either end. + const std::string no_parens = "[^"s + open_re + close_re + (stop_at_newline ? "\n\r]*" : "]*"); + std::string matched = open_re + no_parens + close_re; + + for (size_t level = 0; level < depth; level++) { + const std::string multi = no_parens + "("s + matched + no_parens + ")*"s; + matched = open_re + multi + close_re; + } + + return matched; + } + + +} + +#endif // #ifndef EMP_COMPILER_REGEX_UTILS_HPP_INCLUDE diff --git a/include/emp/config/ArgManager.hpp b/include/emp/config/ArgManager.hpp index 27aca46090..5827dc6825 100644 --- a/include/emp/config/ArgManager.hpp +++ b/include/emp/config/ArgManager.hpp @@ -188,8 +188,8 @@ namespace emp { } else if (args[i].size() == 2) { // in POSIX, -- means treat subsequent words as literals // so we remove the -- and stop deflagging subsequent words - res.erase(std::next(std::begin(res),i)); - args.erase(std::next(std::begin(args),i)); + res.erase(std::next(std::begin(res),(int) i)); + args.erase(std::next(std::begin(args),(int) i)); break; } // " ", -, ---, ----, etc. left in place and treated as non-flags @@ -301,17 +301,14 @@ namespace emp { ); // store the argument pack + bool is_special = command == "_positional" + || command == "_unknown" + || command == "_invalid"; res.insert({ command, pack_t( - std::next( - std::begin(args), - command == "_positional" - || command == "_unknown" - || command == "_invalid" - ? i : i+1 - ), - j+1 < args.size() ? 
std::next(std::begin(args), j+1) : std::end(args) + std::next( std::begin(args), (int) (is_special ? i : i+1) ), + j+1 < args.size() ? std::next(std::begin(args), (int) j+1) : std::end(args) ) }); i = j; diff --git a/include/emp/config/config.hpp b/include/emp/config/config.hpp index 9ab97d4d25..900c8a84ae 100644 --- a/include/emp/config/config.hpp +++ b/include/emp/config/config.hpp @@ -403,7 +403,9 @@ namespace emp { for (auto & x : type_manager_map) delete x.second; } + #ifndef DOXYGEN_SHOULD_SKIP_THIS friend class ConfigWebUI; + #endif /*DOXYGEN_SHOULD_SKIP_THIS*/ ConfigEntry * operator[](const std::string & name) { return var_map[name]; } auto begin() -> decltype(var_map.begin()) { return var_map.begin(); } diff --git a/include/emp/data/AnnotatedType.hpp b/include/emp/data/AnnotatedType.hpp new file mode 100644 index 0000000000..48c16030dc --- /dev/null +++ b/include/emp/data/AnnotatedType.hpp @@ -0,0 +1,91 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021. + * + * @file AnnotatedType.hpp + * @brief A base class to provide a DataMap and accessors to another class. + * @note Status: ALPHA + * + */ + +#ifndef EMP_DATA_ANNOTATEDTYPE_HPP_INCLUDE +#define EMP_DATA_ANNOTATEDTYPE_HPP_INCLUDE + +#include "../base/assert.hpp" +#include "../meta/TypeID.hpp" +#include "../tools/string_utils.hpp" + +#include "DataMap.hpp" + +namespace emp { + + /// A generic base class implementing the use of dynamic traits via DataMaps. + class AnnotatedType { + private: + emp::DataMap data_map; ///< Dynamic variables assigned to this class. 
+ + public: + emp::DataMap & GetDataMap() { return data_map; } + const emp::DataMap & GetDataMap() const { return data_map; } + + void SetDataMap(emp::DataMap & in_dm) { data_map = in_dm; } + + emp::DataLayout & GetDataLayout() { return data_map.GetLayout(); } + const emp::DataLayout & GetDataLayout() const { return data_map.GetLayout(); } + + bool HasTraitID(size_t id) const { return data_map.HasID(id); } + bool HasTrait(const std::string & name) const { return data_map.HasName(name); } + template + bool TestTraitType(size_t id) const { return data_map.IsType(id); } + template + bool TestTraitType(const std::string & name) const { return data_map.IsType(name); } + + size_t GetTraitID(const std::string & name) const { return data_map.GetID(name); } + + template + auto & GetTrait(KEY_T && key) { + return data_map.Get(std::forward(key)); + } + + template + auto GetTrait(KEY_T && key, size_t count) { + return data_map.Get(std::forward(key), count); + } + + template + const auto & GetTrait(KEY_T && key) const { + return data_map.Get(std::forward(key)); + } + + template + auto GetTrait(KEY_T && key, size_t count) const { + return data_map.Get(std::forward(key), count); + } + + template + T & SetTrait(size_t id, const T & val) { return data_map.Set(id, val); } + + template + T & SetTrait(const std::string & name, const T & val) { return data_map.Set(name, val); } + + emp::TypeID GetTraitType(size_t id) const { return data_map.GetType(id); } + emp::TypeID GetTraitType(const std::string & name) const { return data_map.GetType(name); } + + double GetTraitAsDouble(size_t id) const { return data_map.GetAsDouble(id); } + + double GetTraitAsDouble(size_t trait_id, emp::TypeID type_id) const { + return data_map.GetAsDouble(trait_id, type_id); + } + + std::string GetTraitAsString(size_t id) const { return data_map.GetAsString(id); } + + std::string GetTraitAsString(size_t trait_id, emp::TypeID type_id, size_t count=1) const { + return data_map.GetAsString(trait_id, type_id, 
count); + } + }; + + +} + +#endif // #ifndef EMP_DATA_ANNOTATEDTYPE_HPP_INCLUDE diff --git a/include/emp/data/DataFile.hpp b/include/emp/data/DataFile.hpp index ba3361ab78..74f6c38021 100644 --- a/include/emp/data/DataFile.hpp +++ b/include/emp/data/DataFile.hpp @@ -137,7 +137,7 @@ namespace emp { /// Print a header containing comments describing all of the columns virtual void PrintHeaderComment(const std::string & cstart = "# ") { for (size_t i = 0; i < keys.size(); i++) { - *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")" << std::endl; + *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")\n"; } os->flush(); } @@ -613,10 +613,10 @@ namespace emp { /// Print a header containing comments describing all of the columns void PrintHeaderComment(const std::string & cstart = "# ") override { for (size_t i = 0; i < keys.size(); i++) { - *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")" << std::endl; + *os << cstart << i << ": " << descs[i] << " (" << keys[i] << ")\n"; } for (size_t i = 0; i < container_keys.size(); i++) { - *os << cstart << i+keys.size() << ": " << container_descs[i] << " (" << container_keys[i] << ")" << std::endl; + *os << cstart << i+keys.size() << ": " << container_descs[i] << " (" << container_keys[i] << ")\n"; } os->flush(); diff --git a/include/emp/data/DataLayout.hpp b/include/emp/data/DataLayout.hpp index fd460569a3..a02b835ef3 100644 --- a/include/emp/data/DataLayout.hpp +++ b/include/emp/data/DataLayout.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2019. + * @date 2019-2022. * * @file DataLayout.hpp * @brief A mapping of names to variables stored in a MemoryImage. 
@@ -18,6 +18,7 @@ #include "../base/assert.hpp" #include "../base/vector.hpp" #include "../datastructs/map_utils.hpp" +#include "../math/constants.hpp" #include "../meta/TypeID.hpp" #include "MemoryImage.hpp" @@ -33,6 +34,7 @@ namespace emp { std::string name; ///< Name of this setting. std::string desc; ///< Full description of this setting. std::string notes; ///< Any additional notes about this setting. + size_t count; ///< Number of objects in this entry. bool is_log; ///< Is this setting a current value or a log of all values? }; @@ -75,13 +77,55 @@ namespace emp { /// Determine if we have an ID. bool HasID(size_t id) const { return emp::Has(setting_map, id); } - /// Detemine if we have the correct type of a specific variable ID. + /// Determine if we have the correct type of a specific variable ID. template bool IsType(size_t id) const { - emp_assert(Has(setting_map, id), id); + emp_assert(emp::Has(setting_map, id), id); return setting_map.find(id)->second.type == emp::GetTypeID(); } + // Verify type, position, AND count. + template + bool Has(size_t id, size_t count=1) const { + auto it = setting_map.find(id); + return it != setting_map.end() && + it->second.type == emp::GetTypeID() && + it->second.count == count; + } + + // Verify name, position, AND count. + template + bool Has(const std::string & name, size_t count=1) const { + auto it = id_map.find(name); + return (it != id_map.end()) && Has(it->second, count); + } + + template + std::string DiagnoseHas(KEY_T key, size_t count=1) const { + size_t id = 0; + if constexpr (std::is_arithmetic()) { + id = key; + } else { // key is name. 
+ auto it = id_map.find(key); + if (it == id_map.end()) return emp::to_string("Unknown trait name '", key, "'"); + id = it->second; + } + + auto setting_it = setting_map.find(id); + if (setting_it == setting_map.end()) { + if (id == emp::MAX_SIZE_T) return emp::to_string("Unknown ID ", id, " (aka -1)"); + return emp::to_string("Unknown ID ", id); + } + if (setting_it->second.type != emp::GetTypeID()) { + return emp::to_string("Checking for type as ", emp::GetTypeID(), + ", but recorded as ", setting_it->second.type); + } + if (setting_it->second.count != count) { + return emp::to_string("Checking for count of ", count, + ", but recorded as ", setting_it->second.count); + } + return emp::to_string("Has<", emp::GetTypeID(), ">(", key, ",", count, ") should be true."); + } /// Return the number of bytes in the default image. size_t GetImageSize() const { return image_size; } @@ -98,31 +142,44 @@ namespace emp { return setting_map.find(id)->second.type; } + // What is the count associated with a given entry. + size_t GetCount(size_t id) const { + emp_assert(HasID(id), id); + return setting_map.find(id)->second.count; + } + + /// Determine is entry is some form of numeric type. + bool IsNumeric(size_t id) const { + return GetType(id).IsArithmetic(); + } + + bool IsNumeric(const std::string & name) const { + return IsNumeric(GetID(name)); + } + /// Prevent this layout from being modified. void Lock() { is_locked = true; } /// Add a new variable with a specified type, name and value. template - size_t Add(MemoryImage & base_memory, - const std::string & name, - const T & default_value, - const std::string & desc="", - const std::string & notes="") { - emp_assert(!HasName(name), name); // Make sure this doesn't already exist. - emp_assert(is_locked == false); // Cannot add to a locked layout. 
- - // std::cout << "\nL: Adding var '" << name - // << "' of type " << emp::GetTypeID() - // << " to DataMap with " << id_map.size() << " elements" - // << " totalling " << image_size << " bytes." - // << std::endl; - - // Analyze the size of the new object and where it will go. + size_t Add(MemoryImage & base_memory, // Memory to store prototype objects. + const std::string & name, // Lookup name for this variable. + const T & default_value, // Initial value for each object in this entry. + const std::string & desc="", // Description associated with this variable + const std::string & notes="", // Additional information. + const size_t count = 1 // Number of values to store with this entry. + ) { + emp_assert(!HasName(name), name); // Make sure this doesn't already exist. + emp_assert(count >= 1); // Must add at least one instance of an object. + emp_assert(is_locked == false); // Cannot add to a locked layout. + + // Analyze the size of the new object(s) and where it will go. constexpr const size_t obj_size = sizeof(T); + const size_t entry_size = obj_size * count; const size_t pos = image_size; // Create a new image with enough room for the new object and move the old data over. - MemoryImage new_memory(image_size + obj_size); + MemoryImage new_memory(image_size + entry_size); MoveImageContents(base_memory, new_memory); // Now that the data is moved, cleanup the old image and put the new one in place. @@ -130,18 +187,22 @@ namespace emp { // Setup this new object. image_size = base_memory.GetSize(); - base_memory.Construct(pos, default_value); + for (size_t i = 0; i < count; ++i) { + base_memory.Construct(pos + i*obj_size, default_value); + } base_memory.init_to = image_size; // Store the information about this object. id_map[name] = pos; - setting_map[pos] = { emp::GetTypeID(), name, desc, notes, false }; + setting_map[pos] = { emp::GetTypeID(), name, desc, notes, count, false }; // Store copy constructor if needed. 
if (std::is_trivially_copyable() == false) { copy_constructors.push_back( - [pos](const MemoryImage & from_image, MemoryImage & to_image) { - to_image.CopyObj(pos, from_image); + [pos,count](const MemoryImage & from_image, MemoryImage & to_image) { + for (size_t i = 0; i < count; ++i) { + to_image.CopyObj(pos + i*sizeof(T), from_image); + } } ); } @@ -149,15 +210,21 @@ namespace emp { // Store destructor if needed. if (std::is_trivially_destructible() == false) { destructors.push_back( - [pos](MemoryImage & image) { image.Destruct(pos); } + [pos,count](MemoryImage & image) { + for (size_t i = 0; i < count; ++i) { + image.Destruct(pos + i*sizeof(T)); + } + } ); } // Store move constructor if needed. if (std::is_trivially_destructible() == false) { move_constructors.push_back( - [pos](MemoryImage & from_image, MemoryImage & to_image) { - to_image.MoveObj(pos, from_image); + [pos,count](MemoryImage & from_image, MemoryImage & to_image) { + for (size_t i = 0; i < count; ++i) { + to_image.MoveObj(pos + i*sizeof(T), from_image); + } } ); } @@ -177,7 +244,7 @@ namespace emp { image.init_to = 0; } - /// Destruct and delete all memomry assocated with this DataMap. + /// Destruct and delete all memory associated in the provided image. void ClearImage(MemoryImage & image) const { // If this memory image is already clear, stop. if (image.GetSize() == 0) return; diff --git a/include/emp/data/DataLog.hpp b/include/emp/data/DataLog.hpp index 8a13a52304..b9dc62548d 100644 --- a/include/emp/data/DataLog.hpp +++ b/include/emp/data/DataLog.hpp @@ -7,6 +7,8 @@ * @brief Tools for processing a single set of data. * @note Status: ALPHA * + * A DataLog takes in a continuous series of data and allows for easy analysis, both by + * performing calculations on those values and by outputting ascii histograms, etc. 
*/ #ifndef EMP_DATA_DATALOG_HPP_INCLUDE diff --git a/include/emp/data/DataMap.hpp b/include/emp/data/DataMap.hpp index 3e0bc3eb2a..c15f0a698c 100644 --- a/include/emp/data/DataMap.hpp +++ b/include/emp/data/DataMap.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2018-2021. + * @date 2018-2022. * * @file DataMap.hpp * @brief A DataMap links names to arbitrary object types. @@ -50,10 +50,10 @@ * * - We should be able to keep a series of values, not just a single one. This can be done with * a series of new functions: - * AddLog() instead of AddVar() when new veriable is created. - * Get() should still work for latest value. Ideally keep lates in first position. + * AddLog() instead of AddVar() when new variable is created. + * Get() should still work for latest value. Ideally keep latest in first position. * Change non-const Get() to GetRef() which cannot be used for a log. - * Add GetAve() function for logs as well as GetLog() for the full vector. + * Add GetAve() function for logs as well as GetLog() for the full series (as std::span?). * * - Settings for all entries should have more information on how they are dealt with, such as if * they should be included in output an how. Perhaps a system of tags for dynamic use? @@ -67,7 +67,7 @@ * * - A user should be able to override copy constructors (though probably not move constructors * or destructors?). Then the copy process can be more customizable, for example having some - * settings retrun to the default value or be further processed. It's also possible to have + * settings return to the default value or be further processed. It's also possible to have * multiple types of copies, so if we indicate a "Copy birth" we get the above, but if we * indicate a "Copy clone" or "Copy inject" we do something different. 
We also probably need * to allow for multiple parents... @@ -78,14 +78,17 @@ * images repeatedly. * * - Some way of grouping memory across DataMaps so that a particular entry for many maps has all - * of its instances consecutive in memory? This seems really tricky to pull of, but if we can + * of its instances consecutive in memory? This seems really tricky to pull off, but if we can * do it, the improvement in cache performance could be dramatic. + * + * - Rename DataLayout and MemoryImage to DataMap_Layout and DataMap_Memory? */ #ifndef EMP_DATA_DATAMAP_HPP_INCLUDE #define EMP_DATA_DATAMAP_HPP_INCLUDE #include // For std::memcpy +#include #include #include @@ -95,6 +98,7 @@ #include "../tools/string_utils.hpp" #include "DataLayout.hpp" +#include "Datum.hpp" #include "MemoryImage.hpp" namespace emp { @@ -107,6 +111,8 @@ namespace emp { DataMap(emp::Ptr in_layout_ptr, size_t in_size) : memory(in_size), layout_ptr(in_layout_ptr) { ; } + // -- Helper functions -- + /// If the current layout is shared, make a copy of it. void MakeLayoutUnique() { // Make sure we have a layout, even if empty. @@ -118,6 +124,7 @@ namespace emp { layout_ptr.New(*layout_ptr); } } + public: DataMap() : layout_ptr(nullptr) { ; } DataMap(const DataMap & in_map) : layout_ptr(in_map.layout_ptr) { @@ -132,44 +139,20 @@ namespace emp { } // Copy Operator... - DataMap & operator=(const DataMap & in_map) { - // If we have a layout pointer, use it to clear our memory image and update it if needed. - if (layout_ptr) { - layout_ptr->ClearImage(memory); - - // If layout pointer doesn't match the new one, shift over. - if (layout_ptr != in_map.layout_ptr) { - layout_ptr->DecMaps(); // Remove self from counter. - if (layout_ptr->GetNumMaps() == 0) layout_ptr.Delete(); // Delete layout if now unused. - layout_ptr = in_map.layout_ptr; // Shift to new layout. - if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. - } - } - - // Otherwise we DON'T have a layout pointer, so setup the new one. 
- else { - layout_ptr = in_map.layout_ptr; // Shift to new layout. - if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. - } - - // Now that we know we have a good layout, copy over the image. - layout_ptr->CopyImage(in_map.memory, memory); - - return *this; - } + DataMap & operator=(const DataMap & in_map); ~DataMap() { /// If we have a layout pointer, clean up! if (!layout_ptr.IsNull()) { - // Clean up the current MemoryImage. - layout_ptr->ClearImage(memory); - - // Clean up the DataLayout - layout_ptr->DecMaps(); + layout_ptr->ClearImage(memory); // Clean up the current MemoryImage. + layout_ptr->DecMaps(); // Clean up the DataLayout if (layout_ptr->GetNumMaps() == 0) layout_ptr.Delete(); } } + // Built-in types. + using key_type = std::string; + /// Determine how many Bytes large this image is. size_t GetSize() const { return memory.GetSize(); } @@ -197,19 +180,26 @@ namespace emp { return layout_ptr->IsType(GetID(name)); } + /// Verify settings + template + bool Has(ARGS &&... args) const { + emp_assert(layout_ptr); + return layout_ptr->Has(std::forward(args)...); + } + /// Retrieve a variable by its type and position. template T & Get(size_t id) { - emp_assert(HasID(id), "Can only get IDs the are available in DataMap.", id, GetSize()); - emp_assert(IsType(id)); + emp_assert(Has(id), "Can only get IDs/types that match DataMap in type and count.", + id, GetSize(), layout_ptr->DiagnoseHas(id)); return memory.Get(id); } /// Retrieve a const variable by its type and position. template const T & Get(size_t id) const { - emp_assert(HasID(id), id, GetSize()); - emp_assert(IsType(id)); + emp_assert(Has(id), "Can only get IDs/types that match DataMap in type and count.", + id, GetSize(), layout_ptr->DiagnoseHas(id)); return memory.Get(id); } @@ -217,20 +207,51 @@ namespace emp { /// Retrieve a variable by its type and name. (Slower!) 
template T & Get(const std::string & name) { - emp_assert(HasName(name), name); - emp_assert(IsType(name), "DataMap::Get() must be provided the correct type.", - name, GetType(name), emp::GetTypeID()); + emp_assert(Has(name), "Can only get name/types that match DataMap in type and count.", + name, GetSize(), layout_ptr->DiagnoseHas(name)); return memory.Get(GetID(name)); } /// Retrieve a const variable by its type and name. (Slower!) template const T & Get(const std::string & name) const { - emp_assert(HasName(name)); - emp_assert(IsType(name), name, GetType(name), emp::GetTypeID()); + emp_assert(Has(name), "Can only get name/types that match DataMap in type and count.", + name, GetSize(), layout_ptr->DiagnoseHas(name)); return memory.Get(GetID(name)); } + // Retrieve a set of variables by id (as an std::span) + template + std::span Get(size_t id, size_t count) { + emp_assert(Has(id, count), "Can only get name/types that match DataMap.", + id, count, GetSize(), layout_ptr->DiagnoseHas(id,count)); + return memory.Get(id, count); + } + + // Retrieve a const set of variables by id (as an std::span) + template + std::span Get(size_t id, size_t count) const { + emp_assert(Has(id, count), "Can only get name/types that match DataMap.", + id, GetSize(), layout_ptr->DiagnoseHas(id,count)); + return memory.Get(id, count); + } + + // Retrieve a set of variables by name (as an std::span) + template + std::span Get(const std::string & name, size_t count) { + emp_assert(HasName(name), "Cannot get names not stored in DataMap.", + name, layout_ptr->DiagnoseHas(name, count)); + return Get(GetID(name), count); + } + + // Retrieve a const set of variables by name (as an std::span) + template + std::span Get(const std::string & name, size_t count) const { + emp_assert(HasName(name), "Cannot get names not stored in DataMap.", + name, layout_ptr->DiagnoseHas(name, count)); + return Get(GetID(name), count); + } + /// Set a variable by ID. 
template T & Set(size_t id, const T & value) { return (Get(id) = value); @@ -253,6 +274,9 @@ namespace emp { return layout_ptr->GetType(GetID(name)); } + bool IsNumeric(size_t id) const { return GetType(id).IsArithmetic(); } + bool IsNumeric(const std::string & name) const { return IsNumeric(GetID(name)); } + /// Get the memory at the target position, assume it is the provided type, and convert the /// value found there to double. double GetAsDouble(size_t id, TypeID type_id) const { @@ -269,16 +293,25 @@ namespace emp { /// Get the memory at the target position, assume it is the provided type, and convert the /// value found there to string. - std::string GetAsString(size_t id, TypeID type_id) const { + std::string GetAsString(size_t id, TypeID type_id, size_t count=1) const { emp_assert(HasID(id), "Can only Get IDs that are available in DataMap.", id, GetSize()); emp_assert(type_id == layout_ptr->GetType(id)); - return type_id.ToString(memory.GetPtr(id)); + emp_assert(count == layout_ptr->GetCount(id)); // Requested count must match the layout. + if (count == 1) return type_id.ToString(memory.GetPtr(id)); + else { + size_t obj_size = type_id.GetSize(); + std::stringstream ss; + for (size_t i = 0; i < count; ++i) { + ss << '[' << type_id.ToString(memory.GetPtr(id+i*obj_size)) << ']'; + } + return ss.str(); + } } /// Get the memory at the target position, lookup it's type, and convert the value to string. std::string GetAsString(size_t id) const { emp_assert(HasID(id), "Can only get IDs the are available in DataMap.", id, GetSize()); - return GetAsString(id, layout_ptr->GetType(id)); + return GetAsString(id, layout_ptr->GetType(id), layout_ptr->GetCount(id)); } /// Add a new variable with a specified type, name and value. 
@@ -286,9 +319,17 @@ namespace emp { size_t AddVar(const std::string & name, const T & default_value, const std::string & desc="", - const std::string & notes="") { + const std::string & notes="", + size_t count=1) { + MakeLayoutUnique(); // If the current layout is shared, first make a copy of it. + return layout_ptr->Add(memory, name, default_value, desc, notes, count); + } + + /// Add a new variable with just a specified type and name; must be able to default. + template + size_t AddVar(const std::string & name) { MakeLayoutUnique(); // If the current layout is shared, first make a copy of it. - return layout_ptr->Add(memory, name, default_value, desc, notes); + return layout_ptr->Add(memory, name, T{}, "", "", 1); } /// Test if this DataMap uses the specified layout. @@ -296,6 +337,9 @@ namespace emp { return layout_ptr == &in_layout; } + /// Test if this DataMap has ANY layout. + bool HasLayout() const { return layout_ptr; } + /// Test if this DataMap is using the identical layout as another DataMap. bool SameLayout(const emp::DataMap & in_dm) const { return layout_ptr == in_dm.layout_ptr; @@ -303,7 +347,10 @@ namespace emp { } /// Get the DataLayout so that it can be used elsewhere. - const emp::DataLayout & GetLayout() { return *layout_ptr; } + emp::DataLayout & GetLayout() { return *layout_ptr; } + + /// Get the DataLayout so that it can be used elsewhere. + const emp::DataLayout & GetLayout() const { return *layout_ptr; } /// Test if this layout is locked (i.e., it cannot be changed.) bool IsLocked() const { return layout_ptr && layout_ptr->IsLocked(); } @@ -313,8 +360,83 @@ namespace emp { MakeLayoutUnique(); layout_ptr->Lock(); } + + + ///////////////////////////////////////////////////////////////// + // Tools for working with DataMaps.... + + + /// Return a function that takes in a data map and (efficiently) returns a Datum using the + /// specified entry. 
+ static std::function + MakeDatumAccessor(const emp::DataLayout & layout, size_t id) { + // This must be a DataLayout entry name. + emp_assert(layout.HasID(id), "DatumAccessor pointing to invalid id", id); + emp_assert(layout.GetCount(id) == 1, + "DatumAccessors must have a count of 1 for proper conversion.", + layout.GetCount(id)); + TypeID type_id = layout.GetType(id); + + // Return an appropriate accessor for this value. + if (type_id.IsType()) { // Explicit STRING + return [id](const emp::DataMap & dm){ + return emp::Datum(dm.Get(id)); + }; + } + else if (type_id.IsType()) { // Explicit DOUBLE + return [id](const emp::DataMap & dm){ + return emp::Datum(dm.Get(id)); + }; + } + else if (type_id.IsArithmetic()) { // Other NUMERIC type + return [id,type_id](const emp::DataMap & dm){ + return emp::Datum(type_id.ToDouble(dm.memory.GetPtr(id))); + }; + } + else { // Resort to STRING + return [id,type_id](const emp::DataMap & dm){ + return emp::Datum(type_id.ToString(dm.memory.GetPtr(id))); + }; + } + } + + /// Return a function that takes in a data map and (efficiently) returns a Datum using the + /// specified name. + static auto MakeDatumAccessor(const emp::DataLayout & layout, const std::string & name) { + emp_assert(layout.HasName(name), "DatumAccessor not pointing to valid name", name); + return MakeDatumAccessor(layout, layout.GetID(name)); + } }; + + + // Copy Operator; marked inline because it is defined in a header outside the class body. + inline DataMap & DataMap::operator=(const DataMap & in_map) { + if (this == &in_map) return *this; // Self-assignment would clear the image we copy from. + if (layout_ptr) { // Clear our memory image; update the layout below if needed. + layout_ptr->ClearImage(memory); + + // If layout pointer doesn't match the new one, shift over. + if (layout_ptr != in_map.layout_ptr) { + layout_ptr->DecMaps(); // Remove self from counter. + if (layout_ptr->GetNumMaps() == 0) layout_ptr.Delete(); // Delete layout if now unused. + layout_ptr = in_map.layout_ptr; // Shift to new layout. + if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. 
+ } + } + + // Otherwise we DON'T have a layout pointer, so setup the new one. + else { + layout_ptr = in_map.layout_ptr; // Shift to new layout. + if (layout_ptr) layout_ptr->IncMaps(); // Add self to new counter. + } + + // Copy over the image, but only if we ended up with a layout (in_map may not have one). + if (layout_ptr) layout_ptr->CopyImage(in_map.memory, memory); + + return *this; + } + + } #endif // #ifndef EMP_DATA_DATAMAP_HPP_INCLUDE diff --git a/include/emp/data/DataNode.hpp b/include/emp/data/DataNode.hpp index fd36dd8f2d..08627db1d7 100644 --- a/include/emp/data/DataNode.hpp +++ b/include/emp/data/DataNode.hpp @@ -127,7 +127,10 @@ namespace emp { /// Calculate the median of observed values double GetMedian() const {emp_assert(false, "Calculating median requires a DataNode with the Log modifier"); return 0;} /// Calculate a percentile of observed values - double GetPercentile(const double pct) const {emp_assert(false, "Calculating percentile requires a DataNode with the Log modifier"); return 0;} + double GetPercentile(const double /*pct*/) const { + emp_assert(false, "Calculating percentile requires a DataNode with the Log modifier"); + return 0; + } const std::string & GetName() const { return emp::empty_string(); } const std::string & GetDescription() const { return emp::empty_string(); } @@ -142,7 +145,7 @@ namespace emp { emp_assert(false, "Invalid call for DataNode config."); } - void AddDatum(const VAL_TYPE & val) { val_count++; } + void AddDatum(const VAL_TYPE & /*val*/) { val_count++; } void Reset() { val_count = 0; } diff --git a/include/emp/data/Datum.hpp b/include/emp/data/Datum.hpp new file mode 100644 index 0000000000..302290d83d --- /dev/null +++ b/include/emp/data/Datum.hpp @@ -0,0 +1,168 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022. 
+ * + * @file Datum.hpp + * @brief A single piece of data, either a value or a string. + * @note Status: ALPHA + * + * DEVELOPER NOTES: + * - For now, using unions, but this creates complications with non-trivial constructor/destructor + * for strings, so could try shifting over. + */ + +#ifndef EMP_DATA_DATUM_HPP_INCLUDE +#define EMP_DATA_DATUM_HPP_INCLUDE + +#include + +#include "../base/assert.hpp" +#include "../base/notify.hpp" +#include "../math/math.hpp" + +namespace emp { + + class Datum { + private: + union { + double num; + std::string str; + }; + bool is_num = true; + + void InitString() { new (&str) std::string; } + void InitString(const std::string & in) { new (&str) auto(in); } + void FreeString() { str.~basic_string(); } + public: + Datum() : num(0.0), is_num(true) { } + Datum(double in) : num(in), is_num(true) { } + Datum(const std::string & in) : is_num(false) { InitString(in); } + Datum(const char * in) : is_num(false) { InitString(in); } + Datum(const Datum & in) { + is_num = in.is_num; + if (is_num) num = in.num; + else InitString(in.str); + } + ~Datum() { if (!is_num) FreeString(); } + + bool IsDouble() const { return is_num; } ///< Is this natively stored as a double? + bool IsString() const { return !is_num; } ///< Is this natively stored as a string? + + /// If we know Datum is a Double, we can request its native form. + double & NativeDouble() { emp_assert(is_num); return num; } + double NativeDouble() const { emp_assert(is_num); return num; } + + /// If we know Datum is a String, we can request its native form. + std::string & NativeString() { emp_assert(!is_num); return str; } + const std::string & NativeString() const { emp_assert(!is_num); return str; } + + double AsDouble() const { + if (is_num) return num; + // Make sure we have a value here; otherwise provide a warning and return 0.0. 
+ if (str.size() > 0 && + (std::isdigit(static_cast<unsigned char>(str[0])) || (str[0] == '-' && str.size() > 1 && std::isdigit(static_cast<unsigned char>(str[1]))))) { + return std::stod(str); + } + + // Otherwise this string is invalid. + emp::notify::Warning("Cannot convert string '", str, "' to double."); + return 0.0; + } + + std::string AsString() const { + if (!is_num) return str; + std::stringstream ss; + ss << num; + return ss.str(); + //return std::to_string(num); + } + +// operator bool() const { return AsDouble() != 0.0; } + operator double() const { return AsDouble(); } + operator std::string() const { return AsString(); } + + Datum & SetDouble(double in) { // If this were previously a string, clean it up! + if (!is_num) { + FreeString(); + is_num = true; + } + num = in; + return *this; + } + + Datum & SetString(const std::string & in) { + if (is_num) { // If this were previously a num, change to string. + InitString(in); + is_num = false; + } + else str = in; // Already a string; just change its value. + return *this; + } + + Datum & Set(const Datum & in) { + if (in.is_num) return SetDouble(in.num); + else return SetString(in.str); + } + + Datum & operator=(double in) { return SetDouble(in); } + Datum & operator=(const std::string & in) { return SetString(in); } + Datum & operator=(const char * in) { return SetString(in); } + Datum & operator=(const Datum & in) { return Set(in); } + + // Unary operators + Datum operator-() const { return -AsDouble(); } + Datum operator!() const { return AsDouble() == 0.0; } + + // Binary operators + int CompareNumber(double rhs) const { + const double val = AsDouble(); + return (val == rhs) ? 0 : ((val < rhs) ? -1 : 1); + } + + int CompareString(const std::string & rhs) const { + if (is_num) { + const std::string val = AsString(); // Use the same formatting as AsString() so comparisons agree with conversions. + return (val == rhs) ? 0 : ((val < rhs) ? -1 : 1); + } + return (str == rhs) ? 0 : ((str < rhs) ? 
-1 : 1); + } + + int Compare(double rhs) const { return CompareNumber(rhs); } + int Compare(const std::string & rhs) const { return CompareString(rhs); } + int Compare(const char * rhs) const { return CompareString(rhs); } + int Compare(const Datum & rhs) const { return (rhs.is_num) ? CompareNumber(rhs) : CompareString(rhs); } + + template bool operator==(const T & rhs) const { return Compare(rhs) == 0; } + template bool operator!=(const T & rhs) const { return Compare(rhs) != 0; } + template bool operator< (const T & rhs) const { return Compare(rhs) == -1; } + template bool operator>=(const T & rhs) const { return Compare(rhs) != -1; } + template bool operator> (const T & rhs) const { return Compare(rhs) == 1; } + template bool operator<=(const T & rhs) const { return Compare(rhs) != 1; } + + Datum operator+(const Datum & in) const { + if (IsDouble()) return NativeDouble() + in.AsDouble(); + return NativeString() + in.AsString(); + } + Datum operator*(const Datum & in) const { + if (IsDouble()) return NativeDouble() * in.AsDouble(); + std::string out_string; + size_t count = static_cast(in.AsDouble()); + out_string.reserve(NativeString().size() * count); + for (size_t i = 0; i < count; i++) out_string += NativeString(); + return out_string; + } + Datum operator-(const Datum & in) const { return AsDouble() - in.AsDouble(); } + Datum operator/(const Datum & in) const { return AsDouble() / in.AsDouble(); } + Datum operator%(const Datum & in) const { return emp::Mod(AsDouble(), in.AsDouble()); } + + }; + + inline std::ostream & operator<<(std::ostream & out, const emp::Datum & d) { + out << d.AsString(); + return out; + } + +} + +#endif // #ifndef EMP_DATA_DATUM_HPP_INCLUDE diff --git a/include/emp/data/MemoryImage.hpp b/include/emp/data/MemoryImage.hpp index 86bec3f8de..4a45446819 100644 --- a/include/emp/data/MemoryImage.hpp +++ b/include/emp/data/MemoryImage.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright 
Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2019 + * @date 2019-2022. * * @file MemoryImage.hpp * @brief A managed set of Bytes to store any kind of data. @@ -14,6 +14,7 @@ #include // For std::memcpy #include // For placement new +#include #include #include "../base/assert.hpp" @@ -28,7 +29,7 @@ namespace emp { private: emp::Ptr image = nullptr; ///< Current memory image. size_t size = 0; ///< Size of current image. - size_t init_to = 0; ///< How far if the current image initialized? + size_t init_to = 0; ///< How far of the current image is initialized? // Setup all of the uninitialized memory to be non-zero. void Fuzz() { @@ -77,6 +78,16 @@ namespace emp { return *GetPtr(pos); } + /// Get proper spans to sets of same-type objects represented in this image. + template std::span Get(size_t pos, size_t count) { + emp_assert(pos < GetInitSize(), "Only get a span from initialized memory."); + return std::span( GetPtr(pos).Raw(), count ); + } + template std::span Get(size_t pos, size_t count) const { + emp_assert(pos < GetInitSize(), "Only get a span from initialized memory."); + return std::span( GetPtr(pos).Raw(), count ); + } + /// Change the size of this memory. Assume all cleanup and setup is done elsewhere. void RawResize(size_t new_size) { // If the size is already good, stop here. @@ -92,7 +103,7 @@ namespace emp { } /// Copy all of the bytes directly from another memory image. Size manipulation must be - /// done beforehand to ensure sufficient space is availabe. + /// done beforehand to ensure sufficient space is available. void RawCopy(const MemoryImage & from_memory) { emp_assert(GetSize() >= from_memory.GetSize()); if (from_memory.GetSize() == 0) return; // Nothing to copy! 
diff --git a/include/emp/data/README.md b/include/emp/data/README.md new file mode 100644 index 0000000000..b851d57b7c --- /dev/null +++ b/include/emp/data/README.md @@ -0,0 +1,62 @@ +# Data Management Tools + +This directory contains a set of tools for managing more or less generic data. + + +## Individual pieces of data + +* Datum.hpp - emp::Datum holds a single value, which can be a string or a float. It shifts + between these types as needed. + + +## Tracking series of Data + +* DataNode.hpp - Manage a stream of data of a specific type; can specify at compile time how + data should be handled (tracking averages, modes, entropy, etc.) Can also be made to + pull data when needed. + +* DataInterface.hpp - A generic interface to DataNodes to make them easy to manage collectively. + +* DataManager.hpp - Manages a collection of DataNodes that all have the same settings. + +* DataFile.hpp - A collection of DataNodes that automatically output desired information to + an output file. + +* DataLog.hpp - Manage a series of data, tracking calculations and printing histograms. + + +## Tracking arbitrary named data + +* DataMap.hpp - Links variable names to arbitrary type values; these are stored in a single + memory block for locality and easy group copying. + +* DataLayout.hpp - Keeps track of information associated with each variable in a data map, + including location, type, description, etc. + +* MemoryImage.hpp - Block of memory managed using a given DataLayout. + +* AnnotatedType.hpp - Base class for objects that have a linked DataMap. + +* VarMap.hpp - Similar to DataMap, but types are stored with variables and not consecutive + in memory. + +* DataMapParser.hpp - A parser to take an equation based on variables in a DataLayout that + will produce a lambda. If a DataMap is passed into the lambda the equation will be + calculated and the result returned. + +* Trait.hpp - ? + + +## To add? 
+ +DataFrame - rows are entries, columns are types, stored by column for fast calculation. +DataColumn - vector from DataFrame with type information. +DataRow - Same interface as DataMap; refers to associated DataFrame. + +DataTracker - Handles all of the functionality of DataNode, DataLog, etc., but more dynamic + using lambdas to deal with values as needed. + + +## To modify? + +Datum - should be able to do uint64_t? diff --git a/include/emp/data/SimpleParser.hpp b/include/emp/data/SimpleParser.hpp new file mode 100644 index 0000000000..eb242fec90 --- /dev/null +++ b/include/emp/data/SimpleParser.hpp @@ -0,0 +1,626 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022. + * + * @file SimpleParser.hpp + * @brief Parser to convert function descriptions to lambdas using maps for variable lookup. + * @note Status: ALPHA + * + * A fully functional parser that will convert a string-description of a function to a C++ + * lambda. A map-typed object should be passed in to provide values associated with variables. + * Allowed map types include std::map<std::string,T>, std::unordered_map<std::string,T>, + * emp::DataMap, and (soon) derivations from emp::AnnotatedType. For standard maps, T must be + * convertible to emp::Datum. + * + * Developer TODO: + * - Setup operator RegEx to be built dynamically + * - Setup LVALUES as a type, and allow assignment + * - Allow types other than Datum (string and double)? 
+ */ + +#ifndef EMP_DATA_SIMPLEPARSER_HPP_INCLUDE +#define EMP_DATA_SIMPLEPARSER_HPP_INCLUDE + +#include +#include +#include +#include + +#include "../base/notify.hpp" +#include "../compiler/Lexer.hpp" +#include "../compiler/regex_utils.hpp" +#include "../data/Datum.hpp" +#include "../datastructs/ra_map.hpp" +#include "../math/Random.hpp" +#include "../meta/meta.hpp" + +#include "AnnotatedType.hpp" +#include "DataMap.hpp" + +namespace emp { + + class SimpleParser { + private: + + template + struct ValueType { + using fun_t = std::function; + enum type_t { ERROR=0, VALUE, FUNCTION }; + + type_t type; + emp::Datum value; + fun_t fun; + + ValueType() : type(ERROR) {} + ValueType(const ValueType &) = default; + ValueType(double in_val) : type(VALUE), value(in_val) { } + ValueType(std::string in_val) : type(VALUE), value(in_val) { } + ValueType(emp::Datum in_val) : type(VALUE), value(in_val) { } + ValueType(fun_t in_fun) : type(FUNCTION), fun(in_fun) { } + + ValueType & operator=(const ValueType &) = default; + ValueType & operator=(emp::Datum in_val) { type = VALUE; value = in_val; return *this; } + ValueType & operator=(double in_val) { type = VALUE; value = in_val; return *this; } + ValueType & operator=(const std::string & in_val) { type = VALUE; value = in_val; return *this; } + ValueType & operator=(fun_t in_fun) { type = FUNCTION; fun = in_fun; return *this; } + + fun_t AsFunction() { + if (type==FUNCTION) return fun; + else return [v=value](ARG_T){ return v; }; + } + }; + + template + struct SymbolTable { + using arg_t = const MAP_T &; + using fun_t = std::function; + using value_t = ValueType; + + SymbolTable() { } + SymbolTable(arg_t) { } + + static_assert( std::is_same(), + "Any map type used by the parser must have a key type of std::string"); + + static fun_t MakeDatumAccessor(const std::string & name) { + return [name](arg_t symbol_vals){ + auto val_it = symbol_vals.find(name); + emp_assert(val_it != symbol_vals.end()); + return emp::Datum(val_it->second); 
+ }; + } + + /// By default, let the value handle its own conversion to a function. + auto AsFunction(ValueType & val) const { return val.AsFunction(); } + }; + + template + struct SymbolTable, DUMMY_T> { + using map_t = emp::ra_map; + using arg_t = const map_t &; + using fun_t = std::function; + using value_t = ValueType; + + const typename map_t::layout_t & layout; + + SymbolTable(const emp::ra_map & in_map) + : layout(in_map.GetLayout()) { } + + fun_t MakeDatumAccessor(const std::string & name) const { + emp_assert(layout.find(name) != layout.end()); + size_t id = layout.find(name)->second; + #ifdef NDEBUG + return [id](arg_t symbol_vals){ + #else + return [id,name](arg_t symbol_vals){ // Keep name in debug mode to check id. + emp_assert(symbol_vals.GetID(name) == id); + #endif + return emp::Datum(symbol_vals.AtID(id)); + }; + } + + /// By default, let the value handle its own conversion to a function. + auto AsFunction(ValueType & val) const { + // @CAO: Could check layout correctness in debug mode. + return val.AsFunction(); + } + }; + + /// Specialty implementation for DataLayouts. + template + struct SymbolTable { + using arg_t = const emp::DataMap &; + using fun_t = std::function; + using value_t = ValueType; + + const emp::DataLayout & layout; + + SymbolTable(const emp::DataLayout & in_layout) : layout(in_layout) { } + + auto MakeDatumAccessor(const std::string & name) const { + return emp::DataMap::MakeDatumAccessor(layout, name); + } + + auto AsFunction(ValueType & val) const { + #ifdef NDEBUG + return val.AsFunction(); + #else + // If we are in debug mode, add wrapper to ensure the DataMap has the correct layout. + return [fun=val.AsFunction(),layout_ptr=&layout](arg_t dm) { + emp_assert(dm.HasLayout(*layout_ptr)); + return fun(dm); + }; + #endif + } + + }; + + /// Special DataMap implementation that just converts to underlying layout. 
+ template + struct SymbolTable : public SymbolTable { + SymbolTable(const emp::DataMap & dm) : SymbolTable(dm.GetLayout()) { } + }; + + + using pos_t = emp::TokenStream::Iterator; + + static constexpr const bool verbose = false; + + class MapLexer : public emp::Lexer { + private: + int token_identifier; ///< Token id for identifiers + int token_number; ///< Token id for literal numbers + int token_string; ///< Token id for literal strings + int token_char; ///< Token id for literal characters + int token_external; ///< Token id for an external value that was passed in + int token_symbol; ///< Token id for other symbols + + public: + MapLexer() { + // Whitespace and comments should always be dismissed (top priority) + IgnoreToken("Whitespace", "[ \t\n\r]+"); + IgnoreToken("//-Comments", "//.*"); + IgnoreToken("/*...*/-Comments", "/[*]([^*]|([*]+[^*/]))*[*]+/"); + + // Meaningful tokens have next priority. + + // An identifier must begin with a letter, underscore, or dot, and followed by + // more of the same OR numbers or brackets. + token_identifier = AddToken("Identifier", "[a-zA-Z_.][a-zA-Z0-9_.[\\]]*"); + + // A literal number must begin with a digit; it can have any number of digits in it and + // optionally a decimal point. + token_number = AddToken("Literal Number", "[0-9]+(\\.[0-9]+)?"); + + // A string must begin and end with a quote and can have an escaped quote in the middle. + token_string = AddToken("Literal String", "\\\"([^\"\\\\]|\\\\.)*\\\""); + + // A literal char must begin and end with a single quote. It will always be treated as + // its ascii value. + token_char = AddToken("Literal Character", "'([^'\n\\\\]|\\\\.)+'"); + + // An external value that was passed in will be a dollar sign ('$') followed by the + // position of the value to be used (e.g., '$3'). + token_external = AddToken("External Value", "[$][0-9]+"); + + // Symbols should have least priority. 
They include any solitary character not listed + // above, or pre-specified multi-character groups. + token_symbol = AddToken("Symbol", ".|\"==\"|\"!=\"|\"<=\"|\">=\"|\"~==\"|\"~!=\"|\"~<\"|\"~>\"|\"~<=\"|\"~>=\"|\"&&\"|\"||\"|\"**\"|\"%%\""); + } + + bool IsID(const emp::Token & token) const noexcept { return token.id == token_identifier; } + bool IsNumber(const emp::Token & token) const noexcept { return token.id == token_number; } + bool IsString(const emp::Token & token) const noexcept { return token.id == token_string; } + bool IsChar(const emp::Token & token) const noexcept { return token.id == token_char; } + bool IsExternal(const emp::Token & token) const noexcept { return token.id == token_external; } + bool IsSymbol(const emp::Token & token) const noexcept { return token.id == token_symbol; } + }; + + struct BinaryOperator { + using fun_t = std::function; + size_t prec; + fun_t fun; + void Set(size_t in_prec, fun_t in_fun) { prec = in_prec; fun = in_fun; } + }; + + struct Function { + using fun0_t = std::function; + using fun1_t = std::function; + using fun2_t = std::function; + using fun3_t = std::function; + + size_t num_args = 0; + fun0_t fun0; fun1_t fun1; fun2_t fun2; fun3_t fun3; + + void Set0(fun0_t in_fun) { num_args = 0; fun0 = in_fun; } + void Set1(fun1_t in_fun) { num_args = 1; fun1 = in_fun; } + void Set2(fun2_t in_fun) { num_args = 2; fun2 = in_fun; } + void Set3(fun3_t in_fun) { num_args = 3; fun3 = in_fun; } + }; + + // --------- MEMBER VARIABLES ----------- + MapLexer lexer; + + // Operators and functions that should be used when parsing. + std::unordered_map> unary_ops; + std::unordered_map binary_ops; + std::unordered_map functions; + emp::vector external_vals; + + // The set of data map entries accessed when the last function was parsed. + std::set var_names; + + // Track the number of errors and the function to call when errors occur. + template + size_t ParseError(Ts &&... 
args) { + emp::notify::Exception("SimpleParser::PARSE_ERROR", emp::to_string(args...), this); + return 1; + } + + public: + SimpleParser(bool use_defaults=true) { + if (use_defaults) { + AddDefaultOperators(); + AddDefaultFunctions(); + } + } + + /// Construct with a random number generator to automatically include random functions. + SimpleParser(bool use_defaults, emp::Random & random) : SimpleParser(use_defaults) + { AddRandomFunctions(random); } + + /// Get the set of variable names that the most recently generated function used. + const std::set & GetNamesUsed() const { return var_names; } + + /// Get the set of names used in the provided equation. + const std::set & GetNamesUsed(const std::string & expression) { + var_names.clear(); + emp::TokenStream tokens = lexer.Tokenize(expression, std::string("Expression: ") + expression); + for (emp::Token token : tokens) { + if (lexer.IsID(token) && !emp::Has(functions, token.lexeme)) { + var_names.insert(token.lexeme); + } + } + return var_names; + } + + + /// Add a unary operator + void AddOp(const std::string & op, std::function fun) { + unary_ops[op] = fun; + } + + /// Add a binary operator + void AddOp(const std::string & op, size_t prec, + std::function fun) { + binary_ops[op].Set(prec, fun); + } + + + static int ApproxCompare(double x, double y) { + static constexpr double APPROX_FRACTION = 8192.0; + double margin = y / APPROX_FRACTION; + if (x < y - margin) return -1; + if (x > y + margin) return 1; + return 0; + } + + void AddDefaultOperators() { + // Setup the unary operators for the parser. + AddOp("+", [](emp::Datum x) { return x; }); + AddOp("-", [](emp::Datum x) { return -x; }); + AddOp("!", [](emp::Datum x) { return !x; }); + + + // Setup the default binary operators for the parser. + size_t prec = 0; // Precedence level of each operator... 
+ AddOp("||", ++prec, [](emp::Datum x, emp::Datum y){ return (x!=0.0)||(y!=0.0); } ); + AddOp("&&", ++prec, [](emp::Datum x, emp::Datum y){ return (x!=0.0)&&(y!=0.0); } ); + AddOp("==", ++prec, [](emp::Datum x, emp::Datum y){ return x == y; } ); + AddOp("!=", prec, [](emp::Datum x, emp::Datum y){ return x != y; } ); + AddOp("~==", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) == 0; } ); + AddOp("~!=", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) != 0; } ); + AddOp("<", ++prec, [](emp::Datum x, emp::Datum y){ return x < y; } ); + AddOp("<=", prec, [](emp::Datum x, emp::Datum y){ return x <= y; } ); + AddOp(">", prec, [](emp::Datum x, emp::Datum y){ return x > y; } ); + AddOp(">=", prec, [](emp::Datum x, emp::Datum y){ return x >= y; } ); + AddOp("~<", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) == -1; } ); + AddOp("~<=", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) != 1; } ); + AddOp("~>", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) == 1; } ); + AddOp("~>=", prec, [](emp::Datum x, emp::Datum y){ return ApproxCompare(x,y) != -1; } ); + AddOp("+", ++prec, [](emp::Datum x, emp::Datum y){ return x + y; } ); + AddOp("-", prec, [](emp::Datum x, emp::Datum y){ return x - y; } ); + AddOp("*", ++prec, [](emp::Datum x, emp::Datum y){ return x * y; } ); + AddOp("/", prec, [](emp::Datum x, emp::Datum y){ return x / y; } ); + AddOp("%", prec, [](emp::Datum x, emp::Datum y){ return emp::Mod(x, y); } ); + AddOp("**", ++prec, [](emp::Datum x, emp::Datum y){ return emp::Pow(x, y); } ); + AddOp("%%", prec, [](emp::Datum x, emp::Datum y){ return emp::Log(x, y); } ); + } + + void AddDefaultFunctions() { + // Setup the default functions. 
+ functions["ABS"].Set1( [](emp::Datum x){ return std::abs(x); } ); + functions["EXP"].Set1( [](emp::Datum x){ return emp::Pow(emp::E, x); } ); + functions["LOG"].Set1( [](emp::Datum x){ return std::log(x); } ); + functions["LOG2"].Set1( [](emp::Datum x){ return std::log2(x); } ); + functions["LOG10"].Set1( [](emp::Datum x){ return std::log10(x); } ); + + functions["SQRT"].Set1( [](emp::Datum x){ return std::sqrt(x); } ); + functions["CBRT"].Set1( [](emp::Datum x){ return std::cbrt(x); } ); + + functions["SIN"].Set1( [](emp::Datum x){ return std::sin(x); } ); + functions["COS"].Set1( [](emp::Datum x){ return std::cos(x); } ); + functions["TAN"].Set1( [](emp::Datum x){ return std::tan(x); } ); + functions["ASIN"].Set1( [](emp::Datum x){ return std::asin(x); } ); + functions["ACOS"].Set1( [](emp::Datum x){ return std::acos(x); } ); + functions["ATAN"].Set1( [](emp::Datum x){ return std::atan(x); } ); + functions["SINH"].Set1( [](emp::Datum x){ return std::sinh(x); } ); + functions["COSH"].Set1( [](emp::Datum x){ return std::cosh(x); } ); + functions["TANH"].Set1( [](emp::Datum x){ return std::tanh(x); } ); + functions["ASINH"].Set1( [](emp::Datum x){ return std::asinh(x); } ); + functions["ACOSH"].Set1( [](emp::Datum x){ return std::acosh(x); } ); + functions["ATANH"].Set1( [](emp::Datum x){ return std::atanh(x); } ); + + functions["CEIL"].Set1( [](emp::Datum x){ return std::ceil(x); } ); + functions["FLOOR"].Set1( [](emp::Datum x){ return std::floor(x); } ); + functions["ROUND"].Set1( [](emp::Datum x){ return std::round(x); } ); + + functions["ISINF"].Set1( [](emp::Datum x){ return std::isinf(x); } ); + functions["ISNAN"].Set1( [](emp::Datum x){ return std::isnan(x); } ); + + // Default 2-input functions + functions["HYPOT"].Set2( [](emp::Datum x, emp::Datum y){ return std::hypot(x,y); } ); + functions["EXP"].Set2( [](emp::Datum x, emp::Datum y){ return emp::Pow(x,y); } ); + functions["LOG"].Set2( [](emp::Datum x, emp::Datum y){ return emp::Log(x,y); } ); + 
functions["MIN"].Set2( [](emp::Datum x, emp::Datum y){ return (xy) ? x : y; } ); + functions["POW"].Set2( [](emp::Datum x, emp::Datum y){ return emp::Pow(x,y); } ); + + // Default 3-input functions. + functions["IF"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (x!=0.0) ? y : z; + } ); + functions["CLAMP"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (xz) ? z : x; + } ); + functions["TO_SCALE"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (z-y)*x+y; + } ); + functions["FROM_SCALE"].Set3( [](emp::Datum x, emp::Datum y, emp::Datum z){ + return (x-y) / (z-y); + } ); + } + + void AddRandomFunctions(Random & random) { + functions["RAND"].Set0( [&random](){ return random.GetDouble(); } ); + functions["RAND"].Set1( [&random](emp::Datum x){ return random.GetDouble(x); } ); + functions["RAND"].Set2( [&random](emp::Datum x, emp::Datum y){ return random.GetDouble(x,y); } ); + } + + /// Helpers for parsing. + template + typename SYMBOLS_T::value_t ParseValue(const SYMBOLS_T & symbols, pos_t & pos) { + if constexpr (verbose) { + std::cout << "ParseValue at position " << pos.GetIndex() << " : " << pos->lexeme << std::endl; + } + + using arg_t = typename SYMBOLS_T::arg_t; + using fun_t = typename SYMBOLS_T::fun_t; + using value_t = typename SYMBOLS_T::value_t; + + // Deal with any unary operators... 
+ if (emp::Has(unary_ops, pos->lexeme)) { + if constexpr (verbose) std::cout << "Found UNARY OP: " << pos->lexeme << std::endl; + auto op = unary_ops[pos->lexeme]; + ++pos; + value_t val = ParseValue(symbols, pos); + if (val.type == value_t::VALUE) { return op(val.value); } + else { + return static_cast( + [fun=val.fun,op](arg_t arg){ return op(fun(arg)); } + ); + } + } + + // If we have parentheses, process the contents + if (pos->lexeme == "(") { + if constexpr (verbose) std::cout << "Found: OPEN PAREN" << std::endl; + ++pos; + value_t val = ParseMath(symbols, pos); + if (pos->lexeme != ")") return ParseError("Expected ')', but found '", pos->lexeme, "'."); + ++pos; + return val; + } + + // If this is a value, set it and return. + if (lexer.IsNumber(*pos)) { + double result = emp::from_string(pos->lexeme); + ++pos; + return result; + } + + // Similar for an external value + if (lexer.IsExternal(*pos)) { + size_t id = emp::from_string(pos->lexeme.substr(1)); + ++pos; + // Stop on a bad index; falling through would read past the end of external_vals. + if (id >= external_vals.size()) { + return ParseError("Invalid access into external variable (\"$", id, "\"): Does not exist."); + } + return external_vals[id]; + } + + // Otherwise it should be an identifier! + const std::string & name = pos->lexeme; + ++pos; + + // If it is followed by a parenthesis, it should be a function. + const bool is_fun = (pos.IsValid() && pos->lexeme == "("); + + if (is_fun) { + if (!emp::Has(functions, name)) return ParseError("Call to unknown function '", name,"'."); + ++pos; + emp::vector args; + while(pos->lexeme != ")") { + args.push_back(ParseMath(symbols, pos)); + if (pos->lexeme == ",") ++pos; + } + ++pos; + + // Now build the function based on its argument count. 
+ fun_t out_fun; + switch (args.size()) { + // Each case returns the parse error right away so no lambda is built around an empty callable. + case 0: + if (!functions[name].fun0) return ParseError("Function '", name, "' requires arguments."); + out_fun = [fun=functions[name].fun0](arg_t /*sym_arg*/) { return fun(); }; + break; + case 1: + if (!functions[name].fun1) return ParseError("Function '", name, "' cannot have 1 arguments."); + out_fun = [fun=functions[name].fun1,arg0=args[0].AsFunction()](arg_t sym_arg) { + return fun(arg0(sym_arg)); + }; + break; + case 2: + if (!functions[name].fun2) return ParseError("Function '", name, "' cannot have 2 arguments."); + out_fun = [fun=functions[name].fun2, + arg0=args[0].AsFunction(), + arg1=args[1].AsFunction()](arg_t sym_arg) { + return fun(arg0(sym_arg), arg1(sym_arg)); + }; + break; + case 3: + if (!functions[name].fun3) return ParseError("Function '", name, "' cannot have 3 arguments."); + out_fun = [fun=functions[name].fun3, + arg0=args[0].AsFunction(), + arg1=args[1].AsFunction(), + arg2=args[2].AsFunction()](arg_t sym_arg) { + return fun(arg0(sym_arg), arg1(sym_arg), arg2(sym_arg)); + }; + break; + default: + return ParseError("Too many arguments (", args.size(), ") for function '", name, "'."); + } + return out_fun; + } + + var_names.insert(name); // Store this name in the list of those used. + return symbols.MakeDatumAccessor(name); // Return an accessor for this name. + } + + template + typename SYMBOLS_T::value_t ParseMath(const SYMBOLS_T & symbols, pos_t & pos, size_t prec_limit=0) { + using value_t = typename SYMBOLS_T::value_t; + using arg_t = typename SYMBOLS_T::arg_t; + value_t val1 = ParseValue(symbols, pos); + + if constexpr (verbose) { + if (pos.IsValid()) { + std::cout << "ParseMath at " << pos.GetIndex() << " : " << pos->lexeme << std::endl; + } else std::cout << "PROCESSED!" << std::endl; + } + + while (pos.IsValid() && pos->lexeme != ")" && pos->lexeme != ",") { + if constexpr (verbose) { std::cout << "...Scanning for op... [" << pos->lexeme << "]" << std::endl; } + + // If we have an operator, act on it! 
+ if (Has(binary_ops, pos->lexeme)) { + const BinaryOperator & op = binary_ops[pos->lexeme]; + if (prec_limit >= op.prec) return val1; // Precedence not allowed; return current value. + ++pos; + value_t val2 = ParseMath(symbols, pos, op.prec); + if (val1.type == value_t::VALUE) { + if (val2.type == value_t::VALUE) { val1 = op.fun(val1.value, val2.value); } + else { + val1 = [val1_num=val1.value,val2_fun=val2.fun,op_fun=op.fun](arg_t symbol_vals){ + return op_fun(val1_num, val2_fun(symbol_vals)); + }; + } + } else { + if (val2.type == value_t::VALUE) { + val1 = [val1_fun=val1.fun,val2_num=val2.value,op_fun=op.fun](arg_t symbol_vals){ + return op_fun(val1_fun(symbol_vals), val2_num); + }; + } else { + val1 = [val1_fun=val1.fun,val2_fun=val2.fun,op_fun=op.fun](arg_t symbol_vals){ + return op_fun(val1_fun(symbol_vals), val2_fun(symbol_vals)); + }; + } + } + } + + // Unknown operator: return the error; pos is not advanced here, so looping again would re-read the same token. + else return ParseError("Operator '", pos->lexeme, "' NOT found!"); + } + + // @CAO Make sure there's not an illegal lexeme here. + + return val1; + } + + /// Take a set of variables and use them to replace $0, $1, etc. in any function. + template + void SetupStaticValues(T1 arg1, Ts... args) { + // If we have a vector of incoming values, make sure it is valid and then just pass it along. + if constexpr (sizeof...(Ts) == 0 && emp::is_emp_vector()) { + using value_t = typename T1::value_type; + static_assert(std::is_same(), + "If BuildMathFunction is provided a vector, it must contain only emp::Datum."); + external_vals = arg1; + return; + } + + else { + // Otherwise convert all args to emp::Datum. + external_vals = emp::vector{ + static_cast(arg1), + static_cast(args)... + }; + } + } + + /// If there are no input args, just clear external values. + void SetupStaticValues() { external_vals.resize(0); } + + /// Parse a function description that will take a map and return the results. 
+ /// For example, if the string "foo * 2 + bar" is passed in, a function will be returned + /// that takes a map (of the proper type) loads in the values of "foo" and "bar", and + /// returns the result of the above equation. + + template + auto BuildMathFunction( + const MAP_T & symbol_map, ///< The map or layout to use, specifying variables. + const std::string & expression, ///< The primary expression to convert. + EXTRA_Ts... extra_args ///< Extra value arguments (accessed as $1, $2, etc.) + ) { + // If we have incoming values, store them appropriately. + SetupStaticValues(extra_args...); + + using value_t = typename SymbolTable::value_t; + SymbolTable symbol_table(symbol_map); + + // Tokenize the expression. + emp::TokenStream tokens = lexer.Tokenize(expression, std::string("Expression: ") + expression); + if constexpr (verbose) tokens.Print(); + var_names.clear(); // Reset the names used from data map. + pos_t pos = tokens.begin(); + value_t val = ParseMath(symbol_table, pos); + + // Return the value as a function. + return symbol_table.AsFunction(val); + } + + + /// Generate a temporary math function and immediately run it on the provided arguments. + /// @param symbol_map The map containing the required variables. + /// @param expression The mathematical expression to be run on the data map. + /// @param extras Any extra values to fill in a $0, $1, etc. + template + emp::Datum RunMathFunction(const MAP_T & symbol_map, ARG_Ts... 
args) { + auto fun = BuildMathFunction(symbol_map, std::forward(args)...); + return fun(symbol_map); + } + + }; + +} + +#endif // #ifndef EMP_DATA_SIMPLEPARSER_HPP_INCLUDE diff --git a/include/emp/datastructs/IndexMap.hpp b/include/emp/datastructs/IndexMap.hpp index 7b8fe30fef..c1c35cfb5b 100644 --- a/include/emp/datastructs/IndexMap.hpp +++ b/include/emp/datastructs/IndexMap.hpp @@ -1,14 +1,22 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2015-2018 + * @date 2015-2022. * * @file IndexMap.hpp - * @brief A simple class to weight items differently within a container and return the correct index. + * @brief Container that weights items and returns ID for a given weight position. * @note Status: BETA * + * An IndexMap is a container where each item has a specified weight (specified as a double). + * The total weight of the container determines the max index point. When indexing into the + * container, each item is represented by a range of values equal to it's weight. Randomly + * indexing into the container will provide either item with a probability proportional to its + * weight. + * + * In this regular IndexMap, all items are kept in order (so the map starts at 0, then 1, then + * 2, etc.) If order is not required, UnorderedIndexMap is slightly faster. + * * @todo Convert to a template that acts as a glorified vector, simplifying random selection? - * @todo Should operator[] index by element count or by weight? * @todo Make Raw*() function private. */ @@ -43,13 +51,14 @@ namespace emp { /// Which ID is the right child of the ID provided? size_t RightID(size_t id) const { return 2*id + 2; } - /// Sift through the nodes to find the where index zero maps to. + /// Sift through the nodes to find where index zero maps to. 
size_t CalcZeroOffset() const { size_t id = 0; while (id < num_items - 1) id = LeftID(id); return id - (num_items - 1); } + /// Convert an item ID to the internal position where it's stored. size_t ToInternalID(size_t id) const { return (id + zero_offset) % num_items + num_items-1; } @@ -58,10 +67,34 @@ namespace emp { return (id + _offset) % _items + _items-1; } + /// Convert and internal position to the item ID to which it refers. size_t ToExternalID(size_t id) const { return (id + 1 - zero_offset) % num_items; } + // Collect the weight at the specified index of the array (no conversions) + double RawWeight(size_t id) const { return weights[id]; } + + // Collect the probability at the specified index of the array (no conversions) + double RawProb(size_t id) const { ResolveRefresh(); return weights[id] / weights[0]; } + + /// Adjust the weight associated with a particular index in the map. + /// @param id is the identification number of the item whose weight is being adjusted. + /// @param new_weight is the new weight for that entry. + void RawAdjust(size_t id, const double new_weight) { + // Update this node. + const double weight_diff = new_weight - weights[id]; // Track change size for tree weights. + weights[id] = new_weight; // Update THIS item weight + + if (needs_refresh) return; // If we already need a refresh don't update tree weights! + + // Update tree to root. + while (id > 0) { + id = ParentID(id); + weights[id] += weight_diff; + } + } + /// A Proxy class so that an index can be treated as an l-value. 
class Proxy { private: @@ -96,7 +129,10 @@ namespace emp { } IndexMap(size_t _items, double init_weight) : num_items(_items), zero_offset(CalcZeroOffset()), needs_refresh(true) - , weights(num_items, init_weight) { ; } + , weights(num_items*2-1, 0.0) + { + if (init_weight != 0.0) AdjustAll(init_weight); + } IndexMap(const IndexMap &) = default; IndexMap(IndexMap &&) = default; ~IndexMap() = default; @@ -110,11 +146,9 @@ namespace emp { double GetWeight() const { ResolveRefresh(); return weights[0]; } /// What is the current weight of the specified index? - double RawWeight(size_t id) const { return weights[id]; } double GetWeight(size_t id) const { return RawWeight(ToInternalID(id)); } /// What is the probability of the specified index being selected? - double RawProb(size_t id) const { ResolveRefresh(); return weights[id] / weights[0]; } double GetProb(size_t id) const { return RawProb(ToInternalID(id)); } /// Change the number of indices in the map. @@ -163,23 +197,6 @@ namespace emp { Clear(); } - /// Adjust the weight associated with a particular index in the map. - /// @param id is the identification number of the item whose weight is being adjusted. - /// @param new_weight is the new weight for that entry. - void RawAdjust(size_t id, const double new_weight) { - // Update this node. - const double weight_diff = new_weight - weights[id]; // Track change size for tree weights. - weights[id] = new_weight; // Update THIS item weight - - if (needs_refresh) return; // If we already need a refresh don't update tree weights! - - // Update tree to root. - while (id > 0) { - id = ParentID(id); - weights[id] += weight_diff; - } - } - void Adjust(size_t id, const double new_weight) { RawAdjust(ToInternalID(id), new_weight); } /// Adjust all index weights to the set provided. 
diff --git a/include/emp/datastructs/SmallFifoMap.hpp b/include/emp/datastructs/SmallFifoMap.hpp index c71cd07c95..1cc6b4090f 100644 --- a/include/emp/datastructs/SmallFifoMap.hpp +++ b/include/emp/datastructs/SmallFifoMap.hpp @@ -28,8 +28,8 @@ class SmallFifoMap { using value_type = std::pair; private: - - using storage_t = emp::array; + // TODO: Turn this back into an emp::array once iterator is fixed + using storage_t = std::array; storage_t storage; diff --git a/include/emp/datastructs/UnorderedIndexMap.hpp b/include/emp/datastructs/UnorderedIndexMap.hpp index 16b5c92692..58fd4506f1 100644 --- a/include/emp/datastructs/UnorderedIndexMap.hpp +++ b/include/emp/datastructs/UnorderedIndexMap.hpp @@ -1,10 +1,10 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2015-2021. + * @date 2015-2022. * * @file UnorderedIndexMap.hpp - * @brief A simple class to weight items differently within a container and return the correct index. + * @brief Container that weights items and returns ID for a given weight position; order not guaranteed. * @note Status: BETA * * @todo Convert to a template that acts as a glorified vector, simplifying random selection? @@ -46,7 +46,7 @@ namespace emp { class Proxy { private: UnorderedIndexMap & index_map; ///< Which index map is this proxy from? - size_t id; ///< Which id does it represent? + size_t id; ///< Which id does it represent? 
public: Proxy(UnorderedIndexMap & _im, size_t _id) : index_map(_im), id(_id) { ; } operator double() const { return index_map.RawWeight(id); } diff --git a/include/emp/datastructs/hash_utils.hpp b/include/emp/datastructs/hash_utils.hpp index 83c373fed7..bbc6167dec 100644 --- a/include/emp/datastructs/hash_utils.hpp +++ b/include/emp/datastructs/hash_utils.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2019-2021. + * @date 2019-2022. * * @file hash_utils.hpp * @brief This file provides tools for hashing values and containers. @@ -15,11 +15,11 @@ #include #include #include +#include #include #include #include "../base/Ptr.hpp" -#include "../polyfill/span.hpp" namespace emp { @@ -107,7 +107,7 @@ namespace emp { // helper functions for murmur hash #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace internal { - constexpr inline uint64_t rotate(const size_t x, const size_t r) noexcept { + constexpr inline uint64_t rotate(const uint64_t x, const uint64_t r) noexcept { return (x << r) | (x >> (64 - r)); } constexpr inline void fmix64(uint64_t& k) noexcept { @@ -128,13 +128,13 @@ namespace emp { /// @param key Span of bytes to hash. /// @param seed Optional seed. /// @return Hash of key. 
- constexpr inline size_t murmur_hash( + constexpr inline uint64_t murmur_hash( const std::span key, - const size_t seed = 0 + const uint64_t seed = 0 ) noexcept { // define constants - const size_t numbytes = key.size(); - const size_t nblocks = numbytes / 16; + const uint64_t numbytes = key.size(); + const uint64_t nblocks = numbytes / 16; const uint64_t c1 = 0x87c37b91114253d5LLU; const uint64_t c2 = 0x4cf5ad432745937fLLU; diff --git a/include/emp/datastructs/map_utils.hpp b/include/emp/datastructs/map_utils.hpp index 567317affc..a88f40dfa3 100644 --- a/include/emp/datastructs/map_utils.hpp +++ b/include/emp/datastructs/map_utils.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2017 + * @date 2016-2022. * * @file map_utils.hpp * @brief A set of simple functions to manipulate maps. @@ -11,6 +11,7 @@ #ifndef EMP_DATASTRUCTS_MAP_UTILS_HPP_INCLUDE #define EMP_DATASTRUCTS_MAP_UTILS_HPP_INCLUDE +#include #include #include @@ -25,6 +26,60 @@ namespace emp { return in_map.find(key) != in_map.end(); } + /// Take a map where the value is an integer and a key. + /// Increment value associated with that key if its present + /// or if its not add it and set it to 1 + template + inline void IncrementCounter( MAP_T & in_map, const KEY_T & key ) { + static_assert( std::is_same< typename MAP_T::key_type, int >::value); + if (emp::Has(in_map, key)) { + in_map[key]++; + } else { + in_map[key] = 1; + } + } + + // Check to see if any of the elements in a map satisfy a function. + template + bool AnyOf(const std::map & c, FUN_T fun) { + // If the provided function takes just the element type, that's all we should give it. + if constexpr (std::is_invocable_r()) { + return std::any_of(c.begin(), c.end(), [fun](auto x){ return fun(x.second); }); + } + + // Otherwise provide both key and element. 
+ else { + return std::any_of(c.begin(), c.end(), [fun](auto x){ return fun(x.first, x.second); }); + } + } + + // Check to see if all of the elements in a map satisfy a function. + template + bool AllOf(const std::map & c, FUN_T fun) { + // If the provided function takes just the element type, that's all we should give it. + if constexpr (std::is_invocable_r()) { + return std::all_of(c.begin(), c.end(), [fun](auto x){ return fun(x.second); }); + } + + // Otherwise provide both key and element. + else { + return std::all_of(c.begin(), c.end(), [fun](auto x){ return fun(x.first, x.second); }); + } + } + + // Check to see if none of the elements in a map satisfy a function. + template + bool NoneOf(const std::map & c, FUN_T fun) { + // If the provided function takes just the element type, that's all we should give it. + if constexpr (std::is_invocable_r()) { + return std::none_of(c.begin(), c.end(), [fun](auto x){ return fun(x.second); }); + } + + // Otherwise provide both key and element. + else { + return std::none_of(c.begin(), c.end(), [fun](auto x){ return fun(x.first, x.second); }); + } + } template inline auto Keys( const MAP_T & in_map) -> emp::vectorfirst)>::type> { @@ -58,6 +113,15 @@ namespace emp { return val_it->second; } + /// Take any map and element, run find() member function, and return a reference to + /// the result found; trip assert if the result is not present. 
+ template + inline const auto & GetConstRef( const MAP_T & in_map, const KEY_T & key) { + auto val_it = in_map.find(key); + emp_assert(val_it != in_map.end()); + return val_it->second; + } + // The following two functions are from: // http://stackoverflow.com/questions/5056645/sorting-stdmap-using-value diff --git a/include/emp/datastructs/ra_map.hpp b/include/emp/datastructs/ra_map.hpp new file mode 100644 index 0000000000..86bff50254 --- /dev/null +++ b/include/emp/datastructs/ra_map.hpp @@ -0,0 +1,173 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file ra_map.hpp + * @brief This file defines a Random Access Map template. + * @note Status: ALPHA + * + * A random access map allows for simple traversal by index and a guarantee that a value at a + * given index will always be at that index unless any map element is deleted. This allows + * storage of indices for maps with a fixed layout, resulting in easy access. + */ + +#ifndef EMP_DATASTRUCTS_RA_MAP_HPP_INCLUDE +#define EMP_DATASTRUCTS_RA_MAP_HPP_INCLUDE + +#include + +#include "../base/unordered_map.hpp" +#include "../base/vector.hpp" +#include "../math/constants.hpp" + +namespace emp { + + /// This class uses a combination of a hashtable (std::unordered_map) and emp::vector to + /// lookup insert, lookup, and delete values in constant time, while still being able to + /// step through all values (albeit in an arbitrary order). + /// + /// @note The arbitrary order of values may change if any values are deleted. 
+ + template , + typename KeyEqual = std::equal_to, + typename Allocator = std::allocator< std::pair > + > + class ra_map { + public: + using key_type = KEY_T; + using mapped_type = T; + using value_type = std::pair; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename std::allocator_traits::pointer; + using const_pointer = typename std::allocator_traits::const_pointer; + + using layout_t = emp::unordered_map; + + private: + layout_t id_map; ///< Map to find keys in vector. + emp::vector vals; ///< Vector of all values. + + using this_t = ra_map; + public: + ra_map() = default; + ra_map(const ra_map &) = default; + ra_map(ra_map &&) = default; + this_t & operator=(const ra_map &) = default; + this_t & operator=(ra_map &&) = default; + + // -- Iterators -- + auto begin() { return vals.begin(); } + auto cbegin() const { return vals.cbegin(); } + auto end() { return vals.end(); } + auto cend() const { return vals.cend(); } + + // -- Capacity -- + size_t size() const { return vals.size(); } ///< Number of entries in map. + bool empty() const { return size() == 0; } ///< Are there NO values in map? + size_t max_size() const { return id_map.max_size(); } ///< Max system limit on size. + + // -- Modifiers -- + void clear() { id_map.clear(); vals.resize(0); } ///< Remove all values from container. + + /// Insert a new value into container by copy; return position. + size_t insert(const value_type & v) { + auto pos_it = id_map.find(v.first); + if (pos_it != id_map.end()) return pos_it->second; // Already in map. + const size_t pos = vals.size(); + id_map[v.first] = pos; + vals.emplace_back(v); + return pos; + } + + /// Insert a new value into container by move; return position. 
+ size_t insert(value_type && v) { + auto pos_it = id_map.find(v.first); + if (pos_it != id_map.end()) return pos_it->second; // Already in map. + const size_t pos = vals.size(); + id_map[v.first] = pos; + vals.emplace_back(std::move(v)); + return pos; + } + + /// Construct a new value in place in a container container; return position. + template + size_t emplace(Ts &&... args) { + const size_t new_pos = vals.size(); + vals.emplace_back(std::forward(args)...); + auto old_pos_it = id_map.find(vals.back().first); + if (old_pos_it != id_map.end()) { + vals.resize(vals.size()-1); // Destroy newly created instance. + return old_pos_it->second; // Return old position in map. + } + id_map[vals.back().first] = new_pos; // Save new position for later lookup. + return new_pos; // And return it. + } + + /// Erase a specific value from the container. + bool erase(const KEY_T & key) { + if (!count(key)) return false; // Not in map. + + // Find out where key is in id_map and clear it. + const size_t pos = id_map[key]; + id_map.erase(key); + + // Move the former last value to the now-empty spot. + const size_t last_pos = vals.size() - 1; + if (pos != last_pos) { + const_cast(vals[pos].first) = vals[last_pos].first; + vals[pos].second = vals[last_pos].second; + id_map[vals[pos].first] = pos; + } + vals.resize(last_pos); + return true; + } + + + size_t count(const KEY_T & key) const { return id_map.count(key); } /// Is value included? (0 or 1). + + /// Index into the ra_map by key. 
+ T & operator[](key_type key) { + auto key_it = id_map.find(key); + if (key_it == id_map.end()) { + return NewEntry(key); + } + return vals[key_it->second].second; + } + + // --- Empirical only commands --- + + const layout_t & GetLayout() const { return id_map; } + + /// Add a default-valued entry for an unused key; return a reference to its value. + T & NewEntry(key_type key) { + emp_assert(id_map.find(key) == id_map.end(), "ra_map::NewEntry must be an unused key!", key); + const size_t pos = vals.size(); + id_map[key] = pos; + vals.emplace_back(key, T()); // Store the key with the value; erase() reads it back via .first + return vals.back().second; + } + + bool Has(key_type key) const { return id_map.find(key) != id_map.end(); } + + size_t GetID(key_type key) const { + auto key_it = id_map.find(key); + return (key_it == id_map.end()) ? emp::MAX_SIZE_T : key_it->second; + } + + /// Key stored at a given position; read-only because id_map is indexed by it. + const key_type & KeyAtID(size_t id) const { return vals[id].first; } + + /// Value stored at a given position (entries are pairs, so use member access, not ->). + T & AtID(size_t id) { return vals[id].second; } + const T & AtID(size_t id) const { return vals[id].second; } + }; + +} + +#endif // #ifndef EMP_DATASTRUCTS_RA_MAP_HPP_INCLUDE diff --git a/include/emp/datastructs/span_utils.hpp b/include/emp/datastructs/span_utils.hpp new file mode 100644 index 0000000000..6130086de6 --- /dev/null +++ b/include/emp/datastructs/span_utils.hpp @@ -0,0 +1,57 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file span_utils.hpp + * @brief A set of simple functions to manipulate std::span + * @note Status: BETA + * + */ + +#ifndef EMP_DATASTRUCTS_SPAN_UTILS_HPP_INCLUDE +#define EMP_DATASTRUCTS_SPAN_UTILS_HPP_INCLUDE + +#include +#include + +#include "../base/array.hpp" +#include "../base/vector.hpp" + +namespace emp { + + /// Print the contents of a span. + template + void Print(const std::span & v, std::ostream & os=std::cout, const std::string & spacer=" ") { + for (size_t id = 0; id < v.size(); id++) { + if (id) os << spacer; // Put a space before second element and beyond. 
+ os << emp::to_string(v[id]); + } + } + + /// Convert an emp::array to an equivalent span + template + auto to_span(emp::array a) { return std::span(a); } + + /// Convert an emp::vector to an equivalent span + template + auto to_span(emp::vector v) { return std::span(v); } +} + +namespace std { + // A generic streaming function for spans. + template + std::ostream & operator<<(std::ostream & out, std::span s) { + emp::Print(s, out); + return out; + } + + template + std::istream & operator>>(std::istream & is, std::span s) { + for (T & x : s) is >> x; + return is; + } + +} + +#endif // #ifndef EMP_DATASTRUCTS_SPAN_UTILS_HPP_INCLUDE diff --git a/include/emp/datastructs/vector_utils.hpp b/include/emp/datastructs/vector_utils.hpp index 95891350a4..b5f80280e1 100644 --- a/include/emp/datastructs/vector_utils.hpp +++ b/include/emp/datastructs/vector_utils.hpp @@ -28,6 +28,15 @@ namespace emp { + // Remove and return the first element of a vector. + template + T PopFront(emp::vector & v) { + emp_assert(v.size()); + T out = v[0]; + v.erase(v.begin()); + return out; + } + /// Base case for Append; we just have a single vector with nothing to append. template emp::vector & Append(emp::vector & base) { @@ -62,13 +71,19 @@ namespace emp { /// Convert a map to a vector. template - emp::vector ToVector(const std::map & in_map, T default_val=T()) { + emp::vector ToVector( + const std::map & in_map, + T default_val=T(), + INDEX_T index_cap=32768 + ) { INDEX_T max_index = in_map.back().second; if (max_index < 0) max_index = 0; // In case all entries are negative... + if (max_index >= index_cap) max_index=index_cap-1; emp::vector out_vec; out_vec.resize(max_index+1, default_val); for (auto [index, val] : in_map) { - if (index < 0) continue; // Skip entries that can't go into a vector... + if (index < 0) continue; // Skip entries that can't go into a vector... + if (index >= index_cap) break; // Stop when we've hit the upper limit on vector size. 
out_vec[index] = val; } return out_vec; @@ -76,10 +91,14 @@ namespace emp { /// Convert an unordered map to a vector. template - emp::vector ToVector(const std::unordered_map & in_map, T default_val=T()) { + emp::vector ToVector( + const std::unordered_map & in_map, + T default_val=T(), + INDEX_T index_cap=32768 + ) { emp::vector out_vec; for (auto [index, val] : in_map) { - if (index < 0) continue; // Skip entries that can't go into a vector... + if (index < 0 || index >= index_cap) continue; // Skip entries that can't go into a vector... if (((size_t) index) >= out_vec.size()) out_vec.resize(index+1, default_val); out_vec[index] = val; } diff --git a/include/emp/debug/debug.hpp b/include/emp/debug/debug.hpp index 8fae099a1b..c189febb82 100644 --- a/include/emp/debug/debug.hpp +++ b/include/emp/debug/debug.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2015-2017 + * @date 2015-2022 * * @file debug.hpp * @brief Basic tools for use in developing high-assurance code. @@ -20,7 +20,7 @@ namespace emp { - /// BlockRelease() will halt compilation if NDEBUG is on and EMP_NO_BLOCK is off. + /// BlockRelease(true) will halt compilation if NDEBUG is on and EMP_NO_BLOCK is off. /// It is useful to include alongside debug code that you want to remember to remove when you /// are done debugging; it is automatically included with the emp_debug() function below. /// If you want to intentionally compile in release mode, make sure to define EMP_NO_BLOCK. 
@@ -28,17 +28,22 @@ namespace emp { #ifdef EMP_NO_BLOCK #define BlockRelease(BLOCK) #else - #define BlockRelease(BLOCK) static_assert(!BLOCK, "Release blocked due to debug material.") + #define BlockRelease(BLOCK) \\ + std::cerr << "Release block at " << __FILE___ << ", line " << __LINE__ << std::endl;\\ + static_assert(!BLOCK, "Release blocked due to debug material.") #endif #else #define BlockRelease(BLOCK) #endif /// The EMP_DEBUG macro executes its contents in debug mode, but otherwise ignores them. + /// test_debug() can be used inside of an if-constexpr for code you want only in debug mode. #ifdef NDEBUG -#define EMP_DEBUG(...) + #define EMP_DEBUG(...) + constexpr bool test_debug() { return false; } #else -#define EMP_DEBUG(...) __VA_ARGS__ + #define EMP_DEBUG(...) __VA_ARGS__ + constexpr bool test_debug() { return true; } #endif template @@ -47,19 +52,10 @@ namespace emp { std::cerr << std::endl; } - /// emp_debug() will print its contents as a message in debug mode and BLOCK release mode until it's removed. - #define emp_debug(...) BlockRelease(true); emp::emp_debug_print(__VA_ARGS__); - - /// Depricated() prints its contents exactly once to notify a user of a depricated function. - static void Depricated(const std::string & name, const std::string & desc="") { - static std::set name_set; - if (name_set.count(name) == 0) { - std::cerr << "Deprication WARNING: " << name << std::endl; - if (desc != "") std::cerr << desc << std::endl; - name_set.insert(name); - } - } - + /// emp_debug() will print its contents as a message in debug mode and BLOCK release mode until + /// it is removed. It's a useful too for printing "Ping1", "Ping2", etc, but no forgetting to + /// remove them. + #define emp_debug(...) 
{ BlockRelease(true); emp::emp_debug_print(__VA_ARGS__); } } #endif // #ifndef EMP_DEBUG_DEBUG_HPP_INCLUDE diff --git a/include/emp/functional/AnyFunction.hpp b/include/emp/functional/AnyFunction.hpp index 14d96381ac..82dbc6cca9 100644 --- a/include/emp/functional/AnyFunction.hpp +++ b/include/emp/functional/AnyFunction.hpp @@ -51,6 +51,8 @@ namespace emp { /// Determine if this BaseFunction can be converted into a derived emp::Function template bool ConvertOK(); + + virtual emp::Ptr Clone() = 0; }; @@ -81,6 +83,10 @@ namespace emp { /// Get the std::function to be called. const fun_t & GetFunction() const { return fun; } + + emp::Ptr Clone() override{ + return emp::NewPtr>(fun); + } }; @@ -89,6 +95,7 @@ namespace emp { private: emp::Ptr fun = nullptr; + private: /// Helper to build a proper derived function. template auto MakePtr( T in_fun ) { @@ -102,6 +109,29 @@ namespace emp { // By default, build an empty function. AnyFunction() { ; } + AnyFunction(const AnyFunction& other){ // copy constructor + fun = other.CloneFunc(); + } + + AnyFunction(AnyFunction&& other) noexcept{ // move constructor + fun = other.CloneFunc(); + other.fun.Delete(); + other.fun = nullptr; + } + + AnyFunction& operator=(const AnyFunction& other){ // copy assignment + Clear(); + fun = other.CloneFunc(); + return *this; + } + + AnyFunction& operator=(AnyFunction&& other) noexcept{ // move assignment + Clear(); + fun = other.CloneFunc(); + other.Clear(); + return *this; + } + /// If an argument is provided, set the function. template AnyFunction(T in_fun) { @@ -112,6 +142,10 @@ namespace emp { void Clear() { if (fun) fun.Delete(); fun = nullptr; } size_t NumArgs() const { return fun ? 
fun->NumArgs() : 0; } + emp::Ptr CloneFunc() const{ + if(fun == nullptr) return nullptr; + return fun->Clone(); + } operator bool() { return (bool) fun; } diff --git a/include/emp/games/Mancala.hpp b/include/emp/games/Mancala.hpp index ed28e02f85..3b8dd3995f 100644 --- a/include/emp/games/Mancala.hpp +++ b/include/emp/games/Mancala.hpp @@ -29,8 +29,9 @@ namespace emp { side_t boardA; // Current board state for side A. side_t boardB; // Current board state for side B. + size_t turn_count; // How many turns has this game been played? bool over = false; // Has the game ended? - size_t is_A_turn; // Which player goes next? + bool is_A_turn; // Which player goes next? void TestOver() { bool side_A_empty = true; @@ -47,7 +48,7 @@ namespace emp { public: using move_t = size_t; - Mancala(bool A_first=true) : boardA(), boardB(), over(false), is_A_turn(true) { + Mancala(bool A_first=true) : boardA(), boardB(), turn_count(0), over(false), is_A_turn(true) { Reset(A_first); } ~Mancala() { ; } @@ -55,6 +56,7 @@ namespace emp { void Reset(bool A_first=true) { for (size_t i = 0; i < 6; i++) { boardA[i] = 4; boardB[i] = 4; } boardA[6] = boardB[6] = 0; + turn_count = 0; over = false; is_A_turn = A_first; } @@ -95,12 +97,14 @@ namespace emp { // Returns bool indicating whether player can go again bool DoMove(move_t cell) { - emp_assert(cell < 6); // You cannot choose a cell out of bounds. + emp_assert(cell < 6); // Make sure move is not out of bounds. - side_t & cur_board = GetCurSide(); + turn_count++; // Maintain count of moves. + + side_t & cur_board = GetCurSide(); // Load in board view based on current player. side_t & other_board = GetOtherSide(); - emp_assert(cur_board[cell] != 0); // You cannot choose an empty cell. + emp_assert(cur_board[cell] != 0); // Make sure move is not an empty pit. 
size_t stone_count = cur_board[cell]; size_t cur_cell = cell; diff --git a/include/emp/hardware/AvidaCPU_InstLib.hpp b/include/emp/hardware/AvidaCPU_InstLib.hpp index 3293771597..f05acedf59 100644 --- a/include/emp/hardware/AvidaCPU_InstLib.hpp +++ b/include/emp/hardware/AvidaCPU_InstLib.hpp @@ -17,10 +17,8 @@ #include "InstLib.hpp" namespace emp { - /// AvidaCPU_InstLib is a pure-virtual class that defines a series of instructions that /// can be used with AvidaCPU_Base or any of its derived classes. - template struct AvidaCPU_InstLib : public InstLib { using hardware_t = HARDWARE_T; diff --git a/include/emp/hardware/AvidaGP.hpp b/include/emp/hardware/AvidaGP.hpp index e3efa1a63e..9e2cfe2421 100644 --- a/include/emp/hardware/AvidaGP.hpp +++ b/include/emp/hardware/AvidaGP.hpp @@ -55,9 +55,10 @@ namespace emp { using genome_t = Genome; using stack_t = emp::vector; - using arg_set_t = emp::array; + // TODO: Turn this back into emp::array when possible + using arg_set_t = std::array; - struct Instruction { + struct Instruction : public inst_lib_t::InstructionBase { size_t id; arg_set_t args; @@ -79,6 +80,10 @@ namespace emp { void Set(size_t _id, size_t _a0=0, size_t _a1=0, size_t _a2=0) { id = _id; args[0] = _a0; args[1] = _a1; args[2] = _a2; } + + size_t GetIndex() const override{ + return id; + } }; struct ScopeInfo { diff --git a/include/emp/hardware/EventDrivenGP.hpp b/include/emp/hardware/EventDrivenGP.hpp index 300e2756a0..fdc4f0f32d 100644 --- a/include/emp/hardware/EventDrivenGP.hpp +++ b/include/emp/hardware/EventDrivenGP.hpp @@ -137,7 +137,8 @@ namespace emp { using mem_val_t = double; //< Hardware memory map value type. using memory_t = std::unordered_map; //< Hardware memory map type. using arg_t = int; //< Instruction argument type. - using arg_set_t = emp::array; //< Instruction argument set type. + // TODO: Turn this back into emp::array when possible + using arg_set_t = std::array; //< Instruction argument set type. 
using affinity_t = BitSet; //< Affinity type alias. using properties_t = std::unordered_set; //< Event/Instruction properties type. using trait_t = TRAIT_T; @@ -364,6 +365,7 @@ namespace emp { CEREAL_NVP(id) ); } + size_t GetIndex() const{ return id; } }; diff --git a/include/emp/hardware/InstLib.hpp b/include/emp/hardware/InstLib.hpp index 4eb1f55c1c..a9385c1cac 100644 --- a/include/emp/hardware/InstLib.hpp +++ b/include/emp/hardware/InstLib.hpp @@ -4,7 +4,7 @@ * @date 2017-2021. * * @file InstLib.hpp - * @brief This file maintains information about instructions availabel in virtual hardware. + * @brief This file maintains information about instructions available in virtual hardware. */ #ifndef EMP_HARDWARE_INSTLIB_HPP_INCLUDE @@ -23,6 +23,7 @@ namespace emp { + /// ScopeType is used for scopes that we need to do something special at the end. /// Eg: LOOP needs to go back to beginning of loop; FUNCTION needs to return to call. enum class ScopeType { NONE=0, ROOT, BASIC, LOOP, FUNCTION }; @@ -41,7 +42,14 @@ namespace emp { using fun_t = std::function; using inst_properties_t = std::unordered_set; + struct InstructionBase{ + virtual ~InstructionBase() {;} + virtual size_t GetIndex() const = 0; + }; + struct InstDef { + size_t index; + size_t id; std::string name; ///< Name of this instruction. fun_t fun_call; ///< Function to call when executing. size_t num_args; ///< Number of args needed by function. @@ -51,11 +59,11 @@ namespace emp { inst_properties_t properties; ///< Are there any generic properties associated with this inst def? char symbol; ///< Unique symbol for this instruction. 
- InstDef(const std::string & _n, fun_t _fun, size_t _args, const std::string & _d, - ScopeType _s_type, size_t _s_arg, + InstDef(size_t _idx, size_t _id, const std::string & _n, fun_t _fun, size_t _args, + const std::string & _d, ScopeType _s_type, size_t _s_arg, const inst_properties_t & _properties = inst_properties_t(), char _sym='?') - : name(_n), fun_call(_fun), num_args(_args), desc(_d) + : index(_idx), id(_id), name(_n), fun_call(_fun), num_args(_args), desc(_d) , scope_type(_s_type), scope_arg(_s_arg), properties(_properties), symbol(_sym) { ; } InstDef(const InstDef &) = default; }; @@ -64,6 +72,7 @@ namespace emp { emp::vector inst_lib; ///< Full definitions for instructions. emp::vector inst_funs; ///< Map of instruction IDs to their functions. std::map name_map; ///< How do names link to instructions? + std::map id_map; ///< How do identifiers link to instructions? std::map arg_map; ///< How are different arguments named? /// Symbols to use when representing individual instructions (80). @@ -72,39 +81,43 @@ namespace emp { emp::array symbol_map; ///< Map of symbols back to instruction IDs. public: - InstLib() : inst_lib(), inst_funs(), name_map(), arg_map() { ; } ///< Default Constructor + InstLib() : inst_lib(), inst_funs(), name_map(), id_map(), arg_map() { ; } ///< Default Constructor InstLib(const InstLib &) = delete; ///< Copy Constructor InstLib(InstLib &&) = delete; ///< Move Constructor - ~InstLib() { ; } ///< Destructor + virtual ~InstLib() { ; } ///< Destructor - InstLib & operator=(const InstLib &) = default; ///< Copy Operator - InstLib & operator=(InstLib &&) = default; ///< Move Operator + InstLib & operator=(const InstLib &) = default; ///< Copy Operator + InstLib & operator=(InstLib &&) = default; ///< Move Operator /// Return the name associated with the specified instruction ID. 
- const std::string & GetName(size_t id) const { return inst_lib[id].name; } + const std::string & GetName(size_t idx) const { return inst_lib[idx].name; } /// Return the function associated with the specified instruction ID. - const fun_t & GetFunction(size_t id) const { return inst_lib[id].fun_call; } + const fun_t & GetFunction(size_t idx) const { return inst_lib[idx].fun_call; } /// Return the number of arguments expected for the specified instruction ID. - size_t GetNumArgs(size_t id) const { return inst_lib[id].num_args; } + size_t GetNumArgs(size_t idx) const { return inst_lib[idx].num_args; } - /// Return the provided description for the provided instruction ID. - const std::string & GetDesc(size_t id) const { return inst_lib[id].desc; } + /// Return the provided description for the providxed instruction ID. + const std::string & GetDesc(size_t idx) const { return inst_lib[idx].desc; } /// What type of scope does this instruction state? ScopeType::NONE is default. - ScopeType GetScopeType(size_t id) const { return inst_lib[id].scope_type; } + ScopeType GetScopeType(size_t idx) const { return inst_lib[idx].scope_type; } - /// If this instruction alters scope, identify which argument does so. - size_t GetScopeArg(size_t id) const { return inst_lib[id].scope_arg; } + /// If this instruction alters scope, idxentify which argument does so. + size_t GetScopeArg(size_t idx) const { return inst_lib[idx].scope_arg; } - /// Return the set of properties for the provided instruction ID. - const inst_properties_t & GetProperties(size_t id) const { return inst_lib[id].properties; } + /// Return the set of properties for the providxed instruction ID. + const inst_properties_t & GetProperties(size_t idx) const { + return inst_lib[idx].properties; + } - char GetSymbol(size_t id) const { return inst_lib[id].symbol; } + char GetSymbol(size_t idx) const { return inst_lib[idx].symbol; } /// Does the given instruction ID have the given property value? 
- bool HasProperty(size_t id, std::string property) const { return inst_lib[id].properties.count(property); } + bool HasProperty(size_t idx, std::string property) const { + return inst_lib[idx].properties.count(property); + } /// Get the number of instructions in this set. size_t GetSize() const { return inst_lib.size(); } @@ -113,10 +126,13 @@ namespace emp { return Has(name_map, name); } + size_t GetID(const size_t idx) const { + return inst_lib[idx].id; + } /// Return the ID of the instruction that has the specified name. size_t GetID(const std::string & name) const { emp_assert(Has(name_map, name), name); - return Find(name_map, name, (size_t) -1); + return inst_lib[Find(name_map, name, (size_t) -1)].id; } /// Return the ID of the instruction associated with the specified symbol. @@ -125,6 +141,17 @@ namespace emp { return symbol_map[(size_t) symbol]; } + /// Return the ID of the instruction that has the specified name. + size_t GetIndex(const std::string & name) const { + emp_assert(Has(name_map, name), name); + return Find(name_map, name, (size_t) -1); + } + /// Return the ID of the instruction that has the specified name. + size_t GetIndex(const size_t id) const { + emp_assert(Has(id_map, id), id); + return Find(id_map, id, (size_t) -1); + } + /// Return the argument value associated with the provided keyword. arg_t GetArg(const std::string & name) { emp_assert(Has(arg_map, name)); @@ -145,14 +172,20 @@ namespace emp { const std::string & desc="", ScopeType scope_type=ScopeType::NONE, size_t scope_arg=(size_t) -1, - const inst_properties_t & inst_properties=inst_properties_t()) + const inst_properties_t & inst_properties=inst_properties_t(), + int _id = -1) { - const size_t id = inst_lib.size(); + const size_t idx = inst_lib.size(); + const size_t id = (_id >= 0) ? _id : inst_lib.size(); + emp_assert(!Has(id_map, id), "ID is already in use!", id); const char symbol = (id < symbol_defaults.size()) ? 
symbol_defaults[id] : '+'; - inst_lib.emplace_back(name, fun_call, num_args, desc, scope_type, scope_arg, inst_properties, symbol); + inst_lib.emplace_back(idx, id, name, fun_call, num_args, desc, scope_type, scope_arg, + inst_properties, symbol); inst_funs.emplace_back(fun_call); - name_map[name] = id; + name_map[name] = idx; + id_map[id] = idx; symbol_map[(size_t) symbol] = id; + std::cout << "Registered instruction: " << name << " index: " << idx << "; id: " << id << "; symbol: " << symbol << std::endl; } /// Specify a keyword and arg value. @@ -162,18 +195,17 @@ namespace emp { } /// Process a specified instruction in the provided hardware. - void ProcessInst(hardware_t & hw, const inst_t & inst) const { - inst_funs[inst.id](hw, inst); + virtual void ProcessInst(hardware_t & hw, const inst_t & inst) const { + inst_funs[inst.GetIndex()](hw, inst); } /// Process a specified instruction on hardware that can be converted to the correct type. template void ProcessInst(emp::Ptr hw, const inst_t & inst) const { emp_assert( dynamic_cast(hw.Raw()) ); - inst_funs[inst.id](*(hw.template Cast()), inst); + inst_funs[inst.GetIndex()](*(hw.template Cast()), inst); } - /// Write out a full genome to the provided ostream. void WriteGenome(const genome_t & genome, std::ostream & os=std::cout) const { for (const inst_t & inst : genome) { @@ -189,9 +221,10 @@ namespace emp { /// Read the instruction in the provided info and append it to the provided genome. void ReadInst(genome_t & genome, std::string info) const { std::string name = emp::string_pop_word(info); - size_t id = GetID(name); - genome.emplace_back(id); - size_t num_args = GetNumArgs(id); + size_t idx = GetIndex(name); + size_t id = GetID(idx); + genome.emplace_back(idx, id); + size_t num_args = GetNumArgs(idx); for (size_t i = 0; i < num_args; i++) { std::string arg_name = emp::string_pop_word(info); // @CAO: Should check to make sure arg name is real. 
diff --git a/include/emp/hardware/VirtualCPU.hpp b/include/emp/hardware/VirtualCPU.hpp new file mode 100644 index 0000000000..46fa6ba168 --- /dev/null +++ b/include/emp/hardware/VirtualCPU.hpp @@ -0,0 +1,796 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022. + * + * @file VirtualCPU.hpp + * @brief A simple virtual CPU styled after the original and extended Avidian architectures. + * + * @TODO + * - Expanded heads? + * - expanded_nop_args useful? + * - Consider changing default return value for search functions + * - Consider switching to (or adding an optional mode) where nops are only curated + * as-needed instead of all at once + * + */ + +#ifndef EMP_HARDWARE_VIRTUALCPU_HPP_INCLUDE +#define EMP_HARDWARE_VIRTUALCPU_HPP_INCLUDE + +#include +#include +#include + +#include "../base/array.hpp" +#include "../base/Ptr.hpp" +#include "../base/unordered_map.hpp" +#include "../base/vector.hpp" +#include "../datastructs/map_utils.hpp" +#include "../datastructs/vector_utils.hpp" +#include "../io/File.hpp" +#include "../math/Random.hpp" +#include "../tools/string_utils.hpp" + +#include "Genome.hpp" +#include "VirtualCPU_InstLib.hpp" + +namespace emp{ + /// \brief A simple virtual CPU styled after those seen in Avida + /// + /// This class represents a single virtual CPU following a genome of assembly-level + /// instructions. + /// By default, each CPU features four heads, two stacks, multiple registers, and + /// a circular genome. + /// Both the original and extended architectures are supported. 
+ template + class VirtualCPU{ + public: + static constexpr size_t NUM_STACKS = 2; ///< Number of stacks in this CPU (currently 2) + static constexpr size_t MAX_NOPS = 23; ///< Maximum number of nop instructions supported + struct Instruction; + + using derived_t = DERIVED; + using data_t = uint32_t; + using inst_t = Instruction; + using inst_lib_t = VirtualCPU_InstLib; + using genome_t = Genome; + using nop_vec_t = emp::vector; + using stack_t = emp::vector; + + /// \brief Representation of a single instruction in the CPU's genome + /// + /// Only contains the necessary information for which instruction is being represented + /// as well as any data it needs in the genome. + /// Does NOT contain the actual logic of the instruction, nor the name. + /// These are handled by the instruction library itself. + struct Instruction : public inst_lib_t::InstructionBase { + size_t idx; /// Index of the instruction in the instruction library + size_t id; /// Identifier for the instruction that gives the user + /// flexibility over the instruction (e.g., what symbol + /// it should use in a string representation) + emp::vector nop_vec; /// Representation of the contiguous sequence of NOP + /// instructions following this instruction in the genome + bool has_been_executed = false; /// Has this instruction been executed? + bool has_been_copied = false; // Has this instruction been copied to an offspring? 
+ + Instruction() = delete; + Instruction(size_t _idx, size_t _id=0, emp::vector _nop_vec = {}) + : idx(_idx), id(_id), nop_vec(_nop_vec) { ; } + Instruction(const Instruction &) = default; + Instruction(Instruction &&) = default; + + Instruction & operator=(const Instruction &) = default; + Instruction & operator=(Instruction &&) = default; + bool operator<(const Instruction & in) const { + return id < in.id; + } + bool operator==(const Instruction & in) const { return id == in.id; } + bool operator!=(const Instruction & in) const { return !(*this == in); } + bool operator>(const Instruction & in) const { return in < *this; } + bool operator>=(const Instruction & in) const { return !(*this < in); } + bool operator<=(const Instruction & in) const { return !(in < *this); } + + void Set(size_t _idx, size_t _id, emp::vector _nop_vec = {}) + { idx = _idx; id = _id; nop_vec=_nop_vec;} + + size_t GetIndex() const override { return idx; } + }; + + + protected: + size_t num_regs = 0; ///< Number of registers found in this CPU + size_t num_nops = 0; ///< Number of NOP instructions found in this CPU's library + + public: + //////// FLAGS + bool are_nops_counted = false; ///< Flag detailing if the number of NOP instructions + ///< in the CPU's library have been counted + bool are_regs_expanded = false; ///< Flag signaling if the number of registers have + ///< been expanded to accommodate the number of NOP + ///< instructions in the library + bool nops_need_curated = true; ///< Flag signaling that NOP instructions need curated + bool expanded_nop_args = false; ///< Flag signaling that CPU is used the expanded + + //////// CPU COMPONENTS + emp::vector regs; ///< Vector of registers + std::unordered_map inputs; ///< Map of all available inputs + ///< (position -> value) + std::unordered_map outputs; ///< Map of all outputs (position -> value) + emp::array stacks; ///< Array of stacks for this CPU + size_t inst_ptr; ///< Instruction pointer, signifies next + ///< instruction to 
be executed + size_t flow_head; ///< Flow head, used for moving heads and + ///< values + size_t read_head; ///< Read head, signals what instruction to + ///< copy next + size_t write_head; ///< Write head, signals where to copy next + ///< instruction + //////// HELPER CONSTRUCTS + emp::unordered_map nop_id_map;/**< NOP inst id -> Nop index + (e.g., NopA -> 0, NopB -> 1, + NopE -> 5) */ + emp::vector label_idx_vec; ///< Vector of LABEL instructions indices in genome + //////// GENOME + genome_t genome; ///< Preserved copy of genome from organism creation/birth + ///< that should not change in any way + genome_t genome_working; ///< Working copy of genome that can mutate, resize, and change + //////// BOOKKEEPING + size_t active_stack_idx = 0; ///< Index of CPU's active stack + emp::vector copied_inst_id_vec; /**< Vector of instructions that have been + copied */ + size_t num_insts_executed = 0; ///< Number of instructions that have been executed + + + //////// CONSTRUCTORS / DESTRUCTOR + /// Create a new VirtualCPU with the same genome (and thus instruction library) + VirtualCPU(const genome_t & in_genome) + : regs(), inputs(), outputs(), + inst_ptr(0), flow_head(0), read_head(0), write_head(0), + genome(in_genome), genome_working(in_genome) { + Initialize(); + ResetHardware(); + } + /// Create a default VirtualCPU (no genome sequence, default instruction set) + VirtualCPU() : + VirtualCPU(genome_t(inst_lib_t::DefaultInstLib())) { + Initialize(); + ResetHardware(); + } + /// Create a perfect copy of passed VirtualCPU + VirtualCPU(const VirtualCPU &) = default; + /// Default move constructor + VirtualCPU(VirtualCPU &&) = default; + /// Default destructor + virtual ~VirtualCPU() { ; } + + + //////// GETTERS + /// Return size of original genome + size_t GetGenomeSize() const { return genome.GetSize(); } + /// Return size of working genome + size_t GetWorkingGenomeSize() const { return genome_working.GetSize(); } + /// Return the number of registers in the CPU + 
size_t GetNumRegs() const { return num_regs; } + /// Return the number of NOP instructions found in the CPU's instruction library + size_t GetNumNops() const { return num_nops; } + /// Return the outputs of the CPU + const std::unordered_map & GetOutputs() const { return outputs; } + /// Return a pointer to the CPU's instruction library + Ptr GetInstLib() const { return genome.GetInstLib(); } + /// Return the number of instructions that have been executed + size_t GetNumInstsExecuted() const{ + size_t count = 0; + for (auto inst : genome_working) { + if (inst.has_been_executed) count++; + } + return count; + } + /// Return the number of instructions that have been copied + size_t GetNumInstsCopied() const{ + size_t count = 0; + for (auto inst : genome_working) { + if (inst.has_been_copied) count++; + } + return count; + } + + + + //////// SETTERS + /// Copies passed vector into input map + void SetInputs(const emp::vector & vals) { + inputs = emp::ToUMap(vals); + } + + + //////// GENOME & INSTRUCTION MANIPULATION + /// Load instructions from input stream + bool Load(std::istream & input) { + ClearGenome(); + File file(input); + file.RemoveComments("//"); // Remove all C++ style comments + file.RemoveComments("#"); // Remove all bash/Python/R style comments + file.CompressWhitespace(); // Trim down remaining whitespace. 
+ file.RemoveEmpty(); + if (file.GetNumLines() == 0) { + emp::notify::Error("VirtualCPU trying to load a genome from an empty stream!"); + } + file.Apply( [this](std::string & info) { PushInst(info); } ); + nops_need_curated = true; + return true; + } + + /// Load instructions from file + bool Load(const std::string & filename) { + std::ifstream is(filename); + if (is.is_open()) return Load(is); + emp::notify::Error("VirtualCPU genome file is either empty or missing: ", filename); + return false; + } + + /// Add a new instruction to the end of the genome, by index in the instruction library + void PushInst(size_t idx) { + const size_t id = GetInstLib()->GetID(idx); + genome.emplace_back(idx, id); + genome_working.emplace_back(idx, id); + nops_need_curated = true; + } + + /// Add a new instruction to the end of the genome, by name + void PushInst(const std::string & name) { + PushInst(GetInstLib()->GetIndex(name)); + nops_need_curated = true; + } + + /// Add a specified new instruction to the end of the genome + void PushInst(const inst_t & inst) { + genome.emplace_back(inst); + genome_working.emplace_back(inst); + nops_need_curated = true; + } + + /// Add multiple copies of a specified instruction to the end of the genome + void PushInst(const inst_t & inst, size_t count) { + genome.reserve(genome.size() + count); + for (size_t i = 0; i < count; i++) genome.emplace_back(inst); + genome_working.reserve(genome.size() + count); + for (size_t i = 0; i < count; i++) genome_working.emplace_back(inst); + nops_need_curated = true; + } + + /// Return the first instruction in the instruction library + inst_t GetDefaultInst() const{ + return inst_t(GetInstLib()->GetIndex(0), 0); + } + + /// Add one or more default instructions to the end of the genome + void PushDefaultInst(size_t count=1) { + PushInst( inst_t(GetInstLib()->GetIndex(0), 0), count ); + nops_need_curated = true; + } + + /// Return a random instruction from the instruction library + inst_t GetRandomInst(Random & 
rand) { + size_t id = rand.GetUInt(GetInstLib()->GetSize()); + size_t idx = GetInstLib()->GetIndex(id); + //size_t idx = rand.GetUInt(GetInstLib()->GetSize()); + //size_t id = GetInstLib()->GetID(idx); + return inst_t(idx, id); + } + + /// Overwrite the instruction at the given genome index with passed instruction + void SetInst(size_t pos, const inst_t & inst) { + genome[pos] = inst; + genome_working[pos] = inst; + nops_need_curated = true; + } + + /// Overwrite the instruction at the given genome index with a random instruction + void RandomizeInst(size_t pos, Random & rand) { + SetInst(pos, GetRandomInst(rand) ); + nops_need_curated = true; + } + + /// Add a random instruction from the instruction library to the end of the genome + void PushRandomInst(Random & random, const size_t count=1) { + for (size_t i = 0; i < count; i++) { + PushInst(GetRandomInst(random)); + } + nops_need_curated = true; + } + + /// Insert the given instruction at the specified genome position + void InsertInst(const inst_t& inst, const size_t idx) { + genome.emplace(genome.begin() + idx, inst); + genome_working.emplace(genome_working.begin() + idx, inst); + nops_need_curated = true; + } + + /// Inserts a random instruction at the given genome position + void InsertRandomInst(const size_t idx, emp::Random& random) { + InsertInst(GetRandomInst(random), idx); + } + + /// Remove the instruction at the specified genome position + void RemoveInst(const size_t idx) { + genome.erase(genome.begin() + idx); + genome_working.erase(genome_working.begin() + idx); + nops_need_curated = true; + } + + + + //////// HEAD MANIPULATION + + void ResetIP() { inst_ptr = 0; } ///< Move instruction pointer to beginning of the genome. + void ResetRH() { read_head = 0; } ///< Move read head to beginning of the genome. + void ResetWH() { write_head = 0; } ///< Move write head to beginning of the genome. + void ResetFH() { flow_head = 0; } ///< Move flow head to beginning of the genome. 
+ + /// Advance the instruction pointer so many steps and wrap around the end of the genome + void AdvanceIP(size_t steps=1) { + inst_ptr += steps; + inst_ptr = (genome_working.size() > 0 ? inst_ptr % genome_working.size() : 0); + } + /// Advance the read head so many steps and wrap around the end of the genome + void AdvanceRH(size_t steps=1) { + read_head += steps; + read_head = (genome_working.size() > 0 ? read_head % genome_working.size() : 0); + } + /// Advance the write head so many steps and wrap around the end of the genome + void AdvanceWH(size_t steps=1) { + write_head += steps; + write_head = (genome_working.size() > 0 ? write_head % genome_working.size() : 0); + } + /// Advance the flow head so many steps and wrap around the end of the genome + void AdvanceFH(size_t steps=1) { + flow_head += steps; + flow_head = (genome_working.size() > 0 ? flow_head % genome_working.size() : 0); + } + /// Set the instruction pointer to the genome index, wrap around the end of the genome + void SetIP(size_t pos) { + inst_ptr = pos; + inst_ptr %= genome_working.size(); + } + /// Set the read head to the genome index, wrap around the end of the genome + void SetRH(size_t pos) { + read_head = pos; + read_head %= genome_working.size(); + } + /// Set the write head to the genome index, wrap around the end of the genome + void SetWH(size_t pos) { + write_head = pos; + write_head %= genome_working.size(); + } + /// Set the flow head to the genome index, wrap around the end of the genome + void SetFH(size_t pos) { + flow_head = pos; + flow_head %= genome_working.size(); + } + /// Set the specified head (which can wrap) to the beginning of the genome, + void ResetModdedHead(size_t head_idx) { + size_t modded_idx = head_idx % 4; + if (modded_idx == 0) SetIP(0); + else if (modded_idx == 1) SetRH(0); + else if (modded_idx == 2) SetWH(0); + else if (modded_idx == 3) SetFH(0); + } + /// Set the specified head (which can wrap) to the given genome position, + /// wrap around the end of 
the genome + void SetModdedHead(size_t head_idx, size_t pos) { + size_t modded_idx = head_idx % 4; + if (modded_idx == 0) SetIP(pos); + else if (modded_idx == 1) SetRH(pos); + else if (modded_idx == 2) SetWH(pos); + else if (modded_idx == 3) SetFH(pos); + } + /// Advance the specified head (which can wrap) the given number of instructions, + /// wrap around the end of the genome + void AdvanceModdedHead(size_t head_idx, size_t steps=1) { + size_t modded_idx = head_idx % 4; + if (modded_idx == 0) AdvanceIP(steps); + else if (modded_idx == 1) AdvanceRH(steps); + else if (modded_idx == 2) AdvanceWH(steps); + else if (modded_idx == 3) AdvanceFH(steps); + } + /// Return the head POSITION of the specified head (can wrap) + size_t GetModdedHead(size_t head_idx) { + size_t modded_idx = head_idx % 4; + if (modded_idx == 0) return inst_ptr; + else if (modded_idx == 1) return read_head; + else if (modded_idx == 2) return write_head; + else if (modded_idx == 3) return flow_head; + return inst_ptr; + } + + + //////// HARDWARE MANIPULATION + /// Initializes the CPU by counting the number of NOP instructions in the instruction + /// library and expanding the number of registers to match + void Initialize() { + CountNops(); + ExpandRegisters(); + ResetHardware(); + } + /// Reset all heads + void ResetHeads() { + ResetIP(); + ResetRH(); + ResetWH(); + ResetFH(); + } + /// Reset all inputs and outputs + void ResetIO() { + inputs.clear(); + outputs.clear(); + } + /// Reset all memory/data + void ResetMemory() { + // Initialize registers to their position. So Reg0 = 0 and Reg11 = 11. 
+      for (size_t i = 0; i < num_regs; i++) {
+        regs[i] = (data_t) i;
+      }
+      for (size_t i = 0; i < NUM_STACKS; ++i) {
+        stacks[i].resize(0);
+      }
+      active_stack_idx = 0;
+    }
+    /// Reset all bookkeeping variables
+    void ResetBookkeeping() {
+      copied_inst_id_vec.clear();
+      num_insts_executed = 0;
+    }
+    /// Reset the working genome back to the original genome
+    void ResetWorkingGenome() {
+      genome_working = genome;
+      label_idx_vec.clear();
+      nops_need_curated = true;
+    }
+    /// Reset just the CPU hardware, but keep the original genome
+    void ResetHardware() {
+      ResetHeads();
+      ResetMemory();
+      ResetIO();
+      ResetBookkeeping();
+    }
+    /// Clear the main genome of the organism and reset all hardware
+    void ClearGenome() {
+      genome.resize(0,0);           // Clear out genome
+      genome_working.resize(0,0);   // Clear out working genome
+      label_idx_vec.clear();        // No labels if genome is empty
+      nops_need_curated = true;
+      ResetHardware();              // Reset the full hardware
+    }
+    /// Compile NOP instructions in genome into useful nop vectors for each instruction,
+    /// and record the position of every LABEL instruction (ascending, no duplicates)
+    void CurateNops() {
+      if (genome_working.size() == 0) {
+        nops_need_curated = false;
+        return;
+      }
+      bool label_inst_present = GetInstLib()->IsInst("Label");
+      size_t label_inst_id = label_inst_present ? GetInstLib()->GetID("Label") : 0;
+
+      if (!are_nops_counted) CountNops();
+      label_idx_vec.clear();
+      // Start by filling the nop vector of the last instruction
+      for (size_t inst_idx = 0; inst_idx < genome_working.GetSize() - 1; ++inst_idx) {
+        if (emp::Has(nop_id_map, genome_working[inst_idx].id)) {
+          genome_working[genome_working.size() - 1].nop_vec.push_back(
+              nop_id_map[genome_working[inst_idx].id]);
+        }
+        else break;
+      }
+      // NOTE: the last instruction is deliberately NOT recorded as a label here.
+      // The final pass below visits every index (including the last) in ascending
+      // order, so recording it at this point would add a duplicate, out-of-order
+      // entry at the front of label_idx_vec, which the FindLabel scans read first.
+      // Now iterate backward over the genome, filling in each instruction's nop vector
+      // Example, our genome looks like xyzabc where only a, b, and c are nops
+      // If we are on index 2 (z), we see it is followed by a nop.
+      // Thus, we copy the next instruction into the nop vector [a]
+      // Then we copy THAT instruction's nop vector, too: [a,b,c]
+      // By going in reverse order, all following instructions already have a nop vec
+      for (auto it = genome_working.rbegin() + 1; it != genome_working.rend(); ++it) {
+        if (emp::Has(nop_id_map, (it - 1)->id)) {
+          it->nop_vec.resize( (it - 1)->nop_vec.size() + 1 );
+          it->nop_vec[0] = nop_id_map[(it - 1)->id];
+          std::copy(
+              (it - 1)->nop_vec.begin(),
+              (it - 1)->nop_vec.end(),
+              it->nop_vec.begin() + 1);
+        }
+      }
+      for (size_t inst_idx = 0; inst_idx < genome_working.size(); ++inst_idx) {
+        if (label_inst_present && genome_working[inst_idx].id == label_inst_id) // Record pos only if a Label inst exists
+          label_idx_vec.push_back(inst_idx);
+      }
+      nops_need_curated = false;
+    }
+    /// Determine the number of sequential NOP instructions in the instruction library
+    ///
+    /// Starts at NopA and continues from there. Any missing instructions force count to
+    /// stop. Last possible NOP instruction is NopW, as NopX is a special case in Avida.
+    void CountNops() {
+      num_nops = 0;
+      nop_id_map.clear();
+      are_nops_counted = true;
+      for (size_t idx = 0; idx < MAX_NOPS ; ++idx) { // Stop before X!
+ std::string nop_name = (std::string)"Nop" + (char)('A' + idx); + if (GetInstLib()->IsInst(nop_name)) { + num_nops++; + size_t id = GetInstLib()->GetID(nop_name); + nop_id_map[id] = idx; + } + else return; + } + } + /// Expand the CPU's registers to match the number of NOP instructions in the + /// instruction library + void ExpandRegisters() { + if (!are_nops_counted) CountNops(); + are_regs_expanded = true; + num_regs = num_nops; + regs.resize(num_regs); + } + + //////// NOP SEQUENCE METHODS + /// For a given NOP instruction (as an index), return its complement index + size_t GetComplementNop(size_t idx) { + if (idx >= num_nops - 1) return 0; + else return idx + 1; + } + /// For a vector of NOP instructions (as indices), return a vector of complement indices + /// in the same order + nop_vec_t GetComplementNopSequence(const nop_vec_t& nop_vec) { + nop_vec_t res_vec; + for (size_t nop : nop_vec) { + res_vec.push_back(GetComplementNop(nop)); + } + return res_vec; + } + /// Check if a vector of NOP instructions is the same as the START of another vector + bool CompareNopSequences(const nop_vec_t& search_vec, const nop_vec_t& compare_vec) { + if (search_vec.size() > compare_vec.size()) return false; + if (search_vec.size() == 0 || compare_vec.size() == 0) return false; + for (size_t idx = 0; idx < search_vec.size(); ++idx) { + if (search_vec[idx] != compare_vec[idx]) return false; + } + return true; + } + /// Check if the given vector of NOP instructions (as indices) were the last + /// instructions to be copied by the CPU + bool CheckIfLastCopied(const nop_vec_t& label) { + if (label.size() > copied_inst_id_vec.size()) return false; + if (label.size() == 0) return false; + int idx = label.size() - 1; + for (auto copied_it = copied_inst_id_vec.rbegin(); copied_it != copied_inst_id_vec.rend(); copied_it++) { + if (*copied_it != label[idx]) + return false; + idx--; + if (idx < 0) break; + + } + return true; + } + /// Search up the genome (backward) for a sequence of 
NOP instructions following a LABEL + /// instruction that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + size_t FindLabel_Reverse(bool start_local) { + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + size_t start_label_vec_idx = label_idx_vec.size() - 1; + if (start_local) { + bool start_found = false; + for (size_t offset = 0; offset < label_idx_vec.size(); ++offset) { + if (label_idx_vec[label_idx_vec.size() - offset - 1] < inst_ptr) { + start_label_vec_idx = label_idx_vec.size() - offset - 1; + start_found = true; + break; + } + } + if (!start_found) start_label_vec_idx = label_idx_vec.size() - 1; + } + for (size_t offset = 0; offset < label_idx_vec.size(); ++offset) { + const size_t idx = + label_idx_vec[ + (start_label_vec_idx - offset + label_idx_vec.size()) % label_idx_vec.size() + ]; + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search the genome for a sequence of NOP instructions following a LABEL + /// instruction that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + /// @param reverse If true, traverse the genome backward. 
If false, traverse forward + size_t FindLabel(bool start_local, bool reverse = false) { + if (reverse) return FindLabel_Reverse(start_local); + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + size_t start_label_vec_idx = 0; + if (start_local) { + bool start_found = false; + for (; start_label_vec_idx < label_idx_vec.size(); ++start_label_vec_idx) { + if (label_idx_vec[start_label_vec_idx] > inst_ptr) { + start_found = true; + break; + } + } + if (!start_found) start_label_vec_idx = 0; + } + for (size_t offset = 0; offset < label_idx_vec.size(); ++offset) { + const size_t idx = label_idx_vec[(start_label_vec_idx + offset) % label_idx_vec.size()]; + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the given NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_idx Position in the genome to start the search + size_t FindNopSequence_Reverse(const nop_vec_t& search_vec, size_t start_idx) { + for (size_t offset = 1; offset < genome_working.size() + 1; ++offset) { + const size_t idx = (start_idx - offset + genome_working.size()) % genome_working.size(); + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the given NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_local If true, search from instruction pointer. 
If false, search from + /// start of the genome + size_t FindNopSequence_Reverse(const nop_vec_t& search_vec, bool start_local) { + size_t start_idx = 0; + if (start_local && inst_ptr != 0) start_idx = inst_ptr; + return FindNopSequence_Reverse(search_vec, start_idx); + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + size_t FindNopSequence_Reverse(bool start_local) { + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + return FindNopSequence_Reverse(search_vec, start_local); + } + /// Search the genome for a sequence of NOP instructions that match the given + /// NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_idx Position in the genome to start the search + size_t FindNopSequence(const nop_vec_t& search_vec, size_t start_idx, + bool reverse = false) { + if (reverse) return FindNopSequence_Reverse(search_vec, start_idx); + for (size_t offset = 1; offset < genome_working.size() + 1; ++offset) { + const size_t idx = (start_idx + offset) % genome_working.size(); + if (CompareNopSequences(search_vec, genome_working[idx].nop_vec)) return idx; + } + return inst_ptr; + } + /// Search the genome for a sequence of NOP instructions that match the given + /// NOP sequence + /// + /// @param search_vec The sequence of NOP instructions to search for + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + /// @param reverse If true, traverse the genome backward. 
If false, traverse forward + size_t FindNopSequence(const nop_vec_t& search_vec, bool start_local, + bool reverse = false) { + size_t start_idx = genome_working.size() - 1; + if (start_local) start_idx = inst_ptr; + return FindNopSequence(search_vec, start_idx, reverse); + } + /// Search up the genome (backward) for a sequence of NOP instructions + /// that match the NOP sequence following the current instruction + /// + /// @param start_local If true, search from instruction pointer. If false, search from + /// start of the genome + /// @param reverse If true, traverse the genome backward. If false, traverse forward + size_t FindNopSequence(bool start_local, bool reverse = false) { + const nop_vec_t search_vec = genome_working[inst_ptr].nop_vec; + return FindNopSequence(search_vec, start_local, reverse); + } + + + //////// STACK MANIPULATION + /// Push the value in the specified register on top of the active stack + void StackPush(size_t reg_idx) { + stacks[active_stack_idx].push_back(regs[reg_idx]); + } + /// Remove the value from the top of the active stack and store it in the + /// specified register + void StackPop(size_t reg_idx) { + if (stacks[active_stack_idx].size()) { + regs[reg_idx] = *stacks[active_stack_idx].rbegin(); + stacks[active_stack_idx].pop_back(); + } + } + /// Swap which stack is active + void StackSwap() { + active_stack_idx++; + if (active_stack_idx >= NUM_STACKS) active_stack_idx = 0; + } + /// Fetch the nth value of the specified stack + data_t GetStackVal(size_t stack_idx, size_t val_idx) { + emp_assert(stack_idx < NUM_STACKS); + emp_assert(val_idx < stacks[stack_idx].size()); + size_t reverse_idx = stacks[stack_idx].size() - val_idx - 1; + return stacks[stack_idx][reverse_idx]; + } + + + //////// PROCESSING + /// Process the next instruction pointed to be the instruction pointer + void SingleProcess(bool verbose = true) { + emp_assert(genome_working.GetSize() > 0); // A genome must exist to be processed. 
+ if (!are_regs_expanded) ExpandRegisters(); + if (nops_need_curated) CurateNops(); + if (verbose) { + GetInstLib()->GetName(genome_working[inst_ptr].idx); + PrintDetails(); + } + genome_working[inst_ptr].has_been_executed = true; + GetInstLib()->ProcessInst(ToPtr(this), genome_working[inst_ptr]); + AdvanceIP(); + num_insts_executed++; + } + /// Process the next SERIES of instructions, directed by the instruction pointer. + void Process(size_t num_inst = 1, bool verbose = true) { + for (size_t i = 0; i < num_inst; i++) SingleProcess(verbose); + } + + + //////// STATE -> STRING FUNCTIONS + /// Return the working genome in string form. + /// + /// Each instruction is represented by a single character, dictated by the + /// instruction's ID. + std::string GetWorkingGenomeString() const{ + std::stringstream sstr; + sstr << "[" << genome_working.size() << "]"; + for (size_t idx = 0; idx < genome_working.size(); idx++) { + unsigned char c = 'a' + genome_working[idx].id; + if (genome_working[idx].id > 25) c = 'A' + genome_working[idx].id - 26; + sstr << c; + } + return sstr.str(); + } + /// Return the original genome in string form. + /// + /// Each instruction is represented by a single character, dictated by the + /// instruction's ID. 
+ std::string GetGenomeString() const{ + std::stringstream sstr; + sstr << "[" << genome.size() << "]"; + for (size_t idx = 0; idx < genome.size(); idx++) { + unsigned char c = 'a' + genome[idx].id; + if (genome[idx].id > 25) c = 'A' + genome[idx].id - 26; + sstr << c; + } + return sstr.str(); + } + /// Output the state of the CPU's heads and registers to the specified output stream + void PrintDetails(std::ostream& os = std::cout) { + os << "IP: " << inst_ptr; + os << " RH: " << read_head; + os << " WH: " << write_head; + os << " FH: " << flow_head; + os << "(nops: " << num_nops << "; regs: " << num_regs << ")" << std::endl; + for (size_t reg_idx = 0; reg_idx < regs.size(); ++reg_idx) { + os << "[" << reg_idx << "] " << regs[reg_idx] << std::endl; + } + } + + }; // End VirtualCPU class +} // End namespace + + + +#endif // #ifndef EMP_HARDWARE_VIRTUALCPU_HPP_INCLUDE diff --git a/include/emp/hardware/VirtualCPU_InstLib.hpp b/include/emp/hardware/VirtualCPU_InstLib.hpp new file mode 100644 index 0000000000..74d02b79a5 --- /dev/null +++ b/include/emp/hardware/VirtualCPU_InstLib.hpp @@ -0,0 +1,295 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021-2022 + * + * @file VirtualCPU_InstLib.hpp + * @brief A specialized version of InstLib to handle VirtualCPU instructions. + */ + +#ifndef EMP_HARDWARE_VIRTUALCPU_INSTLIB_HPP_INCLUDE +#define EMP_HARDWARE_VIRTUALCPU_INSTLIB_HPP_INCLUDE + +#include "../base/error.hpp" +#include "../math/math.hpp" + +#include "InstLib.hpp" + +namespace emp { + + /// \brief A pure-virtual class that defines a series of instructions for VirtualCPU_Base or any of its derived classes. 
+ template + struct VirtualCPU_InstLib : public InstLib { + using hardware_t = HARDWARE_T; + using inst_lib_t = InstLib; + using arg_t = ARG_T; + using this_t = VirtualCPU_InstLib; + using inst_t = typename hardware_t::inst_t; + using nop_vec_t = typename hardware_t::nop_vec_t; + + // Instructions + static void Inst_NopA(hardware_t & /*hw*/, const inst_t & /*inst*/) { ; } + static void Inst_NopB(hardware_t & /*hw*/, const inst_t & /*inst*/) { ; } + static void Inst_NopC(hardware_t & /*hw*/, const inst_t & /*inst*/) { ; } + static void Inst_Inc(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + ++hw.regs[idx]; + } + static void Inst_Dec(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + --hw.regs[idx]; + } + static void Inst_If_Not_Equal(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_op_1 = inst.nop_vec.size() < 1 ? 1 : inst.nop_vec[0]; + size_t idx_op_2 = inst.nop_vec.size() < 2 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[1]; + if(hw.regs[idx_op_1] == hw.regs[idx_op_2]) + hw.AdvanceIP(1); + hw.AdvanceIP(inst.nop_vec.size()); + } + else{ + size_t idx_1 = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx_1); + if(hw.regs[idx_1] == hw.regs[idx_2]) + hw.AdvanceIP(1); + if(inst.nop_vec.size()) hw.AdvanceIP(1); + } + } + static void Inst_If_Less(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_op_1 = inst.nop_vec.size() < 1 ? 1 : inst.nop_vec[0]; + size_t idx_op_2 = inst.nop_vec.size() < 2 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[1]; + if(hw.regs[idx_op_1] >= hw.regs[idx_op_2]) + hw.AdvanceIP(1); + hw.AdvanceIP(inst.nop_vec.size()); + } + else{ + size_t idx_1 = inst.nop_vec.empty() ? 
1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx_1); + if(hw.regs[idx_1] >= hw.regs[idx_2]) + hw.AdvanceIP(1); + if(inst.nop_vec.size()) hw.AdvanceIP(1); + } + } + static void Inst_Pop(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.StackPop(idx); + } + static void Inst_Push(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.StackPush(idx); + } + static void Inst_Swap_Stack(hardware_t & hw, const inst_t & /*inst*/) { + hw.StackSwap(); + } + static void Inst_Shift_Right(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.regs[idx] >>= 1; + } + static void Inst_Shift_Left(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + hw.regs[idx] <<= 1; + } + static void Inst_Add(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_res = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_op_1 = inst.nop_vec.size() < 2 ? idx_res : inst.nop_vec[1]; + size_t idx_op_2 = inst.nop_vec.size() < 3 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[2]; + hw.regs[idx_res] = hw.regs[idx_op_1] + hw.regs[idx_op_2]; + } + else{ + size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_2 = hw.GetComplementNop(idx); + hw.regs[idx] = hw.regs[idx] + hw.regs[idx_2]; + } + } + static void Inst_Sub(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t idx_res = inst.nop_vec.empty() ? 1 : inst.nop_vec[0]; + size_t idx_op_1 = inst.nop_vec.size() < 2 ? idx_res : inst.nop_vec[1]; + size_t idx_op_2 = inst.nop_vec.size() < 3 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[2]; + hw.regs[idx_res] = hw.regs[idx_op_1] - hw.regs[idx_op_2]; + } + else{ + size_t idx = inst.nop_vec.empty() ? 
1 : inst.nop_vec[0];
+        size_t idx_2 = hw.GetComplementNop(idx);
+        hw.regs[idx] = hw.regs[idx] - hw.regs[idx_2];
+      }
+    }
+    static void Inst_Nand(hardware_t & hw, const inst_t & inst) {
+      if(hw.expanded_nop_args){
+        size_t idx_res = inst.nop_vec.empty() ? 1 : inst.nop_vec[0];
+        size_t idx_op_1 = inst.nop_vec.size() < 2 ? idx_res : inst.nop_vec[1];
+        size_t idx_op_2 = inst.nop_vec.size() < 3 ? hw.GetComplementNop(idx_op_1) : inst.nop_vec[2];
+        hw.regs[idx_res] = ~(hw.regs[idx_op_1] & hw.regs[idx_op_2]);
+      }
+      else{
+        size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0];
+        size_t idx_2 = hw.GetComplementNop(idx);
+        // NAND the register with its complement register; operands must not be altered first.
+        hw.regs[idx] = ~(hw.regs[idx] & hw.regs[idx_2]);
+      }
+    }
+    static void Inst_IO(hardware_t & hw, const inst_t & inst) {
+      size_t idx = inst.nop_vec.empty() ? 1 : inst.nop_vec[0];
+      std::cout << "Output: " << hw.regs[idx] << std::endl;
+      // TODO: Handle input
+    }
+    static void Inst_H_Alloc(hardware_t & hw, const inst_t & /*inst*/) {
+      hw.genome_working.resize(hw.genome.size() * 2, hw.GetDefaultInst());
+      hw.regs[0] = hw.genome.size();
+    }
+    static void Inst_H_Divide(hardware_t & hw, const inst_t & /*inst*/) {
+      if(hw.read_head >= hw.genome.size()){
+        hw.genome_working.resize(hw.read_head, 0);
+        hw.ResetHardware();
+        hw.inst_ptr = hw.genome.size() - 1;
+        std::cout << "Divide!"
<< std::endl;
+      }
+    }
+    static void Inst_H_Copy(hardware_t & hw, const inst_t & /*inst*/) {
+      hw.genome_working[hw.write_head] = hw.genome_working[hw.read_head];
+      hw.copied_inst_id_vec.push_back(hw.genome_working[hw.write_head].id);
+      hw.read_head++;
+      while(hw.read_head >= hw.genome_working.size()) hw.read_head -= hw.genome_working.size();
+      hw.write_head++;
+      while(hw.write_head >= hw.genome_working.size()) hw.write_head -= hw.genome_working.size();
+      // TODO: Mutation
+    }
+    static void Inst_H_Search(hardware_t & hw, const inst_t & inst) {
+      size_t res = hw.FindNopSequence(hw.GetComplementNopSequence(inst.nop_vec), hw.inst_ptr);
+      if(inst.nop_vec.size() == 0 || res == hw.inst_ptr){
+        hw.regs[1] = 0;
+        hw.regs[2] = 0;
+        hw.SetFH(hw.inst_ptr + 1);
+      }
+      else{ // Distance is measured forward from the IP, wrapping past the end of the genome
+        hw.regs[1] = res > hw.inst_ptr ? res - hw.inst_ptr : res + hw.genome_working.size() - hw.inst_ptr;
+        hw.regs[2] = inst.nop_vec.size();
+        hw.SetFH(res + inst.nop_vec.size() + 1);
+      }
+    }
+    static void Inst_Mov_Head(hardware_t & hw, const inst_t & inst) {
+      if(hw.expanded_nop_args){
+        size_t dest_idx = hw.flow_head;
+        if(inst.nop_vec.size() >= 2) dest_idx = hw.GetModdedHead(inst.nop_vec[1]);
+        if(!inst.nop_vec.empty()) hw.SetModdedHead(inst.nop_vec[0], dest_idx);
+        else hw.SetIP(dest_idx);
+      }
+      else{
+        if(!inst.nop_vec.empty()){
+          // IP is a special case because it auto advances!
+ if(inst.nop_vec[0] % 4 == 0) hw.SetIP(hw.flow_head - 1); + else hw.SetModdedHead(inst.nop_vec[0], hw.flow_head); + } + else hw.SetIP(hw.flow_head - 1); + } + } + static void Inst_Jmp_Head(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t jump_dist = hw.regs[1]; + if(inst.nop_vec.size() >= 2) jump_dist = hw.regs[inst.nop_vec[1]]; + if(!inst.nop_vec.empty()) hw.AdvanceModdedHead(inst.nop_vec[0], jump_dist); + else hw.AdvanceIP(jump_dist); + } + else{ + if(!inst.nop_vec.empty()) hw.AdvanceModdedHead(inst.nop_vec[0], hw.regs[2]); + else hw.AdvanceIP(hw.regs[2]); + } + } + static void Inst_Get_Head(hardware_t & hw, const inst_t & inst) { + if(hw.expanded_nop_args){ + size_t head_val = inst.nop_vec.empty() ? hw.inst_ptr : hw.GetModdedHead(inst.nop_vec[0]); + if(inst.nop_vec.size() < 2) hw.regs[2] = head_val; + else hw.regs[inst.nop_vec[1]] = head_val; + } + else{ + if(inst.nop_vec.empty()) hw.regs[2] = hw.inst_ptr; + else hw.regs[2] = hw.GetModdedHead(inst.nop_vec[0]); + } + } + static void Inst_If_Label(hardware_t & hw, const inst_t & inst) { + hw.AdvanceIP(inst.nop_vec.size()); + if(!hw.CheckIfLastCopied(hw.GetComplementNopSequence(inst.nop_vec))) hw.AdvanceIP(); + } + static void Inst_Set_Flow(hardware_t & hw, const inst_t & inst) { + size_t idx = inst.nop_vec.empty() ? 
2 : inst.nop_vec[0]; + hw.SetFH(hw.regs[idx]); + } + + /// Maintain and return a singleton of default instructions + static const this_t & DefaultInstLib() { + static this_t inst_lib; + if (inst_lib.GetSize() == 0) { + inst_lib.AddInst("NopA", Inst_NopA, 0, "No-operation A"); + inst_lib.AddInst("NopB", Inst_NopB, 0, "No-operation B"); + inst_lib.AddInst("NopC", Inst_NopC, 0, "No-operation C"); + inst_lib.AddInst("IfNEq", Inst_If_Not_Equal, 1, + "Skip next inst unless register values match"); + inst_lib.AddInst("IfLess", Inst_If_Less, 1, + "Skip next inst unless focal register is less than its complement"); + inst_lib.AddInst("Inc", Inst_Inc, 1, "Increment value in reg Arg1"); + inst_lib.AddInst("Dec", Inst_Dec, 1, "Decrement value in reg Arg1"); + inst_lib.AddInst("Pop", Inst_Pop, 1, "Pop value from active stack into register"); + inst_lib.AddInst("Push", Inst_Push, 1, "Add register's value to active stack"); + inst_lib.AddInst("Swap-Stk", Inst_Swap_Stack, 1, "Swap which stack is active"); + inst_lib.AddInst("ShiftR", Inst_Shift_Right, 1, "Shift register value right by one bit"); + inst_lib.AddInst("ShiftL", Inst_Shift_Left, 1, "Shift register value left by one bit"); + inst_lib.AddInst("Add", Inst_Add, 1, + "Add values in registers B and C, then store result in given register"); + inst_lib.AddInst("Sub", Inst_Sub, 1, + "Sub values in registers B and C, then store result in given register"); + inst_lib.AddInst("Nand", Inst_Nand, 1, + "NAND values in registers B and C, then store result in given register"); + inst_lib.AddInst("IO", Inst_IO, 1, + "Output value in given register and then place new input in that register"); + inst_lib.AddInst("HAlloc", Inst_H_Alloc, 1, "Allocate memory for offspring"); + inst_lib.AddInst("HDivide", Inst_H_Divide, 1, "Attempt to split offspring"); + inst_lib.AddInst("HCopy", Inst_H_Copy, 1, "Copy instruction from read head to write head"); + inst_lib.AddInst("HSearch", Inst_H_Search, 1, "Search for label complement"); + 
inst_lib.AddInst("MovHead", Inst_Mov_Head, 1, "Move a given head to a postiion"); + inst_lib.AddInst("JmpHead", Inst_Jmp_Head, 1, "Move a given head by a relative amount"); + inst_lib.AddInst("GetHead", Inst_Get_Head, 1, "Get location of head"); + inst_lib.AddInst("IfLabel", Inst_If_Label, 1, + "Execute next instruction if label was the last thing copied"); + inst_lib.AddInst("SetFlow", Inst_Set_Flow, 1, "Set flow head to register value"); + /* + inst_lib.AddInst("Dec", Inst_Dec, 1, "Decrement value in reg Arg1"); + inst_lib.AddInst("Not", Inst_Not, 1, "Logically toggle value in reg Arg1"); + inst_lib.AddInst("SetReg", Inst_SetReg, 2, "Set reg Arg1 to numerical value Arg2"); + inst_lib.AddInst("Add", Inst_Add, 3, "regs: Arg3 = Arg1 + Arg2"); + inst_lib.AddInst("Sub", Inst_Sub, 3, "regs: Arg3 = Arg1 - Arg2"); + inst_lib.AddInst("Mult", Inst_Mult, 3, "regs: Arg3 = Arg1 * Arg2"); + inst_lib.AddInst("Div", Inst_Div, 3, "regs: Arg3 = Arg1 / Arg2"); + inst_lib.AddInst("Mod", Inst_Mod, 3, "regs: Arg3 = Arg1 % Arg2"); + inst_lib.AddInst("TestEqu", Inst_TestEqu, 3, "regs: Arg3 = (Arg1 == Arg2)"); + inst_lib.AddInst("TestNEqu", Inst_TestNEqu, 3, "regs: Arg3 = (Arg1 != Arg2)"); + inst_lib.AddInst("TestLess", Inst_TestLess, 3, "regs: Arg3 = (Arg1 < Arg2)"); + inst_lib.AddInst("If", Inst_If, 2, "If reg Arg1 != 0, scope -> Arg2; else skip scope", ScopeType::BASIC, 1); + inst_lib.AddInst("While", Inst_While, 2, "Until reg Arg1 != 0, repeat scope Arg2; else skip", ScopeType::LOOP, 1); + inst_lib.AddInst("Countdown", Inst_Countdown, 2, "Countdown reg Arg1 to zero; scope to Arg2", ScopeType::LOOP, 1); + inst_lib.AddInst("Break", Inst_Break, 1, "Break out of scope Arg1"); + inst_lib.AddInst("Scope", Inst_Scope, 1, "Enter scope Arg1", ScopeType::BASIC, 0); + inst_lib.AddInst("Define", Inst_Define, 2, "Build function Arg1 in scope Arg2", ScopeType::FUNCTION, 1); + inst_lib.AddInst("Call", Inst_Call, 1, "Call previously defined function Arg1"); + inst_lib.AddInst("Push", Inst_Push, 2, 
"Push reg Arg1 onto stack Arg2"); + inst_lib.AddInst("Pop", Inst_Pop, 2, "Pop stack Arg1 into reg Arg2"); + inst_lib.AddInst("Input", Inst_Input, 2, "Pull next value from input Arg1 into reg Arg2"); + inst_lib.AddInst("Output", Inst_Output, 2, "Push reg Arg1 into output Arg2"); + inst_lib.AddInst("CopyVal", Inst_CopyVal, 2, "Copy reg Arg1 into reg Arg2"); + inst_lib.AddInst("ScopeReg", Inst_ScopeReg, 1, "Backup reg Arg1; restore at end of scope"); + */ + + //for (size_t i = 0; i < hardware_t::NUM_REGS; i++) { + // inst_lib.AddArg(to_string((int)i), i); // Args can be called by value + // inst_lib.AddArg(to_string("Reg", 'A'+(char)i), i); // ...or as a register. + //} + } + + return inst_lib; + } + }; + +} + +#endif // #ifndef EMP_HARDWARE_VIRTUALCPU_INSTLIB_HPP_INCLUDE diff --git a/include/emp/hardware/signalgp_utils.hpp b/include/emp/hardware/signalgp_utils.hpp index a34ab02105..4a47aa3f07 100644 --- a/include/emp/hardware/signalgp_utils.hpp +++ b/include/emp/hardware/signalgp_utils.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2018 + * @date 2018-2022. * * @file signalgp_utils.hpp * @brief Helper functions for working with SignalGP virtual hardware/programs. @@ -666,7 +666,7 @@ namespace emp { fun_t new_fun(program[fID].GetAffinity()); size_t expected_func_len = program[fID].GetSize(); // Compute number and location of insertions. 
- const uint32_t num_ins = rnd.GetRandBinomial(program[fID].GetSize(), INST_INS__PER_INST()); + const uint32_t num_ins = rnd.GetBinomial(program[fID].GetSize(), INST_INS__PER_INST()); emp::vector ins_locs; if (num_ins > 0) { ins_locs = emp::RandomUIntVector(rnd, num_ins, 0, program[fID].GetSize()); diff --git a/include/emp/io/File.hpp b/include/emp/io/File.hpp index aff1f47545..a00c4908f6 100644 --- a/include/emp/io/File.hpp +++ b/include/emp/io/File.hpp @@ -1,14 +1,13 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2018-2020. + * @date 2018-2023. * * @file File.hpp * @brief The File object maintains a simple, in-memory file. * @note Status: BETA * * @todo We need to modify this code so that File can work with Emscripten. - * Alternatively, we might want to have a more flexible file class that wraps this one. * */ @@ -24,14 +23,16 @@ #include #include "../base/vector.hpp" +#include "../meta/FunInfo.hpp" #include "../tools/string_utils.hpp" namespace emp { - /// A class to maintin files for loading, writing, storing, and easy access to components. + /// A class to maintain files for loading, writing, storing, and easy access to components. class File { protected: emp::vector lines; + std::string file_error = ""; public: File() : lines() { ; } @@ -83,6 +84,15 @@ namespace emp { /// Return a const reference to the last line in the file. const std::string & back() const { return lines.back(); } + // Was there an error working with this file? + bool HasError() const { return file_error.size(); } + + // Text of error. + const std::string & GetError() const { return file_error; } + + // Remove any errors. + void ClearError() { file_error.resize(0); } + /// Append a new line to the end of the file. 
File & Append(const std::string & line) { lines.emplace_back(line); return *this; } @@ -111,33 +121,34 @@ namespace emp { } /// Extract first line from file - auto operator>>(std::string &out) { + auto operator>>(std::string & out) { out = size() ? front() : out; lines.erase(begin()); } /// Test if two files are identical. - bool operator==(const File in) { return lines == in.lines; } + bool operator==(const File & in) const { return lines == in.lines; } /// Test if two files are different. - bool operator!=(const File in) { return lines != in.lines; } + bool operator!=(const File & in) const { return lines != in.lines; } - /// Load a line from an input stream into a file. - File & LoadLine(std::istream & input) { + /// Load a line from an input stream into a file; return whether load was successful. + bool LoadLine(std::istream & input) { lines.emplace_back(""); - std::getline(input, lines.back()); + if (!std::getline(input, lines.back())) { + lines.pop_back(); + return false; + } // If the input file is DOS formatted, make sure to remove the \r at the end of each line. if (lines.back().size() && lines.back().back() == '\r') lines.back().pop_back(); - return *this; + return true; } /// Load an entire input stream into a file. File & Load(std::istream & input) { - while (!input.eof()) { - LoadLine(input); - } + while (LoadLine(input)); return *this; } @@ -148,6 +159,8 @@ namespace emp { if (file.is_open()) { Load(file); file.close(); + } else { + file_error = emp::to_string("File '", filename, "' failed to open."); } return *this; } @@ -186,9 +199,17 @@ namespace emp { } /// Apply a string manipulation function to all lines in the file. - File & Apply(const std::function & fun) { + template + File & Apply(FUN_T fun) { for (std::string & cur_line : lines) { - fun(cur_line); + // If the function returns a string, assume that's what we're supposed to use. + // Otherwise assume that the string gets modified. 
+        using return_t = typename FunInfo::return_t;
+        if constexpr ( std::is_same() ) {
+          cur_line = fun(cur_line);
+        } else {
+          fun(cur_line);
+        }
       }
       return *this;
     }
@@ -246,16 +267,18 @@ namespace emp {
     }
 
     /// A technique to remove all comments in a file.
-    File & RemoveComments(const std::string & marker) {
-      Apply( [marker](std::string & str) {
-        size_t pos = str.find(marker);
+    File & RemoveComments(const std::string & marker, bool skip_quotes=true) {
+      Apply( [marker,skip_quotes](std::string & str) {
+        size_t pos = emp::find(str, marker, 0, skip_quotes);
         if (pos !=std::string::npos) str.resize( pos );
       } );
       return *this;
     }
 
     /// Allow remove comments to also be specified with a single character.
-    File & RemoveComments(char marker) { return RemoveComments(emp::to_string(marker)); }
+    File & RemoveComments(char marker, bool skip_quotes=true) {
+      return RemoveComments(emp::to_string(marker), skip_quotes);
+    }
 
     /// Run a function on each line of a file and return the restults as a vector.
     /// Note: Function is allowed to modify string.
@@ -268,6 +291,26 @@ namespace emp {
       return results;
     }
 
+    /// Get the lines in the range [start, end); end is clamped to the number of lines.
+    emp::vector Read(size_t start, size_t end) const {
+      if (end > lines.size()) end = lines.size();
+      return emp::vector(lines.begin()+start, lines.begin()+end);
+    }
+
+    /// Get a series of lines until a line meets a certain condition.
+    emp::vector ReadUntil(size_t start, auto test_fun) const {
+      size_t end = start;
+      while (end < lines.size() && !test_fun(lines[end])) ++end;
+      return Read(start, end);
+    }
+
+    /// Get a series of lines while each line (tested one at a time) meets a certain condition.
+    emp::vector ReadWhile(size_t start, auto test_fun) const {
+      size_t end = start;
+      while (end < lines.size() && test_fun(lines[end])) ++end;
+      return Read(start, end);
+    }
+
     /// Remove the first column from the file, returning it as a vector of strings.
emp::vector ExtractCol(char delim=',') { return Process( [delim](std::string & line){ @@ -333,6 +376,58 @@ namespace emp { return out_data; } + // A File::Scan object allows a user to easily step through a File. + class Scan { + private: + const File & file; + size_t line = 0; + + public: + Scan(const File & in, size_t start=0) : file(in), line(start) { } + Scan(const Scan & in) = default; + + const File & GetFile() const { return file; } + size_t GetLine() const { return line; } + + bool AtStart() const { return line == 0; } + bool AtEnd() const { return line >= file.size(); } + operator bool() const { return !AtEnd(); } + + void Set(size_t in_line) { line = in_line; } + void Reset() { line = 0; } + void SetEnd() { line = file.size(); } + + // Get the very next line. + const std::string & Read() { + if (line > file.size()) return emp::empty_string(); + return file[line++]; + } + + // Get a block of lines. + emp::vector ReadTo(size_t end) { + emp_assert(end >= line); + if (end > file.size()) end = file.size(); + size_t start = line; + line = end; + return file.Read(start, end); + } + + // Get a block of lines, ending when a condition is met. + emp::vector ReadUntil(auto test_fun) { + auto out = file.ReadUntil(line, test_fun); + line += out.size(); + return out; + } + + // Get a block of lines for as lone as a condition is met. + emp::vector ReadWhile(auto test_fun) { + auto out = file.ReadWhile(line, test_fun); + line += out.size(); + return out; + } + }; + + Scan StartScan(size_t start=0) const { return Scan(*this, start); } }; } diff --git a/include/emp/io/StreamManager.hpp b/include/emp/io/StreamManager.hpp index a516f2257f..b4fdac2400 100644 --- a/include/emp/io/StreamManager.hpp +++ b/include/emp/io/StreamManager.hpp @@ -36,7 +36,7 @@ namespace emp { protected: - // Helper under error conditions. + // Helper, especially under error conditions. 
static std::iostream & GetDefaultStream() { static std::stringstream default_stream; return default_stream; @@ -136,6 +136,7 @@ namespace emp { if constexpr (ACCESS == Access::INPUT) ptr = NewPtr(name); else if constexpr (ACCESS == Access::OUTPUT) ptr = NewPtr(name); else if constexpr (ACCESS == Access::IO) ptr = NewPtr(name); + else emp_error("Unknown access type for file creation in StreamManager."); } // Build string streams. @@ -305,12 +306,18 @@ namespace emp { std::istream & GetInputStream(const std::string & name) { - if (!HasInputStream(name)) return AddInputStream(name); + if (!HasInputStream(name)) { // If we don't have this input stream, add it! + emp_assert(!Has(name)); // Make sure we don't have this stream at all! + return AddInputStream(name); + } return streams[name]->GetInputStream(); } std::ostream & GetOutputStream(const std::string & name) { - if (!HasOutputStream(name)) return AddOutputStream(name); + if (!HasOutputStream(name)) { // If we don't have this output stream, add it! + emp_assert(!Has(name)); // Make sure we don't have this stream at all! 
+ return AddOutputStream(name); + } return streams[name]->GetOutputStream(); } diff --git a/include/emp/matching/MatchBin.hpp b/include/emp/matching/MatchBin.hpp index e74a7a836e..d0687c37c9 100644 --- a/include/emp/matching/MatchBin.hpp +++ b/include/emp/matching/MatchBin.hpp @@ -62,6 +62,7 @@ namespace emp::internal { using query_t = Query; using tag_t = Tag; + #ifndef DOXYGEN_SHOULD_SKIP_THIS template < typename Val, typename Metric, @@ -69,7 +70,7 @@ namespace emp::internal { typename Regulator > friend class emp::MatchBin; - + #endif /*DOXYGEN_SHOULD_SKIP_THIS*/ struct LogEntry { query_t query; diff --git a/include/emp/matching/matchbin_metrics.hpp b/include/emp/matching/matchbin_metrics.hpp index b55f7cacf3..dbc5492919 100644 --- a/include/emp/matching/matchbin_metrics.hpp +++ b/include/emp/matching/matchbin_metrics.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2019-2021. + * @date 2019-2022. * * @file matchbin_metrics.hpp * @brief Metric structs that can be plugged into MatchBin. @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,6 @@ #include "../datastructs/tuple_utils.hpp" #include "../math/Distribution.hpp" #include "../math/math.hpp" -#include "../polyfill/span.hpp" #include "../tools/string_utils.hpp" namespace emp { diff --git a/include/emp/math/CombinedBinomialDistribution.hpp b/include/emp/math/CombinedBinomialDistribution.hpp new file mode 100644 index 0000000000..a8b5786d71 --- /dev/null +++ b/include/emp/math/CombinedBinomialDistribution.hpp @@ -0,0 +1,85 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2018-2022. 
+ * + * @file CombinedBinomialDistribution.hpp + * @brief A means of quickly generating binomial random variables while only storing a small number of distributions. + * @note Status: ALPHA + * + * Quick check for theory: https://math.stackexchange.com/questions/1176385/sum-of-two-independent-binomial-variables + * + * If we want to generate binomial random variables of various trial counts (n's) using the + * Distribution class, we'd have to create a new Distribution for each unique trial count. + * + * This class leverages the fact that B(n, p) + B(m, p) = B(n + m, p) to calculate binomial + * draws with arbitrary trial counts without storing N distributions. + * By storing distributions for powers of 2, we only store log_2(N) distributions. + * + * Developer Notes: + * - We should come up with a more informative name for the file/class + */ + +#ifndef EMP_MATH_COMBINEDBINOMIALDISTRIBUTION_HPP_INCLUDE +#define EMP_MATH_COMBINEDBINOMIALDISTRIBUTION_HPP_INCLUDE + +#include "./Distribution.hpp" + +namespace emp{ + /// \brief A collection of distributions that allows for pulls from a binomial distribution with arbitrary N while only storing log_2(N) distributions + class CombinedBinomialDistribution{ + protected: + emp::vector distribution_vec; /**< The collection of binomial distributions + used to construct any N */ + double p; ///< The success probability of a single Bernoulli trial + size_t cur_max_power; /**< The maximum power of two currently supported by our + distributions */ + + /// Fetch the exponent of the smallest power of two that is greater than or equal to n + size_t GetMaxPower(size_t n) const { + size_t power = 0; + for(size_t val = 1; val < n; val <<= 1, ++power){ ; } + return power; + } + + public: + CombinedBinomialDistribution() : p(0), cur_max_power(0){ ; } + CombinedBinomialDistribution(double _p, size_t _starting_n) : p(_p), cur_max_power(0){ + Expand(_starting_n); + } + + /// Sample a binomial distribution with n events + size_t PickRandom(size_t n, Random & random){ + 
size_t local_max_power = GetMaxPower(n); + size_t result = 0; + if(local_max_power > cur_max_power || distribution_vec.empty()) Expand(n); + for(size_t power = 0; power <= local_max_power; ++power){ + if( (n & (1ull << power)) != 0){ + result += distribution_vec[power].PickRandom(random); + } + } + return result; + } + + /// Reset the distribution with a new probability, p, and a starting n value + void Setup(double _p, size_t _n){ + distribution_vec.clear(); + cur_max_power = 0; + p = _p; + if(_n > (1ull << cur_max_power)) Expand(_n); + } + + /// Create more distributions to handle the given value of n + void Expand(size_t max_n){ + cur_max_power = GetMaxPower(max_n); + for(size_t power = distribution_vec.size(); power <= cur_max_power; ++power){ + distribution_vec.emplace_back(p, 1ull << power); + } + } + + /// Fetch the current maximum power handled by this combined distribution + size_t GetCurMaxPower(){ return cur_max_power; } + }; +} + +#endif // #ifndef EMP_MATH_COMBINEDBINOMIALDISTRIBUTION_HPP_INCLUDE diff --git a/include/emp/math/Distribution.hpp b/include/emp/math/Distribution.hpp index 8e93404b06..45666e1ca1 100644 --- a/include/emp/math/Distribution.hpp +++ b/include/emp/math/Distribution.hpp @@ -8,9 +8,9 @@ * @note Status: ALPHA * * A Distribution is a pre-calculated set of probabilities to quickly pick a whole-number result. - * These should be used when either we need to draw from the same distribution many time (and hence - * the extra time to pre-calculate it is amortized away) -or- in functions that we want to call with - * a range of distributions that we may not know ahead of time. + * These should be used when either we need to draw from the same distribution many time (and + * hence the extra time to pre-calculate it is amortized away) -or- in functions that we want to + * call with a range of distributions that we may not know ahead of time. * * Currently, we have: * @@ -19,7 +19,7 @@ * NegativeBinomial - How many attempts to reach N successes, with p probability per attempt? 
* * - * Developor Notes: + * Developer Notes: * - We should setup an offset in the base Distribution class to ignore "impossible" low values. * */ @@ -50,6 +50,7 @@ namespace emp { return weights.Index( in_value * GetTotalProb() ); } + /// Pick a random item using this distribution. size_t PickRandom(Random & random) const { emp_assert(weights.GetSize() > 0, "Distribution can only pick a random entry if it has at least one entry!"); return weights.Index( random.GetDouble(GetTotalProb()) ); @@ -125,7 +126,7 @@ namespace emp { }; - /// How many attempts to reach N successes, assumming p probability per attempt? + /// How many attempts to reach N successes, assuming p probability per attempt? class NegativeBinomial : public Distribution { private: double p = 0.0; diff --git a/include/emp/math/DistributionSet.hpp b/include/emp/math/DistributionSet.hpp new file mode 100644 index 0000000000..5fdcb0c188 --- /dev/null +++ b/include/emp/math/DistributionSet.hpp @@ -0,0 +1,50 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2022. + * + * @file DistributionSet.hpp + * @brief Management of pre-calculated distributions with different input values. + * @note Status: ALPHA + * + * A DistributionSet manages a set of pre-calculated distributions. When input values are + * provided, the correct distribution is identified, and the associated value is drawn. + * + */ + +#ifndef EMP_MATH_DISTRIBUTIONSET_HPP_INCLUDE +#define EMP_MATH_DISTRIBUTIONSET_HPP_INCLUDE + +#include "Distribution.hpp" + +#include +#include + +#include "Distribution.hpp" + +#include "../datastructs/tuple_utils.hpp" + +namespace emp { + + /// @param DIST_T Type of distribution being used. + /// @param Ts Types of parameters to choose the set based on. + template + class DistributionSet { + private: + /// Map parameters to pre-calculated distributions. 
+ unordered_map< std::tuple, DIST_T, emp::TupleHash> dist_map; + + public: + size_t PickRandom(Random & random, Ts... args) { + auto arg_tup = std::make_tuple(args...); // Build the tuple to use as a key. + auto [it, success] = dist_map.emplace(arg_tup, DIST_T(args...)); + return it->second.PickRandom(random); + } + }; + + using BinomialSet = emp::DistributionSet; + using NegativeBinomialSet = emp::DistributionSet; + +} + +#endif // #ifndef EMP_MATH_DISTRIBUTIONSET_HPP_INCLUDE diff --git a/include/emp/math/Random.hpp b/include/emp/math/Random.hpp index f7a8b826dc..bcd9d58275 100644 --- a/include/emp/math/Random.hpp +++ b/include/emp/math/Random.hpp @@ -73,6 +73,9 @@ namespace emp { /// Starts a new sequence of pseudo random numbers. A negative seed means that the random /// number generator gets its seed from the current system time and the process memory. void ResetSeed(const int64_t seed) noexcept { + value = 0; + expRV = 0.0; + // If the provided seed is <= 0, choose a unique seed based on time and memory location. if (seed <= 0) { uint64_t seed_time = (uint64_t) time(NULL); @@ -175,7 +178,7 @@ namespace emp { inline uint64_t GetUInt64(const uint64_t max) noexcept { if (max <= RAND_CAP) return (uint64_t) GetUInt(max); // Don't need extra precision. - size_t mask = emp::MaskUsed(max); // Create a mask for just the bits we need. + uint64_t mask = emp::MaskUsed(max); // Create a mask for just the bits we need. uint64_t val = GetUInt64() & mask; // Grab a value using just the current bits. while (val >= max) val = GetUInt64() & mask; // Grab new values until we find a valid one. @@ -390,7 +393,7 @@ namespace emp { // Distributions // /// Generate a random variable drawn from a unit normal distribution. 
- double GetRandNormal() noexcept { + double GetNormal() noexcept { // Draw from a Unit Normal Dist // Using Rejection Method and saving of initial exponential random variable double expRV2; @@ -407,18 +410,18 @@ namespace emp { /// @return A random variable drawn from a normal distribution. /// @param mean Center of distribution. /// @param std Standard deviation of distribution. - inline double GetRandNormal(const double mean, const double std) { return mean + GetRandNormal() * std; } + inline double GetNormal(const double mean, const double std) { return mean + GetNormal() * std; } /// Generate a random variable drawn from a Poisson distribution. - inline uint32_t GetRandPoisson(const double n, const double p) { + inline uint32_t GetPoisson(const double n, const double p) { emp_assert(p >= 0.0 && p <= 1.0, p); // Optimizes for speed and calculability using symetry of the distribution - if (p > .5) return (uint32_t) n - GetRandPoisson(n * (1 - p)); - else return GetRandPoisson(n * p); + if (p > .5) return (uint32_t) n - GetPoisson(n * (1 - p)); + else return GetPoisson(n * p); } /// Generate a random variable drawn from a Poisson distribution. - inline uint32_t GetRandPoisson(const double mean) { + inline uint32_t GetPoisson(const double mean) { // Draw from a Poisson Dist with mean; if cannot calculate, return UINT_MAX. // Uses Rejection Method const double a = exp(-mean); @@ -437,7 +440,7 @@ namespace emp { /// This function is exact, but slow. 
/// @see Random::GetApproxRandBinomial /// @see emp::Binomial in source/tools/Distribution.h - inline uint32_t GetRandBinomial(const double n, const double p) { // Exact + inline uint32_t GetBinomial(const double n, const double p) { // Exact emp_assert(p >= 0.0 && p <= 1.0, p); emp_assert(n >= 0.0, n); // Actually try n Bernoulli events, each with probability p @@ -446,17 +449,18 @@ namespace emp { return k; } - inline uint32_t GetRandGeometric(double p){ - emp_assert(p >= 0 && p <= 1, "Pobabilities must be between 0 and 1"); - // TODO: When we have warnings, add one for passing a really small number to - // this function. Alternatively, make this function not ludicrously slow with small numbers. - // Looks like return floor(ln(GetDouble())/ln(1-p)) might be sufficient? - if (p == 0) { - return std::numeric_limits::infinity(); - } - uint32_t result = 1; - while (!P(p)) { result++;} - return result; + /// Generate a random variable drawn from an exponential distribution. + inline double GetExponential(double p) { + emp_assert(p > 0.0 && p <= 1.0, p); + // if (p == 0.0) return std::numeric_limits::infinity(); + if (p == 1.0) return 0.0; + return std::log(GetDouble()) / std::log(1.0 - p); + } + + /// Generate a random variable drawn from a geometric distribution. + inline uint32_t GetGeometric(double p) { + emp_assert(p > 0.0 && p <= 1.0, p); + return static_cast( GetExponential(p) ) + 1; } }; diff --git a/include/emp/math/Range.hpp b/include/emp/math/Range.hpp index 2afa64ca96..6eeb300968 100644 --- a/include/emp/math/Range.hpp +++ b/include/emp/math/Range.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2019 + * @date 2016-2022. 
* * @file Range.hpp * @brief A simple way to track value ranges @@ -23,12 +23,13 @@ namespace emp { template class Range { private: - T lower = std::numeric_limits::min(); ///< Beginning of range, inclusive. - T upper = std::numeric_limits::max(); ///< End of range, inclusive. + T lower = std::numeric_limits::lowest(); ///< Beginning of range, inclusive. + T upper = std::numeric_limits::max(); ///< End of range, inclusive. public: Range() = default; Range(T _l, T _u) : lower(_l), upper(_u) { emp_assert(_l < _u); } + Range(const Range &) = default; T GetLower() const { return lower; } T GetUpper() const { return upper; } diff --git a/include/emp/math/constants.hpp b/include/emp/math/constants.hpp index a6dfa20444..d806842368 100644 --- a/include/emp/math/constants.hpp +++ b/include/emp/math/constants.hpp @@ -29,6 +29,8 @@ namespace emp { constexpr const int32_t MIN_INT = -2147483648; ///< (- 2^31) + constexpr const size_t MAX_SIZE_T = static_cast(-1); + /// Determine the maximum value for any type. // @CAO: Prevent inf to get more realistic numbers for double/float? 
template diff --git a/include/emp/math/info_theory.hpp b/include/emp/math/info_theory.hpp index 2e8cb28a99..f84131c1f7 100644 --- a/include/emp/math/info_theory.hpp +++ b/include/emp/math/info_theory.hpp @@ -56,7 +56,7 @@ namespace emp { double entropy = 0.0; for (auto & o : objs) { double p = ((double) fun(o)) / total; - entropy -= p * Log2(p); + entropy -= p * log2(p); } return entropy; } diff --git a/include/emp/math/math.hpp b/include/emp/math/math.hpp index 435c166d3a..a0f3723834 100644 --- a/include/emp/math/math.hpp +++ b/include/emp/math/math.hpp @@ -271,7 +271,7 @@ namespace emp { // exclude clang versions with compiler bug https://reviews.llvm.org/D35190 #if defined(__clang__) && __clang_major__>=9 || defined(__GNUC__) && !defined(__clang__) // if base is not known at compile time, use std::pow which is faster - if ( !__builtin_constant_p( base ) ) return std::pow(base, exp); + if ( !__builtin_constant_p( base ) ) return static_cast(std::pow(base, exp)); // otherwise, use constexpr-friendly implementations else #endif diff --git a/include/emp/math/sequence_utils.hpp b/include/emp/math/sequence_utils.hpp index 2b36cce234..a948a692bd 100644 --- a/include/emp/math/sequence_utils.hpp +++ b/include/emp/math/sequence_utils.hpp @@ -8,7 +8,7 @@ * @note Status: BETA * * A set of functions for analyzing sequences, including distance metrics (Hamming and - * Edit/Levenschtein) and alignment. + * Edit/Levenshtein) and alignment. */ #ifndef EMP_MATH_SEQUENCE_UTILS_HPP_INCLUDE @@ -16,12 +16,51 @@ #include +#include "../base/notify.hpp" #include "../base/vector.hpp" +#include "../tools/string_utils.hpp" #include "math.hpp" namespace emp { + /// Generate a sequence from a string. + /// Format: "entry1,entry2,entry3" etc. + /// Entries can be single values (Eg: "72") or ranges using start[:step]:stop format + /// (Eg: "0:100" or "3:5:33"). + + template + emp::vector ToSequence(std::string sequence_str) { + // Clean up input sequence and slice by commas. 
+ emp::remove_whitespace(sequence_str); + emp::vector seq_slices = emp::slice(sequence_str, ','); + emp::vector out; + + // Convert each slice into a value or range of values. + emp::vector range_slices; + for (const std::string & slice : seq_slices) { + emp::slice(slice, range_slices, ':'); + T start = emp::from_string(range_slices[0]); + T step = static_cast(1); + T stop = start + static_cast(1); + + if (range_slices.size() == 2) stop = emp::from_string(range_slices[1]); + else if (range_slices.size() == 3) { + step = emp::from_string(range_slices[1]); + stop = emp::from_string(range_slices[2]); + } + else if (range_slices.size() > 3) { + emp::notify::Exception("math::sequence_utils::ToSequence::invalid_range", + "emp::ToSequence() provided with range with too many ':'", + slice); + } + + for (T i = start; i < stop; i += step) out.push_back(i); + } + + return out; + } + // --- Distance functions for any array-type objects --- /// Hamming distance is a simple count of substitutions needed to convert one array to another. @@ -107,7 +146,7 @@ namespace emp { emp::vector prev_row(size1); // The previous row we calculated emp::vector > edit_info(size2, emp::vector(size1)); - // Initialize the previous row to record the differece from nothing. + // Initialize the previous row to record the difference from nothing. for (size_t i = 0; i < size1; i++) { prev_row[i] = i + 1; edit_info[0][i] = 'i'; diff --git a/include/emp/meta/ConceptWrapper.hpp b/include/emp/meta/ConceptWrapper.hpp index 0aca2fec1c..7d07212032 100644 --- a/include/emp/meta/ConceptWrapper.hpp +++ b/include/emp/meta/ConceptWrapper.hpp @@ -29,7 +29,7 @@ * * REQUIRED_OVERLOAD_FUN ( FUNCTION_NAME, ERROR_MESSAGE, RETURN_TYPE, ARG1_TYPES, OTHER_ARGS... ) * Setup a set of overloaded member functions called FUNCTION_NAME that varies the first - * parameter (and may have additional paramters with fixed types. ARG1_TYPES must be an + * parameter (and may have additional parameters with fixed types. 
ARG1_TYPES must be an * emp::TypePack that includes the full set of types to be used for the first parameter. * Zero or more additional parameters may be included in OTHER_ARGS. The wrapped class must * already define the full set of overloaded functions by the correct name and with the correct @@ -90,7 +90,7 @@ * OPTIONAL_VAR ( VAR_NAME, DEFAULT_VALUE, TYPE ) * Setup a member variable called VAR_NAME. If it already exists in the wrapped class, use * that version. If it does not already exist, create it with the provided TYPE and set it to - * the DEFAULT_VALUE prodided. + * the DEFAULT_VALUE provided. * */ @@ -108,7 +108,7 @@ #define EMP_BUILD_CONCEPT( WRAPPER_NAME, BASE_NAME, ... ) \ - /* Do error-checkig on the inputs! */ \ + /* Do error-checking on the inputs! */ \ EMP_WRAP_EACH(EMP_BUILD_CONCEPT__ERROR_CHECK, __VA_ARGS__) \ /* Build the interface class. */ \ class BASE_NAME { \ @@ -139,8 +139,14 @@ #define EMP_BUILD_CONCEPT__EC_PROTECTED(...) /* PROTECTED okay */ #define EMP_BUILD_CONCEPT__EC_PUBLIC(...) /* PUBLIC okay */ -#define EMP_BUILD_CONCEPT__CHECK_EMPTY(A, CMD) EMP_GET_ARG_2( EMP_BUILD_CONCEPT__SPACER ## A, \ - static_assert(false, "\n\n \033[1;31mInvalid EMP_BUILD_CONCEPT.\033[0m May be invalid command or missing comma in:\n \033[1;32m" #CMD "\033[0m;\n\n"); ) +#define EMP_BUILD_CONCEPT__CHECK_EMPTY(A, CMD) \ + EMP_GET_ARG_2( EMP_BUILD_CONCEPT__SPACER ## A, \ + static_assert(false, \ + "\n\n \033[1;31mInvalid EMP_BUILD_CONCEPT.\033" \ + "[0m May be invalid command or missing comma in:\n" \ + " \033[1;32m" #CMD "\033[0m;\n\n" \ + ); \ + ) #define EMP_BUILD_CONCEPT__SPACER ~, /* EMPTY! */ #define EMP_BUILD_CONCEPT__ERROR @@ -154,7 +160,7 @@ #define EMP_BUILD_CONCEPT__BASE_REQUIRED_FUN(NAME, X, RETURN_T, ...) virtual RETURN_T NAME( __VA_ARGS__ ) = 0; #define EMP_BUILD_CONCEPT__BASE_OPTIONAL_FUN(NAME, X, RETURN_T, ...) 
virtual RETURN_T NAME( __VA_ARGS__ ) = 0; -// Since you cannot have virtual tempalated functions, we need to do a bit of work in the bast class. +// Since you cannot have virtual templated functions, we need to do a bit of work in the bast class. // ARGS are: FUNCTION_NAME, ERROR_MESSAGE, RETURN_TYPE, ARG1_TYPES, OTHER_ARGS... #define EMP_BUILD_CONCEPT__BASE_REQUIRED_OVERLOAD_FUN(NAME, X, RETURN_TYPE, ...) \ static_assert(emp::is_TypePack() == true, \ @@ -194,7 +200,7 @@ #define EMP_BUILD_CONCEPT__REQUIRED_FUN_impl(FUN_NAME, ERROR, NUM_ARGS, RETURN_T, ...) \ protected: \ /* Determine return type if we try to call this function in the base class. \ - It should be undefined if the member functon does not exist! */ \ + It should be undefined if the member function does not exist! */ \ template \ using return_t_ ## FUN_NAME = \ EMP_IF( NUM_ARGS, \ @@ -240,7 +246,7 @@ #define EMP_BUILD_CONCEPT__OPTIONAL_impl(FUN_NAME, DEFAULT, NUM_ARGS, RETURN_T, ...) \ protected: \ /* Determine return type if we try to call this function in the base class. \ - It should be undefined if the member functon does not exist! */ \ + It should be undefined if the member function does not exist! */ \ template \ using return_t_ ## FUN_NAME = \ EMP_IF( NUM_ARGS, \ @@ -277,7 +283,7 @@ #define EMP_BUILD_CONCEPT__PROCESS_REQUIRED_OVERLOAD_FUN(FUN_NAME, ERROR_MESSAGE, RETURN_T, ...) \ protected: \ /* Determine return type if we try to call this function in the base class. \ - It should be undefined if the member functon does not exist! */ \ + It should be undefined if the member function does not exist! 
*/ \ template \ using return_t_ ## FUN_NAME = \ decltype( std::declval().FUN_NAME( std::declval() ) ); \ diff --git a/include/emp/meta/FunInfo.hpp b/include/emp/meta/FunInfo.hpp new file mode 100644 index 0000000000..e0ff0c5938 --- /dev/null +++ b/include/emp/meta/FunInfo.hpp @@ -0,0 +1,256 @@ +/** + * @note This file is part of Empirical, https://github.com/devosoft/Empirical + * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md + * @date 2021 + * + * @file FunInfo.hpp + * @brief Wrap a function to provide more information about it. + * @note Status: ALPHA + * + * FunInfo will collect information about a provided function and facilitate + * manipulations. + * + * + * Developer Notes: + * - Will not currently handle return by reference! + */ + +#ifndef EMP_META_FUNINFO_HPP_INCLUDE +#define EMP_META_FUNINFO_HPP_INCLUDE + +#include + +#include "TypePack.hpp" +#include "ValPack.hpp" + +namespace emp { + + // A generic base class that expands anything with operator() + template + struct FunInfo : public FunInfo< decltype(&T::operator()) > {}; + + // Specialization for functions; redirect to function-object specialization. + template + struct FunInfo + : public FunInfo< std::function > {}; + + // Specialization for functions; redirect to function-object specialization. + template + struct FunInfo + : public FunInfo< std::function > {}; + + + // Specialization for function objects with AT LEAST ONE parameter... + template + struct FunInfo { + private: + // template struct is_templated_converter : std::false_type{}; + // template + // struct is_templated_converter().template operator()(0))>> : std::true_type{}; + + /// Helper function to lock an argument at a designated position in a function. + template + static auto BindAt_impl(CLASS_T fun, BOUND_T && bound, + TypePack, TypePack) { + // If the function needs a reference for the parameter, send the supplied value through. 
+ if constexpr (std::is_reference()) { + return [fun, &bound](BEFORE_Ts &&... before_args, AFTER_Ts &&... after_args) { + return fun(std::forward(before_args)..., + std::forward(bound), + std::forward(after_args)...); + }; + } + // Otherwise, a copy is fine. + else { + return [fun, bound](BEFORE_Ts &&... before_args, AFTER_Ts &&... after_args) { + return fun(std::forward(before_args)..., + bound, + std::forward(after_args)...); + }; + } + } + + + public: + using fun_t = RETURN_T(PARAM1_T, PARAM_Ts...); + using return_t = RETURN_T; + using params_t = TypePack; + + template + using arg_t = typename params_t::template get; + + static constexpr size_t num_args = 1 + sizeof...(PARAM_Ts); + + /// Test if this function can be called with a particular set of arguments. + template + static constexpr bool InvocableWith(ARG1 &&, ARG_Ts &&...) { + return std::is_invocable(); + } + + /// Test if this function can be called with a particular set of argument TYPEs. + template + static constexpr bool InvocableWith() { + return std::is_invocable(); + } + + /// Change a function's return type using a converter function. + template + static auto ChangeReturnType(FUN_T fun, CONVERTER_T convert_fun) + { + return [fun=fun, c=convert_fun](PARAM1_T && arg1, PARAM_Ts &&... args) { + return c( fun(std::forward(arg1), std::forward(args)...) ); + }; + } + + /// Change a function's arguments using a fixed converter function. + template + static auto ChangeParameterTypes(FUN_T fun, CONVERTER_T convert_fun) + { + return [fun=fun, c=convert_fun](NEW_T arg1, decoy_t... args) { + return fun(c(arg1), c(args)...); + }; + } + + /// Convert a function's arguments using a dynamic (tempalted) lambda function. + template + static auto ConvertParameterTypes(FUN_T fun, CONVERTER_T convert_lambda) + { + // If the converter can take two arguments, assume the second is for type. + if constexpr ( std::is_invocable()) { + return [fun=fun, c=convert_lambda](NEW_T arg1, decoy_t... 
args) { + return fun(c(arg1, std::decay_t{}), + c(args, std::decay_t{})...); + }; + } + + // Otherwise assume that we are using a templated lambda (or similar object) + else { + return [fun=fun, c=convert_lambda](NEW_T arg1, decoy_t... args) { + return fun(c.template operator()(arg1), + c.template operator()(args)...); + }; + } + } + + /// Lock in a specified argument of a function. + template + static auto BindAt(CLASS_T fun, T && bound) { + using before_pack = typename params_t::template shrink; + using after_pack = typename params_t::template popN; + return BindAt_impl(fun, std::forward(bound), before_pack(), after_pack()); + } + + /// Lock in multiple function arguments. + template + static auto Bind(CLASS_T fun, T1 && bound1, Ts &&... bound) { + static_assert(emp::ValPack::IsSorted() && emp::ValPack::IsUnique(), + "FunInfo::Bind must be given unique, sorted indicies."); + static_assert(sizeof...(IDs) == sizeof...(Ts), + "FunInfo::Bind must have exactly one ID per bound value."); + + // Bind all LATER positions first, if there are any. + if constexpr (sizeof...(IDs) > 0) { + auto new_fun = Bind(fun, std::forward(bound)...); + return FunInfo::template BindAt(new_fun, bound1); + } + + // Otherwise just bind THIS position. + else return FunInfo::template BindAt(fun, bound1); + } + }; + + // Specialization for function objects with NO parameters... + template + struct FunInfo + { + using fun_t = RETURN_T(); + using return_t = RETURN_T; + using params_t = TypePack<>; + + static constexpr size_t num_args = 0; + + /// Test if this function can be called with a particular set of arguments. + template + static constexpr bool InvocableWith(ARG_Ts...) { return sizeof...(ARG_Ts) == 0; } + + /// Change a function's return type using a converter function. + template + static auto ChangeReturnType(FUN_T fun, CONVERTER_T convert_fun) + { + return [fun=fun, c=convert_fun]() { + return c(fun()); + }; + } + + /// Change a function's arguments using a converter function. 
+ template + static auto ChangeParameterTypes(FUN_T fun, CONVERTER_T /*convert_fun*/) + { + // No parameters, so no changes to make. + return fun; + } + + /// Convert a function's arguments using a dynamic (tempalted) lambda function. + template + static auto ConvertParameterTypes(FUN_T fun, CONVERTER_T /*convert_lambda*/) + { + // No parameters, so no conversions to make. + return fun; + } + + }; + + + // === Stand-alone helper functions === + + /// Change a function's return type using a converter function. + template + static auto ChangeReturnType(FUN_T fun, CONVERTER_T convert_fun) + { + return FunInfo::ChangeReturnType(fun, convert_fun); + } + + /// Change a function's arguments using a simple converter function. + template + static auto ChangeParameterTypes(FUN_T fun, CONVERTER_T convert_fun) + { + return FunInfo::template ChangeParameterTypes(fun, convert_fun); + } + + /// Convert a function's arguments using a templated lambda. + /// @note: Will not work until C++20!! + template + static auto ConvertParameterTypes(FUN_T fun, CONVERTER_T convert_fun) + { + return FunInfo::template ConvertParameterTypes(fun, convert_fun); + } + + /// Convert both return type AND parameter type. + /// Convert a function's arguments using a templated lambda. + template + static auto ChangeTypes(FUN_T fun, R_CONVERTER_T ret_convert_fun, P_CONVERTER_T param_convert_fun) + { + auto partial = FunInfo::template ChangeParameterTypes(fun, param_convert_fun); + return FunInfo::ChangeReturnType(partial, ret_convert_fun); + } + + /// Lock in a specified argument of a function. + template + auto BindAt(FUN_T fun, BOUND_T && bound) { + return FunInfo::template BindAt(fun, std::forward(bound)); + } + + /// Lock in the first argument of a function. + template + auto BindFirst(FUN_T fun, BOUND_T && bound) { + return FunInfo::template BindAt<0>(fun, std::forward(bound)); + } + + /// Lock in a series of specified arguments to a function. + template + auto Bind(FUN_T fun, Ts &&... 
bound) { + return FunInfo::template Bind(fun, std::forward(bound)...); + } +} + +#endif // #ifndef EMP_META_FUNINFO_HPP_INCLUDE diff --git a/include/emp/meta/TypeID.hpp b/include/emp/meta/TypeID.hpp index e12f5c4e4d..e0260cdde6 100644 --- a/include/emp/meta/TypeID.hpp +++ b/include/emp/meta/TypeID.hpp @@ -1,14 +1,65 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2021 + * @date 2016-2022. * * @file TypeID.hpp * @brief TypeID provides an easy way to convert types to strings. * + * TypeID provides an easy way to compare types, analyze them, and convert to strings. + * All TypeID objects are consistent within a type, and are ordinal and hashable. + * + * To get the unique type information for type T use: + * TypeID t = emp::GetTypeID(); + * + * To make TypeID work more effectively with your custom class, implement the static member + * function EMPGetTypeName() which returns a string with its full name (including namespace). + * static std::string EMPGetTypeName() { return "myns::MyClass"; } + * + * MEMBER FUNCTIONS: + * + * std::string GetName() - Return a human readable (ideally) version of type's name. + * void SetName(in_name) - Set the name that should be used henceforth for this type. + * size_t GetSize() - Return number of bytes used by this type. + * + * -- TYPE TESTS -- + * bool IsAbstract() - Is this type a pure-virtual class? + * bool IsArithmetic() - Is this type numeric? + * bool IsArray() - Does this type represent a sequence of objects in memory? + * bool IsClass() - Is this type a non-union class? + * bool IsConst() - Is this contents of this type prevented from changing? + * bool IsEmpty() - Does type type have no contents? + * bool IsObject() - Is this type ANY object type? + * bool IsPointer() - Is this type a pointer? + * bool IsReference() - Is this type a reference? 
+ * bool IsTrivial() - Is this type trivial? + * bool IsVoid() - Is this the type "void"? + * bool IsVolatile() - Is this type volatile qualified? + * bool IsTypePack() - Is this type an emp::TypePack? + * + * -- COMPARISON TESTS -- + * bool IsType<T>() - Is this type the specified type T? + * bool IsTypeIn<T1, T2, ...>() - Is this type one of the listed types? + * + * -- TYPE CONVERSIONS -- + * TypeID GetDecayTypeID() - Remove all qualifications (const, reference, etc.) + * TypeID GetElementTypeID() - Return type that makes up this type (i.e. for arrays) + * TypeID GetRemoveConstTypeID() - Remove const-ness of this type, if any. + * TypeID GetRemoveCVTypeID() - Remove constness and volatility of this type. + * TypeID GetRemoveExtentTypeID() - Flatten one level of a multi-dimensional array. + * TypeID GetRemoveAllExtentsTypeID() - Flatten multi-dimensional arrays. + * TypeID GetRemovePointerTypeID() - If this is a pointer, change to type pointed to. + * TypeID GetRemoveReferenceTypeID() - If this is a reference, change to type referred to. + * TypeID GetRemoveVolatileTypeID() - Remove volatility of this type, if any. + * + * -- VALUE CONVERSIONS -- + * double ToDouble(pointer) - Convert pointed-to object (of this type) to a double. + * std::string ToString(pointer) - Convert pointed-to object (of this type) to a std::string. + * bool FromDouble(value, pointer) - Use double value to set pointed-to object (of this type) + * bool FromString(string, pointer) - Use string value to set pointed-to object (of this type) + * * Developer notes: * * Fill out defaults for remaining standard library classes (as possible) - * * If a class has a static TypeID_GetName() defined, use that for the name. * * If a type is a template, give access to parameter types. * * If a type is a function, give access to parameter types.
*/ @@ -70,6 +121,7 @@ namespace emp { virtual bool IsTrivial() const { return false; } virtual bool IsVoid() const { return false; } virtual bool IsVolatile() const { return false; } + virtual bool IsFunction() const { return false; } virtual bool IsTypePack() const { return false; } @@ -121,6 +173,7 @@ namespace emp { bool IsTrivial() const override { return std::is_trivial(); } bool IsVoid() const override { return std::is_same(); } bool IsVolatile() const override { return std::is_volatile(); } + bool IsFunction() const override { return std::is_function(); } bool IsTypePack() const override { return emp::is_TypePack(); } @@ -172,6 +225,7 @@ namespace emp { size_t GetSize() const override { if constexpr (std::is_void()) return 0; + else if constexpr (std::is_function()) return 0; else return sizeof(T); } @@ -183,9 +237,9 @@ namespace emp { return ptr.ReinterpretCast()->ToDouble(); } - // If this type is convertable to a double, cast the pointer to the correct type, de-reference it, + // If this type is convertible to a double, cast the pointer to the correct type, de-reference it, // and then return the conversion. Otherwise return NaN - if constexpr (std::is_convertible::value) { + else if constexpr (std::is_convertible::value) { return (double) *ptr.ReinterpretCast(); } else return std::nan(""); @@ -222,7 +276,8 @@ namespace emp { return "[N/A]"; } - bool FromDouble(double value, const emp::Ptr ptr) const override { + bool FromDouble([[maybe_unused]] double value, + [[maybe_unused]] const emp::Ptr ptr) const override { using base_t = std::decay_t; // If this variable has a built-in FromDouble() trait, use it! @@ -386,6 +441,11 @@ namespace emp { return internal::TypePackIDs_impl::GetIDs(); } + // Determine if a type has a static EMPGetTypeName() member function. + template struct HasEMPGetTypeName : std::false_type { }; + template + struct HasEMPGetTypeName> : std::true_type{}; + /// Build the information for a single TypeID. 
template static emp::Ptr BuildInfo() { @@ -394,7 +454,12 @@ namespace emp { TypeID type_id(&info); info.init = true; - info.name = typeid(T).name(); + + if constexpr (HasEMPGetTypeName()) { + info.name = T::EMPGetTypeName(); + } else { + info.name = typeid(T).name(); + } // Now, fix the name if we can be more precise about it. if constexpr (std::is_const()) { @@ -437,10 +502,16 @@ namespace emp { /// Setup a bunch of standard type names to be more readable. void SetupTypeNames() { - - // Built-in types. GetTypeID().SetName("void"); + // Probably replaced later, but good to have for systems where it's not. + GetTypeID().SetName("size_t"); + GetTypeID().SetName("long"); + GetTypeID().SetName("long long"); + GetTypeID().SetName("unsigned long"); + GetTypeID().SetName("unsigned long long"); + + // Main built-in types. GetTypeID().SetName("bool"); GetTypeID().SetName("double"); GetTypeID().SetName("float"); diff --git a/include/emp/meta/ValPack.hpp b/include/emp/meta/ValPack.hpp index e2a73bd881..0aaa21fd14 100644 --- a/include/emp/meta/ValPack.hpp +++ b/include/emp/meta/ValPack.hpp @@ -27,11 +27,13 @@ namespace emp { // Anonymous implementations of ValPack interface. #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace internal { + // Helper. DONE arg starts as true, but set to false when sequence finished. template struct vp_range { static constexpr auto NEXT = START + STEP; using type = typename vp_range<(NEXT >= END), NEXT, END, STEP, VALS..., START>::type; }; + // Specialization for when DONE is true. template struct vp_range { using type = ValPack; @@ -201,6 +203,9 @@ namespace emp { /// Find the overall maximum value in an ValPack. constexpr static auto Max() { return pop::Max(V1); } + /// Determine if the pack is sorted. + constexpr static bool IsSorted() { return V1 <= Min() && pop::IsSorted(); } + /// Use each value in an ValPack as an index and return results as a tuple. 
template constexpr static auto ApplyIndex(T && container) { @@ -252,6 +257,8 @@ namespace emp { template constexpr static auto Max(T floor) { return floor; } + constexpr static bool IsSorted() { return true; } + static std::string ToString() { return ""; } static void PrintVals(std::ostream & /* os */=std::cout) { ; } diff --git a/include/emp/meta/macros.hpp b/include/emp/meta/macros.hpp index 0504b4632c..e66796cd06 100644 --- a/include/emp/meta/macros.hpp +++ b/include/emp/meta/macros.hpp @@ -89,7 +89,7 @@ /// The below values allow you to have EMP_FAKE_ARG or EMP_FAKE_2ARG as a single argument. /// If you prepend it with EMP_CONVERT it will trigger a conversion. If you prepend anything -/// else similarly, it wil NOT triggera a conversion (and stay a single argument) +/// else similarly, it wil NOT trigger a conversion (and stay a single argument) #define EMP_CONVERT_ARG_EMP_FAKE_ARG(A) A #define EMP_CONVERT_ARG_EMP_FAKE_2ARG(A) ~, A @@ -203,7 +203,7 @@ /// @cond MACROS // S = Size of each pack // N = Number of packs -// P = Pack representatio of number of packs +// P = Pack representation of number of packs #define EMP_ARGS_TO_PACKS_impl(S, N, ...) EMP_ARGS_TO_PACKS_implB(S, EMP_DEC_TO_PACK(N), __VA_ARGS) #define EMP_ARGS_TO_PACKS_implB(S, P, ...) @CAO diff --git a/include/emp/meta/meta.hpp b/include/emp/meta/meta.hpp index a8c7a9c47c..d245963d25 100644 --- a/include/emp/meta/meta.hpp +++ b/include/emp/meta/meta.hpp @@ -30,10 +30,59 @@ namespace emp { /// Effectively create a function (via constructor) where all args are computed, then ignored. struct run_and_ignore { template run_and_ignore(T&&...) 
{} }; + template struct type_index; + + template <> struct type_index<> { + using t1 = void; using t2 = void; using t3 = void; using t4 = void; + }; + + template struct type_index { + using t1 = T1; using t2 = void; using t3 = void; using t4 = void; + }; + + template struct type_index { + using t1 = T1; using t2 = T2; using t3 = void; using t4 = void; + }; + + template struct type_index { + using t1 = T1; using t2 = T2; using t3 = T3; using t4 = void; + }; + + template + struct type_index { + using t1 = T1; using t2 = T2; using t3 = T3; using t4 = T4; + }; + + /// Trim off a specific type position from a pack. - template using first_type = T1; - template using second_type = T2; - template using third_type = T3; + template using first_type = typename type_index::t1; + template using second_type = typename type_index::t2; + template using third_type = typename type_index::t3; + template using fourth_type = typename type_index::t4; + + // Index into a template parameter pack to grab a specific type. + #ifndef DOXYGEN_SHOULD_SKIP_THIS + namespace internal { + template + struct pack_id_impl { using type = typename pack_id_impl::type; }; + + template struct pack_id_impl<0,T,Ts...> { using type = T; }; + } + #endif // DOXYGEN_SHOULD_SKIP_THIS + + /// Pick a specific position from a type pack. + template + using pack_id = typename internal::pack_id_impl::type; + + /// Trim off the last type from a pack. + template using last_type = pack_id; + + /// A struct declaration with no definition to show a type name in a compile time error. + template struct ShowType; + + /// A false type that does NOT resolve in unexecuted if-constexpr branches. + /// By Brian Bi; from: https://stackoverflow.com/questions/69501472/best-way-to-trigger-a-compile-time-error-if-no-if-constexprs-succeed + template struct dependent_false : std::false_type {}; /// Create a placeholder template to substitute for a real type. 
template struct PlaceholderType; @@ -77,23 +126,6 @@ namespace emp { return out_v; } - // Index into a template parameter pack to grab a specific type. - #ifndef DOXYGEN_SHOULD_SKIP_THIS - namespace internal { - template - struct pack_id_impl { using type = typename pack_id_impl::type; }; - - template - struct pack_id_impl<0,T,Ts...> { using type = T; }; - } - - template - using pack_id = typename internal::pack_id_impl::type; - #endif // DOXYGEN_SHOULD_SKIP_THIS - - // Trim off the last type from a pack. - template using last_type = pack_id; - // Trick to call a function using each entry in a parameter pack. #define EMP_EXPAND_PPACK(PPACK) ::emp::run_and_ignore{ 0, ((PPACK), void(), 0)... } diff --git a/include/emp/meta/type_traits.hpp b/include/emp/meta/type_traits.hpp index defdfa45b4..aa40599953 100644 --- a/include/emp/meta/type_traits.hpp +++ b/include/emp/meta/type_traits.hpp @@ -1,7 +1,7 @@ /** * @note This file is part of Empirical, https://github.com/devosoft/Empirical * @copyright Copyright (C) Michigan State University, MIT Software license; see doc/LICENSE.md - * @date 2016-2021. + * @date 2016-2022. * * @file type_traits.hpp * @brief Extensions on the standard library type traits to handle Empirical classes (such as Ptr). @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include "meta.hpp" + namespace emp { // Predeclarations used below. @@ -96,7 +98,7 @@ namespace emp { template struct remove_std_function_type> { using type = T; }; template using remove_std_function_t = typename remove_std_function_type::type; - // Collect the reference type for any container. + // Collect the reference type for any standard container. template struct element_type { using type = T; }; template

Word" + << "ExpectedWords" + << "MaximumWords" + << "Information
" << word.word << "" + << "" << word.ave_options + << "" << word.max_options + << "" << word.entropy + << "