From e8c052f6978d0564654f876ff3747fcc35efd30a Mon Sep 17 00:00:00 2001 From: Christopher Barber Date: Fri, 3 May 2019 15:04:33 +0200 Subject: [PATCH 1/9] wt_string::push_many --- include/internal/alphabet_encoder.hpp | 11 ++- include/internal/includes.hpp | 1 + include/internal/wt_string.hpp | 99 +++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/include/internal/alphabet_encoder.hpp b/include/internal/alphabet_encoder.hpp index ffc8db4..28732a4 100644 --- a/include/internal/alphabet_encoder.hpp +++ b/include/internal/alphabet_encoder.hpp @@ -181,9 +181,18 @@ class alphabet_encoder{ * alphabet size */ uint64_t size() const { - return enc_type==fixed ? 1ull< keys() const { + set keys; + for(char_type c = 0; c < size(); ++c) { + if (char_exists(c) || enc_type==fixed) + keys.insert(c); + } + return keys; + } + /* * Total number of bits allocated in RAM for this structure * diff --git a/include/internal/includes.hpp b/include/internal/includes.hpp index de09cb4..17bc874 100644 --- a/include/internal/includes.hpp +++ b/include/internal/includes.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index a239f89..2ac9607 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -24,6 +24,8 @@ #include "includes.hpp" #include "alphabet_encoder.hpp" +#include +#include namespace dyn{ @@ -166,6 +168,25 @@ namespace dyn{ } + void push_many(vector& values) { + map>&& Bs{}; + set&& partition{}; + + for (ulint i = 0; i < values.size(); ++i) { + auto c = values.at(i); + if(!ae.char_exists(c)) + ae.encode(c); + } + partition = ae.keys(); + + for(auto c : partition) { + Bs[c] = ae.encode(c); + } + + root.push_many(Bs, values, partition); + n += values.size(); + } + void push_front(char_type c){ insert(0,c); @@ -441,6 +462,84 @@ namespace dyn{ } + void push_many(map>& Bs, + const vector& values, + set partition, + ulint j=0, + ulint offset=0) { + + if(partition.size()==1){ + //this node must be a leaf + assert(bv.size()==0); + + auto c = *partition.begin(); + assert(j==Bs[c].size()); + + if(is_leaf()){ + //if it's already marked as leaf, check + //that the label is correct + assert(c==label()); + //TODO assert all `values` are `c` + }else{ + //else, mark node as leaf + make_leaf(c); + } + return; + } + + //assert(i<=bv.size()); + assert(not is_leaf()); + + std::future f0, f1; + + for(ulint idx = offset; idx < values.size(); ++idx) { + std::cout << "e" << j << std::endl; + auto c = values.at(idx); + if (partition.find(c) != partition.end()) { + auto B = Bs[values.at(idx)]; + + bool b = B[j]; + bv.push_back(b); + + if(b){ + if(not has_child1()){ + child1_ = new node(this); + + set new_partition; + for_each(partition.begin(), partition.end(), [&](char_type c) { + if (Bs[c][j]) + new_partition.insert(c); + }); + f1 = std::move(std::async(std::launch::async, [&]{ child1_->push_many(Bs, values, new_partition, j+1, idx); })); + } + }else{ + if(not has_child0()){ + child0_ = new node(this); + + set new_partition; + for_each(partition.begin(), partition.end(), [&](char_type c) { + if (!Bs[c][j]) + new_partition.insert(c); + }); + f0 = std::move(std::async(std::launch::async, [&]{ child0_->push_many(Bs, values, new_partition, j+1, idx); })); + } + } + } + } + std::cout << "f" << j << std::endl; + + try { + if (f0.valid()) { + f0.wait(); + } + if (f1.valid()) { + f1.wait(); + } + } catch (std::runtime_error& e) { + std::cout << "Async task threw exception: " << e.what() << std::endl; + } + } + /* * remove code B[j,...,B.size()-1] from position i. This code is associated * with character c From 80a7e02852db4eac12f7fc3ec98cb230247c3b68 Mon Sep 17 00:00:00 2001 From: Christopher Barber Date: Fri, 3 May 2019 21:18:47 +0200 Subject: [PATCH 2/9] fixes --- include/internal/wt_string.hpp | 37 +++++++++++++++------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index 2ac9607..20a245b 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -24,8 +24,7 @@ #include "includes.hpp" #include "alphabet_encoder.hpp" -#include -#include +#include namespace dyn{ @@ -168,9 +167,9 @@ namespace dyn{ } - void push_many(vector& values) { - map>&& Bs{}; - set&& partition{}; + void push_many(const vector& values) { + map> Bs{}; + set partition{}; for (ulint i = 0; i < values.size(); ++i) { auto c = values.at(i); @@ -183,7 +182,7 @@ namespace dyn{ Bs[c] = ae.encode(c); } - root.push_many(Bs, values, partition); + root.push_many(std::move(Bs), values, std::move(partition)); n += values.size(); } @@ -462,7 +461,7 @@ namespace dyn{ } - void push_many(map>& Bs, + void push_many(map>&& Bs, const vector& values, set partition, ulint j=0, @@ -490,10 +489,9 @@ namespace dyn{ //assert(i<=bv.size()); assert(not is_leaf()); - std::future f0, f1; + std::thread t0, t1; for(ulint idx = offset; idx < values.size(); ++idx) { - std::cout << "e" << j << std::endl; auto c = values.at(idx); if (partition.find(c) != partition.end()) { auto B = Bs[values.at(idx)]; @@ -510,7 +508,8 @@ namespace dyn{ if (Bs[c][j]) new_partition.insert(c); }); - f1 = std::move(std::async(std::launch::async, [&]{ child1_->push_many(Bs, values, new_partition, j+1, idx); })); + //[&, new_partition]{ child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); }(); + t1 = std::thread([&, new_partition]{ child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); }); } }else{ if(not has_child0()){ @@ -521,22 +520,18 @@ namespace dyn{ if (!Bs[c][j]) new_partition.insert(c); }); - f0 = std::move(std::async(std::launch::async, [&]{ child0_->push_many(Bs, values, new_partition, j+1, idx); })); + //[&, new_partition]{ child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); }(); + t0 = std::thread([&, new_partition]{ child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); }); } } } } - std::cout << "f" << j << std::endl; - try { - if (f0.valid()) { - f0.wait(); - } - if (f1.valid()) { - f1.wait(); - } - } catch (std::runtime_error& e) { - std::cout << "Async task threw exception: " << e.what() << std::endl; + if (t0.joinable()) { + t0.join(); + } + if (t1.joinable()) { + t1.join(); } } From adcdc50a1d98422f38cf016f04f778ae1a4ebff8 Mon Sep 17 00:00:00 2001 From: Christopher Barber Date: Sat, 4 May 2019 13:30:15 +0200 Subject: [PATCH 3/9] wt_string::node::push_many fix --- include/internal/wt_string.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index 20a245b..f80118a 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -508,8 +508,9 @@ namespace dyn{ if (Bs[c][j]) new_partition.insert(c); }); - //[&, new_partition]{ child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); }(); - t1 = std::thread([&, new_partition]{ child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); }); + t1 = std::thread([&, new_partition, idx]{ + child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); + }); } }else{ if(not has_child0()){ @@ -520,8 +521,9 @@ namespace dyn{ if (!Bs[c][j]) new_partition.insert(c); }); - //[&, new_partition]{ child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); }(); - t0 = std::thread([&, new_partition]{ child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); }); + t0 = std::thread([&, new_partition, idx]{ + child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); + }); } } } From 64dc824bd746ec53c8127a32764a9b8b4bc332b5 Mon Sep 17 00:00:00 2001 From: Christopher Barber Date: Sun, 5 May 2019 09:52:11 +0200 Subject: [PATCH 4/9] templatize vector to wt_string::push_many, fix "push" onto non-empty wt --- include/internal/wt_string.hpp | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index f80118a..a1c0036 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -167,12 +167,14 @@ namespace dyn{ } - void push_many(const vector& values) { + template + void push_many(const Vector& values) { map> Bs{}; set partition{}; + for (ulint i = 0; i < values.size(); ++i) { - auto c = values.at(i); + auto c = values[i]; if(!ae.char_exists(c)) ae.encode(c); } @@ -461,8 +463,9 @@ namespace dyn{ } + template void push_many(map>&& Bs, - const vector& values, + const Vector& values, set partition, ulint j=0, ulint offset=0) { @@ -492,16 +495,17 @@ namespace dyn{ std::thread t0, t1; for(ulint idx = offset; idx < values.size(); ++idx) { - auto c = values.at(idx); + auto c = values[idx]; if (partition.find(c) != partition.end()) { - auto B = Bs[values.at(idx)]; + auto B = Bs[c]; bool b = B[j]; bv.push_back(b); if(b){ - if(not has_child1()){ - child1_ = new node(this); + if(not t1.joinable()){ + if(not has_child1()) + child1_ = new node(this); set new_partition; for_each(partition.begin(), partition.end(), [&](char_type c) { @@ -513,8 +517,9 @@ namespace dyn{ }); } }else{ - if(not has_child0()){ - child0_ = new node(this); + if(not t0.joinable()){ + if(not has_child0()) + child0_ = new node(this); set new_partition; for_each(partition.begin(), partition.end(), [&](char_type c) { From 5edce057b62bb7ff1c46c0ef3ad32f1ea25fe7d9 Mon Sep 17 00:00:00 2001 From: Christopher Barber Date: Mon, 6 May 2019 15:38:08 +0200 Subject: [PATCH 5/9] use omp --- CMakeLists.txt | 8 ++++++++ include/internal/wt_string.hpp | 31 ++++++++++++++----------------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1da4435..0910399 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 2.6) +option(USE_OPENMP "Enable multi-threading" OFF) + # Set a default build type if none was specified if(NOT CMAKE_BUILD_TYPE) message(STATUS "Setting build type to 'Debug' as none was specified.") @@ -36,6 +38,12 @@ message("Building in ${CMAKE_BUILD_TYPE} mode") set(CMAKE_CXX_FLAGS "--std=c++11") +if(XXSDS_DYN_MULTI_THREADED) + find_package(OpenMP REQUIRED) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif(XXSDS_DYN_MULTI_THREADED) + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb -g -p") set(CMAKE_CXX_FLAGS_RELEASE "-ggdb -Ofast -fstrict-aliasing -DNDEBUG -march=native") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -ggdb -Ofast -fstrict-aliasing -march=native") diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index a1c0036..ab6bf6e 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -184,6 +184,8 @@ namespace dyn{ Bs[c] = ae.encode(c); } + #pragma omp parallel + #pragma omp master root.push_many(std::move(Bs), values, std::move(partition)); n += values.size(); } @@ -481,7 +483,6 @@ namespace dyn{ //if it's already marked as leaf, check //that the label is correct assert(c==label()); - //TODO assert all `values` are `c` }else{ //else, mark node as leaf make_leaf(c); @@ -489,10 +490,9 @@ namespace dyn{ return; } - //assert(i<=bv.size()); assert(not is_leaf()); - std::thread t0, t1; + bool t0 = false, t1 = false; for(ulint idx = offset; idx < values.size(); ++idx) { auto c = values[idx]; @@ -503,7 +503,9 @@ namespace dyn{ bv.push_back(b); if(b){ - if(not t1.joinable()){ + if(not t1){ + t1 = true; + if(not has_child1()) child1_ = new node(this); @@ -512,12 +514,13 @@ namespace dyn{ if (Bs[c][j]) new_partition.insert(c); }); - t1 = std::thread([&, new_partition, idx]{ - child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); - }); + #pragma omp task + child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); } }else{ - if(not t0.joinable()){ + if(not t0){ + t0 = true; + if(not has_child0()) child0_ = new node(this); @@ -526,20 +529,14 @@ namespace dyn{ if (!Bs[c][j]) new_partition.insert(c); }); - t0 = std::thread([&, new_partition, idx]{ - child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); - }); + #pragma omp task + child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); } } } } - if (t0.joinable()) { - t0.join(); - } - if (t1.joinable()) { - t1.join(); - } + #pragma omp taskwait } /* From a3959394b2cdf60e00554c3ac63a3fc0fa2c856d Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Mon, 6 May 2019 16:17:59 +0200 Subject: [PATCH 6/9] cleanup --- include/internal/wt_string.hpp | 147 ++++++++++++++++----------------- 1 file changed, 72 insertions(+), 75 deletions(-) diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index ab6bf6e..4447c79 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -169,24 +169,20 @@ namespace dyn{ template void push_many(const Vector& values) { - map> Bs{}; - set partition{}; - - for (ulint i = 0; i < values.size(); ++i) { auto c = values[i]; if(!ae.char_exists(c)) ae.encode(c); } - partition = ae.keys(); - for(auto c : partition) { - Bs[c] = ae.encode(c); + map> path_to_leaf; + for(char_type c : ae.keys()) { + path_to_leaf[c] = ae.encode(c); } #pragma omp parallel #pragma omp master - root.push_many(std::move(Bs), values, std::move(partition)); + root.push_many(std::move(path_to_leaf), values); n += values.size(); } @@ -465,79 +461,80 @@ namespace dyn{ } - template - void push_many(map>&& Bs, - const Vector& values, - set partition, - ulint j=0, - ulint offset=0) { - - if(partition.size()==1){ - //this node must be a leaf - assert(bv.size()==0); - - auto c = *partition.begin(); - assert(j==Bs[c].size()); - - if(is_leaf()){ - //if it's already marked as leaf, check - //that the label is correct - assert(c==label()); - }else{ - //else, mark node as leaf - make_leaf(c); - } - return; + template + void push_many(map>&& Bs, + const Vector& values, + ulint j=0, + ulint offset=0) { + + if(Bs.size()==1){ + //this node must be a leaf + assert(bv.size()==0); + + auto c = Bs.begin()->first; + assert(j==Bs[c].size()); + + if(is_leaf()){ + //if it's already marked as leaf, check + //that the label is correct + assert(c==label()); + }else{ + //else, mark node as leaf + make_leaf(c); + } + return; } assert(not is_leaf()); - bool t0 = false, t1 = false; - - for(ulint idx = offset; idx < values.size(); ++idx) { - auto c = values[idx]; - if (partition.find(c) != partition.end()) { - auto B = Bs[c]; - - bool b = B[j]; - bv.push_back(b); - - if(b){ - if(not t1){ - t1 = true; - - if(not has_child1()) - child1_ = new node(this); - - set new_partition; - for_each(partition.begin(), partition.end(), [&](char_type c) { - if (Bs[c][j]) - new_partition.insert(c); - }); - #pragma omp task - child1_->push_many(std::move(Bs), values, new_partition, j+1, idx); - } - }else{ - if(not t0){ - t0 = true; - - if(not has_child0()) - child0_ = new node(this); - - set new_partition; - for_each(partition.begin(), partition.end(), [&](char_type c) { - if (!Bs[c][j]) - new_partition.insert(c); - }); - #pragma omp task - child0_->push_many(std::move(Bs), values, new_partition, j+1, idx); - } - } - } + bool t0 = false; + bool t1 = false; + + for(ulint idx = offset; idx < values.size(); ++idx) { + auto c = values[idx]; + if (!Bs.count(c)) + continue; + + auto B = Bs[c]; + + bool b = B[j]; + bv.push_back(b); + + if(b){ + if(not t1){ + t1 = true; + + if(not has_child1()) + child1_ = new node(this); + + map> new_Bs; + for_each(Bs.begin(), Bs.end(), [&new_Bs](const auto &pair) { + if (pair.second[j]) + new_Bs.insert(pair); + }); + #pragma omp task + child1_->push_many(std::move(new_Bs), values, j+1, idx); + } + }else{ + if(not t0){ + t0 = true; + + if(not has_child0()) + child0_ = new node(this); + + map> new_Bs; + for_each(partition.begin(), partition.end(), [&](const auto &pair) { + if (!pair.second[j]) + new_Bs.insert(pair); + }); + #pragma omp task + child0_->push_many(std::move(new_Bs), values, j+1, idx); + } } + } - #pragma omp taskwait - } + #pragma omp taskwait + } /* * remove code B[j,...,B.size()-1] from position i. This code is associated From a2b26f7b64b86656583162ed7cf9daf99ef27e8f Mon Sep 17 00:00:00 2001 From: Christopher Barber Date: Mon, 6 May 2019 16:43:11 +0200 Subject: [PATCH 7/9] fix --- include/internal/wt_string.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index 4447c79..ab17263 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -508,7 +508,7 @@ namespace dyn{ child1_ = new node(this); map> new_Bs; - for_each(Bs.begin(), Bs.end(), [&new_Bs](const auto &pair) { + for_each(Bs.begin(), Bs.end(), [&new_Bs, &j](const auto &pair) { if (pair.second[j]) new_Bs.insert(pair); }); @@ -523,7 +523,7 @@ namespace dyn{ child0_ = new node(this); map> new_Bs; - for_each(partition.begin(), partition.end(), [&](const auto &pair) { + for_each(Bs.begin(), Bs.end(), [&new_Bs, &j](const auto &pair) { if (!pair.second[j]) new_Bs.insert(pair); }); From 11f8b0df95e796f42291074633a11a7adc2c4ed1 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Mon, 6 May 2019 17:02:43 +0200 Subject: [PATCH 8/9] cleanup --- include/internal/wt_string.hpp | 64 ++++++++++++++++------------------ 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/include/internal/wt_string.hpp b/include/internal/wt_string.hpp index 4447c79..4a8a521 100644 --- a/include/internal/wt_string.hpp +++ b/include/internal/wt_string.hpp @@ -467,7 +467,7 @@ namespace dyn{ ulint j=0, ulint offset=0) { - if(Bs.size()==1){ + if(Bs.size()==1){ //this node must be a leaf assert(bv.size()==0); @@ -483,53 +483,51 @@ namespace dyn{ make_leaf(c); } return; - } + } - assert(not is_leaf()); + assert(not is_leaf()); - bool t0 = false; - bool t1 = false; + bool task_started_0 = false; + bool task_started_1 = false; for(ulint idx = offset; idx < values.size(); ++idx) { - auto c = values[idx]; - if (!Bs.count(c)) + char_type c = values[idx]; + auto it = Bs.find(c); + if (it == Bs.end()) continue; - auto B = Bs[c]; + bool b = it->second[j]; - bool b = B[j]; bv.push_back(b); - if(b){ - if(not t1){ - t1 = true; + if(b && !task_started_1){ + task_started_1 = true; - if(not has_child1()) + if(not has_child1()) child1_ = new node(this); - map> new_Bs; - for_each(Bs.begin(), Bs.end(), [&new_Bs](const auto &pair) { - if (pair.second[j]) - new_Bs.insert(pair); - }); - #pragma omp task - child1_->push_many(std::move(new_Bs), values, j+1, idx); - } - }else{ - if(not t0){ - t0 = true; + map> new_Bs; + for_each(Bs.begin(), Bs.end(), [&new_Bs,j](const auto &pair) { + if (pair.second[j]) + new_Bs.insert(pair); + }); + #pragma omp task + child1_->push_many(std::move(new_Bs), values, j+1, idx); + } + + if (!b && !task_started_0){ + task_started_0 = true; - if(not has_child0()) + if(not has_child0()) child0_ = new node(this); - map> new_Bs; - for_each(partition.begin(), partition.end(), [&](const auto &pair) { - if (!pair.second[j]) - new_Bs.insert(pair); - }); - #pragma omp task - child0_->push_many(std::move(new_Bs), values, j+1, idx); - } + map> new_Bs; + for_each(Bs.begin(), Bs.end(), [&new_Bs,j](const auto &pair) { + if (!pair.second[j]) + new_Bs.insert(pair); + }); + #pragma omp task + child0_->push_many(std::move(new_Bs), values, j+1, idx); } } From 5b7c90beb95f2535c86a2e33784a4f965d77b7cd Mon Sep 17 00:00:00 2001 From: Christopher Barber Date: Mon, 6 May 2019 21:03:54 +0200 Subject: [PATCH 9/9] packed_vector insert/push_back optimization --- include/internal/packed_vector.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/internal/packed_vector.hpp b/include/internal/packed_vector.hpp index e4b2221..d82a236 100644 --- a/include/internal/packed_vector.hpp +++ b/include/internal/packed_vector.hpp @@ -459,6 +459,11 @@ namespace dyn{ void insert(uint64_t i, uint64_t x){ + if(i==size()){ + push_back(x); + return; + } + if(bitsize(x)>width_){ //auto vec = to_vector(i,x);