diff --git a/benchmark/Make.download b/benchmark/Make.download index 9d42845ee..7bd3f3918 100644 --- a/benchmark/Make.download +++ b/benchmark/Make.download @@ -1,4 +1,4 @@ -../data/%: +../data/%: $(eval URL:=$(firstword $(call config_filter,test_case.config,$@,4))) @$(if $(URL),,\ $(error "No download link nor generation program specified for test case $@") ) @@ -6,7 +6,7 @@ $(eval DEST_DIR:=$(shell dirname $@)) cd $(DEST_DIR); curl -O $(URL) $(eval FILE:=$(DEST_DIR)/$(notdir $(URL))) - @$(if $(filter-out ".gz",$(FILE)),\ + @$(if $(filter %.gz,$(FILE)),\ echo "Extract file $(FILE) using gunzip";\ gunzip $(FILE)) diff --git a/benchmark/k2_trees/Makefile b/benchmark/k2_trees/Makefile new file mode 100644 index 000000000..0c2accadd --- /dev/null +++ b/benchmark/k2_trees/Makefile @@ -0,0 +1,94 @@ +include ../../Make.helper +CFLAGS = $(MY_CXX_FLAGS) +SRC_DIR = src +BIN_DIR = bin +LIBS = -lsdsl + +C_OPTIONS:=$(call config_ids,compile_options.config) +TC_IDS:=$(call config_ids,test_case.config) +K2_IDS:=$(call config_ids,k2tree.config) + +DL = $(foreach TC_ID,$(TC_IDS),\ + $(call config_select,test_case.config,$(TC_ID),2)) +DL_K2T = $(foreach TC_ID,$(TC_IDS),\ + $(foreach K2_ID,$(K2_IDS),\ + ../tmp/K2T.$(TC_ID).$(K2_ID))) + +K2_EXECS = $(foreach K2_ID,$(K2_IDS),$(BIN_DIR)/build_$(K2_ID)) + +RES_FILES = $(foreach K2_ID,$(K2_IDS),\ + $(foreach TC_ID,$(TC_IDS),\ + results/$(K2_ID).$(TC_ID))) + +GEN_FILES = $(foreach K2_ID,$(K2_IDS),\ + $(BIN_DIR)/gen_k2_$(K2_ID)) + +K2T_RES_FILES = $(foreach K2_ID,$(K2_IDS),\ + $(foreach TC_ID,$(TC_IDS),\ + results-k2t/$(K2_ID).$(TC_ID))) + +RESULT_FILE=results/all.txt + +# Targets that name actions, not files +.PHONY: all execs timing clean-build clean-result cleanall + +all: execs + +execs: $(K2_EXECS) + +# Run all benchmarks, merge the per-run result files and build the report +timing: execs $(RES_FILES) + @cat $(RES_FILES) > $(RESULT_FILE) + @$(MAKE) -C visualize + +# Serialize the k2 tree of one test case; the stem is [TC_ID].[K2_ID] +../tmp/K2T.%: $(DL) $(GEN_FILES) + $(eval TC_ID:=$(call dim,1,$*)) + $(eval K2_ID:=$(call dim,2,$*)) + $(eval TC_PATH:=$(call config_select,test_case.config,$(TC_ID),2)) +
@$(BIN_DIR)/gen_k2_$(K2_ID) $(TC_PATH) "../tmp/K2T.$(TC_ID).VECTOR" "../tmp/K2T.$(TC_ID).$(K2_ID)" + +# Execute $(BIN_DIR)/build_[K2_ID] and write result +# (the '# KEY = value' header lines are followed by the program's own output) +results/%: test_case.config $(DL) $(DL_K2T) execs + $(eval K2_ID:=$(call dim,1,$*)) + $(eval TC_ID:=$(call dim,2,$*)) + $(eval K2_TEX_NAME:=$(call config_select,k2tree.config,$(K2_ID),3)) + $(eval TC_TEX_NAME:=$(call config_select,test_case.config,$(TC_ID),3)) + $(eval TC_PATH:=$(call config_select,test_case.config,$(TC_ID),2)) + $(eval TC_SIZE:=$(shell wc -c <$(TC_PATH))) + $(eval ARGS:="../tmp/K2T.$(TC_ID).$(K2_ID)") + @echo "Running bin/build_$(K2_ID) on $(TC_ID)" + @echo "# K2_ID = $(K2_ID)" > $@ + @echo "# TC_ID = $(TC_ID)" >> $@ + @echo "# K2_TEX_NAME = $(K2_TEX_NAME)">>$@ + @echo "# TC_TEX_NAME = $(TC_TEX_NAME)">>$@ + @echo "# TC_SIZE = $(TC_SIZE)">>$@ + @$(BIN_DIR)/build_$(K2_ID) $(ARGS) >> $@ + +# $(BIN_DIR)/build_[K2_ID] +$(BIN_DIR)/build_%: $(SRC_DIR)/k2_time_and_space.cpp k2tree.config + $(eval K2_ID:=$(call dim,1,$*)) + $(eval K2_TYPE:=$(call config_select,k2tree.config,$(K2_ID),2)) + @$(MY_CXX) $(CFLAGS) $(C_OPTIONS) -DK2_TYPE="$(K2_TYPE)" -L$(LIB_DIR)\ + $(SRC_DIR)/k2_time_and_space.cpp -I$(INC_DIR) -o $@ $(LIBS) + +# $(BIN_DIR)/gen_k2_[K2_ID]; K2_TYPE is read from k2tree.config, so it is a prerequisite here too +$(BIN_DIR)/gen_k2_%: $(SRC_DIR)/gen_k2t.cpp k2tree.config + $(eval K2_ID:=$(call dim,1,$*)) + $(eval K2_TYPE:=$(call config_select,k2tree.config,$(K2_ID),2)) + @$(MY_CXX) $(CFLAGS) $(C_OPTIONS) -DK2_TYPE="$(K2_TYPE)" -L$(LIB_DIR)\ + $(SRC_DIR)/gen_k2t.cpp -I$(INC_DIR) -o $@ $(LIBS) -ldivsufsort -ldivsufsort64 + +include ../Make.download + +clean-build: + @echo "Remove executables" + rm -rf $(BIN_DIR)/build* + rm -rf $(BIN_DIR)/gen* + +clean-result: + @echo "Remove results" + rm -rf results/* + +cleanall: clean-build clean-result diff --git a/benchmark/k2_trees/README.md b/benchmark/k2_trees/README.md new file mode 100644 index 000000000..144d4b158 ---
/dev/null +++ b/benchmark/k2_trees/README.md @@ -0,0 +1,75 @@ +# Benchmarking k2 trees + +## Methodology + +Explored dimensions: + + * k2 tree implementations + * test cases + * methods (`adj`, `neighbors`, `reverse_neighbors`) + +## Data + + * The input data for the benchmark are arc files. An arc file is a text + file where each line represents a directed edge between two nodes: the + first column is the origin node and the second the target node. + +### Test cases + + * EXAMPLE test case uses a small file with a little more than 100 nodes and + roughly the same number of edges from Web Data Commons. + * HOSTGRAPH is a test case where the data comes from the Web Corpus released + by the Common Crawl Foundation in April 2014. The file aggregates the + page graph by subdomain/host. It has 123.660.351 edges. + +## Directory structure + + * [bin](./bin): Contains the executables of the project. + * `gen_k2_*` generates the binary file with the graph from the arc files. + * `build_*` executes the experiments. + * [results](./results): Contains the results of the experiments. + * [src](./src): Contains the source code of the benchmark. + * [visualize](./visualize): Contains an `R` script which generates + a report in LaTeX format. + +## Prerequisites + + * For the visualization you need the following software: + - [R][RPJ] with package `tikzDevice`. You can install the + package by calling + `install.packages("filehash", repos="http://cran.r-project.org")` + and + `install.packages("tikzDevice", repos="http://R-Forge.R-project.org")` + in `R`. + - [pdflatex][LT] to generate the pdf reports. + +## Usage + + * `make timing` compiles the programs, downloads or generates + the test instances, builds the k2 trees, + runs the performance tests and generates a report located at + `visualize/k2.pdf`. The raw numbers of the timings + can be found in `results/all.txt`.
The default benchmark + took 75 minutes on my machine (MacBookPro Retina 2.6 GHz Intel + Core i5 16GB 1600 MHz DDR3, SSD). Have a look at the + [complete report][RES]. + * All created binaries and test results can be deleted + by calling `make cleanall`. + +## Customization of the benchmark + +The project contains several configuration files: + + * [k2tree.config][K2CONFIG]: Specify different k2 tree implementations. + * [test_case.config][TCCONF]: Specify test instances by ID, path, LaTeX-name + for the report, and download URL. + * [compile_options.config][CCONF]: Specify compile options by option string. + +Note that the benchmark will execute every combination of k2 trees and test cases. + +[RPJ]: http://www.r-project.org/ "R" +[LT]: http://www.tug.org/applications/pdftex/ "pdflatex" +[K2CONFIG]: ./k2tree.config "k2tree.config" +[TCCONF]: ./test_case.config "test_case.config" +[CCONF]: ./compile_options.config "compile_options.config" +[RES]: https://users.dcc.uchile.cl/~fmontoto/static/k2.pdf "k2.pdf" diff --git a/benchmark/k2_trees/bin/.gitignore b/benchmark/k2_trees/bin/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/benchmark/k2_trees/bin/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/benchmark/k2_trees/compile_options.config b/benchmark/k2_trees/compile_options.config new file mode 100644 index 000000000..cebc3e9f2 --- /dev/null +++ b/benchmark/k2_trees/compile_options.config @@ -0,0 +1,2 @@ +# Compile options +-O3 -funroll-loops -fomit-frame-pointer -ffast-math -DNDEBUG diff --git a/benchmark/k2_trees/k2tree.config b/benchmark/k2_trees/k2tree.config new file mode 100644 index 000000000..f15b3cb83 --- /dev/null +++ b/benchmark/k2_trees/k2tree.config @@ -0,0 +1,23 @@ +# This file specifies k^2 trees that are used in the benchmark. +# +# Each index is specified by a triple: K2_ID;SDSL_TYPE;K2_LATEX_NAME +# * K2_ID : An identifier for the index. Only letters, digits and underscores are allowed in ID.
+# * SDSL_TYPE : Corresponding sdsl type. +# * K2_LATEX_NAME: LaTeX name for output in the benchmark report. No underscores are allowed here. + +# Different k and Bit-Vectors: +K2_BV;k2_tree<2, bit_vector, bit_vector::rank_1_type>;K2BV +# K3_BV;k2_tree<3, bit_vector, bit_vector::rank_1_type>;K3BV +# K4_BV;k2_tree<4, bit_vector, bit_vector::rank_1_type>;K4BV +# K2_RRR63V;k2_tree<2, rrr_vector<63>, rrr_vector<63>::rank_1_type>;K2RRR63V +# K3_RRR63V;k2_tree<3, rrr_vector<63>, rrr_vector<63>::rank_1_type>;K3RRR63V +# K4_RRR63V;k2_tree<4, rrr_vector<63>, rrr_vector<63>::rank_1_type>;K4RRR63V +# K2_RRR126V;k2_tree<2, rrr_vector<126>, rrr_vector<126>::rank_1_type>;K2RRR126V +# K3_RRR126V;k2_tree<3, rrr_vector<126>, rrr_vector<126>::rank_1_type>;K3RRR126V +# K4_RRR126V;k2_tree<4, rrr_vector<126>, rrr_vector<126>::rank_1_type>;K4RRR126V +K2_ILV;k2_tree<2, bit_vector_il<512>, bit_vector_il<512>::rank_1_type>;K2ILV +# K3_ILV;k2_tree<3, bit_vector_il<512>, bit_vector_il<512>::rank_1_type>;K3ILV +# K4_ILV;k2_tree<4, bit_vector_il<512>, bit_vector_il<512>::rank_1_type>;K4ILV +K2_SDV;k2_tree<2, sd_vector, sd_vector::rank_1_type>;K2SDV +# K3_SDV;k2_tree<3, sd_vector, sd_vector::rank_1_type>;K3SDV +# K4_SDV;k2_tree<4, sd_vector, sd_vector::rank_1_type>;K4SDV diff --git a/benchmark/k2_trees/results/.gitignore b/benchmark/k2_trees/results/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/benchmark/k2_trees/results/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/benchmark/k2_trees/src/gen_k2t.cpp b/benchmark/k2_trees/src/gen_k2t.cpp new file mode 100644 index 000000000..eff425af5 --- /dev/null +++ b/benchmark/k2_trees/src/gen_k2t.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace sdsl; + +void generate_bit_vector_buffers(const std::string& idx_file, + const std::string& output_x_file, + const std::string& output_y_file) +{ + std::ifstream infile(idx_file); + std::string line; + 
uint64_t cnt = 0; + + while (std::getline(infile, line)) ++cnt; // first pass: count the edges so both vectors can be sized up front + + infile.clear(); + infile.seekg(0, std::ios::beg); + // Set size of vector to the amount of lines in the input file. + int_vector<>xv(cnt), yv(cnt); + cnt = 0; + + while(std::getline(infile, line)) { + sdsl::k2_tree_ns::idx_type x, y; + std::istringstream iss(line); + if(!(iss >> x >> y)) + throw std::invalid_argument("Not expected line at construct"); + xv[cnt] = x; + yv[cnt++] = y; + } + + store_to_file(xv, output_x_file); + store_to_file(yv, output_y_file); +} + +inline bool exists(const std::string& name) { + std::ifstream f(name.c_str()); + return f.good(); +} + +int main(int argc, char* argv[]) +{ + if(argc < 4) { + std::cout<<"Usage: input_file output_file_prefix output_k2_file" << std::endl; + return 1; // argv[1..3] are dereferenced below + } + + std::string out_x(argv[2]); + out_x.append(".x"); + std::string out_y(argv[2]); + out_y.append(".y"); + + if(!exists(out_x) || !exists(out_y)) + generate_bit_vector_buffers(argv[1], out_x, out_y); + + K2_TYPE k2(argv[2]); + std::ofstream fs(argv[3]); + if(!fs) { std::cerr << "Cannot open output file " << argv[3] << std::endl; return 1; } + k2.serialize(fs); +} diff --git a/benchmark/k2_trees/src/k2_time_and_space.cpp b/benchmark/k2_trees/src/k2_time_and_space.cpp new file mode 100644 index 000000000..f16469a51 --- /dev/null +++ b/benchmark/k2_trees/src/k2_time_and_space.cpp @@ -0,0 +1,90 @@ +#include +#include +#include + +#include +#include + +using namespace std; +using namespace sdsl; +using namespace std::chrono; +using timer = std::chrono::high_resolution_clock; + +typedef K2_TYPE::idx_type idx_type; +typedef K2_TYPE::size_type size_type; + + +template +size_type test_adj(const t_kt &tree, idx_type node, size_type neighbor, + uint64_t times) +{ + size_type cnt = 0; + for(uint64_t i = 0; i < times; i++) + if(tree.adj(node, neighbor)) + cnt++; + return cnt; +} + +template +size_type test_neighbors(const t_kt &tree, idx_type node, uint64_t times) +{ + size_type cnt = 0; + for(uint64_t i = 0; i < times; i++) + cnt += tree.neigh(node).size(); + return
cnt; +} + +template +size_type test_reverse_neighbors(const t_kt &tree, idx_type node, uint64_t times) +{ + size_type cnt = 0; + for(uint64_t i = 0; i < times; i++) + cnt += tree.reverse_neigh(node).size(); + return cnt; +} + +int main(int argc, char* argv[]) +{ + if (argc < 2) { + cout << "Usage: file" << endl; + return 1; + } + + const uint64_t reps = 100000; + + // construct + memory_monitor::start(); + std::ifstream is(argv[1]); + if (!is) { cerr << "Cannot open k2 tree file " << argv[1] << endl; return 1; } // stderr: stdout carries the result lines + auto start = timer::now(); + K2_TYPE k2; + k2.load(is); // the constructs_* values below measure loading the serialized tree + auto stop = timer::now(); + memory_monitor::stop(); + cout << "# constructs_time = " << duration_cast(stop-start).count()/(double)1000 << endl; + cout << "# constructs_space = " << memory_monitor::peak() << endl; + cout << "# k2_size = " << size_in_bytes(k2) << endl; // size + is.close(); + + // adj + start = timer::now(); + auto check = test_adj(k2, 7, 5, reps); + stop = timer::now(); + cout << "# adj_time = " << duration_cast(stop-start).count()/(double)reps << endl; + cout << "# adj_check = " << check << endl; + + // neighbors + start = timer::now(); + check = test_neighbors(k2, 7, reps); + stop = timer::now(); + cout << "# neighbors_time = " << duration_cast(stop-start).count()/(double)reps << endl; + cout << "# neighbors_check = " << check << endl; + + start = timer::now(); + check = test_reverse_neighbors(k2, 10, reps); + stop = timer::now(); + cout << "# reverse_neighbors_time = " << duration_cast(stop-start).count()/(double)reps << endl; + cout << "# reverse_neighbors_check = " << check << endl; + + return 0; +} diff --git a/benchmark/k2_trees/test_case.config b/benchmark/k2_trees/test_case.config new file mode 100644 index 000000000..a3e129d93 --- /dev/null +++ b/benchmark/k2_trees/test_case.config @@ -0,0 +1,7 @@ +# Configuration for test files +# (1) Identifier for test file (consisting of letters, no `.`) +# (2) Path to the test file +# (3) LaTeX name +# (4) Download link (if the test is available online)
+EXAMPLE;../data/example_arcs;examples;http://webdatacommons.org/hyperlinkgraph/data/example_arcs +HOSTGRAPH;../data/hostgraph.arc;hostgraph;http://users.dcc.uchile.cl/~fmontoto/static/hostgraph.arc.gz diff --git a/benchmark/k2_trees/visualize/.gitignore b/benchmark/k2_trees/visualize/.gitignore new file mode 100644 index 000000000..139e8ec00 --- /dev/null +++ b/benchmark/k2_trees/visualize/.gitignore @@ -0,0 +1,6 @@ +* +!.gitignore +!Makefile +!k2-footer.tex +!k2-header.tex +!k2.R diff --git a/benchmark/k2_trees/visualize/Makefile b/benchmark/k2_trees/visualize/Makefile new file mode 100644 index 000000000..b12d91ad9 --- /dev/null +++ b/benchmark/k2_trees/visualize/Makefile @@ -0,0 +1,21 @@ +include ../../../Make.helper + +# Configuration files read by k2.R +CONFIG_FILES= ../test_case.config ../k2tree.config + +.PHONY: all clean + +all: k2.pdf + +k2.pdf: k2.tex + @echo "Use pdflatex to generate k2.pdf" + @pdflatex k2.tex >> LaTeX.log 2>&1 + +# k2.R also reads the LaTeX header and footer, so editing them must regenerate k2.tex +k2.tex: ../results/all.txt ../../basic_functions.R k2.R k2-header.tex k2-footer.tex $(CONFIG_FILES) + @echo "Use R to generate k2.tex" + @R --vanilla < k2.R > R.log 2>&1 + +clean: + rm -f k2.pdf k2.aux k2.tex fig* \ + k2.log R.log LaTeX.log diff --git a/benchmark/k2_trees/visualize/k2-footer.tex b/benchmark/k2_trees/visualize/k2-footer.tex new file mode 100644 index 000000000..6b47932f5 --- /dev/null +++ b/benchmark/k2_trees/visualize/k2-footer.tex @@ -0,0 +1 @@ +\end{document} diff --git a/benchmark/k2_trees/visualize/k2-header.tex b/benchmark/k2_trees/visualize/k2-header.tex new file mode 100644 index 000000000..0b4deecc7 --- /dev/null +++ b/benchmark/k2_trees/visualize/k2-header.tex @@ -0,0 +1,10 @@ +\documentclass[9pt,a4paper,DIV10]{scrartcl} +\usepackage{tikz} +\usepackage{booktabs} +\usepackage{array} +\usepackage{ragged2e} +\usepackage{float} + +\begin{document} + +\pagestyle{empty} diff --git a/benchmark/k2_trees/visualize/k2.R b/benchmark/k2_trees/visualize/k2.R new file mode 100644 index 000000000..c9fd617f2 --- /dev/null +++ b/benchmark/k2_trees/visualize/k2.R @@ -0,0 +1,190 @@ +require(tikzDevice)
+source("../../basic_functions.R") + +tex_file = "k2.tex" + +tc_config <- readConfig("../test_case.config",c("TC_ID","PATH","LATEX_NAME","URL")) + +open_tikz <- function( file_name ){ + tikz(file_name, width = 5.5, height = 7.5 , standAlone = F , sanitize = TRUE) +} + +x_for_bar<-function(value){ + c(0,0,value,value) +} + +y_for_bar<-function(offset){ + c(offset,offset+0.4,offset+0.4,offset) +} + +#Plots one horizontal grey bar per entry of data on a size axis; the red line marks the value 100 +plot_size_figure <-function(data,heading,ylab=F){ + + #set margin + par(mar=c(3,2,3,0)) + if(ylab){ + par(mar=c(3,10,3,0)) + } + + plot(c(),c(),ylim=c(0,(length(data)*0.5)+0.2),xlim=c(0,max(101,(max(data)+1))),xlab="",ylab="",xaxt="n",yaxt="n") + + #label y-axis + if(ylab){ + axis( 2, at =seq(0.3,(length(data)*0.5)+0.2,0.5), label=colnames(data),las=1) + } + #label x-axis + axis(1) + mtext("Size relative to original file size (arc file)", side=1, line=2) + + #draw bars + offset=0.1 + for(time in data){ + polygon( x_for_bar(time),y_for_bar(offset), border=NA, col="grey") + offset=offset+0.5 + } + + #abline(v=c(axis(1)/2,max(axis(1)/2)+axis(1)/2), col="gray") + abline(v=c(axis(1),axis(1)+(axis(1)[2]-axis(1)[1])/2),col="gray") + abline(v=100, col="red") + draw_figure_heading(heading) +} + +#Plots one horizontal grey bar per entry of data on a time axis; xlab=T labels it in microseconds, constructor=T in seconds +plot_time_figure <-function(data,heading,ylab=T,xlab=T,constructor=F,xmax=max(data)){ + #set margin + par(mar=c(3,2,2,0)) + if(ylab){ + par(mar=c(3,10,2,0)) + } + + plot(c(),c(),ylim=c(0,(length(data)*0.5)+0.2),xlim=c(0,(xmax*1.02)),xlab="",ylab="",xaxt="n",yaxt="n") + + #label y-axis + if(ylab){ + axis( 2, at =seq(0.3,(length(data)*0.5)+0.2,0.5), label=colnames(data),las=1) + } + #label x-axis + axis(1) + abline(v=c(axis(1),axis(1)+(axis(1)[2]-axis(1)[1])/2),col="gray") + if(xlab){ + mtext("Time in microseconds", side=1, line=2) + } + if(constructor){ + mtext("Time in seconds", side=1, line=2) + } + + #draw bars + offset=0.1 + for(time in data){ + polygon( x_for_bar(time),y_for_bar(offset), border=NA, col="grey")
+ offset=offset+0.5 + } + + draw_figure_heading(heading) +} + + +#read header +tex_doc <- paste(readLines("k2-header.tex"),collapse="\n") + +tex_doc<-paste(tex_doc,"\\section{Result of the K2 Tree benchmark}") + + +maindata <- data_frame_from_key_value_pairs( "../results/all.txt" ) + +#create two pages for each test case +#for(tc in tc_config[['TC_ID']]){ +for(tc in unique(maindata$TC_ID)){ + + data<-maindata[maindata$TC_ID==tc,] + id <-data[['K2_TEX_NAME']] + + #first page start + fig_name <- paste("fig-page1-",tc,".tex",sep="") + tex_doc<-paste(tex_doc,"\\subsection{Test case: {\\sc ",data[['TC_TEX_NAME']],"}}") + + open_tikz( fig_name ) + + layout(matrix(c(1,2,3), nrow=3, ncol=1, byrow = TRUE), + widths=c(1,1,1), heights=c(1)) + + xmax<-max(data[c('adj_time', 'neighbors_time','reverse_neighbors_time')]) + a <-data['adj_time'] + neighbors <-data['neighbors_time'] + reverse_neighbors <-data['reverse_neighbors_time'] + rownames(a)<-id + rownames(neighbors)<-id + rownames(reverse_neighbors)<-id + if(xmax > 10000){ + xmax <- xmax/1000000 + neighbors <-neighbors/1000000 + reverse_neighbors = reverse_neighbors/1000000 + a <- a/1000000 + #adj-plot + plot_time_figure(t(a),"\\tt{adj}", xlab=F) + #neighbors-plot + plot_time_figure(t(neighbors),"\\tt{neighbors}", xlab=F, xmax=xmax) + #reverse_neighbors-plot + plot_time_figure(t(reverse_neighbors),"\\tt{reverse_neighbors}",constructor=T, xlab=F, xmax=xmax) + } + else { + #adj-plot + plot_time_figure(t(a),"\\tt{adj}", xlab=F) + #neighbors-plot + plot_time_figure(t(neighbors),"\\tt{neighbors}", xlab=F, xmax=xmax) + #reverse_neighbors-plot + plot_time_figure(t(reverse_neighbors),"\\tt{reverse_neighbors}", xmax=xmax) + + } + + old<-par() + dev.off() + tex_doc <- paste(tex_doc,"\\begin{figure}[H] + \\input{",fig_name,"} + \\end{figure}") + #first page end + + #second page start + fig_name <- paste("fig-page2-",tc,".tex",sep="") + open_tikz( fig_name ) + + layout(matrix(c(1, 2, 3), 3, 1, byrow=TRUE), + widths=c(1,1,1), 
heights=c(1)) + + #constructor-plot + con <-data['constructs_time'] + rownames(con)<-id + plot_time_figure(t(con),"\\tt{construct}",xlab=F,constructor=T) + + #construction-size-plot + tsize<-data[[1,'TC_SIZE']] + consize <-(data['constructs_space']/tsize)*100 + rownames(consize)<-id + + plot_size_figure(t(consize),"\\tt{construction space}", ylab=T) + + #size-plot + #tsize still holds TC_SIZE of this test case (set for the construction space plot) + size <-(data['k2_size']/tsize)*100 + rownames(size)<-id + plot_size_figure(t(size),"\\tt{space}", ylab=T) + + dev.off() + tex_doc <- paste(tex_doc,"\\begin{figure}[H] + \\input{",fig_name,"} + \\end{figure}") + #second page end +} + +#type identification table +tex_doc<-paste(tex_doc,"\\begin{table}[b] + \\centering", + typeInfoTable("../k2tree.config",unique(maindata[['K2_ID']]), 1, 3, 2), #every benchmarked id, not only those of the last test case + "\\caption{K2 tree identifier and corresponding sdsl-type.} + \\end{table}") + +#read footer+end +tex_doc <- paste(tex_doc, readLines("k2-footer.tex"),collapse="\n") +sink(tex_file) +cat(tex_doc) +sink(NULL) diff --git a/include/sdsl/k2_tree.hpp b/include/sdsl/k2_tree.hpp new file mode 100644 index 000000000..1cf1a2fb1 --- /dev/null +++ b/include/sdsl/k2_tree.hpp @@ -0,0 +1,556 @@ +/* sdsl - succinct data structures library + Copyright (C) 2016 Francisco Montoto + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see http://www.gnu.org/licenses/ . +*/ +/*! \file k2_tree.hpp + \brief k2_tree.hpp contains a compact k^2-tree.
+ \author Francisco Montoto +*/ +#ifndef INCLUDED_SDSL_K2_TREE +#define INCLUDED_SDSL_K2_TREE + +#include +#include +#include +#include +#include "sdsl/bit_vectors.hpp" +#include "sdsl/k2_tree_helper.hpp" +#include "sdsl/int_vector_buffer.hpp" + + +//! Namespace for the succint data structure library +namespace sdsl +{ +//! A k^2-tree +/*! A k^2-tree is a compact tree structure to represent a web graph. The + * structure takes advantage of large empty areas of the adjacency matrix of + * the graph. + * + * \par References + * [1] Brisaboa, N. R., Ladra, S., & Navarro, G. (2009, August): + * k2-trees for compact web graph representation. In International + * Symposium on String Processing and Information Retrieval + * (pp. 18-30). Springer Berlin Heidelberg. + */ + +template +class k2_tree +{ + public: + typedef k2_tree_ns::idx_type idx_type; + typedef k2_tree_ns::size_type size_type; + + private: + //! Bit array to store all the bits of the tree, except those in the + //! last level. + t_bv k_t; + //! Bit array to store the last level of the tree. + t_bv k_l; + + t_rank k_t_rank; + + uint8_t k_k; + uint16_t k_height; + + protected: + + void build_from_matrix(const std::vector>& matrix) + { + // Makes the size a power of k. 
+ int simulated_size = std::pow(k, k_height); + std::vector> acc(k_height + 1); + + k2_tree_ns::_build_from_matrix(matrix, k, + simulated_size, k_height, + 1, 0, 0, acc); + + size_type t_size = 0; + size_type l_size = 0; + for (int i = 1; i < k_height; i++) + for (auto it = acc[i].begin(); it != acc[i].end(); it++) + t_size += (*it).size(); + + for (auto it = acc[k_height].begin(); it != acc[k_height].end(); it++) + l_size += (*it).size(); + + bit_vector k_t_(t_size, 0); + bit_vector k_l_(l_size, 0); + + int n = 0; + for (int j = 1; j < k_height; j++) + for (auto it = acc[j].begin(); it != acc[j].end(); it++) + for (unsigned i = 0; i < (*it).size(); i++) { + // TODO there should be a better way to do this + k_t_.set_int(n, (*it).get_int(i, 1), 1); + n++; + } + n = 0; + for (auto it = acc[k_height].begin(); it != acc[k_height].end(); it++) + for (unsigned i = 0; i < (*it).size(); i++) { + // TODO there should be a better way to do this + k_l_.set_int(n * 1, (*it).get_int(i, 1), 1); + n++; + } + + k2_tree_ns::build_template_vector(k_t_, k_l_, k_t, k_l); + } + + + /*! Recursive function to retrieve list of neighbors. + * + * \param n Size of the submatrix in the next recursive step. + * \param row Row of interest in the current submatrix, this is the + * row corresponding the node we are looking neighbors for. + * \param col Column offset of the current submatrix in the global + * matrix. + * \param level Position in k_t:k_l (k_l appended to k_t) of the node + * or leaf being processed at this step. + * \param acc Accumulator to store the neighbors found. 
+ */ + void _neigh(size_type n, idx_type row, idx_type col, size_type level, + std::vector& acc) const + { + if (level >= k_t.size()) { // Last level + if (k_l[level - k_t.size()] == 1) + acc.push_back(col); + return; + } + + if (k_t[level] == 1) { + idx_type y = k_t_rank(level + 1) * std::pow(k_k, 2) + + k_k * std::floor(row/static_cast(n)); + for (unsigned j = 0; j < k_k; j++) + _neigh(n/k_k, row % n, col + n * j, y + j, acc); + } + } + + /*! Recursive function to retrieve list of reverse neighbors. + * + * \param n Size of the submatrix in the next recursive step. + * \param row Row offset of the current submatrix in the global matrix. + * \param col Column of interest in the current submatrix, this is the + * column corresponding the node we are looking reverse neighbors + * for. + * \param level Position in k_t:k_l (k_l appended to k_t) of the node + * or leaf being processed at this step. + * \param acc Accumulator to store the neighbors found. + */ + void _reverse_neigh(size_type n, idx_type row, idx_type col, + size_type level, std::vector& acc) const + { + if (level >= k_t.size()) { // Last level + if (k_l[level - k_t.size()] == 1) { + acc.push_back(row); + } + return; + } + + if (k_t[level] == 1) { + idx_type y = k_t_rank(level + 1) * std::pow(k_k, 2) + + std::floor(col/static_cast(n)); + for (unsigned j = 0; j < k_k; j++) + _reverse_neigh(n/k_k, row + n * j, col % n, + y + j * k_k, acc); + } + } + + //! Build a tree from an edges collection + /*! This method takes a vector of edges describing the graph + * and the graph size. And takes linear time over the amount of + * edges to build the k_2 representation. + * \param edges A vector with all the edges of the graph, it can + * not be empty. + * \param size Size of the graph, all the nodes in edges must be + * within 0 and size ([0, size[). 
+ */ + void build_from_edges(std::vector>& edges, + const size_type size) + { + + typedef std::tuple t_part_tuple; + + k_k = k; + k_height = std::ceil(std::log(size)/std::log(k_k)); + k_height = k_height > 1 ? k_height : 1; // If size == 0 + size_type k_2 = std::pow(k_k, 2); + bit_vector k_t_ = bit_vector(k_2 * k_height * edges.size(), 0); + bit_vector k_l_; + + std::queue q; + idx_type t = 0, last_level = 0; + idx_type i, j, r_0, c_0, it, c, r; + size_type l = std::pow(k_k, k_height - 1); + std::vector pos_by_chunk(k_2 + 1, 0); + + q.push(t_part_tuple(0, edges.size(), l, 0, 0)); + + while (!q.empty()) { + std::vector amount_by_chunk(k_2, 0); + std::tie(i, j, l, r_0, c_0) = q.front(); + q.pop(); + // Get size for each chunk + for (it = i; it < j; it++) + amount_by_chunk[k2_tree_ns::get_chunk_idx( + std::get<0>(edges[it]), std::get<1>(edges[it]), + c_0, r_0, l, k_k)] += 1; + if (l == 1) { + if (last_level == 0) { + last_level = t; + k_l_ = bit_vector(k_t_.size() - last_level, 0); + k_t_.resize(last_level); + last_level = 1; // if t was 0 + t = 0; // Restart counter as we're storing at k_l_ now. + } + for (it = 0; it < k_2; it++,t++) + if (amount_by_chunk[it] != 0) + k_l_[t] = 1; + // At l == 1 we do not put new elements at the queue. + continue; + } + + // Set starting position in the vector for each chunk + pos_by_chunk[0] = i; + for (it = 1; it < k_2; it++) + pos_by_chunk[it] = + pos_by_chunk[it - 1] + amount_by_chunk[it - 1]; + // To handle the last case when it = k_2 - 1 + pos_by_chunk[k_2] = j; + // Push to the queue every non zero elements chunk + for (it = 0; it < k_2; it++,t++) + // If not empty chunk, set bit to 1 + if (amount_by_chunk[it] != 0) { + r = it / k_k; + c = it % k_k; + k_t_[t] = 1; + q.push(t_part_tuple(pos_by_chunk[it], + pos_by_chunk[it + 1], + l/k_k, + r_0 + r * l, + c_0 + c * l)); + } + idx_type chunk; + + // Sort edges' vector + for (unsigned ch = 0; ch < k_2; ch++) { + idx_type be = ch == 0 ? 
i : pos_by_chunk[ch - 1]; + for (it = pos_by_chunk[ch]; it < be + amount_by_chunk[ch];) { + chunk = k2_tree_ns::get_chunk_idx( + std::get<0>(edges[it]), std::get<1>(edges[it]), + c_0, r_0, l, k_k); + + if (pos_by_chunk[chunk] != it) + std::iter_swap(edges.begin() + it, + edges.begin() + pos_by_chunk[chunk]); + else + it++; + pos_by_chunk[chunk]++; + } + } + } + k_l_.resize(t); + k2_tree_ns::build_template_vector(k_t_, k_l_, k_t, k_l); + + k_t_rank = t_rank(&k_t); + + } + + public: + + k2_tree() = default; + + //! Constructor + /*! This constructor takes the graph adjacency matrix. + * The time complexity for this constructor is linear in the matrix + * size + * \param matrix Adjacency matrix of the graph. It must be a binary + * square matrix. + */ + k2_tree(std::vector>& matrix) + { + if (matrix.size() < 1) { + throw std::logic_error("Matrix has no elements"); + } + std::vector t; + k_k = k; + if (matrix.size() < k_k) + k_height = 1; + else // height = log_k n + k_height = std::ceil(std::log(matrix.size())/std::log(k_k)); + + build_from_matrix(matrix); + + k_t_rank = t_rank(&k_t); + } + + //! Constructor + /*! This constructor takes a vector of edges describing the graph + * and the graph size. And takes linear time over the amount of + * edges to build the k_2 representation. + * \param edges A vector with all the edges of the graph, it can + * not be empty. The vector is reordered in place. + * \param size Size of the graph, all the nodes in edges must be + * within 0 and size ([0, size[). + */ + k2_tree(std::vector>& edges, + const size_type size) + { + assert(size > 0); + assert(edges.size() > 0); + + build_from_edges(edges, size); + } + + //! Constructor + /*! This constructor expects a filename prefix. Two serialized + * int_vectors have to be present at filename.x and filename.y. + * Each pair x,y describes an edge of the graph, from the node x + * to the node y.
+ * \param filename String with the prefix of the files filename.x, + * filename.y each of them containing a serialized + * int_vector<>. + * \param size Size of the graph, all the nodes in the edges defined + * by the files must be within 0 and size ([0, size[). If + * size==0, the size will be taken as the max node + * in the edges. + */ + k2_tree(std::string filename, size_type size=0) + { + int_vector_buffer<> buf_x(filename + ".x", std::ios::in); + int_vector_buffer<> buf_y(filename + ".y", std::ios::in); + + assert(buf_x.size() == buf_y.size()); + assert(buf_x.size() > 0); + + std::vector>edges; + edges.reserve(buf_x.size()); + + if(size==0) { + size_type max = 0; + for(auto v : buf_x) + max = std::max(static_cast(v), max); + for(auto v : buf_y) + max = std::max(static_cast(v), max); + size = max + 1; + } + + for(uint64_t i = 0; i < buf_x.size(); i++) + edges.push_back( + std::tuple {buf_x[i], buf_y[i]}); + + build_from_edges(edges, size); + } + + + k2_tree(const k2_tree& tr) + { + *this = tr; + } + + k2_tree(k2_tree&& tr) + { + *this = std::move(tr); + } + + //! Move assignment operator + k2_tree& operator=(k2_tree&& tr) + { + if (this != &tr) { + k_t = std::move(tr.k_t); + k_l = std::move(tr.k_l); + k_k = std::move(tr.k_k); + k_height = std::move(tr.k_height); + k_t_rank = std::move(tr.k_t_rank); + k_t_rank.set_vector(&k_t); + } + return *this; + } + + //! Assignment operator + k2_tree& operator=(k2_tree& tr) + { + if (this != &tr) { + k_t = tr.k_t; + k_l = tr.k_l; + k_t_rank = tr.k_t_rank; + k_t_rank.set_vector(&k_t); + k_k = tr.k_k; + k_height = tr.k_height; + } + return *this; + } + + //! Swap operator + void swap(k2_tree& tr) + { + if (this != &tr) { + std::swap(k_t, tr.k_t); + std::swap(k_l, tr.k_l); + util::swap_support(k_t_rank, tr.k_t_rank, &k_t, &(tr.k_t)); + std::swap(k_k, tr.k_k); + std::swap(k_height, tr.k_height); + } + } + + //! Equal operator + bool operator==(const k2_tree& tr) const + { + // TODO check the rank support equality? 
+ if (k_k != tr.k_k || k_height != tr.k_height) + return false; + if (k_t.size() != tr.k_t.size() || k_l.size() != tr.k_l.size()) + return false; + // size_type indices: an unsigned counter could wrap on long vectors. + for (size_type i = 0; i < k_t.size(); i++) + if (k_t[i] != tr.k_t[i]) + return false; + for (size_type i = 0; i < k_l.size(); i++) + if (k_l[i] != tr.k_l[i]) + return false; + return true; + } + + //! Returns a copy of the bit vector k_t. + t_bv get_t() const + { + return k_t; + } + + //! Returns a copy of the bit vector k_l. + t_bv get_l() const + { + return k_l; + } + + //! Indicates whether node j is adjacent to node i or not. + /*! + * \param i Node i. + * \param j Node j. + * \returns true if there is an edge going from node i to node j, + * false otherwise. + */ + bool adj(idx_type i, idx_type j) const + { + if (k_t.size() == 0 && k_l.size() == 0) + return false; + size_type n = std::pow(k_k, k_height - 1); + size_type k_2 = std::pow(k_k, 2); + idx_type col, row; + + // This is duplicated to avoid an extra if at the loop. As idx_type + // is unsigned and rank has an offset of one, is not possible to run + // k_t_rank with zero as parameter at the first iteration. + // Unsigned integer division already yields the floor of the quotient. + row = i / n; + col = j / n; + i = i % n; + j = j % n; + idx_type level = k_k * row + col; + n = n/k_k; + + // Positions below k_t.size() index k_t; larger ones index k_l. + while (level < k_t.size()) { + if (k_t[level] == 0) + return false; + row = i / n; + col = j / n; + i = i % n; + j = j % n; + level = k_t_rank(level + 1) * k_2 + k_k * row + col; + n = n/k_k; + } + + return k_l[level - k_t.size()] == 1; + } + + //! Returns a list of neighbors of node i. + /*! + * \param i Node to get neighbors from. + * \returns A list of neighbors of node i. + */ + std::vectorneigh(idx_type i) const + { + std::vector acc{}; + if (k_l.size() == 0 && k_t.size() == 0) + return acc; + // Size of the first square division + size_type n = + static_cast(std::pow(k_k, k_height)) / k_k; + idx_type y = k_k * (i / n); + for (unsigned j = 0; j < k_k; j++) + _neigh(n/k_k, i % n, n * j, y + j, acc); + return acc; + } + + //! 
Returns a list of reverse neighbors of node i. + /*! + * \param i Node to get reverse neighbors from. + * \returns A list of reverse neighbors of node i. + */ + std::vector reverse_neigh(idx_type i) const + { + std::vector acc{}; + if (k_l.size() == 0 && k_t.size() == 0) + return acc; + // Size of the first square division + size_type n = + static_cast(std::pow(k_k, k_height)) / k_k; + // Unsigned integer division already yields the floor of the quotient. + idx_type y = i / n; + for (unsigned j = 0; j < k_k; j++) + _reverse_neigh(n/k_k, n * j, i % n, y + j * k_k, acc); + + return acc; + } + + + //! Serialize to a stream + /*! Serialize the k2_tree data structure + * \param out Outstream to write the k2_tree. + * \param v Parent structure tree node handed to + * structure_tree::add_child (defaults to nullptr). + * \param name Name handed to structure_tree::add_child for the + * node created for this object. + * \returns The number of written bytes. + */ + size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, + std::string name="") const + { + structure_tree_node* child = structure_tree::add_child( + v, name, util::class_name(*this)); + size_type written_bytes = 0; + + // Members are written in the order t, l, t_rank, k, height. + written_bytes += k_t.serialize(out, child, "t"); + written_bytes += k_l.serialize(out, child, "l"); + written_bytes += k_t_rank.serialize(out, child, "t_rank"); + written_bytes += write_member(k_k, out, child, "k"); + written_bytes += write_member(k_height, out, child, "height"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + + + //! Load from istream + /*! Load the k2_tree from the given istream. + * \param in Stream to load the k2_tree from. 
+ */ + void load(std::istream& in) + { + // Members are read in the order t, l, t_rank, k, height. + k_t.load(in); + k_l.load(in); + k_t_rank.load(in); + // Point the loaded rank support at this object's k_t. + k_t_rank.set_vector(&k_t); + read_member(k_k, in); + read_member(k_height, in); + } + +}; +} + +#endif diff --git a/include/sdsl/k2_tree_helper.hpp b/include/sdsl/k2_tree_helper.hpp new file mode 100644 index 000000000..61ac99968 --- /dev/null +++ b/include/sdsl/k2_tree_helper.hpp @@ -0,0 +1,113 @@ +/* sdsl - succinct data structures library + Copyright (C) 2016 Francisco Montoto + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see http://www.gnu.org/licenses/ . +*/ +/*! \file k2_tree_helper.hpp + \brief k2_tree_helper.hpp contains helper functions and definitions for a k^2-tree implementation. + \author Francisco Montoto +*/ +#ifndef INCLUDED_SDSL_K2_TREE_HELPER +#define INCLUDED_SDSL_K2_TREE_HELPER + +#include +#include + +#include "sdsl/bit_vectors.hpp" + +//! Namespace for the succinct data structure library. +namespace sdsl +{ + +//! 
Namespace for the k2_tree +namespace k2_tree_ns +{ + +typedef int_vector<>::size_type idx_type; +typedef int_vector<>::size_type size_type; + +/*! Recursively builds the bit blocks of one submatrix. + * + * Fills a block of k*k bits for the submatrix whose top-left corner is + * (p, q). At level l == height the bits are read from the matrix cells + * (cells outside the matrix count as 0); above that level each bit is + * the return value of the recursive call on the corresponding child. + * A block containing at least one 1 is appended to acc[l]. + * + * \param matrix Matrix to read the cells from. + * \param k The k parameter of the k^2 tree. + * \param n Side length of the current submatrix. + * \param height Level at which cells are read directly. + * \param l Current level. + * \param p Row offset of the current submatrix. + * \param q Column offset of the current submatrix. + * \param acc Per-level accumulator of the non-empty blocks. + * \returns 1 if the block has at least one bit set, 0 otherwise. + */ +template +int _build_from_matrix(const std::vector>& matrix, + const uint8_t k, int n, const int height, + int l, int p, int q, std::vector>& acc) +{ + // k * k instead of pow(k, 2): exact, no floating point round trip. + unsigned i, j, b_size = k * k; + t_bv b(b_size, 0); + bool is_leaf = (l == height); + bool any_set = false; // whether some bit of b was set + + if (is_leaf) { + for (i = 0; i < k; i++) + for (j = 0; j < k; j++) + // Bound the column by the row's own length so that a row + // shorter than matrix.size() is never read out of range. + if (p + i < matrix.size() + && q + j < matrix[p + i].size() + && matrix[p + i][q + j] == 1) { + b[i * k + j] = 1; + any_set = true; + } + } else { // Internal node + for (i = 0; i < k; i++) + for (j = 0; j < k; j++) + if (_build_from_matrix(matrix, k, n/k, height, l + 1, + p + i * (n/k), q + j * (n/k), + acc)) { + b[i * k + j] = 1; + any_set = true; + } + } + + if (!any_set) // If there are not 1s at b. + return 0; + + acc[l].push_back(std::move(b)); + return 1; +} + +/*! Get the chunk index ([0, k^2[) of a submatrix point. + * + * Gets a point in the global matrix and returns its corresponding chunk + * in the submatrix specified. + * + * \param v Row of the point in the global matrix. + * \param u Column of the point in the global matrix. + * \param c_0 Column offset of the submatrix in the global matrix. + * \param r_0 Row offset of the submatrix in the global matrix. + * \param l size of the chunk at the submatrix. + * \param k the k parameter from the k^2 tree. + * \returns the index of the chunk containing the point at the submatrix. 
+ */ +inline uint16_t get_chunk_idx(idx_type v, idx_type u, idx_type c_0, + idx_type r_0, size_type l, uint8_t k) +{ + return ((v - r_0) / l) * k + (u - c_0) / l; +} + +//! Builds k_t and k_l by constructing a t_bv from each plain bit_vector. +template void build_template_vector(bit_vector& k_t_, + bit_vector& k_l_, t_bv& k_t, t_bv& k_l) +{ + k_t = t_bv(k_t_); + k_l = t_bv(k_l_); +} + +//! Specialization for bit_vector: swaps the contents instead of copying. +// A full specialization is an ordinary function, so it must be inline to +// be defined in a header that several translation units may include. +template<> inline void build_template_vector(bit_vector& k_t_, + bit_vector& k_l_, + bit_vector& k_t, + bit_vector& k_l) +{ + k_t.swap(k_t_); + k_l.swap(k_l_); +} + +} // end namespace k2_tree_ns +} // end namespace sdsl + +#endif diff --git a/test/k2_tree_test.cpp b/test/k2_tree_test.cpp new file mode 100644 index 000000000..855823195 --- /dev/null +++ b/test/k2_tree_test.cpp @@ -0,0 +1,548 @@ +#include "sdsl/k2_tree.hpp" +#include "gtest/gtest.h" + +#include +#include +#include + +namespace +{ + +using namespace sdsl; +using namespace std; + +typedef int_vector<>::size_type size_type; + +template +class k2_tree_test_k_2 : public ::testing::Test { }; + +template +class k2_tree_test_k_3 : public ::testing::Test { }; + +template +class k2_tree_test : public ::testing::Test { }; + +using testing::Types; + +namespace k2_tree_test_nm +{ +// Asserts that the tree's t and l bit vectors match the expected bits. +template +void check_t_l(t_tree& tree, vector expected_t, + vector expected_l) +{ + // get_t()/get_l() return by value: fetch each vector once instead of + // copying it again on every loop iteration. + const auto actual_t = tree.get_t(); + const auto actual_l = tree.get_l(); + ASSERT_EQ(expected_t.size(), actual_t.size()); + ASSERT_EQ(expected_l.size(), actual_l.size()); + for (unsigned i = 0; i < expected_t.size(); i++) + ASSERT_EQ(expected_t[i], actual_t.get_int(i, 1)); + for (unsigned i = 0; i < expected_l.size(); i++) + ASSERT_EQ(expected_l[i], actual_l.get_int(i, 1)); +} + +// Serializes the tree to a string stream, loads it into a default +// constructed tree and asserts that both compare equal. +template +void check_serialize_load(t_tree& tree) +{ + auto unserialized_tree = t_tree(); + std::stringstream ss; + tree.serialize(ss); + unserialized_tree.load(ss); + ASSERT_EQ(tree, unserialized_tree); +} +}; + +typedef Types< +k2_tree<2, bit_vector, rank_support_v<>>, + k2_tree<2, bit_vector> + > k_2_implementations; + +typedef Types< +k2_tree<3, bit_vector, rank_support_v<>>, + k2_tree<3, bit_vector> + > k_3_implementations; + 
+typedef Types< +k2_tree<2, bit_vector>, + k2_tree<3, bit_vector>, + k2_tree<7, bit_vector>, + k2_tree<2, rrr_vector<63>>, + k2_tree<3, rrr_vector<63>>, + k2_tree<5, bit_vector, rank_support_v<>>, + k2_tree<4, bit_vector, rank_support_v<>> + > Implementations; + +TYPED_TEST_CASE(k2_tree_test_k_2, k_2_implementations); + +// Builds k = 2 trees from adjacency matrices and checks the t and l bits. +TYPED_TEST(k2_tree_test_k_2, build_from_matrix_test) +{ + vector> mat({{1, 1, 0, 0}, + {0, 1, 0, 0}, + {0, 0, 1, 1}, + {0, 0, 1, 0} + }); + + TypeParam tree(mat); + vector expected_l = {1,1,0,1,1,1,1,0}; + k2_tree_test_nm::check_t_l(tree, {1, 0, 0 ,1}, expected_l); + + // An all-zero matrix is expected to give empty t and l. + mat = vector> ({{0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0} + }); + tree = TypeParam(mat); + k2_tree_test_nm::check_t_l(tree, {}, {}); + + mat = vector>({{0, 0}, + {0, 0} + }); + tree = TypeParam(mat); + ASSERT_TRUE(tree.get_t().empty()); + ASSERT_TRUE(tree.get_l().empty()); + + // Size is smaller than k: + mat = vector>({{0}}); + tree = TypeParam(mat); + k2_tree_test_nm::check_t_l(tree, {}, {}); + + mat = vector>({{1}}); + tree = TypeParam(mat); + k2_tree_test_nm::check_t_l(tree, {}, {1, 0, 0, 0}); + + + // Size is not a power of k: + mat = vector>({{0, 0, 1}, + {0, 1, 0}, + {0, 1, 0} + }); + tree = TypeParam(mat); + expected_l = {0,0,0,1,1,0,0,0,0,1,0,0}; + k2_tree_test_nm::check_t_l(tree, {1, 1, 1 ,0}, expected_l); + + mat = vector>({{0, 0, 0}, + {1, 0, 1}, + {0, 1, 1} + }); + tree = TypeParam(mat); + expected_l = {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}; + k2_tree_test_nm::check_t_l(tree, {1, 1, 1 ,1}, expected_l); + + // Sample from 'k^2 trees for compact web graph representation' paper + mat = vector>({{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 1, 0, 
1, 0, 1}, + {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0} + }); + tree = TypeParam(mat); + vector expected_t = {1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, + 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 + }; + + expected_l = {0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, + 0, 1, 0, 0 + }; + k2_tree_test_nm::check_t_l(tree, expected_t, expected_l); +} + +// Builds k = 2 trees from edge lists and checks the t and l bits. +TYPED_TEST(k2_tree_test_k_2, build_from_edges_array) +{ + typedef std::tuple t_tuple; + vector> e; + + e.push_back(t_tuple {1, 2}); + TypeParam tree(e, 4); + + k2_tree_test_nm::check_t_l(tree, {0, 1, 0 ,0}, {0, 0, 1, 0}); + + // Same edge with graph size 3: the same bits are expected. + tree = TypeParam(e, 3); + k2_tree_test_nm::check_t_l(tree, {0, 1, 0 ,0}, {0, 0, 1, 0}); + + // Listing the edge twice is expected to give the same bits. + e.push_back(t_tuple {1, 2}); + tree = TypeParam(e, 3); + k2_tree_test_nm::check_t_l(tree, {0, 1, 0 ,0}, {0, 0, 1, 0}); + + e.clear(); + e.push_back(t_tuple {0, 0}); + tree = TypeParam(e, 1); + k2_tree_test_nm::check_t_l(tree, {}, {1, 0, 0, 0}); + + e.push_back(t_tuple {0, 1}); + e.push_back(t_tuple {1, 0}); + e.push_back(t_tuple {1, 1}); + tree = TypeParam(e, 2); + k2_tree_test_nm::check_t_l(tree, {}, {1, 1, 1, 1}); + + e.push_back(t_tuple {2, 2}); + tree = TypeParam(e, 3); + k2_tree_test_nm::check_t_l(tree, {1, 0, 0, 1}, {1, 1, 1, 1, 1, 0, 0, 0}); +} + + +TYPED_TEST_CASE(k2_tree_test_k_3, k_3_implementations); + +// Builds k = 3 trees from adjacency matrices and checks the t and l bits. +TYPED_TEST(k2_tree_test_k_3, build_from_matrix_test) +{ + vector> mat({{1, 1, 0, 0, 1}, + {0, 1, 0, 0, 0}, + {0, 0, 1, 1, 0}, + {1, 1, 0, 1, 0}, + {0, 0, 1, 0, 0} + }); + + TypeParam tree(mat); + vector expected_t = {1, 1, 0, 1, 1, 0, 0, 0, 0}; + vector expected_l = {1, 1, 0, 0, 1, 0, 0, 0, 1, + 0, 1, 0, 0, 0, 0, 1, 0, 0, + 1, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0 + }; + k2_tree_test_nm::check_t_l(tree, expected_t, expected_l); + + mat = vector>({{1, 1, 1, 0}, + {1, 0, 0, 0}, + {0, 0, 0, 0}, + {1, 1, 0, 0} + }); + + tree = TypeParam(mat); + 
expected_t = {1, 0, 0, 1, 0, 0, 0, 0, 0}; + expected_l = {1, 1, 1, 1, 0, 0, 0, 0, 0, + 1, 1, 0, 0, 0, 0, 0, 0, 0 + }; + k2_tree_test_nm::check_t_l(tree, expected_t, expected_l); + + // An all-zero matrix is expected to give empty t and l. + mat = vector>({{0, 0, 0}, + {0, 0, 0}, + {0, 0, 0} + }); + tree = TypeParam(mat); + k2_tree_test_nm::check_t_l(tree, {}, {}); + + // Size is smaller than k: + mat = vector>({{0}}); + tree = TypeParam(mat); + k2_tree_test_nm::check_t_l(tree, {}, {}); + + mat = vector>({{1}}); + tree = TypeParam(mat); + k2_tree_test_nm::check_t_l(tree, {}, {1, 0, 0, 0, 0, 0, 0, 0 ,0}); + + mat = vector>({{1, 0}, + {0, 1} + }); + tree = TypeParam(mat); + k2_tree_test_nm::check_t_l(tree, {}, {1, 0, 0, 0, 1, 0, 0, 0 ,0}); + + // Size is a power of k: + mat = vector>({{0, 0, 1, 0, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0, 0} + }); + tree = TypeParam(mat); + expected_t = {1, 0, 0, 0, 0, 0, 1, 0, 0}; + expected_l = {0, 0, 1, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1 + }; + k2_tree_test_nm::check_t_l(tree, expected_t, expected_l); + + // 11x11 matrix, so the size is not a power of k: + mat = vector>({{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0} + }); + tree = TypeParam(mat); + expected_t = {1, 1, 0, 1, 1, 0, 0, 0, 0, + 1, 1, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + expected_l = {0, 1, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, + 1, 0, 1, 1, 0, 0, 0, 0, 0, + 
0, 1, 0, 1, 0, 0, 0, 0, 0 + }; + k2_tree_test_nm::check_t_l(tree, expected_t, expected_l); +} + + +// Builds k = 3 trees from edge lists and checks the t and l bits. +TYPED_TEST(k2_tree_test_k_3, build_from_edges_array) +{ + typedef std::tuple t_tuple; + vector> e; + + e.push_back(t_tuple {1, 2}); + TypeParam tree(e, 4); + + k2_tree_test_nm::check_t_l(tree, {1, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0, 0}); + + // With graph size 3 (equal to k) t is expected to be empty. + tree = TypeParam(e, 3); + k2_tree_test_nm::check_t_l(tree, {}, {0, 0, 0, 0, 0, 1, 0, 0, 0}); + + // Listing the edge twice is expected to give the same bits. + e.push_back(t_tuple {1, 2}); + tree = TypeParam(e, 3); + k2_tree_test_nm::check_t_l(tree, {}, {0, 0, 0, 0, 0, 1, 0, 0, 0}); + + e.clear(); + e.push_back(t_tuple {0, 0}); + tree = TypeParam(e, 1); + k2_tree_test_nm::check_t_l(tree, {}, {1, 0, 0, 0, 0, 0, 0, 0, 0}); + + e.push_back(t_tuple {0, 1}); + e.push_back(t_tuple {1, 0}); + e.push_back(t_tuple {1, 1}); + tree = TypeParam(e, 2); + k2_tree_test_nm::check_t_l(tree, {}, {1, 1, 0, 1, 1, 0, 0, 0, 0}); + + e.clear(); + e.push_back(t_tuple {2, 2}); + tree = TypeParam(e, 3); + k2_tree_test_nm::check_t_l(tree, {}, {0, 0, 0, 0, 0, 0, 0, 0, 1}); +} + +TYPED_TEST_CASE(k2_tree_test, Implementations); + +// Builds a 10-node graph from an unsorted edge list and compares neigh(i) +// against the expected neighbor list of every node. +TYPED_TEST(k2_tree_test, edges_array_exhaustive) +{ + typedef std::tuple t_tuple; + vector> e; + e.push_back(t_tuple {5, 7}); + e.push_back(t_tuple {1, 2}); + e.push_back(t_tuple {3, 9}); + e.push_back(t_tuple {2, 2}); + e.push_back(t_tuple {3, 2}); + e.push_back(t_tuple {7, 5}); + e.push_back(t_tuple {1, 6}); + e.push_back(t_tuple {4, 8}); + e.push_back(t_tuple {4, 1}); + e.push_back(t_tuple {5, 2}); + + TypeParam tree(e, 10); + auto expected_neighbors = vector>(10); + expected_neighbors[0] = vector({}); + expected_neighbors[1] = vector({2, 6}); + expected_neighbors[2] = vector({2}); + expected_neighbors[3] = vector({2, 9}); + expected_neighbors[4] = vector({1, 8}); + expected_neighbors[5] = vector({2, 7}); + expected_neighbors[6] = vector({}); + expected_neighbors[7] = vector({5}); + expected_neighbors[8] = vector({}); + expected_neighbors[9] = vector({}); + for (unsigned i = 
0; i < 10; i++) { + auto actual_neighbors = tree.neigh(i); + ASSERT_EQ(expected_neighbors[i].size(), actual_neighbors.size()); + // Compare element by element: the inner loop must test and advance + // j, not the outer node index i. + for (unsigned j = 0; j < expected_neighbors[i].size(); j++) + ASSERT_EQ(expected_neighbors[i][j], actual_neighbors[j]); + } + + e.clear(); + e.push_back(t_tuple{0, 0}); + tree = TypeParam(e, 1); + ASSERT_EQ(1u, tree.neigh(0).size()); + ASSERT_EQ(0u, tree.neigh(0)[0]); +} + +// Checks neigh() on trees built from adjacency matrices. +TYPED_TEST(k2_tree_test, neighbors_test) +{ + vector> mat({{1, 1, 0, 0}, + {0, 1, 0, 0}, + {0, 0, 1, 1}, + {0, 0, 1, 0} + }); + + TypeParam tree(mat); + auto neigh_0 = tree.neigh(0); + vectorexpected_neigh_0({0, 1}); + ASSERT_EQ(expected_neigh_0.size(), neigh_0.size()); + for (unsigned i = 0; i < neigh_0.size(); i++) + ASSERT_EQ(expected_neigh_0[i], neigh_0[i]); + + auto neigh_3 = tree.neigh(3); + vectorexpected_neigh_3({2}); + ASSERT_EQ(expected_neigh_3.size(), neigh_3.size()); + for (unsigned i = 0; i < neigh_3.size(); i++) + ASSERT_EQ(expected_neigh_3[i], neigh_3[i]); + + mat = vector>({{1}}); + tree = TypeParam(mat); + neigh_0 = tree.neigh(0); + // Check the size first so that element 0 is only read when it exists. + ASSERT_EQ(1u, neigh_0.size()); + ASSERT_EQ(0u, neigh_0[0]); + + mat = vector>({{0, 0, 0}, + {1, 0, 1}, + {0, 1, 1} + }); + tree = TypeParam(mat); + neigh_0 = tree.neigh(0); + ASSERT_EQ(0u, neigh_0.size()); + + auto neigh_1 = tree.neigh(1); + auto expected_neigh_1 = vector({0, 2}); + ASSERT_EQ(expected_neigh_1.size(), neigh_1.size()); + for (unsigned i = 0; i < neigh_1.size(); i++) + ASSERT_EQ(expected_neigh_1[i], neigh_1[i]); + + mat = vector>({{0, 0}, + {0, 0} + }); + tree = TypeParam(mat); + neigh_0 = tree.neigh(0); + ASSERT_EQ(0u, neigh_0.size()); +} + +// Checks reverse_neigh() on trees built from adjacency matrices. +TYPED_TEST(k2_tree_test, reverse_neighbors_test) +{ + vector> mat({{1, 0, 0, 0, 1}, + {0, 0, 0, 0, 0}, + {0, 0, 1, 1, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 1, 0, 1} + }); + + auto tree = TypeParam(mat); + auto r_neigh_0 = tree.reverse_neigh(0); + auto expected_r_neigh_0 = vector({0}); + auto r_neigh_1 = tree.reverse_neigh(1); + auto r_neigh_2 = tree.reverse_neigh(2); + auto 
expected_r_neigh_2 = vector({2, 4}); + ASSERT_EQ(expected_r_neigh_0.size(), r_neigh_0.size()); + ASSERT_EQ(0u, r_neigh_1.size()); + ASSERT_EQ(expected_r_neigh_2.size(), r_neigh_2.size()); + + for (unsigned i = 0; i < r_neigh_0.size(); i++) + ASSERT_EQ(expected_r_neigh_0[i], r_neigh_0[i]); + + for (unsigned i = 0; i < r_neigh_2.size(); i++) + ASSERT_EQ(expected_r_neigh_2[i], r_neigh_2[i]); + + // An all-zero matrix: no node is expected to have reverse neighbors. + mat = vector>({{0, 0}, + {0, 0} + }); + tree = TypeParam(mat); + r_neigh_0 = tree.reverse_neigh(0); + r_neigh_1 = tree.reverse_neigh(1); + ASSERT_EQ(0u, r_neigh_0.size()); + ASSERT_EQ(0u, r_neigh_1.size()); + + + mat = vector>({{0, 1}, + {1, 0} + }); + tree = TypeParam(mat); + r_neigh_0 = tree.reverse_neigh(0); + expected_r_neigh_0 = vector({1}); + r_neigh_1 = tree.reverse_neigh(1); + auto expected_r_neigh_1 = vector({0}); + + ASSERT_EQ(expected_r_neigh_0.size(), r_neigh_0.size()); + ASSERT_EQ(expected_r_neigh_1.size(), r_neigh_1.size()); + for (unsigned i = 0; i < r_neigh_0.size(); i++) + ASSERT_EQ(expected_r_neigh_0[i], r_neigh_0[i]); + + for (unsigned i = 0; i < r_neigh_1.size(); i++) + ASSERT_EQ(expected_r_neigh_1[i], r_neigh_1[i]); +} + +// Checks adj(i, j) against individual cells of the source matrix. +TYPED_TEST(k2_tree_test, adj_test) +{ + vector> mat({{1, 0, 0, 0, 1}, + {0, 0, 0, 0, 0}, + {0, 0, 1, 1, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 1, 0, 1} + }); + + auto tree = TypeParam(mat); + ASSERT_TRUE(tree.adj(0, 0)); + ASSERT_TRUE(tree.adj(0, 4)); + ASSERT_FALSE(tree.adj(4, 0)); + ASSERT_TRUE(tree.adj(4, 4)); + ASSERT_FALSE(tree.adj(1, 1)); + ASSERT_TRUE(tree.adj(2, 2)); + ASSERT_TRUE(tree.adj(2, 3)); + + // Single-cell matrices: + mat = vector>({{0}}); + tree = TypeParam(mat); + ASSERT_FALSE(tree.adj(0,0)); + mat = vector>({{1}}); + tree = TypeParam(mat); + ASSERT_TRUE(tree.adj(0,0)); +} + +// Round-trips several trees through serialize() and load(). +TYPED_TEST(k2_tree_test, serialize_test) +{ + vector> mat({{1, 0, 0, 0, 1}, + {0, 0, 0, 0, 0}, + {0, 0, 1, 1, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 1, 0, 1} + }); + + auto tree = TypeParam(mat); + k2_tree_test_nm::check_serialize_load(tree); + + mat = vector>({{0}}); + tree = 
TypeParam(mat); + k2_tree_test_nm::check_serialize_load(tree); + + // Default constructed tree: + tree = TypeParam(); + k2_tree_test_nm::check_serialize_load(tree); + + mat = vector>({{0, 0}, + {0, 0} + }); + tree = TypeParam(mat); + k2_tree_test_nm::check_serialize_load(tree); + + mat = vector>({{1, 1}, + {1, 1} + }); + tree = TypeParam(mat); + k2_tree_test_nm::check_serialize_load(tree); + +} + +} // namespace + +// Test runner entry point: initializes googletest and runs every test. +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +}