Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/planop parameter serialization #341

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/bin/units_access/ops_select.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,15 @@ TEST_F(SelectTests, simple_expression) {
// Checks that a SimpleTableScan description that carries only a "type" entry
// (and therefore no "predicates") is rejected with std::runtime_error.
TEST_F(SelectTests, should_throw_without_predicates) {
// Minimal operator description: an empty JSON object with just the type set.
Json::Value v(Json::objectValue);
v["type"] = "SimpleTableScan";
// NOTE(review): this parse-based assertion and the archive-based one below look
// like the two sides of the same diff hunk — confirm which one is meant to remain.
ASSERT_THROW(SimpleTableScan::parse(v), std::runtime_error);

// Render the JSON value as text so it can be handed to cereal through a stream.
std::stringstream ss;
ss << v.toStyledString();

cereal::JSONInputArchive archive(ss);

typename SimpleTableScan::Parameters params;

// Reading the parameters from the archive is expected to throw.
ASSERT_THROW(params.serialize(archive), std::runtime_error);
}


Expand Down
34 changes: 14 additions & 20 deletions src/lib/access/SimpleTableScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,26 @@

#include "helper/checked_cast.h"

#include <cereal/archives/json.hpp>
#include <cereal/types/string.hpp>
#include <sstream>

namespace hyrise {
namespace access {

namespace {
auto _ = QueryParser::registerPlanOperation<SimpleTableScan>("SimpleTableScan");
auto _ = QueryParser::registerSerializablePlanOperation<SimpleTableScan>("SimpleTableScan");
}

SimpleTableScan::SimpleTableScan() : _comparator(nullptr) {}
SimpleTableScan::SimpleTableScan() : _comparator(nullptr), _ofDelta(false) {}

// Constructs a scan from an already-deserialized Parameters object.
//
// The predicate is always built from parameters.predicates. The two optional
// flags are applied only when they are set; otherwise the scan keeps its
// defaults (_ofDelta stays false and setProducesPositions is not called).
SimpleTableScan::SimpleTableScan(const Parameters& parameters) : _comparator(nullptr), _ofDelta(false) {
  setPredicate(buildExpression(parameters.predicates));

  // "materializing" is the inverse of "produces positions".
  const auto& materializing = parameters.materializing;
  if (materializing) {
    setProducesPositions(!*materializing);
  }

  const auto& ofDelta = parameters.ofDelta;
  if (ofDelta) {
    _ofDelta = *ofDelta;
  }
}

SimpleTableScan::~SimpleTableScan() {
if (_comparator)
Expand Down Expand Up @@ -62,24 +74,6 @@ void SimpleTableScan::executePlanOperation() {
}
}

// Creates a SimpleTableScan from its JSON plan description.
//
// Recognised members:
//   "materializing" (optional) - when present, the scan produces positions
//                                iff the value is false.
//   "predicates"    (required) - passed to buildExpression to obtain the predicate.
//   "ofDelta"       (optional) - when present, stored in _ofDelta.
//
// Throws std::runtime_error if "predicates" is missing.
std::shared_ptr<PlanOperation> SimpleTableScan::parse(const Json::Value& data) {
  auto scan = std::make_shared<SimpleTableScan>();

  if (data.isMember("materializing")) {
    const bool materializing = data["materializing"].asBool();
    scan->setProducesPositions(!materializing);
  }

  // A selection without any predicate is treated as a malformed plan.
  if (!data.isMember("predicates")) {
    throw std::runtime_error("There is no reason for a Selection without predicates");
  }
  scan->setPredicate(buildExpression(data["predicates"]));

  if (data.isMember("ofDelta")) {
    scan->_ofDelta = data["ofDelta"].asBool();
  }

  return scan;
}

const std::string SimpleTableScan::vname() { return "SimpleTableScan"; }

void SimpleTableScan::setPredicate(SimpleExpression* c) { _comparator = c; }
Expand Down
16 changes: 14 additions & 2 deletions src/lib/access/SimpleTableScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,38 @@

#include "access/system/ParallelizablePlanOperation.h"
#include "access/expressions/pred_SimpleExpression.h"
#include "helper/serialization.h"

namespace hyrise {
namespace access {

// Plan operation derived from ParallelizablePlanOperation that holds a
// SimpleExpression predicate (_comparator) and an _ofDelta flag. It can be
// default-constructed or constructed from a serializable Parameters object.
class SimpleTableScan : public ParallelizablePlanOperation {

public:
// Serializable parameter set for this operation; the SERIALIZE macro lists the
// fields that take part in (de)serialization.
struct Parameters {
// Operator type name as it appears in the plan description.
std::string type;
// Raw JSON describing the predicates; handed to buildExpression by the constructor.
Json::Value predicates;
// Optional flags: when set, "materializing" is negated and passed to
// setProducesPositions, and "ofDelta" is copied into _ofDelta.
std::optional<bool> materializing, ofDelta;

SERIALIZE(type, predicates, materializing, ofDelta)
};

public:
SimpleTableScan();
// Builds the scan from deserialized parameters.
SimpleTableScan(const Parameters& parameters);
virtual ~SimpleTableScan();

void setupPlanOperation();
void executePlanOperation();
void executePositional();
void executeMaterialized();
// NOTE(review): the accompanying .cpp hunk appears to remove the definition of
// parse() — confirm whether this declaration is the pre-change side of the diff.
static std::shared_ptr<PlanOperation> parse(const Json::Value& data);

const std::string vname();
// Stores the given expression pointer in _comparator.
void setPredicate(SimpleExpression* c);

private:
SimpleExpression* _comparator;
// NOTE(review): the two _ofDelta declarations below look like the old and new
// sides of the diff (in-class initializer vs. constructor-initialized) — only
// one can exist in the real header.
bool _ofDelta = false;
bool _ofDelta;
};
}
}
Expand Down
93 changes: 48 additions & 45 deletions src/lib/access/storage/TableLoad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,35 @@ namespace hyrise {
namespace access {

namespace {
auto _ = QueryParser::registerPlanOperation<TableLoad>("TableLoad");
auto _ = QueryParser::registerSerializablePlanOperation<TableLoad>("TableLoad");
log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("access.plan.PlanOperation"));
}

TableLoad::TableLoad() : _hasDelimiter(false), _binary(false), _unsafe(false), _raw(false), _nonvolatile(false) {}

TableLoad::TableLoad(const Parameters& parameters)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just leave Parameters as a member? The way this is currently implemented enforces a lot of extra typing for no extra value.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See fe50fff.

// Member initializers for the Parameters-based constructor.
//
// Every member is initialized exactly once:
//  - table, filename, header, header_string and delimiter are copied as-is
//    (the optional ones stay unset when the plan did not provide them);
//  - path, unsafe and raw always end up holding a value, falling back to
//    "" / false / false, because executePlanOperation dereferences them;
//  - _nonvolatile and _binary are not part of Parameters and are set to false,
//    the same values the default constructor uses. Previously they were left
//    uninitialized on this construction path.
: _table_name(parameters.table),
  _file_name(parameters.filename),
  _header_file_name(parameters.header),
  _header_string(parameters.header_string),
  _delimiter(parameters.delimiter),
  _path(parameters.path ? *parameters.path : std::string("")),
  _unsafe(parameters.unsafe ? *parameters.unsafe : false),
  _raw(parameters.raw ? *parameters.raw : false),
  _nonvolatile(false),
  _binary(false) {}

TableLoad::TableLoad() : _path(std::string("")), _unsafe(false), _raw(false), _nonvolatile(false), _binary(false) {}

TableLoad::~TableLoad() {}

Expand All @@ -31,32 +55,32 @@ void TableLoad::executePlanOperation() {
// load from absolute path?

// Load Raw Table
if (_raw) {
if (*_raw) {
io::Loader::params p;
p.setHeader(io::CSVHeader(_file_name));
p.setInput(io::RawTableLoader(_file_name));
sm->loadTable(_table_name, p, _path);
sm->loadTable(_table_name, p, *_path);

} else if (!_header_string.empty()) {
} else if (_header_string) {
// Load based on header string
auto p = io::Loader::shortcuts::loadWithStringHeaderParams(_file_name, _header_string);
sm->loadTable(_table_name, p, _path);
auto p = io::Loader::shortcuts::loadWithStringHeaderParams(_file_name, *_header_string);
sm->loadTable(_table_name, p, *_path);

} else if (_header_file_name.empty()) {
} else if (!_header_file_name) {
// Load only with single file
sm->loadTableFile(_table_name, _file_name, _path);
sm->loadTableFile(_table_name, _file_name, *_path);

} else if ((!_table_name.empty()) && (!_file_name.empty()) && (!_header_file_name.empty())) {
} else if ((!_table_name.empty()) && (!_file_name.empty()) && (_header_file_name)) {

// Load with dedicated header file
io::Loader::params p;
p.setCompressed(false);
p.setHeader(io::CSVHeader(_header_file_name));
auto params = io::CSVInput::params().setUnsafe(_unsafe);
if (_hasDelimiter)
params.setCSVParams(io::csv::params().setDelimiter(_delimiter.at(0)));
p.setHeader(io::CSVHeader(*_header_file_name));
auto params = io::CSVInput::params().setUnsafe(*_unsafe);
if (_delimiter)
params.setCSVParams(io::csv::params().setDelimiter((*_delimiter).at(0)));
p.setInput(io::CSVInput(_file_name, params));
sm->loadTable(_table_name, p, _path);
sm->loadTable(_table_name, p, *_path);
}
auto table = sm->getTable(_table_name);
table->setName(_table_name);
Expand All @@ -71,47 +95,26 @@ void TableLoad::executePlanOperation() {
addResult(_table);
}

// Creates a TableLoad operation from its JSON plan description.
//
// "table", "filename", "header", "header_string", "unsafe" and "raw" are read
// unconditionally. "delimiter" is applied only when present, and "path" falls
// back to the empty string when absent.
std::shared_ptr<PlanOperation> TableLoad::parse(const Json::Value& data) {
  auto op = std::make_shared<TableLoad>();

  op->setTableName(data["table"].asString());
  op->setFileName(data["filename"].asString());
  op->setHeaderFileName(data["header"].asString());
  op->setHeaderString(data["header_string"].asString());
  op->setUnsafe(data["unsafe"].asBool());
  op->setRaw(data["raw"].asBool());

  if (data.isMember("delimiter")) {
    op->setDelimiter(data["delimiter"].asString());
  }

  // Always set a path so later loads have a defined value to work with.
  op->setPath(data.isMember("path") ? data["path"].asString() : std::string(""));

  return op;
}

const std::string TableLoad::vname() { return "TableLoad"; }

void TableLoad::setTableName(const std::string& tablename) { _table_name = tablename; }

void TableLoad::setFileName(const std::string& filename) { _file_name = filename; }

void TableLoad::setPath(const std::string& path) { _path = path; }

void TableLoad::setHeaderFileName(const std::string& filename) { _header_file_name = filename; }
void TableLoad::setHeaderFileName(const std::string& filename) {
_header_file_name = std::optional<std::string>(filename);
}

void TableLoad::setHeaderString(const std::string& header) { _header_string = header; }
void TableLoad::setHeaderString(const std::string& header) { _header_string = std::optional<std::string>(header); }

void TableLoad::setBinary(const bool binary) { _binary = binary; }
void TableLoad::setUnsafe(const bool unsafe) { _unsafe = std::optional<bool>(unsafe); }

void TableLoad::setUnsafe(const bool unsafe) { _unsafe = unsafe; }
void TableLoad::setRaw(const bool raw) { _raw = std::optional<bool>(raw); }

void TableLoad::setRaw(const bool raw) { _raw = raw; }
void TableLoad::setDelimiter(const std::string& d) { _delimiter = std::optional<std::string>(d); }
void TableLoad::setPath(const std::string& path) { _path = std::optional<std::string>(path); }

void TableLoad::setDelimiter(const std::string& d) {
_delimiter = d;
_hasDelimiter = true;
}
void TableLoad::setBinary(const bool binary) { _binary = binary; }

void TableLoad::setNonvolatile(const bool nonvolatile) { _nonvolatile = nonvolatile; }
}
Expand Down
28 changes: 19 additions & 9 deletions src/lib/access/storage/TableLoad.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define SRC_LIB_ACCESS_TABLELOAD_H_

#include "access/system/PlanOperation.h"
#include "helper/serialization.h"

namespace hyrise {
namespace access {
Expand All @@ -14,12 +15,22 @@ class TableLoad : public PlanOperation {
friend class LoadTests_simple_load_op_Test;
friend class LoadTests_simple_unloadall_op_Test;

public:
// Serializable parameter set for TableLoad. The three leading strings are
// plain (non-optional) members; everything else is optional and stays unset
// when the plan description omits it.
struct Parameters {
  std::string type;
  std::string table;
  std::string filename;

  std::optional<std::string> header;
  std::optional<std::string> header_string;
  std::optional<std::string> delimiter;
  std::optional<std::string> path;

  std::optional<bool> unsafe;
  std::optional<bool> raw;

  // Fields taking part in (de)serialization.
  SERIALIZE(type, table, filename, header, header_string, delimiter, path, unsafe, raw)
};

public:
TableLoad();
TableLoad(const Parameters& parameters);
virtual ~TableLoad();

void executePlanOperation();
static std::shared_ptr<PlanOperation> parse(const Json::Value& data);

const std::string vname();
void setTableName(const std::string& tablename);
void setFileName(const std::string& filename);
Expand All @@ -34,16 +45,15 @@ class TableLoad : public PlanOperation {

private:
std::string _table_name;
std::string _header_file_name;
std::string _file_name;
std::string _path;
std::string _header_string;
std::string _delimiter;
bool _hasDelimiter;
bool _binary;
bool _unsafe;
bool _raw;
std::optional<std::string> _header_file_name;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to list the members of the parameter struct twice?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We wouldn't necessarily need them twice; we could just (as you suggested below) use the Parameters struct as a member.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in fe50fff.

std::optional<std::string> _header_string;
std::optional<std::string> _delimiter;
std::optional<std::string> _path;
std::optional<bool> _unsafe;
std::optional<bool> _raw;
bool _nonvolatile;
bool _binary;
};
}
}
Expand Down
25 changes: 25 additions & 0 deletions src/lib/access/system/QueryParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
#include <string>
#include <stdexcept>
#include <mutex>
#include <sstream>

#include <json.h>

#include "helper/cereal/HyriseCerealJsonArchive.h"
#include "access/system/BasicParser.h"

const std::string autojsonReferenceTableId = "-1";
Expand Down Expand Up @@ -38,6 +40,7 @@ struct AbstractQueryParserFactory {

struct parse_construct {};
struct default_construct {};
struct cereal_construct {};

template <typename T, typename parse_construction>
struct QueryParserFactory;
Expand All @@ -56,6 +59,22 @@ struct QueryParserFactory<T, default_construct> : public AbstractQueryParserFact
}
};

template <typename T>
struct QueryParserFactory<T, cereal_construct> : public AbstractQueryParserFactory {

virtual std::shared_ptr<PlanOperation> parse(const Json::Value& data) {
std::stringstream ss;
ss << data.toStyledString();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using .toStyledString, I'd suggest using jsoncpp's FastWriter (http://jsoncpp.sourceforge.net/class_json_1_1_fast_writer.html).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


cereal::JSONInputArchive archive(ss);

typename T::Parameters params;
params.serialize(archive);

return std::make_shared<T>(params);
}
};

/*
* The Query Parser parses a given Json Value to create a plan operation
*
Expand Down Expand Up @@ -108,6 +127,12 @@ class QueryParser {
return true;
}

template <typename T>
static bool registerSerializablePlanOperation(const std::string& name) {
QueryParser::instance()._factory[name] = new QueryParserFactory<T, cereal_construct>();
return true;
}

std::shared_ptr<PlanOperation> parse(std::string name, const Json::Value& d);

static QueryParser& instance();
Expand Down
Loading