-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from thiagomanel/xeu_utils
Add xeu_utils with parser
- Loading branch information
Showing
10 changed files
with
442 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
## How to build and run the code | ||
|
||
Run the following commands: | ||
|
||
```bash | ||
make # builds the binary | ||
./xeu # runs the binary | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
xeu: *.cpp xeu_utils/*.cpp | ||
g++ xeu_utils/*.cpp *.cpp -o xeu |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#include "xeu_utils/StreamParser.h" | ||
|
||
#include <iostream> | ||
#include <vector> | ||
|
||
using namespace xeu_utils; | ||
using namespace std; | ||
|
||
// Entry point: runs the stream parser once and exits with status 0.
// NOTE(review): StreamParser lives in xeu_utils/StreamParser.h, which is not
// visible here; presumably parse() reads one command line from standard
// input and returns the resulting state -- confirm against that header.
int main() {
  ParsingState parsed = StreamParser().parse();
  // Handy while developing:
  //   cout << parsed.dump();
  //   vector<Command> commands = parsed.commands();
  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#include "Command.h" | ||
|
||
#include <sstream> | ||
#include <string> | ||
#include <vector> | ||
|
||
namespace xeu_utils { | ||
|
||
/**
 * Builds an empty command: no args, and an argv array that holds only the
 * terminating null pointer.
 */
Command::Command() : argv_(1, static_cast<char*>(0)) {}
|
||
const char* Command::filename() { | ||
return argv_[0]; | ||
} | ||
|
||
char* const* Command::argv() { | ||
return &argv_[0]; | ||
} | ||
|
||
const std::vector<std::string>& Command::args() { | ||
return args_; | ||
} | ||
|
||
/**
 * Appends a copy of arg to the command and refreshes the argv pointer array.
 *
 * All argv_ entries are regenerated, not just the last one: push_back may
 * reallocate args_, which relocates the stored strings and would leave the
 * previously cached c_str() pointers dangling. The cost is linear in the
 * number of args per call, which is negligible for command lines.
 */
void Command::add_arg(const std::string& arg) {
  args_.push_back(arg);

  argv_.clear();
  argv_.reserve(args_.size() + 1);
  for (size_t i = 0; i < args_.size(); i++) {
    argv_.push_back(const_cast<char*>(args_[i].c_str()));
  }
  argv_.push_back(0);  // keep the array null-terminated for exec*
}
|
||
/**
 * Wraps unescaped_arg in double quotes, prefixing every embedded double quote
 * and backslash with a backslash. All other characters are copied verbatim.
 */
std::string Command::escape_arg(const std::string& unescaped_arg) {
  std::string quoted;
  quoted.reserve(unescaped_arg.size() + 2);
  quoted.push_back('"');
  for (std::string::const_iterator it = unescaped_arg.begin();
       it != unescaped_arg.end(); ++it) {
    switch (*it) {
      case '"':
      case '\\':
        quoted.push_back('\\');  // escape marker goes before the character
        break;
      default:
        break;
    }
    quoted.push_back(*it);
  }
  quoted.push_back('"');
  return quoted;
}
|
||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#pragma once | ||
|
||
#include <vector> | ||
#include <string> | ||
|
||
namespace xeu_utils { | ||
|
||
/**
 * One command of a pipeline: an ordered list of args, also exposed in the
 * null-terminated char* array form that the exec* functions take.
 */
struct Command {
  /** Creates a command with no args. */
  Command();

  /**
   * Program name for exec*: the first arg (argv[0]).
   * Intended contract: a pointer to an empty string when the command has no
   * args.
   * NOTE(review): confirm that the implementation in Command.cpp honors this
   * for an empty command.
   */
  const char* filename();

  /**
   * The args as a null-terminated array of C strings, in the format required
   * by several exec* variants. The array and the strings belong to this
   * Command; treat them as invalid once the Command is modified or destroyed.
   */
  char* const* argv();

  /**
   * All the args, as a vector of strings.
   */
  const std::vector<std::string>& args();

  /**
   * Appends a new arg at the end of the arg list.
   */
  void add_arg(const std::string& arg);

  /**
   * Returns the arg wrapped in double quotes, with embedded double quotes and
   * backslashes escaped.
   */
  static std::string escape_arg(const std::string& unescaped_arg);

 private:
  std::vector<char*> argv_;        // pointers into args_ (for use in exec*)
  std::vector<std::string> args_;  // owns the arg data
};
|
||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
#include "Command.h" | ||
#include "ParsingState.h" | ||
|
||
#include <sstream> | ||
#include <stdexcept> | ||
#include <string> | ||
#include <vector> | ||
|
||
namespace xeu_utils { | ||
|
||
/**
 * Starts in the initial state: not completed, no pending escape, no error,
 * outside any quotes, with no arg or command parsed yet.
 */
ParsingState::ParsingState()
    : completed_(false),
      backslash_(false),
      error_(false),
      quotes_('\0') {}
|
||
bool ParsingState::error() { | ||
return error_; | ||
} | ||
|
||
bool ParsingState::completed() { | ||
return completed_; | ||
} | ||
|
||
const std::vector<Command> ParsingState::commands() { | ||
return parsed_commands_; | ||
} | ||
|
||
std::string ParsingState::dump() { | ||
std::stringstream ss; | ||
|
||
// general | ||
{ | ||
ss | ||
<< "ParsingState [addr=" << this << "]" << std::endl | ||
<< "> completed_: " << completed_ << std::endl | ||
<< "> backslash_: " << backslash_ << std::endl | ||
<< "> error_: " << error_ << std::endl | ||
<< "> quotes_: " << quotes_ << " (" << (int) quotes_ << ")" << std::endl | ||
<< "> current_arg_: " << current_arg_ << std::endl; | ||
} | ||
// current_command_ | ||
{ | ||
const std::vector<std::string>& args = current_command_.args(); | ||
ss << "> current_command_.args(): " << args.size() << std::endl; | ||
for (size_t i = 0; i < args.size(); i++) { | ||
ss << ">> [" << i << "] " << args[i] << std::endl; | ||
} | ||
} | ||
// parsed_commands_ | ||
{ | ||
ss << "> parsed_commands_: " << parsed_commands_.size() << std::endl; | ||
for (size_t i = 0; i < parsed_commands_.size(); i++) { | ||
const std::vector<std::string>& args = parsed_commands_[i].args(); | ||
ss << ">> [" << i << "].args(): " << args.size() << std::endl; | ||
for (size_t j = 0; j < args.size(); j++) { | ||
ss << ">>> [" << j << "] " << args[j] << std::endl; | ||
} | ||
} | ||
} | ||
|
||
return ss.str(); | ||
} | ||
|
||
int ParsingState::parse_next(const std::string& s) { | ||
for (size_t i = 0; i < s.length(); i++) { | ||
if (completed_) { | ||
return i; | ||
} | ||
parse_next(s[i]); | ||
} | ||
return s.length(); | ||
} | ||
|
||
void ParsingState::parse_next(char c) { | ||
const char SIMPLE_QUOTES = '\''; | ||
const char SPECIAL_QUOTES = '"'; | ||
const char NO_QUOTES = '\0'; | ||
|
||
if (!c) { | ||
throw std::runtime_error("Can't parse a NUL character."); | ||
} else if (completed_) { | ||
throw std::runtime_error("This is already a final state. See completed()."); | ||
} | ||
|
||
switch (c) { | ||
case '\n': | ||
if (backslash_) { | ||
backslash_ = false; | ||
} else if (quotes_ != NO_QUOTES) { | ||
current_arg_ += c; | ||
} else { | ||
complete_command(true /* set this as a final state */); | ||
} | ||
break; | ||
|
||
case '\\': | ||
if (backslash_ || quotes_ == SIMPLE_QUOTES) { | ||
current_arg_ += c; | ||
backslash_ = false; | ||
} else { | ||
backslash_ = true; | ||
} | ||
break; | ||
|
||
case SPECIAL_QUOTES: | ||
case SIMPLE_QUOTES: | ||
// [special case] (\' inside "") is kept exactly as \' | ||
if (backslash_ && quotes_ == SPECIAL_QUOTES && c == SIMPLE_QUOTES) { | ||
current_arg_ += '\\'; | ||
} | ||
// (\') or (\") or (' inside ") or (" inside ') | ||
if (backslash_ || (quotes_ != NO_QUOTES && c != quotes_)) { | ||
current_arg_ += c; | ||
} else { | ||
quotes_ = (c == quotes_ ? 0 : c); | ||
} | ||
backslash_ = false; | ||
break; | ||
|
||
case '|': | ||
// check if this is a pipe | ||
if (!backslash_ && quotes_ == NO_QUOTES) { | ||
if (!complete_command(false /* do not set this as a final state */)) { | ||
// something like: "some_command | | other_command" happened | ||
// even "cmd1 || cmd2" causes this, since we don't understand "||" as | ||
// the OR operator, but instead we process it as two pipe symbols | ||
error_ = true; | ||
throw std::runtime_error("syntax error near unexpected token `|'"); | ||
} | ||
break; | ||
} | ||
// fall to default, this is not a pipe! | ||
|
||
default: | ||
if (!backslash_ && quotes_ == NO_QUOTES && c <= 32 /* c is a delimiter */) { | ||
complete_arg(); | ||
} else { | ||
// [special case] (\<c> inside "") is kept exactly as \<c> | ||
if (backslash_ && quotes_ == SPECIAL_QUOTES) { | ||
current_arg_ += '\\'; | ||
} | ||
current_arg_ += c; | ||
} | ||
backslash_ = false; | ||
break; | ||
} | ||
} | ||
|
||
void ParsingState::complete_arg() { | ||
if (current_arg_.length() > 0) { | ||
current_command_.add_arg(current_arg_); | ||
current_arg_ = ""; | ||
} | ||
} | ||
|
||
bool ParsingState::complete_command(bool in_final_state) { | ||
complete_arg(); // whatever is left here is part of the args of this command | ||
completed_ = in_final_state; | ||
if (!current_command_.args().empty()) { | ||
parsed_commands_.push_back(current_command_); | ||
current_command_ = Command(); | ||
return true; // new command added successfuly | ||
} | ||
return false; // could not add a new command: command is empty (no args)! | ||
} | ||
|
||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#pragma once | ||
|
||
#include "Command.h" | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
namespace xeu_utils { | ||
|
||
struct ParsingState { | ||
ParsingState(); | ||
|
||
/** | ||
* Returns whether the parsing is in a final ("completed") state or not. | ||
*/ | ||
bool completed(); | ||
|
||
/** | ||
* Returns whether an user/syntax error occurred during parsing or not. | ||
* Errors that we know how to handle do not set this to true; those include: | ||
* - Receiving NUL character: we just ignore it | ||
* - Receiving more input after reaching a final state: we already have a nice | ||
* final state, so no need to set this flag. | ||
* Errors that currently set this flag to true are: | ||
* - Receiving a pipe symbol (|) when we have 0 args for the current command | ||
*/ | ||
bool error(); | ||
|
||
/** | ||
* Returns a list of all the commands fully parsed. Note that each command is | ||
* separated by a pipe character '|' in the original input. | ||
* e.g. "ps aux | grep hh_server | wc -l\n" results in three commands: | ||
* commands()[0] = ps aux | ||
* commands()[1] = grep hh_server | ||
* comamnds()[2] = wc -l | ||
*/ | ||
const std::vector<Command> commands(); | ||
|
||
/** | ||
* Produces a string with a dump of the entire state. You can then print it: | ||
* std::cout << p.dump(); // where p is an instance of this ParsingState | ||
*/ | ||
std::string dump(); | ||
|
||
/** | ||
* Calls parse_next(char) for each character in s, until either we call it | ||
* for all character in s or we reach a final ("completed") state. | ||
* Returns how many characters were parsed (may be less than s.length() if we | ||
* reached a final state before iterating over all characters in s). | ||
*/ | ||
int parse_next(const std::string& s); | ||
|
||
/** | ||
* Advances the state. This is a state machine: we are on a state S(str) and | ||
* receive c, so we must advance to state S(str+c). | ||
*/ | ||
void parse_next(char c); | ||
|
||
private: | ||
void complete_arg(); | ||
bool complete_command(bool in_final_state=true); | ||
|
||
bool completed_; | ||
bool backslash_; | ||
bool error_; | ||
char quotes_; | ||
std::string current_arg_; | ||
Command current_command_; | ||
std::vector<Command> parsed_commands_; | ||
}; | ||
|
||
}; |
Oops, something went wrong.