Skip to content

Commit

Permalink
Merge pull request #5 from thiagomanel/xeu_utils
Browse files Browse the repository at this point in the history
Add xeu_utils with parser
  • Loading branch information
thiagomanel authored Apr 9, 2017
2 parents e05c6d9 + b0481ea commit dc08abb
Show file tree
Hide file tree
Showing 10 changed files with 442 additions and 10 deletions.
8 changes: 8 additions & 0 deletions lab2/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## How to build and run the code

Run the following commands:

```bash
make # builds the binary
./xeu # runs the binary
```
2 changes: 2 additions & 0 deletions lab2/makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Builds the `xeu` binary from every source file in this directory and in
# xeu_utils/. The headers are listed as prerequisites too, so that editing only
# a header still triggers a rebuild. The recipe line must start with a TAB.
xeu: *.cpp xeu_utils/*.cpp xeu_utils/*.h
	g++ xeu_utils/*.cpp *.cpp -o xeu
10 changes: 0 additions & 10 deletions lab2/xeu.c

This file was deleted.

14 changes: 14 additions & 0 deletions lab2/xeu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "xeu_utils/StreamParser.h"

#include <iostream>
#include <vector>

using namespace xeu_utils;
using namespace std;

// Entry point: runs StreamParser().parse() once and keeps the resulting
// ParsingState in `p`. Nothing is done with the result yet; the commented-out
// lines below show how to print the state or fetch the parsed commands.
// NOTE(review): StreamParser is declared in xeu_utils/StreamParser.h, which is
// not shown here -- presumably parse() reads one command line from standard
// input; confirm there.
int main() {
ParsingState p = StreamParser().parse();
// cout << p.dump();
// vector<Command> commands = p.commands();
return 0;
}
45 changes: 45 additions & 0 deletions lab2/xeu_utils/Command.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include "Command.h"

#include <sstream>
#include <string>
#include <vector>

namespace xeu_utils {

// Creates a command without args. The exec-style pointer table starts with a
// single null entry, so it is null-terminated even before any arg is added.
Command::Command() {
argv_.push_back(0);
}

const char* Command::filename() {
return argv_[0];
}

char* const* Command::argv() {
return &argv_[0];
}

// Read-only access to the args added so far, in the order they were added.
const std::vector<std::string>& Command::args() {
return args_;
}

/**
 * Appends a copy of `arg` to the command and refreshes the exec-style pointer
 * table so that it mirrors args_ and ends with a null entry.
 *
 * The whole table is rebuilt rather than only patching its last slot:
 * args_.push_back() may reallocate and relocate the stored strings, which
 * invalidates every c_str() pointer taken earlier (short strings keep their
 * characters inside the string object itself). Argument lists are short, so
 * the linear rebuild is negligible.
 */
void Command::add_arg(const std::string& arg) {
  args_.push_back(arg);

  argv_.clear();
  argv_.reserve(args_.size() + 1);
  for (size_t i = 0; i < args_.size(); i++) {
    // exec* takes `char* const[]`, hence the const_cast; nothing writes here.
    argv_.push_back(const_cast<char*>(args_[i].c_str()));
  }
  argv_.push_back(0);  // terminating null entry required by exec*
}

/**
 * Wraps `unescaped_arg` in double quotes, putting a backslash in front of
 * every double quote and every backslash found inside it.
 * e.g. say "hi"  becomes  "say \"hi\""
 */
std::string Command::escape_arg(const std::string& unescaped_arg) {
  std::string quoted(1, '"');
  for (std::string::const_iterator it = unescaped_arg.begin();
       it != unescaped_arg.end(); ++it) {
    const bool needs_backslash = (*it == '"' || *it == '\\');
    if (needs_backslash) {
      quoted += '\\';
    }
    quoted += *it;
  }
  quoted += '"';
  return quoted;
}

};
45 changes: 45 additions & 0 deletions lab2/xeu_utils/Command.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#pragma once

#include <vector>
#include <string>

namespace xeu_utils {

/**
 * One command of a pipeline: an ordered list of args, where the first arg is
 * the program to run. Besides the strings themselves it keeps a
 * null-terminated table of C-string pointers for use with exec*.
 */
struct Command {
  Command();

  /**
   * Copies the args of `other` and points the exec-style table at the strings
   * owned by the new object. A member-wise copy would duplicate pointers that
   * still refer to the strings of `other` and dangle once `other` changes or
   * is destroyed.
   */
  Command(const Command& other) : argv_(), args_(other.args_) {
    rebuild_argv();
  }

  /**
   * Assignment counterpart of the copy constructor: copies the args and
   * re-points the exec-style table at this object's own strings.
   */
  Command& operator=(const Command& other) {
    if (this != &other) {
      args_ = other.args_;
      rebuild_argv();
    }
    return *this;
  }

  /**
   * NOTE: this is useful for using in exec*.
   * Returns the filename in the command (i.e. the first arg == argv[0]), or a
   * pointer to an empty string if the command has no args.
   */
  const char* filename();

  /**
   * NOTE: this is useful for using in exec*.
   * Returns a constant pointer to the args in the format required by some of
   * exec variations (possibly the one you will choose to use): an array of C
   * strings terminated by a null pointer.
   */
  char* const* argv();

  /**
   * Returns a list of all the args as a vector of strings.
   */
  const std::vector<std::string>& args();

  /**
   * Pushes a new arg to the arg list of the command.
   */
  void add_arg(const std::string& arg);

  /**
   * Escapes an arg and embeds it in double quotes.
   */
  static std::string escape_arg(const std::string& unescaped_arg);

 private:
  /**
   * Makes argv_ hold one pointer per string in args_, followed by the null
   * entry that terminates the table.
   */
  void rebuild_argv() {
    argv_.clear();
    argv_.reserve(args_.size() + 1);
    for (size_t i = 0; i < args_.size(); i++) {
      // exec* takes `char* const[]`, hence the const_cast; nothing writes here.
      argv_.push_back(const_cast<char*>(args_[i].c_str()));
    }
    argv_.push_back(0);
  }

  std::vector<char*> argv_; // stores pointers to the data (for use in exec*)
  std::vector<std::string> args_; // stores the data
};

};
165 changes: 165 additions & 0 deletions lab2/xeu_utils/ParsingState.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#include "Command.h"
#include "ParsingState.h"

#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace xeu_utils {

// Starts in the initial state: not completed, no pending backslash, no error
// and outside of any quotes ('\0' means "no quotes"). The remaining members
// are default-constructed, i.e. empty.
ParsingState::ParsingState()
    : completed_(false),
      backslash_(false),
      error_(false),
      quotes_('\0') {}

// Reports the error_ flag, which parse_next(char) raises right before throwing
// on an unexpected pipe symbol.
bool ParsingState::error() {
return error_;
}

// True once an unquoted, unescaped newline has ended the command line.
bool ParsingState::completed() {
return completed_;
}

// Returns the commands completed so far. The vector is returned by value, so
// the caller receives a copy of every Command.
const std::vector<Command> ParsingState::commands() {
return parsed_commands_;
}

// Appends one "<marker> [<index>] <arg>" line to `out` for each entry of
// `args`.
static void dump_arg_lines(std::stringstream& out, const char* marker,
                           const std::vector<std::string>& args) {
  for (size_t k = 0; k < args.size(); ++k) {
    out << marker << " [" << k << "] " << args[k] << '\n';
  }
}

// Builds a human-readable, multi-line description of the whole state: the
// scalar flags, the arg being accumulated, the args of the command under
// construction and the args of every command already completed.
std::string ParsingState::dump() {
  std::stringstream out;

  // Scalar state. quotes_ is written both as a character and as its code.
  out << "ParsingState [addr=" << this << "]" << '\n';
  out << "> completed_: " << completed_ << '\n';
  out << "> backslash_: " << backslash_ << '\n';
  out << "> error_: " << error_ << '\n';
  out << "> quotes_: " << quotes_
      << " (" << static_cast<int>(quotes_) << ")" << '\n';
  out << "> current_arg_: " << current_arg_ << '\n';

  // Command still being built.
  const std::vector<std::string>& pending = current_command_.args();
  out << "> current_command_.args(): " << pending.size() << '\n';
  dump_arg_lines(out, ">>", pending);

  // Commands already completed.
  const size_t total = parsed_commands_.size();
  out << "> parsed_commands_: " << total << '\n';
  for (size_t c = 0; c < total; ++c) {
    const std::vector<std::string>& args = parsed_commands_[c].args();
    out << ">> [" << c << "].args(): " << args.size() << '\n';
    dump_arg_lines(out, ">>>", args);
  }

  return out.str();
}

// Feeds the characters of `s` to parse_next(char) one at a time and stops as
// soon as the state is final. Returns how many characters were consumed, which
// is s.length() unless a final state was reached earlier. Exceptions thrown by
// parse_next(char) propagate to the caller.
int ParsingState::parse_next(const std::string& s) {
  size_t consumed = 0;
  while (consumed < s.length() && !completed_) {
    parse_next(s[consumed]);
    ++consumed;
  }
  return consumed;
}

/**
 * Advances the state machine by one character.
 *
 * Handles, in this order of precedence: newline (end of input, line
 * continuation after a backslash, or literal inside quotes), backslash
 * escapes, single/double quotes, the pipe symbol and everything else
 * (delimiters or ordinary characters).
 *
 * Throws std::runtime_error when `c` is NUL, when the state is already final,
 * or when a pipe symbol shows up while the current command has no args (in
 * that last case error_ is set before throwing).
 */
void ParsingState::parse_next(char c) {
  const char SIMPLE_QUOTES = '\'';
  const char SPECIAL_QUOTES = '"';
  const char NO_QUOTES = '\0';

  if (!c) {
    throw std::runtime_error("Can't parse a NUL character.");
  } else if (completed_) {
    throw std::runtime_error("This is already a final state. See completed().");
  }

  // Delimiters are the control characters and the space (codes 1..32). The
  // comparison is done on the unsigned value: where plain char is signed,
  // bytes >= 0x80 (e.g. the bytes of UTF-8 text) are negative and would
  // otherwise be mistaken for delimiters and dropped from the arg.
  const bool is_delimiter = static_cast<unsigned char>(c) <= ' ';

  switch (c) {
    case '\n':
      if (backslash_) {
        // backslash + newline is a line continuation: both are discarded
        backslash_ = false;
      } else if (quotes_ != NO_QUOTES) {
        // a newline inside quotes is part of the arg
        current_arg_ += c;
      } else {
        complete_command(true /* set this as a final state */);
      }
      break;

    case '\\':
      // (\\) yields one backslash; inside '' a backslash is always literal
      if (backslash_ || quotes_ == SIMPLE_QUOTES) {
        current_arg_ += c;
        backslash_ = false;
      } else {
        backslash_ = true;
      }
      break;

    case SPECIAL_QUOTES:
    case SIMPLE_QUOTES:
      // [special case] (\' inside "") is kept exactly as \'
      if (backslash_ && quotes_ == SPECIAL_QUOTES && c == SIMPLE_QUOTES) {
        current_arg_ += '\\';
      }
      // (\') or (\") or (' inside ") or (" inside ')
      if (backslash_ || (quotes_ != NO_QUOTES && c != quotes_)) {
        current_arg_ += c;
      } else {
        // opens a quoted section, or closes the one opened by the same char
        quotes_ = (c == quotes_) ? NO_QUOTES : c;
      }
      backslash_ = false;
      break;

    case '|':
      // check if this is a pipe
      if (!backslash_ && quotes_ == NO_QUOTES) {
        if (!complete_command(false /* do not set this as a final state */)) {
          // something like: "some_command | | other_command" happened
          // even "cmd1 || cmd2" causes this, since we don't understand "||" as
          // the OR operator, but instead we process it as two pipe symbols
          error_ = true;
          throw std::runtime_error("syntax error near unexpected token `|'");
        }
        break;
      }
      // fall to default, this is not a pipe!

    default:
      if (!backslash_ && quotes_ == NO_QUOTES && is_delimiter) {
        complete_arg();
      } else {
        // [special case] (\<c> inside "") is kept exactly as \<c>
        if (backslash_ && quotes_ == SPECIAL_QUOTES) {
          current_arg_ += '\\';
        }
        current_arg_ += c;
      }
      backslash_ = false;
      break;
  }
}

// Moves the arg accumulated so far into the current command and starts a new,
// empty arg. Does nothing when no character has been accumulated.
void ParsingState::complete_arg() {
  if (current_arg_.empty()) {
    return;
  }
  current_command_.add_arg(current_arg_);
  current_arg_.clear();
}

// Closes the command under construction. Any pending arg is flushed into it
// first, and completed_ takes the value of `in_final_state`. Returns true when
// the command had at least one arg and was appended to parsed_commands_;
// returns false, storing nothing, when the command was empty.
bool ParsingState::complete_command(bool in_final_state) {
  complete_arg();  // whatever is left belongs to the args of this command
  completed_ = in_final_state;

  if (current_command_.args().empty()) {
    return false;  // nothing to add: the command has no args
  }

  parsed_commands_.push_back(current_command_);
  current_command_ = Command();
  return true;  // new command added successfully
}

};
72 changes: 72 additions & 0 deletions lab2/xeu_utils/ParsingState.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#pragma once

#include "Command.h"

#include <string>
#include <vector>

namespace xeu_utils {

/**
 * State machine that turns a command line, fed one character at a time, into
 * a list of commands separated by pipe symbols.
 */
struct ParsingState {
/**
 * Creates the initial state: nothing parsed, not completed, no error.
 */
ParsingState();

/**
 * Returns whether the parsing is in a final ("completed") state or not.
 */
bool completed();

/**
 * Returns whether a user/syntax error occurred during parsing or not.
 * Not every rejected input sets this flag. These cases leave it untouched:
 * - Receiving a NUL character: parse_next(char) throws std::runtime_error.
 * - Receiving more input after reaching a final state: parse_next(char)
 *   throws std::runtime_error, while parse_next(const std::string&) simply
 *   stops consuming characters.
 * Errors that currently set this flag to true are:
 * - Receiving a pipe symbol (|) when we have 0 args for the current command
 *   (parse_next(char) also throws std::runtime_error in this case).
 */
bool error();

/**
 * Returns a list of all the commands fully parsed. Note that each command is
 * separated by a pipe character '|' in the original input.
 * e.g. "ps aux | grep hh_server | wc -l\n" results in three commands:
 * commands()[0] = ps aux
 * commands()[1] = grep hh_server
 * commands()[2] = wc -l
 * The list is returned by value, i.e. the caller gets a copy.
 */
const std::vector<Command> commands();

/**
 * Produces a string with a dump of the entire state. You can then print it:
 * std::cout << p.dump(); // where p is an instance of this ParsingState
 */
std::string dump();

/**
 * Calls parse_next(char) for each character in s, until either we call it
 * for all characters in s or we reach a final ("completed") state.
 * Returns how many characters were parsed (may be less than s.length() if we
 * reached a final state before iterating over all characters in s).
 */
int parse_next(const std::string& s);

/**
 * Advances the state. This is a state machine: we are on a state S(str) and
 * receive c, so we must advance to state S(str+c).
 * Throws std::runtime_error if c is NUL, if the state is already final, or if
 * c is a pipe symbol and the current command has no args.
 */
void parse_next(char c);

private:
// Moves current_arg_ (when not empty) into current_command_.
void complete_arg();
// Flushes the pending arg, sets completed_ to in_final_state and, when the
// current command has args, appends it to parsed_commands_. Returns whether a
// command was appended.
bool complete_command(bool in_final_state=true);

bool completed_; // a final state was reached
bool backslash_; // the previous character was an unescaped backslash
bool error_; // a syntax error was detected (see error())
char quotes_; // quote character currently open, or '\0' when outside quotes
std::string current_arg_; // characters of the arg being accumulated
Command current_command_; // command being built from the completed args
std::vector<Command> parsed_commands_; // commands already completed
};

};
Loading

0 comments on commit dc08abb

Please sign in to comment.