Skip to content

Commit

Permalink
Transformation word_count (#341)
Browse files Browse the repository at this point in the history
* word_count impl

* updated the word_count func

* add dynamic metadata test for JSON word_count

* remove cruft

* revert test

* partially fixed spaces

* account for spaces at beginning

* changelog

* word_count returns int now
  • Loading branch information
EItanya authored Jun 3, 2024
1 parent d53c393 commit cd5f894
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 0 deletions.
6 changes: 6 additions & 0 deletions changelog/v1.30.1-patch2/word_count.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
changelog:
- type: NEW_FEATURE
issueLink: https://github.com/solo-io/envoy-gloo/issues/343
description: >-
Add a new inja template function to count the words in a JSON message. The new function, `word_count`,
can handle any JSON type, including nested JSON messages, and counts all the words in the message.
72 changes: 72 additions & 0 deletions source/extensions/filters/http/transformation/inja_transformer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,9 @@ TransformerInstance::TransformerInstance(ThreadLocal::Slot &tls, Envoy::Random::
env_.add_callback("raw_string", 1, [this](Arguments &args) {
return raw_string_callback(args);
});
env_.add_callback("word_count", 1, [](Arguments &args) {
return word_count_callback(args);
});
}

json TransformerInstance::header_callback(const inja::Arguments &args) const {
Expand Down Expand Up @@ -410,6 +413,75 @@ json TransformerInstance::base64url_decode_callback(const inja::Arguments &args)
return Base64Url::decode(input);
}

// Inja callback backing the `word_count` template function (registered with
// one argument). Forwards that single argument to json_word_count and returns
// the resulting count as a JSON value.
json TransformerInstance::word_count_callback(const inja::Arguments &args) {
  const auto *target = args.at(0);
  const int words = json_word_count(target);
  return words;
}

// Recursively counts the words contained in a JSON value.
//
//  - string:           the number of whitespace-separated words in the string
//  - array:            the sum of the counts of all elements
//  - object:           for every member, the words in the key plus the count
//                      of the value
//  - number / boolean: always exactly one word
//  - anything else (e.g. null): zero
//
// `input` must point to a valid JSON value; it is dereferenced unconditionally.
int TransformerInstance::json_word_count(const nlohmann::json* input) {
  if (input->is_string()) {
    return word_count(input->get_ref<const std::string &>());
  }

  if (input->is_array()) {
    int total_word_count = 0;
    for (const auto &element : input->get_ref<const std::vector<json> &>()) {
      total_word_count += json_word_count(&element);
    }
    return total_word_count;
  }

  if (input->is_object()) {
    int total_word_count = 0;
    // Bind by reference: a plain `auto` here would deep-copy the whole object
    // (and all nested values) at every level of the recursion.
    const auto &members = input->get_ref<const json::object_t &>();
    for (const auto &[key, value] : members) {
      total_word_count += word_count(key);
      total_word_count += json_word_count(&value);
    }
    return total_word_count;
  }

  if (input->is_number() || input->is_boolean()) {
    // Booleans and numbers are a single token regardless of their value.
    return 1;
  }

  return 0;
}

// Counts the whitespace-separated words in `input_string`.
//
// A word is a maximal run of characters that isspace() does not classify as
// whitespace. Leading, trailing and repeated whitespace never produce extra
// words, so an empty or all-whitespace string yields 0.
int TransformerInstance::word_count(const std::string& input_string) {
  int words = 0;
  bool in_word = false;

  for (const char c : input_string) {
    // isspace() has undefined behavior for negative arguments other than EOF,
    // so bytes >= 0x80 (e.g. UTF-8 multi-byte sequences) must be passed as
    // unsigned char.
    if (isspace(static_cast<unsigned char>(c))) {
      in_word = false;
    } else if (!in_word) {
      // First non-space character after whitespace (or at the very start):
      // a new word begins here.
      in_word = true;
      ++words;
    }
  }

  return words;
}


// return a substring of the input string, starting at the start position
// and extending for length characters. If length is not provided, the
// substring will extend to the end of the string.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ class TransformerInstance {
nlohmann::json replace_with_random_callback(const inja::Arguments &args);
std::string& random_for_pattern(const std::string& pattern);
nlohmann::json raw_string_callback(const inja::Arguments &args) const;
static nlohmann::json word_count_callback(const inja::Arguments &args);
static int json_word_count(const nlohmann::json* str);
static int word_count(const std::string& str);

inja::Environment env_;
absl::flat_hash_map<std::string, std::string> pattern_replacements_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,66 @@ TEST_F(InjaTransformerTest, SubstringTwoArguments) {
EXPECT_EQ(body.toString(), "23");
}

// word_count over a plain-text body with single spaces between words: the
// rendered body must be the number of words, "5".
TEST_F(InjaTransformerTest, WordCount) {
  NiceMock<Http::MockStreamDecoderFilterCallbacks> filter_callbacks;
  Http::TestRequestHeaderMapImpl request_headers{{":method", "GET"}, {":path", "/foo"}};

  // The body is not parsed (DontParse); the template replaces the body with
  // the result of word_count(body()).
  TransformationTemplate tmpl;
  tmpl.set_parse_body_behavior(TransformationTemplate::DontParse);
  tmpl.mutable_body()->set_text("{{word_count(body())}}");

  InjaTransformer transformer(tmpl, rng_, google::protobuf::BoolValue(), tls_);

  const char *input_text = "why don't you accept me";
  Buffer::OwnedImpl payload(input_text);
  transformer.transform(request_headers, &request_headers, payload, filter_callbacks);

  EXPECT_EQ(payload.toString(), "5");
}

// word_count over a plain-text body with leading/trailing whitespace and tabs
// between words: the extra whitespace must not change the result, "5".
TEST_F(InjaTransformerTest, WordCountWeirdSpacing) {
  NiceMock<Http::MockStreamDecoderFilterCallbacks> filter_callbacks;
  Http::TestRequestHeaderMapImpl request_headers{{":method", "GET"}, {":path", "/foo"}};

  // The body is not parsed (DontParse); the template replaces the body with
  // the result of word_count(body()).
  TransformationTemplate tmpl;
  tmpl.set_parse_body_behavior(TransformationTemplate::DontParse);
  tmpl.mutable_body()->set_text("{{word_count(body())}}");

  InjaTransformer transformer(tmpl, rng_, google::protobuf::BoolValue(), tls_);

  const char *input_text = " why don't you \t\t accept me ";
  Buffer::OwnedImpl payload(input_text);
  transformer.transform(request_headers, &request_headers, payload, filter_callbacks);

  EXPECT_EQ(payload.toString(), "5");
}

// word_count over a JSON object body, emitted as dynamic metadata under key
// "foo". Expected "12": 6 words in the first value, 4 in the second, plus one
// word for each of the two keys.
TEST_F(InjaTransformerTest, WordCountJSON) {
  NiceMock<Http::MockStreamDecoderFilterCallbacks> filter_callbacks;
  Http::TestRequestHeaderMapImpl request_headers{{":method", "GET"}, {":path", "/foo"}};

  TransformationTemplate tmpl;
  tmpl.set_advanced_templates(true);

  // Render the word count into the dynamic metadata value "foo".
  auto metadata_value = tmpl.add_dynamic_metadata_values();
  metadata_value->set_key("foo");
  metadata_value->mutable_value()->set_text("{{word_count(body())}}");

  InjaTransformer transformer(tmpl, rng_, google::protobuf::BoolValue(), tls_);

  // The expectation has to be installed before transform() runs.
  EXPECT_CALL(filter_callbacks.stream_info_,
              setDynamicMetadata(SoloHttpFilterNames::get().Transformation, _))
      .Times(1)
      .WillOnce(Invoke([](const std::string &, const ProtobufWkt::Struct &value) {
        const auto &foo_field = value.fields().at("foo");
        EXPECT_EQ(foo_field.string_value(), "12");
      }));

  const char *input_text = "{\"a\": \"Hal, what's the meaning of life?\", \"b\": \"I don't know John\"}";
  Buffer::OwnedImpl payload(input_text);
  transformer.transform(request_headers, &request_headers, payload, filter_callbacks);
}

TEST_F(InjaTransformerTest, SubstringOutOfBounds) {
Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};
TransformationTemplate transformation;
Expand Down

0 comments on commit cd5f894

Please sign in to comment.