From cd5f8947c21c0f0774cb9c13a9f553ab74b91e02 Mon Sep 17 00:00:00 2001
From: Eitan Yarmush
Date: Mon, 3 Jun 2024 10:28:52 -0400
Subject: [PATCH] Transformation word_count (#341)

* word_count impl

* updated the word_count func

* add dynamic metadata test for JSON word_count

* remove cruft

* revert test

* partially fixed spaces

* account for spaces at beginning

* changelog

* word_count returns int now
---
 changelog/v1.30.1-patch2/word_count.yaml      |  6 ++
 .../http/transformation/inja_transformer.cc   | 72 +++++++++++++++++++
 .../http/transformation/inja_transformer.h    |  3 +
 .../transformation/inja_transformer_test.cc   | 60 ++++++++++++++++
 4 files changed, 141 insertions(+)
 create mode 100644 changelog/v1.30.1-patch2/word_count.yaml

diff --git a/changelog/v1.30.1-patch2/word_count.yaml b/changelog/v1.30.1-patch2/word_count.yaml
new file mode 100644
index 00000000..b2f9793a
--- /dev/null
+++ b/changelog/v1.30.1-patch2/word_count.yaml
@@ -0,0 +1,6 @@
+changelog:
+- type: NEW_FEATURE
+  issueLink: https://github.com/solo-io/envoy-gloo/issues/343
+  description: >-
+    Add a new inja template function to count the words in a JSON message. The new function, `word_count`,
+    can handle any JSON type, including nested JSON messages, and counts all the words in the message.
diff --git a/source/extensions/filters/http/transformation/inja_transformer.cc b/source/extensions/filters/http/transformation/inja_transformer.cc
index 2d258e5d..b073e1e3 100644
--- a/source/extensions/filters/http/transformation/inja_transformer.cc
+++ b/source/extensions/filters/http/transformation/inja_transformer.cc
@@ -264,6 +264,9 @@ TransformerInstance::TransformerInstance(ThreadLocal::Slot &tls, Envoy::Random::
   env_.add_callback("raw_string", 1, [this](Arguments &args) {
     return raw_string_callback(args);
   });
+  env_.add_callback("word_count", 1, [](Arguments &args) {
+    return word_count_callback(args);
+  });
 }
 
 json TransformerInstance::header_callback(const inja::Arguments &args) const {
@@ -410,6 +413,75 @@ json TransformerInstance::base64url_decode_callback(const inja::Arguments &args)
   return Base64Url::decode(input);
 }
 
+// Inja callback backing the `word_count` template function. It takes exactly
+// one argument, which may be any JSON value, and returns the number of words
+// found in it.
+json TransformerInstance::word_count_callback(const inja::Arguments &args) {
+  return json_word_count(args.at(0));
+}
+
+// Recursively counts the words contained in a JSON value:
+//   - string:           its whitespace-separated words
+//   - array:            the sum over all elements
+//   - object:           the sum over all keys (counted as strings) and values
+//   - number / boolean: always exactly one word
+//   - anything else:    zero
+// `input` is dereferenced unconditionally and therefore must not be null.
+int TransformerInstance::json_word_count(const nlohmann::json* input) {
+  if (input->is_string()) {
+    return word_count(input->get_ref<const std::string &>());
+  }
+  if (input->is_array()) {
+    int total_word_count = 0;
+    // Elements are counted recursively, so nested arrays and objects work.
+    for (const auto &element : input->get_ref<const nlohmann::json::array_t &>()) {
+      total_word_count += json_word_count(&element);
+    }
+    return total_word_count;
+  }
+  if (input->is_object()) {
+    int total_word_count = 0;
+    // Bind by reference: copying the whole object just to iterate it is wasteful.
+    const auto &members = input->get_ref<const nlohmann::json::object_t &>();
+    for (const auto &[key, value] : members) {
+      total_word_count += word_count(key);
+      total_word_count += json_word_count(&value);
+    }
+    return total_word_count;
+  }
+  if (input->is_number() || input->is_boolean()) {
+    // Booleans and numbers always count as a single word.
+    return 1;
+  }
+  // Anything else (e.g. null) contains no words.
+  return 0;
+}
+
+// Counts the whitespace-separated words in input_string.
+//
+// A word is a maximal run of non-whitespace characters, so leading, trailing
+// and repeated whitespace never contributes to the count. Empty and
+// all-whitespace strings therefore yield 0.
+int TransformerInstance::word_count(const std::string& input_string) {
+  int words = 0;
+  bool in_word = false;
+
+  for (const char c : input_string) {
+    // https://en.cppreference.com/w/cpp/string/byte/isspace
+    // isspace has undefined behavior for negative char values, so convert to
+    // unsigned char before classifying.
+    if (isspace(static_cast<unsigned char>(c))) {
+      in_word = false;
+    } else if (!in_word) {
+      // First character of a new word.
+      in_word = true;
+      ++words;
+    }
+  }
+  return words;
+}
+
+
 // return a substring of the input string, starting at the start position
 // and extending for length characters. If length is not provided, the
 // substring will extend to the end of the string.
diff --git a/source/extensions/filters/http/transformation/inja_transformer.h b/source/extensions/filters/http/transformation/inja_transformer.h
index 82997878..c858a7c6 100644
--- a/source/extensions/filters/http/transformation/inja_transformer.h
+++ b/source/extensions/filters/http/transformation/inja_transformer.h
@@ -73,6 +73,9 @@ class TransformerInstance {
   nlohmann::json replace_with_random_callback(const inja::Arguments &args);
   std::string& random_for_pattern(const std::string& pattern);
   nlohmann::json raw_string_callback(const inja::Arguments &args) const;
+  static nlohmann::json word_count_callback(const inja::Arguments &args); // backs word_count()
+  static int json_word_count(const nlohmann::json* str); // words in any JSON value (recursive)
+  static int word_count(const std::string& str); // whitespace-separated words in a string
 
   inja::Environment env_;
   absl::flat_hash_map pattern_replacements_;
diff --git a/test/extensions/filters/http/transformation/inja_transformer_test.cc b/test/extensions/filters/http/transformation/inja_transformer_test.cc
index 9afa3d8f..169cc9e6 100644
--- a/test/extensions/filters/http/transformation/inja_transformer_test.cc
+++ b/test/extensions/filters/http/transformation/inja_transformer_test.cc
@@ -967,6 +967,66 @@ TEST_F(InjaTransformerTest, SubstringTwoArguments) {
   EXPECT_EQ(body.toString(), "23");
 }
 
+TEST_F(InjaTransformerTest, WordCount) { // plain-text body, single spaces between words
+  Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};
+  TransformationTemplate transformation;
+
+  transformation.mutable_body()->set_text("{{word_count(body())}}");
+  transformation.set_parse_body_behavior(TransformationTemplate::DontParse);
+
+  InjaTransformer transformer(transformation, rng_, google::protobuf::BoolValue(), tls_);
+
+  NiceMock callbacks; // NOTE(review): template argument looks stripped here; confirm
+
+  auto test_string = "why don't you accept me";
+  Buffer::OwnedImpl body(test_string);
+  transformer.transform(headers, &headers, body, callbacks);
+  EXPECT_EQ(body.toString(), "5"); // why / don't / you / accept / me
+}
+
+TEST_F(InjaTransformerTest, WordCountWeirdSpacing) { // leading, trailing and tab whitespace
+  Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};
+  TransformationTemplate transformation;
+
+  transformation.mutable_body()->set_text("{{word_count(body())}}");
+  transformation.set_parse_body_behavior(TransformationTemplate::DontParse);
+
+  InjaTransformer transformer(transformation, rng_, google::protobuf::BoolValue(), tls_);
+
+  NiceMock callbacks; // NOTE(review): template argument looks stripped here; confirm
+
+  auto test_string = " why don't you \t\t accept me ";
+  Buffer::OwnedImpl body(test_string);
+  transformer.transform(headers, &headers, body, callbacks);
+  EXPECT_EQ(body.toString(), "5"); // extra whitespace must not change the count
+}
+
+TEST_F(InjaTransformerTest, WordCountJSON) { // word_count result published as dynamic metadata
+  Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};
+  TransformationTemplate transformation;
+  transformation.set_advanced_templates(true);
+
+  auto dynamic_meta = transformation.add_dynamic_metadata_values();
+  dynamic_meta->set_key("foo");
+  dynamic_meta->mutable_value()->set_text("{{word_count(body())}}");
+
+  InjaTransformer transformer(transformation, rng_, google::protobuf::BoolValue(), tls_);
+
+  NiceMock callbacks; // NOTE(review): template argument looks stripped here; confirm
+
+  EXPECT_CALL(callbacks.stream_info_,
+              setDynamicMetadata(SoloHttpFilterNames::get().Transformation, _))
+      .Times(1)
+      .WillOnce(
+          Invoke([](const std::string &, const ProtobufWkt::Struct &value) {
+            auto field = value.fields().at("foo");
+            EXPECT_EQ(field.string_value(), "12"); // 2 keys + 6 words in "a" + 4 words in "b"
+          }));
+  auto test_string = "{\"a\": \"Hal, what's the meaning of life?\", \"b\": \"I don't know John\"}";
+  Buffer::OwnedImpl body(test_string);
+  transformer.transform(headers, &headers, body, callbacks);
+}
+
 TEST_F(InjaTransformerTest, SubstringOutOfBounds) {
   Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};
   TransformationTemplate transformation;