From e5595f8074ed7d87f6b38b96ffc85f7770b11373 Mon Sep 17 00:00:00 2001 From: Ceesjan Luiten Date: Thu, 7 Mar 2024 20:04:20 +0100 Subject: [PATCH] Suggest valid options when the configuration contains unknown keys This changeset is purely to improve the user experience. When the configuration parsing fails with an unknown key we'll test all possible keys and suggest one that looks a lot like it. This way we can be helpful in case someone made a typo or misremembered the precise name. --- FlashMQTests/configtests.cpp | 53 +++++++++++++++++++++++ FlashMQTests/maintests.cpp | 2 + FlashMQTests/maintests.h | 2 + FlashMQTests/testhelpers.cpp | 18 +++++++- FlashMQTests/testhelpers.h | 12 +++++- configfileparser.cpp | 27 ++++++++++++ utils.cpp | 84 ++++++++++++++++++++++++++++++++++++ utils.h | 2 + 8 files changed, 197 insertions(+), 3 deletions(-) diff --git a/FlashMQTests/configtests.cpp b/FlashMQTests/configtests.cpp index db49f397..71884f08 100644 --- a/FlashMQTests/configtests.cpp +++ b/FlashMQTests/configtests.cpp @@ -117,3 +117,56 @@ void MainTests::test_parsing_numbers() } } } + +void MainTests::testStringDistances() +{ + FMQ_COMPARE(distanceBetweenStrings("", ""), (unsigned int)0); + FMQ_COMPARE(distanceBetweenStrings("dog", ""), (unsigned int)3); + FMQ_COMPARE(distanceBetweenStrings("", "dog"), (unsigned int)3); + FMQ_COMPARE(distanceBetweenStrings("dog", "horse"), (unsigned int)4); + FMQ_COMPARE(distanceBetweenStrings("horse", "dog"), (unsigned int)4); + FMQ_COMPARE(distanceBetweenStrings("industry", "interest"), (unsigned int)6); + FMQ_COMPARE(distanceBetweenStrings("kitten", "sitting"), (unsigned int)3); + FMQ_COMPARE(distanceBetweenStrings("uninformed", "uniformed"), (unsigned int)1); +} + +void MainTests::testConfigSuggestion() +{ + // User made a small typo: 'session' instead of 'sessions' + { + ConfFileTemp config; + config.writeLine("expire_session_after_seconds 180"); + config.closeFile(); + + ConfigFileParser parser(config.getFilePath()); + + try + { + parser.loadFile(false); + FMQ_FAIL("The parser is too liberal"); + } + catch (ConfigFileException &ex) + { + FMQ_COMPARE(ex.what(), "Config key 'expire_session_after_seconds' is not valid (here). Did you mean: expire_sessions_after_seconds ?"); + } + } + + // User entered gibberish. Let's not suggest gibberish back + { + ConfFileTemp config; + config.writeLine("foobarbaz 180"); + config.closeFile(); + + ConfigFileParser parser(config.getFilePath()); + + try + { + parser.loadFile(false); + FMQ_FAIL("The parser is too liberal"); + } + catch (ConfigFileException &ex) + { + FMQ_COMPARE(ex.what(), "Config key 'foobarbaz' is not valid (here)."); + } + } +} diff --git a/FlashMQTests/maintests.cpp b/FlashMQTests/maintests.cpp index 5cf740ca..2b028168 100644 --- a/FlashMQTests/maintests.cpp +++ b/FlashMQTests/maintests.cpp @@ -231,6 +231,8 @@ MainTests::MainTests() REGISTER_FUNCTION(testOverrideWillDelayOnSessionDestructionByTakeOver); REGISTER_FUNCTION(testDisabledWills); REGISTER_FUNCTION(testMqtt5DelayedWillsDisabled); + REGISTER_FUNCTION(testStringDistances); + REGISTER_FUNCTION(testConfigSuggestion); REGISTER_FUNCTION(testIncomingTopicAlias); REGISTER_FUNCTION(testOutgoingTopicAlias); REGISTER_FUNCTION(testOutgoingTopicAliasBeyondMax); diff --git a/FlashMQTests/maintests.h b/FlashMQTests/maintests.h index 2a977b15..e0db46a9 100644 --- a/FlashMQTests/maintests.h +++ b/FlashMQTests/maintests.h @@ -120,6 +120,8 @@ class MainTests void testOverrideWillDelayOnSessionDestructionByTakeOver(); void testDisabledWills(); void testMqtt5DelayedWillsDisabled(); + void testStringDistances(); + void testConfigSuggestion(); void testIncomingTopicAlias(); diff --git a/FlashMQTests/testhelpers.cpp b/FlashMQTests/testhelpers.cpp index a5a27f53..0edad193 100644 --- a/FlashMQTests/testhelpers.cpp +++ b/FlashMQTests/testhelpers.cpp @@ -1,6 +1,7 @@ #include "testhelpers.h" #include +#include int assert_count; int assert_fail_count; @@ -16,8 +17,21 @@ bool fmq_assert(bool b, const char *failmsg, const char *actual, const char *exp if (asserts_print) { - std::cout << RED << "FAIL" << COLOR_END << ": '" << failmsg << "', " << actual << " != " << expected << std::endl - << " in " << file << ", line " << line << std::endl; + // There are two types of failmsg: unformatted ones and formatted ones. + // By testing for a newline we can detect formatted ones. + if (strchr(failmsg, '\n') == nullptr) + { + // unformatted + std::cout << RED << "FAIL" << COLOR_END << ": '" << failmsg << "', " << actual << " != " << expected << std::endl + << " in " << file << ", line " << line << std::endl; + } + else + { + // formatted + std::cout << RED << "FAIL" << COLOR_END << " in " << file << ", line " << line << std::endl; + std::cout << failmsg << std::endl; + std::cout << "Comparison: " << actual << " != " << expected << std::endl; + } } } diff --git a/FlashMQTests/testhelpers.h b/FlashMQTests/testhelpers.h index 937b1ce6..120ef192 100644 --- a/FlashMQTests/testhelpers.h +++ b/FlashMQTests/testhelpers.h @@ -61,7 +61,17 @@ inline bool fmq_compare(const char *c1, const char *c2, const char *actual, cons std::string s2(c2); std::ostringstream oss; - oss << s1 << " != " << s2; + + if (s1.length() + s2.length() < 100) + { + // short form + oss << s1 << " != " << s2; + } + else + { + oss << "Actual: " << s1 << std::endl; + oss << "Expected: " << s2; + } return fmq_assert(s1 == s2, oss.str().c_str(), actual, expected, file, line); } diff --git a/configfileparser.cpp b/configfileparser.cpp index 68e812cf..6518852c 100644 --- a/configfileparser.cpp +++ b/configfileparser.cpp @@ -93,6 +93,33 @@ bool ConfigFileParser::testKeyValidity(const std::string &key, const std::string { std::ostringstream oss; oss << "Config key '" << key << "' is not valid (here)."; + + // Maybe the user made a typo + auto alternative = validKeys.end(); + unsigned int alternative_distance = UINT_MAX; + + for (auto possible_key = validKeys.begin(); possible_key != validKeys.end(); ++possible_key) + { + unsigned int distance = distanceBetweenStrings(key.c_str(), *possible_key); + // We only want to suggest options that look a bit like the unknown + // one. Experimentally I found 50% of the total length a decent + // cutoff. + // + // The mathemathical formula "distance/length < 0.5" can be + // approximated with integers as "distance*2/length < 1" + if ((distance * 2) / key.length() < 1 && distance < alternative_distance) + { + alternative = possible_key; + alternative_distance = distance; + } + } + if (alternative != validKeys.end()) + { + // It might indeed have been a typo. + // The space before the question mark is to make copying using mouse-double-click possible. + oss << " Did you mean: " << alternative->c_str() << " ?"; + } + throw ConfigFileException(oss.str()); } diff --git a/utils.cpp b/utils.cpp index 451dc37c..12bf8f5d 100644 --- a/utils.cpp +++ b/utils.cpp @@ -724,6 +724,90 @@ std::string protocolVersionString(ProtocolVersion p) } } +/** + * @brief Returns the edit distance between the two given strings + * + * This function uses the Wagner–Fischer algorithm to calculate the Levenshtein + * distance between two strings: the total number of insertions, swaps, and + * deletions that are needed to transform the one into the other. + */ +unsigned int distanceBetweenStrings(const std::string &stringA, const std::string &stringB) +{ + // The matrix contains the distances between the substrings. + // You can find a description of the algorithm online. + // + // Roughly: + // + // line_a: "dog" + // line_b: "horse" + // + // --> + // | | # | d | o | g + // V --+---+---+---+---+---+---+--- + // # | P + // h | + // o | + // r | Q X Y + // s | + // e | Z + // + // P = [0, 0] = the distance from "" to "" (which is 0) + // Q = [0, 3] = the distance from "" to "hor" (which is 3, all inserts) + // X = [1, 3] = the distance from "d" to "hor" (which is 3, 1 swap and 2 inserts) + // Y = [3, 3] = the distance from "dog" to "hor" (which is 2, both swaps) + // Z = [3, 5] = the distance from "dog" to "horse" (which is 4, two swaps and 2 inserts) + // + // The matrix does not have to be square, the dimensions depends on the inputs + // + // the position within stringA should always be referred to as x + // the position within stringB should always be referred to as y + + using mymatrix = std::vector>; + // +1 because we also need to store the length from the empty strings + int width = stringA.size() + 1; + int height = stringB.size() + 1; + mymatrix distances(width, std::vector(height)); + + // We know that the distance from the substrings of line_a to "" + // is equal to the length of the substring of line_a + for (int x = 0; x < width; x++) + { + distances[x][0] = x; + } + + // We know that the distance from "" to the substrings of line_b + // is equal to the length of the substring of line_b + for (int y = 0; y < height; y++) + { + distances[0][y] = y; + } + + // Now all we do is to fill out the rest of the matrix, easy peasy + // note we start at 1 because the top row and left column have already been calculated + for (int x = 1; x < width; x++) + { + for (int y = 1; y < height; y++) + { + if (stringA[x - 1] == stringB[y - 1]) + { + // the letters in both words are the same: we can travel from the top-left for free to the current state + distances[x][y] = distances[x - 1][y - 1]; + } + else + { + // let's calculate the different costs and pick the cheapest option + // We use "+1" for all costs since they are all equally likely in our case + int dinstance_with_deletion = distances[x][y - 1] + 1; + int dinstance_with_insertion = distances[x - 1][y] + 1; + int dinstance_with_substitution = distances[x - 1][y - 1] + 1; + distances[x][y] = std::min({dinstance_with_deletion, dinstance_with_insertion, dinstance_with_substitution}); + } + } + } + + return distances[width - 1][height - 1]; // the last cell contains our answer +} + uint32_t ageFromTimePoint(const std::chrono::time_point &point) { auto duration = std::chrono::steady_clock::now() - point; diff --git a/utils.h b/utils.h index ab1f12e0..f2a668e3 100644 --- a/utils.h +++ b/utils.h @@ -124,6 +124,8 @@ std::string websocketCloseCodeToString(uint16_t code); std::string protocolVersionString(ProtocolVersion p); +unsigned int distanceBetweenStrings(const std::string &stringA, const std::string &stringB); + uint32_t ageFromTimePoint(const std::chrono::time_point &point); std::chrono::time_point timepointFromAge(const uint32_t age);