brainboxdotcc · braindigitalis · Dec 20, 2023 · Dec 19, 2023 · Dec 19, 2023 · Dec 19, 2023
diff --git a/include/dpp/utility.h b/include/dpp/utility.h
@@ -738,22 +738,36 @@ uint32_t DPP_EXPORT hsl(int h, int s, int l);
 std::string DPP_EXPORT debug_dump(uint8_t* data, size_t length);
 
 /**
- * @brief Returns the length of a UTF-8 string in codepoints
- * 
+ * @brief Returns the length of a UTF-8 string in codepoints (not bytes).
+ * @note Result is unspecified for strings that are not valid UTF-8.
+ *
  * @param str string to count length of
- * @return size_t length of string (0 for invalid utf8)
+ * @return size_t Number of codepoints in the string
  */
-size_t DPP_EXPORT utf8len(const std::string &str);
+size_t DPP_EXPORT utf8len(std::string_view str);
 
 /**
- * @brief Return substring of a UTF-8 encoded string in codepoints
- * 
+ * @brief Return a non-owning subview of a UTF-8 encoded string, addressed in codepoints.
+ * @note You must ensure that the resulting view is not used after the lifetime of the viewed string has ended.
+ * @note Result is unspecified for strings that are not valid UTF-8.
+ *
+ * @param str string to take the subview of
+ * @param start codepoint offset of the first codepoint in the view; a start past the end yields an empty view
+ * @param length maximum number of codepoints in the view; the view stops at the end of the string if fewer remain
+ * @return std::string_view View into str covering the requested codepoints
+ */
+std::string_view DPP_EXPORT utf8subview(std::string_view str, size_t start, size_t length);
+
+/**
+ * @brief Return an owning copy of a substring of a UTF-8 encoded string, addressed in codepoints.
+ * @note Result is unspecified for strings that are not valid UTF-8.
+ *
  * @param str string to return substring from
  * @param start start codepoint offset
  * @param length length in codepoints
- * @return std::string Substring in UTF-8 or empty string if invalid UTF-8 passed in
+ * @return std::string The requested substring, copied into a new string
  */
-std::string DPP_EXPORT utf8substr(const std::string& str, std::string::size_type start, std::string::size_type length);
+std::string DPP_EXPORT utf8substr(std::string_view str, size_t start, size_t length);
 
 /**
  * @brief Read a whole file into a std::string.

diff --git a/src/dpp/commandhandler.cpp b/src/dpp/commandhandler.cpp
@@ -168,7 +168,7 @@ bool commandhandler::string_has_prefix(std::string &str)
 {
 	for (auto& p : prefixes) {
 		size_t prefix_length = utility::utf8len(p);
-		if (utility::utf8substr(str, 0, prefix_length) == p) {
-			str.erase(str.begin(), str.begin() + prefix_length);
+		if (utility::utf8subview(str, 0, prefix_length) == p) { // compare the first prefix_length codepoints without copying
+			str.erase(0, p.length()); // erase by byte length: prefix_length counts codepoints and is too short for multi-byte prefixes
 			return true;
 		}

diff --git a/src/dpp/utility.cpp b/src/dpp/utility.cpp
@@ -482,83 +482,67 @@ void exec(const std::string& cmd, std::vector<std::string> parameters, cmd_resul
 	t.detach();
 }
 
-size_t utf8len(const std::string &str)
-{
-	size_t i = 0, iBefore = 0, count = 0;
-	const char* s = str.c_str();
-	if (*s == 0) {
-		return 0;
-	}
+size_t utf8len(std::string_view str) {
+	const size_t total_bytes = str.size();
+	size_t offset = 0; // byte index of the lead byte currently being examined
+	size_t count = 0; // codepoints seen so far
 
-	while (s[i] > 0) {
-ascii:
-		i++;
-	}
+	while (offset != total_bytes) {
+		const unsigned char lead = str[offset];
 
-	count += i - iBefore;
+		// lead byte >= 0xC0 / 0xE0 / 0xF0 starts a 2 / 3 / 4 byte sequence; anything lower counts as one byte
+		const size_t seq_len = 1 + static_cast<size_t>(lead >= 0xC0)
+			+ static_cast<size_t>(lead >= 0xE0)
+			+ static_cast<size_t>(lead >= 0xF0);
 
-	while (s[i]) {
-		if (s[i] > 0) {
-			iBefore = i;
-			goto ascii;
-		} else {
-			switch (0xF0 & s[i]) {
-			case 0xE0:
-				i += 3;
-				break;
-			case 0xF0:
-				i += 4;
-				break;
-			default:
-				i += 2;
-				break;
-			}
+		if (total_bytes - offset < seq_len) {
+			return 0; // truncated sequence: bail out rather than step past the end
 		}
+		offset += seq_len;
 
-		count++;
+		++count;
 	}
 
-	return count;
+	return count;
 }
 
-std::string utf8substr(const std::string& str, std::string::size_type start, std::string::size_type leng)
-{
-	if (leng == 0) {
-		return "";
-	}
-	if (start == 0 && leng >= utf8len(str)) {
-		return str;
-	}
-	std::string::size_type i, ix, q, min = std::string::npos, max = std::string::npos;
-	for (q = 0, i = 0, ix = str.length(); i < ix; i++, q++) {
-		if (q == start) {
-			min = i;
+std::string_view utf8subview(std::string_view str, size_t start, size_t length) {
+	const size_t raw_len = str.length();
+	size_t pos = 0; // byte offset of the codepoint being examined
+	size_t code_points = 0; // index of that codepoint
+
+	size_t subview_start = raw_len; // stays at the end (empty view) if start is never reached
+	size_t subview_len = std::string_view::npos; // substr() clamps this to the end of the string
+
+	while (pos != raw_len) {
+		if (code_points == start) {
+			subview_start = pos;
 		}
-		if (q <= start + leng || leng == std::string::npos) {
-			max = i;
+		if (code_points >= start && code_points - start == length) { // subtract instead of add: start + length wraps for large lengths such as npos
+			subview_len = pos - subview_start;
+			break; // no point in traversing the remainder of the string
 		}
 
-		unsigned char c = (unsigned char)str[i];
-		if (c < 0x80) {
-			i += 0;
-		} else if ((c & 0xE0) == 0xC0) {
-			i += 1;
-		} else if ((c & 0xF0) == 0xE0) {
-			i += 2;
-		} else if ((c & 0xF8) == 0xF0) {
-			i += 3;
-		} else {
-			return "";	//invalid utf8
+		const unsigned char cur = str[pos];
+
+		size_t code_point_len = 1; // lead bytes below 0xC0 count as a single byte
+		code_point_len += static_cast<size_t>(cur >= 0b11000000);
+		code_point_len += static_cast<size_t>(cur >= 0b11100000);
+		code_point_len += static_cast<size_t>(cur >= 0b11110000);
+
+		if (raw_len - pos < code_point_len) {
+			return ""; // invalid utf8, avoid going past the end
 		}
+		pos += code_point_len;
+
+		code_points += 1;
 	}
-	if (q <= start + leng || leng == std::string::npos) {
-		max = i;
-	}
-	if (min == std::string::npos || max == std::string::npos) {
-		return "";
-	}
 
-	return str.substr(min, max);
+	return str.substr(subview_start, subview_len);
+}
+
+std::string utf8substr(std::string_view str, size_t start, size_t length) {
+	return std::string{utf8subview(str, start, length)}; // owning copy of the selected view
 }
 
 std::string read_file(const std::string& filename)