From af3870a64d0bbea727645eb04d4f4b4604ca3b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20M=C3=BCller?= Date: Tue, 15 Aug 2023 21:32:56 +0200 Subject: [PATCH] =?UTF-8?q?Add=20utility=20functions=20for=20converting=20?= =?UTF-8?q?UTF-8=20bytes=20=E2=86=94=20chars=20offsets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `str_utf8_offset_bytes_to_chars` and `str_utf8_offset_chars_to_bytes` functions to base system to convert between byte and UTF-8 character offsets in UTF-8 strings. Previously, this was separately implemented in the textrender and in the lineinput helper. These textrender functions are entirely replaced by the new functions: - `ITextRender::SelectionToUTF8OffSets` (by `str_utf8_offset_chars_to_bytes`) - `ITextRender::UTF8OffToDecodedOff` (by `str_utf8_offset_bytes_to_chars`) - `ITextRender::DecodedOffToUTF8Off` (by `str_utf8_offset_chars_to_bytes`) These lineinput helper functions are reimplemented using the new functions: - `CLineInput::OffsetFromActualToDisplay` (uses `str_utf8_offset_bytes_to_chars`) - `CLineInput::OffsetFromDisplayToActual` (uses `str_utf8_offset_chars_to_bytes`) --- src/base/system.cpp | 28 ++++++++ src/base/system.h | 26 +++++++ src/engine/client/text.cpp | 97 -------------------------- src/engine/textrender.h | 4 -- src/game/client/components/console.cpp | 9 +-- src/game/client/lineinput.cpp | 54 ++++---------- src/test/str.cpp | 54 ++++++++++++++ 7 files changed, 125 insertions(+), 147 deletions(-) diff --git a/src/base/system.cpp b/src/base/system.cpp index 034dc62c2d3..7a5ce590bd4 100644 --- a/src/base/system.cpp +++ b/src/base/system.cpp @@ -3953,6 +3953,34 @@ void str_utf8_stats(const char *str, size_t max_size, size_t max_count, size_t * } } +size_t str_utf8_offset_bytes_to_chars(const char *str, size_t byte_offset) +{ + size_t char_offset = 0; + size_t current_offset = 0; + while(current_offset < byte_offset) + { + const size_t prev_byte_offset = current_offset; + 
current_offset = str_utf8_forward(str, current_offset); + if(current_offset == prev_byte_offset) + break; + char_offset++; + } + return char_offset; +} + +size_t str_utf8_offset_chars_to_bytes(const char *str, size_t char_offset) +{ + size_t byte_offset = 0; + for(size_t i = 0; i < char_offset; i++) + { + const size_t prev_byte_offset = byte_offset; + byte_offset = str_utf8_forward(str, byte_offset); + if(byte_offset == prev_byte_offset) + break; + } + return byte_offset; +} + unsigned str_quickhash(const char *str) { unsigned hash = 5381; diff --git a/src/base/system.h b/src/base/system.h index 59f81dba8bf..a07a85f0b92 100644 --- a/src/base/system.h +++ b/src/base/system.h @@ -2491,6 +2491,32 @@ int str_utf8_check(const char *str); */ void str_utf8_stats(const char *str, size_t max_size, size_t max_count, size_t *size, size_t *count); +/** + * Converts a byte offset of a utf8 string to the utf8 character offset. + * + * @param str Pointer to the string. + * @param byte_offset Offset in bytes. + * + * @return Offset in utf8 characters. Clamped to the maximum length of the string in utf8 characters. + * + * @remark The string is treated as a zero-terminated utf8 string. + * @remark It's the user's responsibility to make sure the bounds are aligned. + */ +size_t str_utf8_offset_bytes_to_chars(const char *str, size_t byte_offset); + +/** + * Converts a utf8 character offset of a utf8 string to the byte offset. + * + * @param str Pointer to the string. + * @param char_offset Offset in utf8 characters. + * + * @return Offset in bytes. Clamped to the maximum length of the string in bytes. + * + * @remark The string is treated as a zero-terminated utf8 string. + * @remark It's the user's responsibility to make sure the bounds are aligned. + */ +size_t str_utf8_offset_chars_to_bytes(const char *str, size_t char_offset); + /* Function: str_next_token Writes the next token after str into buf, returns the rest of the string. 
diff --git a/src/engine/client/text.cpp b/src/engine/client/text.cpp index 9360bc761b1..cf948df4f56 100644 --- a/src/engine/client/text.cpp +++ b/src/engine/client/text.cpp @@ -2218,103 +2218,6 @@ class CTextRender : public IEngineTextRender return WidthOfText; } - bool SelectionToUTF8OffSets(const char *pText, int SelStart, int SelEnd, int &OffUTF8Start, int &OffUTF8End) const override - { - const char *pIt = pText; - - OffUTF8Start = -1; - OffUTF8End = -1; - - int CharCount = 0; - while(*pIt) - { - const char *pTmp = pIt; - int Character = str_utf8_decode(&pTmp); - if(Character == -1) - return false; - - if(CharCount == SelStart) - OffUTF8Start = (int)((std::intptr_t)(pIt - pText)); - - if(CharCount == SelEnd) - OffUTF8End = (int)((std::intptr_t)(pIt - pText)); - - pIt = pTmp; - ++CharCount; - } - - if(CharCount == SelStart) - OffUTF8Start = (int)((std::intptr_t)(pIt - pText)); - - if(CharCount == SelEnd) - OffUTF8End = (int)((std::intptr_t)(pIt - pText)); - - return OffUTF8Start != -1 && OffUTF8End != -1; - } - - bool UTF8OffToDecodedOff(const char *pText, int UTF8Off, int &DecodedOff) const override - { - const char *pIt = pText; - - DecodedOff = -1; - - int CharCount = 0; - while(*pIt) - { - if((int)(intptr_t)(pIt - pText) == UTF8Off) - { - DecodedOff = CharCount; - return true; - } - - const char *pTmp = pIt; - int Character = str_utf8_decode(&pTmp); - if(Character == -1) - return false; - - pIt = pTmp; - ++CharCount; - } - - if((int)(std::intptr_t)(pIt - pText) == UTF8Off) - { - DecodedOff = CharCount; - return true; - } - - return false; - } - - bool DecodedOffToUTF8Off(const char *pText, int DecodedOff, int &UTF8Off) const override - { - const char *pIt = pText; - - UTF8Off = -1; - - int CharCount = 0; - while(*pIt) - { - const char *pTmp = pIt; - int Character = str_utf8_decode(&pTmp); - if(Character == -1) - return false; - - if(CharCount == DecodedOff) - { - UTF8Off = (int)((std::intptr_t)(pIt - pText)); - return true; - } - - pIt = pTmp; - ++CharCount; 
- } - - if(CharCount == DecodedOff) - UTF8Off = (int)((std::intptr_t)(pIt - pText)); - - return UTF8Off != -1; - } - void OnPreWindowResize() override { for(auto *pTextContainer : m_vpTextContainers) diff --git a/src/engine/textrender.h b/src/engine/textrender.h index ec405eb56dc..9a96ca05642 100644 --- a/src/engine/textrender.h +++ b/src/engine/textrender.h @@ -286,10 +286,6 @@ class ITextRender : public IInterface virtual float GetGlyphOffsetX(int FontSize, char TextCharacter) const = 0; virtual int CalculateTextWidth(const char *pText, int TextLength, int FontWidth, int FontSize) const = 0; - virtual bool SelectionToUTF8OffSets(const char *pText, int SelStart, int SelEnd, int &OffUTF8Start, int &OffUTF8End) const = 0; - virtual bool UTF8OffToDecodedOff(const char *pText, int UTF8Off, int &DecodedOff) const = 0; - virtual bool DecodedOffToUTF8Off(const char *pText, int DecodedOff, int &UTF8Off) const = 0; - // old foolish interface virtual void TextColor(float r, float g, float b, float a) = 0; virtual void TextColor(ColorRGBA rgb) = 0; diff --git a/src/game/client/components/console.cpp b/src/game/client/components/console.cpp index 046a2447d6a..a26d2df102a 100644 --- a/src/game/client/components/console.cpp +++ b/src/game/client/components/console.cpp @@ -765,12 +765,9 @@ void CGameConsole::OnRender() if(m_WantsSelectionCopy) { const bool HasNewLine = !SelectionString.empty(); - int OffUTF8Start = 0; - int OffUTF8End = 0; - if(TextRender()->SelectionToUTF8OffSets(pEntry->m_aText, pConsole->m_CurSelStart, pConsole->m_CurSelEnd, OffUTF8Start, OffUTF8End)) - { - SelectionString.insert(0, (std::string(&pEntry->m_aText[OffUTF8Start], OffUTF8End - OffUTF8Start) + (HasNewLine ? 
"\n" : ""))); - } + const size_t OffUTF8Start = str_utf8_offset_chars_to_bytes(pEntry->m_aText, pConsole->m_CurSelStart); + const size_t OffUTF8End = str_utf8_offset_chars_to_bytes(pEntry->m_aText, pConsole->m_CurSelEnd); + SelectionString.insert(0, (std::string(&pEntry->m_aText[OffUTF8Start], OffUTF8End - OffUTF8Start) + (HasNewLine ? "\n" : ""))); } pConsole->m_HasSelection = true; } diff --git a/src/game/client/lineinput.cpp b/src/game/client/lineinput.cpp index 9003d3c3300..bb5579a53ae 100644 --- a/src/game/client/lineinput.cpp +++ b/src/game/client/lineinput.cpp @@ -170,32 +170,14 @@ size_t CLineInput::OffsetFromActualToDisplay(size_t ActualOffset) const { if(!IsHidden()) return ActualOffset; - size_t DisplayOffset = 0; - size_t CurrentOffset = 0; - while(CurrentOffset < ActualOffset) - { - const size_t PrevOffset = CurrentOffset; - CurrentOffset = str_utf8_forward(m_pStr, CurrentOffset); - if(CurrentOffset == PrevOffset) - break; - DisplayOffset++; - } - return DisplayOffset; + return str_utf8_offset_bytes_to_chars(m_pStr, ActualOffset); } size_t CLineInput::OffsetFromDisplayToActual(size_t DisplayOffset) const { if(!IsHidden()) return DisplayOffset; - size_t ActualOffset = 0; - for(size_t i = 0; i < DisplayOffset; i++) - { - const size_t PrevOffset = ActualOffset; - ActualOffset = str_utf8_forward(m_pStr, ActualOffset); - if(ActualOffset == PrevOffset) - break; - } - return ActualOffset; + return str_utf8_offset_chars_to_bytes(m_pStr, DisplayOffset); } bool CLineInput::ProcessInput(const IInput::CEvent &Event) @@ -462,11 +444,11 @@ STextBoundingBox CLineInput::Render(const CUIRect *pRect, float FontSize, int Al m_LastCompositionCursorPos = CaretOffset; const size_t DisplayCompositionEnd = DisplayCursorOffset + Input()->GetCompositionLength(); Cursor.m_CursorMode = TEXT_CURSOR_CURSOR_MODE_SET; - TextRender()->UTF8OffToDecodedOff(pDisplayStr, CaretOffset, Cursor.m_CursorCharacter); + Cursor.m_CursorCharacter = str_utf8_offset_bytes_to_chars(pDisplayStr, 
CaretOffset); Cursor.m_CalculateSelectionMode = TEXT_CURSOR_SELECTION_MODE_SET; Cursor.m_SelectionHeightFactor = 0.1f; - TextRender()->UTF8OffToDecodedOff(pDisplayStr, DisplayCursorOffset, Cursor.m_SelectionStart); - TextRender()->UTF8OffToDecodedOff(pDisplayStr, DisplayCompositionEnd, Cursor.m_SelectionEnd); + Cursor.m_SelectionStart = str_utf8_offset_bytes_to_chars(pDisplayStr, DisplayCursorOffset); + Cursor.m_SelectionEnd = str_utf8_offset_bytes_to_chars(pDisplayStr, DisplayCompositionEnd); TextRender()->TextSelectionColor(1.0f, 1.0f, 1.0f, 0.8f); TextRender()->TextEx(&Cursor, pDisplayStr); TextRender()->TextSelectionColor(TextRender()->DefaultTextSelectionColor()); @@ -476,38 +458,30 @@ STextBoundingBox CLineInput::Render(const CUIRect *pRect, float FontSize, int Al const size_t Start = OffsetFromActualToDisplay(GetSelectionStart()); const size_t End = OffsetFromActualToDisplay(GetSelectionEnd()); Cursor.m_CursorMode = m_MouseSelection.m_Selecting ? TEXT_CURSOR_CURSOR_MODE_CALCULATE : TEXT_CURSOR_CURSOR_MODE_SET; - TextRender()->UTF8OffToDecodedOff(pDisplayStr, CaretOffset, Cursor.m_CursorCharacter); + Cursor.m_CursorCharacter = str_utf8_offset_bytes_to_chars(pDisplayStr, CaretOffset); Cursor.m_CalculateSelectionMode = m_MouseSelection.m_Selecting ? TEXT_CURSOR_SELECTION_MODE_CALCULATE : TEXT_CURSOR_SELECTION_MODE_SET; - TextRender()->UTF8OffToDecodedOff(pDisplayStr, Start, Cursor.m_SelectionStart); - TextRender()->UTF8OffToDecodedOff(pDisplayStr, End, Cursor.m_SelectionEnd); + Cursor.m_SelectionStart = str_utf8_offset_bytes_to_chars(pDisplayStr, Start); + Cursor.m_SelectionEnd = str_utf8_offset_bytes_to_chars(pDisplayStr, End); TextRender()->TextEx(&Cursor, pDisplayStr); } else { Cursor.m_CursorMode = m_MouseSelection.m_Selecting ? 
TEXT_CURSOR_CURSOR_MODE_CALCULATE : TEXT_CURSOR_CURSOR_MODE_SET; - TextRender()->UTF8OffToDecodedOff(pDisplayStr, CaretOffset, Cursor.m_CursorCharacter); + Cursor.m_CursorCharacter = str_utf8_offset_bytes_to_chars(pDisplayStr, CaretOffset); Cursor.m_CalculateSelectionMode = m_MouseSelection.m_Selecting ? TEXT_CURSOR_SELECTION_MODE_CALCULATE : TEXT_CURSOR_SELECTION_MODE_NONE; TextRender()->TextEx(&Cursor, pDisplayStr); } if(Cursor.m_CursorMode == TEXT_CURSOR_CURSOR_MODE_CALCULATE) { - int NewCursorOffset; - TextRender()->DecodedOffToUTF8Off(pDisplayStr, Cursor.m_CursorCharacter, NewCursorOffset); - if(NewCursorOffset >= 0) - { - SetCursorOffset(OffsetFromDisplayToActual(NewCursorOffset)); - } + const size_t NewCursorOffset = str_utf8_offset_chars_to_bytes(pDisplayStr, Cursor.m_CursorCharacter); + SetCursorOffset(OffsetFromDisplayToActual(NewCursorOffset)); } if(Cursor.m_CalculateSelectionMode == TEXT_CURSOR_SELECTION_MODE_CALCULATE) { - int NewSelectionStart, NewSelectionEnd; - TextRender()->DecodedOffToUTF8Off(pDisplayStr, Cursor.m_SelectionStart, NewSelectionStart); - TextRender()->DecodedOffToUTF8Off(pDisplayStr, Cursor.m_SelectionEnd, NewSelectionEnd); - if(NewSelectionStart >= 0 && NewSelectionEnd >= 0) - { - SetSelection(OffsetFromDisplayToActual(NewSelectionStart), OffsetFromDisplayToActual(NewSelectionEnd)); - } + const size_t NewSelectionStart = str_utf8_offset_chars_to_bytes(pDisplayStr, Cursor.m_SelectionStart); + const size_t NewSelectionEnd = str_utf8_offset_chars_to_bytes(pDisplayStr, Cursor.m_SelectionEnd); + SetSelection(OffsetFromDisplayToActual(NewSelectionStart), OffsetFromDisplayToActual(NewSelectionEnd)); } m_CaretPosition = Cursor.m_CursorRenderedPosition; diff --git a/src/test/str.cpp b/src/test/str.cpp index eb35e4f7118..fcd75b705c5 100644 --- a/src/test/str.cpp +++ b/src/test/str.cpp @@ -696,6 +696,60 @@ TEST(Str, Utf8Stats) EXPECT_EQ(Count, 3); } +TEST(Str, Utf8OffsetBytesToChars) +{ + EXPECT_EQ(str_utf8_offset_bytes_to_chars("", 0), 0); + 
EXPECT_EQ(str_utf8_offset_bytes_to_chars("", 100), 0); + + EXPECT_EQ(str_utf8_offset_bytes_to_chars("abc", 0), 0); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("abc", 1), 1); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("abc", 2), 2); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("abc", 3), 3); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("abc", 100), 3); + + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 0), 0); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 2), 1); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 4), 2); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 6), 3); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 8), 4); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 10), 5); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 12), 6); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("любовь", 100), 6); + + EXPECT_EQ(str_utf8_offset_bytes_to_chars("DDNet最好了", 5), 5); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("DDNet最好了", 8), 6); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("DDNet最好了", 11), 7); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("DDNet最好了", 14), 8); + EXPECT_EQ(str_utf8_offset_bytes_to_chars("DDNet最好了", 100), 8); +} + +TEST(Str, Utf8OffsetCharsToBytes) +{ + EXPECT_EQ(str_utf8_offset_chars_to_bytes("", 0), 0); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("", 100), 0); + + EXPECT_EQ(str_utf8_offset_chars_to_bytes("abc", 0), 0); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("abc", 1), 1); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("abc", 2), 2); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("abc", 3), 3); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("abc", 100), 3); + + EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 0), 0); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 1), 2); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 2), 4); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 3), 6); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 4), 8); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 5), 10); + 
EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 6), 12); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("любовь", 100), 12); + + EXPECT_EQ(str_utf8_offset_chars_to_bytes("DDNet最好了", 5), 5); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("DDNet最好了", 6), 8); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("DDNet最好了", 7), 11); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("DDNet最好了", 8), 14); + EXPECT_EQ(str_utf8_offset_chars_to_bytes("DDNet最好了", 100), 14); +} + TEST(Str, Time) { char aBuf[32] = "foobar";