diff options
Diffstat (limited to 'src/zencore/string.cpp')
| -rw-r--r-- | src/zencore/string.cpp | 210 |
1 files changed, 190 insertions, 20 deletions
diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp index 0ee863b74..ed0ba6f46 100644 --- a/src/zencore/string.cpp +++ b/src/zencore/string.cpp @@ -4,6 +4,7 @@ #include <zencore/memoryview.h> #include <zencore/string.h> #include <zencore/testing.h> +#include <zencore/testutils.h> #include <inttypes.h> #include <math.h> @@ -24,6 +25,10 @@ utf16to8_impl(u16bit_iterator StartIt, u16bit_iterator EndIt, ::zen::StringBuild // Take care of surrogate pairs first if (utf8::internal::is_lead_surrogate(cp)) { + if (StartIt == EndIt) + { + break; + } uint32_t trail_surrogate = utf8::internal::mask16(*StartIt++); cp = (cp << 10) + trail_surrogate + utf8::internal::SURROGATE_OFFSET; } @@ -180,7 +185,21 @@ Utf8ToWide(const std::u8string_view& Str8, WideStringBuilderBase& OutString) if (!ByteCount) { +#if ZEN_SIZEOF_WCHAR_T == 2 + if (CurrentOutChar > 0xFFFF) + { + // Supplementary plane: emit a UTF-16 surrogate pair + uint32_t Adjusted = uint32_t(CurrentOutChar - 0x10000); + OutString.Append(wchar_t(0xD800 + (Adjusted >> 10))); + OutString.Append(wchar_t(0xDC00 + (Adjusted & 0x3FF))); + } + else + { + OutString.Append(wchar_t(CurrentOutChar)); + } +#else OutString.Append(wchar_t(CurrentOutChar)); +#endif CurrentOutChar = 0; } } @@ -249,6 +268,17 @@ namespace { /* kNicenumTime */ 1000}; } // namespace +uint64_t +IntPow(uint64_t Base, int Exp) +{ + uint64_t Result = 1; + for (int I = 0; I < Exp; ++I) + { + Result *= Base; + } + return Result; +} + /* * Convert a number to an appropriately human-readable output. */ @@ -296,7 +326,7 @@ NiceNumGeneral(uint64_t Num, std::span<char> Buffer, NicenumFormat Format) const char* u = UnitStrings[Format][Index]; - if ((Index == 0) || ((Num % (uint64_t)powl((int)KiloUnit[Format], Index)) == 0)) + if ((Index == 0) || ((Num % IntPow(KiloUnit[Format], Index)) == 0)) { /* * If this is an even multiple of the base, always display @@ -320,7 +350,7 @@ NiceNumGeneral(uint64_t Num, std::span<char> Buffer, NicenumFormat Format) for (int i = 2; i >= 0; i--) { - double Value = (double)Num / (uint64_t)powl((int)KiloUnit[Format], Index); + double Value = (double)Num / IntPow(KiloUnit[Format], Index); /* * Don't print floating point values for time. Note, @@ -520,13 +550,38 @@ UrlDecode(std::string_view InUrl) return std::string(Url.ToView()); } -////////////////////////////////////////////////////////////////////////// -// -// Unit tests -// +std::string +HideSensitiveString(std::string_view String) +{ + const size_t Length = String.length(); + const size_t SourceLength = Length > 16 ? 4 : 0; + const size_t PadLength = Min(Length - SourceLength, 4u); + const bool AddEllipsis = (SourceLength + PadLength) < Length; + StringBuilder<16> SB; + if (SourceLength > 0) + { + SB << String.substr(0, SourceLength); + } + if (PadLength > 0) + { + SB << std::string(PadLength, 'X'); + } + if (AddEllipsis) + { + SB << "..."; + } + return SB.ToString(); +}; + + ////////////////////////////////////////////////////////////////////////// + // + // Unit tests + // #if ZEN_WITH_TESTS +TEST_SUITE_BEGIN("core.string"); + TEST_CASE("url") { using namespace std::literals; @@ -793,11 +848,6 @@ TEST_CASE("niceNum") } } -void -string_forcelink() -{ -} - TEST_CASE("StringBuilder") { StringBuilder<64> sb; @@ -963,33 +1013,131 @@ TEST_CASE("ExtendableWideStringBuilder") TEST_CASE("utf8") { + using namespace utf8test; + SUBCASE("utf8towide") { - // TODO: add more extensive testing here - this covers a very small space - WideStringBuilder<32> wout; Utf8ToWide(u8"abcdefghi", wout); CHECK(StringEquals(L"abcdefghi", wout.c_str())); wout.Reset(); + Utf8ToWide(u8"abc\xC3\xA4\xC3\xB6\xC3\xBC", wout); + CHECK(StringEquals(L"abc\u00E4\u00F6\u00FC", wout.c_str())); + + wout.Reset(); + Utf8ToWide(std::string_view(kLatin), wout); + CHECK(StringEquals(kLatinW, wout.c_str())); + + wout.Reset(); + Utf8ToWide(std::string_view(kCyrillic), wout); + CHECK(StringEquals(kCyrillicW, wout.c_str())); + + wout.Reset(); + Utf8ToWide(std::string_view(kCJK), wout); + CHECK(StringEquals(kCJKW, wout.c_str())); + + wout.Reset(); + Utf8ToWide(std::string_view(kMixed), wout); + CHECK(StringEquals(kMixedW, wout.c_str())); - Utf8ToWide(u8"abc���", wout); - CHECK(StringEquals(L"abc���", wout.c_str())); + wout.Reset(); + Utf8ToWide(std::string_view(kEmoji), wout); + CHECK(StringEquals(kEmojiW, wout.c_str())); } SUBCASE("widetoutf8") { - // TODO: add more extensive testing here - this covers a very small space - - StringBuilder<32> out; + StringBuilder<64> out; WideToUtf8(L"abcdefghi", out); CHECK(StringEquals("abcdefghi", out.c_str())); out.Reset(); + WideToUtf8(kLatinW, out); + CHECK(StringEquals(kLatin, out.c_str())); + + out.Reset(); + WideToUtf8(kCyrillicW, out); + CHECK(StringEquals(kCyrillic, out.c_str())); + + out.Reset(); + WideToUtf8(kCJKW, out); + CHECK(StringEquals(kCJK, out.c_str())); + + out.Reset(); + WideToUtf8(kMixedW, out); + CHECK(StringEquals(kMixed, out.c_str())); - WideToUtf8(L"abc���", out); - CHECK(StringEquals(u8"abc���", out.c_str())); + out.Reset(); + WideToUtf8(kEmojiW, out); + CHECK(StringEquals(kEmoji, out.c_str())); + } + + SUBCASE("roundtrip") + { + // UTF-8 -> Wide -> UTF-8 identity + const char* Utf8Strings[] = {kLatin, kCyrillic, kCJK, kMixed, kEmoji}; + for (const char* Utf8Str : Utf8Strings) + { + ExtendableWideStringBuilder<64> Wide; + Utf8ToWide(std::string_view(Utf8Str), Wide); + + ExtendableStringBuilder<64> Back; + WideToUtf8(std::wstring_view(Wide.c_str()), Back); + CHECK(StringEquals(Utf8Str, Back.c_str())); + } + + // Wide -> UTF-8 -> Wide identity + const wchar_t* WideStrings[] = {kLatinW, kCyrillicW, kCJKW, kMixedW, kEmojiW}; + for (const wchar_t* WideStr : WideStrings) + { + ExtendableStringBuilder<64> Utf8; + WideToUtf8(std::wstring_view(WideStr), Utf8); + + ExtendableWideStringBuilder<64> Back; + Utf8ToWide(std::string_view(Utf8.c_str()), Back); + CHECK(StringEquals(WideStr, Back.c_str())); + } + + // Empty string round-trip + { + ExtendableWideStringBuilder<8> Wide; + Utf8ToWide(std::string_view(""), Wide); + CHECK(Wide.Size() == 0); + + ExtendableStringBuilder<8> Narrow; + WideToUtf8(std::wstring_view(L""), Narrow); + CHECK(Narrow.Size() == 0); + } + } + + SUBCASE("IsValidUtf8") + { + // Valid inputs + CHECK(IsValidUtf8("")); + CHECK(IsValidUtf8("hello world")); + CHECK(IsValidUtf8(kLatin)); + CHECK(IsValidUtf8(kCyrillic)); + CHECK(IsValidUtf8(kCJK)); + CHECK(IsValidUtf8(kMixed)); + CHECK(IsValidUtf8(kEmoji)); + + // Invalid: truncated 2-byte sequence + CHECK(!IsValidUtf8(std::string_view("\xC3", 1))); + + // Invalid: truncated 3-byte sequence + CHECK(!IsValidUtf8(std::string_view("\xE6\x97", 2))); + + // Invalid: truncated 4-byte sequence + CHECK(!IsValidUtf8(std::string_view("\xF0\x9F\x93", 3))); + + // Invalid: bad start byte + CHECK(!IsValidUtf8(std::string_view("\xFF", 1))); + CHECK(!IsValidUtf8(std::string_view("\xFE", 1))); + + // Invalid: overlong encoding of '/' (U+002F) + CHECK(!IsValidUtf8(std::string_view("\xC0\xAF", 2))); } } @@ -1105,6 +1253,28 @@ TEST_CASE("string") } } +TEST_CASE("hidesensitivestring") +{ + using namespace std::literals; + + CHECK_EQ(HideSensitiveString(""sv), ""sv); + CHECK_EQ(HideSensitiveString("A"sv), "X"sv); + CHECK_EQ(HideSensitiveString("ABCD"sv), "XXXX"sv); + CHECK_EQ(HideSensitiveString("ABCDE"sv), "XXXX..."sv); + CHECK_EQ(HideSensitiveString("ABCDEFGH"sv), "XXXX..."sv); + CHECK_EQ(HideSensitiveString("ABCDEFGHIJKLMNOP"sv), "XXXX..."sv); + CHECK_EQ(HideSensitiveString("ABCDEFGHIJKLMNOPQ"sv), "ABCDXXXX..."sv); + CHECK_EQ(HideSensitiveString("ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"sv), "ABCDXXXX..."sv); + CHECK_EQ(HideSensitiveString("1234567890123456789"sv), "1234XXXX..."sv); +} + +TEST_SUITE_END(); + +void +string_forcelink() +{ +} + #endif } // namespace zen |