about summary refs log tree commit diff
path: root/src/zencore/string.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/zencore/string.cpp')
-rw-r--r-- src/zencore/string.cpp 210
1 files changed, 190 insertions, 20 deletions
diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp
index 0ee863b74..ed0ba6f46 100644
--- a/src/zencore/string.cpp
+++ b/src/zencore/string.cpp
@@ -4,6 +4,7 @@
#include <zencore/memoryview.h>
#include <zencore/string.h>
#include <zencore/testing.h>
+#include <zencore/testutils.h>
#include <inttypes.h>
#include <math.h>
@@ -24,6 +25,10 @@ utf16to8_impl(u16bit_iterator StartIt, u16bit_iterator EndIt, ::zen::StringBuild
// Take care of surrogate pairs first
if (utf8::internal::is_lead_surrogate(cp))
{
+ if (StartIt == EndIt)
+ {
+ break;
+ }
uint32_t trail_surrogate = utf8::internal::mask16(*StartIt++);
cp = (cp << 10) + trail_surrogate + utf8::internal::SURROGATE_OFFSET;
}
@@ -180,7 +185,21 @@ Utf8ToWide(const std::u8string_view& Str8, WideStringBuilderBase& OutString)
if (!ByteCount)
{
+#if ZEN_SIZEOF_WCHAR_T == 2
+ if (CurrentOutChar > 0xFFFF)
+ {
+ // Supplementary plane: emit a UTF-16 surrogate pair
+ uint32_t Adjusted = uint32_t(CurrentOutChar - 0x10000);
+ OutString.Append(wchar_t(0xD800 + (Adjusted >> 10)));
+ OutString.Append(wchar_t(0xDC00 + (Adjusted & 0x3FF)));
+ }
+ else
+ {
+ OutString.Append(wchar_t(CurrentOutChar));
+ }
+#else
OutString.Append(wchar_t(CurrentOutChar));
+#endif
CurrentOutChar = 0;
}
}
@@ -249,6 +268,17 @@ namespace {
/* kNicenumTime */ 1000};
} // namespace
+// Raises Base to the power Exp using exact unsigned 64-bit integer arithmetic.
+//
+// The result is built by repeated multiplication, so no floating-point rounding
+// is involved. An Exp of zero (or any negative value) yields 1. Products that do
+// not fit in 64 bits wrap around, as unsigned arithmetic does.
+uint64_t
+IntPow(uint64_t Base, int Exp)
+{
+    uint64_t Product   = 1;
+    int      Remaining = Exp;
+    while (Remaining > 0)
+    {
+        Product *= Base;
+        --Remaining;
+    }
+    return Product;
+}
+
/*
* Convert a number to an appropriately human-readable output.
*/
@@ -296,7 +326,7 @@ NiceNumGeneral(uint64_t Num, std::span<char> Buffer, NicenumFormat Format)
const char* u = UnitStrings[Format][Index];
- if ((Index == 0) || ((Num % (uint64_t)powl((int)KiloUnit[Format], Index)) == 0))
+ if ((Index == 0) || ((Num % IntPow(KiloUnit[Format], Index)) == 0))
{
/*
* If this is an even multiple of the base, always display
@@ -320,7 +350,7 @@ NiceNumGeneral(uint64_t Num, std::span<char> Buffer, NicenumFormat Format)
for (int i = 2; i >= 0; i--)
{
- double Value = (double)Num / (uint64_t)powl((int)KiloUnit[Format], Index);
+ double Value = (double)Num / IntPow(KiloUnit[Format], Index);
/*
* Don't print floating point values for time. Note,
@@ -520,13 +550,38 @@ UrlDecode(std::string_view InUrl)
return std::string(Url.ToView());
}
-//////////////////////////////////////////////////////////////////////////
-//
-// Unit tests
-//
+// Returns a masked rendering of String in which most of the content is hidden.
+//
+// Layout of the result:
+//   - if String is longer than 16 characters, its first 4 characters verbatim
+//   - then one 'X' per remaining character, capped at 4
+//   - then "..." when String holds more characters than the parts above cover
+//
+// Examples: "" -> "", "ABCD" -> "XXXX", "ABCDE" -> "XXXX...",
+// "ABCDEFGHIJKLMNOPQ" (17 characters) -> "ABCDXXXX...".
+// The longest possible result is 11 characters (4 + 4 + 3).
+std::string
+HideSensitiveString(std::string_view String)
+{
+    const size_t Length = String.length();
+    // Only strings longer than 16 characters expose a 4 character prefix
+    const size_t SourceLength = Length > 16 ? 4 : 0;
+    // Whatever is not shown verbatim is represented by at most four placeholders
+    const size_t PadLength = Min(Length - SourceLength, 4u);
+    // The ellipsis signals that the input was longer than prefix + placeholders
+    const bool AddEllipsis = (SourceLength + PadLength) < Length;
+
+    StringBuilder<16> SB;
+    if (SourceLength > 0)
+    {
+        SB << String.substr(0, SourceLength);
+    }
+    if (PadLength > 0)
+    {
+        SB << std::string(PadLength, 'X');
+    }
+    if (AddEllipsis)
+    {
+        SB << "...";
+    }
+    return SB.ToString();
+}
+
+ //////////////////////////////////////////////////////////////////////////
+ //
+ // Unit tests
+ //
#if ZEN_WITH_TESTS
+TEST_SUITE_BEGIN("core.string");
+
TEST_CASE("url")
{
using namespace std::literals;
@@ -793,11 +848,6 @@ TEST_CASE("niceNum")
}
}
-void
-string_forcelink()
-{
-}
-
TEST_CASE("StringBuilder")
{
StringBuilder<64> sb;
@@ -963,33 +1013,131 @@ TEST_CASE("ExtendableWideStringBuilder")
TEST_CASE("utf8")
{
+ using namespace utf8test;
+
SUBCASE("utf8towide")
{
- // TODO: add more extensive testing here - this covers a very small space
-
WideStringBuilder<32> wout;
Utf8ToWide(u8"abcdefghi", wout);
CHECK(StringEquals(L"abcdefghi", wout.c_str()));
wout.Reset();
+ Utf8ToWide(u8"abc\xC3\xA4\xC3\xB6\xC3\xBC", wout);
+ CHECK(StringEquals(L"abc\u00E4\u00F6\u00FC", wout.c_str()));
+
+ wout.Reset();
+ Utf8ToWide(std::string_view(kLatin), wout);
+ CHECK(StringEquals(kLatinW, wout.c_str()));
+
+ wout.Reset();
+ Utf8ToWide(std::string_view(kCyrillic), wout);
+ CHECK(StringEquals(kCyrillicW, wout.c_str()));
+
+ wout.Reset();
+ Utf8ToWide(std::string_view(kCJK), wout);
+ CHECK(StringEquals(kCJKW, wout.c_str()));
+
+ wout.Reset();
+ Utf8ToWide(std::string_view(kMixed), wout);
+ CHECK(StringEquals(kMixedW, wout.c_str()));
- Utf8ToWide(u8"abc���", wout);
- CHECK(StringEquals(L"abc���", wout.c_str()));
+ wout.Reset();
+ Utf8ToWide(std::string_view(kEmoji), wout);
+ CHECK(StringEquals(kEmojiW, wout.c_str()));
}
SUBCASE("widetoutf8")
{
- // TODO: add more extensive testing here - this covers a very small space
-
- StringBuilder<32> out;
+ StringBuilder<64> out;
WideToUtf8(L"abcdefghi", out);
CHECK(StringEquals("abcdefghi", out.c_str()));
out.Reset();
+ WideToUtf8(kLatinW, out);
+ CHECK(StringEquals(kLatin, out.c_str()));
+
+ out.Reset();
+ WideToUtf8(kCyrillicW, out);
+ CHECK(StringEquals(kCyrillic, out.c_str()));
+
+ out.Reset();
+ WideToUtf8(kCJKW, out);
+ CHECK(StringEquals(kCJK, out.c_str()));
+
+ out.Reset();
+ WideToUtf8(kMixedW, out);
+ CHECK(StringEquals(kMixed, out.c_str()));
- WideToUtf8(L"abc���", out);
- CHECK(StringEquals(u8"abc���", out.c_str()));
+ out.Reset();
+ WideToUtf8(kEmojiW, out);
+ CHECK(StringEquals(kEmoji, out.c_str()));
+ }
+
+ SUBCASE("roundtrip")
+ {
+ // UTF-8 -> Wide -> UTF-8 identity
+ const char* Utf8Strings[] = {kLatin, kCyrillic, kCJK, kMixed, kEmoji};
+ for (const char* Utf8Str : Utf8Strings)
+ {
+ ExtendableWideStringBuilder<64> Wide;
+ Utf8ToWide(std::string_view(Utf8Str), Wide);
+
+ ExtendableStringBuilder<64> Back;
+ WideToUtf8(std::wstring_view(Wide.c_str()), Back);
+ CHECK(StringEquals(Utf8Str, Back.c_str()));
+ }
+
+ // Wide -> UTF-8 -> Wide identity
+ const wchar_t* WideStrings[] = {kLatinW, kCyrillicW, kCJKW, kMixedW, kEmojiW};
+ for (const wchar_t* WideStr : WideStrings)
+ {
+ ExtendableStringBuilder<64> Utf8;
+ WideToUtf8(std::wstring_view(WideStr), Utf8);
+
+ ExtendableWideStringBuilder<64> Back;
+ Utf8ToWide(std::string_view(Utf8.c_str()), Back);
+ CHECK(StringEquals(WideStr, Back.c_str()));
+ }
+
+ // Empty string round-trip
+ {
+ ExtendableWideStringBuilder<8> Wide;
+ Utf8ToWide(std::string_view(""), Wide);
+ CHECK(Wide.Size() == 0);
+
+ ExtendableStringBuilder<8> Narrow;
+ WideToUtf8(std::wstring_view(L""), Narrow);
+ CHECK(Narrow.Size() == 0);
+ }
+ }
+
+ SUBCASE("IsValidUtf8")
+ {
+ // Valid inputs
+ CHECK(IsValidUtf8(""));
+ CHECK(IsValidUtf8("hello world"));
+ CHECK(IsValidUtf8(kLatin));
+ CHECK(IsValidUtf8(kCyrillic));
+ CHECK(IsValidUtf8(kCJK));
+ CHECK(IsValidUtf8(kMixed));
+ CHECK(IsValidUtf8(kEmoji));
+
+ // Invalid: truncated 2-byte sequence
+ CHECK(!IsValidUtf8(std::string_view("\xC3", 1)));
+
+ // Invalid: truncated 3-byte sequence
+ CHECK(!IsValidUtf8(std::string_view("\xE6\x97", 2)));
+
+ // Invalid: truncated 4-byte sequence
+ CHECK(!IsValidUtf8(std::string_view("\xF0\x9F\x93", 3)));
+
+ // Invalid: bad start byte
+ CHECK(!IsValidUtf8(std::string_view("\xFF", 1)));
+ CHECK(!IsValidUtf8(std::string_view("\xFE", 1)));
+
+ // Invalid: overlong encoding of '/' (U+002F)
+ CHECK(!IsValidUtf8(std::string_view("\xC0\xAF", 2)));
}
}
@@ -1105,6 +1253,28 @@ TEST_CASE("string")
}
}
+// Checks HideSensitiveString with inputs on both sides of its length thresholds:
+// empty, one to four characters, five to sixteen characters, and more than sixteen.
+TEST_CASE("hidesensitivestring")
+{
+    using namespace std::literals;
+
+    struct MaskCase
+    {
+        std::string_view Input;
+        std::string_view Expected;
+    };
+
+    const MaskCase Cases[] = {
+        {""sv, ""sv},
+        {"A"sv, "X"sv},
+        {"ABCD"sv, "XXXX"sv},
+        {"ABCDE"sv, "XXXX..."sv},
+        {"ABCDEFGH"sv, "XXXX..."sv},
+        {"ABCDEFGHIJKLMNOP"sv, "XXXX..."sv},
+        {"ABCDEFGHIJKLMNOPQ"sv, "ABCDXXXX..."sv},
+        {"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"sv, "ABCDXXXX..."sv},
+        {"1234567890123456789"sv, "1234XXXX..."sv},
+    };
+
+    for (const MaskCase& Case : Cases)
+    {
+        CHECK_EQ(HideSensitiveString(Case.Input), Case.Expected);
+    }
+}
+
+TEST_SUITE_END();
+
+// Intentionally empty; only compiled when ZEN_WITH_TESTS is enabled.
+// NOTE(review): presumably referenced from outside this file so the linker keeps
+// this translation unit and the tests registered in it - confirm at the call site.
+void
+string_forcelink()
+{
+}
+
#endif
} // namespace zen