From 31dd0f8906aa5a27b8c453c72f6d10964a3be9eb Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 6 Apr 2022 15:46:29 +0200 Subject: structured cache with block store --- zenserver/cache/structuredcachestore.cpp | 1977 ++++++++++++++++++++++++++---- zenserver/cache/structuredcachestore.h | 93 +- 2 files changed, 1811 insertions(+), 259 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 738e4c1fd..c5ccef523 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -14,13 +14,13 @@ #include #include #include -#include -#include #include #include #include #include +#include + #if ZEN_PLATFORM_WINDOWS # include #endif @@ -30,10 +30,220 @@ ZEN_THIRD_PARTY_INCLUDES_START #include ZEN_THIRD_PARTY_INCLUDES_END +#if ZEN_WITH_TESTS +# include +# include +# include +# include +#endif + ////////////////////////////////////////////////////////////////////////// +#pragma pack(push) +#pragma pack(1) + namespace zen { +namespace { + +#pragma pack(push) +#pragma pack(1) + + struct CacheBucketIndexHeader + { + static constexpr uint32_t ExpectedMagic = 0x75696478; // 'uidx'; + static constexpr uint32_t CurrentVersion = 1; + + uint32_t Magic = ExpectedMagic; + uint32_t Version = CurrentVersion; + uint64_t EntryCount = 0; + uint64_t LogPosition = 0; + uint32_t PayloadAlignment = 0; + uint32_t Checksum = 0; + + static uint32_t ComputeChecksum(const CacheBucketIndexHeader& Header) + { + return XXH32(&Header.Magic, sizeof(CacheBucketIndexHeader) - sizeof(uint32_t), 0xC0C0'BABA); + } + }; + + static_assert(sizeof(CacheBucketIndexHeader) == 32); + + struct LegacyDiskLocation + { + inline LegacyDiskLocation() = default; + + inline LegacyDiskLocation(uint64_t Offset, uint64_t ValueSize, uint32_t IndexSize, uint64_t Flags) + : OffsetAndFlags(CombineOffsetAndFlags(Offset, Flags)) + , LowerSize(ValueSize & 0xFFFFffff) + , IndexDataSize(IndexSize) + { + } + + static const uint64_t kOffsetMask = 0x0000'ffFF'ffFF'ffFFull; + static const uint64_t kSizeMask = 0x00FF'0000'0000'0000ull; // Most significant bits of value size (lower 32 bits in LowerSize) + static const uint64_t kFlagsMask = 0xff00'0000'0000'0000ull; + static const uint64_t kStandaloneFile = 0x8000'0000'0000'0000ull; // Stored as a separate file + static const uint64_t kStructured = 0x4000'0000'0000'0000ull; // Serialized as compact binary + static const uint64_t kTombStone = 0x2000'0000'0000'0000ull; // Represents a deleted key/value + static const uint64_t kCompressed = 0x1000'0000'0000'0000ull; // Stored in compressed buffer format + + static uint64_t CombineOffsetAndFlags(uint64_t Offset, uint64_t Flags) { return Offset | Flags; } + + inline uint64_t Offset() const { return OffsetAndFlags & kOffsetMask; } + inline uint64_t Size() const { return LowerSize; } + inline uint64_t IsFlagSet(uint64_t Flag) const { return OffsetAndFlags & Flag; } + inline ZenContentType GetContentType() const + { + ZenContentType ContentType = ZenContentType::kBinary; + + if (IsFlagSet(LegacyDiskLocation::kStructured)) + { + ContentType = ZenContentType::kCbObject; + } + + if (IsFlagSet(LegacyDiskLocation::kCompressed)) + { + ContentType = ZenContentType::kCompressedBinary; + } + + return ContentType; + } + inline uint64_t Flags() const { return OffsetAndFlags & kFlagsMask; } + + private: + uint64_t OffsetAndFlags = 0; + uint32_t LowerSize = 0; + uint32_t IndexDataSize = 0; + }; + + struct LegacyDiskIndexEntry + { + IoHash Key; + LegacyDiskLocation Location; + }; + +#pragma pack(pop) + + static_assert(sizeof(LegacyDiskIndexEntry) == 36); + + const char* IndexExtension = ".uidx"; + const char* LogExtension = ".slog"; + const char* DataExtension = ".sobs"; + + std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex) + { + ExtendablePathBuilder<256> Path; + + char BlockHexString[9]; + ToHexNumber(BlockIndex, BlockHexString); + + Path.Append(BlocksBasePath); + Path.AppendSeparator(); + Path.AppendAsciiRange(BlockHexString, BlockHexString + 4); + Path.AppendSeparator(); + Path.Append(BlockHexString); + Path.Append(DataExtension); + return Path.ToPath(); + } + + std::filesystem::path GetIndexPath(const std::filesystem::path& BucketDir, const std::string& BucketName) + { + return BucketDir / (BucketName + IndexExtension); + } + + std::filesystem::path GetTempIndexPath(const std::filesystem::path& BucketDir, const std::string& BucketName) + { + return BucketDir / (BucketName + ".tmp" + IndexExtension); + } + + std::filesystem::path GetLogPath(const std::filesystem::path& BucketDir, const std::string& BucketName) + { + return BucketDir / (BucketName + LogExtension); + } + + std::filesystem::path GetLegacyLogPath(const std::filesystem::path& BucketDir) + { + return BucketDir / (std::string("zen") + LogExtension); + } + + std::filesystem::path GetLegacyDataPath(const std::filesystem::path& BucketDir) + { + return BucketDir / (std::string("zen") + DataExtension); + } + + std::vector MakeDiskIndexEntries(const std::unordered_map& MovedChunks, + const std::vector& DeletedChunks) + { + std::vector result; + result.reserve(MovedChunks.size()); + for (const auto& MovedEntry : MovedChunks) + { + result.push_back({.Key = MovedEntry.first, .Location = MovedEntry.second}); + } + for (const IoHash& ChunkHash : DeletedChunks) + { + DiskLocation Location; + Location.Flags |= DiskLocation::kTombStone; + result.push_back({.Key = ChunkHash, .Location = Location}); + } + return result; + } + + bool ValidateLegacyEntry(const LegacyDiskIndexEntry& Entry, std::string& OutReason) + { + if (Entry.Key == IoHash::Zero) + { + OutReason = fmt::format("Invalid hash key {}", Entry.Key.ToHexString()); + return false; + } + if (Entry.Location.Flags() & ~(LegacyDiskLocation::kStandaloneFile | LegacyDiskLocation::kStructured | + LegacyDiskLocation::kTombStone | LegacyDiskLocation::kCompressed)) + { + OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Location.Flags(), Entry.Key.ToHexString()); + return false; + } + if (!Entry.Location.IsFlagSet(LegacyDiskLocation::kTombStone)) + { + return true; + } + uint64_t Size = Entry.Location.Size(); + if (Size == 0) + { + OutReason = fmt::format("Invalid size {} for entry {}", Size, Entry.Key.ToHexString()); + return false; + } + return true; + } + + bool ValidateEntry(const DiskIndexEntry& Entry, std::string& OutReason) + { + if (Entry.Key == IoHash::Zero) + { + OutReason = fmt::format("Invalid hash key {}", Entry.Key.ToHexString()); + return false; + } + if (Entry.Location.GetFlags() & + ~(DiskLocation::kStandaloneFile | DiskLocation::kStructured | DiskLocation::kTombStone | DiskLocation::kCompressed)) + { + OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Location.GetFlags(), Entry.Key.ToHexString()); + return false; + } + if (Entry.Location.IsFlagSet(DiskLocation::kTombStone)) + { + return true; + } + uint64_t Size = Entry.Location.Size(); + if (Size == 0) + { + OutReason = fmt::format("Invalid size {} for entry {}", Size, Entry.Key.ToHexString()); + return false; + } + return true; + } + +} // namespace + namespace fs = std::filesystem; static CbObject @@ -60,9 +270,9 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) } ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir) -: GcStorage(Gc) +: m_RootDir(RootDir) +, GcStorage(Gc) , GcContributor(Gc) -, m_RootDir(RootDir) , m_DiskLayer(RootDir) { ZEN_INFO("initializing structured cache at '{}'", RootDir); @@ -425,6 +635,8 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo { using namespace std::literals; + m_BlocksBasePath = BucketDir / "blocks"; + CreateDirectories(BucketDir); std::filesystem::path ManifestPath{BucketDir / "zen_manifest"}; @@ -470,48 +682,694 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo } void -ZenCacheDiskLayer::CacheBucket::OpenLog(const fs::path& BucketDir, const bool IsNew) +ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() { - m_BucketDir = BucketDir; + ZEN_INFO("write store snapshot for '{}'", m_BucketDir / m_BucketName); + uint64_t EntryCount = 0; + Stopwatch Timer; + const auto _ = MakeGuard([this, &EntryCount, &Timer] { + ZEN_INFO("wrote store snapshot for '{}' containing #{} entries in {}", + m_BucketDir / m_BucketName, + EntryCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); - uint64_t MaxFileOffset = 0; - uint64_t InvalidEntryCount = 0; - m_SobsCursor = 0; - m_TotalSize = 0; + namespace fs = std::filesystem; - m_Index.clear(); + fs::path IndexPath = GetIndexPath(m_BucketDir, m_BucketName); + fs::path STmpIndexPath = GetTempIndexPath(m_BucketDir, m_BucketName); - std::filesystem::path SobsPath{BucketDir / "zen.sobs"}; - std::filesystem::path SlogPath{BucketDir / "zen.slog"}; + // Move index away, we keep it if something goes wrong + if (fs::is_regular_file(STmpIndexPath)) + { + fs::remove(STmpIndexPath); + } + if (fs::is_regular_file(IndexPath)) + { + fs::rename(IndexPath, STmpIndexPath); + } - m_SobsFile.Open(SobsPath, IsNew ? BasicFile::Mode::kTruncate : BasicFile::Mode::kWrite); - m_SlogFile.Open(SlogPath, IsNew ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite); + try + { + m_SlogFile.Flush(); + + // Write the current state of the location map to a new index state + uint64_t LogCount = 0; + std::vector Entries; - m_SlogFile.Replay( - [&](const DiskIndexEntry& Entry) { - if (Entry.Key == IoHash::Zero) + { + RwLock::SharedLockScope __(m_InsertLock); + RwLock::SharedLockScope ___(m_IndexLock); + Entries.resize(m_Index.size()); + + uint64_t EntryIndex = 0; + for (auto& Entry : m_Index) { - ++InvalidEntryCount; + DiskIndexEntry& IndexEntry = Entries[EntryIndex++]; + IndexEntry.Key = Entry.first; + IndexEntry.Location = Entry.second.Location; } - else if (Entry.Location.IsFlagSet(DiskLocation::kTombStone)) + + LogCount = m_SlogFile.GetLogCount(); + } + + BasicFile ObjectIndexFile; + ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kTruncate); + CacheBucketIndexHeader Header = {.EntryCount = Entries.size(), + .LogPosition = LogCount, + .PayloadAlignment = gsl::narrow(m_PayloadAlignment)}; + + Header.Checksum = CacheBucketIndexHeader::ComputeChecksum(Header); + + ObjectIndexFile.Write(&Header, sizeof(CacheBucketIndexHeader), 0); + ObjectIndexFile.Write(Entries.data(), Entries.size() * sizeof(DiskIndexEntry), sizeof(CacheBucketIndexHeader)); + ObjectIndexFile.Flush(); + ObjectIndexFile.Close(); + EntryCount = Entries.size(); + } + catch (std::exception& Err) + { + ZEN_ERROR("snapshot FAILED, reason: '{}'", Err.what()); + + // Restore any previous snapshot + + if (fs::is_regular_file(STmpIndexPath)) + { + fs::remove(IndexPath); + fs::rename(STmpIndexPath, IndexPath); + } + } + if (fs::is_regular_file(STmpIndexPath)) + { + fs::remove(STmpIndexPath); + } +} + +uint64_t +ZenCacheDiskLayer::CacheBucket::ReadIndexFile() +{ + std::vector Entries; + std::filesystem::path IndexPath = GetIndexPath(m_BucketDir, m_BucketName); + if (std::filesystem::is_regular_file(IndexPath)) + { + Stopwatch Timer; + const auto _ = MakeGuard([this, &Entries, &Timer] { + ZEN_INFO("read store '{}' index containing #{} entries in {}", + m_BucketDir / m_BucketName, + Entries.size(), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + BasicFile ObjectIndexFile; + ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kRead); + uint64_t Size = ObjectIndexFile.FileSize(); + if (Size >= sizeof(CacheBucketIndexHeader)) + { + uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CacheBucketIndexHeader))) / sizeof(DiskIndexEntry); + CacheBucketIndexHeader Header; + ObjectIndexFile.Read(&Header, sizeof(Header), 0); + if ((Header.Magic == CacheBucketIndexHeader::ExpectedMagic) && (Header.Version == CacheBucketIndexHeader::CurrentVersion) && + (Header.Checksum == CacheBucketIndexHeader::ComputeChecksum(Header)) && (Header.PayloadAlignment > 0) && + (Header.EntryCount <= ExpectedEntryCount)) { - m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed); + Entries.resize(Header.EntryCount); + ObjectIndexFile.Read(Entries.data(), Header.EntryCount * sizeof(DiskIndexEntry), sizeof(CacheBucketIndexHeader)); + m_PayloadAlignment = Header.PayloadAlignment; + + std::string InvalidEntryReason; + for (const DiskIndexEntry& Entry : Entries) + { + if (!ValidateEntry(Entry, InvalidEntryReason)) + { + ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", IndexPath, InvalidEntryReason); + continue; + } + m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount())); + } + + return Header.LogPosition; } else { - m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount())); - m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order::relaxed); + ZEN_WARN("skipping invalid index file '{}'", IndexPath); + } + } + } + return 0; +} + +uint64_t +ZenCacheDiskLayer::CacheBucket::ReadLog(uint64_t SkipEntryCount) +{ + std::vector Entries; + std::filesystem::path LogPath = GetLogPath(m_BucketDir, m_BucketName); + if (std::filesystem::is_regular_file(LogPath)) + { + Stopwatch Timer; + const auto _ = MakeGuard([LogPath, &Entries, &Timer] { + ZEN_INFO("read store '{}' log containing #{} entries in {}", LogPath, Entries.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + TCasLogFile CasLog; + CasLog.Open(LogPath, CasLogFile::Mode::kRead); + if (CasLog.Initialize()) + { + uint64_t EntryCount = CasLog.GetLogCount(); + if (EntryCount < SkipEntryCount) + { + ZEN_WARN("reading full log at '{}', reason: Log position from index snapshot is out of range", LogPath); + SkipEntryCount = 0; + } + uint64_t ReadCount = EntryCount - SkipEntryCount; + m_Index.reserve(ReadCount); + uint64_t InvalidEntryCount = 0; + CasLog.Replay( + [&](const DiskIndexEntry& Record) { + std::string InvalidEntryReason; + if (Record.Location.Flags & DiskLocation::kTombStone) + { + m_Index.erase(Record.Key); + return; + } + if (!ValidateEntry(Record, InvalidEntryReason)) + { + ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LogPath, InvalidEntryReason); + ++InvalidEntryCount; + return; + } + m_Index.insert_or_assign(Record.Key, IndexEntry(Record.Location, GcClock::TickCount())); + }, + SkipEntryCount); + if (InvalidEntryCount) + { + ZEN_WARN("found #{} invalid entries in '{}'", InvalidEntryCount, m_BucketDir / m_BucketName); } - MaxFileOffset = std::max(MaxFileOffset, Entry.Location.Offset() + Entry.Location.Size()); - }, - 0); + } + } + return 0; +}; + +uint64_t +ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) +{ + std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_BucketDir); + + if (!std::filesystem::is_regular_file(LegacyLogPath) || std::filesystem::file_size(LegacyLogPath) == 0) + { + return 0; + } + + ZEN_INFO("migrating store {}", m_BucketDir / m_BucketName); + + std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_BucketDir); + uint64_t MigratedChunkCount = 0; + uint32_t MigratedBlockCount = 0; + Stopwatch MigrationTimer; + uint64_t TotalSize = 0; + const auto _ = MakeGuard([this, &MigrationTimer, &MigratedChunkCount, &MigratedBlockCount, &TotalSize] { + ZEN_INFO("migrated store '{}' to #{} chunks in #{} blocks in {} ({})", + m_BucketDir / m_BucketName, + MigratedChunkCount, + MigratedBlockCount, + NiceTimeSpanMs(MigrationTimer.GetElapsedTimeMs()), + NiceBytes(TotalSize)); + }); + + uint32_t WriteBlockIndex = 0; + while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + { + ++WriteBlockIndex; + } + + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(m_BucketDir, Error); + if (Error) + { + ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BucketDir, Error.message()); + return 0; + } + + if (Space.Free < MaxBlockSize) + { + ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}", + m_BucketDir / m_BucketName, + MaxBlockSize, + NiceBytes(Space.Free)); + return 0; + } + + BasicFile BlockFile; + BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); + + std::unordered_map LegacyDiskIndex; + uint64_t InvalidEntryCount = 0; + + TCasLogFile LegacyCasLog; + LegacyCasLog.Open(LegacyLogPath, CleanSource ? CasLogFile::Mode::kWrite : CasLogFile::Mode::kRead); + { + Stopwatch Timer; + const auto __ = MakeGuard([LegacyLogPath, &LegacyDiskIndex, &Timer] { + ZEN_INFO("read store '{}' legacy log containing #{} entries in {}", + LegacyLogPath, + LegacyDiskIndex.size(), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + if (LegacyCasLog.Initialize()) + { + LegacyDiskIndex.reserve(LegacyCasLog.GetLogCount()); + LegacyCasLog.Replay( + [&](const LegacyDiskIndexEntry& Record) { + if (Record.Location.IsFlagSet(LegacyDiskLocation::kTombStone)) + { + LegacyDiskIndex.erase(Record.Key); + return; + } + std::string InvalidEntryReason; + if (!ValidateLegacyEntry(Record, InvalidEntryReason)) + { + ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LegacyLogPath, InvalidEntryReason); + ++InvalidEntryCount; + return; + } + if (m_Index.contains(Record.Key)) + { + return; + } + LegacyDiskIndex[Record.Key] = Record; + }, + 0); + + std::vector BadEntries; + uint64_t BlockFileSize = BlockFile.FileSize(); + for (const auto& Entry : LegacyDiskIndex) + { + const LegacyDiskIndexEntry& Record(Entry.second); + if (Record.Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) + { + continue; + } + if (Record.Location.Offset() + Record.Location.Size() <= BlockFileSize) + { + continue; + } + ZEN_WARN("skipping invalid entry in '{}', reason: location is outside of file", LegacyLogPath); + BadEntries.push_back(Entry.first); + } + for (const IoHash& BadHash : BadEntries) + { + LegacyDiskIndex.erase(BadHash); + } + InvalidEntryCount += BadEntries.size(); + } + } if (InvalidEntryCount) { - ZEN_WARN("found {} invalid entries in '{}'", InvalidEntryCount, SlogPath); + ZEN_WARN("found #{} invalid entries in '{}'", InvalidEntryCount, m_BucketDir / m_BucketName); + } + + if (LegacyDiskIndex.empty()) + { + LegacyCasLog.Close(); + BlockFile.Close(); + if (CleanSource) + { + // Older versions of ZenCacheDiskLayer expects the legacy files to exist if it can find + // a manifest and crashes on startup if they don't. + // In order to not break startup when switching back an older version, lets just reset + // the legacy data files to zero length. + + BasicFile LegacyLog; + LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate); + BasicFile LegacySobs; + LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate); + } + return 0; + } + + uint64_t BlockChunkCount = 0; + uint64_t BlockTotalSize = 0; + for (const auto& Entry : LegacyDiskIndex) + { + const LegacyDiskIndexEntry& Record(Entry.second); + if (Record.Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) + { + continue; + } + BlockChunkCount++; + BlockTotalSize += Record.Location.Size(); + } + + uint64_t RequiredDiskSpace = BlockTotalSize + ((m_PayloadAlignment - 1) * BlockChunkCount); + uint64_t MaxRequiredBlockCount = RoundUp(RequiredDiskSpace, MaxBlockSize) / MaxBlockSize; + if (MaxRequiredBlockCount > BlockStoreDiskLocation::MaxBlockIndex) + { + ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}", + m_BucketDir / m_BucketName, + MaxRequiredBlockCount, + BlockStoreDiskLocation::MaxBlockIndex); + return 0; + } + + constexpr const uint64_t DiskReserve = 1ul << 28; + + if (CleanSource) + { + if (Space.Free < (MaxBlockSize + DiskReserve)) + { + ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", + m_BucketDir / m_BucketName, + NiceBytes(MaxBlockSize + DiskReserve), + NiceBytes(Space.Free)); + return 0; + } + } + else + { + if (Space.Free < (RequiredDiskSpace + DiskReserve)) + { + ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", + m_BucketDir / m_BucketName, + NiceBytes(RequiredDiskSpace + DiskReserve), + NiceBytes(Space.Free)); + return 0; + } + } + + std::filesystem::path LogPath = GetLogPath(m_BucketDir, m_BucketName); + CreateDirectories(LogPath.parent_path()); + TCasLogFile CasLog; + CasLog.Open(LogPath, CasLogFile::Mode::kWrite); + + if (CleanSource && (MaxRequiredBlockCount < 2)) + { + std::vector LogEntries; + LogEntries.reserve(LegacyDiskIndex.size()); + + // We can use the block as is, just move it and add the blocks to our new log + for (auto& Entry : LegacyDiskIndex) + { + const LegacyDiskIndexEntry& Record(Entry.second); + + DiskLocation NewLocation; + uint8_t Flags = 0xff & (Record.Location.Flags() >> 56); + if (Record.Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) + { + NewLocation = DiskLocation(Record.Location.Size(), Flags); + } + else + { + BlockStoreLocation NewChunkLocation(WriteBlockIndex, Record.Location.Offset(), Record.Location.Size()); + NewLocation = DiskLocation(NewChunkLocation, m_PayloadAlignment, Flags); + } + LogEntries.push_back({.Key = Entry.second.Key, .Location = NewLocation}); + } + std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex); + CreateDirectories(BlockPath.parent_path()); + BlockFile.Close(); + std::filesystem::rename(LegacyDataPath, BlockPath); + CasLog.Append(LogEntries); + for (const DiskIndexEntry& Entry : LogEntries) + { + m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount())); + } + + MigratedChunkCount += LogEntries.size(); + MigratedBlockCount++; + } + else + { + std::vector ChunkHashes; + ChunkHashes.reserve(LegacyDiskIndex.size()); + for (const auto& Entry : LegacyDiskIndex) + { + ChunkHashes.push_back(Entry.first); + } + + std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) { + auto LhsKeyIt = LegacyDiskIndex.find(Lhs); + auto RhsKeyIt = LegacyDiskIndex.find(Rhs); + return LhsKeyIt->second.Location.Offset() < RhsKeyIt->second.Location.Offset(); + }); + + uint64_t BlockSize = 0; + uint64_t BlockOffset = 0; + std::vector NewLocations; + struct BlockData + { + std::vector> Chunks; + uint64_t BlockOffset; + uint64_t BlockSize; + uint32_t BlockIndex; + }; + + std::vector BlockRanges; + std::vector> Chunks; + BlockRanges.reserve(MaxRequiredBlockCount); + for (const IoHash& ChunkHash : ChunkHashes) + { + const LegacyDiskIndexEntry& LegacyEntry = LegacyDiskIndex[ChunkHash]; + const LegacyDiskLocation& LegacyChunkLocation = LegacyEntry.Location; + + if (LegacyChunkLocation.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) + { + // For standalone files we just store the chunk hash an use the size from the legacy index as is + Chunks.push_back({ChunkHash, {}}); + continue; + } + + uint64_t ChunkOffset = LegacyChunkLocation.Offset(); + uint64_t ChunkSize = LegacyChunkLocation.Size(); + uint64_t ChunkEnd = ChunkOffset + ChunkSize; + + if (BlockSize == 0) + { + BlockOffset = ChunkOffset; + } + if ((ChunkEnd - BlockOffset) > MaxBlockSize) + { + BlockData BlockRange{.BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}; + BlockRange.Chunks.swap(Chunks); + BlockRanges.push_back(BlockRange); + + WriteBlockIndex++; + while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + { + ++WriteBlockIndex; + } + BlockOffset = ChunkOffset; + BlockSize = 0; + } + BlockSize = RoundUp(BlockSize, m_PayloadAlignment); + BlockStoreLocation ChunkLocation = {.BlockIndex = WriteBlockIndex, .Offset = ChunkOffset - BlockOffset, .Size = ChunkSize}; + Chunks.push_back({ChunkHash, ChunkLocation}); + BlockSize = ChunkEnd - BlockOffset; + } + if (BlockSize > 0) + { + BlockRanges.push_back( + {.Chunks = std::move(Chunks), .BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}); + } + Stopwatch WriteBlockTimer; + + std::reverse(BlockRanges.begin(), BlockRanges.end()); + std::vector Buffer(1 << 28); + for (size_t Idx = 0; Idx < BlockRanges.size(); ++Idx) + { + const BlockData& BlockRange = BlockRanges[Idx]; + if (Idx > 0) + { + uint64_t Remaining = BlockRange.BlockOffset + BlockRange.BlockSize; + uint64_t Completed = BlockOffset + BlockSize - Remaining; + uint64_t ETA = (WriteBlockTimer.GetElapsedTimeMs() * Remaining) / Completed; + + ZEN_INFO("migrating store '{}' {}/{} blocks, remaining {} ({}) ETA: {}", + m_BucketDir / m_BucketDir, + Idx, + BlockRanges.size(), + NiceBytes(BlockRange.BlockOffset + BlockRange.BlockSize), + NiceBytes(BlockOffset + BlockSize), + NiceTimeSpanMs(ETA)); + } + + std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex); + BlockStoreFile ChunkBlock(BlockPath); + ChunkBlock.Create(BlockRange.BlockSize); + uint64_t Offset = 0; + while (Offset < BlockRange.BlockSize) + { + uint64_t Size = BlockRange.BlockSize - Offset; + if (Size > Buffer.size()) + { + Size = Buffer.size(); + } + BlockFile.Read(Buffer.data(), Size, BlockRange.BlockOffset + Offset); + ChunkBlock.Write(Buffer.data(), Size, Offset); + Offset += Size; + } + ChunkBlock.Truncate(Offset); + ChunkBlock.Flush(); + + std::vector LogEntries; + LogEntries.reserve(BlockRange.Chunks.size()); + for (const auto& Entry : BlockRange.Chunks) + { + const LegacyDiskIndexEntry& LegacyEntry = LegacyDiskIndex[Entry.first]; + + DiskLocation NewLocation; + uint8_t Flags = 0xff & (LegacyEntry.Location.Flags() >> 56); + if (LegacyEntry.Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) + { + NewLocation = DiskLocation(LegacyEntry.Location.Size(), Flags); + } + else + { + NewLocation = DiskLocation(Entry.second, m_PayloadAlignment, Flags); + } + LogEntries.push_back({.Key = Entry.first, .Location = NewLocation}); + } + CasLog.Append(LogEntries); + for (const DiskIndexEntry& Entry : LogEntries) + { + m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount())); + } + MigratedChunkCount += LogEntries.size(); + MigratedBlockCount++; + + if (CleanSource) + { + std::vector LegacyLogEntries; + LegacyLogEntries.reserve(BlockRange.Chunks.size()); + for (const auto& Entry : BlockRange.Chunks) + { + LegacyLogEntries.push_back( + {.Key = Entry.first, .Location = LegacyDiskLocation(0, 0, 0, LegacyDiskLocation::kTombStone)}); + } + LegacyCasLog.Append(LegacyLogEntries); + BlockFile.SetFileSize(BlockRange.BlockOffset); + } + } + } + BlockFile.Close(); + LegacyCasLog.Close(); + CasLog.Close(); + + if (CleanSource) + { + // Older versions of ZenCacheDiskLayer expects the legacy files to exist if it can find + // a manifest and crashes on startup if they don't. + // In order to not break startup when switching back an older version, lets just reset + // the legacy data files to zero length. + + BasicFile LegacyLog; + LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate); + BasicFile LegacySobs; + LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate); } + return MigratedChunkCount; +} + +void +ZenCacheDiskLayer::CacheBucket::OpenLog(const fs::path& BucketDir, const bool IsNew) +{ + m_BucketDir = BucketDir; + + m_TotalSize = 0; + + m_Index.clear(); + + std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_BucketDir); + std::filesystem::path LogPath = GetLogPath(m_BucketDir, m_BucketName); + std::filesystem::path IndexPath = GetIndexPath(m_BucketDir, m_BucketName); + + if (IsNew) + { + std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_BucketDir); + fs::remove(LegacyLogPath); + fs::remove(LegacyDataPath); + fs::remove(LogPath); + fs::remove(IndexPath); + fs::remove_all(m_BlocksBasePath); + } + + uint64_t LogPosition = ReadIndexFile(); + uint64_t LogEntryCount = ReadLog(LogPosition); + uint64_t LegacyLogEntryCount = MigrateLegacyData(true); - m_SobsCursor = (MaxFileOffset + 15) & ~15; + CreateDirectories(m_BucketDir); + + m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite); + + std::unordered_set KnownBlocks; + for (const auto& Entry : m_Index) + { + const DiskLocation& Location = Entry.second.Location; + m_TotalSize.fetch_add(Location.Size(), std::memory_order_release); + if (Location.IsFlagSet(DiskLocation::kStandaloneFile)) + { + continue; + } + KnownBlocks.insert(Location.GetBlockLocation(m_PayloadAlignment).BlockIndex); + } + + if (std::filesystem::is_directory(m_BlocksBasePath)) + { + std::vector FoldersToScan; + FoldersToScan.push_back(m_BlocksBasePath); + size_t FolderOffset = 0; + while (FolderOffset < FoldersToScan.size()) + { + for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(FoldersToScan[FolderOffset])) + { + if (Entry.is_directory()) + { + FoldersToScan.push_back(Entry.path()); + continue; + } + if (Entry.is_regular_file()) + { + const std::filesystem::path Path = Entry.path(); + if (Path.extension() != DataExtension) + { + continue; + } + std::string FileName = Path.stem().string(); + uint32_t BlockIndex; + bool OK = ParseHexNumber(FileName, BlockIndex); + if (!OK) + { + continue; + } + if (!KnownBlocks.contains(BlockIndex)) + { + // Log removing unreferenced block + // Clear out unused blocks + ZEN_INFO("removing unused block for '{}' at '{}'", m_BucketDir / m_BucketName, Path); + std::error_code Ec; + std::filesystem::remove(Path, Ec); + if (Ec) + { + ZEN_WARN("Failed to delete file '{}' reason: '{}'", Path, Ec.message()); + } + continue; + } + Ref BlockFile = new BlockStoreFile(Path); + BlockFile->Open(); + m_ChunkBlocks[BlockIndex] = BlockFile; + } + } + ++FolderOffset; + } + } + else + { + CreateDirectories(m_BlocksBasePath); + } + + if (IsNew || ((LogEntryCount + LegacyLogEntryCount) > 0)) + { + MakeIndexSnapshot(); + } + // TODO: should validate integrity of container files here } void @@ -537,7 +1395,10 @@ ZenCacheDiskLayer::CacheBucket::GetInlineCacheValue(const DiskLocation& Loc, Zen return false; } - OutValue.Value = IoBufferBuilder::MakeFromFileHandle(m_SobsFile.Handle(), Loc.Offset(), Loc.Size()); + const BlockStoreLocation& Location = Loc.GetBlockLocation(m_PayloadAlignment); + Ref ChunkBlock = m_ChunkBlocks[Location.BlockIndex]; + + OutValue.Value = ChunkBlock->GetChunk(Location.Offset, Location.Size); OutValue.Value.SetContentType(Loc.GetContentType()); return true; @@ -562,23 +1423,6 @@ ZenCacheDiskLayer::CacheBucket::GetStandaloneCacheValue(const DiskLocation& Loc, return false; } -void -ZenCacheDiskLayer::CacheBucket::DeleteStandaloneCacheValue(const DiskLocation& Loc, - const IoHash& HashKey, - const fs::path& Path, - std::error_code& Ec) -{ - ZEN_DEBUG("deleting standalone cache file '{}'", Path); - fs::remove(Path, Ec); - - if (!Ec) - { - m_SlogFile.Append(DiskIndexEntry{.Key = HashKey, .Location = {0, Loc.Size(), 0, DiskLocation::kTombStone}}); - m_Index.erase(HashKey); - m_TotalSize.fetch_sub(Loc.Size(), std::memory_order::relaxed); - } -} - bool ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutValue) { @@ -619,54 +1463,91 @@ ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& { return PutStandaloneCacheValue(HashKey, Value); } - else + + // Small object put + + uint8_t EntryFlags = 0; + + if (Value.Value.GetContentType() == ZenContentType::kCbObject) + { + EntryFlags |= DiskLocation::kStructured; + } + else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) { - // Small object put + EntryFlags |= DiskLocation::kCompressed; + } + + uint64_t ChunkSize = Value.Value.Size(); + + uint32_t WriteBlockIndex; + Ref WriteBlock; + uint64_t InsertOffset; - uint64_t EntryFlags = 0; + { + RwLock::ExclusiveLockScope _(m_InsertLock); - if (Value.Value.GetContentType() == ZenContentType::kCbObject) - { - EntryFlags |= DiskLocation::kStructured; - } - else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) + WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); + bool IsWriting = m_WriteBlock != nullptr; + if (!IsWriting || (m_CurrentInsertOffset + ChunkSize) > MaxBlockSize) { - EntryFlags |= DiskLocation::kCompressed; + if (m_WriteBlock) + { + m_WriteBlock = nullptr; + } + { + RwLock::ExclusiveLockScope __(m_IndexLock); + if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex) + { + throw std::runtime_error(fmt::format("unable to allocate a new block in '{}'", m_BucketDir / m_BucketName)); + } + WriteBlockIndex += IsWriting ? 1 : 0; + while (m_ChunkBlocks.contains(WriteBlockIndex)) + { + WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; + } + std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex); + m_WriteBlock = new BlockStoreFile(BlockPath); + m_ChunkBlocks[WriteBlockIndex] = m_WriteBlock; + m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); + } + m_CurrentInsertOffset = 0; + m_WriteBlock->Create(MaxBlockSize); } + InsertOffset = m_CurrentInsertOffset; + m_CurrentInsertOffset = RoundUp(InsertOffset + ChunkSize, m_PayloadAlignment); + WriteBlock = m_WriteBlock; + } - RwLock::ExclusiveLockScope _(m_IndexLock); - - DiskLocation Loc(m_SobsCursor, Value.Value.Size(), 0, EntryFlags); + DiskLocation Location({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = ChunkSize}, m_PayloadAlignment, EntryFlags); + const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; - m_SobsCursor = RoundUp(m_SobsCursor + Loc.Size(), 16); + WriteBlock->Write(Value.Value.Data(), ChunkSize, InsertOffset); + m_SlogFile.Append(DiskIndexEntry); - if (auto It = m_Index.find(HashKey); It == m_Index.end()) - { - // Previously unknown object - m_Index.insert({HashKey, {Loc, GcClock::TickCount()}}); - } - else + m_TotalSize.fetch_add(ChunkSize, std::memory_order::relaxed); + { + RwLock::ExclusiveLockScope __(m_IndexLock); + if (auto It = m_Index.find(HashKey); It != m_Index.end()) { // TODO: should check if write is idempotent and bail out if it is? // this would requiring comparing contents on disk unless we add a // content hash to the index entry IndexEntry& Entry = It.value(); - Entry.Location = Loc; + Entry.Location = Location; Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); } - - m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - m_SobsFile.Write(Value.Value.Data(), Loc.Size(), Loc.Offset()); - m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); + else + { + m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); + } } } void ZenCacheDiskLayer::CacheBucket::Drop() { + // TODO: close all open files and manage locking // TODO: add error handling - - m_SobsFile.Close(); m_SlogFile.Close(); DeleteDirectories(m_BucketDir); } @@ -674,10 +1555,20 @@ ZenCacheDiskLayer::CacheBucket::Drop() void ZenCacheDiskLayer::CacheBucket::Flush() { + { + RwLock::ExclusiveLockScope _(m_InsertLock); + if (m_CurrentInsertOffset > 0) + { + uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); + WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; + m_WriteBlock = nullptr; + m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); + m_CurrentInsertOffset = 0; + } + } RwLock::SharedLockScope _(m_IndexLock); - m_SobsFile.Flush(); - m_SlogFile.Flush(); + MakeIndexSnapshot(); SaveManifest(); } @@ -754,9 +1645,10 @@ ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx) { // Log a tombstone and delete the in-memory index for the bad entry - const auto It = m_Index.find(BadKey); - const DiskLocation& Location = It->second.Location; - m_SlogFile.Append(DiskIndexEntry{.Key = BadKey, .Location = {Location.Offset(), Location.Size(), 0, DiskLocation::kTombStone}}); + const auto It = m_Index.find(BadKey); + DiskLocation Location = It->second.Location; + Location.Flags |= DiskLocation::kTombStone; + m_SlogFile.Append(DiskIndexEntry{.Key = BadKey, .Location = Location}); m_Index.erase(BadKey); } } @@ -768,8 +1660,9 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) ZEN_TRACE_CPU("Z$::DiskLayer::CacheBucket::GatherReferences"); Stopwatch Timer; - const auto Guard = MakeGuard( - [this, &Timer] { ZEN_INFO("gathered references from '{}' in {}", m_BucketDir, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + const auto Guard = MakeGuard([this, &Timer] { + ZEN_INFO("gathered references from '{}' in {}", m_BucketDir / m_BucketName, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); const GcClock::TimePoint ExpireTime = GcCtx.MaxCacheDuration() == GcClock::Duration::max() ? GcClock::TimePoint::min() : GcCtx.Time() - GcCtx.MaxCacheDuration(); @@ -820,6 +1713,7 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) } } } + _.ReleaseNow(); ValidKeys.reserve(std::distance(ValidIt, Entries.end())); ExpiredKeys.reserve(std::distance(Entries.begin(), ValidIt)); @@ -836,202 +1730,480 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { ZEN_TRACE_CPU("Z$::DiskLayer::CacheBucket::CollectGarbage"); - Flush(); - - RwLock::ExclusiveLockScope _(m_IndexLock); + std::vector ExpiredStandaloneEntries; + + Stopwatch TotalTimer; + uint64_t WriteBlockTimeUs = 0; + uint64_t WriteBlockLongestTimeUs = 0; + uint64_t ReadBlockTimeUs = 0; + uint64_t ReadBlockLongestTimeUs = 0; + uint64_t TotalChunkCount = 0; + uint64_t DeletedSize = 0; + uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed); + + uint64_t DeletedCount = 0; + uint64_t MovedCount = 0; + + const auto _ = MakeGuard([this, + &TotalTimer, + &WriteBlockTimeUs, + &WriteBlockLongestTimeUs, + &ReadBlockTimeUs, + &ReadBlockLongestTimeUs, + &TotalChunkCount, + &DeletedCount, + &MovedCount, + &DeletedSize, + &OldTotalSize] { + ZEN_INFO( + "garbage collect from '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved " + "#{} " + "of #{} " + "entires ({}).", + m_BucketDir / m_BucketName, + NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), + NiceLatencyNs(WriteBlockTimeUs), + NiceLatencyNs(WriteBlockLongestTimeUs), + NiceLatencyNs(ReadBlockTimeUs), + NiceLatencyNs(ReadBlockLongestTimeUs), + NiceBytes(DeletedSize), + DeletedCount, + MovedCount, + TotalChunkCount, + NiceBytes(OldTotalSize)); + RwLock::SharedLockScope _(m_IndexLock); + SaveManifest(); + }); - const uint64_t OldCount = m_Index.size(); - const uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed); + m_SlogFile.Flush(); - ZEN_INFO("collecting garbage from z$ bucket '{}'", m_BucketDir); + IndexMap Index; + size_t BlockCount; + uint64_t ExcludeBlockIndex = 0x800000000ull; - Stopwatch Timer; - const auto Guard = MakeGuard([this, &Timer, &OldCount, &OldTotalSize] { - const uint64_t NewCount = m_Index.size(); - const uint64_t NewTotalSize = m_TotalSize.load(std::memory_order::relaxed); - ZEN_INFO("garbage collect from '{}' DONE after {}, collected {} ({}) chunks of total {} ({})", - m_BucketDir, - NiceTimeSpanMs(Timer.GetElapsedTimeMs()), - OldCount - NewCount, - NiceBytes(OldTotalSize - NewTotalSize), - OldCount, - NiceBytes(OldTotalSize)); - SaveManifest(); + std::span ExpiredCacheKeys = GcCtx.ExpiredCacheKeys(m_BucketName); + std::vector DeleteCacheKeys; + DeleteCacheKeys.reserve(ExpiredCacheKeys.size()); + GcCtx.FilterCas(ExpiredCacheKeys, [&](const IoHash& ChunkHash, bool Keep) { + if (Keep) + { + return; + } + DeleteCacheKeys.push_back(ChunkHash); }); - - if (m_Index.empty()) + if (DeleteCacheKeys.empty()) { + ZEN_INFO("garbage collect SKIPPED, for '{}', no expired cache keys found", m_BucketDir / m_BucketName); return; } - - auto AddEntries = [this](std::span Keys, std::vector& OutEntries) { - for (const IoHash& Key : Keys) + { + RwLock::SharedLockScope __(m_InsertLock); + RwLock::SharedLockScope ___(m_IndexLock); { - if (auto It = m_Index.find(Key); It != m_Index.end()) + Stopwatch Timer; + const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + if (m_Index.empty()) + { + ZEN_INFO("garbage collect SKIPPED, for '{}', container is empty", m_BucketDir / m_BucketName); + return; + } + if (m_WriteBlock) { - OutEntries.push_back(*It); + ExcludeBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); } + __.ReleaseNow(); } - }; - - std::vector ValidEntries; - std::vector ExpiredEntries; + SaveManifest(); + Index = m_Index; + BlockCount = m_ChunkBlocks.size(); - AddEntries(GcCtx.ValidCacheKeys(m_BucketName), ValidEntries); - AddEntries(GcCtx.ExpiredCacheKeys(m_BucketName), ExpiredEntries); + for (const IoHash& Key : DeleteCacheKeys) + { + if (auto It = Index.find(Key); It != Index.end()) + { + DiskIndexEntry Entry = {.Key = It->first, .Location = It->second.Location}; + if (Entry.Location.Flags & DiskLocation::kStandaloneFile) + { + Entry.Location.Flags |= DiskLocation::kTombStone; + ExpiredStandaloneEntries.push_back(Entry); + } + } + } + if (GcCtx.IsDeletionMode()) + { + for (const auto& Entry : ExpiredStandaloneEntries) + { + m_Index.erase(Entry.Key); + } + m_SlogFile.Append(ExpiredStandaloneEntries); + } + } - // Remove all standalone file(s) - // NOTE: This can probably be made asynchronously + if (GcCtx.IsDeletionMode()) { std::error_code Ec; ExtendablePathBuilder<256> Path; - for (const auto& Entry : ExpiredEntries) + for (const auto& Entry : ExpiredStandaloneEntries) { - const IoHash& Key = Entry.first; - const DiskLocation& Loc = Entry.second.Location; + const IoHash& Key = Entry.Key; + const DiskLocation& Loc = Entry.Location; - if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) - { - Path.Reset(); - BuildPath(Path, Key); + Path.Reset(); + BuildPath(Path, Key); - // NOTE: this will update index and log file - DeleteStandaloneCacheValue(Loc, Key, Path.c_str(), Ec); + { + RwLock::SharedLockScope __(m_IndexLock); + if (m_Index.contains(Key)) + { + // Someone added it back, let the file on disk be + ZEN_DEBUG("skipping z$ delete standalone of file '{}' FAILED, it has been added back", Path.ToUtf8()); + continue; + } + ZEN_DEBUG("deleting standalone cache file '{}'", Path.ToUtf8()); + fs::remove(Path.c_str(), Ec); + } - if (Ec) + if (Ec) + { + ZEN_WARN("delete expired z$ standalone file '{}' FAILED, reason: '{}'", Path.ToUtf8(), Ec.message()); + Ec.clear(); + DiskLocation RestoreLocation = Loc; + RestoreLocation.Flags &= ~DiskLocation::kTombStone; + + RwLock::ExclusiveLockScope __(m_IndexLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + if (m_Index.contains(Key)) { - ZEN_WARN("delete expired z$ standalone file '{}' FAILED, reason '{}'", Path.ToUtf8(), Ec.message()); - Ec.clear(); + continue; } + m_SlogFile.Append(DiskIndexEntry{.Key = Key, .Location = RestoreLocation}); + m_Index.insert({Key, {Loc, GcClock::TickCount()}}); + m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order::relaxed); + continue; } + m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed); + DeletedSize += Entry.Location.Size(); + DeletedCount++; } } - if (GcCtx.CollectSmallObjects() && !ExpiredEntries.empty()) + TotalChunkCount = Index.size(); + + std::vector TotalChunkHashes; + TotalChunkHashes.reserve(TotalChunkCount); + for (const auto& Entry : Index) { - // Naive GC implementation of small objects. Needs enough free - // disk space to store intermediate sob container along side the - // old container + const DiskLocation& Location = Entry.second.Location; - const auto ResetSobStorage = [this, &ValidEntries]() { - m_SobsFile.Close(); - m_SlogFile.Close(); + if (Location.Flags & DiskLocation::kStandaloneFile) + { + continue; + } + TotalChunkHashes.push_back(Entry.first); + } - const bool IsNew = true; - m_SobsFile.Open(m_BucketDir / "zen.sobs", IsNew ? BasicFile::Mode::kTruncate : BasicFile::Mode::kWrite); - m_SlogFile.Open(m_BucketDir / "zen.slog", IsNew ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite); + if (TotalChunkHashes.empty()) + { + return; + } + std::unordered_map BlockIndexToChunkMapIndex; + std::vector> KeepChunks; + std::vector> DeleteChunks; - m_SobsCursor = 0; - m_TotalSize = 0; - m_Index.clear(); + BlockIndexToChunkMapIndex.reserve(BlockCount); + KeepChunks.reserve(BlockCount); + DeleteChunks.reserve(BlockCount); + size_t GuesstimateCountPerBlock = TotalChunkHashes.size() / BlockCount / 2; - for (const auto& Entry : ValidEntries) - { - const IoHash& Key = Entry.first; - const DiskLocation& Loc = Entry.second.Location; + uint64_t DeleteCount = 0; - if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) - { - m_SlogFile.Append({.Key = Key, .Location = Loc}); - m_Index.insert({Key, {Loc, GcClock::TickCount()}}); - m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); - } - } - }; + uint64_t NewTotalSize = 0; + + std::unordered_set Expired; + Expired.insert(DeleteCacheKeys.begin(), DeleteCacheKeys.end()); + + GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { + auto KeyIt = Index.find(ChunkHash); + const DiskLocation& Location = KeyIt->second.Location; + BlockStoreLocation BlockLocation = Location.GetBlockLocation(m_PayloadAlignment); + + uint32_t BlockIndex = BlockLocation.BlockIndex; - uint64_t NewContainerSize{}; - for (const auto& Entry : ValidEntries) + if (static_cast(BlockIndex) == ExcludeBlockIndex) { - const DiskLocation& Loc = Entry.second.Location; + return; + } - if (Loc.IsFlagSet(DiskLocation::kStandaloneFile) == false) - { - NewContainerSize += (Loc.Size() + sizeof(DiskLocation)); - } + auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(BlockIndex); + size_t ChunkMapIndex = 0; + if (BlockIndexPtr == BlockIndexToChunkMapIndex.end()) + { + ChunkMapIndex = KeepChunks.size(); + BlockIndexToChunkMapIndex[BlockIndex] = ChunkMapIndex; + KeepChunks.resize(ChunkMapIndex + 1); + KeepChunks.back().reserve(GuesstimateCountPerBlock); + DeleteChunks.resize(ChunkMapIndex + 1); + DeleteChunks.back().reserve(GuesstimateCountPerBlock); + } + else + { + ChunkMapIndex = BlockIndexPtr->second; + } + if (Keep) + { + std::vector& ChunkMap = KeepChunks[ChunkMapIndex]; + ChunkMap.push_back(ChunkHash); + NewTotalSize += BlockLocation.Size; + } + else + { + std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; + ChunkMap.push_back(ChunkHash); + DeleteCount++; } + }); - if (NewContainerSize == 0) + std::unordered_set BlocksToReWrite; + BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size()); + for (const auto& Entry : BlockIndexToChunkMapIndex) + { + uint32_t BlockIndex = Entry.first; + size_t ChunkMapIndex = Entry.second; + const std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; + if (ChunkMap.empty()) { - ResetSobStorage(); - return; + continue; } + BlocksToReWrite.insert(BlockIndex); + } - const uint64_t DiskSpaceMargin = (256 << 10); + const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); + if (!PerformDelete) + { + uint64_t TotalSize = m_TotalSize.load(std::memory_order_relaxed); + ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}", + m_BucketDir / m_BucketName, + DeleteCount, + NiceBytes(TotalSize - NewTotalSize), + TotalChunkCount, + NiceBytes(TotalSize)); + return; + } - std::error_code Ec; - DiskSpace Space = DiskSpaceInfo(m_BucketDir, Ec); - if (Ec || Space.Free < NewContainerSize + DiskSpaceMargin) + auto AddToDeleted = [this, &Index, &DeletedCount, &DeletedSize](const std::vector& DeletedEntries) { + for (const IoHash& ChunkHash : DeletedEntries) { - ZEN_WARN("garbage collect z$ bucket '{}' FAILED, not enough disk space {}/{} (required/free)", - m_BucketDir, - NiceBytes(NewContainerSize), - NiceBytes(Space.Free)); - return; + const DiskLocation& Location = Index[ChunkHash].Location; + ZEN_ASSERT(!Location.IsFlagSet(DiskLocation::kStandaloneFile)); + DeletedSize += Index[ChunkHash].Location.GetBlockLocation(m_PayloadAlignment).Size; } + DeletedCount += DeletedEntries.size(); + }; - std::filesystem::path TmpSobsPath{m_BucketDir / "zen.sobs.tmp"}; - std::filesystem::path TmpSlogPath{m_BucketDir / "zen.slog.tmp"}; + // Move all chunks in blocks that have chunks removed to new blocks - // Copy non expired sob(s) to temporary sob container + Ref NewBlockFile; + uint64_t WriteOffset = 0; + uint32_t NewBlockIndex = 0; + auto UpdateLocations = [this](const std::span& Entries) { + for (const DiskIndexEntry& Entry : Entries) { - BasicFile TmpSobs; - TCasLogFile TmpLog; - uint64_t TmpCursor{}; - std::vector Chunk; + if (Entry.Location.IsFlagSet(DiskLocation::kTombStone)) + { + auto KeyIt = m_Index.find(Entry.Key); + uint64_t ChunkSize = KeyIt->second.Location.GetBlockLocation(m_PayloadAlignment).Size; + m_TotalSize.fetch_sub(ChunkSize); + m_Index.erase(KeyIt); + continue; + } + m_Index[Entry.Key].Location = Entry.Location; + } + }; - TmpSobs.Open(TmpSobsPath, BasicFile::Mode::kTruncate); - TmpLog.Open(TmpSlogPath, CasLogFile::Mode::kTruncate); + std::unordered_map MovedBlockChunks; + for (uint32_t BlockIndex : BlocksToReWrite) + { + const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; + + Ref OldBlockFile; + { + RwLock::SharedLockScope _i(m_IndexLock); + OldBlockFile = m_ChunkBlocks[BlockIndex]; + } - for (const auto& Entry : ValidEntries) + const std::vector& KeepMap = KeepChunks[ChunkMapIndex]; + if (KeepMap.empty()) + { + const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; + std::vector LogEntries = MakeDiskIndexEntries({}, DeleteMap); + m_SlogFile.Append(LogEntries); + m_SlogFile.Flush(); + { + RwLock::ExclusiveLockScope _i(m_IndexLock); + Stopwatch Timer; + const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + UpdateLocations(LogEntries); + m_ChunkBlocks[BlockIndex] = nullptr; + } + AddToDeleted(DeleteMap); + ZEN_DEBUG("marking cas store file for delete '{}', block #{}, '{}'", + m_BucketDir / m_BucketName, + BlockIndex, + OldBlockFile->GetPath()); + std::error_code Ec; + OldBlockFile->MarkAsDeleteOnClose(Ec); + if (Ec) { - const IoHash& Key = Entry.first; - const DiskLocation& Loc = Entry.second.Location; + ZEN_WARN("Failed to flag file '{}' for deletion, reason: '{}'", OldBlockFile->GetPath(), Ec.message()); + } + continue; + } + + std::vector Chunk; + for (const IoHash& ChunkHash : KeepMap) + { + auto KeyIt = Index.find(ChunkHash); + const BlockStoreLocation ChunkLocation = KeyIt->second.Location.GetBlockLocation(m_PayloadAlignment); + Chunk.resize(ChunkLocation.Size); + OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); - DiskLocation NewLoc; + if (!NewBlockFile || (WriteOffset + Chunk.size() > MaxBlockSize)) + { + uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order::memory_order_relaxed); + std::vector LogEntries = MakeDiskIndexEntries(MovedBlockChunks, {}); + m_SlogFile.Append(LogEntries); + m_SlogFile.Flush(); - if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + if (NewBlockFile) { - NewLoc = DiskLocation(0, Loc.Size(), 0, Loc.GetFlags()); + NewBlockFile->Truncate(WriteOffset); + NewBlockFile->Flush(); } - else { - Chunk.resize(Loc.Size()); - m_SobsFile.Read(Chunk.data(), Chunk.size(), Loc.Offset()); - - NewLoc = DiskLocation(TmpCursor, Chunk.size(), 0, Loc.GetFlags()); - TmpSobs.Write(Chunk.data(), Chunk.size(), TmpCursor); - TmpCursor = RoundUp(TmpCursor + Chunk.size(), 16); + RwLock::ExclusiveLockScope __(m_IndexLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + UpdateLocations(LogEntries); + if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex) + { + ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded", + m_BucketDir / m_BucketName, + static_cast(std::numeric_limits::max()) + 1); + return; + } + while (m_ChunkBlocks.contains(NextBlockIndex)) + { + NextBlockIndex = (NextBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; + } + std::filesystem::path NewBlockPath = GetBlockPath(m_BlocksBasePath, NextBlockIndex); + NewBlockFile = new BlockStoreFile(NewBlockPath); + m_ChunkBlocks[NextBlockIndex] = NewBlockFile; } - TmpLog.Append(DiskIndexEntry{.Key = Key, .Location = NewLoc}); + MovedCount += MovedBlockChunks.size(); + MovedBlockChunks.clear(); + + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(m_BucketDir, Error); + if (Error) + { + ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BucketDir, Error.message()); + return; + } + if (Space.Free < MaxBlockSize) + { + uint64_t ReclaimedSpace = GcCtx.ClaimGCReserve(); + if (Space.Free + ReclaimedSpace < MaxBlockSize) + { + ZEN_WARN("garbage collect from '{}' FAILED, required disk space {}, free {}", + m_BucketDir / m_BucketName, + MaxBlockSize, + NiceBytes(Space.Free + ReclaimedSpace)); + RwLock::ExclusiveLockScope _l(m_IndexLock); + Stopwatch Timer; + const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + m_ChunkBlocks.erase(NextBlockIndex); + return; + } + + ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}", + m_BucketDir / m_BucketName, + ReclaimedSpace, + NiceBytes(Space.Free + ReclaimedSpace)); + } + NewBlockFile->Create(MaxBlockSize); + NewBlockIndex = NextBlockIndex; + WriteOffset = 0; } - } - // Swap state - try + NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); + MovedBlockChunks.emplace(ChunkHash, + DiskLocation({.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}, + m_PayloadAlignment, + KeyIt->second.Location.Flags)); + WriteOffset = RoundUp(WriteOffset + Chunk.size(), m_PayloadAlignment); + } + Chunk.clear(); + if (NewBlockFile) { - fs::path SobsPath{m_BucketDir / "zen.sobs"}; - fs::path SlogPath{m_BucketDir / "zen.slog"}; - - m_SobsFile.Close(); - m_SlogFile.Close(); - - fs::remove(SobsPath); - fs::remove(SlogPath); - - fs::rename(TmpSobsPath, SobsPath); - fs::rename(TmpSlogPath, SlogPath); + NewBlockFile->Truncate(WriteOffset); + NewBlockFile->Flush(); + NewBlockFile = {}; + } - const bool IsNew = false; - OpenLog(m_BucketDir, IsNew); + const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; + std::vector LogEntries = MakeDiskIndexEntries(MovedBlockChunks, DeleteMap); + m_SlogFile.Append(LogEntries); + m_SlogFile.Flush(); + { + RwLock::ExclusiveLockScope __(m_IndexLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + UpdateLocations(LogEntries); + m_ChunkBlocks[BlockIndex] = nullptr; } - catch (std::exception& Err) + MovedCount += MovedBlockChunks.size(); + AddToDeleted(DeleteMap); + MovedBlockChunks.clear(); + + ZEN_DEBUG("marking cas store file for delete '{}', block #{}, '{}'", + m_BucketDir / m_BucketName, + BlockIndex, + OldBlockFile->GetPath()); + std::error_code Ec; + OldBlockFile->MarkAsDeleteOnClose(Ec); + if (Ec) { - ZEN_ERROR("garbage collection FAILED, reason '{}'", Err.what()); - ResetSobStorage(); + ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); } + OldBlockFile = nullptr; } } @@ -1144,16 +2316,20 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c // Update index - uint64_t EntryFlags = DiskLocation::kStandaloneFile; + uint8_t EntryFlags = DiskLocation::kStandaloneFile; if (Value.Value.GetContentType() == ZenContentType::kCbObject) { EntryFlags |= DiskLocation::kStructured; } + else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) + { + EntryFlags |= DiskLocation::kCompressed; + } RwLock::ExclusiveLockScope _(m_IndexLock); - DiskLocation Loc(/* Offset */ 0, Value.Value.Size(), 0, EntryFlags); + DiskLocation Loc(Value.Value.Size(), EntryFlags); IndexEntry Entry = IndexEntry(Loc, GcClock::TickCount()); if (auto It = m_Index.find(HashKey); It == m_Index.end()) @@ -1255,10 +2431,10 @@ ZenCacheDiskLayer::Put(std::string_view InBucket, const IoHash& HashKey, const Z auto It = m_Buckets.try_emplace(BucketName, BucketName); Bucket = &It.first->second; - std::filesystem::path bucketPath = m_RootDir; - bucketPath /= BucketName; + std::filesystem::path BucketPath = m_RootDir; + BucketPath /= BucketName; - Bucket->OpenOrCreate(bucketPath); + Bucket->OpenOrCreate(BucketPath); } } @@ -1363,11 +2539,11 @@ void ZenCacheDiskLayer::Flush() { std::vector Buckets; - Buckets.reserve(m_Buckets.size()); { RwLock::SharedLockScope _(m_Lock); + Buckets.reserve(m_Buckets.size()); for (auto& Kv : m_Buckets) { Buckets.push_back(&Kv.second); @@ -1419,6 +2595,9 @@ ZenCacheDiskLayer::TotalSize() const ////////////////////////////////////////////////////////////////////////// #if ZEN_WITH_TESTS +} + +namespace zen { using namespace std::literals; @@ -1427,10 +2606,18 @@ namespace testutils { IoBuffer CreateBinaryCacheValue(uint64_t Size) { - std::vector Data(size_t(Size / sizeof(uint32_t))); - std::generate(Data.begin(), Data.end(), [Idx = 0]() mutable { return Idx++; }); + static std::random_device rd; + static std::mt19937 g(rd()); - IoBuffer Buf(IoBuffer::Clone, Data.data(), Data.size() * sizeof(uint32_t)); + std::vector Values; + Values.resize(Size); + for (size_t Idx = 0; Idx < Size; ++Idx) + { + Values[Idx] = static_cast(Idx); + } + std::shuffle(Values.begin(), Values.end(), g); + + IoBuffer Buf(IoBuffer::Clone, Values.data(), Values.size()); Buf.SetContentType(ZenContentType::kBinary); return Buf; }; @@ -1737,6 +2924,7 @@ TEST_CASE("z$.gc") GcCtx.MaxCacheDuration(std::chrono::minutes(2)); GcCtx.CollectSmallObjects(true); + Zcs.Flush(); Gc.CollectGarbage(GcCtx); for (const auto& Key : Keys) @@ -1751,6 +2939,357 @@ TEST_CASE("z$.gc") } } +TEST_CASE("z$.legacyconversion") +{ + ScopedTemporaryDirectory TempDir; + + uint64_t ChunkSizes[] = {2041, + 1123, + 1223, + 1239, + 341, + 1412, + 912, + 774, + 341, + 431, + 554, + 1098, + 2048, + 339 + 64 * 1024, + 561 + 64 * 1024, + 16 + 64 * 1024, + 16 + 64 * 1024, + 2048, + 2048}; + size_t ChunkCount = sizeof(ChunkSizes) / sizeof(uint64_t); + size_t SingleBlockSize = 0; + std::vector Chunks; + Chunks.reserve(ChunkCount); + for (uint64_t Size : ChunkSizes) + { + Chunks.push_back(testutils::CreateBinaryCacheValue(Size)); + SingleBlockSize += Size; + } + + std::vector ChunkHashes; + ChunkHashes.reserve(ChunkCount); + for (const IoBuffer& Chunk : Chunks) + { + ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); + } + + CreateDirectories(TempDir.Path()); + + const std::string Bucket = "rightintwo"; + { + CasGc Gc; + ZenCacheStore Zcs(Gc, TempDir.Path()); + const GcClock::TimePoint CurrentTime = GcClock::Now(); + + for (size_t i = 0; i < ChunkCount; i++) + { + Zcs.Put(Bucket, ChunkHashes[i], {.Value = Chunks[i]}); + } + + std::vector KeepChunks; + for (size_t i = 0; i < ChunkCount; i += 2) + { + KeepChunks.push_back(ChunkHashes[i]); + } + GcContext GcCtx(CurrentTime + std::chrono::hours(2)); + GcCtx.MaxCacheDuration(std::chrono::minutes(2)); + GcCtx.CollectSmallObjects(true); + GcCtx.ContributeCas(KeepChunks); + Zcs.Flush(); + Gc.CollectGarbage(GcCtx); + } + std::filesystem::path BucketDir = TempDir.Path() / Bucket; + std::filesystem::path BlocksBaseDir = BucketDir / "blocks"; + + std::filesystem::path CasPath = GetBlockPath(BlocksBaseDir, 1); + std::filesystem::path LegacyDataPath = GetLegacyDataPath(BucketDir); + std::filesystem::remove(LegacyDataPath); + std::filesystem::rename(CasPath, LegacyDataPath); + + std::vector LogEntries; + std::filesystem::path IndexPath = GetIndexPath(BucketDir, Bucket); + if (std::filesystem::is_regular_file(IndexPath)) + { + BasicFile ObjectIndexFile; + ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kRead); + uint64_t Size = ObjectIndexFile.FileSize(); + if (Size >= sizeof(CacheBucketIndexHeader)) + { + uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CacheBucketIndexHeader))) / sizeof(DiskIndexEntry); + CacheBucketIndexHeader Header; + ObjectIndexFile.Read(&Header, sizeof(Header), 0); + if (Header.Magic == CacheBucketIndexHeader::ExpectedMagic && Header.Version == CacheBucketIndexHeader::CurrentVersion && + Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount) + { + LogEntries.resize(Header.EntryCount); + ObjectIndexFile.Read(LogEntries.data(), Header.EntryCount * sizeof(DiskIndexEntry), sizeof(CacheBucketIndexHeader)); + } + } + ObjectIndexFile.Close(); + std::filesystem::remove(IndexPath); + } + + std::filesystem::path LogPath = GetLogPath(BucketDir, Bucket); + { + TCasLogFile CasLog; + CasLog.Open(LogPath, CasLogFile::Mode::kRead); + LogEntries.reserve(CasLog.GetLogCount()); + CasLog.Replay([&](const DiskIndexEntry& Record) { LogEntries.push_back(Record); }, 0); + } + TCasLogFile LegacyLog; + std::filesystem::path LegacylogPath = GetLegacyLogPath(BucketDir); + LegacyLog.Open(LegacylogPath, CasLogFile::Mode::kTruncate); + + for (const DiskIndexEntry& Entry : LogEntries) + { + uint64_t Size; + uint64_t Offset; + if (Entry.Location.IsFlagSet(DiskLocation::kStandaloneFile)) + { + Size = Entry.Location.Location.StandaloneSize; + Offset = 0; + } + else + { + BlockStoreLocation Location = Entry.Location.GetBlockLocation(16); + Size = Location.Size; + Offset = Location.Offset; + } + LegacyDiskLocation LegacyLocation(Offset, Size, 0, static_cast(Entry.Location.Flags) << 56); + LegacyDiskIndexEntry LegacyEntry = {.Key = Entry.Key, .Location = LegacyLocation}; + LegacyLog.Append(LegacyEntry); + } + LegacyLog.Close(); + + std::filesystem::remove_all(BlocksBaseDir); + std::filesystem::remove(LogPath); + std::filesystem::remove(IndexPath); + + { + CasGc Gc; + ZenCacheStore Zcs(Gc, TempDir.Path()); + + for (size_t i = 0; i < ChunkCount; i += 2) + { + ZenCacheValue Value; + CHECK(Zcs.Get(Bucket, ChunkHashes[i], Value)); + CHECK(ChunkHashes[i] == IoHash::HashBuffer(Value.Value)); + CHECK(!Zcs.Get(Bucket, ChunkHashes[i + 1], Value)); + } + } +} + +# if 0 +TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) +{ + // for (uint32_t i = 0; i < 100; ++i) + { + ScopedTemporaryDirectory TempDir; + + CasStoreConfiguration CasConfig; + CasConfig.RootDirectory = TempDir.Path(); + + CreateDirectories(CasConfig.RootDirectory); + + const uint64_t kChunkSize = 1048; + const int32_t kChunkCount = 8192; + + std::vector ChunkHashes; + ChunkHashes.reserve(kChunkCount); + std::vector Chunks; + Chunks.reserve(kChunkCount); + + for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + { + IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); + IoHash Hash = HashBuffer(Chunk); + ChunkHashes.emplace_back(Hash); + Chunks.emplace_back(Chunk); + } + + WorkerThreadPool ThreadPool(4); + CasGc Gc; + CasContainerStrategy Cas(CasConfig, Gc); + Cas.Initialize("test", 32768, 16, true); + { + for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + { + const IoBuffer& Chunk = Chunks[Idx]; + const IoHash& Hash = ChunkHashes[Idx]; + ThreadPool.ScheduleWork([&Cas, Chunk, Hash]() { + CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, Hash); + ZEN_ASSERT(InsertResult.New); + }); + } + while (ThreadPool.PendingWork() > 0) + { + Sleep(1); + } + } + + const uint64_t TotalSize = Cas.StorageSize().DiskSize; + CHECK_EQ(kChunkSize * kChunkCount, TotalSize); + + { + std::vector OldChunkHashes(ChunkHashes.begin(), ChunkHashes.end()); + for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + { + ThreadPool.ScheduleWork([&Cas, &OldChunkHashes, Idx]() { + IoHash ChunkHash = OldChunkHashes[Idx]; + IoBuffer Chunk = Cas.FindChunk(ChunkHash); + IoHash Hash = IoHash::HashBuffer(Chunk); + CHECK(ChunkHash == Hash); + }); + } + while (ThreadPool.PendingWork() > 0) + { + Sleep(1); + } + } + + std::unordered_set GcChunkHashes(ChunkHashes.begin(), ChunkHashes.end()); + { + std::vector OldChunkHashes(ChunkHashes.begin(), ChunkHashes.end()); + std::vector NewChunkHashes; + NewChunkHashes.reserve(kChunkCount); + std::vector NewChunks; + NewChunks.reserve(kChunkCount); + + for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + { + IoBuffer Chunk = CreateChunk(kChunkSize); + IoHash Hash = HashBuffer(Chunk); + NewChunkHashes.emplace_back(Hash); + NewChunks.emplace_back(Chunk); + } + + RwLock ChunkHashesLock; + std::atomic_uint32_t AddedChunkCount; + + for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + { + const IoBuffer& Chunk = NewChunks[Idx]; + const IoHash& Hash = NewChunkHashes[Idx]; + ThreadPool.ScheduleWork([&Cas, Chunk, Hash, &AddedChunkCount]() { + CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, Hash); + ZEN_ASSERT(InsertResult.New); + AddedChunkCount.fetch_add(1); + }); + ThreadPool.ScheduleWork([&Cas, &ChunkHashesLock, &OldChunkHashes, Idx]() { + IoHash ChunkHash = OldChunkHashes[Idx]; + IoBuffer Chunk = Cas.FindChunk(OldChunkHashes[Idx]); + if (Chunk) + { + CHECK(ChunkHash == IoHash::HashBuffer(Chunk)); + } + }); + } + + while (AddedChunkCount.load() < kChunkCount) + { + std::vector AddedHashes; + { + RwLock::ExclusiveLockScope _(ChunkHashesLock); + AddedHashes.swap(NewChunkHashes); + } + // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope + for (const IoHash& ChunkHash : AddedHashes) + { + if (Cas.HaveChunk(ChunkHash)) + { + GcChunkHashes.emplace(ChunkHash); + } + } + std::vector KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end()); + size_t C = 0; + while (C < KeepHashes.size()) + { + if (C % 155 == 0) + { + if (C < KeepHashes.size() - 1) + { + KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; + KeepHashes.pop_back(); + } + if (C + 3 < KeepHashes.size() - 1) + { + KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1]; + KeepHashes.pop_back(); + } + } + C++; + } + + GcContext GcCtx; + GcCtx.CollectSmallObjects(true); + GcCtx.ContributeCas(KeepHashes); + Cas.CollectGarbage(GcCtx); + CasChunkSet& Deleted = GcCtx.DeletedCas(); + Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); + } + + while (ThreadPool.PendingWork() > 0) + { + Sleep(1); + } + + { + std::vector AddedHashes; + { + RwLock::ExclusiveLockScope _(ChunkHashesLock); + AddedHashes.swap(NewChunkHashes); + } + // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope + for (const IoHash& ChunkHash : AddedHashes) + { + if (Cas.HaveChunk(ChunkHash)) + { + GcChunkHashes.emplace(ChunkHash); + } + } + std::vector KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end()); + size_t C = 0; + while (C < KeepHashes.size()) + { + if (C % 77 == 0 && C < KeepHashes.size() - 1) + { + KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; + KeepHashes.pop_back(); + } + C++; + } + + GcContext GcCtx; + GcCtx.CollectSmallObjects(true); + GcCtx.ContributeCas(KeepHashes); + Cas.CollectGarbage(GcCtx); + CasChunkSet& Deleted = GcCtx.DeletedCas(); + Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); + } + } + { + for (const IoHash& ChunkHash : GcChunkHashes) + { + ThreadPool.ScheduleWork([&Cas, ChunkHash]() { + CHECK(Cas.HaveChunk(ChunkHash)); + CHECK(ChunkHash == IoHash::HashBuffer(Cas.FindChunk(ChunkHash))); + }); + } + while (ThreadPool.PendingWork() > 0) + { + Sleep(1); + } + } + } +} +# endif + #endif void diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index f39d01747..91ac00f35 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -76,37 +77,32 @@ struct DiskLocation { inline DiskLocation() = default; - inline DiskLocation(uint64_t Offset, uint64_t ValueSize, uint32_t IndexSize, uint64_t Flags) - : OffsetAndFlags(CombineOffsetAndFlags(Offset, Flags)) - , LowerSize(ValueSize & 0xFFFFffff) - , IndexDataSize(IndexSize) + inline DiskLocation(uint64_t ValueSize, uint8_t Flags) : Flags(Flags | kStandaloneFile) { Location.StandaloneSize = ValueSize; } + + inline DiskLocation(const BlockStoreLocation& Location, uint64_t PayloadAlignment, uint8_t Flags) : Flags(Flags & ~kStandaloneFile) { + this->Location.BlockLocation = BlockStoreDiskLocation(Location, PayloadAlignment); } - static const uint64_t kOffsetMask = 0x0000'ffFF'ffFF'ffFFull; - static const uint64_t kSizeMask = 0x00FF'0000'0000'0000ull; // Most significant bits of value size (lower 32 bits in LowerSize) - static const uint64_t kFlagsMask = 0xff00'0000'0000'0000ull; - static const uint64_t kStandaloneFile = 0x8000'0000'0000'0000ull; // Stored as a separate file - static const uint64_t kStructured = 0x4000'0000'0000'0000ull; // Serialized as compact binary - static const uint64_t kTombStone = 0x2000'0000'0000'0000ull; // Represents a deleted key/value - static const uint64_t kCompressed = 0x1000'0000'0000'0000ull; // Stored in compressed buffer format - - static uint64_t CombineOffsetAndFlags(uint64_t Offset, uint64_t Flags) { return Offset | Flags; } + inline BlockStoreLocation GetBlockLocation(uint64_t PayloadAlignment) const + { + ZEN_ASSERT(!(Flags & kStandaloneFile)); + return Location.BlockLocation.Get(PayloadAlignment); + } - inline uint64_t Offset() const { return OffsetAndFlags & kOffsetMask; } - inline uint64_t Size() const { return LowerSize; } - inline uint64_t IsFlagSet(uint64_t Flag) const { return OffsetAndFlags & Flag; } - inline uint64_t GetFlags() const { return OffsetAndFlags & kFlagsMask; } + inline uint64_t Size() const { return (Flags & kStandaloneFile) ? Location.StandaloneSize : Location.BlockLocation.GetSize(); } + inline uint8_t IsFlagSet(uint64_t Flag) const { return Flags & Flag; } + inline uint8_t GetFlags() const { return Flags; } inline ZenContentType GetContentType() const { ZenContentType ContentType = ZenContentType::kBinary; - if (IsFlagSet(DiskLocation::kStructured)) + if (IsFlagSet(kStructured)) { ContentType = ZenContentType::kCbObject; } - if (IsFlagSet(DiskLocation::kCompressed)) + if (IsFlagSet(kCompressed)) { ContentType = ZenContentType::kCompressedBinary; } @@ -114,21 +110,29 @@ struct DiskLocation return ContentType; } -private: - uint64_t OffsetAndFlags = 0; - uint32_t LowerSize = 0; - uint32_t IndexDataSize = 0; + union + { + BlockStoreDiskLocation BlockLocation; // 10 bytes + uint64_t StandaloneSize = 0; // 8 bytes + } Location; + + static const uint8_t kStandaloneFile = 0x80u; // Stored as a separate file + static const uint8_t kStructured = 0x40u; // Serialized as compact binary + static const uint8_t kTombStone = 0x20u; // Represents a deleted key/value + static const uint8_t kCompressed = 0x10u; // Stored in compressed buffer format + + uint8_t Flags = 0; + uint8_t Reserved = 0; }; struct DiskIndexEntry { - IoHash Key; - DiskLocation Location; + IoHash Key; // 20 bytes + DiskLocation Location; // 12 bytes }; - #pragma pack(pop) -static_assert(sizeof(DiskIndexEntry) == 36); +static_assert(sizeof(DiskIndexEntry) == 32); /** In-memory cache storage @@ -245,15 +249,18 @@ private: inline uint64_t TotalSize() const { return m_TotalSize.load(std::memory_order::relaxed); } private: + const uint64_t MaxBlockSize = 1ull << 30; + uint64_t m_PayloadAlignment = 1ull << 4; + std::string m_BucketName; std::filesystem::path m_BucketDir; + std::filesystem::path m_BlocksBasePath; Oid m_BucketId; bool m_IsOk = false; uint64_t m_LargeObjectThreshold = 64 * 1024; // These files are used to manage storage of small objects for this bucket - BasicFile m_SobsFile; TCasLogFile m_SlogFile; struct IndexEntry @@ -277,20 +284,26 @@ private: using IndexMap = tsl::robin_map; - RwLock m_IndexLock; - IndexMap m_Index; - uint64_t m_SobsCursor = 0; + RwLock m_IndexLock; + IndexMap m_Index; + std::unordered_map> m_ChunkBlocks; + + RwLock m_InsertLock; // used to serialize inserts + Ref m_WriteBlock; + std::uint64_t m_CurrentInsertOffset = 0; + + std::atomic_uint32_t m_WriteBlockIndex{}; std::atomic_uint64_t m_TotalSize{}; - void BuildPath(PathBuilderBase& Path, const IoHash& HashKey); - void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); - bool GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey, ZenCacheValue& OutValue); - void DeleteStandaloneCacheValue(const DiskLocation& Loc, - const IoHash& HashKey, - const std::filesystem::path& Path, - std::error_code& Ec); - bool GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue); - void OpenLog(const std::filesystem::path& BucketDir, const bool IsNew); + void BuildPath(PathBuilderBase& Path, const IoHash& HashKey); + void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); + bool GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey, ZenCacheValue& OutValue); + bool GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue); + void MakeIndexSnapshot(); + uint64_t ReadIndexFile(); + uint64_t ReadLog(uint64_t LogPosition); + uint64_t MigrateLegacyData(bool CleanSource); + void OpenLog(const std::filesystem::path& BucketDir, const bool IsNew); // These locks are here to avoid contention on file creation, therefore it's sufficient // that we take the same lock for the same hash -- cgit v1.2.3 From 4e7d13b038bda5806647530ddfca9de825001a5f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 7 Apr 2022 22:20:35 +0200 Subject: cleaner GatherReferences --- zenserver/cache/structuredcachestore.cpp | 91 ++++++++++++++++++++------------ zenserver/cache/structuredcachestore.h | 2 +- 2 files changed, 57 insertions(+), 36 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index c5ccef523..5c71ad7bb 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1659,9 +1659,20 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) { ZEN_TRACE_CPU("Z$::DiskLayer::CacheBucket::GatherReferences"); - Stopwatch Timer; - const auto Guard = MakeGuard([this, &Timer] { - ZEN_INFO("gathered references from '{}' in {}", m_BucketDir / m_BucketName, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + uint64_t WriteBlockTimeUs = 0; + uint64_t WriteBlockLongestTimeUs = 0; + uint64_t ReadBlockTimeUs = 0; + uint64_t ReadBlockLongestTimeUs = 0; + + Stopwatch TotalTimer; + const auto _ = MakeGuard([this, &TotalTimer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + ZEN_INFO("gathered references from '{}' in {} write lock: {} ({}), read lock: {} ({})", + m_BucketDir / m_BucketName, + NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), + NiceLatencyNs(WriteBlockTimeUs), + NiceLatencyNs(WriteBlockLongestTimeUs), + NiceLatencyNs(ReadBlockTimeUs), + NiceLatencyNs(ReadBlockLongestTimeUs)); }); const GcClock::TimePoint ExpireTime = @@ -1669,60 +1680,70 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) const GcClock::Tick ExpireTicks = ExpireTime.time_since_epoch().count(); - RwLock::SharedLockScope _(m_IndexLock); - - std::vector ValidKeys; - std::vector ExpiredKeys; - std::vector Cids; - std::vector Entries(m_Index.begin(), m_Index.end()); - - std::sort(Entries.begin(), Entries.end(), [](const auto& LHS, const auto& RHS) { - return LHS.second.LastAccess < RHS.second.LastAccess; - }); + IndexMap Index; + { + RwLock::SharedLockScope __(m_IndexLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + Index = m_Index; + } - const auto ValidIt = std::lower_bound(Entries.begin(), Entries.end(), ExpireTicks, [](const auto& Kv, auto Ticks) { - const IndexEntry& Entry = Kv.second; - return Entry.LastAccess < Ticks; - }); + std::vector ExpiredKeys; + ExpiredKeys.reserve(1024); + for (const auto& Entry : Index) + { + if (Entry.second.LastAccess < ExpireTicks) + { + ExpiredKeys.push_back(Entry.first); + } + } + std::vector Cids; Cids.reserve(1024); - for (auto Kv = ValidIt; Kv != Entries.end(); ++Kv) + for (const auto& Key : ExpiredKeys) { - const IoHash& Key = Kv->first; - const DiskLocation& Loc = Kv->second.Location; + IndexEntry& Entry = Index[Key]; + const DiskLocation& Loc = Entry.Location; if (Loc.IsFlagSet(DiskLocation::kStructured)) { + if (Cids.size() > 1024) + { + GcCtx.ContributeCids(Cids); + Cids.clear(); + } + ZenCacheValue CacheValue; - if (!GetInlineCacheValue(Loc, CacheValue)) { - GetStandaloneCacheValue(Loc, Key, CacheValue); + RwLock::SharedLockScope __(m_IndexLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + if (!GetInlineCacheValue(Loc, CacheValue)) + { + GetStandaloneCacheValue(Loc, Key, CacheValue); + } } if (CacheValue.Value) { ZEN_ASSERT(CacheValue.Value.GetContentType() == ZenContentType::kCbObject); - if (Cids.size() > 1024) - { - GcCtx.ContributeCids(Cids); - Cids.clear(); - } CbObject Obj(SharedBuffer{CacheValue.Value}); Obj.IterateAttachments([&Cids](CbFieldView Field) { Cids.push_back(Field.AsAttachment()); }); } } } - _.ReleaseNow(); - - ValidKeys.reserve(std::distance(ValidIt, Entries.end())); - ExpiredKeys.reserve(std::distance(Entries.begin(), ValidIt)); - - std::transform(ValidIt, Entries.end(), std::back_inserter(ValidKeys), [](const auto& Kv) { return Kv.first; }); - std::transform(Entries.begin(), ValidIt, std::back_inserter(ExpiredKeys), [](const auto& Kv) { return Kv.first; }); GcCtx.ContributeCids(Cids); - GcCtx.ContributeCacheKeys(m_BucketName, std::move(ValidKeys), std::move(ExpiredKeys)); + GcCtx.ContributeCacheKeys(m_BucketName, {}, std::move(ExpiredKeys)); } void diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 91ac00f35..8e8b6ee78 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -274,7 +274,7 @@ private: IndexEntry(IndexEntry&& E) : Location(std::move(E.Location)), LastAccess(E.LastAccess.load(std::memory_order_relaxed)) {} IndexEntry& operator=(const IndexEntry& E) { return *this = IndexEntry(E); } - IndexEntry& operator=(IndexEntry&& E) + IndexEntry& operator=(IndexEntry&& E) noexcept { Location = std::move(E.Location); LastAccess.store(E.LastAccess.load(), std::memory_order_relaxed); -- cgit v1.2.3 From 5e43b80df4cf8fff6bd350139783fb15d9d25207 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 7 Apr 2022 23:34:15 +0200 Subject: correct expire vs contribute --- zenserver/cache/structuredcachestore.cpp | 19 +++++++++---------- zenserver/cache/structuredcachestore.h | 4 +++- zenstore/cidstore.cpp | 23 +++++++++++------------ zenstore/gc.cpp | 9 +-------- zenstore/include/zenstore/gc.h | 3 +-- 5 files changed, 25 insertions(+), 33 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 5c71ad7bb..d28964502 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1694,21 +1694,20 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) std::vector ExpiredKeys; ExpiredKeys.reserve(1024); + + std::vector Cids; + Cids.reserve(1024); + for (const auto& Entry : Index) { + const IoHash& Key = Entry.first; if (Entry.second.LastAccess < ExpireTicks) { - ExpiredKeys.push_back(Entry.first); + ExpiredKeys.push_back(Key); + continue; } - } - std::vector Cids; - Cids.reserve(1024); - - for (const auto& Key : ExpiredKeys) - { - IndexEntry& Entry = Index[Key]; - const DiskLocation& Loc = Entry.Location; + const DiskLocation& Loc = Entry.second.Location; if (Loc.IsFlagSet(DiskLocation::kStructured)) { @@ -1743,7 +1742,7 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) } GcCtx.ContributeCids(Cids); - GcCtx.ContributeCacheKeys(m_BucketName, {}, std::move(ExpiredKeys)); + GcCtx.ContributeCacheKeys(m_BucketName, std::move(ExpiredKeys)); } void diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 8e8b6ee78..c107983b5 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -271,7 +271,9 @@ private: IndexEntry() : Location(), LastAccess() {} IndexEntry(const DiskLocation& Loc, const int64_t Timestamp) : Location(Loc), LastAccess(Timestamp) {} IndexEntry(const IndexEntry& E) : Location(E.Location), LastAccess(E.LastAccess.load(std::memory_order_relaxed)) {} - IndexEntry(IndexEntry&& E) : Location(std::move(E.Location)), LastAccess(E.LastAccess.load(std::memory_order_relaxed)) {} + IndexEntry(IndexEntry&& E) noexcept : Location(std::move(E.Location)), LastAccess(E.LastAccess.load(std::memory_order_relaxed)) + { + } IndexEntry& operator=(const IndexEntry& E) { return *this = IndexEntry(E); } IndexEntry& operator=(IndexEntry&& E) noexcept diff --git a/zenstore/cidstore.cpp b/zenstore/cidstore.cpp index 509d21abe..6cf9ee734 100644 --- a/zenstore/cidstore.cpp +++ b/zenstore/cidstore.cpp @@ -234,23 +234,22 @@ struct CidStore::Impl void RemoveCids(CasChunkSet& CasChunks) { - RwLock::ExclusiveLockScope _(m_Lock); - - for (auto It = m_CidMap.begin(), End = m_CidMap.end(); It != End;) + std::vector RemovedEntries; + RemovedEntries.reserve(CasChunks.GetSize()); { - if (CasChunks.ContainsChunk(It->second)) - { - const IoHash& BadHash = It->first; - - // Log a tombstone record - LogMapping(BadHash, IoHash::Zero); - It = m_CidMap.erase(It); - } - else + RwLock::SharedLockScope _(m_Lock); + for (auto It = m_CidMap.begin(), End = m_CidMap.end(); It != End;) { + if (CasChunks.ContainsChunk(It->second)) + { + RemovedEntries.push_back({It->first, IoHash::Zero}); + It = m_CidMap.erase(It); + continue; + } ++It; } } + m_LogFile.Append(RemovedEntries); } uint64_t m_LastScrubTime = 0; diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp index 856f9af02..21522e46a 100644 --- a/zenstore/gc.cpp +++ b/zenstore/gc.cpp @@ -212,9 +212,8 @@ GcContext::ContributeCas(std::span Cas) } void -GcContext::ContributeCacheKeys(const std::string& Bucket, std::vector ValidKeys, std::vector ExpiredKeys) +GcContext::ContributeCacheKeys(const std::string& Bucket, std::vector&& ExpiredKeys) { - m_State->m_CacheBuckets[Bucket].ValidKeys = std::move(ValidKeys); m_State->m_CacheBuckets[Bucket].ExpiredKeys = std::move(ExpiredKeys); } @@ -254,12 +253,6 @@ GcContext::DeletedCas() return m_State->m_DeletedCasChunks; } -std::span -GcContext::ValidCacheKeys(const std::string& Bucket) const -{ - return m_State->m_CacheBuckets[Bucket].ValidKeys; -} - std::span GcContext::ExpiredCacheKeys(const std::string& Bucket) const { diff --git a/zenstore/include/zenstore/gc.h b/zenstore/include/zenstore/gc.h index bc8dee9a3..6268588ec 100644 --- a/zenstore/include/zenstore/gc.h +++ b/zenstore/include/zenstore/gc.h @@ -53,7 +53,7 @@ public: void ContributeCids(std::span Cid); void ContributeCas(std::span Hash); - void ContributeCacheKeys(const std::string& Bucket, std::vector ValidKeys, std::vector ExpiredKeys); + void ContributeCacheKeys(const std::string& Bucket, std::vector&& ExpiredKeys); void IterateCids(std::function Callback); @@ -64,7 +64,6 @@ public: void DeletedCas(std::span Cas); CasChunkSet& DeletedCas(); - std::span ValidCacheKeys(const std::string& Bucket) const; std::span ExpiredCacheKeys(const std::string& Bucket) const; bool IsDeletionMode() const; -- cgit v1.2.3 From 4e6abaacede5bb6dea1ff788b1259efeb1212bcc Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 12 Apr 2022 13:58:38 +0200 Subject: Add z$.threadedinsert test --- zenserver/cache/structuredcachestore.cpp | 205 ++++++++++++++++++------------- 1 file changed, 118 insertions(+), 87 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index d28964502..0ce473e89 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -604,7 +604,7 @@ ZenCacheMemoryLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue m_CacheMap.insert_or_assign(HashKey, BucketValue(Value.Value, GcClock::TickCount())); } - m_TotalSize.fetch_add(Value.Value.GetSize(), std::memory_order::relaxed); + m_TotalSize.fetch_add(Value.Value.GetSize(), std::memory_order_seq_cst); } ////////////////////////////////////////////////////////////////////////// @@ -1303,7 +1303,7 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const fs::path& BucketDir, const bool Is for (const auto& Entry : m_Index) { const DiskLocation& Location = Entry.second.Location; - m_TotalSize.fetch_add(Location.Size(), std::memory_order_release); + m_TotalSize.fetch_add(Location.Size(), std::memory_order_seq_cst); if (Location.IsFlagSet(DiskLocation::kStandaloneFile)) { continue; @@ -1524,7 +1524,7 @@ ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& WriteBlock->Write(Value.Value.Data(), ChunkSize, InsertOffset); m_SlogFile.Append(DiskIndexEntry); - m_TotalSize.fetch_add(ChunkSize, std::memory_order::relaxed); + m_TotalSize.fetch_add(ChunkSize, std::memory_order_seq_cst); { RwLock::ExclusiveLockScope __(m_IndexLock); if (auto It = m_Index.find(HashKey); It != m_Index.end()) @@ -1908,10 +1908,10 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) } m_SlogFile.Append(DiskIndexEntry{.Key = Key, .Location = RestoreLocation}); m_Index.insert({Key, {Loc, GcClock::TickCount()}}); - m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order::relaxed); + m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order_seq_cst); continue; } - m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed); + m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order_seq_cst); DeletedSize += Entry.Location.Size(); DeletedCount++; } @@ -2043,7 +2043,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { auto KeyIt = m_Index.find(Entry.Key); uint64_t ChunkSize = KeyIt->second.Location.GetBlockLocation(m_PayloadAlignment).Size; - m_TotalSize.fetch_sub(ChunkSize); + m_TotalSize.fetch_sub(ChunkSize, std::memory_order_seq_cst); m_Index.erase(KeyIt); continue; } @@ -2364,7 +2364,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c } m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); + m_TotalSize.fetch_add(Loc.Size(), std::memory_order_seq_cst); } ////////////////////////////////////////////////////////////////////////// @@ -3105,47 +3105,51 @@ TEST_CASE("z$.legacyconversion") } } -# if 0 TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) { // for (uint32_t i = 0; i < 100; ++i) { ScopedTemporaryDirectory TempDir; - CasStoreConfiguration CasConfig; - CasConfig.RootDirectory = TempDir.Path(); - - CreateDirectories(CasConfig.RootDirectory); - const uint64_t kChunkSize = 1048; const int32_t kChunkCount = 8192; - std::vector ChunkHashes; - ChunkHashes.reserve(kChunkCount); - std::vector Chunks; + struct Chunk + { + std::string Bucket; + IoBuffer Buffer; + }; + std::unordered_map Chunks; Chunks.reserve(kChunkCount); + const std::string Bucket1 = "rightinone"; + const std::string Bucket2 = "rightintwo"; + for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) { - IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); - IoHash Hash = HashBuffer(Chunk); - ChunkHashes.emplace_back(Hash); - Chunks.emplace_back(Chunk); + { + IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); + IoHash Hash = HashBuffer(Chunk); + Chunks[Hash] = {.Bucket = Bucket1, .Buffer = Chunk}; + } + { + IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); + IoHash Hash = HashBuffer(Chunk); + Chunks[Hash] = {.Bucket = Bucket2, .Buffer = Chunk}; + } } - WorkerThreadPool ThreadPool(4); - CasGc Gc; - CasContainerStrategy Cas(CasConfig, Gc); - Cas.Initialize("test", 32768, 16, true); + CreateDirectories(TempDir.Path()); + + WorkerThreadPool ThreadPool(4); + CasGc Gc; + ZenCacheStore Zcs(Gc, TempDir.Path()); + const GcClock::TimePoint CurrentTime = GcClock::Now(); + { - for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + for (const auto& Chunk : Chunks) { - const IoBuffer& Chunk = Chunks[Idx]; - const IoHash& Hash = ChunkHashes[Idx]; - ThreadPool.ScheduleWork([&Cas, Chunk, Hash]() { - CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, Hash); - ZEN_ASSERT(InsertResult.New); - }); + ThreadPool.ScheduleWork([&Zcs, &Chunk]() { Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer}); }); } while (ThreadPool.PendingWork() > 0) { @@ -3153,17 +3157,19 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } - const uint64_t TotalSize = Cas.StorageSize().DiskSize; - CHECK_EQ(kChunkSize * kChunkCount, TotalSize); + const uint64_t TotalSize = Zcs.StorageSize().DiskSize; + CHECK_EQ(kChunkSize * Chunks.size(), TotalSize); { - std::vector OldChunkHashes(ChunkHashes.begin(), ChunkHashes.end()); - for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + for (const auto& Chunk : Chunks) { - ThreadPool.ScheduleWork([&Cas, &OldChunkHashes, Idx]() { - IoHash ChunkHash = OldChunkHashes[Idx]; - IoBuffer Chunk = Cas.FindChunk(ChunkHash); - IoHash Hash = IoHash::HashBuffer(Chunk); + ThreadPool.ScheduleWork([&Zcs, &Chunk]() { + std::string Bucket = Chunk.second.Bucket; + IoHash ChunkHash = Chunk.first; + ZenCacheValue CacheValue; + + CHECK(Zcs.Get(Bucket, ChunkHash, CacheValue)); + IoHash Hash = IoHash::HashBuffer(CacheValue.Value); CHECK(ChunkHash == Hash); }); } @@ -3172,62 +3178,73 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) Sleep(1); } } - - std::unordered_set GcChunkHashes(ChunkHashes.begin(), ChunkHashes.end()); + std::unordered_map GcChunkHashes; + GcChunkHashes.reserve(Chunks.size()); + for (const auto& Chunk : Chunks) { - std::vector OldChunkHashes(ChunkHashes.begin(), ChunkHashes.end()); - std::vector NewChunkHashes; - NewChunkHashes.reserve(kChunkCount); - std::vector NewChunks; - NewChunks.reserve(kChunkCount); + GcChunkHashes[Chunk.first] = Chunk.second.Bucket; + } + { + std::unordered_map NewChunks; for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) { - IoBuffer Chunk = CreateChunk(kChunkSize); - IoHash Hash = HashBuffer(Chunk); - NewChunkHashes.emplace_back(Hash); - NewChunks.emplace_back(Chunk); + { + IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); + IoHash Hash = HashBuffer(Chunk); + NewChunks[Hash] = {.Bucket = Bucket1, .Buffer = Chunk}; + } + { + IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); + IoHash Hash = HashBuffer(Chunk); + NewChunks[Hash] = {.Bucket = Bucket2, .Buffer = Chunk}; + } } RwLock ChunkHashesLock; std::atomic_uint32_t AddedChunkCount; - for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) + for (const auto& Chunk : NewChunks) { - const IoBuffer& Chunk = NewChunks[Idx]; - const IoHash& Hash = NewChunkHashes[Idx]; - ThreadPool.ScheduleWork([&Cas, Chunk, Hash, &AddedChunkCount]() { - CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, Hash); - ZEN_ASSERT(InsertResult.New); + ThreadPool.ScheduleWork([&Zcs, Chunk, &AddedChunkCount]() { + Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer}); AddedChunkCount.fetch_add(1); }); - ThreadPool.ScheduleWork([&Cas, &ChunkHashesLock, &OldChunkHashes, Idx]() { - IoHash ChunkHash = OldChunkHashes[Idx]; - IoBuffer Chunk = Cas.FindChunk(OldChunkHashes[Idx]); - if (Chunk) + } + + for (const auto& Chunk : Chunks) + { + ThreadPool.ScheduleWork([&Zcs, Chunk]() { + ZenCacheValue CacheValue; + if (Zcs.Get(Chunk.second.Bucket, Chunk.first, CacheValue)) { - CHECK(ChunkHash == IoHash::HashBuffer(Chunk)); + CHECK(Chunk.first == IoHash::HashBuffer(CacheValue.Value)); } }); } - - while (AddedChunkCount.load() < kChunkCount) + while (AddedChunkCount.load() < NewChunks.size()) { - std::vector AddedHashes; + std::unordered_map AddedChunks; { RwLock::ExclusiveLockScope _(ChunkHashesLock); - AddedHashes.swap(NewChunkHashes); + AddedChunks.swap(NewChunks); } // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope - for (const IoHash& ChunkHash : AddedHashes) + for (const auto& Chunk : AddedChunks) { - if (Cas.HaveChunk(ChunkHash)) + ZenCacheValue CacheValue; + if (Zcs.Get(Chunk.second.Bucket, Chunk.first, CacheValue)) { - GcChunkHashes.emplace(ChunkHash); + GcChunkHashes[Chunk.first] = Chunk.second.Bucket; } } - std::vector KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end()); - size_t C = 0; + std::vector KeepHashes; + KeepHashes.reserve(GcChunkHashes.size()); + for (const auto& Entry : GcChunkHashes) + { + KeepHashes.push_back(Entry.first); + } + size_t C = 0; while (C < KeepHashes.size()) { if (C % 155 == 0) @@ -3249,7 +3266,7 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) GcContext GcCtx; GcCtx.CollectSmallObjects(true); GcCtx.ContributeCas(KeepHashes); - Cas.CollectGarbage(GcCtx); + Zcs.CollectGarbage(GcCtx); CasChunkSet& Deleted = GcCtx.DeletedCas(); Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); } @@ -3260,27 +3277,41 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } { - std::vector AddedHashes; + std::unordered_map AddedChunks; { RwLock::ExclusiveLockScope _(ChunkHashesLock); - AddedHashes.swap(NewChunkHashes); + AddedChunks.swap(NewChunks); } // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope - for (const IoHash& ChunkHash : AddedHashes) + for (const auto& Chunk : AddedChunks) { - if (Cas.HaveChunk(ChunkHash)) + ZenCacheValue CacheValue; + if (Zcs.Get(Chunk.second.Bucket, Chunk.first, CacheValue)) { - GcChunkHashes.emplace(ChunkHash); + GcChunkHashes[Chunk.first] = Chunk.second.Bucket; } } - std::vector KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end()); - size_t C = 0; + std::vector KeepHashes; + KeepHashes.reserve(GcChunkHashes.size()); + for (const auto& Entry : GcChunkHashes) + { + KeepHashes.push_back(Entry.first); + } + size_t C = 0; while (C < KeepHashes.size()) { - if (C % 77 == 0 && C < KeepHashes.size() - 1) + if (C % 155 == 0) { - KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; - KeepHashes.pop_back(); + if (C < KeepHashes.size() - 1) + { + KeepHashes[C] = KeepHashes[KeepHashes.size() - 1]; + KeepHashes.pop_back(); + } + if (C + 3 < KeepHashes.size() - 1) + { + KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1]; + KeepHashes.pop_back(); + } } C++; } @@ -3288,17 +3319,18 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) GcContext GcCtx; GcCtx.CollectSmallObjects(true); GcCtx.ContributeCas(KeepHashes); - Cas.CollectGarbage(GcCtx); + Zcs.CollectGarbage(GcCtx); CasChunkSet& Deleted = GcCtx.DeletedCas(); Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); } } { - for (const IoHash& ChunkHash : GcChunkHashes) + for (const auto& Chunk : GcChunkHashes) { - ThreadPool.ScheduleWork([&Cas, ChunkHash]() { - CHECK(Cas.HaveChunk(ChunkHash)); - CHECK(ChunkHash == IoHash::HashBuffer(Cas.FindChunk(ChunkHash))); + ThreadPool.ScheduleWork([&Zcs, Chunk]() { + ZenCacheValue CacheValue; + CHECK(Zcs.Get(Chunk.second, Chunk.first, CacheValue)); + CHECK(Chunk.first == IoHash::HashBuffer(CacheValue.Value)); }); } while (ThreadPool.PendingWork() > 0) @@ -3308,7 +3340,6 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } } -# endif #endif -- cgit v1.2.3 From b0e81cd90b705c27bc94f0aa9bdf73eeadccc164 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 12 Apr 2022 14:19:05 +0200 Subject: Make sure we generate unique chunks --- zenserver/cache/structuredcachestore.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 0ce473e89..9ae01c5df 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -270,9 +270,9 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) } ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir) -: m_RootDir(RootDir) -, GcStorage(Gc) +: GcStorage(Gc) , GcContributor(Gc) +, m_RootDir(RootDir) , m_DiskLayer(RootDir) { ZEN_INFO("initializing structured cache at '{}'", RootDir); @@ -2104,7 +2104,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) if (!NewBlockFile || (WriteOffset + Chunk.size() > MaxBlockSize)) { - uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order::memory_order_relaxed); + uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed); std::vector LogEntries = MakeDiskIndexEntries(MovedBlockChunks, {}); m_SlogFile.Append(LogEntries); m_SlogFile.Flush(); @@ -3127,15 +3127,28 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) { + + while(true) { IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); IoHash Hash = HashBuffer(Chunk); + if (Chunks.contains(Hash)) + { + continue; + } Chunks[Hash] = {.Bucket = Bucket1, .Buffer = Chunk}; + break; } + while(true) { IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); IoHash Hash = HashBuffer(Chunk); + if (Chunks.contains(Hash)) + { + continue; + } Chunks[Hash] = {.Bucket = Bucket2, .Buffer = Chunk}; + break; } } @@ -3144,7 +3157,6 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) WorkerThreadPool ThreadPool(4); CasGc Gc; ZenCacheStore Zcs(Gc, TempDir.Path()); - const GcClock::TimePoint CurrentTime = GcClock::Now(); { for (const auto& Chunk : Chunks) @@ -3160,7 +3172,7 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) const uint64_t TotalSize = Zcs.StorageSize().DiskSize; CHECK_EQ(kChunkSize * Chunks.size(), TotalSize); - { + { for (const auto& Chunk : Chunks) { ThreadPool.ScheduleWork([&Zcs, &Chunk]() { -- cgit v1.2.3 From 2f362392dd2eba0d949e261ce5781965b8943d30 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 12 Apr 2022 14:47:32 +0200 Subject: remove unneeded lock in threaded test --- zenserver/cache/structuredcachestore.cpp | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 9ae01c5df..2746dc673 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -3213,7 +3213,6 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } - RwLock ChunkHashesLock; std::atomic_uint32_t AddedChunkCount; for (const auto& Chunk : NewChunks) @@ -3236,13 +3235,8 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } while (AddedChunkCount.load() < NewChunks.size()) { - std::unordered_map AddedChunks; - { - RwLock::ExclusiveLockScope _(ChunkHashesLock); - AddedChunks.swap(NewChunks); - } // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope - for (const auto& Chunk : AddedChunks) + for (const auto& Chunk : NewChunks) { ZenCacheValue CacheValue; if (Zcs.Get(Chunk.second.Bucket, Chunk.first, CacheValue)) @@ -3289,13 +3283,8 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } { - std::unordered_map AddedChunks; - { - RwLock::ExclusiveLockScope _(ChunkHashesLock); - AddedChunks.swap(NewChunks); - } // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope - for (const auto& Chunk : AddedChunks) + for (const auto& Chunk : NewChunks) { ZenCacheValue CacheValue; if (Zcs.Get(Chunk.second.Bucket, Chunk.first, CacheValue)) -- cgit v1.2.3 From 1cc2c8b9547e5244134299707ade3eb5afbf6c55 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 12 Apr 2022 22:33:35 +0200 Subject: Wait for work to complete rather than being picked up --- zenserver/cache/structuredcachestore.cpp | 49 +++++++++++++++++++------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 2746dc673..3ba4e6b05 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -3127,8 +3127,7 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) for (int32_t Idx = 0; Idx < kChunkCount; ++Idx) { - - while(true) + while (true) { IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); IoHash Hash = HashBuffer(Chunk); @@ -3136,10 +3135,10 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) { continue; } - Chunks[Hash] = {.Bucket = Bucket1, .Buffer = Chunk}; + Chunks[Hash] = {.Bucket = Bucket1, .Buffer = Chunk}; break; } - while(true) + while (true) { IoBuffer Chunk = testutils::CreateBinaryCacheValue(kChunkSize); IoHash Hash = HashBuffer(Chunk); @@ -3147,23 +3146,27 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) { continue; } - Chunks[Hash] = {.Bucket = Bucket2, .Buffer = Chunk}; + Chunks[Hash] = {.Bucket = Bucket2, .Buffer = Chunk}; break; } } CreateDirectories(TempDir.Path()); - WorkerThreadPool ThreadPool(4); - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path()); + WorkerThreadPool ThreadPool(4); + CasGc Gc; + ZenCacheStore Zcs(Gc, TempDir.Path()); { + std::atomic WorkCompleted = 0; for (const auto& Chunk : Chunks) { - ThreadPool.ScheduleWork([&Zcs, &Chunk]() { Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer}); }); + ThreadPool.ScheduleWork([&Zcs, &WorkCompleted, &Chunk]() { + Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer}); + WorkCompleted.fetch_add(1); + }); } - while (ThreadPool.PendingWork() > 0) + while (WorkCompleted < Chunks.size()) { Sleep(1); } @@ -3172,10 +3175,11 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) const uint64_t TotalSize = Zcs.StorageSize().DiskSize; CHECK_EQ(kChunkSize * Chunks.size(), TotalSize); - { + { + std::atomic WorkCompleted = 0; for (const auto& Chunk : Chunks) { - ThreadPool.ScheduleWork([&Zcs, &Chunk]() { + ThreadPool.ScheduleWork([&Zcs, &WorkCompleted, &Chunk]() { std::string Bucket = Chunk.second.Bucket; IoHash ChunkHash = Chunk.first; ZenCacheValue CacheValue; @@ -3183,9 +3187,10 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) CHECK(Zcs.Get(Bucket, ChunkHash, CacheValue)); IoHash Hash = IoHash::HashBuffer(CacheValue.Value); CHECK(ChunkHash == Hash); + WorkCompleted.fetch_add(1); }); } - while (ThreadPool.PendingWork() > 0) + while (WorkCompleted < Chunks.size()) { Sleep(1); } @@ -3213,24 +3218,26 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } - std::atomic_uint32_t AddedChunkCount; - + std::atomic WorkCompleted = 0; + std::atomic_uint32_t AddedChunkCount = 0; for (const auto& Chunk : NewChunks) { - ThreadPool.ScheduleWork([&Zcs, Chunk, &AddedChunkCount]() { + ThreadPool.ScheduleWork([&Zcs, &WorkCompleted, Chunk, &AddedChunkCount]() { Zcs.Put(Chunk.second.Bucket, Chunk.first, {.Value = Chunk.second.Buffer}); AddedChunkCount.fetch_add(1); + WorkCompleted.fetch_add(1); }); } for (const auto& Chunk : Chunks) { - ThreadPool.ScheduleWork([&Zcs, Chunk]() { + ThreadPool.ScheduleWork([&Zcs, &WorkCompleted, Chunk]() { ZenCacheValue CacheValue; if (Zcs.Get(Chunk.second.Bucket, Chunk.first, CacheValue)) { CHECK(Chunk.first == IoHash::HashBuffer(CacheValue.Value)); } + WorkCompleted.fetch_add(1); }); } while (AddedChunkCount.load() < NewChunks.size()) @@ -3277,7 +3284,7 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); }); } - while (ThreadPool.PendingWork() > 0) + while (WorkCompleted < NewChunks.size() + Chunks.size()) { Sleep(1); } @@ -3326,15 +3333,17 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } { + std::atomic WorkCompleted = 0; for (const auto& Chunk : GcChunkHashes) { - ThreadPool.ScheduleWork([&Zcs, Chunk]() { + ThreadPool.ScheduleWork([&Zcs, &WorkCompleted, Chunk]() { ZenCacheValue CacheValue; CHECK(Zcs.Get(Chunk.second, Chunk.first, CacheValue)); CHECK(Chunk.first == IoHash::HashBuffer(CacheValue.Value)); + WorkCompleted.fetch_add(1); }); } - while (ThreadPool.PendingWork() > 0) + while (WorkCompleted < GcChunkHashes.size()) { Sleep(1); } -- cgit v1.2.3 From f7476199742256fed6f867afb6db8cecdacfe547 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 26 Apr 2022 13:36:02 +0200 Subject: Batch log removal of Cid and take proper lock when modifying m_CidMap (#80) * Batch log removal of Cid and take proper lock when modifying m_CidMap * variable name casing * Don't access m_Buckets without a lock --- zenserver/cache/structuredcachestore.cpp | 9 +++++---- zenstore/cidstore.cpp | 23 +++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 738e4c1fd..6bf513105 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1255,10 +1255,10 @@ ZenCacheDiskLayer::Put(std::string_view InBucket, const IoHash& HashKey, const Z auto It = m_Buckets.try_emplace(BucketName, BucketName); Bucket = &It.first->second; - std::filesystem::path bucketPath = m_RootDir; - bucketPath /= BucketName; + std::filesystem::path BucketPath = m_RootDir; + BucketPath /= BucketName; - Bucket->OpenOrCreate(bucketPath); + Bucket->OpenOrCreate(BucketPath); } } @@ -1363,11 +1363,12 @@ void ZenCacheDiskLayer::Flush() { std::vector Buckets; - Buckets.reserve(m_Buckets.size()); + { RwLock::SharedLockScope _(m_Lock); + Buckets.reserve(m_Buckets.size()); for (auto& Kv : m_Buckets) { Buckets.push_back(&Kv.second); diff --git a/zenstore/cidstore.cpp b/zenstore/cidstore.cpp index 509d21abe..55bec817f 100644 --- a/zenstore/cidstore.cpp +++ b/zenstore/cidstore.cpp @@ -234,23 +234,22 @@ struct CidStore::Impl void RemoveCids(CasChunkSet& CasChunks) { - RwLock::ExclusiveLockScope _(m_Lock); - - for (auto It = m_CidMap.begin(), End = m_CidMap.end(); It != End;) + std::vector RemovedEntries; + RemovedEntries.reserve(CasChunks.GetSize()); { - if (CasChunks.ContainsChunk(It->second)) - { - const IoHash& BadHash = It->first; - - // Log a tombstone record - LogMapping(BadHash, IoHash::Zero); - It = m_CidMap.erase(It); - } - else + RwLock::ExclusiveLockScope _(m_Lock); + for (auto It = m_CidMap.begin(), End = m_CidMap.end(); It != End;) { + if (CasChunks.ContainsChunk(It->second)) + { + RemovedEntries.push_back({It->first, IoHash::Zero}); + It = m_CidMap.erase(It); + continue; + } ++It; } } + m_LogFile.Append(RemovedEntries); } uint64_t m_LastScrubTime = 0; -- cgit v1.2.3 From 4e538014c4e5089f10aca69657a948e4d912292d Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 26 Apr 2022 23:18:44 +0200 Subject: Use GetCacheValues when checking upstream to reduce number of calls to upstream Added some timing info to debug logs --- zenserver/cache/structuredcache.cpp | 148 ++++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 47 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 8ae531720..8daf08bff 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -650,42 +650,52 @@ HttpStructuredCacheService::HandleCacheValueRequest(HttpServerRequest& Request, void HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL) { + Stopwatch Timer; + IoBuffer Value = m_CidStore.FindChunkByCid(Ref.ValueContentId); bool InUpstreamCache = false; CachePolicy Policy = PolicyFromURL; - const bool QueryUpstream = !Value && EnumHasAllFlags(Policy, CachePolicy::QueryRemote); - - if (QueryUpstream) { - if (auto UpstreamResult = m_UpstreamCache.GetCacheValue({Ref.BucketSegment, Ref.HashKey}, Ref.ValueContentId); - UpstreamResult.Success) + const bool QueryUpstream = !Value && EnumHasAllFlags(Policy, CachePolicy::QueryRemote); + + if (QueryUpstream) { - if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(UpstreamResult.Value))) - { - m_CidStore.AddChunk(Compressed); - InUpstreamCache = true; - } - else + if (auto UpstreamResult = m_UpstreamCache.GetCacheValue({Ref.BucketSegment, Ref.HashKey}, Ref.ValueContentId); + UpstreamResult.Success) { - ZEN_WARN("got uncompressed upstream cache value"); + if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(UpstreamResult.Value))) + { + m_CidStore.AddChunk(Compressed); + InUpstreamCache = true; + } + else + { + ZEN_WARN("got uncompressed upstream cache value"); + } } } } if (!Value) { - ZEN_DEBUG("MISS - '{}/{}/{}' '{}'", Ref.BucketSegment, Ref.HashKey, Ref.ValueContentId, ToString(Request.AcceptContentType())); + ZEN_DEBUG("MISS - '{}/{}/{}' '{}' in {}", + Ref.BucketSegment, + Ref.HashKey, + Ref.ValueContentId, + ToString(Request.AcceptContentType()), + NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); m_CacheStats.MissCount++; return Request.WriteResponse(HttpResponseCode::NotFound); } - ZEN_DEBUG("HIT - '{}/{}/{}' {} '{}' ({})", + ZEN_DEBUG("HIT - '{}/{}/{}' {} '{}' ({}) in {}", Ref.BucketSegment, Ref.HashKey, Ref.ValueContentId, NiceBytes(Value.Size()), ToString(Value.GetContentType()), - InUpstreamCache ? "UPSTREAM" : "LOCAL"); + InUpstreamCache ? "UPSTREAM" : "LOCAL", + NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); m_CacheStats.HitCount++; if (InUpstreamCache) @@ -709,6 +719,8 @@ HttpStructuredCacheService::HandlePutCacheValue(zen::HttpServerRequest& Request, // Note: Individual cacherecord values are not propagated upstream until a valid cache record has been stored ZEN_UNUSED(PolicyFromURL); + Stopwatch Timer; + IoBuffer Body = Request.ReadPayload(); if (!Body || Body.Size() == 0) @@ -734,13 +746,14 @@ HttpStructuredCacheService::HandlePutCacheValue(zen::HttpServerRequest& Request, CidStore::InsertResult Result = m_CidStore.AddChunk(Compressed); - ZEN_DEBUG("PUT - '{}/{}/{}' {} '{}' ({})", + ZEN_DEBUG("PUT - '{}/{}/{}' {} '{}' ({}) in {}", Ref.BucketSegment, Ref.HashKey, Ref.ValueContentId, NiceBytes(Body.Size()), ToString(Body.GetContentType()), - Result.New ? "NEW" : "OLD"); + Result.New ? "NEW" : "OLD", + NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); const HttpResponseCode ResponseCode = Result.New ? HttpResponseCode::Created : HttpResponseCode::OK; @@ -1444,8 +1457,12 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http ZEN_ASSERT(RpcRequest["Method"sv].AsString() == "GetCacheValues"sv); + std::vector RemoteRequestIndexes; + for (CbFieldView RequestField : Params["Requests"sv]) { + Stopwatch Timer; + RequestData& Request = Requests.emplace_back(); CbObjectView RequestObject = RequestField.AsObjectView(); CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); @@ -1463,46 +1480,28 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http CachePolicy Policy = Request.Policy; CompressedBuffer& Result = Request.Result; - ZenCacheValue CacheValue; - std::string_view Source; + ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { if (m_CacheStore.Get(Key.Bucket, Key.Hash, CacheValue) && IsCompressedBinary(CacheValue.Value.GetContentType())) { Result = CompressedBuffer::FromCompressed(SharedBuffer(CacheValue.Value)); - if (Result) - { - Source = "LOCAL"sv; - } - } - } - if (!Result && EnumHasAllFlags(Policy, CachePolicy::QueryRemote)) - { - GetUpstreamCacheResult UpstreamResult = - m_UpstreamCache.GetCacheRecord({Key.Bucket, Key.Hash}, ZenContentType::kCompressedBinary); - if (UpstreamResult.Success && IsCompressedBinary(UpstreamResult.Value.GetContentType())) - { - Result = CompressedBuffer::FromCompressed(SharedBuffer(UpstreamResult.Value)); - if (Result) - { - UpstreamResult.Value.SetContentType(ZenContentType::kCompressedBinary); - Source = "UPSTREAM"sv; - // TODO: Respect the StoreLocal flag once we have upstream existence-only checks. For now the requirement - // that we copy data from upstream even when SkipData and !StoreLocal are true means that it is too expensive - // for us to keep the data only on the upstream server. - // if (EnumHasAllFlags(Policy, CachePolicy::StoreLocal)) - { - m_CacheStore.Put(Key.Bucket, Key.Hash, ZenCacheValue{UpstreamResult.Value}); - } - } } } - if (Result) { - ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}' {} ({})", Key.Bucket, Key.Hash, NiceBytes(Result.GetCompressed().GetSize()), Source); + ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}' {} ({}) in {}", + Key.Bucket, + Key.Hash, + NiceBytes(Result.GetCompressed().GetSize()), + "LOCAL"sv, + NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); m_CacheStats.HitCount++; } + else if (EnumHasAllFlags(Policy, CachePolicy::QueryRemote)) + { + RemoteRequestIndexes.push_back(Requests.size() - 1); + } else if (!EnumHasAnyFlags(Policy, CachePolicy::Query)) { // If they requested no query, do not record this as a miss @@ -1510,10 +1509,65 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http } else { - ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}'", Key.Bucket, Key.Hash); + ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}' ({}) in {}", + Key.Bucket, + Key.Hash, + "LOCAL"sv, + NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); m_CacheStats.MissCount++; } } + + if (!RemoteRequestIndexes.empty()) + { + std::vector RequestedRecordsData; + std::vector CacheChunkRequests; + RequestedRecordsData.reserve(RemoteRequestIndexes.size()); + CacheChunkRequests.reserve(RemoteRequestIndexes.size()); + for (size_t Index : RemoteRequestIndexes) + { + RequestData& Request = Requests[Index]; + RequestedRecordsData.push_back({Request.Key.Bucket, Request.Key.Hash}); + CacheChunkRequests.push_back(&RequestedRecordsData.back()); + } + Stopwatch Timer; + m_UpstreamCache.GetCacheValues( + CacheChunkRequests, + [this, &RequestedRecordsData, &Requests, &RemoteRequestIndexes, &Timer](CacheValueGetCompleteParams&& Params) { + CacheChunkRequest& ChunkRequest = Params.Request; + if (Params.Value) + { + size_t RequestOffset = std::distance(RequestedRecordsData.data(), &ChunkRequest); + size_t RequestIndex = RemoteRequestIndexes[RequestOffset]; + RequestData& Request = Requests[RequestIndex]; + Request.Result = CompressedBuffer::FromCompressed(SharedBuffer(Params.Value)); + if (Request.Result && IsCompressedBinary(Params.Value.GetContentType())) + { + // TODO: Respect the StoreLocal flag once we have upstream existence-only checks. For now the requirement + // that we copy data from upstream even when SkipData and !StoreLocal are true means that it is too expensive + // for us to keep the data only on the upstream server. + // if (EnumHasAllFlags(Policy, CachePolicy::StoreLocal)) + m_CacheStore.Put(Request.Key.Bucket, Request.Key.Hash, ZenCacheValue{Params.Value}); + ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}' {} ({}) in {}", + ChunkRequest.Key.Bucket, + ChunkRequest.Key.Hash, + NiceBytes(Request.Result.GetCompressed().GetSize()), + "UPSTREAM"sv, + NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); + m_CacheStats.HitCount++; + m_CacheStats.UpstreamHitCount++; + return; + } + } + ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}' ({}) in {}", + ChunkRequest.Key.Bucket, + ChunkRequest.Key.Hash, + "UPSTREAM"sv, + NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); + m_CacheStats.MissCount++; + }); + } + if (Requests.empty()) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); -- cgit v1.2.3 From 427a616c5a1abfe111cc5bb87526df5080de37d2 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 27 Apr 2022 08:34:13 +0200 Subject: trigger clang format --- zenserver/cache/structuredcachestore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 738e4c1fd..f499cf194 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1363,8 +1363,8 @@ void ZenCacheDiskLayer::Flush() { std::vector Buckets; - Buckets.reserve(m_Buckets.size()); + Buckets.reserve(m_Buckets.size()); { RwLock::SharedLockScope _(m_Lock); -- cgit v1.2.3 From 38d11ffd48dd61ea3186fc2b27cb1f312206da55 Mon Sep 17 00:00:00 2001 From: Per Larsson Date: Wed, 27 Apr 2022 21:01:13 +0200 Subject: Serialize compact binary from JSON. --- zencore/compactbinary.cpp | 320 ++++++++++++++++++++++++++++++++ zencore/include/zencore/compactbinary.h | 6 + zencore/include/zencore/string.h | 2 +- 3 files changed, 327 insertions(+), 1 deletion(-) diff --git a/zencore/compactbinary.cpp b/zencore/compactbinary.cpp index ffc1da10c..00c5aa0de 100644 --- a/zencore/compactbinary.cpp +++ b/zencore/compactbinary.cpp @@ -1715,6 +1715,225 @@ CompactBinaryToJson(const CbArrayView& Array, StringBuilderBase& Builder) ////////////////////////////////////////////////////////////////////////// +class CbJsonReader +{ +public: + static CbFieldIterator Read(std::string_view JsonText, std::string& Error) + { + using namespace json11; + + const Json Json = Json::parse(std::string(JsonText), Error); + + if (Error.empty()) + { + CbWriter Writer; + if (ReadField(Writer, Json, std::string_view(), Error)) + { + return Writer.Save(); + } + } + + return CbFieldIterator(); + } + +private: + static bool ReadField(CbWriter& Writer, const json11::Json& Json, const std::string_view FieldName, std::string& Error) + { + using namespace json11; + + switch (Json.type()) + { + case Json::Type::OBJECT: + { + if (FieldName.empty()) + { + Writer.BeginObject(); + } + else + { + Writer.BeginObject(FieldName); + } + + for (const auto& Kv : Json.object_items()) + { + const std::string& Name = Kv.first; + const json11::Json& Item = Kv.second; + + if (ReadField(Writer, Item, Name, Error) == false) + { + return false; + } + } + + Writer.EndObject(); + } + break; + case Json::Type::ARRAY: + { + if (FieldName.empty()) + { + Writer.BeginArray(); + } + else + { + Writer.BeginArray(FieldName); + } + + for (const json11::Json& Item : Json.array_items()) + { + if (ReadField(Writer, Item, std::string_view(), Error) == false) + { + return false; + } + } + + Writer.EndArray(); + } + break; + case Json::Type::NUL: + { + if (FieldName.empty()) + { + Writer.AddNull(); + } + else + { + Writer.AddNull(FieldName); + } + } + break; + case Json::Type::BOOL: + { + if (FieldName.empty()) + { + Writer.AddBool(Json.bool_value()); + } + else + { + Writer.AddBool(FieldName, Json.bool_value()); + } + } + break; + case Json::Type::NUMBER: + { + if (FieldName.empty()) + { + Writer.AddFloat(Json.number_value()); + } + else + { + Writer.AddFloat(FieldName, Json.number_value()); + } + } + break; + case Json::Type::STRING: + { + Oid Id; + if (TryParseObjectId(Json.string_value(), Id)) + { + if (FieldName.empty()) + { + Writer.AddObjectId(Id); + } + else + { + Writer.AddObjectId(FieldName, Id); + } + + return true; + } + + IoHash Hash; + if (TryParseIoHash(Json.string_value(), Hash)) + { + if (FieldName.empty()) + { + Writer.AddHash(Hash); + } + else + { + Writer.AddHash(FieldName, Hash); + } + + return true; + } + + if (FieldName.empty()) + { + Writer.AddString(Json.string_value()); + } + else + { + Writer.AddString(FieldName, Json.string_value()); + } + } + break; + default: + break; + } + + return true; + } + + static constexpr AsciiSet HexCharSet = AsciiSet("0123456789abcdefABCDEF"); + + static bool TryParseObjectId(std::string_view Str, Oid& Id) + { + using namespace std::literals; + + if (Str.size() == Oid::StringLength && AsciiSet::HasOnly(Str, HexCharSet)) + { + Id = Oid::FromHexString(Str); + return true; + } + + if (Str.starts_with("0x"sv)) + { + return TryParseObjectId(Str.substr(2), Id); + } + + return false; + } + + static bool TryParseIoHash(std::string_view Str, IoHash& Hash) + { + using namespace std::literals; + + if (Str.size() == IoHash::StringLength && AsciiSet::HasOnly(Str, HexCharSet)) + { + Hash = IoHash::FromHexString(Str); + return true; + } + + if (Str.starts_with("0x"sv)) + { + return TryParseIoHash(Str.substr(2), Hash); + } + + return false; + } +}; + +CbFieldIterator +LoadCompactBinaryFromJson(std::string_view Json, std::string& Error) +{ + if (Json.empty() == false) + { + return CbJsonReader::Read(Json, Error); + } + + return CbFieldIterator(); +} + +CbFieldIterator +LoadCompactBinaryFromJson(std::string_view Json) +{ + std::string Error; + return LoadCompactBinaryFromJson(Json, Error); +} + +////////////////////////////////////////////////////////////////////////// + #if ZEN_WITH_TESTS void uson_forcelink() @@ -1970,6 +2189,107 @@ TEST_CASE("uson.datetime") CHECK_EQ(D72.GetSecond(), 10); } } + +TEST_CASE("json.uson") +{ + using namespace std::literals; + using namespace json11; + + SUBCASE("empty") + { + CbFieldIterator It = LoadCompactBinaryFromJson(""sv); + CHECK(It.HasValue() == false); + } + + SUBCASE("object") + { + const Json JsonObject = Json::object{{"Null", nullptr}, + {"String", "Value1"}, + {"Bool", true}, + {"Number", 46.2}, + {"Array", Json::array{1, 2, 3}}, + {"Object", + Json::object{ + {"String", "Value2"}, + }}}; + + CbObject Cb = LoadCompactBinaryFromJson(JsonObject.dump()).AsObject(); + + CHECK(Cb["Null"].IsNull()); + CHECK(Cb["String"].AsString() == "Value1"sv); + CHECK(Cb["Bool"].AsBool()); + CHECK(Cb["Number"].AsDouble() == 46.2); + CHECK(Cb["Object"].IsObject()); + CbObjectView Object = Cb["Object"].AsObjectView(); + CHECK(Object["String"].AsString() == "Value2"sv); + } + + SUBCASE("array") + { + const Json JsonArray = Json::array{42, 43, 44}; + CbArray Cb = LoadCompactBinaryFromJson(JsonArray.dump()).AsArray(); + + auto It = Cb.CreateIterator(); + CHECK((*It).AsDouble() == 42); + It++; + CHECK((*It).AsDouble() == 43); + It++; + CHECK((*It).AsDouble() == 44); + } + + SUBCASE("objectid") + { + const Oid& Id = Oid::NewOid(); + + StringBuilder<64> Sb; + Id.ToString(Sb); + + Json JsonObject = Json::object{{"value", Sb.ToString()}}; + CbObject Cb = LoadCompactBinaryFromJson(JsonObject.dump()).AsObject(); + + CHECK(Cb["value"sv].IsObjectId()); + CHECK(Cb["value"sv].AsObjectId() == Id); + + Sb.Reset(); + Sb << "0x"; + Id.ToString(Sb); + + JsonObject = Json::object{{"value", Sb.ToString()}}; + Cb = LoadCompactBinaryFromJson(JsonObject.dump()).AsObject(); + + CHECK(Cb["value"sv].IsObjectId()); + CHECK(Cb["value"sv].AsObjectId() == Id); + } + + SUBCASE("iohash") + { + const uint8_t Data[] = { + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + }; + + const IoHash Hash = IoHash::HashBuffer(Data, sizeof(Data)); + + Json JsonObject = Json::object{{"value", Hash.ToHexString()}}; + CbObject Cb = LoadCompactBinaryFromJson(JsonObject.dump()).AsObject(); + + CHECK(Cb["value"sv].IsHash()); + CHECK(Cb["value"sv].AsHash() == Hash); + + JsonObject = Json::object{{"value", "0x" + Hash.ToHexString()}}; + Cb = LoadCompactBinaryFromJson(JsonObject.dump()).AsObject(); + + CHECK(Cb["value"sv].IsHash()); + CHECK(Cb["value"sv].AsHash() == Hash); + } +} #endif } // namespace zen diff --git a/zencore/include/zencore/compactbinary.h b/zencore/include/zencore/compactbinary.h index 19f1597dc..eba4a1694 100644 --- a/zencore/include/zencore/compactbinary.h +++ b/zencore/include/zencore/compactbinary.h @@ -1405,6 +1405,12 @@ ZENCORE_API CbObject LoadCompactBinaryObject(const IoBuffer& Payload); ZENCORE_API CbObject LoadCompactBinaryObject(CompressedBuffer&& Payload); ZENCORE_API CbObject LoadCompactBinaryObject(const CompressedBuffer& Payload); +/** + * Load a compact binary from JSON. + */ +ZENCORE_API CbFieldIterator LoadCompactBinaryFromJson(std::string_view Json, std::string& Error); +ZENCORE_API CbFieldIterator LoadCompactBinaryFromJson(std::string_view Json); + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** diff --git a/zencore/include/zencore/string.h b/zencore/include/zencore/string.h index 027730063..012ee73ee 100644 --- a/zencore/include/zencore/string.h +++ b/zencore/include/zencore/string.h @@ -999,7 +999,7 @@ public: static constexpr bool HasOnly(const StringType& Str, AsciiSet Set) { auto End = Str.data() + Str.size(); - return FindFirst(Set, GetData(Str), End) == End; + return FindFirst(Set, Str.data(), End) == End; } private: -- cgit v1.2.3 From 971faf7906975c3769d137113f58160ae3528b44 Mon Sep 17 00:00:00 2001 From: Per Larsson Date: Thu, 28 Apr 2022 09:05:07 +0200 Subject: Added missing includes. --- zencore/compactbinary.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zencore/compactbinary.cpp b/zencore/compactbinary.cpp index 00c5aa0de..a51253989 100644 --- a/zencore/compactbinary.cpp +++ b/zencore/compactbinary.cpp @@ -3,6 +3,7 @@ #include "zencore/compactbinary.h" #include +#include #include #include #include @@ -22,10 +23,9 @@ # include #endif -#if ZEN_WITH_TESTS -# include -# include -#endif +ZEN_THIRD_PARTY_INCLUDES_START +#include +ZEN_THIRD_PARTY_INCLUDES_END namespace zen { -- cgit v1.2.3 From db2bee546354a03ca9a94a647161b041b3033491 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 28 Apr 2022 15:32:55 +0200 Subject: Reduce risk of reallocating backing std::vector in CbWriter::AddBinary Shard up g_MappingLock in IoBufferExtendedCore::Materialize() to reduce contention during high load Don't queue upstream cache records if we don't have any upstreams --- zencore/compactbinarybuilder.cpp | 5 +++-- zencore/iobuffer.cpp | 12 ++++++++++-- zenserver/upstream/upstreamcache.cpp | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/zencore/compactbinarybuilder.cpp b/zencore/compactbinarybuilder.cpp index 5111504e1..1d2ba45df 100644 --- a/zencore/compactbinarybuilder.cpp +++ b/zencore/compactbinarybuilder.cpp @@ -436,9 +436,10 @@ CbWriter::AddNull() void CbWriter::AddBinary(const void* const Value, const uint64_t Size) { + const size_t SizeByteCount = MeasureVarUInt(Size); + Data.reserve(Data.size() + 1 + SizeByteCount + Size); BeginField(); - const uint32_t SizeByteCount = MeasureVarUInt(Size); - const int64_t SizeOffset = Data.size(); + const size_t SizeOffset = Data.size(); Data.resize(Data.size() + SizeByteCount); WriteVarUInt(Size, Data.data() + SizeOffset); Data.insert(Data.end(), static_cast(Value), static_cast(Value) + Size); diff --git a/zencore/iobuffer.cpp b/zencore/iobuffer.cpp index 8a3ab8427..c069aa0f1 100644 --- a/zencore/iobuffer.cpp +++ b/zencore/iobuffer.cpp @@ -226,7 +226,15 @@ IoBufferExtendedCore::~IoBufferExtendedCore() m_DataPtr = nullptr; } -static RwLock g_MappingLock; +static RwLock g_MappingLock[0x40]; + +static RwLock& +MappingLockForInstance(const IoBufferExtendedCore* instance) +{ + intptr_t base = (intptr_t)instance; + size_t lock_index = ((base >> 8) ^ (base >> 16)) & 0x3f; + return g_MappingLock[lock_index]; +} void IoBufferExtendedCore::Materialize() const @@ -237,7 +245,7 @@ IoBufferExtendedCore::Materialize() const if (m_Flags.load(std::memory_order_acquire) & kIsMaterialized) return; - RwLock::ExclusiveLockScope _(g_MappingLock); + RwLock::ExclusiveLockScope _(MappingLockForInstance(this)); // Someone could have gotten here first // We can use memory_order_relaxed on this load because the mutex has already provided the fence diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp index da0743f0a..dba80faa9 100644 --- a/zenserver/upstream/upstreamcache.cpp +++ b/zenserver/upstream/upstreamcache.cpp @@ -1451,7 +1451,7 @@ public: virtual void EnqueueUpstream(UpstreamCacheRecord CacheRecord) override { - if (m_RunState.IsRunning && m_Options.WriteUpstream) + if (m_RunState.IsRunning && m_Options.WriteUpstream && m_Endpoints.size() > 0) { if (!m_UpstreamThreads.empty()) { -- cgit v1.2.3 From 042ebaa2822400d8cab69c51126f61f131ecfc8e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 28 Apr 2022 16:56:32 +0200 Subject: naming cleanup --- zencore/iobuffer.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/zencore/iobuffer.cpp b/zencore/iobuffer.cpp index c069aa0f1..ccf92da62 100644 --- a/zencore/iobuffer.cpp +++ b/zencore/iobuffer.cpp @@ -226,14 +226,17 @@ IoBufferExtendedCore::~IoBufferExtendedCore() m_DataPtr = nullptr; } -static RwLock g_MappingLock[0x40]; +static constexpr size_t MappingLockCount = 64; +static_assert((MappingLockCount & (MappingLockCount - 1)) == 0, "MappingLockCount must be power of two"); + +static RwLock g_MappingLocks[MappingLockCount]; static RwLock& MappingLockForInstance(const IoBufferExtendedCore* instance) { intptr_t base = (intptr_t)instance; - size_t lock_index = ((base >> 8) ^ (base >> 16)) & 0x3f; - return g_MappingLock[lock_index]; + size_t lock_index = ((base >> 8) ^ (base >> 16)) & (MappingLockCount - 1u); + return g_MappingLocks[lock_index]; } void -- cgit v1.2.3 From f9f58ce2f568d8e3d39acad746242bbe340f68cc Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 29 Apr 2022 08:52:01 +0200 Subject: use IsPow2 for mapping lock count --- zencore/iobuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zencore/iobuffer.cpp b/zencore/iobuffer.cpp index ccf92da62..46b9ab336 100644 --- a/zencore/iobuffer.cpp +++ b/zencore/iobuffer.cpp @@ -227,7 +227,7 @@ IoBufferExtendedCore::~IoBufferExtendedCore() } static constexpr size_t MappingLockCount = 64; -static_assert((MappingLockCount & (MappingLockCount - 1)) == 0, "MappingLockCount must be power of two"); +static_assert(IsPow2(MappingLockCount), "MappingLockCount must be power of two"); static RwLock g_MappingLocks[MappingLockCount]; -- cgit v1.2.3 From cfbd92d50918d45583d970baf938c0e634e12d95 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 29 Apr 2022 08:57:01 +0200 Subject: mac compilation fix --- zenserver/cache/structuredcache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 8daf08bff..e1d9de976 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -1527,7 +1527,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http for (size_t Index : RemoteRequestIndexes) { RequestData& Request = Requests[Index]; - RequestedRecordsData.push_back({Request.Key.Bucket, Request.Key.Hash}); + RequestedRecordsData.push_back({{Request.Key.Bucket, Request.Key.Hash}}); CacheChunkRequests.push_back(&RequestedRecordsData.back()); } Stopwatch Timer; -- cgit v1.2.3 From de6057de814a4dc16654bdda84f697476b2ebef5 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sat, 30 Apr 2022 01:29:24 +0200 Subject: first pass at generic block store with gc --- zenstore/blockstore.cpp | 508 +++++++++++++++++++++++++++++++ zenstore/compactcas.cpp | 538 +++++---------------------------- zenstore/compactcas.h | 7 +- zenstore/include/zenstore/blockstore.h | 41 +++ 4 files changed, 634 insertions(+), 460 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 1eb859d5a..a897ed902 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -2,6 +2,9 @@ #include "compactcas.h" +#include +#include +#include #include #if ZEN_WITH_TESTS @@ -108,6 +111,511 @@ BlockStoreFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::functio m_File.StreamByteRange(FileOffset, Size, std::move(ChunkFun)); } +namespace { + const char* DataExtension = ".ucas"; + + std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex) + { + ExtendablePathBuilder<256> Path; + + char BlockHexString[9]; + ToHexNumber(BlockIndex, BlockHexString); + + Path.Append(BlocksBasePath); + Path.AppendSeparator(); + Path.AppendAsciiRange(BlockHexString, BlockHexString + 4); + Path.AppendSeparator(); + Path.Append(BlockHexString); + Path.Append(DataExtension); + return Path.ToPath(); + } +} // namespace + +void +BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, + uint64_t MaxBlockSize, + uint64_t MaxBlockCount, + const std::vector& KnownLocations) +{ + ZEN_ASSERT(MaxBlockSize > 0); + ZEN_ASSERT(MaxBlockCount > 0); + ZEN_ASSERT(IsPow2(MaxBlockCount)); + + m_BlocksBasePath = BlocksBasePath; + m_MaxBlockSize = MaxBlockSize; + + m_TotalSize = 0; + m_ChunkBlocks.clear(); + + std::unordered_set KnownBlocks; + for (const auto& Entry : KnownLocations) + { + m_TotalSize.fetch_add(Entry.Size, std::memory_order_seq_cst); + KnownBlocks.insert(Entry.BlockIndex); + } + + if (std::filesystem::is_directory(m_BlocksBasePath)) + { + std::vector FoldersToScan; + FoldersToScan.push_back(m_BlocksBasePath); + size_t FolderOffset = 0; + while (FolderOffset < FoldersToScan.size()) + { + for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(FoldersToScan[FolderOffset])) + { + if (Entry.is_directory()) + { + FoldersToScan.push_back(Entry.path()); + continue; + } + if (Entry.is_regular_file()) + { + const std::filesystem::path Path = Entry.path(); + if (Path.extension() != DataExtension) + { + continue; + } + std::string FileName = Path.stem().string(); + uint32_t BlockIndex; + bool OK = ParseHexNumber(FileName, BlockIndex); + if (!OK) + { + continue; + } + if (!KnownBlocks.contains(BlockIndex)) + { + // Log removing unreferenced block + // Clear out unused blocks + ZEN_INFO("removing unused block for '{}' at '{}'", m_BlocksBasePath, Path); + std::error_code Ec; + std::filesystem::remove(Path, Ec); + if (Ec) + { + ZEN_WARN("Failed to delete file '{}' reason: '{}'", Path, Ec.message()); + } + continue; + } + Ref BlockFile = new BlockStoreFile(Path); + BlockFile->Open(); + m_ChunkBlocks[BlockIndex] = BlockFile; + } + } + ++FolderOffset; + } + } + else + { + CreateDirectories(m_BlocksBasePath); + } +} + +BlockStoreLocation +BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) +{ + RwLock::ExclusiveLockScope InsertLock(m_InsertLock); + + uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); + bool IsWriting = m_WriteBlock != nullptr; + if (!IsWriting || (m_CurrentInsertOffset + Size) > m_MaxBlockSize) + { + if (m_WriteBlock) + { + m_WriteBlock = nullptr; + } + { + if (m_ChunkBlocks.size() == m_MaxBlockCount) + { + throw std::runtime_error(fmt::format("unable to allocate a new block in '{}'", m_BlocksBasePath)); + } + WriteBlockIndex += IsWriting ? 1 : 0; + while (m_ChunkBlocks.contains(WriteBlockIndex)) + { + WriteBlockIndex = (WriteBlockIndex + 1) & (m_MaxBlockCount - 1); + } + std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex); + m_WriteBlock = new BlockStoreFile(BlockPath); + m_ChunkBlocks[WriteBlockIndex] = m_WriteBlock; + m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); + } + m_CurrentInsertOffset = 0; + m_WriteBlock->Create(m_MaxBlockSize); + } + uint64_t InsertOffset = m_CurrentInsertOffset; + m_CurrentInsertOffset = RoundUp(InsertOffset + Size, Alignment); + Ref WriteBlock = m_WriteBlock; + InsertLock.ReleaseNow(); + + BlockStoreLocation Location{.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = Size}; + WriteBlock->Write(Data, Size, InsertOffset); + + return Location; +} + +/* +IoBuffer +BlockStore::ReadChunk(const BlockStoreLocation& Location) +{ + RwLock::SharedLockScope InsertLock(m_InsertLock); + Ref ChunkBlock = m_ChunkBlocks[Location.BlockIndex]; + InsertLock.ReleaseNow(); + return ChunkBlock->GetChunk(Location.Offset, Location.Size); +} +*/ + +Ref +BlockStore::GetChunkBlock(const BlockStoreLocation& Location) +{ + RwLock::SharedLockScope InsertLock(m_InsertLock); + return m_ChunkBlocks[Location.BlockIndex]; +} + +void +BlockStore::Flush() +{ + RwLock::ExclusiveLockScope _(m_InsertLock); + if (m_CurrentInsertOffset > 0) + { + uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); + WriteBlockIndex = (WriteBlockIndex + 1) & (m_MaxBlockCount - 1); + m_WriteBlock = nullptr; + m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); + m_CurrentInsertOffset = 0; + } +} + +// TODO: Almost there - some bug remain and API might need tweaking +void +BlockStore::ReclaimSpace(const std::vector& ChunkLocations, + const std::vector& KeepChunkIndexes, + uint64_t PayloadAlignment, + bool DryRun, + const ReclaimCallback& Callback) +{ + if (ChunkLocations.empty()) + { + return; + } + uint64_t WriteBlockTimeUs = 0; + uint64_t WriteBlockLongestTimeUs = 0; + uint64_t ReadBlockTimeUs = 0; + uint64_t ReadBlockLongestTimeUs = 0; + uint64_t TotalChunkCount = ChunkLocations.size(); + uint64_t DeletedSize = 0; + uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed); + + uint64_t MovedCount = 0; + uint64_t DeletedCount = 0; + + Stopwatch TotalTimer; + const auto _ = MakeGuard([this, + &TotalTimer, + &WriteBlockTimeUs, + &WriteBlockLongestTimeUs, + &ReadBlockTimeUs, + &ReadBlockLongestTimeUs, + &TotalChunkCount, + &DeletedCount, + &MovedCount, + &DeletedSize, + OldTotalSize] { + ZEN_INFO( + "garbage collect for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved " + "#{} " + "of #{} " + "chunks ({}).", + m_BlocksBasePath, + NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), + NiceLatencyNs(WriteBlockTimeUs), + NiceLatencyNs(WriteBlockLongestTimeUs), + NiceLatencyNs(ReadBlockTimeUs), + NiceLatencyNs(ReadBlockLongestTimeUs), + NiceBytes(DeletedSize), + DeletedCount, + MovedCount, + TotalChunkCount, + NiceBytes(OldTotalSize)); + }); + + size_t BlockCount = 0; + uint64_t ExcludeBlockIndex = 0x800000000ull; + { + RwLock::ExclusiveLockScope __(m_InsertLock); + if (m_WriteBlock) + { + ExcludeBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); + } + BlockCount = m_ChunkBlocks.size(); + } + + std::unordered_map LocationLookup; + LocationLookup.reserve(TotalChunkCount); + + std::unordered_set KeepChunkMap; + KeepChunkMap.reserve(KeepChunkIndexes.size()); + for (size_t KeepChunkIndex : KeepChunkIndexes) + { + const BlockStoreLocation& Location = ChunkLocations[KeepChunkIndex]; + if (Location.BlockIndex == ExcludeBlockIndex) + { + continue; + } + KeepChunkMap.insert(KeepChunkIndex); + } + std::unordered_set DeleteChunkMap; + DeleteChunkMap.reserve(ChunkLocations.size() - KeepChunkIndexes.size()); + + std::unordered_map BlockIndexToChunkMapIndex; + std::vector> KeepChunks; + std::vector> DeleteChunks; + + BlockIndexToChunkMapIndex.reserve(BlockCount); + KeepChunks.reserve(BlockCount); + DeleteChunks.reserve(BlockCount); + size_t GuesstimateCountPerBlock = TotalChunkCount / BlockCount / 2; + + size_t DeleteCount = 0; + uint64_t NewTotalSize = 0; + for (size_t Index = 0; Index < TotalChunkCount; ++Index) + { + const BlockStoreLocation& Location = ChunkLocations[Index]; + LocationLookup[Index] = Location; + if (Location.BlockIndex == ExcludeBlockIndex) + { + continue; + } + + auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(Location.BlockIndex); + size_t ChunkMapIndex = 0; + if (BlockIndexPtr == BlockIndexToChunkMapIndex.end()) + { + ChunkMapIndex = KeepChunks.size(); + BlockIndexToChunkMapIndex[Location.BlockIndex] = ChunkMapIndex; + KeepChunks.resize(ChunkMapIndex + 1); + KeepChunks.back().reserve(GuesstimateCountPerBlock); + DeleteChunks.resize(ChunkMapIndex + 1); + DeleteChunks.back().reserve(GuesstimateCountPerBlock); + } + else + { + ChunkMapIndex = BlockIndexPtr->second; + } + + if (KeepChunkMap.contains(Index)) + { + std::vector& IndexMap = KeepChunks[ChunkMapIndex]; + IndexMap.push_back(Index); + NewTotalSize += Location.Size; + continue; + } + std::vector& IndexMap = DeleteChunks[ChunkMapIndex]; + IndexMap.push_back(Index); + DeleteCount++; + } + + std::unordered_set BlocksToReWrite; + BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size()); + for (const auto& Entry : BlockIndexToChunkMapIndex) + { + uint32_t BlockIndex = Entry.first; + size_t ChunkMapIndex = Entry.second; + const std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; + if (ChunkMap.empty()) + { + continue; + } + BlocksToReWrite.insert(BlockIndex); + } + + if (DryRun) + { + uint64_t TotalSize = m_TotalSize.load(std::memory_order_relaxed); + ZEN_INFO("garbage collect for '{}' DISABLED, found #{} {} chunks of total #{} {}", + m_BlocksBasePath, + DeleteCount, + NiceBytes(TotalSize - NewTotalSize), + TotalChunkCount, + NiceBytes(TotalSize)); + return; + } + + std::unordered_map MovedChunks; + std::vector RemovedChunks; + + Ref NewBlockFile; + uint64_t WriteOffset = 0; + uint32_t NewBlockIndex = 0; + + for (uint32_t BlockIndex : BlocksToReWrite) + { + const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; + + Ref OldBlockFile; + { + RwLock::SharedLockScope _i(m_InsertLock); + OldBlockFile = m_ChunkBlocks[BlockIndex]; + ZEN_ASSERT(OldBlockFile); + } + + const std::vector& KeepMap = KeepChunks[ChunkMapIndex]; + if (KeepMap.empty()) + { + const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; + for (size_t DeleteIndex : DeleteMap) + { + RemovedChunks.push_back(DeleteIndex); + DeletedSize += ChunkLocations[DeleteIndex].Size; + DeletedCount++; + } + Callback(MovedChunks, RemovedChunks); + MovedChunks.clear(); + RemovedChunks.clear(); + { + RwLock::ExclusiveLockScope _i(m_InsertLock); + Stopwatch Timer; + const auto __ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + m_ChunkBlocks[BlockIndex] = nullptr; + } + ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'", m_BlocksBasePath, BlockIndex, OldBlockFile->GetPath()); + std::error_code Ec; + OldBlockFile->MarkAsDeleteOnClose(Ec); + if (Ec) + { + ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); + } + continue; + } + + std::vector Chunk; + for (const size_t& ChunkIndex : KeepMap) + { + const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; + Chunk.resize(ChunkLocation.Size); + OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); + + if (!NewBlockFile || (WriteOffset + Chunk.size() > m_MaxBlockSize)) + { + uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed); + + if (NewBlockFile) + { + NewBlockFile->Truncate(WriteOffset); + NewBlockFile->Flush(); + } + { + Callback(MovedChunks, RemovedChunks); + MovedChunks.clear(); + RemovedChunks.clear(); + RwLock::ExclusiveLockScope __(m_InsertLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + if (m_ChunkBlocks.size() == m_MaxBlockCount) + { + ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded", + m_BlocksBasePath, + static_cast(std::numeric_limits::max()) + 1); + return; + } + while (m_ChunkBlocks.contains(NextBlockIndex)) + { + NextBlockIndex = (NextBlockIndex + 1) & (m_MaxBlockCount - 1); + } + std::filesystem::path NewBlockPath = GetBlockPath(m_BlocksBasePath, NextBlockIndex); + NewBlockFile = new BlockStoreFile(NewBlockPath); + m_ChunkBlocks[NextBlockIndex] = NewBlockFile; + } + + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(m_BlocksBasePath, Error); + if (Error) + { + ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BlocksBasePath, Error.message()); + return; + } + if (Space.Free < m_MaxBlockSize) + { + uint64_t ReclaimedSpace = 0; // GcCtx.ClaimGCReserve(); + if (Space.Free + ReclaimedSpace < m_MaxBlockSize) + { + ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}", + m_BlocksBasePath, + m_MaxBlockSize, + NiceBytes(Space.Free + ReclaimedSpace)); + RwLock::ExclusiveLockScope _l(m_InsertLock); + Stopwatch Timer; + const auto __ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + m_ChunkBlocks.erase(NextBlockIndex); + return; + } + + ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}", + m_BlocksBasePath, + ReclaimedSpace, + NiceBytes(Space.Free + ReclaimedSpace)); + } + NewBlockFile->Create(m_MaxBlockSize); + NewBlockIndex = NextBlockIndex; + WriteOffset = 0; + } + + NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); + MovedChunks[ChunkIndex] = {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}; + WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); + MovedCount++; + } + Chunk.clear(); + if (NewBlockFile) + { + NewBlockFile->Truncate(WriteOffset); + NewBlockFile->Flush(); + NewBlockFile = {}; + } + + const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; + for (size_t DeleteIndex : DeleteMap) + { + RemovedChunks.push_back(DeleteIndex); + DeletedSize += ChunkLocations[DeleteIndex].Size; + DeletedCount++; + } + + Callback(MovedChunks, RemovedChunks); + MovedChunks.clear(); + RemovedChunks.clear(); + { + RwLock::ExclusiveLockScope __(m_InsertLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + m_ChunkBlocks[BlockIndex] = nullptr; + } + ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'", m_BlocksBasePath, BlockIndex, OldBlockFile->GetPath()); + std::error_code Ec; + OldBlockFile->MarkAsDeleteOnClose(Ec); + if (Ec) + { + ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); + } + OldBlockFile = nullptr; + } + + return; +} + #if ZEN_WITH_TESTS static bool diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 920ed965f..2b48eb143 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -263,53 +263,12 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint3 CasStore::InsertResult CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash) { - uint32_t WriteBlockIndex; - Ref WriteBlock; - uint64_t InsertOffset; { - RwLock::ExclusiveLockScope _(m_InsertLock); - - { - RwLock::SharedLockScope __(m_LocationMapLock); - if (m_LocationMap.contains(ChunkHash)) - { - return CasStore::InsertResult{.New = false}; - } - } - - // New entry - - WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); - bool IsWriting = m_WriteBlock != nullptr; - if (!IsWriting || (m_CurrentInsertOffset + ChunkSize) > m_MaxBlockSize) + RwLock::SharedLockScope _(m_LocationMapLock); + if (m_LocationMap.contains(ChunkHash)) { - if (m_WriteBlock) - { - m_WriteBlock = nullptr; - } - { - RwLock::ExclusiveLockScope __(m_LocationMapLock); - if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex) - { - throw std::runtime_error( - fmt::format("unable to allocate a new block in '{}'", m_Config.RootDirectory / m_ContainerBaseName)); - } - WriteBlockIndex += IsWriting ? 1 : 0; - while (m_ChunkBlocks.contains(WriteBlockIndex)) - { - WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; - } - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex); - m_WriteBlock = new BlockStoreFile(BlockPath); - m_ChunkBlocks[WriteBlockIndex] = m_WriteBlock; - m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); - } - m_CurrentInsertOffset = 0; - m_WriteBlock->Create(m_MaxBlockSize); + return CasStore::InsertResult{.New = false}; } - InsertOffset = m_CurrentInsertOffset; - m_CurrentInsertOffset = RoundUp(InsertOffset + ChunkSize, m_PayloadAlignment); - WriteBlock = m_WriteBlock; } // We can end up in a situation that InsertChunk writes the same chunk data in @@ -324,17 +283,15 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const // This should be a rare occasion and the current flow reduces the time we block for // reads, insert and GC. - BlockStoreDiskLocation Location({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = ChunkSize}, m_PayloadAlignment); - const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location}; - - WriteBlock->Write(ChunkData, ChunkSize, InsertOffset); + BlockStoreLocation Location = m_BlockStore.WriteChunk(ChunkData, ChunkSize, m_PayloadAlignment); + BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); + const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; m_CasLog.Append(IndexEntry); - - m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order_seq_cst); { - RwLock::ExclusiveLockScope __(m_LocationMapLock); - m_LocationMap.emplace(ChunkHash, Location); + RwLock::ExclusiveLockScope _(m_LocationMapLock); + m_LocationMap.emplace(ChunkHash, DiskLocation); } + m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order_seq_cst); return CasStore::InsertResult{.New = true}; } @@ -348,20 +305,16 @@ CasContainerStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) IoBuffer CasContainerStrategy::FindChunk(const IoHash& ChunkHash) { - Ref ChunkBlock; - BlockStoreLocation Location; + RwLock::SharedLockScope _(m_LocationMapLock); + auto KeyIt = m_LocationMap.find(ChunkHash); + if (KeyIt == m_LocationMap.end()) { - RwLock::SharedLockScope _(m_LocationMapLock); - if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) - { - Location = KeyIt->second.Get(m_PayloadAlignment); - ChunkBlock = m_ChunkBlocks[Location.BlockIndex]; - } - else - { - return IoBuffer(); - } + return IoBuffer(); } + BlockStoreLocation Location = KeyIt->second.Get(m_PayloadAlignment); + Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); // m_ChunkBlocks[Location.BlockIndex]; + _.ReleaseNow(); + return ChunkBlock->GetChunk(Location.Offset, Location.Size); } @@ -388,7 +341,8 @@ CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks) void CasContainerStrategy::Flush() { - { + m_BlockStore.Flush(); + /* { RwLock::ExclusiveLockScope _(m_InsertLock); if (m_CurrentInsertOffset > 0) { @@ -398,13 +352,15 @@ CasContainerStrategy::Flush() m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); m_CurrentInsertOffset = 0; } - } + }*/ MakeIndexSnapshot(); } void CasContainerStrategy::Scrub(ScrubContext& Ctx) { + ZEN_UNUSED(Ctx); +#if 0 std::vector BadChunks; // We do a read sweep through the payloads file and validate @@ -508,118 +464,31 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) // Let whomever it concerns know about the bad chunks. This could // be used to invalidate higher level data structures more efficiently // than a full validation pass might be able to do - Ctx.ReportBadCasChunks(BadChunkHashes); +#endif // 0 } void CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { - // It collects all the blocks that we want to delete chunks from. For each such - // block we keep a list of chunks to retain and a list of chunks to delete. - // - // If there is a block that we are currently writing to, that block is omitted - // from the garbage collection. - // - // Next it will iterate over all blocks that we want to remove chunks from. - // If the block is empty after removal of chunks we mark the block as pending - // delete - we want to delete it as soon as there are no IoBuffers using the - // block file. - // Once complete we update the m_LocationMap by removing the chunks. - // - // If the block is non-empty we write out the chunks we want to keep to a new - // block file (creating new block files as needed). - // - // We update the index as we complete each new block file. This makes it possible - // to break the GC if we want to limit time for execution. - // - // GC can fairly parallell to regular operation - it will block while taking - // a snapshot of the current m_LocationMap state. - // - // While moving blocks it will do a blocking operation and update the m_LocationMap - // after each new block is written and figuring out the path to the next new block. - - ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName); uint64_t WriteBlockTimeUs = 0; uint64_t WriteBlockLongestTimeUs = 0; uint64_t ReadBlockTimeUs = 0; uint64_t ReadBlockLongestTimeUs = 0; - uint64_t TotalChunkCount = 0; - uint64_t DeletedSize = 0; - uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed); - - std::vector DeletedChunks; - uint64_t MovedCount = 0; - - Stopwatch TotalTimer; - const auto _ = MakeGuard([this, - &TotalTimer, - &WriteBlockTimeUs, - &WriteBlockLongestTimeUs, - &ReadBlockTimeUs, - &ReadBlockLongestTimeUs, - &TotalChunkCount, - &DeletedChunks, - &MovedCount, - &DeletedSize, - OldTotalSize] { - ZEN_INFO( - "garbage collect for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved " - "#{} " - "of #{} " - "chunks ({}).", - m_Config.RootDirectory / m_ContainerBaseName, - NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), - NiceLatencyNs(WriteBlockTimeUs), - NiceLatencyNs(WriteBlockLongestTimeUs), - NiceLatencyNs(ReadBlockTimeUs), - NiceLatencyNs(ReadBlockLongestTimeUs), - NiceBytes(DeletedSize), - DeletedChunks.size(), - MovedCount, - TotalChunkCount, - NiceBytes(OldTotalSize)); - }); LocationMap_t LocationMap; - size_t BlockCount; - uint64_t ExcludeBlockIndex = 0x800000000ull; { - RwLock::SharedLockScope __(m_InsertLock); RwLock::SharedLockScope ___(m_LocationMapLock); - { - Stopwatch Timer; - const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - if (m_WriteBlock) - { - ExcludeBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); - } - __.ReleaseNow(); - } - LocationMap = m_LocationMap; - BlockCount = m_ChunkBlocks.size(); + Stopwatch Timer; + const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + LocationMap = m_LocationMap; } - if (LocationMap.empty()) - { - ZEN_INFO("garbage collect SKIPPED, for '{}', container is empty", m_Config.RootDirectory / m_ContainerBaseName); - return; - } - - TotalChunkCount = LocationMap.size(); - - std::unordered_map BlockIndexToChunkMapIndex; - std::vector> KeepChunks; - std::vector> DeleteChunks; - - BlockIndexToChunkMapIndex.reserve(BlockCount); - KeepChunks.reserve(BlockCount); - DeleteChunks.reserve(BlockCount); - size_t GuesstimateCountPerBlock = TotalChunkCount / BlockCount / 2; + uint64_t TotalChunkCount = LocationMap.size(); std::vector TotalChunkHashes; TotalChunkHashes.reserve(TotalChunkCount); @@ -628,272 +497,83 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) TotalChunkHashes.push_back(Entry.first); } - uint64_t DeleteCount = 0; + std::vector ChunkLocations; + std::vector KeepChunkIndexes; + std::vector ChunkIndexToChunkHash; + ChunkLocations.reserve(TotalChunkCount); + ChunkLocations.reserve(TotalChunkCount); + ChunkIndexToChunkHash.reserve(TotalChunkCount); - uint64_t NewTotalSize = 0; GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { - auto KeyIt = LocationMap.find(ChunkHash); - const BlockStoreDiskLocation& Location = KeyIt->second; - uint32_t BlockIndex = Location.GetBlockIndex(); + auto KeyIt = LocationMap.find(ChunkHash); + const BlockStoreDiskLocation& DiskLocation = KeyIt->second; + BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); + size_t ChunkIndex = ChunkLocations.size(); - if (static_cast(BlockIndex) == ExcludeBlockIndex) - { - return; - } - - auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(BlockIndex); - size_t ChunkMapIndex = 0; - if (BlockIndexPtr == BlockIndexToChunkMapIndex.end()) - { - ChunkMapIndex = KeepChunks.size(); - BlockIndexToChunkMapIndex[BlockIndex] = ChunkMapIndex; - KeepChunks.resize(ChunkMapIndex + 1); - KeepChunks.back().reserve(GuesstimateCountPerBlock); - DeleteChunks.resize(ChunkMapIndex + 1); - DeleteChunks.back().reserve(GuesstimateCountPerBlock); - } - else - { - ChunkMapIndex = BlockIndexPtr->second; - } + ChunkLocations.push_back(Location); + ChunkIndexToChunkHash[ChunkIndex] = ChunkHash; if (Keep) { - std::vector& ChunkMap = KeepChunks[ChunkMapIndex]; - ChunkMap.push_back(ChunkHash); - NewTotalSize += Location.GetSize(); - } - else - { - std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; - ChunkMap.push_back(ChunkHash); - DeleteCount++; + KeepChunkIndexes.push_back(ChunkIndex); } }); - std::unordered_set BlocksToReWrite; - BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size()); - for (const auto& Entry : BlockIndexToChunkMapIndex) - { - uint32_t BlockIndex = Entry.first; - size_t ChunkMapIndex = Entry.second; - const std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; - if (ChunkMap.empty()) - { - continue; - } - BlocksToReWrite.insert(BlockIndex); - } - const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); if (!PerformDelete) { - uint64_t TotalSize = m_TotalSize.load(std::memory_order_relaxed); - ZEN_INFO("garbage collect for '{}' DISABLED, found #{} {} chunks of total #{} {}", - m_Config.RootDirectory / m_ContainerBaseName, - DeleteCount, - NiceBytes(TotalSize - NewTotalSize), - TotalChunkCount, - NiceBytes(TotalSize)); + m_BlockStore.ReclaimSpace(ChunkLocations, KeepChunkIndexes, m_PayloadAlignment, true); return; } - - // Move all chunks in blocks that have chunks removed to new blocks - - Ref NewBlockFile; - uint64_t WriteOffset = 0; - uint32_t NewBlockIndex = 0; - DeletedChunks.reserve(DeleteCount); - - auto UpdateLocations = [this](const std::span& Entries) { - for (const CasDiskIndexEntry& Entry : Entries) - { - if (Entry.Flags & CasDiskIndexEntry::kTombstone) + std::vector DeletedChunks; + m_BlockStore.ReclaimSpace( + ChunkLocations, + KeepChunkIndexes, + m_PayloadAlignment, + false, + [this, &DeletedChunks, &ChunkIndexToChunkHash, &LocationMap, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( + const std::unordered_map& MovedChunks, + const std::vector RemovedChunks) { + std::vector LogEntries; + LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); + for (const auto& Entry : MovedChunks) { - auto KeyIt = m_LocationMap.find(Entry.Key); - uint64_t ChunkSize = KeyIt->second.GetSize(); - m_TotalSize.fetch_sub(ChunkSize); - m_LocationMap.erase(KeyIt); - continue; + size_t ChunkIndex = Entry.first; + const BlockStoreLocation& NewLocation = Entry.second; + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + LogEntries.push_back({.Key = ChunkHash, .Location = {NewLocation, m_PayloadAlignment}}); + } + for (const size_t ChunkIndex : RemovedChunks) + { + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + const BlockStoreDiskLocation& OldDiskLocation = LocationMap[ChunkHash]; + LogEntries.push_back({.Key = ChunkHash, .Location = OldDiskLocation, .Flags = CasDiskIndexEntry::kTombstone}); + DeletedChunks.push_back(ChunkHash); } - m_LocationMap[Entry.Key] = Entry.Location; - } - }; - - std::unordered_map MovedBlockChunks; - for (uint32_t BlockIndex : BlocksToReWrite) - { - const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; - - Ref OldBlockFile; - { - RwLock::SharedLockScope _i(m_LocationMapLock); - OldBlockFile = m_ChunkBlocks[BlockIndex]; - } - const std::vector& KeepMap = KeepChunks[ChunkMapIndex]; - if (KeepMap.empty()) - { - const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; - std::vector LogEntries = MakeCasDiskEntries({}, DeleteMap); m_CasLog.Append(LogEntries); m_CasLog.Flush(); { - RwLock::ExclusiveLockScope _i(m_LocationMapLock); + RwLock::ExclusiveLockScope __(m_LocationMapLock); Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto ____ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); - UpdateLocations(LogEntries); - m_ChunkBlocks[BlockIndex] = nullptr; - } - DeletedChunks.insert(DeletedChunks.end(), DeleteMap.begin(), DeleteMap.end()); - ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'", - m_ContainerBaseName, - BlockIndex, - OldBlockFile->GetPath()); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); - } - continue; - } - - std::vector Chunk; - for (const IoHash& ChunkHash : KeepMap) - { - auto KeyIt = LocationMap.find(ChunkHash); - const BlockStoreLocation ChunkLocation = KeyIt->second.Get(m_PayloadAlignment); - Chunk.resize(ChunkLocation.Size); - OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); - - if (!NewBlockFile || (WriteOffset + Chunk.size() > m_MaxBlockSize)) - { - uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed); - std::vector LogEntries = MakeCasDiskEntries(MovedBlockChunks, {}); - m_CasLog.Append(LogEntries); - m_CasLog.Flush(); - - if (NewBlockFile) + for (const CasDiskIndexEntry& Entry : LogEntries) { - NewBlockFile->Truncate(WriteOffset); - NewBlockFile->Flush(); - } - { - RwLock::ExclusiveLockScope __(m_LocationMapLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - UpdateLocations(LogEntries); - if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex) - { - ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded", - m_Config.RootDirectory / m_ContainerBaseName, - static_cast(std::numeric_limits::max()) + 1); - return; - } - while (m_ChunkBlocks.contains(NextBlockIndex)) + if (Entry.Flags & CasDiskIndexEntry::kTombstone) { - NextBlockIndex = (NextBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; - } - std::filesystem::path NewBlockPath = GetBlockPath(m_BlocksBasePath, NextBlockIndex); - NewBlockFile = new BlockStoreFile(NewBlockPath); - m_ChunkBlocks[NextBlockIndex] = NewBlockFile; - } - - MovedCount += MovedBlockChunks.size(); - MovedBlockChunks.clear(); - - std::error_code Error; - DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error); - if (Error) - { - ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_Config.RootDirectory, Error.message()); - return; - } - if (Space.Free < m_MaxBlockSize) - { - uint64_t ReclaimedSpace = GcCtx.ClaimGCReserve(); - if (Space.Free + ReclaimedSpace < m_MaxBlockSize) - { - ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}", - m_Config.RootDirectory / m_ContainerBaseName, - m_MaxBlockSize, - NiceBytes(Space.Free + ReclaimedSpace)); - RwLock::ExclusiveLockScope _l(m_LocationMapLock); - Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - m_ChunkBlocks.erase(NextBlockIndex); - return; + m_LocationMap.erase(Entry.Key); + auto KeyIt = m_LocationMap.find(Entry.Key); + uint64_t ChunkSize = Entry.Location.GetSize(); + m_TotalSize.fetch_sub(ChunkSize); + continue; } - - ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}", - m_Config.RootDirectory / m_ContainerBaseName, - ReclaimedSpace, - NiceBytes(Space.Free + ReclaimedSpace)); + m_LocationMap[Entry.Key] = Entry.Location; } - NewBlockFile->Create(m_MaxBlockSize); - NewBlockIndex = NextBlockIndex; - WriteOffset = 0; } - - NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); - MovedBlockChunks.emplace( - ChunkHash, - BlockStoreDiskLocation({.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}, m_PayloadAlignment)); - WriteOffset = RoundUp(WriteOffset + Chunk.size(), m_PayloadAlignment); - } - Chunk.clear(); - if (NewBlockFile) - { - NewBlockFile->Truncate(WriteOffset); - NewBlockFile->Flush(); - NewBlockFile = {}; - } - - const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; - std::vector LogEntries = MakeCasDiskEntries(MovedBlockChunks, DeleteMap); - m_CasLog.Append(LogEntries); - m_CasLog.Flush(); - { - RwLock::ExclusiveLockScope __(m_LocationMapLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - UpdateLocations(LogEntries); - m_ChunkBlocks[BlockIndex] = nullptr; - } - MovedCount += MovedBlockChunks.size(); - DeletedChunks.insert(DeletedChunks.end(), DeleteMap.begin(), DeleteMap.end()); - MovedBlockChunks.clear(); - - ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'", m_ContainerBaseName, BlockIndex, OldBlockFile->GetPath()); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); - } - OldBlockFile = nullptr; - } - - for (const IoHash& ChunkHash : DeletedChunks) - { - DeletedSize += LocationMap[ChunkHash].GetSize(); - } + }); GcCtx.DeletedCas(DeletedChunks); } @@ -935,7 +615,6 @@ CasContainerStrategy::MakeIndexSnapshot() std::vector Entries; { - RwLock::SharedLockScope __(m_InsertLock); RwLock::SharedLockScope ___(m_LocationMapLock); Entries.resize(m_LocationMap.size()); @@ -1480,67 +1159,18 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName); m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); - std::unordered_set KnownBlocks; + std::unordered_set KnownBlocks; + std::vector KnownLocations; + KnownLocations.reserve(m_LocationMap.size()); for (const auto& Entry : m_LocationMap) { const BlockStoreDiskLocation& Location = Entry.second; m_TotalSize.fetch_add(Location.GetSize(), std::memory_order_seq_cst); KnownBlocks.insert(Location.GetBlockIndex()); + KnownLocations.push_back(Location.Get(m_PayloadAlignment)); } - if (std::filesystem::is_directory(m_BlocksBasePath)) - { - std::vector FoldersToScan; - FoldersToScan.push_back(m_BlocksBasePath); - size_t FolderOffset = 0; - while (FolderOffset < FoldersToScan.size()) - { - for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(FoldersToScan[FolderOffset])) - { - if (Entry.is_directory()) - { - FoldersToScan.push_back(Entry.path()); - continue; - } - if (Entry.is_regular_file()) - { - const std::filesystem::path Path = Entry.path(); - if (Path.extension() != DataExtension) - { - continue; - } - std::string FileName = Path.stem().string(); - uint32_t BlockIndex; - bool OK = ParseHexNumber(FileName, BlockIndex); - if (!OK) - { - continue; - } - if (!KnownBlocks.contains(BlockIndex)) - { - // Log removing unreferenced block - // Clear out unused blocks - ZEN_INFO("removing unused block for '{}' at '{}'", m_ContainerBaseName, Path); - std::error_code Ec; - std::filesystem::remove(Path, Ec); - if (Ec) - { - ZEN_WARN("Failed to delete file '{}' reason: '{}'", Path, Ec.message()); - } - continue; - } - Ref BlockFile = new BlockStoreFile(Path); - BlockFile->Open(); - m_ChunkBlocks[BlockIndex] = BlockFile; - } - } - ++FolderOffset; - } - } - else - { - CreateDirectories(m_BlocksBasePath); - } + m_BlockStore.Initialize(m_BlocksBasePath, m_MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1, KnownLocations); if (IsNewStore || ((LogEntryCount + LegacyLogEntryCount) > 0)) { diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h index 11da37202..114a6a48c 100644 --- a/zenstore/compactcas.h +++ b/zenstore/compactcas.h @@ -78,17 +78,12 @@ private: TCasLogFile m_CasLog; std::string m_ContainerBaseName; std::filesystem::path m_BlocksBasePath; + BlockStore m_BlockStore; RwLock m_LocationMapLock; typedef std::unordered_map LocationMap_t; LocationMap_t m_LocationMap; - std::unordered_map> m_ChunkBlocks; - RwLock m_InsertLock; // used to serialize inserts - Ref m_WriteBlock; - std::uint64_t m_CurrentInsertOffset = 0; - - std::atomic_uint32_t m_WriteBlockIndex{}; std::atomic_uint64_t m_TotalSize{}; }; diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 424db461a..4dd6e5289 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -15,8 +15,14 @@ struct BlockStoreLocation uint32_t BlockIndex; uint64_t Offset; uint64_t Size; + + inline auto operator<=>(const BlockStoreLocation& Rhs) const = default; }; +constexpr BlockStoreLocation InvalidBlockStoreLocation{.BlockIndex = 0xfffffffful, + .Offset = 0xffffffffffffffffull, + .Size = 0xffffffffffffffffull}; + #pragma pack(push) #pragma pack(1) @@ -99,6 +105,41 @@ private: BasicFile m_File; }; +class BlockStore +{ +public: + void Initialize(const std::filesystem::path& BlocksBasePath, + uint64_t MaxBlockSize, + uint64_t MaxBlockCount, + const std::vector& KnownLocations); + BlockStoreLocation WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment); + Ref GetChunkBlock(const BlockStoreLocation& Location); + void Flush(); + + typedef std::function& MovedChunks, const std::vector RemovedChunks)> + ReclaimCallback; + + void ReclaimSpace( + const std::vector& ChunkLocations, + const std::vector& KeepChunkIndexes, + uint64_t PayloadAlignment, + bool DryRun, + const ReclaimCallback& Callback = [](const std::unordered_map&, const std::vector&) {}); + +private: + std::unordered_map> m_ChunkBlocks; + + RwLock m_InsertLock; // used to serialize inserts + Ref m_WriteBlock; + std::uint64_t m_CurrentInsertOffset = 0; + std::atomic_uint32_t m_WriteBlockIndex{}; + + uint64_t m_MaxBlockSize = 1u << 28; + uint64_t m_MaxBlockCount = BlockStoreDiskLocation::MaxBlockIndex + 1; + std::filesystem::path m_BlocksBasePath; + std::atomic_uint64_t m_TotalSize{}; +}; + void blockstore_forcelink(); } // namespace zen -- cgit v1.2.3 From 7dc31ec99aa3fc2f40000258e45d5d6381403ff8 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sun, 1 May 2022 10:17:35 +0200 Subject: threading issues resolved --- zenstore/blockstore.cpp | 113 ++++++++++++++------------------- zenstore/compactcas.cpp | 53 +++++++++++----- zenstore/include/zenstore/blockstore.h | 54 ++++++++++------ 3 files changed, 120 insertions(+), 100 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index a897ed902..4cf3c6486 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -209,8 +209,8 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, } } -BlockStoreLocation -BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) +void +BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteCompleteCallback Callback) { RwLock::ExclusiveLockScope InsertLock(m_InsertLock); @@ -243,24 +243,30 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) uint64_t InsertOffset = m_CurrentInsertOffset; m_CurrentInsertOffset = RoundUp(InsertOffset + Size, Alignment); Ref WriteBlock = m_WriteBlock; + m_ActiveWriteBlockIndexes.push_back(WriteBlockIndex); InsertLock.ReleaseNow(); - BlockStoreLocation Location{.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = Size}; WriteBlock->Write(Data, Size, InsertOffset); - return Location; + Callback({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = Size}); + + RwLock::ExclusiveLockScope _(m_InsertLock); + m_ActiveWriteBlockIndexes.erase(std::find(m_ActiveWriteBlockIndexes.begin(), m_ActiveWriteBlockIndexes.end(), WriteBlockIndex)); } -/* -IoBuffer -BlockStore::ReadChunk(const BlockStoreLocation& Location) +BlockStore::ReclaimSnapshotState +BlockStore::GetReclaimSnapshotState() { - RwLock::SharedLockScope InsertLock(m_InsertLock); - Ref ChunkBlock = m_ChunkBlocks[Location.BlockIndex]; - InsertLock.ReleaseNow(); - return ChunkBlock->GetChunk(Location.Offset, Location.Size); + ReclaimSnapshotState State; + RwLock::ExclusiveLockScope _(m_InsertLock); + for (uint32_t BlockIndex : m_ActiveWriteBlockIndexes) + { + State.ExcludeBlockIndexes.insert(BlockIndex); + } + State.BlockCount = m_ChunkBlocks.size(); + _.ReleaseNow(); + return State; } -*/ Ref BlockStore::GetChunkBlock(const BlockStoreLocation& Location) @@ -283,9 +289,9 @@ BlockStore::Flush() } } -// TODO: Almost there - some bug remain and API might need tweaking void -BlockStore::ReclaimSpace(const std::vector& ChunkLocations, +BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, + const std::vector& ChunkLocations, const std::vector& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, @@ -336,41 +342,22 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, NiceBytes(OldTotalSize)); }); - size_t BlockCount = 0; - uint64_t ExcludeBlockIndex = 0x800000000ull; - { - RwLock::ExclusiveLockScope __(m_InsertLock); - if (m_WriteBlock) - { - ExcludeBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); - } - BlockCount = m_ChunkBlocks.size(); - } - - std::unordered_map LocationLookup; - LocationLookup.reserve(TotalChunkCount); + size_t BlockCount = Snapshot.BlockCount; std::unordered_set KeepChunkMap; KeepChunkMap.reserve(KeepChunkIndexes.size()); for (size_t KeepChunkIndex : KeepChunkIndexes) { - const BlockStoreLocation& Location = ChunkLocations[KeepChunkIndex]; - if (Location.BlockIndex == ExcludeBlockIndex) - { - continue; - } KeepChunkMap.insert(KeepChunkIndex); } - std::unordered_set DeleteChunkMap; - DeleteChunkMap.reserve(ChunkLocations.size() - KeepChunkIndexes.size()); std::unordered_map BlockIndexToChunkMapIndex; - std::vector> KeepChunks; - std::vector> DeleteChunks; + std::vector> BlockKeepChunks; + std::vector> BlockDeleteChunks; BlockIndexToChunkMapIndex.reserve(BlockCount); - KeepChunks.reserve(BlockCount); - DeleteChunks.reserve(BlockCount); + BlockKeepChunks.reserve(BlockCount); + BlockDeleteChunks.reserve(BlockCount); size_t GuesstimateCountPerBlock = TotalChunkCount / BlockCount / 2; size_t DeleteCount = 0; @@ -378,8 +365,7 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, for (size_t Index = 0; Index < TotalChunkCount; ++Index) { const BlockStoreLocation& Location = ChunkLocations[Index]; - LocationLookup[Index] = Location; - if (Location.BlockIndex == ExcludeBlockIndex) + if (Snapshot.ExcludeBlockIndexes.contains(Location.BlockIndex)) { continue; } @@ -388,12 +374,12 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, size_t ChunkMapIndex = 0; if (BlockIndexPtr == BlockIndexToChunkMapIndex.end()) { - ChunkMapIndex = KeepChunks.size(); + ChunkMapIndex = BlockKeepChunks.size(); BlockIndexToChunkMapIndex[Location.BlockIndex] = ChunkMapIndex; - KeepChunks.resize(ChunkMapIndex + 1); - KeepChunks.back().reserve(GuesstimateCountPerBlock); - DeleteChunks.resize(ChunkMapIndex + 1); - DeleteChunks.back().reserve(GuesstimateCountPerBlock); + BlockKeepChunks.resize(ChunkMapIndex + 1); + BlockKeepChunks.back().reserve(GuesstimateCountPerBlock); + BlockDeleteChunks.resize(ChunkMapIndex + 1); + BlockDeleteChunks.back().reserve(GuesstimateCountPerBlock); } else { @@ -402,12 +388,12 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, if (KeepChunkMap.contains(Index)) { - std::vector& IndexMap = KeepChunks[ChunkMapIndex]; + std::vector& IndexMap = BlockKeepChunks[ChunkMapIndex]; IndexMap.push_back(Index); NewTotalSize += Location.Size; continue; } - std::vector& IndexMap = DeleteChunks[ChunkMapIndex]; + std::vector& IndexMap = BlockDeleteChunks[ChunkMapIndex]; IndexMap.push_back(Index); DeleteCount++; } @@ -418,7 +404,7 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, { uint32_t BlockIndex = Entry.first; size_t ChunkMapIndex = Entry.second; - const std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; + const std::vector& ChunkMap = BlockDeleteChunks[ChunkMapIndex]; if (ChunkMap.empty()) { continue; @@ -438,9 +424,6 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, return; } - std::unordered_map MovedChunks; - std::vector RemovedChunks; - Ref NewBlockFile; uint64_t WriteOffset = 0; uint32_t NewBlockIndex = 0; @@ -456,19 +439,16 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, ZEN_ASSERT(OldBlockFile); } - const std::vector& KeepMap = KeepChunks[ChunkMapIndex]; + const std::vector& KeepMap = BlockKeepChunks[ChunkMapIndex]; if (KeepMap.empty()) { - const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; + const std::vector& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; for (size_t DeleteIndex : DeleteMap) { - RemovedChunks.push_back(DeleteIndex); DeletedSize += ChunkLocations[DeleteIndex].Size; - DeletedCount++; } - Callback(MovedChunks, RemovedChunks); - MovedChunks.clear(); - RemovedChunks.clear(); + Callback(BlockIndex, {}, DeleteMap); + DeletedCount += DeleteMap.size(); { RwLock::ExclusiveLockScope _i(m_InsertLock); Stopwatch Timer; @@ -489,7 +469,8 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, continue; } - std::vector Chunk; + std::unordered_map MovedChunks; + std::vector Chunk; for (const size_t& ChunkIndex : KeepMap) { const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; @@ -506,9 +487,9 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, NewBlockFile->Flush(); } { - Callback(MovedChunks, RemovedChunks); + Callback(0xfffffffful, MovedChunks, {}); + MovedCount += KeepMap.size(); MovedChunks.clear(); - RemovedChunks.clear(); RwLock::ExclusiveLockScope __(m_InsertLock); Stopwatch Timer; const auto ___ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { @@ -572,7 +553,6 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); MovedChunks[ChunkIndex] = {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}; WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); - MovedCount++; } Chunk.clear(); if (NewBlockFile) @@ -582,17 +562,16 @@ BlockStore::ReclaimSpace(const std::vector& ChunkLocations, NewBlockFile = {}; } - const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; + const std::vector& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; for (size_t DeleteIndex : DeleteMap) { - RemovedChunks.push_back(DeleteIndex); DeletedSize += ChunkLocations[DeleteIndex].Size; - DeletedCount++; } - Callback(MovedChunks, RemovedChunks); + Callback(BlockIndex, MovedChunks, DeleteMap); + MovedCount += KeepMap.size(); + DeletedCount += DeleteMap.size(); MovedChunks.clear(); - RemovedChunks.clear(); { RwLock::ExclusiveLockScope __(m_InsertLock); Stopwatch Timer; diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 2b48eb143..84019d7aa 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -283,15 +283,16 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const // This should be a rare occasion and the current flow reduces the time we block for // reads, insert and GC. - BlockStoreLocation Location = m_BlockStore.WriteChunk(ChunkData, ChunkSize, m_PayloadAlignment); - BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); - const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; - m_CasLog.Append(IndexEntry); - { - RwLock::ExclusiveLockScope _(m_LocationMapLock); - m_LocationMap.emplace(ChunkHash, DiskLocation); - } - m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order_seq_cst); + m_BlockStore.WriteChunk(ChunkData, ChunkSize, m_PayloadAlignment, [this, &ChunkHash, ChunkSize](const BlockStoreLocation& Location) { + BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); + const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; + m_CasLog.Append(IndexEntry); + { + RwLock::ExclusiveLockScope _(m_LocationMapLock); + m_LocationMap.emplace(ChunkHash, DiskLocation); + } + m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order_seq_cst); + }); return CasStore::InsertResult{.New = true}; } @@ -311,10 +312,15 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) { return IoBuffer(); } - BlockStoreLocation Location = KeyIt->second.Get(m_PayloadAlignment); - Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); // m_ChunkBlocks[Location.BlockIndex]; + BlockStoreLocation Location = KeyIt->second.Get(m_PayloadAlignment); _.ReleaseNow(); + Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); // m_ChunkBlocks[Location.BlockIndex]; + if (!ChunkBlock) + { + return IoBuffer(); + } + return ChunkBlock->GetChunk(Location.Offset, Location.Size); } @@ -476,7 +482,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) uint64_t ReadBlockTimeUs = 0; uint64_t ReadBlockLongestTimeUs = 0; - LocationMap_t LocationMap; + LocationMap_t LocationMap; + BlockStore::ReclaimSnapshotState BlockStoreState; { RwLock::SharedLockScope ___(m_LocationMapLock); Stopwatch Timer; @@ -486,6 +493,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); }); LocationMap = m_LocationMap; + BlockStoreState = m_BlockStore.GetReclaimSnapshotState(); } uint64_t TotalChunkCount = LocationMap.size(); @@ -521,18 +529,23 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); if (!PerformDelete) { - m_BlockStore.ReclaimSpace(ChunkLocations, KeepChunkIndexes, m_PayloadAlignment, true); + m_BlockStore.ReclaimSpace(BlockStoreState, ChunkLocations, KeepChunkIndexes, m_PayloadAlignment, true); return; } + + auto GetChunkLocations = [] {}; + std::vector DeletedChunks; m_BlockStore.ReclaimSpace( + BlockStoreState, ChunkLocations, KeepChunkIndexes, m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &LocationMap, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( + uint32_t BlockIndex, const std::unordered_map& MovedChunks, - const std::vector RemovedChunks) { + const std::vector& RemovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); for (const auto& Entry : MovedChunks) @@ -572,6 +585,10 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) } m_LocationMap[Entry.Key] = Entry.Location; } + for (const auto& Entry : m_LocationMap) + { + ZEN_ASSERT(Entry.second.GetBlockIndex() != BlockIndex); + } } }); @@ -2093,7 +2110,13 @@ TEST_CASE("compactcas.threadedinsert") // * doctest::skip(true)) { ThreadPool.ScheduleWork([&Cas, &WorkCompleted, ChunkHash]() { CHECK(Cas.HaveChunk(ChunkHash)); - CHECK(ChunkHash == IoHash::HashBuffer(Cas.FindChunk(ChunkHash))); + if (ChunkHash != IoHash::HashBuffer(Cas.FindChunk(ChunkHash))) + { + IoBuffer Buffer = Cas.FindChunk(ChunkHash); + CHECK(Buffer); + IoHash BufferHash = IoHash::HashBuffer(Buffer); + CHECK(ChunkHash == BufferHash); + } WorkCompleted.fetch_add(1); }); } diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 4dd6e5289..084142636 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -6,6 +6,8 @@ #include #include +#include + namespace zen { ////////////////////////////////////////////////////////////////////////// @@ -108,31 +110,47 @@ private: class BlockStore { public: - void Initialize(const std::filesystem::path& BlocksBasePath, - uint64_t MaxBlockSize, - uint64_t MaxBlockCount, - const std::vector& KnownLocations); - BlockStoreLocation WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment); + struct ReclaimSnapshotState + { + std::unordered_set ExcludeBlockIndexes; + size_t BlockCount; + }; + typedef std::function& MovedChunks, + const std::vector& RemovedChunks)> + ReclaimCallback; + typedef std::function WriteCompleteCallback; + + void Initialize(const std::filesystem::path& BlocksBasePath, + uint64_t MaxBlockSize, + uint64_t MaxBlockCount, + const std::vector& KnownLocations); + void WriteChunk( + const void* Data, + uint64_t Size, + uint64_t Alignment, + WriteCompleteCallback Callback = [](const BlockStoreLocation&) {}); Ref GetChunkBlock(const BlockStoreLocation& Location); void Flush(); - typedef std::function& MovedChunks, const std::vector RemovedChunks)> - ReclaimCallback; - - void ReclaimSpace( - const std::vector& ChunkLocations, - const std::vector& KeepChunkIndexes, - uint64_t PayloadAlignment, - bool DryRun, - const ReclaimCallback& Callback = [](const std::unordered_map&, const std::vector&) {}); + ReclaimSnapshotState GetReclaimSnapshotState(); + void ReclaimSpace( + const ReclaimSnapshotState& Snapshot, + const std::vector& ChunkLocations, + const std::vector& KeepChunkIndexes, + uint64_t PayloadAlignment, + bool DryRun, + const ReclaimCallback& Callback = [](uint32_t, const std::unordered_map&, const std::vector&) { + }); private: std::unordered_map> m_ChunkBlocks; - RwLock m_InsertLock; // used to serialize inserts - Ref m_WriteBlock; - std::uint64_t m_CurrentInsertOffset = 0; - std::atomic_uint32_t m_WriteBlockIndex{}; + RwLock m_InsertLock; // used to serialize inserts + Ref m_WriteBlock; + std::uint64_t m_CurrentInsertOffset = 0; + std::atomic_uint32_t m_WriteBlockIndex{}; + std::vector m_ActiveWriteBlockIndexes; uint64_t m_MaxBlockSize = 1u << 28; uint64_t m_MaxBlockCount = BlockStoreDiskLocation::MaxBlockIndex + 1; -- cgit v1.2.3 From be12749e0adde39d47875d3c4d2136dbcffbcb3d Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sun, 1 May 2022 22:34:31 +0200 Subject: collectgarbage for compactcas and structured cache uses shared implementation --- zenserver/cache/structuredcachestore.cpp | 628 ++++++++----------------------- zenserver/cache/structuredcachestore.h | 12 +- zenstore/blockstore.cpp | 49 +-- zenstore/compactcas.cpp | 53 +-- zenstore/include/zenstore/blockstore.h | 3 + 5 files changed, 202 insertions(+), 543 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 53a479edb..d313cd0c2 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -127,25 +127,9 @@ namespace { static_assert(sizeof(LegacyDiskIndexEntry) == 36); - const char* IndexExtension = ".uidx"; - const char* LogExtension = ".slog"; - const char* DataExtension = ".sobs"; - - std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex) - { - ExtendablePathBuilder<256> Path; - - char BlockHexString[9]; - ToHexNumber(BlockIndex, BlockHexString); - - Path.Append(BlocksBasePath); - Path.AppendSeparator(); - Path.AppendAsciiRange(BlockHexString, BlockHexString + 4); - Path.AppendSeparator(); - Path.Append(BlockHexString); - Path.Append(DataExtension); - return Path.ToPath(); - } + const char* IndexExtension = ".uidx"; + const char* LogExtension = ".slog"; + const char* LegacyDataExtension = ".sobs"; std::filesystem::path GetIndexPath(const std::filesystem::path& BucketDir, const std::string& BucketName) { @@ -169,7 +153,7 @@ namespace { std::filesystem::path GetLegacyDataPath(const std::filesystem::path& BucketDir) { - return BucketDir / (std::string("zen") + DataExtension); + return BucketDir / (std::string("zen") + LegacyDataExtension); } std::vector MakeDiskIndexEntries(const std::unordered_map& MovedChunks, @@ -718,8 +702,6 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() std::vector Entries; { - RwLock::SharedLockScope __(m_InsertLock); - RwLock::SharedLockScope ___(m_IndexLock); Entries.resize(m_Index.size()); uint64_t EntryIndex = 0; @@ -896,7 +878,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) }); uint32_t WriteBlockIndex = 0; - while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + while (std::filesystem::exists(BlockStore ::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) { ++WriteBlockIndex; } @@ -1083,7 +1065,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) } LogEntries.push_back({.Key = Entry.second.Key, .Location = NewLocation}); } - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex); + std::filesystem::path BlockPath = BlockStore ::GetBlockPath(m_BlocksBasePath, WriteBlockIndex); CreateDirectories(BlockPath.parent_path()); BlockFile.Close(); std::filesystem::rename(LegacyDataPath, BlockPath); @@ -1152,7 +1134,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) BlockRanges.push_back(BlockRange); WriteBlockIndex++; - while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + while (std::filesystem::exists(BlockStore ::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) { ++WriteBlockIndex; } @@ -1191,7 +1173,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) NiceTimeSpanMs(ETA)); } - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex); + std::filesystem::path BlockPath = BlockStore ::GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex); BlockStoreFile ChunkBlock(BlockPath); ChunkBlock.Create(BlockRange.BlockSize); uint64_t Offset = 0; @@ -1299,7 +1281,8 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const fs::path& BucketDir, const bool Is m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite); - std::unordered_set KnownBlocks; + std::vector KnownLocations; + KnownLocations.reserve(m_Index.size()); for (const auto& Entry : m_Index) { const DiskLocation& Location = Entry.second.Location; @@ -1308,62 +1291,11 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const fs::path& BucketDir, const bool Is { continue; } - KnownBlocks.insert(Location.GetBlockLocation(m_PayloadAlignment).BlockIndex); + const BlockStoreLocation& BlockLocation = Location.GetBlockLocation(m_PayloadAlignment); + KnownLocations.push_back(BlockLocation); } - if (std::filesystem::is_directory(m_BlocksBasePath)) - { - std::vector FoldersToScan; - FoldersToScan.push_back(m_BlocksBasePath); - size_t FolderOffset = 0; - while (FolderOffset < FoldersToScan.size()) - { - for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(FoldersToScan[FolderOffset])) - { - if (Entry.is_directory()) - { - FoldersToScan.push_back(Entry.path()); - continue; - } - if (Entry.is_regular_file()) - { - const std::filesystem::path Path = Entry.path(); - if (Path.extension() != DataExtension) - { - continue; - } - std::string FileName = Path.stem().string(); - uint32_t BlockIndex; - bool OK = ParseHexNumber(FileName, BlockIndex); - if (!OK) - { - continue; - } - if (!KnownBlocks.contains(BlockIndex)) - { - // Log removing unreferenced block - // Clear out unused blocks - ZEN_INFO("removing unused block for '{}' at '{}'", m_BucketDir / m_BucketName, Path); - std::error_code Ec; - std::filesystem::remove(Path, Ec); - if (Ec) - { - ZEN_WARN("Failed to delete file '{}' reason: '{}'", Path, Ec.message()); - } - continue; - } - Ref BlockFile = new BlockStoreFile(Path); - BlockFile->Open(); - m_ChunkBlocks[BlockIndex] = BlockFile; - } - } - ++FolderOffset; - } - } - else - { - CreateDirectories(m_BlocksBasePath); - } + m_BlockStore.Initialize(m_BlocksBasePath, MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1, KnownLocations); if (IsNew || ((LogEntryCount + LegacyLogEntryCount) > 0)) { @@ -1390,14 +1322,14 @@ ZenCacheDiskLayer::CacheBucket::BuildPath(PathBuilderBase& Path, const IoHash& H bool ZenCacheDiskLayer::CacheBucket::GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue) { - if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + BlockStoreLocation Location = Loc.GetBlockLocation(m_PayloadAlignment); + + Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); + if (!ChunkBlock) { return false; } - const BlockStoreLocation& Location = Loc.GetBlockLocation(m_PayloadAlignment); - Ref ChunkBlock = m_ChunkBlocks[Location.BlockIndex]; - OutValue.Value = ChunkBlock->GetChunk(Location.Offset, Location.Size); OutValue.Value.SetContentType(Loc.GetContentType()); @@ -1437,15 +1369,17 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal { IndexEntry& Entry = It.value(); Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); + DiskLocation Location = Entry.Location; + _.ReleaseNow(); - if (GetInlineCacheValue(Entry.Location, OutValue)) + if (Location.IsFlagSet(DiskLocation::kStandaloneFile)) + { + return GetStandaloneCacheValue(Location, HashKey, OutValue); + } + if (GetInlineCacheValue(Location, OutValue)) { return true; } - - _.ReleaseNow(); - - return GetStandaloneCacheValue(Entry.Location, HashKey, OutValue); } return false; @@ -1463,84 +1397,7 @@ ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& { return PutStandaloneCacheValue(HashKey, Value); } - - // Small object put - - uint8_t EntryFlags = 0; - - if (Value.Value.GetContentType() == ZenContentType::kCbObject) - { - EntryFlags |= DiskLocation::kStructured; - } - else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) - { - EntryFlags |= DiskLocation::kCompressed; - } - - uint64_t ChunkSize = Value.Value.Size(); - - uint32_t WriteBlockIndex; - Ref WriteBlock; - uint64_t InsertOffset; - - { - RwLock::ExclusiveLockScope _(m_InsertLock); - - WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); - bool IsWriting = m_WriteBlock != nullptr; - if (!IsWriting || (m_CurrentInsertOffset + ChunkSize) > MaxBlockSize) - { - if (m_WriteBlock) - { - m_WriteBlock = nullptr; - } - { - RwLock::ExclusiveLockScope __(m_IndexLock); - if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex) - { - throw std::runtime_error(fmt::format("unable to allocate a new block in '{}'", m_BucketDir / m_BucketName)); - } - WriteBlockIndex += IsWriting ? 1 : 0; - while (m_ChunkBlocks.contains(WriteBlockIndex)) - { - WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; - } - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex); - m_WriteBlock = new BlockStoreFile(BlockPath); - m_ChunkBlocks[WriteBlockIndex] = m_WriteBlock; - m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); - } - m_CurrentInsertOffset = 0; - m_WriteBlock->Create(MaxBlockSize); - } - InsertOffset = m_CurrentInsertOffset; - m_CurrentInsertOffset = RoundUp(InsertOffset + ChunkSize, m_PayloadAlignment); - WriteBlock = m_WriteBlock; - } - - DiskLocation Location({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = ChunkSize}, m_PayloadAlignment, EntryFlags); - const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; - - WriteBlock->Write(Value.Value.Data(), ChunkSize, InsertOffset); - m_SlogFile.Append(DiskIndexEntry); - - m_TotalSize.fetch_add(ChunkSize, std::memory_order_seq_cst); - { - RwLock::ExclusiveLockScope __(m_IndexLock); - if (auto It = m_Index.find(HashKey); It != m_Index.end()) - { - // TODO: should check if write is idempotent and bail out if it is? - // this would requiring comparing contents on disk unless we add a - // content hash to the index entry - IndexEntry& Entry = It.value(); - Entry.Location = Location; - Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); - } - else - { - m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); - } - } + PutInlineCacheValue(HashKey, Value); } void @@ -1555,21 +1412,10 @@ ZenCacheDiskLayer::CacheBucket::Drop() void ZenCacheDiskLayer::CacheBucket::Flush() { - { - RwLock::ExclusiveLockScope _(m_InsertLock); - if (m_CurrentInsertOffset > 0) - { - uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); - WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; - m_WriteBlock = nullptr; - m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); - m_CurrentInsertOffset = 0; - } - } - RwLock::SharedLockScope _(m_IndexLock); + m_BlockStore.Flush(); + RwLock::SharedLockScope _(m_IndexLock); MakeIndexSnapshot(); - SaveManifest(); } @@ -1615,20 +1461,22 @@ ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx) ZenCacheValue Value; - if (GetInlineCacheValue(Loc, Value)) + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) { - // Validate contents + if (GetInlineCacheValue(Loc, Value)) + { + // Validate contents + continue; + } } else if (GetStandaloneCacheValue(Loc, HashKey, Value)) { // Note: we cannot currently validate contents since we don't // have a content hash! + continue; } - else - { - // Value not found - BadKeys.push_back(HashKey); - } + // Value not found + BadKeys.push_back(HashKey); } } @@ -1726,18 +1574,23 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); }); - if (!GetInlineCacheValue(Loc, CacheValue)) + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) { - GetStandaloneCacheValue(Loc, Key, CacheValue); + if (!GetStandaloneCacheValue(Loc, Key, CacheValue)) + { + continue; + } + } + else if (!GetInlineCacheValue(Loc, CacheValue)) + { + continue; } } - if (CacheValue.Value) - { - ZEN_ASSERT(CacheValue.Value.GetContentType() == ZenContentType::kCbObject); - CbObject Obj(SharedBuffer{CacheValue.Value}); - Obj.IterateAttachments([&Cids](CbFieldView Field) { Cids.push_back(Field.AsAttachment()); }); - } + ZEN_ASSERT(CacheValue.Value); + ZEN_ASSERT(CacheValue.Value.GetContentType() == ZenContentType::kCbObject); + CbObject Obj(SharedBuffer{CacheValue.Value}); + Obj.IterateAttachments([&Cids](CbFieldView Field) { Cids.push_back(Field.AsAttachment()); }); } } @@ -1797,10 +1650,6 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) m_SlogFile.Flush(); - IndexMap Index; - size_t BlockCount; - uint64_t ExcludeBlockIndex = 0x800000000ull; - std::span ExpiredCacheKeys = GcCtx.ExpiredCacheKeys(m_BucketName); std::vector DeleteCacheKeys; DeleteCacheKeys.reserve(ExpiredCacheKeys.size()); @@ -1816,30 +1665,27 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) ZEN_INFO("garbage collect SKIPPED, for '{}', no expired cache keys found", m_BucketDir / m_BucketName); return; } + + IndexMap Index; + BlockStore::ReclaimSnapshotState BlockStoreState; { - RwLock::SharedLockScope __(m_InsertLock); - RwLock::SharedLockScope ___(m_IndexLock); + RwLock::SharedLockScope __(m_IndexLock); + Stopwatch Timer; + const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); { - Stopwatch Timer; - const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); if (m_Index.empty()) { ZEN_INFO("garbage collect SKIPPED, for '{}', container is empty", m_BucketDir / m_BucketName); return; } - if (m_WriteBlock) - { - ExcludeBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); - } - __.ReleaseNow(); + BlockStoreState = m_BlockStore.GetReclaimSnapshotState(); } SaveManifest(); - Index = m_Index; - BlockCount = m_ChunkBlocks.size(); + Index = m_Index; for (const IoHash& Key : DeleteCacheKeys) { @@ -1936,295 +1782,102 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { return; } - std::unordered_map BlockIndexToChunkMapIndex; - std::vector> KeepChunks; - std::vector> DeleteChunks; - - BlockIndexToChunkMapIndex.reserve(BlockCount); - KeepChunks.reserve(BlockCount); - DeleteChunks.reserve(BlockCount); - size_t GuesstimateCountPerBlock = TotalChunkHashes.size() / BlockCount / 2; - - uint64_t DeleteCount = 0; - - uint64_t NewTotalSize = 0; + TotalChunkCount = TotalChunkHashes.size(); - std::unordered_set Expired; - Expired.insert(DeleteCacheKeys.begin(), DeleteCacheKeys.end()); + std::vector ChunkLocations; + std::vector KeepChunkIndexes; + std::vector ChunkIndexToChunkHash; + ChunkLocations.reserve(TotalChunkCount); + ChunkLocations.reserve(TotalChunkCount); + ChunkIndexToChunkHash.reserve(TotalChunkCount); GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { - auto KeyIt = Index.find(ChunkHash); - const DiskLocation& Location = KeyIt->second.Location; - BlockStoreLocation BlockLocation = Location.GetBlockLocation(m_PayloadAlignment); - - uint32_t BlockIndex = BlockLocation.BlockIndex; - - if (static_cast(BlockIndex) == ExcludeBlockIndex) - { - return; - } - - auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(BlockIndex); - size_t ChunkMapIndex = 0; - if (BlockIndexPtr == BlockIndexToChunkMapIndex.end()) - { - ChunkMapIndex = KeepChunks.size(); - BlockIndexToChunkMapIndex[BlockIndex] = ChunkMapIndex; - KeepChunks.resize(ChunkMapIndex + 1); - KeepChunks.back().reserve(GuesstimateCountPerBlock); - DeleteChunks.resize(ChunkMapIndex + 1); - DeleteChunks.back().reserve(GuesstimateCountPerBlock); - } - else - { - ChunkMapIndex = BlockIndexPtr->second; - } + auto KeyIt = Index.find(ChunkHash); + const DiskLocation& DiskLocation = KeyIt->second.Location; + BlockStoreLocation Location = DiskLocation.GetBlockLocation(m_PayloadAlignment); + size_t ChunkIndex = ChunkLocations.size(); + ChunkLocations.push_back(Location); + ChunkIndexToChunkHash[ChunkIndex] = ChunkHash; if (Keep) { - std::vector& ChunkMap = KeepChunks[ChunkMapIndex]; - ChunkMap.push_back(ChunkHash); - NewTotalSize += BlockLocation.Size; - } - else - { - std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; - ChunkMap.push_back(ChunkHash); - DeleteCount++; + KeepChunkIndexes.push_back(ChunkIndex); } }); - std::unordered_set BlocksToReWrite; - BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size()); - for (const auto& Entry : BlockIndexToChunkMapIndex) - { - uint32_t BlockIndex = Entry.first; - size_t ChunkMapIndex = Entry.second; - const std::vector& ChunkMap = DeleteChunks[ChunkMapIndex]; - if (ChunkMap.empty()) - { - continue; - } - BlocksToReWrite.insert(BlockIndex); - } + size_t DeleteCount = TotalChunkCount - KeepChunkIndexes.size(); const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects(); if (!PerformDelete) { + m_BlockStore.ReclaimSpace(BlockStoreState, ChunkLocations, KeepChunkIndexes, m_PayloadAlignment, true); uint64_t TotalSize = m_TotalSize.load(std::memory_order_relaxed); ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}", m_BucketDir / m_BucketName, DeleteCount, - NiceBytes(TotalSize - NewTotalSize), + 0, // NiceBytes(TotalSize - NewTotalSize), TotalChunkCount, NiceBytes(TotalSize)); return; } - auto AddToDeleted = [this, &Index, &DeletedCount, &DeletedSize](const std::vector& DeletedEntries) { - for (const IoHash& ChunkHash : DeletedEntries) - { - const DiskLocation& Location = Index[ChunkHash].Location; - ZEN_ASSERT(!Location.IsFlagSet(DiskLocation::kStandaloneFile)); - DeletedSize += Index[ChunkHash].Location.GetBlockLocation(m_PayloadAlignment).Size; - } - DeletedCount += DeletedEntries.size(); - }; - - // Move all chunks in blocks that have chunks removed to new blocks - - Ref NewBlockFile; - uint64_t WriteOffset = 0; - uint32_t NewBlockIndex = 0; - - auto UpdateLocations = [this](const std::span& Entries) { - for (const DiskIndexEntry& Entry : Entries) - { - if (Entry.Location.IsFlagSet(DiskLocation::kTombStone)) + std::vector DeletedChunks; + m_BlockStore.ReclaimSpace( + BlockStoreState, + ChunkLocations, + KeepChunkIndexes, + m_PayloadAlignment, + false, + [this, &DeletedChunks, &ChunkIndexToChunkHash, &Index, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( + uint32_t, + const std::unordered_map& MovedChunks, + const std::vector& RemovedChunks) { + std::vector LogEntries; + LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); + for (const auto& Entry : MovedChunks) { - auto KeyIt = m_Index.find(Entry.Key); - uint64_t ChunkSize = KeyIt->second.Location.GetBlockLocation(m_PayloadAlignment).Size; - m_TotalSize.fetch_sub(ChunkSize, std::memory_order_seq_cst); - m_Index.erase(KeyIt); - continue; + size_t ChunkIndex = Entry.first; + const BlockStoreLocation& NewLocation = Entry.second; + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + const DiskLocation& OldDiskLocation = Index[ChunkHash].Location; + LogEntries.push_back( + {.Key = ChunkHash, .Location = DiskLocation(NewLocation, m_PayloadAlignment, OldDiskLocation.GetFlags())}); + } + for (const size_t ChunkIndex : RemovedChunks) + { + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + const DiskLocation& OldDiskLocation = Index[ChunkHash].Location; + LogEntries.push_back({.Key = ChunkHash, + .Location = DiskLocation(OldDiskLocation.GetBlockLocation(m_PayloadAlignment), + m_PayloadAlignment, + OldDiskLocation.GetFlags() | DiskLocation::kTombStone)}); + DeletedChunks.push_back(ChunkHash); } - m_Index[Entry.Key].Location = Entry.Location; - } - }; - - std::unordered_map MovedBlockChunks; - for (uint32_t BlockIndex : BlocksToReWrite) - { - const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; - - Ref OldBlockFile; - { - RwLock::SharedLockScope _i(m_IndexLock); - OldBlockFile = m_ChunkBlocks[BlockIndex]; - } - const std::vector& KeepMap = KeepChunks[ChunkMapIndex]; - if (KeepMap.empty()) - { - const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; - std::vector LogEntries = MakeDiskIndexEntries({}, DeleteMap); m_SlogFile.Append(LogEntries); m_SlogFile.Flush(); { - RwLock::ExclusiveLockScope _i(m_IndexLock); + RwLock::ExclusiveLockScope __(m_IndexLock); Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto ____ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); - UpdateLocations(LogEntries); - m_ChunkBlocks[BlockIndex] = nullptr; - } - AddToDeleted(DeleteMap); - ZEN_DEBUG("marking cas store file for delete '{}', block #{}, '{}'", - m_BucketDir / m_BucketName, - BlockIndex, - OldBlockFile->GetPath()); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion, reason: '{}'", OldBlockFile->GetPath(), Ec.message()); - } - continue; - } - - std::vector Chunk; - for (const IoHash& ChunkHash : KeepMap) - { - auto KeyIt = Index.find(ChunkHash); - const BlockStoreLocation ChunkLocation = KeyIt->second.Location.GetBlockLocation(m_PayloadAlignment); - Chunk.resize(ChunkLocation.Size); - OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); - - if (!NewBlockFile || (WriteOffset + Chunk.size() > MaxBlockSize)) - { - uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed); - std::vector LogEntries = MakeDiskIndexEntries(MovedBlockChunks, {}); - m_SlogFile.Append(LogEntries); - m_SlogFile.Flush(); - - if (NewBlockFile) - { - NewBlockFile->Truncate(WriteOffset); - NewBlockFile->Flush(); - } + for (const DiskIndexEntry& Entry : LogEntries) { - RwLock::ExclusiveLockScope __(m_IndexLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - UpdateLocations(LogEntries); - if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex) + if (Entry.Location.GetFlags() & DiskLocation::kTombStone) { - ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded", - m_BucketDir / m_BucketName, - static_cast(std::numeric_limits::max()) + 1); - return; - } - while (m_ChunkBlocks.contains(NextBlockIndex)) - { - NextBlockIndex = (NextBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; - } - std::filesystem::path NewBlockPath = GetBlockPath(m_BlocksBasePath, NextBlockIndex); - NewBlockFile = new BlockStoreFile(NewBlockPath); - m_ChunkBlocks[NextBlockIndex] = NewBlockFile; - } - - MovedCount += MovedBlockChunks.size(); - MovedBlockChunks.clear(); - - std::error_code Error; - DiskSpace Space = DiskSpaceInfo(m_BucketDir, Error); - if (Error) - { - ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BucketDir, Error.message()); - return; - } - if (Space.Free < MaxBlockSize) - { - uint64_t ReclaimedSpace = GcCtx.ClaimGCReserve(); - if (Space.Free + ReclaimedSpace < MaxBlockSize) - { - ZEN_WARN("garbage collect from '{}' FAILED, required disk space {}, free {}", - m_BucketDir / m_BucketName, - MaxBlockSize, - NiceBytes(Space.Free + ReclaimedSpace)); - RwLock::ExclusiveLockScope _l(m_IndexLock); - Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - m_ChunkBlocks.erase(NextBlockIndex); - return; + m_Index.erase(Entry.Key); + uint64_t ChunkSize = Entry.Location.GetBlockLocation(m_PayloadAlignment).Size; + m_TotalSize.fetch_sub(ChunkSize); + continue; } - - ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}", - m_BucketDir / m_BucketName, - ReclaimedSpace, - NiceBytes(Space.Free + ReclaimedSpace)); + m_Index[Entry.Key].Location = Entry.Location; } - NewBlockFile->Create(MaxBlockSize); - NewBlockIndex = NextBlockIndex; - WriteOffset = 0; } + }); - NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); - MovedBlockChunks.emplace(ChunkHash, - DiskLocation({.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}, - m_PayloadAlignment, - KeyIt->second.Location.Flags)); - WriteOffset = RoundUp(WriteOffset + Chunk.size(), m_PayloadAlignment); - } - Chunk.clear(); - if (NewBlockFile) - { - NewBlockFile->Truncate(WriteOffset); - NewBlockFile->Flush(); - NewBlockFile = {}; - } - - const std::vector& DeleteMap = DeleteChunks[ChunkMapIndex]; - std::vector LogEntries = MakeDiskIndexEntries(MovedBlockChunks, DeleteMap); - m_SlogFile.Append(LogEntries); - m_SlogFile.Flush(); - { - RwLock::ExclusiveLockScope __(m_IndexLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - UpdateLocations(LogEntries); - m_ChunkBlocks[BlockIndex] = nullptr; - } - MovedCount += MovedBlockChunks.size(); - AddToDeleted(DeleteMap); - MovedBlockChunks.clear(); - - ZEN_DEBUG("marking cas store file for delete '{}', block #{}, '{}'", - m_BucketDir / m_BucketName, - BlockIndex, - OldBlockFile->GetPath()); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); - } - OldBlockFile = nullptr; - } + GcCtx.DeletedCas(DeletedChunks); } void @@ -2367,6 +2020,47 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c m_TotalSize.fetch_add(Loc.Size(), std::memory_order_seq_cst); } +void +ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, const ZenCacheValue& Value) +{ + uint8_t EntryFlags = 0; + + if (Value.Value.GetContentType() == ZenContentType::kCbObject) + { + EntryFlags |= DiskLocation::kStructured; + } + else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) + { + EntryFlags |= DiskLocation::kCompressed; + } + + uint64_t ChunkSize = Value.Value.Size(); + + m_BlockStore.WriteChunk(Value.Value.Data(), + ChunkSize, + m_PayloadAlignment, + [this, &HashKey, EntryFlags](const BlockStoreLocation& BlockStoreLocation) { + DiskLocation Location(BlockStoreLocation, m_PayloadAlignment, EntryFlags); + const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; + m_SlogFile.Append(DiskIndexEntry); + m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order_seq_cst); + RwLock::ExclusiveLockScope __(m_IndexLock); + if (auto It = m_Index.find(HashKey); It != m_Index.end()) + { + // TODO: should check if write is idempotent and bail out if it is? + // this would requiring comparing contents on disk unless we add a + // content hash to the index entry + IndexEntry& Entry = It.value(); + Entry.Location = Location; + Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); + } + else + { + m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); + } + }); +} + ////////////////////////////////////////////////////////////////////////// ZenCacheDiskLayer::ZenCacheDiskLayer(const std::filesystem::path& RootDir) : m_RootDir(RootDir) @@ -3026,7 +2720,7 @@ TEST_CASE("z$.legacyconversion") std::filesystem::path BucketDir = TempDir.Path() / Bucket; std::filesystem::path BlocksBaseDir = BucketDir / "blocks"; - std::filesystem::path CasPath = GetBlockPath(BlocksBaseDir, 1); + std::filesystem::path CasPath = BlockStore ::GetBlockPath(BlocksBaseDir, 1); std::filesystem::path LegacyDataPath = GetLegacyDataPath(BucketDir); std::filesystem::remove(LegacyDataPath); std::filesystem::rename(CasPath, LegacyDataPath); diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index c107983b5..0c2a7c0b2 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -255,6 +255,7 @@ private: std::string m_BucketName; std::filesystem::path m_BucketDir; std::filesystem::path m_BlocksBasePath; + BlockStore m_BlockStore; Oid m_BucketId; bool m_IsOk = false; uint64_t m_LargeObjectThreshold = 64 * 1024; @@ -286,20 +287,15 @@ private: using IndexMap = tsl::robin_map; - RwLock m_IndexLock; - IndexMap m_Index; - std::unordered_map> m_ChunkBlocks; + RwLock m_IndexLock; + IndexMap m_Index; - RwLock m_InsertLock; // used to serialize inserts - Ref m_WriteBlock; - std::uint64_t m_CurrentInsertOffset = 0; - - std::atomic_uint32_t m_WriteBlockIndex{}; std::atomic_uint64_t m_TotalSize{}; void BuildPath(PathBuilderBase& Path, const IoHash& HashKey); void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); bool GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey, ZenCacheValue& OutValue); + void PutInlineCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); bool GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue); void MakeIndexSnapshot(); uint64_t ReadIndexFile(); diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 4cf3c6486..309c99d1e 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -1,11 +1,11 @@ // Copyright Epic Games, Inc. All Rights Reserved. -#include "compactcas.h" +#include #include #include #include -#include +#include #if ZEN_WITH_TESTS # include @@ -111,26 +111,6 @@ BlockStoreFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::functio m_File.StreamByteRange(FileOffset, Size, std::move(ChunkFun)); } -namespace { - const char* DataExtension = ".ucas"; - - std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex) - { - ExtendablePathBuilder<256> Path; - - char BlockHexString[9]; - ToHexNumber(BlockIndex, BlockHexString); - - Path.Append(BlocksBasePath); - Path.AppendSeparator(); - Path.AppendAsciiRange(BlockHexString, BlockHexString + 4); - Path.AppendSeparator(); - Path.Append(BlockHexString); - Path.Append(DataExtension); - return Path.ToPath(); - } -} // namespace - void BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, @@ -171,7 +151,7 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, if (Entry.is_regular_file()) { const std::filesystem::path Path = Entry.path(); - if (Path.extension() != DataExtension) + if (Path.extension() != GetBlockFileExtension()) { continue; } @@ -595,6 +575,29 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, return; } +const char* +BlockStore::GetBlockFileExtension() +{ + return ".ucas"; +} + +std::filesystem::path +BlockStore::GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex) +{ + ExtendablePathBuilder<256> Path; + + char BlockHexString[9]; + ToHexNumber(BlockIndex, BlockHexString); + + Path.Append(BlocksBasePath); + Path.AppendSeparator(); + Path.AppendAsciiRange(BlockHexString, BlockHexString + 4); + Path.AppendSeparator(); + Path.Append(BlockHexString); + Path.Append(GetBlockFileExtension()); + return Path.ToPath(); +} + #if ZEN_WITH_TESTS static bool diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 84019d7aa..30747f554 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -66,7 +66,6 @@ namespace { const char* IndexExtension = ".uidx"; const char* LogExtension = ".ulog"; - const char* DataExtension = ".ucas"; std::filesystem::path GetBasePath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName) { @@ -93,22 +92,6 @@ namespace { return GetBasePath(RootPath, ContainerBaseName) / "blocks"; } - std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex) - { - ExtendablePathBuilder<256> Path; - - char BlockHexString[9]; - ToHexNumber(BlockIndex, BlockHexString); - - Path.Append(BlocksBasePath); - Path.AppendSeparator(); - Path.AppendAsciiRange(BlockHexString, BlockHexString + 4); - Path.AppendSeparator(); - Path.Append(BlockHexString); - Path.Append(DataExtension); - return Path.ToPath(); - } - std::filesystem::path GetLegacyLogPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName) { return RootPath / (ContainerBaseName + LogExtension); @@ -116,7 +99,7 @@ namespace { std::filesystem::path GetLegacyDataPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName) { - return RootPath / (ContainerBaseName + DataExtension); + return RootPath / (ContainerBaseName + ".ucas"); } std::filesystem::path GetLegacyIndexPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName) @@ -315,7 +298,7 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) BlockStoreLocation Location = KeyIt->second.Get(m_PayloadAlignment); _.ReleaseNow(); - Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); // m_ChunkBlocks[Location.BlockIndex]; + Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); if (!ChunkBlock) { return IoBuffer(); @@ -348,17 +331,6 @@ void CasContainerStrategy::Flush() { m_BlockStore.Flush(); - /* { - RwLock::ExclusiveLockScope _(m_InsertLock); - if (m_CurrentInsertOffset > 0) - { - uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); - WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex; - m_WriteBlock = nullptr; - m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release); - m_CurrentInsertOffset = 0; - } - }*/ MakeIndexSnapshot(); } @@ -533,8 +505,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) return; } - auto GetChunkLocations = [] {}; - std::vector DeletedChunks; m_BlockStore.ReclaimSpace( BlockStoreState, @@ -543,7 +513,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &LocationMap, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - uint32_t BlockIndex, + uint32_t, const std::unordered_map& MovedChunks, const std::vector& RemovedChunks) { std::vector LogEntries; @@ -578,17 +548,12 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) if (Entry.Flags & CasDiskIndexEntry::kTombstone) { m_LocationMap.erase(Entry.Key); - auto KeyIt = m_LocationMap.find(Entry.Key); uint64_t ChunkSize = Entry.Location.GetSize(); m_TotalSize.fetch_sub(ChunkSize); continue; } m_LocationMap[Entry.Key] = Entry.Location; } - for (const auto& Entry : m_LocationMap) - { - ZEN_ASSERT(Entry.second.GetBlockIndex() != BlockIndex); - } } }); @@ -809,7 +774,7 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) }); uint32_t WriteBlockIndex = 0; - while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + while (std::filesystem::exists(BlockStore::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) { ++WriteBlockIndex; } @@ -976,7 +941,7 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) LogEntries.push_back( {.Key = Entry.second.Key, .Location = NewLocation, .ContentType = Record.ContentType, .Flags = Record.Flags}); } - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex); + std::filesystem::path BlockPath = BlockStore::GetBlockPath(m_BlocksBasePath, WriteBlockIndex); CreateDirectories(BlockPath.parent_path()); BlockFile.Close(); std::filesystem::rename(LegacyDataPath, BlockPath); @@ -1038,7 +1003,7 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) BlockRanges.push_back(BlockRange); WriteBlockIndex++; - while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + while (std::filesystem::exists(BlockStore::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) { ++WriteBlockIndex; } @@ -1077,7 +1042,7 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) NiceTimeSpanMs(ETA)); } - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex); + std::filesystem::path BlockPath = BlockStore::GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex); BlockStoreFile ChunkBlock(BlockPath); ChunkBlock.Create(BlockRange.BlockSize); uint64_t Offset = 0; @@ -1176,14 +1141,12 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName); m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); - std::unordered_set KnownBlocks; std::vector KnownLocations; KnownLocations.reserve(m_LocationMap.size()); for (const auto& Entry : m_LocationMap) { const BlockStoreDiskLocation& Location = Entry.second; m_TotalSize.fetch_add(Location.GetSize(), std::memory_order_seq_cst); - KnownBlocks.insert(Location.GetBlockIndex()); KnownLocations.push_back(Location.Get(m_PayloadAlignment)); } @@ -1842,7 +1805,7 @@ TEST_CASE("compactcas.legacyconversion") Gc.CollectGarbage(GcCtx); } - std::filesystem::path BlockPath = GetBlockPath(GetBlocksBasePath(CasConfig.RootDirectory, "test"), 1); + std::filesystem::path BlockPath = BlockStore::GetBlockPath(GetBlocksBasePath(CasConfig.RootDirectory, "test"), 1); std::filesystem::path LegacyDataPath = GetLegacyDataPath(CasConfig.RootDirectory, "test"); std::filesystem::rename(BlockPath, LegacyDataPath); diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 084142636..1eff46367 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -143,6 +143,9 @@ public: const ReclaimCallback& Callback = [](uint32_t, const std::unordered_map&, const std::vector&) { }); + static const char* GetBlockFileExtension(); + static std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex); + private: std::unordered_map> m_ChunkBlocks; -- cgit v1.2.3 From 6e6035499b3fe40b22e1be5aee9ac3a9675d27b0 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sun, 1 May 2022 22:55:43 +0200 Subject: remove m_TotalSize for blockstore fix scrub logic in structured cache store --- zenserver/cache/structuredcachestore.cpp | 21 ++++++++------- zenstore/blockstore.cpp | 44 +++++++++++++++++--------------- zenstore/include/zenstore/blockstore.h | 1 - 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index d313cd0c2..f26d599ab 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1376,10 +1376,7 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal { return GetStandaloneCacheValue(Location, HashKey, OutValue); } - if (GetInlineCacheValue(Location, OutValue)) - { - return true; - } + return GetInlineCacheValue(Location, OutValue); } return false; @@ -1463,16 +1460,16 @@ ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx) if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) { - if (GetInlineCacheValue(Loc, Value)) + if (GetStandaloneCacheValue(Loc, HashKey, Value)) { - // Validate contents + // Note: we cannot currently validate contents since we don't + // have a content hash! continue; } } - else if (GetStandaloneCacheValue(Loc, HashKey, Value)) + else if (GetInlineCacheValue(Loc, Value)) { - // Note: we cannot currently validate contents since we don't - // have a content hash! + // Validate contents continue; } // Value not found @@ -1724,6 +1721,12 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { RwLock::SharedLockScope __(m_IndexLock); + Stopwatch Timer; + const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); if (m_Index.contains(Key)) { // Someone added it back, let the file on disk be diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 309c99d1e..b4aa0f7c3 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -124,13 +124,11 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, m_BlocksBasePath = BlocksBasePath; m_MaxBlockSize = MaxBlockSize; - m_TotalSize = 0; m_ChunkBlocks.clear(); std::unordered_set KnownBlocks; for (const auto& Entry : KnownLocations) { - m_TotalSize.fetch_add(Entry.Size, std::memory_order_seq_cst); KnownBlocks.insert(Entry.BlockIndex); } @@ -287,7 +285,8 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, uint64_t ReadBlockLongestTimeUs = 0; uint64_t TotalChunkCount = ChunkLocations.size(); uint64_t DeletedSize = 0; - uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed); + uint64_t OldTotalSize = 0; + uint64_t NewTotalSize = 0; uint64_t MovedCount = 0; uint64_t DeletedCount = 0; @@ -305,7 +304,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, &DeletedSize, OldTotalSize] { ZEN_INFO( - "garbage collect for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved " + "reclaim space for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved " "#{} " "of #{} " "chunks ({}).", @@ -340,11 +339,11 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, BlockDeleteChunks.reserve(BlockCount); size_t GuesstimateCountPerBlock = TotalChunkCount / BlockCount / 2; - size_t DeleteCount = 0; - uint64_t NewTotalSize = 0; + size_t DeleteCount = 0; for (size_t Index = 0; Index < TotalChunkCount; ++Index) { const BlockStoreLocation& Location = ChunkLocations[Index]; + OldTotalSize += Location.Size; if (Snapshot.ExcludeBlockIndexes.contains(Location.BlockIndex)) { continue; @@ -394,13 +393,12 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, if (DryRun) { - uint64_t TotalSize = m_TotalSize.load(std::memory_order_relaxed); ZEN_INFO("garbage collect for '{}' DISABLED, found #{} {} chunks of total #{} {}", m_BlocksBasePath, DeleteCount, - NiceBytes(TotalSize - NewTotalSize), + NiceBytes(OldTotalSize - NewTotalSize), TotalChunkCount, - NiceBytes(TotalSize)); + OldTotalSize); return; } @@ -415,7 +413,13 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, Ref OldBlockFile; { RwLock::SharedLockScope _i(m_InsertLock); - OldBlockFile = m_ChunkBlocks[BlockIndex]; + Stopwatch Timer; + const auto __ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + OldBlockFile = m_ChunkBlocks[BlockIndex]; ZEN_ASSERT(OldBlockFile); } @@ -432,10 +436,10 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { RwLock::ExclusiveLockScope _i(m_InsertLock); Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); m_ChunkBlocks[BlockIndex] = nullptr; } @@ -472,10 +476,10 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, MovedChunks.clear(); RwLock::ExclusiveLockScope __(m_InsertLock); Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); if (m_ChunkBlocks.size() == m_MaxBlockCount) { @@ -511,10 +515,10 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, NiceBytes(Space.Free + ReclaimedSpace)); RwLock::ExclusiveLockScope _l(m_InsertLock); Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); m_ChunkBlocks.erase(NextBlockIndex); return; @@ -571,8 +575,6 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } OldBlockFile = nullptr; } - - return; } const char* diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 1eff46367..31d9145f9 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -158,7 +158,6 @@ private: uint64_t m_MaxBlockSize = 1u << 28; uint64_t m_MaxBlockCount = BlockStoreDiskLocation::MaxBlockIndex + 1; std::filesystem::path m_BlocksBasePath; - std::atomic_uint64_t m_TotalSize{}; }; void blockstore_forcelink(); -- cgit v1.2.3 From 08a0dc388f98e6d3eb8387b983a9a7fb959fe603 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sun, 1 May 2022 23:31:35 +0200 Subject: reimplement CasContainerStrategy::Scrub --- zenstore/blockstore.cpp | 82 ++++++++++++++++++++ zenstore/compactcas.cpp | 132 ++++++++++++++------------------- zenstore/include/zenstore/blockstore.h | 4 + 3 files changed, 140 insertions(+), 78 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index b4aa0f7c3..559dfc1ee 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -105,6 +105,12 @@ BlockStoreFile::Flush() m_File.Flush(); } +BasicFile& +BlockStoreFile::GetBasicFile() +{ + return m_File; +} + void BlockStoreFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function&& ChunkFun) { @@ -577,6 +583,82 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } } +void +BlockStore::IterateChunks(const std::vector& ChunkLocations, + std::function SmallChunkCallback, + std::function LargeChunkCallback) +{ + // We do a read sweep through the payloads file and validate + // any entries that are contained within each segment, with + // the assumption that most entries will be checked in this + // pass. An alternative strategy would be to use memory mapping. + + { + std::vector BigChunks; + const uint64_t WindowSize = 4 * 1024 * 1024; + IoBuffer ReadBuffer{WindowSize}; + void* BufferBase = ReadBuffer.MutableData(); + + RwLock::SharedLockScope _(m_InsertLock); // TODO: Refactor so we don't have to keep m_InsertLock all the time? + + for (const auto& Block : m_ChunkBlocks) + { + uint64_t WindowStart = 0; + uint64_t WindowEnd = WindowSize; + uint32_t BlockIndex = Block.first; + const Ref& BlockFile = Block.second; + BlockFile->Open(); + const uint64_t FileSize = BlockFile->FileSize(); + + do + { + const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart); + BlockFile->Read(BufferBase, ChunkSize, WindowStart); + + // TODO: We could be smarter here if the ChunkLocations were sorted on block index - we could + // then only scan a subset of ChunkLocations instead of scanning through them all... + for (size_t ChunkIndex = 0; ChunkIndex < ChunkLocations.size(); ++ChunkIndex) + { + const BlockStoreLocation Location = ChunkLocations[ChunkIndex]; + if (BlockIndex != Location.BlockIndex) + { + continue; + } + + const uint64_t EntryOffset = Location.Offset; + if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) + { + const uint64_t EntryEnd = EntryOffset + Location.Size; + + if (EntryEnd >= WindowEnd) + { + BigChunks.push_back(ChunkIndex); + + continue; + } + + SmallChunkCallback(ChunkIndex, + reinterpret_cast(BufferBase) + Location.Offset - WindowStart, + Location.Size); + } + } + + WindowStart += WindowSize; + WindowEnd += WindowSize; + } while (WindowStart < FileSize); + } + + // Deal with large chunks + + for (size_t ChunkIndex : BigChunks) + { + const BlockStoreLocation Location = ChunkLocations[ChunkIndex]; + BasicFile& BlockFile = m_ChunkBlocks[Location.BlockIndex]->GetBasicFile(); + LargeChunkCallback(ChunkIndex, BlockFile, Location.Offset, Location.Size); + } + } +} + const char* BlockStore::GetBlockFileExtension() { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 30747f554..a6e617474 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -338,84 +338,56 @@ void CasContainerStrategy::Scrub(ScrubContext& Ctx) { ZEN_UNUSED(Ctx); -#if 0 - std::vector BadChunks; - // We do a read sweep through the payloads file and validate - // any entries that are contained within each segment, with - // the assumption that most entries will be checked in this - // pass. An alternative strategy would be to use memory mapping. + RwLock::SharedLockScope _(m_LocationMapLock); + uint64_t TotalChunkCount = m_LocationMap.size(); + std::vector ChunkLocations; + std::vector ChunkIndexToChunkHash; + ChunkLocations.reserve(TotalChunkCount); + ChunkIndexToChunkHash.reserve(TotalChunkCount); { - std::vector BigChunks; - const uint64_t WindowSize = 4 * 1024 * 1024; - IoBuffer ReadBuffer{WindowSize}; - void* BufferBase = ReadBuffer.MutableData(); - - RwLock::SharedLockScope _(m_InsertLock); // TODO: Refactor so we don't have to keep m_InsertLock all the time? - RwLock::SharedLockScope __(m_LocationMapLock); - - for (const auto& Block : m_ChunkBlocks) + for (const auto& Entry : m_LocationMap) { - uint64_t WindowStart = 0; - uint64_t WindowEnd = WindowSize; - const Ref& BlockFile = Block.second; - BlockFile->Open(); - const uint64_t FileSize = BlockFile->FileSize(); + const IoHash& ChunkHash = Entry.first; + const BlockStoreDiskLocation& DiskLocation = Entry.second; + BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment); + size_t ChunkIndex = ChunkLocations.size(); - do - { - const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart); - BlockFile->Read(BufferBase, ChunkSize, WindowStart); - - for (auto& Entry : m_LocationMap) - { - const BlockStoreLocation Location = Entry.second.Get(m_PayloadAlignment); - const uint64_t EntryOffset = Location.Offset; - - if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) - { - const uint64_t EntryEnd = EntryOffset + Location.Size; - - if (EntryEnd >= WindowEnd) - { - BigChunks.push_back({.Key = Entry.first, .Location = Entry.second}); - - continue; - } - - const IoHash ComputedHash = - IoHash::HashBuffer(reinterpret_cast(BufferBase) + Location.Offset - WindowStart, Location.Size); - - if (Entry.first != ComputedHash) - { - // Hash mismatch - BadChunks.push_back({.Key = Entry.first, .Location = Entry.second, .Flags = CasDiskIndexEntry::kTombstone}); - } - } - } - - WindowStart += WindowSize; - WindowEnd += WindowSize; - } while (WindowStart < FileSize); + ChunkLocations.push_back(Location); + ChunkIndexToChunkHash[ChunkIndex] = ChunkHash; } + } - // Deal with large chunks + std::vector BadChunks; - for (const CasDiskIndexEntry& Entry : BigChunks) - { - IoHashStream Hasher; - const BlockStoreLocation Location = Entry.Location.Get(m_PayloadAlignment); - const Ref& BlockFile = m_ChunkBlocks[Location.BlockIndex]; - BlockFile->StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); - IoHash ComputedHash = Hasher.GetHash(); + // We do a read sweep through the payloads file and validate + // any entries that are contained within each segment, with + // the assumption that most entries will be checked in this + // pass. An alternative strategy would be to use memory mapping. - if (Entry.Key != ComputedHash) + m_BlockStore.IterateChunks( + ChunkLocations, + [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, const void* Data, uint64_t Size) { + const IoHash ComputedHash = IoHash::HashBuffer(Data, Size); + const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex]; + if (ComputedHash != ExpectedHash) { - BadChunks.push_back({.Key = Entry.Key, .Location = Entry.Location, .Flags = CasDiskIndexEntry::kTombstone}); + // Hash mismatch + BadChunks.push_back(ExpectedHash); } - } - } + }, + [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) { + IoHashStream Hasher; + BlockFile.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); + IoHash ComputedHash = Hasher.GetHash(); + const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex]; + if (ComputedHash != ExpectedHash) + { + // Hash mismatch + BadChunks.push_back(ExpectedHash); + } + }); if (BadChunks.empty()) { @@ -424,26 +396,31 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) ZEN_ERROR("Scrubbing found {} bad chunks in '{}'", BadChunks.size(), m_Config.RootDirectory / m_ContainerBaseName); + _.ReleaseNow(); // Deal with bad chunks by removing them from our lookup map - std::vector BadChunkHashes; - BadChunkHashes.reserve(BadChunks.size()); - - m_CasLog.Append(BadChunks); + std::vector LogEntries; + LogEntries.reserve(BadChunks.size()); { - RwLock::ExclusiveLockScope _(m_LocationMapLock); - for (const CasDiskIndexEntry& Entry : BadChunks) + RwLock::ExclusiveLockScope __(m_LocationMapLock); + for (const IoHash& ChunkHash : BadChunks) { - BadChunkHashes.push_back(Entry.Key); - m_LocationMap.erase(Entry.Key); + const auto KeyIt = m_LocationMap.find(ChunkHash); + if (KeyIt == m_LocationMap.end()) + { + // Might have been GC'd + continue; + } + LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); + m_LocationMap.erase(KeyIt); } } + m_CasLog.Append(LogEntries); // Let whomever it concerns know about the bad chunks. This could // be used to invalidate higher level data structures more efficiently // than a full validation pass might be able to do - Ctx.ReportBadCasChunks(BadChunkHashes); -#endif // 0 + Ctx.ReportBadCasChunks(BadChunks); } void @@ -481,7 +458,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) std::vector KeepChunkIndexes; std::vector ChunkIndexToChunkHash; ChunkLocations.reserve(TotalChunkCount); - ChunkLocations.reserve(TotalChunkCount); ChunkIndexToChunkHash.reserve(TotalChunkCount); GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) { diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 31d9145f9..5af416b59 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -99,6 +99,7 @@ struct BlockStoreFile : public RefCounted void Write(const void* Data, uint64_t Size, uint64_t FileOffset); void Truncate(uint64_t Size); void Flush(); + BasicFile& GetBasicFile(); void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function&& ChunkFun); private: @@ -142,6 +143,9 @@ public: bool DryRun, const ReclaimCallback& Callback = [](uint32_t, const std::unordered_map&, const std::vector&) { }); + void IterateChunks(const std::vector& ChunkLocations, + std::function SmallChunkCallback, + std::function LargeChunkCallback); static const char* GetBlockFileExtension(); static std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex); -- cgit v1.2.3 From 75b1dd112aead7c5246fa84928b9cd96dde49cbc Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sun, 1 May 2022 23:34:20 +0200 Subject: respect Ctx.RunRecovery() --- zenserver/cache/structuredcachestore.cpp | 5 ++++ zenstore/compactcas.cpp | 44 +++++++++++++++++--------------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index f26d599ab..9ae5b0f17 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1497,6 +1497,11 @@ ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx) m_Index.erase(BadKey); } } + + // Let whomever it concerns know about the bad chunks. This could + // be used to invalidate higher level data structures more efficiently + // than a full validation pass might be able to do + Ctx.ReportBadCasChunks(BadKeys); } void diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index a6e617474..a79928fba 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -359,7 +359,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } } - std::vector BadChunks; + std::vector BadKeys; // We do a read sweep through the payloads file and validate // any entries that are contained within each segment, with @@ -368,16 +368,16 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) m_BlockStore.IterateChunks( ChunkLocations, - [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, const void* Data, uint64_t Size) { + [&ChunkIndexToChunkHash, &BadKeys](size_t ChunkIndex, const void* Data, uint64_t Size) { const IoHash ComputedHash = IoHash::HashBuffer(Data, Size); const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex]; if (ComputedHash != ExpectedHash) { // Hash mismatch - BadChunks.push_back(ExpectedHash); + BadKeys.push_back(ExpectedHash); } }, - [&ChunkIndexToChunkHash, &BadChunks](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) { + [&ChunkIndexToChunkHash, &BadKeys](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) { IoHashStream Hasher; BlockFile.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); IoHash ComputedHash = Hasher.GetHash(); @@ -385,42 +385,46 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) if (ComputedHash != ExpectedHash) { // Hash mismatch - BadChunks.push_back(ExpectedHash); + BadKeys.push_back(ExpectedHash); } }); - if (BadChunks.empty()) + if (BadKeys.empty()) { return; } - ZEN_ERROR("Scrubbing found {} bad chunks in '{}'", BadChunks.size(), m_Config.RootDirectory / m_ContainerBaseName); + ZEN_ERROR("Scrubbing found #{} bad chunks in '{}'", BadKeys.size(), m_Config.RootDirectory / m_ContainerBaseName); _.ReleaseNow(); - // Deal with bad chunks by removing them from our lookup map - std::vector LogEntries; - LogEntries.reserve(BadChunks.size()); + if (Ctx.RunRecovery()) { - RwLock::ExclusiveLockScope __(m_LocationMapLock); - for (const IoHash& ChunkHash : BadChunks) + // Deal with bad chunks by removing them from our lookup map + + std::vector LogEntries; + LogEntries.reserve(BadKeys.size()); { - const auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt == m_LocationMap.end()) + RwLock::ExclusiveLockScope __(m_LocationMapLock); + for (const IoHash& ChunkHash : BadKeys) { - // Might have been GC'd - continue; + const auto KeyIt = m_LocationMap.find(ChunkHash); + if (KeyIt == m_LocationMap.end()) + { + // Might have been GC'd + continue; + } + LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); + m_LocationMap.erase(KeyIt); } - LogEntries.push_back({.Key = KeyIt->first, .Location = KeyIt->second, .Flags = CasDiskIndexEntry::kTombstone}); - m_LocationMap.erase(KeyIt); } + m_CasLog.Append(LogEntries); } - m_CasLog.Append(LogEntries); // Let whomever it concerns know about the bad chunks. This could // be used to invalidate higher level data structures more efficiently // than a full validation pass might be able to do - Ctx.ReportBadCasChunks(BadChunks); + Ctx.ReportBadCasChunks(BadKeys); } void -- cgit v1.2.3 From b5f5b1e792a559d0358ab8a755b335cfb2185e9f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sun, 1 May 2022 23:38:59 +0200 Subject: cleanup --- zenstore/blockstore.cpp | 16 ++++++++-------- zenstore/include/zenstore/blockstore.h | 21 ++++++++++++--------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 559dfc1ee..9961e734d 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -194,7 +194,7 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, } void -BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteCompleteCallback Callback) +BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback) { RwLock::ExclusiveLockScope InsertLock(m_InsertLock); @@ -584,9 +584,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } void -BlockStore::IterateChunks(const std::vector& ChunkLocations, - std::function SmallChunkCallback, - std::function LargeChunkCallback) +BlockStore::IterateChunks(const std::vector& ChunkLocations, + IterateChunksSmallSizeCallback SmallSizeCallback, + IterateChunksLargeSizeCallback LargeSizeCallback) { // We do a read sweep through the payloads file and validate // any entries that are contained within each segment, with @@ -637,9 +637,9 @@ BlockStore::IterateChunks(const std::vector& continue; } - SmallChunkCallback(ChunkIndex, - reinterpret_cast(BufferBase) + Location.Offset - WindowStart, - Location.Size); + SmallSizeCallback(ChunkIndex, + reinterpret_cast(BufferBase) + Location.Offset - WindowStart, + Location.Size); } } @@ -654,7 +654,7 @@ BlockStore::IterateChunks(const std::vector& { const BlockStoreLocation Location = ChunkLocations[ChunkIndex]; BasicFile& BlockFile = m_ChunkBlocks[Location.BlockIndex]->GetBasicFile(); - LargeChunkCallback(ChunkIndex, BlockFile, Location.Offset, Location.Size); + LargeSizeCallback(ChunkIndex, BlockFile, Location.Offset, Location.Size); } } } diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 5af416b59..21c02d389 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -6,6 +6,7 @@ #include #include +#include #include namespace zen { @@ -119,18 +120,20 @@ public: typedef std::function& MovedChunks, const std::vector& RemovedChunks)> - ReclaimCallback; - typedef std::function WriteCompleteCallback; + ReclaimCallback; + typedef std::function WriteChunkCallback; + typedef std::function IterateChunksSmallSizeCallback; + typedef std::function IterateChunksLargeSizeCallback; void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, uint64_t MaxBlockCount, const std::vector& KnownLocations); void WriteChunk( - const void* Data, - uint64_t Size, - uint64_t Alignment, - WriteCompleteCallback Callback = [](const BlockStoreLocation&) {}); + const void* Data, + uint64_t Size, + uint64_t Alignment, + WriteChunkCallback Callback = [](const BlockStoreLocation&) {}); Ref GetChunkBlock(const BlockStoreLocation& Location); void Flush(); @@ -143,9 +146,9 @@ public: bool DryRun, const ReclaimCallback& Callback = [](uint32_t, const std::unordered_map&, const std::vector&) { }); - void IterateChunks(const std::vector& ChunkLocations, - std::function SmallChunkCallback, - std::function LargeChunkCallback); + void IterateChunks(const std::vector& ChunkLocations, + IterateChunksSmallSizeCallback SmallSizeCallback, + IterateChunksLargeSizeCallback LargeSizeCallback); static const char* GetBlockFileExtension(); static std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex); -- cgit v1.2.3 From c89190f7fabf8a08cda2255937dc99ca35972210 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 10:18:31 +0200 Subject: Move bulk of MigrateLegacyData to blockstore.cpp --- zenserver/cache/structuredcachestore.cpp | 1 - zenstore/blockstore.cpp | 219 ++++++++++++++++++++++++- zenstore/compactcas.cpp | 268 ++++++------------------------- zenstore/include/zenstore/blockstore.h | 23 ++- 4 files changed, 281 insertions(+), 230 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 9ae5b0f17..5cebaa948 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1836,7 +1836,6 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &Index, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - uint32_t, const std::unordered_map& MovedChunks, const std::vector& RemovedChunks) { std::vector LogEntries; diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 9961e734d..6f5578be8 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -437,7 +437,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { DeletedSize += ChunkLocations[DeleteIndex].Size; } - Callback(BlockIndex, {}, DeleteMap); + Callback({}, DeleteMap); DeletedCount += DeleteMap.size(); { RwLock::ExclusiveLockScope _i(m_InsertLock); @@ -477,7 +477,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, NewBlockFile->Flush(); } { - Callback(0xfffffffful, MovedChunks, {}); + Callback(MovedChunks, {}); MovedCount += KeepMap.size(); MovedChunks.clear(); RwLock::ExclusiveLockScope __(m_InsertLock); @@ -558,7 +558,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, DeletedSize += ChunkLocations[DeleteIndex].Size; } - Callback(BlockIndex, MovedChunks, DeleteMap); + Callback(MovedChunks, DeleteMap); MovedCount += KeepMap.size(); DeletedCount += DeleteMap.size(); MovedChunks.clear(); @@ -599,7 +599,7 @@ BlockStore::IterateChunks(const std::vector& ChunkLocations, IoBuffer ReadBuffer{WindowSize}; void* BufferBase = ReadBuffer.MutableData(); - RwLock::SharedLockScope _(m_InsertLock); // TODO: Refactor so we don't have to keep m_InsertLock all the time? + RwLock::SharedLockScope _(m_InsertLock); for (const auto& Block : m_ChunkBlocks) { @@ -659,6 +659,217 @@ BlockStore::IterateChunks(const std::vector& ChunkLocations, } } +bool +BlockStore::Split(const std::vector& ChunkLocations, + const std::filesystem::path& SourceBlockFilePath, + const std::filesystem::path& BlocksBasePath, + uint64_t MaxBlockSize, + uint64_t MaxBlockCount, + size_t PayloadAlignment, + bool CleanSource, + const SplitCallback& Callback) +{ + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(BlocksBasePath.parent_path(), Error); + if (Error) + { + ZEN_ERROR("get disk space in {} FAILED, reason: '{}'", BlocksBasePath, Error.message()); + return false; + } + + if (Space.Free < MaxBlockSize) + { + ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}", + BlocksBasePath, + MaxBlockSize, + NiceBytes(Space.Free)); + return false; + } + + size_t TotalSize = 0; + for (const BlockStoreLocation& Location : ChunkLocations) + { + TotalSize += Location.Size; + } + size_t ChunkCount = ChunkLocations.size(); + uint64_t RequiredDiskSpace = TotalSize + ((PayloadAlignment - 1) * ChunkCount); + uint64_t MaxRequiredBlockCount = RoundUp(RequiredDiskSpace, MaxBlockSize) / MaxBlockSize; + if (MaxRequiredBlockCount > MaxBlockCount) + { + ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}", + BlocksBasePath, + MaxRequiredBlockCount, + MaxBlockCount); + return false; + } + + constexpr const uint64_t DiskReserve = 1ul << 28; + + if (CleanSource) + { + if (Space.Free < (MaxBlockSize + DiskReserve)) + { + ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", + BlocksBasePath, + NiceBytes(MaxBlockSize + DiskReserve), + NiceBytes(Space.Free)); + return false; + } + } + else + { + if (Space.Free < (RequiredDiskSpace + DiskReserve)) + { + ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", + BlocksBasePath, + NiceBytes(RequiredDiskSpace + DiskReserve), + NiceBytes(Space.Free)); + return false; + } + } + + uint32_t WriteBlockIndex = 0; + while (std::filesystem::exists(BlockStore::GetBlockPath(BlocksBasePath, WriteBlockIndex))) + { + ++WriteBlockIndex; + } + + BasicFile BlockFile; + BlockFile.Open(SourceBlockFilePath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); + + if (CleanSource && (MaxRequiredBlockCount < 2)) + { + std::vector> Chunks; + Chunks.reserve(ChunkCount); + for (size_t Index = 0; Index < ChunkCount; ++Index) + { + const BlockStoreLocation& ChunkLocation = ChunkLocations[Index]; + Chunks.push_back({Index, {.BlockIndex = WriteBlockIndex, .Offset = ChunkLocation.Offset, .Size = ChunkLocation.Size}}); + } + std::filesystem::path BlockPath = BlockStore::GetBlockPath(BlocksBasePath, WriteBlockIndex); + CreateDirectories(BlockPath.parent_path()); + BlockFile.Close(); + std::filesystem::rename(SourceBlockFilePath, BlockPath); + Callback(Chunks); + return true; + } + + std::vector ChunkIndexes; + ChunkIndexes.reserve(ChunkCount); + for (size_t Index = 0; Index < ChunkCount; ++Index) + { + ChunkIndexes.push_back(Index); + } + + std::sort(begin(ChunkIndexes), end(ChunkIndexes), [&ChunkLocations](size_t Lhs, size_t Rhs) { + const BlockStoreLocation& LhsLocation = ChunkLocations[Lhs]; + const BlockStoreLocation& RhsLocation = ChunkLocations[Rhs]; + return LhsLocation.Offset < RhsLocation.Offset; + }); + + uint64_t BlockSize = 0; + uint64_t BlockOffset = 0; + std::vector NewLocations; + struct BlockData + { + std::vector> Chunks; + uint64_t BlockOffset; + uint64_t BlockSize; + uint32_t BlockIndex; + }; + + std::vector BlockRanges; + std::vector> Chunks; + BlockRanges.reserve(MaxRequiredBlockCount); + for (const size_t& ChunkIndex : ChunkIndexes) + { + const BlockStoreLocation& LegacyChunkLocation = ChunkLocations[ChunkIndex]; + + uint64_t ChunkOffset = LegacyChunkLocation.Offset; + uint64_t ChunkSize = LegacyChunkLocation.Size; + uint64_t ChunkEnd = ChunkOffset + ChunkSize; + + if (BlockSize == 0) + { + BlockOffset = ChunkOffset; + } + if ((ChunkEnd - BlockOffset) > MaxBlockSize) + { + BlockData BlockRange{.BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}; + BlockRange.Chunks.swap(Chunks); + BlockRanges.push_back(BlockRange); + + WriteBlockIndex++; + while (std::filesystem::exists(BlockStore::GetBlockPath(BlocksBasePath, WriteBlockIndex))) + { + ++WriteBlockIndex; + } + BlockOffset = ChunkOffset; + BlockSize = 0; + } + BlockSize = RoundUp(BlockSize, PayloadAlignment); + BlockStoreLocation ChunkLocation = {.BlockIndex = WriteBlockIndex, .Offset = ChunkOffset - BlockOffset, .Size = ChunkSize}; + Chunks.push_back({ChunkIndex, ChunkLocation}); + BlockSize = ChunkEnd - BlockOffset; + } + if (BlockSize > 0) + { + BlockRanges.push_back( + {.Chunks = std::move(Chunks), .BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}); + } + + Stopwatch WriteBlockTimer; + + std::reverse(BlockRanges.begin(), BlockRanges.end()); + std::vector Buffer(1 << 28); + for (size_t Idx = 0; Idx < BlockRanges.size(); ++Idx) + { + const BlockData& BlockRange = BlockRanges[Idx]; + if (Idx > 0) + { + uint64_t Remaining = BlockRange.BlockOffset + BlockRange.BlockSize; + uint64_t Completed = BlockOffset + BlockSize - Remaining; + uint64_t ETA = (WriteBlockTimer.GetElapsedTimeMs() * Remaining) / Completed; + + ZEN_INFO("migrating store '{}' {}/{} blocks, remaining {} ({}) ETA: {}", + BlocksBasePath, + Idx, + BlockRanges.size(), + NiceBytes(BlockRange.BlockOffset + BlockRange.BlockSize), + NiceBytes(BlockOffset + BlockSize), + NiceTimeSpanMs(ETA)); + } + + std::filesystem::path BlockPath = BlockStore::GetBlockPath(BlocksBasePath, BlockRange.BlockIndex); + BlockStoreFile ChunkBlock(BlockPath); + ChunkBlock.Create(BlockRange.BlockSize); + uint64_t Offset = 0; + while (Offset < BlockRange.BlockSize) + { + uint64_t Size = BlockRange.BlockSize - Offset; + if (Size > Buffer.size()) + { + Size = Buffer.size(); + } + BlockFile.Read(Buffer.data(), Size, BlockRange.BlockOffset + Offset); + ChunkBlock.Write(Buffer.data(), Size, Offset); + Offset += Size; + } + ChunkBlock.Truncate(Offset); + ChunkBlock.Flush(); + + Callback(BlockRange.Chunks); + + if (CleanSource) + { + BlockFile.SetFileSize(BlockRange.BlockOffset); + } + } + BlockFile.Close(); + + return true; +} + const char* BlockStore::GetBlockFileExtension() { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index a79928fba..8d90ba186 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -493,7 +493,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &LocationMap, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - uint32_t, const std::unordered_map& MovedChunks, const std::vector& RemovedChunks) { std::vector LogEntries; @@ -753,32 +752,13 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) NiceBytes(TotalSize)); }); - uint32_t WriteBlockIndex = 0; - while (std::filesystem::exists(BlockStore::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + uint64_t BlockFileSize = 0; { - ++WriteBlockIndex; + BasicFile BlockFile; + BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); + BlockFileSize = BlockFile.FileSize(); } - std::error_code Error; - DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error); - if (Error) - { - ZEN_ERROR("get disk space in {} FAILED, reason: '{}'", m_Config.RootDirectory, Error.message()); - return 0; - } - - if (Space.Free < m_MaxBlockSize) - { - ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}", - m_Config.RootDirectory / m_ContainerBaseName, - m_MaxBlockSize, - NiceBytes(Space.Free)); - return 0; - } - - BasicFile BlockFile; - BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); - std::unordered_map LegacyDiskIndex; uint64_t InvalidEntryCount = 0; @@ -814,7 +794,6 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) 0); std::vector BadEntries; - uint64_t BlockFileSize = BlockFile.FileSize(); for (const auto& Entry : LegacyDiskIndex) { const LegacyCasDiskIndexEntry& Record(Entry.second); @@ -840,7 +819,6 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) if (LegacyDiskIndex.empty()) { - BlockFile.Close(); LegacyCasLog.Close(); if (CleanSource) { @@ -859,219 +837,75 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) return 0; } - for (const auto& Entry : LegacyDiskIndex) - { - const LegacyCasDiskIndexEntry& Record(Entry.second); - TotalSize += Record.Location.GetSize(); - } - - uint64_t RequiredDiskSpace = TotalSize + ((m_PayloadAlignment - 1) * LegacyDiskIndex.size()); - uint64_t MaxRequiredBlockCount = RoundUp(RequiredDiskSpace, m_MaxBlockSize) / m_MaxBlockSize; - if (MaxRequiredBlockCount > BlockStoreDiskLocation::MaxBlockIndex) - { - ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}", - m_Config.RootDirectory / m_ContainerBaseName, - MaxRequiredBlockCount, - BlockStoreDiskLocation::MaxBlockIndex); - return 0; - } - - constexpr const uint64_t DiskReserve = 1ul << 28; - - if (CleanSource) - { - if (Space.Free < (m_MaxBlockSize + DiskReserve)) - { - ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", - m_Config.RootDirectory / m_ContainerBaseName, - NiceBytes(m_MaxBlockSize + DiskReserve), - NiceBytes(Space.Free)); - return 0; - } - } - else - { - if (Space.Free < (RequiredDiskSpace + DiskReserve)) - { - ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", - m_Config.RootDirectory / m_ContainerBaseName, - NiceBytes(RequiredDiskSpace + DiskReserve), - NiceBytes(Space.Free)); - return 0; - } - } - std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName); CreateDirectories(LogPath.parent_path()); TCasLogFile CasLog; CasLog.Open(LogPath, CasLogFile::Mode::kWrite); - if (CleanSource && (MaxRequiredBlockCount < 2)) + std::unordered_map ChunkIndexToChunkHash; + std::vector ChunkLocations; + ChunkIndexToChunkHash.reserve(LegacyDiskIndex.size()); + ChunkLocations.reserve(LegacyDiskIndex.size()); + for (const auto& Entry : LegacyDiskIndex) { - std::vector LogEntries; - LogEntries.reserve(LegacyDiskIndex.size()); - - // We can use the block as is, just move it and add the blocks to our new log - for (auto& Entry : LegacyDiskIndex) - { - const LegacyCasDiskIndexEntry& Record(Entry.second); - - BlockStoreLocation NewChunkLocation{WriteBlockIndex, Record.Location.GetOffset(), Record.Location.GetSize()}; - BlockStoreDiskLocation NewLocation(NewChunkLocation, m_PayloadAlignment); - LogEntries.push_back( - {.Key = Entry.second.Key, .Location = NewLocation, .ContentType = Record.ContentType, .Flags = Record.Flags}); - } - std::filesystem::path BlockPath = BlockStore::GetBlockPath(m_BlocksBasePath, WriteBlockIndex); - CreateDirectories(BlockPath.parent_path()); - BlockFile.Close(); - std::filesystem::rename(LegacyDataPath, BlockPath); - CasLog.Append(LogEntries); - for (const CasDiskIndexEntry& Entry : LogEntries) - { - m_LocationMap.insert_or_assign(Entry.Key, Entry.Location); - } - - MigratedChunkCount += LogEntries.size(); - MigratedBlockCount++; + const LegacyCasDiskLocation& Location = Entry.second.Location; + const IoHash& ChunkHash = Entry.first; + size_t ChunkIndex = ChunkLocations.size(); + ChunkLocations.push_back({.BlockIndex = 0, .Offset = Location.GetOffset(), .Size = Location.GetSize()}); + ChunkIndexToChunkHash[ChunkIndex] = ChunkHash; + TotalSize += Location.GetSize(); } - else - { - std::vector ChunkHashes; - ChunkHashes.reserve(LegacyDiskIndex.size()); - for (const auto& Entry : LegacyDiskIndex) - { - ChunkHashes.push_back(Entry.first); - } - - std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) { - auto LhsKeyIt = LegacyDiskIndex.find(Lhs); - auto RhsKeyIt = LegacyDiskIndex.find(Rhs); - return LhsKeyIt->second.Location.GetOffset() < RhsKeyIt->second.Location.GetOffset(); - }); - - uint64_t BlockSize = 0; - uint64_t BlockOffset = 0; - std::vector NewLocations; - struct BlockData - { - std::vector> Chunks; - uint64_t BlockOffset; - uint64_t BlockSize; - uint32_t BlockIndex; - }; - - std::vector BlockRanges; - std::vector> Chunks; - BlockRanges.reserve(MaxRequiredBlockCount); - for (const IoHash& ChunkHash : ChunkHashes) - { - const LegacyCasDiskIndexEntry& LegacyEntry = LegacyDiskIndex[ChunkHash]; - const LegacyCasDiskLocation& LegacyChunkLocation = LegacyEntry.Location; - - uint64_t ChunkOffset = LegacyChunkLocation.GetOffset(); - uint64_t ChunkSize = LegacyChunkLocation.GetSize(); - uint64_t ChunkEnd = ChunkOffset + ChunkSize; - - if (BlockSize == 0) - { - BlockOffset = ChunkOffset; - } - if ((ChunkEnd - BlockOffset) > m_MaxBlockSize) - { - BlockData BlockRange{.BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}; - BlockRange.Chunks.swap(Chunks); - BlockRanges.push_back(BlockRange); - - WriteBlockIndex++; - while (std::filesystem::exists(BlockStore::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) - { - ++WriteBlockIndex; - } - BlockOffset = ChunkOffset; - BlockSize = 0; - } - BlockSize = RoundUp(BlockSize, m_PayloadAlignment); - BlockStoreLocation ChunkLocation = {.BlockIndex = WriteBlockIndex, .Offset = ChunkOffset - BlockOffset, .Size = ChunkSize}; - Chunks.push_back({ChunkHash, ChunkLocation}); - BlockSize = ChunkEnd - BlockOffset; - } - if (BlockSize > 0) - { - BlockRanges.push_back( - {.Chunks = std::move(Chunks), .BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}); - } - Stopwatch WriteBlockTimer; - - std::reverse(BlockRanges.begin(), BlockRanges.end()); - std::vector Buffer(1 << 28); - for (size_t Idx = 0; Idx < BlockRanges.size(); ++Idx) - { - const BlockData& BlockRange = BlockRanges[Idx]; - if (Idx > 0) - { - uint64_t Remaining = BlockRange.BlockOffset + BlockRange.BlockSize; - uint64_t Completed = BlockOffset + BlockSize - Remaining; - uint64_t ETA = (WriteBlockTimer.GetElapsedTimeMs() * Remaining) / Completed; - - ZEN_INFO("migrating store '{}' {}/{} blocks, remaining {} ({}) ETA: {}", - m_Config.RootDirectory / m_ContainerBaseName, - Idx, - BlockRanges.size(), - NiceBytes(BlockRange.BlockOffset + BlockRange.BlockSize), - NiceBytes(BlockOffset + BlockSize), - NiceTimeSpanMs(ETA)); - } - - std::filesystem::path BlockPath = BlockStore::GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex); - BlockStoreFile ChunkBlock(BlockPath); - ChunkBlock.Create(BlockRange.BlockSize); - uint64_t Offset = 0; - while (Offset < BlockRange.BlockSize) - { - uint64_t Size = BlockRange.BlockSize - Offset; - if (Size > Buffer.size()) - { - Size = Buffer.size(); - } - BlockFile.Read(Buffer.data(), Size, BlockRange.BlockOffset + Offset); - ChunkBlock.Write(Buffer.data(), Size, Offset); - Offset += Size; - } - ChunkBlock.Truncate(Offset); - ChunkBlock.Flush(); - + m_BlockStore.Split( + ChunkLocations, + LegacyDataPath, + m_BlocksBasePath, + m_MaxBlockSize, + BlockStoreDiskLocation::MaxBlockIndex + 1, + m_PayloadAlignment, + CleanSource, + [this, &LegacyDiskIndex, &ChunkIndexToChunkHash, &LegacyCasLog, &CasLog, CleanSource, &MigratedBlockCount, &MigratedChunkCount]( + const std::vector>& MovedChunks) { std::vector LogEntries; - LogEntries.reserve(BlockRange.Chunks.size()); - for (const auto& Entry : BlockRange.Chunks) + LogEntries.reserve(MovedChunks.size()); + for (const auto& Entry : MovedChunks) { - const LegacyCasDiskIndexEntry& LegacyEntry = LegacyDiskIndex[Entry.first]; - BlockStoreDiskLocation Location(Entry.second, m_PayloadAlignment); - LogEntries.push_back( - {.Key = Entry.first, .Location = Location, .ContentType = LegacyEntry.ContentType, .Flags = LegacyEntry.Flags}); + size_t ChunkIndex = Entry.first; + const BlockStoreLocation& NewLocation = Entry.second; + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + const LegacyCasDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash]; + LogEntries.push_back({.Key = ChunkHash, + .Location = {NewLocation, m_PayloadAlignment}, + .ContentType = OldEntry.ContentType, + .Flags = OldEntry.Flags}); } - CasLog.Append(LogEntries); for (const CasDiskIndexEntry& Entry : LogEntries) { m_LocationMap.insert_or_assign(Entry.Key, Entry.Location); } - MigratedChunkCount += LogEntries.size(); - MigratedBlockCount++; - + CasLog.Append(LogEntries); + CasLog.Flush(); if (CleanSource) { std::vector LegacyLogEntries; - LegacyLogEntries.reserve(BlockRange.Chunks.size()); - for (const auto& Entry : BlockRange.Chunks) + LegacyLogEntries.reserve(MovedChunks.size()); + for (const auto& Entry : MovedChunks) { - LegacyLogEntries.push_back({.Key = Entry.first, .Flags = LegacyCasDiskIndexEntry::kTombstone}); + size_t ChunkIndex = Entry.first; + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + const LegacyCasDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash]; + LegacyLogEntries.push_back( + LegacyCasDiskIndexEntry{.Key = ChunkHash, + .Location = OldEntry.Location, + .ContentType = OldEntry.ContentType, + .Flags = (uint8_t)(OldEntry.Flags | LegacyCasDiskIndexEntry::kTombstone)}); } LegacyCasLog.Append(LegacyLogEntries); - BlockFile.SetFileSize(BlockRange.BlockOffset); + LegacyCasLog.Flush(); } - } - } + MigratedBlockCount++; + MigratedChunkCount += MovedChunks.size(); + }); - BlockFile.Close(); LegacyCasLog.Close(); CasLog.Close(); diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 21c02d389..0cef7600f 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -117,14 +117,14 @@ public: std::unordered_set ExcludeBlockIndexes; size_t BlockCount; }; - typedef std::function& MovedChunks, - const std::vector& RemovedChunks)> + typedef std::function& MovedChunks, const std::vector& RemovedChunks)> ReclaimCallback; typedef std::function WriteChunkCallback; typedef std::function IterateChunksSmallSizeCallback; typedef std::function IterateChunksLargeSizeCallback; + typedef std::function>& MovedChunks)> SplitCallback; + void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, uint64_t MaxBlockCount, @@ -144,11 +144,18 @@ public: const std::vector& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, - const ReclaimCallback& Callback = [](uint32_t, const std::unordered_map&, const std::vector&) { - }); - void IterateChunks(const std::vector& ChunkLocations, - IterateChunksSmallSizeCallback SmallSizeCallback, - IterateChunksLargeSizeCallback LargeSizeCallback); + const ReclaimCallback& Callback = [](const std::unordered_map&, const std::vector&) {}); + void IterateChunks(const std::vector& ChunkLocations, + IterateChunksSmallSizeCallback SmallSizeCallback, + IterateChunksLargeSizeCallback LargeSizeCallback); + static bool Split(const std::vector& ChunkLocations, + const std::filesystem::path& SourceBlockFilePath, + const std::filesystem::path& BlocksBasePath, + uint64_t MaxBlockSize, + uint64_t MaxBlockCount, + size_t PayloadAlignment, + bool CleanSource, + const SplitCallback& Callback); static const char* GetBlockFileExtension(); static std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex); -- cgit v1.2.3 From 48f2e3af59e2a06c81e37170db95e432b148e5e8 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 10:48:57 +0200 Subject: refactor structured cache to use blockstore migrate --- zenserver/cache/structuredcachestore.cpp | 309 +++++++------------------------ zenstore/blockstore.cpp | 6 +- 2 files changed, 69 insertions(+), 246 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 5cebaa948..e9c051f88 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -877,35 +877,17 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) NiceBytes(TotalSize)); }); - uint32_t WriteBlockIndex = 0; - while (std::filesystem::exists(BlockStore ::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) + uint64_t BlockFileSize = 0; { - ++WriteBlockIndex; + BasicFile BlockFile; + BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); + BlockFileSize = BlockFile.FileSize(); } - std::error_code Error; - DiskSpace Space = DiskSpaceInfo(m_BucketDir, Error); - if (Error) - { - ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BucketDir, Error.message()); - return 0; - } - - if (Space.Free < MaxBlockSize) - { - ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}", - m_BucketDir / m_BucketName, - MaxBlockSize, - NiceBytes(Space.Free)); - return 0; - } - - BasicFile BlockFile; - BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); - std::unordered_map LegacyDiskIndex; uint64_t InvalidEntryCount = 0; + size_t BlockChunkCount = 0; TCasLogFile LegacyCasLog; LegacyCasLog.Open(LegacyLogPath, CleanSource ? CasLogFile::Mode::kWrite : CasLogFile::Mode::kRead); { @@ -942,7 +924,6 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) 0); std::vector BadEntries; - uint64_t BlockFileSize = BlockFile.FileSize(); for (const auto& Entry : LegacyDiskIndex) { const LegacyDiskIndexEntry& Record(Entry.second); @@ -952,6 +933,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) } if (Record.Location.Offset() + Record.Location.Size() <= BlockFileSize) { + BlockChunkCount++; continue; } ZEN_WARN("skipping invalid entry in '{}', reason: location is outside of file", LegacyLogPath); @@ -972,7 +954,6 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) if (LegacyDiskIndex.empty()) { LegacyCasLog.Close(); - BlockFile.Close(); if (CleanSource) { // Older versions of ZenCacheDiskLayer expects the legacy files to exist if it can find @@ -988,250 +969,92 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) return 0; } - uint64_t BlockChunkCount = 0; - uint64_t BlockTotalSize = 0; - for (const auto& Entry : LegacyDiskIndex) - { - const LegacyDiskIndexEntry& Record(Entry.second); - if (Record.Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) - { - continue; - } - BlockChunkCount++; - BlockTotalSize += Record.Location.Size(); - } - - uint64_t RequiredDiskSpace = BlockTotalSize + ((m_PayloadAlignment - 1) * BlockChunkCount); - uint64_t MaxRequiredBlockCount = RoundUp(RequiredDiskSpace, MaxBlockSize) / MaxBlockSize; - if (MaxRequiredBlockCount > BlockStoreDiskLocation::MaxBlockIndex) - { - ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}", - m_BucketDir / m_BucketName, - MaxRequiredBlockCount, - BlockStoreDiskLocation::MaxBlockIndex); - return 0; - } - - constexpr const uint64_t DiskReserve = 1ul << 28; - - if (CleanSource) - { - if (Space.Free < (MaxBlockSize + DiskReserve)) - { - ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", - m_BucketDir / m_BucketName, - NiceBytes(MaxBlockSize + DiskReserve), - NiceBytes(Space.Free)); - return 0; - } - } - else - { - if (Space.Free < (RequiredDiskSpace + DiskReserve)) - { - ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})", - m_BucketDir / m_BucketName, - NiceBytes(RequiredDiskSpace + DiskReserve), - NiceBytes(Space.Free)); - return 0; - } - } - std::filesystem::path LogPath = GetLogPath(m_BucketDir, m_BucketName); CreateDirectories(LogPath.parent_path()); TCasLogFile CasLog; CasLog.Open(LogPath, CasLogFile::Mode::kWrite); - if (CleanSource && (MaxRequiredBlockCount < 2)) - { - std::vector LogEntries; - LogEntries.reserve(LegacyDiskIndex.size()); + std::unordered_map ChunkIndexToChunkHash; + std::vector ChunkLocations; + ChunkIndexToChunkHash.reserve(BlockChunkCount); + ChunkLocations.reserve(BlockChunkCount); - // We can use the block as is, just move it and add the blocks to our new log - for (auto& Entry : LegacyDiskIndex) - { - const LegacyDiskIndexEntry& Record(Entry.second); + std::vector LogEntries; + LogEntries.reserve(LegacyDiskIndex.size() - BlockChunkCount); - DiskLocation NewLocation; - uint8_t Flags = 0xff & (Record.Location.Flags() >> 56); - if (Record.Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) - { - NewLocation = DiskLocation(Record.Location.Size(), Flags); - } - else - { - BlockStoreLocation NewChunkLocation(WriteBlockIndex, Record.Location.Offset(), Record.Location.Size()); - NewLocation = DiskLocation(NewChunkLocation, m_PayloadAlignment, Flags); - } - LogEntries.push_back({.Key = Entry.second.Key, .Location = NewLocation}); - } - std::filesystem::path BlockPath = BlockStore ::GetBlockPath(m_BlocksBasePath, WriteBlockIndex); - CreateDirectories(BlockPath.parent_path()); - BlockFile.Close(); - std::filesystem::rename(LegacyDataPath, BlockPath); - CasLog.Append(LogEntries); - for (const DiskIndexEntry& Entry : LogEntries) + for (const auto& Entry : LegacyDiskIndex) + { + const IoHash& ChunkHash = Entry.first; + const LegacyDiskLocation& Location = Entry.second.Location; + if (Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) { - m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount())); + uint8_t Flags = 0xff & (Location.Flags() >> 56); + DiskLocation NewLocation = DiskLocation(Location.Size(), Flags); + LogEntries.push_back({.Key = Entry.second.Key, .Location = NewLocation}); + continue; } - - MigratedChunkCount += LogEntries.size(); - MigratedBlockCount++; + size_t ChunkIndex = ChunkLocations.size(); + ChunkLocations.push_back({.BlockIndex = 0, .Offset = Location.Offset(), .Size = Location.Size()}); + ChunkIndexToChunkHash[ChunkIndex] = ChunkHash; + TotalSize += Location.Size(); } - else + for (const DiskIndexEntry& Entry : LogEntries) { - std::vector ChunkHashes; - ChunkHashes.reserve(LegacyDiskIndex.size()); - for (const auto& Entry : LegacyDiskIndex) - { - ChunkHashes.push_back(Entry.first); - } - - std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) { - auto LhsKeyIt = LegacyDiskIndex.find(Lhs); - auto RhsKeyIt = LegacyDiskIndex.find(Rhs); - return LhsKeyIt->second.Location.Offset() < RhsKeyIt->second.Location.Offset(); - }); - - uint64_t BlockSize = 0; - uint64_t BlockOffset = 0; - std::vector NewLocations; - struct BlockData - { - std::vector> Chunks; - uint64_t BlockOffset; - uint64_t BlockSize; - uint32_t BlockIndex; - }; - - std::vector BlockRanges; - std::vector> Chunks; - BlockRanges.reserve(MaxRequiredBlockCount); - for (const IoHash& ChunkHash : ChunkHashes) - { - const LegacyDiskIndexEntry& LegacyEntry = LegacyDiskIndex[ChunkHash]; - const LegacyDiskLocation& LegacyChunkLocation = LegacyEntry.Location; - - if (LegacyChunkLocation.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) - { - // For standalone files we just store the chunk hash an use the size from the legacy index as is - Chunks.push_back({ChunkHash, {}}); - continue; - } - - uint64_t ChunkOffset = LegacyChunkLocation.Offset(); - uint64_t ChunkSize = LegacyChunkLocation.Size(); - uint64_t ChunkEnd = ChunkOffset + ChunkSize; - - if (BlockSize == 0) - { - BlockOffset = ChunkOffset; - } - if ((ChunkEnd - BlockOffset) > MaxBlockSize) - { - BlockData BlockRange{.BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}; - BlockRange.Chunks.swap(Chunks); - BlockRanges.push_back(BlockRange); - - WriteBlockIndex++; - while (std::filesystem::exists(BlockStore ::GetBlockPath(m_BlocksBasePath, WriteBlockIndex))) - { - ++WriteBlockIndex; - } - BlockOffset = ChunkOffset; - BlockSize = 0; - } - BlockSize = RoundUp(BlockSize, m_PayloadAlignment); - BlockStoreLocation ChunkLocation = {.BlockIndex = WriteBlockIndex, .Offset = ChunkOffset - BlockOffset, .Size = ChunkSize}; - Chunks.push_back({ChunkHash, ChunkLocation}); - BlockSize = ChunkEnd - BlockOffset; - } - if (BlockSize > 0) - { - BlockRanges.push_back( - {.Chunks = std::move(Chunks), .BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex}); - } - Stopwatch WriteBlockTimer; - - std::reverse(BlockRanges.begin(), BlockRanges.end()); - std::vector Buffer(1 << 28); - for (size_t Idx = 0; Idx < BlockRanges.size(); ++Idx) - { - const BlockData& BlockRange = BlockRanges[Idx]; - if (Idx > 0) - { - uint64_t Remaining = BlockRange.BlockOffset + BlockRange.BlockSize; - uint64_t Completed = BlockOffset + BlockSize - Remaining; - uint64_t ETA = (WriteBlockTimer.GetElapsedTimeMs() * Remaining) / Completed; - - ZEN_INFO("migrating store '{}' {}/{} blocks, remaining {} ({}) ETA: {}", - m_BucketDir / m_BucketDir, - Idx, - BlockRanges.size(), - NiceBytes(BlockRange.BlockOffset + BlockRange.BlockSize), - NiceBytes(BlockOffset + BlockSize), - NiceTimeSpanMs(ETA)); - } - - std::filesystem::path BlockPath = BlockStore ::GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex); - BlockStoreFile ChunkBlock(BlockPath); - ChunkBlock.Create(BlockRange.BlockSize); - uint64_t Offset = 0; - while (Offset < BlockRange.BlockSize) - { - uint64_t Size = BlockRange.BlockSize - Offset; - if (Size > Buffer.size()) - { - Size = Buffer.size(); - } - BlockFile.Read(Buffer.data(), Size, BlockRange.BlockOffset + Offset); - ChunkBlock.Write(Buffer.data(), Size, Offset); - Offset += Size; - } - ChunkBlock.Truncate(Offset); - ChunkBlock.Flush(); + m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount())); + } + CasLog.Append(LogEntries); + m_BlockStore.Split( + ChunkLocations, + LegacyDataPath, + m_BlocksBasePath, + MaxBlockSize, + BlockStoreDiskLocation::MaxBlockIndex + 1, + m_PayloadAlignment, + CleanSource, + [this, &LegacyDiskIndex, &ChunkIndexToChunkHash, &LegacyCasLog, &CasLog, CleanSource, &MigratedBlockCount, &MigratedChunkCount]( + const std::vector>& MovedChunks) { std::vector LogEntries; - LogEntries.reserve(BlockRange.Chunks.size()); - for (const auto& Entry : BlockRange.Chunks) + LogEntries.reserve(MovedChunks.size()); + for (const auto& Entry : MovedChunks) { - const LegacyDiskIndexEntry& LegacyEntry = LegacyDiskIndex[Entry.first]; - - DiskLocation NewLocation; - uint8_t Flags = 0xff & (LegacyEntry.Location.Flags() >> 56); - if (LegacyEntry.Location.IsFlagSet(LegacyDiskLocation::kStandaloneFile)) - { - NewLocation = DiskLocation(LegacyEntry.Location.Size(), Flags); - } - else - { - NewLocation = DiskLocation(Entry.second, m_PayloadAlignment, Flags); - } - LogEntries.push_back({.Key = Entry.first, .Location = NewLocation}); + size_t ChunkIndex = Entry.first; + const BlockStoreLocation& NewLocation = Entry.second; + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + const LegacyDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash]; + const LegacyDiskLocation& OldLocation = OldEntry.Location; + uint8_t Flags = 0xff & (OldLocation.Flags() >> 56); + LogEntries.push_back({.Key = ChunkHash, .Location = DiskLocation(NewLocation, m_PayloadAlignment, Flags)}); } - CasLog.Append(LogEntries); for (const DiskIndexEntry& Entry : LogEntries) { m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount())); } - MigratedChunkCount += LogEntries.size(); - MigratedBlockCount++; - + CasLog.Append(LogEntries); + CasLog.Flush(); if (CleanSource) { std::vector LegacyLogEntries; - LegacyLogEntries.reserve(BlockRange.Chunks.size()); - for (const auto& Entry : BlockRange.Chunks) + LegacyLogEntries.reserve(MovedChunks.size()); + for (const auto& Entry : MovedChunks) { - LegacyLogEntries.push_back( - {.Key = Entry.first, .Location = LegacyDiskLocation(0, 0, 0, LegacyDiskLocation::kTombStone)}); + size_t ChunkIndex = Entry.first; + const IoHash& ChunkHash = ChunkIndexToChunkHash[ChunkIndex]; + const LegacyDiskIndexEntry& OldEntry = LegacyDiskIndex[ChunkHash]; + const LegacyDiskLocation& OldLocation = OldEntry.Location; + LegacyDiskLocation NewLocation(OldLocation.Offset(), + OldLocation.Size(), + 0, + OldLocation.Flags() | LegacyDiskLocation::kTombStone); + LegacyLogEntries.push_back(LegacyDiskIndexEntry(ChunkHash, NewLocation)); } LegacyCasLog.Append(LegacyLogEntries); - BlockFile.SetFileSize(BlockRange.BlockOffset); + LegacyCasLog.Flush(); } - } - } - BlockFile.Close(); + MigratedBlockCount++; + MigratedChunkCount += MovedChunks.size(); + }); + LegacyCasLog.Close(); CasLog.Close(); diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 6f5578be8..3358075ec 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -170,7 +170,7 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, { // Log removing unreferenced block // Clear out unused blocks - ZEN_INFO("removing unused block for '{}' at '{}'", m_BlocksBasePath, Path); + ZEN_INFO("removing unused block at '{}'", Path); std::error_code Ec; std::filesystem::remove(Path, Ec); if (Ec) @@ -449,7 +449,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, }); m_ChunkBlocks[BlockIndex] = nullptr; } - ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'", m_BlocksBasePath, BlockIndex, OldBlockFile->GetPath()); + ZEN_DEBUG("marking cas store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); std::error_code Ec; OldBlockFile->MarkAsDeleteOnClose(Ec); if (Ec) @@ -572,7 +572,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, }); m_ChunkBlocks[BlockIndex] = nullptr; } - ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'", m_BlocksBasePath, BlockIndex, OldBlockFile->GetPath()); + ZEN_DEBUG("marking cas store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); std::error_code Ec; OldBlockFile->MarkAsDeleteOnClose(Ec); if (Ec) -- cgit v1.2.3 From dc589650427f2ab444a7ebf78fb59ee751a4c2c8 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 10:53:41 +0200 Subject: use std::vector> instead of map --- zenserver/cache/structuredcachestore.cpp | 22 ++-------------------- zenstore/blockstore.cpp | 8 ++++---- zenstore/compactcas.cpp | 20 ++------------------ zenstore/include/zenstore/blockstore.h | 8 ++++---- 4 files changed, 12 insertions(+), 46 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index e9c051f88..9cfb5fbf3 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -156,24 +156,6 @@ namespace { return BucketDir / (std::string("zen") + LegacyDataExtension); } - std::vector MakeDiskIndexEntries(const std::unordered_map& MovedChunks, - const std::vector& DeletedChunks) - { - std::vector result; - result.reserve(MovedChunks.size()); - for (const auto& MovedEntry : MovedChunks) - { - result.push_back({.Key = MovedEntry.first, .Location = MovedEntry.second}); - } - for (const IoHash& ChunkHash : DeletedChunks) - { - DiskLocation Location; - Location.Flags |= DiskLocation::kTombStone; - result.push_back({.Key = ChunkHash, .Location = Location}); - } - return result; - } - bool ValidateLegacyEntry(const LegacyDiskIndexEntry& Entry, std::string& OutReason) { if (Entry.Key == IoHash::Zero) @@ -1659,8 +1641,8 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &Index, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - const std::unordered_map& MovedChunks, - const std::vector& RemovedChunks) { + const std::vector>& MovedChunks, + const std::vector& RemovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); for (const auto& Entry : MovedChunks) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 3358075ec..593ccc529 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -459,8 +459,8 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, continue; } - std::unordered_map MovedChunks; - std::vector Chunk; + std::vector> MovedChunks; + std::vector Chunk; for (const size_t& ChunkIndex : KeepMap) { const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; @@ -541,8 +541,8 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); - MovedChunks[ChunkIndex] = {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}; - WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); + MovedChunks.push_back({ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}}); + WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); } Chunk.clear(); if (NewBlockFile) diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 8d90ba186..6a9cba817 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -48,22 +48,6 @@ struct CasDiskIndexHeader static_assert(sizeof(CasDiskIndexHeader) == 32); namespace { - std::vector MakeCasDiskEntries(const std::unordered_map& MovedChunks, - const std::vector& DeletedChunks) - { - std::vector result; - result.reserve(MovedChunks.size()); - for (const auto& MovedEntry : MovedChunks) - { - result.push_back({.Key = MovedEntry.first, .Location = MovedEntry.second}); - } - for (const IoHash& ChunkHash : DeletedChunks) - { - result.push_back({.Key = ChunkHash, .Flags = CasDiskIndexEntry::kTombstone}); - } - return result; - } - const char* IndexExtension = ".uidx"; const char* LogExtension = ".ulog"; @@ -493,8 +477,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &LocationMap, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - const std::unordered_map& MovedChunks, - const std::vector& RemovedChunks) { + const std::vector>& MovedChunks, + const std::vector& RemovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); for (const auto& Entry : MovedChunks) diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 0cef7600f..17f4e090e 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -117,13 +117,13 @@ public: std::unordered_set ExcludeBlockIndexes; size_t BlockCount; }; - typedef std::function& MovedChunks, const std::vector& RemovedChunks)> + typedef std::function>& MovedChunks, + const std::vector& RemovedChunks)> ReclaimCallback; typedef std::function WriteChunkCallback; typedef std::function IterateChunksSmallSizeCallback; typedef std::function IterateChunksLargeSizeCallback; - - typedef std::function>& MovedChunks)> SplitCallback; + typedef std::function>& MovedChunks)> SplitCallback; void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, @@ -144,7 +144,7 @@ public: const std::vector& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, - const ReclaimCallback& Callback = [](const std::unordered_map&, const std::vector&) {}); + const ReclaimCallback& Callback = [](const std::vector>&, const std::vector&) {}); void IterateChunks(const std::vector& ChunkLocations, IterateChunksSmallSizeCallback SmallSizeCallback, IterateChunksLargeSizeCallback LargeSizeCallback); -- cgit v1.2.3 From 80a39f97f465465466ccd2d5914421db55efb80e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 11:01:12 +0200 Subject: add back gc space reclaim call --- zenserver/cache/structuredcachestore.cpp | 3 ++- zenstore/blockstore.cpp | 11 ++++++----- zenstore/compactcas.cpp | 3 ++- zenstore/include/zenstore/blockstore.h | 5 ++++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 9cfb5fbf3..5b08ed83c 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1687,7 +1687,8 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) m_Index[Entry.Key].Location = Entry.Location; } } - }); + }, + [&GcCtx]() { return GcCtx.CollectSmallObjects(); }); GcCtx.DeletedCas(DeletedChunks); } diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 593ccc529..b3608687b 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -279,7 +279,8 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, const std::vector& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, - const ReclaimCallback& Callback) + const ReclaimCallback& ChangeCallback, + const ClaimGCReserveCallback& GcReserveCallback) { if (ChunkLocations.empty()) { @@ -437,7 +438,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { DeletedSize += ChunkLocations[DeleteIndex].Size; } - Callback({}, DeleteMap); + ChangeCallback({}, DeleteMap); DeletedCount += DeleteMap.size(); { RwLock::ExclusiveLockScope _i(m_InsertLock); @@ -477,7 +478,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, NewBlockFile->Flush(); } { - Callback(MovedChunks, {}); + ChangeCallback(MovedChunks, {}); MovedCount += KeepMap.size(); MovedChunks.clear(); RwLock::ExclusiveLockScope __(m_InsertLock); @@ -512,7 +513,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } if (Space.Free < m_MaxBlockSize) { - uint64_t ReclaimedSpace = 0; // GcCtx.ClaimGCReserve(); + uint64_t ReclaimedSpace = GcReserveCallback(); if (Space.Free + ReclaimedSpace < m_MaxBlockSize) { ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}", @@ -558,7 +559,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, DeletedSize += ChunkLocations[DeleteIndex].Size; } - Callback(MovedChunks, DeleteMap); + ChangeCallback(MovedChunks, DeleteMap); MovedCount += KeepMap.size(); DeletedCount += DeleteMap.size(); MovedChunks.clear(); diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 6a9cba817..5734a16b6 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -518,7 +518,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_LocationMap[Entry.Key] = Entry.Location; } } - }); + }, + [&GcCtx]() { return GcCtx.CollectSmallObjects(); }); GcCtx.DeletedCas(DeletedChunks); } diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 17f4e090e..28f0a5541 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -120,6 +120,7 @@ public: typedef std::function>& MovedChunks, const std::vector& RemovedChunks)> ReclaimCallback; + typedef std::function ClaimGCReserveCallback; typedef std::function WriteChunkCallback; typedef std::function IterateChunksSmallSizeCallback; typedef std::function IterateChunksLargeSizeCallback; @@ -144,7 +145,9 @@ public: const std::vector& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, - const ReclaimCallback& Callback = [](const std::vector>&, const std::vector&) {}); + const ReclaimCallback& ChangeCallback = [](const std::vector>&, + const std::vector&) {}, + const ClaimGCReserveCallback& GcReserveCallback = []() { return 0; }); void IterateChunks(const std::vector& ChunkLocations, IterateChunksSmallSizeCallback SmallSizeCallback, IterateChunksLargeSizeCallback LargeSizeCallback); -- cgit v1.2.3 From 10690e805f45a590094a659c8e1f6482d12aaf8e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 11:03:04 +0200 Subject: cleanup --- zenserver/cache/structuredcachestore.cpp | 8 ++--- zenstore/blockstore.cpp | 50 ++++++++++++++++---------------- zenstore/compactcas.cpp | 13 +++------ zenstore/include/zenstore/blockstore.h | 33 ++++++++++++--------- 4 files changed, 52 insertions(+), 52 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 5b08ed83c..143f43deb 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -995,7 +995,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) m_PayloadAlignment, CleanSource, [this, &LegacyDiskIndex, &ChunkIndexToChunkHash, &LegacyCasLog, &CasLog, CleanSource, &MigratedBlockCount, &MigratedChunkCount]( - const std::vector>& MovedChunks) { + const BlockStore::MovedChunksArray& MovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size()); for (const auto& Entry : MovedChunks) @@ -1598,7 +1598,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) TotalChunkCount = TotalChunkHashes.size(); std::vector ChunkLocations; - std::vector KeepChunkIndexes; + BlockStore::ChunkIndexArray KeepChunkIndexes; std::vector ChunkIndexToChunkHash; ChunkLocations.reserve(TotalChunkCount); ChunkLocations.reserve(TotalChunkCount); @@ -1641,8 +1641,8 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &Index, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - const std::vector>& MovedChunks, - const std::vector& RemovedChunks) { + const BlockStore::MovedChunksArray& MovedChunks, + const BlockStore::ChunkIndexArray& RemovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); for (const auto& Entry : MovedChunks) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index b3608687b..fd54e7291 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -276,7 +276,7 @@ BlockStore::Flush() void BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, const std::vector& ChunkLocations, - const std::vector& KeepChunkIndexes, + const ChunkIndexArray& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, const ReclaimCallback& ChangeCallback, @@ -338,8 +338,8 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } std::unordered_map BlockIndexToChunkMapIndex; - std::vector> BlockKeepChunks; - std::vector> BlockDeleteChunks; + std::vector BlockKeepChunks; + std::vector BlockDeleteChunks; BlockIndexToChunkMapIndex.reserve(BlockCount); BlockKeepChunks.reserve(BlockCount); @@ -374,12 +374,12 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, if (KeepChunkMap.contains(Index)) { - std::vector& IndexMap = BlockKeepChunks[ChunkMapIndex]; + ChunkIndexArray& IndexMap = BlockKeepChunks[ChunkMapIndex]; IndexMap.push_back(Index); NewTotalSize += Location.Size; continue; } - std::vector& IndexMap = BlockDeleteChunks[ChunkMapIndex]; + ChunkIndexArray& IndexMap = BlockDeleteChunks[ChunkMapIndex]; IndexMap.push_back(Index); DeleteCount++; } @@ -388,9 +388,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size()); for (const auto& Entry : BlockIndexToChunkMapIndex) { - uint32_t BlockIndex = Entry.first; - size_t ChunkMapIndex = Entry.second; - const std::vector& ChunkMap = BlockDeleteChunks[ChunkMapIndex]; + uint32_t BlockIndex = Entry.first; + size_t ChunkMapIndex = Entry.second; + const ChunkIndexArray& ChunkMap = BlockDeleteChunks[ChunkMapIndex]; if (ChunkMap.empty()) { continue; @@ -430,10 +430,10 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, ZEN_ASSERT(OldBlockFile); } - const std::vector& KeepMap = BlockKeepChunks[ChunkMapIndex]; + const ChunkIndexArray& KeepMap = BlockKeepChunks[ChunkMapIndex]; if (KeepMap.empty()) { - const std::vector& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; + const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; for (size_t DeleteIndex : DeleteMap) { DeletedSize += ChunkLocations[DeleteIndex].Size; @@ -460,8 +460,8 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, continue; } - std::vector> MovedChunks; - std::vector Chunk; + MovedChunksArray MovedChunks; + std::vector Chunk; for (const size_t& ChunkIndex : KeepMap) { const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; @@ -553,7 +553,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, NewBlockFile = {}; } - const std::vector& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; + const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; for (size_t DeleteIndex : DeleteMap) { DeletedSize += ChunkLocations[DeleteIndex].Size; @@ -595,10 +595,10 @@ BlockStore::IterateChunks(const std::vector& ChunkLocations, // pass. An alternative strategy would be to use memory mapping. { - std::vector BigChunks; - const uint64_t WindowSize = 4 * 1024 * 1024; - IoBuffer ReadBuffer{WindowSize}; - void* BufferBase = ReadBuffer.MutableData(); + ChunkIndexArray BigChunks; + const uint64_t WindowSize = 4 * 1024 * 1024; + IoBuffer ReadBuffer{WindowSize}; + void* BufferBase = ReadBuffer.MutableData(); RwLock::SharedLockScope _(m_InsertLock); @@ -740,7 +740,7 @@ BlockStore::Split(const std::vector& ChunkLocations, if (CleanSource && (MaxRequiredBlockCount < 2)) { - std::vector> Chunks; + MovedChunksArray Chunks; Chunks.reserve(ChunkCount); for (size_t Index = 0; Index < ChunkCount; ++Index) { @@ -755,7 +755,7 @@ BlockStore::Split(const std::vector& ChunkLocations, return true; } - std::vector ChunkIndexes; + ChunkIndexArray ChunkIndexes; ChunkIndexes.reserve(ChunkCount); for (size_t Index = 0; Index < ChunkCount; ++Index) { @@ -773,14 +773,14 @@ BlockStore::Split(const std::vector& ChunkLocations, std::vector NewLocations; struct BlockData { - std::vector> Chunks; - uint64_t BlockOffset; - uint64_t BlockSize; - uint32_t BlockIndex; + MovedChunksArray Chunks; + uint64_t BlockOffset; + uint64_t BlockSize; + uint32_t BlockIndex; }; - std::vector BlockRanges; - std::vector> Chunks; + std::vector BlockRanges; + MovedChunksArray Chunks; BlockRanges.reserve(MaxRequiredBlockCount); for (const size_t& ChunkIndex : ChunkIndexes) { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 5734a16b6..ade6a7daf 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -345,11 +345,6 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) std::vector BadKeys; - // We do a read sweep through the payloads file and validate - // any entries that are contained within each segment, with - // the assumption that most entries will be checked in this - // pass. An alternative strategy would be to use memory mapping. - m_BlockStore.IterateChunks( ChunkLocations, [&ChunkIndexToChunkHash, &BadKeys](size_t ChunkIndex, const void* Data, uint64_t Size) { @@ -443,7 +438,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) } std::vector ChunkLocations; - std::vector KeepChunkIndexes; + BlockStore::ChunkIndexArray KeepChunkIndexes; std::vector ChunkIndexToChunkHash; ChunkLocations.reserve(TotalChunkCount); ChunkIndexToChunkHash.reserve(TotalChunkCount); @@ -477,8 +472,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) m_PayloadAlignment, false, [this, &DeletedChunks, &ChunkIndexToChunkHash, &LocationMap, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - const std::vector>& MovedChunks, - const std::vector& RemovedChunks) { + const BlockStore::MovedChunksArray& MovedChunks, + const BlockStore::ChunkIndexArray& RemovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); for (const auto& Entry : MovedChunks) @@ -849,7 +844,7 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) m_PayloadAlignment, CleanSource, [this, &LegacyDiskIndex, &ChunkIndexToChunkHash, &LegacyCasLog, &CasLog, CleanSource, &MigratedBlockCount, &MigratedChunkCount]( - const std::vector>& MovedChunks) { + const BlockStore::MovedChunksArray& MovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size()); for (const auto& Entry : MovedChunks) diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 28f0a5541..53bcf72db 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -117,24 +117,28 @@ public: std::unordered_set ExcludeBlockIndexes; size_t BlockCount; }; - typedef std::function>& MovedChunks, - const std::vector& RemovedChunks)> - ReclaimCallback; - typedef std::function ClaimGCReserveCallback; - typedef std::function WriteChunkCallback; - typedef std::function IterateChunksSmallSizeCallback; - typedef std::function IterateChunksLargeSizeCallback; - typedef std::function>& MovedChunks)> SplitCallback; + + typedef std::vector> MovedChunksArray; + typedef std::vector ChunkIndexArray; + + typedef std::function ReclaimCallback; + typedef std::function ClaimGCReserveCallback; + typedef std::function WriteChunkCallback; + typedef std::function IterateChunksSmallSizeCallback; + typedef std::function IterateChunksLargeSizeCallback; + typedef std::function SplitCallback; void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, uint64_t MaxBlockCount, const std::vector& KnownLocations); + void WriteChunk( const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback = [](const BlockStoreLocation&) {}); + Ref GetChunkBlock(const BlockStoreLocation& Location); void Flush(); @@ -142,15 +146,16 @@ public: void ReclaimSpace( const ReclaimSnapshotState& Snapshot, const std::vector& ChunkLocations, - const std::vector& KeepChunkIndexes, + const ChunkIndexArray& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, - const ReclaimCallback& ChangeCallback = [](const std::vector>&, - const std::vector&) {}, + const ReclaimCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&) {}, const ClaimGCReserveCallback& GcReserveCallback = []() { return 0; }); - void IterateChunks(const std::vector& ChunkLocations, - IterateChunksSmallSizeCallback SmallSizeCallback, - IterateChunksLargeSizeCallback LargeSizeCallback); + + void IterateChunks(const std::vector& ChunkLocations, + IterateChunksSmallSizeCallback SmallSizeCallback, + IterateChunksLargeSizeCallback LargeSizeCallback); + static bool Split(const std::vector& ChunkLocations, const std::filesystem::path& SourceBlockFilePath, const std::filesystem::path& BlocksBasePath, -- cgit v1.2.3 From f14272b7adfb562a39295324a92c7bbf31c9bd3e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 11:22:14 +0200 Subject: restore cg comment --- zenserver/cache/structuredcachestore.cpp | 3 ++- zenstore/compactcas.cpp | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 143f43deb..6311fc2c5 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1410,7 +1410,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { ZEN_TRACE_CPU("Z$::DiskLayer::CacheBucket::CollectGarbage"); - std::vector ExpiredStandaloneEntries; + ZEN_INFO("collecting garbage from '{}'", m_BucketDir / m_BucketName); Stopwatch TotalTimer; uint64_t WriteBlockTimeUs = 0; @@ -1473,6 +1473,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) return; } + std::vector ExpiredStandaloneEntries; IndexMap Index; BlockStore::ReclaimSnapshotState BlockStoreState; { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index ade6a7daf..5a0ba974b 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -321,8 +321,6 @@ CasContainerStrategy::Flush() void CasContainerStrategy::Scrub(ScrubContext& Ctx) { - ZEN_UNUSED(Ctx); - RwLock::SharedLockScope _(m_LocationMapLock); uint64_t TotalChunkCount = m_LocationMap.size(); @@ -409,6 +407,31 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) void CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { + // It collects all the blocks that we want to delete chunks from. For each such + // block we keep a list of chunks to retain and a list of chunks to delete. + // + // If there is a block that we are currently writing to, that block is omitted + // from the garbage collection. + // + // Next it will iterate over all blocks that we want to remove chunks from. + // If the block is empty after removal of chunks we mark the block as pending + // delete - we want to delete it as soon as there are no IoBuffers using the + // block file. + // Once complete we update the m_LocationMap by removing the chunks. + // + // If the block is non-empty we write out the chunks we want to keep to a new + // block file (creating new block files as needed). + // + // We update the index as we complete each new block file. This makes it possible + // to break the GC if we want to limit time for execution. + // + // GC can very parallell to regular operation - it will block while taking + // a snapshot of the current m_LocationMap state and while moving blocks it will + // do a blocking operation and update the m_LocationMap after each new block is + // written and figuring out the path to the next new block. + + ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName); + uint64_t WriteBlockTimeUs = 0; uint64_t WriteBlockLongestTimeUs = 0; uint64_t ReadBlockTimeUs = 0; -- cgit v1.2.3 From 2c340516747cf666ee18a7ffad731c06cdb2ecb6 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 11:26:58 +0200 Subject: Don't use "GC" term in block store --- zenstore/blockstore.cpp | 4 ++-- zenstore/include/zenstore/blockstore.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index fd54e7291..d293eb97d 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -280,7 +280,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, uint64_t PayloadAlignment, bool DryRun, const ReclaimCallback& ChangeCallback, - const ClaimGCReserveCallback& GcReserveCallback) + const ClaimDiskReserveCallback& DiskReserveCallback) { if (ChunkLocations.empty()) { @@ -513,7 +513,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } if (Space.Free < m_MaxBlockSize) { - uint64_t ReclaimedSpace = GcReserveCallback(); + uint64_t ReclaimedSpace = DiskReserveCallback(); if (Space.Free + ReclaimedSpace < m_MaxBlockSize) { ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}", diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 53bcf72db..4a1642413 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -122,7 +122,7 @@ public: typedef std::vector ChunkIndexArray; typedef std::function ReclaimCallback; - typedef std::function ClaimGCReserveCallback; + typedef std::function ClaimDiskReserveCallback; typedef std::function WriteChunkCallback; typedef std::function IterateChunksSmallSizeCallback; typedef std::function IterateChunksLargeSizeCallback; @@ -149,8 +149,8 @@ public: const ChunkIndexArray& KeepChunkIndexes, uint64_t PayloadAlignment, bool DryRun, - const ReclaimCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&) {}, - const ClaimGCReserveCallback& GcReserveCallback = []() { return 0; }); + const ReclaimCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&) {}, + const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; }); void IterateChunks(const std::vector& ChunkLocations, IterateChunksSmallSizeCallback SmallSizeCallback, -- cgit v1.2.3 From 56381dc0de2d19a373c132b9a624308dc88e31bd Mon Sep 17 00:00:00 2001 From: Stefan Boberg Date: Mon, 2 May 2022 14:35:20 +0200 Subject: removed redundant pragma pack --- zenserver/cache/structuredcachestore.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 6311fc2c5..015912ce9 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -39,9 +39,6 @@ ZEN_THIRD_PARTY_INCLUDES_END ////////////////////////////////////////////////////////////////////////// -#pragma pack(push) -#pragma pack(1) - namespace zen { namespace { @@ -863,7 +860,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) { BasicFile BlockFile; BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); - BlockFileSize = BlockFile.FileSize(); + BlockFileSize = BlockFile.FileSize();strcut } std::unordered_map LegacyDiskIndex; -- cgit v1.2.3 From bb7593c9ea3412a48b3d29f3e7f7b23d7a785b2f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 16:41:32 +0200 Subject: Refactor WriteChunk to not need callback --- zenserver/cache/structuredcachestore.cpp | 46 ++++++++++++++------------------ zenstore/blockstore.cpp | 19 +++++-------- zenstore/compactcas.cpp | 19 +++++++------ zenstore/include/zenstore/blockstore.h | 20 +++++--------- 4 files changed, 42 insertions(+), 62 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 015912ce9..6f6f182b9 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -860,7 +860,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) { BasicFile BlockFile; BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead); - BlockFileSize = BlockFile.FileSize();strcut + BlockFileSize = BlockFile.FileSize(); } std::unordered_map LegacyDiskIndex; @@ -1845,31 +1845,25 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, const EntryFlags |= DiskLocation::kCompressed; } - uint64_t ChunkSize = Value.Value.Size(); - - m_BlockStore.WriteChunk(Value.Value.Data(), - ChunkSize, - m_PayloadAlignment, - [this, &HashKey, EntryFlags](const BlockStoreLocation& BlockStoreLocation) { - DiskLocation Location(BlockStoreLocation, m_PayloadAlignment, EntryFlags); - const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; - m_SlogFile.Append(DiskIndexEntry); - m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order_seq_cst); - RwLock::ExclusiveLockScope __(m_IndexLock); - if (auto It = m_Index.find(HashKey); It != m_Index.end()) - { - // TODO: should check if write is idempotent and bail out if it is? - // this would requiring comparing contents on disk unless we add a - // content hash to the index entry - IndexEntry& Entry = It.value(); - Entry.Location = Location; - Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); - } - else - { - m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); - } - }); + BlockStoreLocation BlockStoreLocation = m_BlockStore.WriteChunk(Value.Value.Data(), Value.Value.Size(), m_PayloadAlignment); + DiskLocation Location(BlockStoreLocation, m_PayloadAlignment, EntryFlags); + const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; + m_SlogFile.Append(DiskIndexEntry); + m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order_seq_cst); + RwLock::ExclusiveLockScope __(m_IndexLock); + if (auto It = m_Index.find(HashKey); It != m_Index.end()) + { + // TODO: should check if write is idempotent and bail out if it is? + // this would requiring comparing contents on disk unless we add a + // content hash to the index entry + IndexEntry& Entry = It.value(); + Entry.Location = Location; + Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); + } + else + { + m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); + } } ////////////////////////////////////////////////////////////////////////// diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index d293eb97d..178687b5f 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -193,8 +193,8 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, } } -void -BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback) +BlockStoreLocation +BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) { RwLock::ExclusiveLockScope InsertLock(m_InsertLock); @@ -227,15 +227,11 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, Writ uint64_t InsertOffset = m_CurrentInsertOffset; m_CurrentInsertOffset = RoundUp(InsertOffset + Size, Alignment); Ref WriteBlock = m_WriteBlock; - m_ActiveWriteBlockIndexes.push_back(WriteBlockIndex); InsertLock.ReleaseNow(); WriteBlock->Write(Data, Size, InsertOffset); - Callback({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = Size}); - - RwLock::ExclusiveLockScope _(m_InsertLock); - m_ActiveWriteBlockIndexes.erase(std::find(m_ActiveWriteBlockIndexes.begin(), m_ActiveWriteBlockIndexes.end(), WriteBlockIndex)); + return {.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = Size}; } BlockStore::ReclaimSnapshotState @@ -243,11 +239,8 @@ BlockStore::GetReclaimSnapshotState() { ReclaimSnapshotState State; RwLock::ExclusiveLockScope _(m_InsertLock); - for (uint32_t BlockIndex : m_ActiveWriteBlockIndexes) - { - State.ExcludeBlockIndexes.insert(BlockIndex); - } - State.BlockCount = m_ChunkBlocks.size(); + State.ExcludeBlockIndex = m_WriteBlock ? m_WriteBlockIndex.load(std::memory_order_acquire) : 0xffffffffu; + State.BlockCount = m_ChunkBlocks.size(); _.ReleaseNow(); return State; } @@ -351,7 +344,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { const BlockStoreLocation& Location = ChunkLocations[Index]; OldTotalSize += Location.Size; - if (Snapshot.ExcludeBlockIndexes.contains(Location.BlockIndex)) + if (Location.BlockIndex == Snapshot.ExcludeBlockIndex) { continue; } diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 5a0ba974b..c6115024b 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -250,16 +250,15 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const // This should be a rare occasion and the current flow reduces the time we block for // reads, insert and GC. - m_BlockStore.WriteChunk(ChunkData, ChunkSize, m_PayloadAlignment, [this, &ChunkHash, ChunkSize](const BlockStoreLocation& Location) { - BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); - const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; - m_CasLog.Append(IndexEntry); - { - RwLock::ExclusiveLockScope _(m_LocationMapLock); - m_LocationMap.emplace(ChunkHash, DiskLocation); - } - m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order_seq_cst); - }); + BlockStoreLocation Location = m_BlockStore.WriteChunk(ChunkData, ChunkSize, m_PayloadAlignment); + BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); + const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; + m_CasLog.Append(IndexEntry); + { + RwLock::ExclusiveLockScope _(m_LocationMapLock); + m_LocationMap.emplace(ChunkHash, DiskLocation); + } + m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order_seq_cst); return CasStore::InsertResult{.New = true}; } diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 4a1642413..5019e257d 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -114,8 +114,8 @@ class BlockStore public: struct ReclaimSnapshotState { - std::unordered_set ExcludeBlockIndexes; - size_t BlockCount; + size_t ExcludeBlockIndex; + size_t BlockCount; }; typedef std::vector> MovedChunksArray; @@ -123,7 +123,6 @@ public: typedef std::function ReclaimCallback; typedef std::function ClaimDiskReserveCallback; - typedef std::function WriteChunkCallback; typedef std::function IterateChunksSmallSizeCallback; typedef std::function IterateChunksLargeSizeCallback; typedef std::function SplitCallback; @@ -133,11 +132,7 @@ public: uint64_t MaxBlockCount, const std::vector& KnownLocations); - void WriteChunk( - const void* Data, - uint64_t Size, - uint64_t Alignment, - WriteChunkCallback Callback = [](const BlockStoreLocation&) {}); + BlockStoreLocation WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment); Ref GetChunkBlock(const BlockStoreLocation& Location); void Flush(); @@ -171,11 +166,10 @@ public: private: std::unordered_map> m_ChunkBlocks; - RwLock m_InsertLock; // used to serialize inserts - Ref m_WriteBlock; - std::uint64_t m_CurrentInsertOffset = 0; - std::atomic_uint32_t m_WriteBlockIndex{}; - std::vector m_ActiveWriteBlockIndexes; + RwLock m_InsertLock; // used to serialize inserts + Ref m_WriteBlock; + std::uint64_t m_CurrentInsertOffset = 0; + std::atomic_uint32_t m_WriteBlockIndex{}; uint64_t m_MaxBlockSize = 1u << 28; uint64_t m_MaxBlockCount = BlockStoreDiskLocation::MaxBlockIndex + 1; -- cgit v1.2.3 From 0599d52d80beb85e50ffe1f56c8434376a8c08a2 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 16:50:08 +0200 Subject: simplify lambda captures --- zenserver/cache/structuredcachestore.cpp | 46 ++++++++++++-------------------- zenserver/projectstore.cpp | 2 +- zenstore/blockstore.cpp | 22 +++++---------- zenstore/compactcas.cpp | 20 +++++++------- zenstore/filecas.cpp | 4 +-- zenstore/gc.cpp | 14 +++++----- 6 files changed, 41 insertions(+), 67 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 6f6f182b9..46f9d8fe6 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -366,8 +366,8 @@ void ZenCacheStore::GatherReferences(GcContext& GcCtx) { Stopwatch Timer; - const auto Guard = MakeGuard( - [this, &Timer] { ZEN_INFO("cache gathered all references from '{}' in {}", m_RootDir, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + const auto Guard = + MakeGuard([&] { ZEN_INFO("cache gathered all references from '{}' in {}", m_RootDir, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); access_tracking::AccessTimes AccessTimes; m_MemLayer.GatherAccessTimes(AccessTimes); @@ -650,7 +650,7 @@ ZenCacheDiskLayer::CacheBucket::MakeIndexSnapshot() ZEN_INFO("write store snapshot for '{}'", m_BucketDir / m_BucketName); uint64_t EntryCount = 0; Stopwatch Timer; - const auto _ = MakeGuard([this, &EntryCount, &Timer] { + const auto _ = MakeGuard([&] { ZEN_INFO("wrote store snapshot for '{}' containing #{} entries in {}", m_BucketDir / m_BucketName, EntryCount, @@ -734,7 +734,7 @@ ZenCacheDiskLayer::CacheBucket::ReadIndexFile() if (std::filesystem::is_regular_file(IndexPath)) { Stopwatch Timer; - const auto _ = MakeGuard([this, &Entries, &Timer] { + const auto _ = MakeGuard([&] { ZEN_INFO("read store '{}' index containing #{} entries in {}", m_BucketDir / m_BucketName, Entries.size(), @@ -787,7 +787,7 @@ ZenCacheDiskLayer::CacheBucket::ReadLog(uint64_t SkipEntryCount) if (std::filesystem::is_regular_file(LogPath)) { Stopwatch Timer; - const auto _ = MakeGuard([LogPath, &Entries, &Timer] { + const auto _ = MakeGuard([&] { ZEN_INFO("read store '{}' log containing #{} entries in {}", LogPath, Entries.size(), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); TCasLogFile CasLog; @@ -847,7 +847,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) uint32_t MigratedBlockCount = 0; Stopwatch MigrationTimer; uint64_t TotalSize = 0; - const auto _ = MakeGuard([this, &MigrationTimer, &MigratedChunkCount, &MigratedBlockCount, &TotalSize] { + const auto _ = MakeGuard([&] { ZEN_INFO("migrated store '{}' to #{} chunks in #{} blocks in {} ({})", m_BucketDir / m_BucketName, MigratedChunkCount, @@ -871,7 +871,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) LegacyCasLog.Open(LegacyLogPath, CleanSource ? CasLogFile::Mode::kWrite : CasLogFile::Mode::kRead); { Stopwatch Timer; - const auto __ = MakeGuard([LegacyLogPath, &LegacyDiskIndex, &Timer] { + const auto __ = MakeGuard([&] { ZEN_INFO("read store '{}' legacy log containing #{} entries in {}", LegacyLogPath, LegacyDiskIndex.size(), @@ -1317,7 +1317,7 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) uint64_t ReadBlockLongestTimeUs = 0; Stopwatch TotalTimer; - const auto _ = MakeGuard([this, &TotalTimer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto _ = MakeGuard([&] { ZEN_INFO("gathered references from '{}' in {} write lock: {} ({}), read lock: {} ({})", m_BucketDir / m_BucketName, NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), @@ -1336,7 +1336,7 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) { RwLock::SharedLockScope __(m_IndexLock); Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto ___ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); @@ -1373,7 +1373,7 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx) { RwLock::SharedLockScope __(m_IndexLock); Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto ___ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); @@ -1421,17 +1421,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) uint64_t DeletedCount = 0; uint64_t MovedCount = 0; - const auto _ = MakeGuard([this, - &TotalTimer, - &WriteBlockTimeUs, - &WriteBlockLongestTimeUs, - &ReadBlockTimeUs, - &ReadBlockLongestTimeUs, - &TotalChunkCount, - &DeletedCount, - &MovedCount, - &DeletedSize, - &OldTotalSize] { + const auto _ = MakeGuard([&] { ZEN_INFO( "garbage collect from '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved " "#{} " @@ -1476,7 +1466,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { RwLock::SharedLockScope __(m_IndexLock); Stopwatch Timer; - const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto ____ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); @@ -1530,7 +1520,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { RwLock::SharedLockScope __(m_IndexLock); Stopwatch Timer; - const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto ____ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); @@ -1554,7 +1544,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) RwLock::ExclusiveLockScope __(m_IndexLock); Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto ___ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); @@ -1638,9 +1628,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) KeepChunkIndexes, m_PayloadAlignment, false, - [this, &DeletedChunks, &ChunkIndexToChunkHash, &Index, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - const BlockStore::MovedChunksArray& MovedChunks, - const BlockStore::ChunkIndexArray& RemovedChunks) { + [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& RemovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); for (const auto& Entry : MovedChunks) @@ -1668,7 +1656,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) { RwLock::ExclusiveLockScope __(m_IndexLock); Stopwatch Timer; - const auto ____ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto ____ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); @@ -1686,7 +1674,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) } } }, - [&GcCtx]() { return GcCtx.CollectSmallObjects(); }); + [&]() { return GcCtx.CollectSmallObjects(); }); GcCtx.DeletedCas(DeletedChunks); } diff --git a/zenserver/projectstore.cpp b/zenserver/projectstore.cpp index 617f50660..aceb2df00 100644 --- a/zenserver/projectstore.cpp +++ b/zenserver/projectstore.cpp @@ -976,7 +976,7 @@ ProjectStore::GatherReferences(GcContext& GcCtx) { Stopwatch Timer; const auto Guard = - MakeGuard([this, &Timer] { ZEN_INFO("project store gathered all references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + MakeGuard([&] { ZEN_INFO("project store gathered all references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); DiscoverProjects(); diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 178687b5f..e502113fc 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -292,17 +292,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, uint64_t DeletedCount = 0; Stopwatch TotalTimer; - const auto _ = MakeGuard([this, - &TotalTimer, - &WriteBlockTimeUs, - &WriteBlockLongestTimeUs, - &ReadBlockTimeUs, - &ReadBlockLongestTimeUs, - &TotalChunkCount, - &DeletedCount, - &MovedCount, - &DeletedSize, - OldTotalSize] { + const auto _ = MakeGuard([&] { ZEN_INFO( "reclaim space for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved " "#{} " @@ -414,7 +404,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { RwLock::SharedLockScope _i(m_InsertLock); Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] { + const auto __ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); @@ -436,7 +426,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { RwLock::ExclusiveLockScope _i(m_InsertLock); Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto __ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); @@ -476,7 +466,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, MovedChunks.clear(); RwLock::ExclusiveLockScope __(m_InsertLock); Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto ___ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); @@ -515,7 +505,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, NiceBytes(Space.Free + ReclaimedSpace)); RwLock::ExclusiveLockScope _l(m_InsertLock); Stopwatch Timer; - const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto __ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); @@ -559,7 +549,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { RwLock::ExclusiveLockScope __(m_InsertLock); Stopwatch Timer; - const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto ___ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index c6115024b..e0f84e044 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -344,7 +344,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) m_BlockStore.IterateChunks( ChunkLocations, - [&ChunkIndexToChunkHash, &BadKeys](size_t ChunkIndex, const void* Data, uint64_t Size) { + [&](size_t ChunkIndex, const void* Data, uint64_t Size) { const IoHash ComputedHash = IoHash::HashBuffer(Data, Size); const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex]; if (ComputedHash != ExpectedHash) @@ -353,7 +353,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) BadKeys.push_back(ExpectedHash); } }, - [&ChunkIndexToChunkHash, &BadKeys](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) { + [&](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) { IoHashStream Hasher; BlockFile.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); IoHash ComputedHash = Hasher.GetHash(); @@ -493,9 +493,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) KeepChunkIndexes, m_PayloadAlignment, false, - [this, &DeletedChunks, &ChunkIndexToChunkHash, &LocationMap, &ReadBlockTimeUs, &ReadBlockLongestTimeUs]( - const BlockStore::MovedChunksArray& MovedChunks, - const BlockStore::ChunkIndexArray& RemovedChunks) { + [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& RemovedChunks) { std::vector LogEntries; LogEntries.reserve(MovedChunks.size() + RemovedChunks.size()); for (const auto& Entry : MovedChunks) @@ -518,7 +516,7 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { RwLock::ExclusiveLockScope __(m_LocationMapLock); Stopwatch Timer; - const auto ____ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] { + const auto ____ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); @@ -547,7 +545,7 @@ CasContainerStrategy::MakeIndexSnapshot() ZEN_INFO("write store snapshot for '{}'", m_Config.RootDirectory / m_ContainerBaseName); uint64_t EntryCount = 0; Stopwatch Timer; - const auto _ = MakeGuard([this, &EntryCount, &Timer] { + const auto _ = MakeGuard([&] { ZEN_INFO("wrote store snapshot for '{}' containing #{} entries in {}", m_Config.RootDirectory / m_ContainerBaseName, EntryCount, @@ -632,7 +630,7 @@ CasContainerStrategy::ReadIndexFile() if (std::filesystem::is_regular_file(IndexPath)) { Stopwatch Timer; - const auto _ = MakeGuard([this, &Entries, &Timer] { + const auto _ = MakeGuard([&] { ZEN_INFO("read store '{}' index containing #{} entries in {}", m_Config.RootDirectory / m_ContainerBaseName, Entries.size(), @@ -685,7 +683,7 @@ CasContainerStrategy::ReadLog(uint64_t SkipEntryCount) if (std::filesystem::is_regular_file(LogPath)) { Stopwatch Timer; - const auto _ = MakeGuard([this, &Entries, &Timer] { + const auto _ = MakeGuard([&] { ZEN_INFO("read store '{}' log containing #{} entries in {}", m_Config.RootDirectory / m_ContainerBaseName, Entries.size(), @@ -745,7 +743,7 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) uint32_t MigratedBlockCount = 0; Stopwatch MigrationTimer; uint64_t TotalSize = 0; - const auto _ = MakeGuard([this, &MigrationTimer, &MigratedChunkCount, &MigratedBlockCount, &TotalSize] { + const auto _ = MakeGuard([&] { ZEN_INFO("migrated store '{}' to #{} chunks in #{} blocks in {} ({})", m_Config.RootDirectory / m_ContainerBaseName, MigratedChunkCount, @@ -768,7 +766,7 @@ CasContainerStrategy::MigrateLegacyData(bool CleanSource) LegacyCasLog.Open(LegacyLogPath, CleanSource ? CasLogFile::Mode::kWrite : CasLogFile::Mode::kRead); { Stopwatch Timer; - const auto __ = MakeGuard([this, &LegacyDiskIndex, &Timer] { + const auto __ = MakeGuard([&] { ZEN_INFO("read store '{}' legacy log containing #{} entries in {}", m_Config.RootDirectory / m_ContainerBaseName, LegacyDiskIndex.size(), diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp index b53cfaa54..d074a906f 100644 --- a/zenstore/filecas.cpp +++ b/zenstore/filecas.cpp @@ -92,7 +92,7 @@ FileCasStrategy::Initialize(bool IsNewStore) m_CasLog.Open(m_Config.RootDirectory / "cas.ulog", IsNewStore ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite); Stopwatch Timer; - const auto _ = MakeGuard([this, &Timer] { + const auto _ = MakeGuard([&] { ZEN_INFO("read log {} containing {}", m_Config.RootDirectory / "cas.ulog", NiceBytes(m_TotalSize.load(std::memory_order::relaxed))); }); @@ -692,7 +692,7 @@ FileCasStrategy::CollectGarbage(GcContext& GcCtx) uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed); Stopwatch TotalTimer; - const auto _ = MakeGuard([this, &TotalTimer, &DeletedCount, &ChunkCount, OldTotalSize] { + const auto _ = MakeGuard([&] { ZEN_INFO("garbage collect for '{}' DONE after {}, deleted {} out of {} files, removed {} out of {}", m_Config.RootDirectory, NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()), diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp index 21522e46a..4b50668d9 100644 --- a/zenstore/gc.cpp +++ b/zenstore/gc.cpp @@ -76,7 +76,7 @@ namespace { return MakeErrorCodeFromLastError(); } bool Keep = true; - auto _ = MakeGuard([FileHandle, &Keep, Path]() { + auto _ = MakeGuard([&]() { ::CloseHandle(FileHandle); if (!Keep) { @@ -105,7 +105,7 @@ namespace { } bool Keep = true; - auto _ = MakeGuard([Fd, &Keep, Path]() { + auto _ = MakeGuard([&]() { close(Fd); if (!Keep) { @@ -392,7 +392,7 @@ CasGc::CollectGarbage(GcContext& GcCtx) // First gather reference set { Stopwatch Timer; - const auto Guard = MakeGuard([this, &Timer] { ZEN_INFO("gathered references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + const auto Guard = MakeGuard([&] { ZEN_INFO("gathered references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); for (GcContributor* Contributor : m_GcContribs) { Contributor->GatherReferences(GcCtx); @@ -433,7 +433,7 @@ CasGc::CollectGarbage(GcContext& GcCtx) { Stopwatch Timer; - const auto Guard = MakeGuard([this, &Timer] { ZEN_INFO("collected garbage in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + const auto Guard = MakeGuard([&] { ZEN_INFO("collected garbage in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); for (GcStorage* Storage : m_GcStorage) { Storage->CollectGarbage(GcCtx); @@ -445,8 +445,7 @@ CasGc::CollectGarbage(GcContext& GcCtx) if (CidStore* CidStore = m_CidStore) { Stopwatch Timer; - const auto Guard = - MakeGuard([this, &Timer] { ZEN_INFO("clean up deleted content ids in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + const auto Guard = MakeGuard([&] { ZEN_INFO("clean up deleted content ids in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); CidStore->RemoveCids(GcCtx.DeletedCas()); } } @@ -672,8 +671,7 @@ GcScheduler::SchedulerThread() NiceTimeSpanMs(uint64_t(std::chrono::duration_cast(GcCtx.MaxCacheDuration()).count()))); { Stopwatch Timer; - const auto __ = - MakeGuard([this, &Timer] { ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + const auto __ = MakeGuard([&] { ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); m_CasGc.CollectGarbage(GcCtx); -- cgit v1.2.3 From ae8505ad0af6375289c83b6455796e0c91609dc9 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 16:53:21 +0200 Subject: Make sure we close all block files when dropping a cache bucket --- zenserver/cache/structuredcachestore.cpp | 3 +-- zenstore/blockstore.cpp | 11 +++++++++++ zenstore/include/zenstore/blockstore.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 46f9d8fe6..a9e9b8f78 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1202,8 +1202,7 @@ ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& void ZenCacheDiskLayer::CacheBucket::Drop() { - // TODO: close all open files and manage locking - // TODO: add error handling + m_BlockStore.Close(); m_SlogFile.Close(); DeleteDirectories(m_BucketDir); } diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index e502113fc..b82b93823 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -193,6 +193,17 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, } } +void +BlockStore::Close() +{ + RwLock::ExclusiveLockScope InsertLock(m_InsertLock); + m_WriteBlock = nullptr; + m_CurrentInsertOffset = 0; + m_WriteBlockIndex = 0; + m_ChunkBlocks.clear(); + m_BlocksBasePath.clear(); +} + BlockStoreLocation BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) { diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 5019e257d..e153b530e 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -131,6 +131,7 @@ public: uint64_t MaxBlockSize, uint64_t MaxBlockCount, const std::vector& KnownLocations); + void Close(); BlockStoreLocation WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment); -- cgit v1.2.3 From 689b86f1d7a962338ad98a672c88fe4eee0ddc19 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 17:02:32 +0200 Subject: clean up any incomplete blocks if ReclaimSpace fails --- zenstore/blockstore.cpp | 289 +++++++++++++++++++++++++----------------------- 1 file changed, 153 insertions(+), 136 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index b82b93823..2573863a8 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -404,177 +404,194 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, } Ref NewBlockFile; - uint64_t WriteOffset = 0; - uint32_t NewBlockIndex = 0; - - for (uint32_t BlockIndex : BlocksToReWrite) + try { - const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; - - Ref OldBlockFile; + uint64_t WriteOffset = 0; + uint32_t NewBlockIndex = 0; + for (uint32_t BlockIndex : BlocksToReWrite) { - RwLock::SharedLockScope _i(m_InsertLock); - Stopwatch Timer; - const auto __ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - WriteBlockTimeUs += ElapsedUs; - WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); - }); - OldBlockFile = m_ChunkBlocks[BlockIndex]; - ZEN_ASSERT(OldBlockFile); - } + const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex]; - const ChunkIndexArray& KeepMap = BlockKeepChunks[ChunkMapIndex]; - if (KeepMap.empty()) - { - const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; - for (size_t DeleteIndex : DeleteMap) + Ref OldBlockFile; { - DeletedSize += ChunkLocations[DeleteIndex].Size; + RwLock::SharedLockScope _i(m_InsertLock); + Stopwatch Timer; + const auto __ = MakeGuard([&] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + WriteBlockTimeUs += ElapsedUs; + WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); + }); + OldBlockFile = m_ChunkBlocks[BlockIndex]; + ZEN_ASSERT(OldBlockFile); } - ChangeCallback({}, DeleteMap); - DeletedCount += DeleteMap.size(); - { - RwLock::ExclusiveLockScope _i(m_InsertLock); - Stopwatch Timer; - const auto __ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - m_ChunkBlocks[BlockIndex] = nullptr; - } - ZEN_DEBUG("marking cas store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); - } - continue; - } - MovedChunksArray MovedChunks; - std::vector Chunk; - for (const size_t& ChunkIndex : KeepMap) - { - const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; - Chunk.resize(ChunkLocation.Size); - OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); - - if (!NewBlockFile || (WriteOffset + Chunk.size() > m_MaxBlockSize)) + const ChunkIndexArray& KeepMap = BlockKeepChunks[ChunkMapIndex]; + if (KeepMap.empty()) { - uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed); - - if (NewBlockFile) + const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; + for (size_t DeleteIndex : DeleteMap) { - NewBlockFile->Truncate(WriteOffset); - NewBlockFile->Flush(); + DeletedSize += ChunkLocations[DeleteIndex].Size; } + ChangeCallback({}, DeleteMap); + DeletedCount += DeleteMap.size(); { - ChangeCallback(MovedChunks, {}); - MovedCount += KeepMap.size(); - MovedChunks.clear(); - RwLock::ExclusiveLockScope __(m_InsertLock); + RwLock::ExclusiveLockScope _i(m_InsertLock); Stopwatch Timer; - const auto ___ = MakeGuard([&] { + const auto __ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); - if (m_ChunkBlocks.size() == m_MaxBlockCount) - { - ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded", - m_BlocksBasePath, - static_cast(std::numeric_limits::max()) + 1); - return; - } - while (m_ChunkBlocks.contains(NextBlockIndex)) - { - NextBlockIndex = (NextBlockIndex + 1) & (m_MaxBlockCount - 1); - } - std::filesystem::path NewBlockPath = GetBlockPath(m_BlocksBasePath, NextBlockIndex); - NewBlockFile = new BlockStoreFile(NewBlockPath); - m_ChunkBlocks[NextBlockIndex] = NewBlockFile; + m_ChunkBlocks[BlockIndex] = nullptr; } - - std::error_code Error; - DiskSpace Space = DiskSpaceInfo(m_BlocksBasePath, Error); - if (Error) + ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); + std::error_code Ec; + OldBlockFile->MarkAsDeleteOnClose(Ec); + if (Ec) { - ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BlocksBasePath, Error.message()); - return; + ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); } - if (Space.Free < m_MaxBlockSize) + continue; + } + + MovedChunksArray MovedChunks; + std::vector Chunk; + for (const size_t& ChunkIndex : KeepMap) + { + const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; + Chunk.resize(ChunkLocation.Size); + OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); + + if (!NewBlockFile || (WriteOffset + Chunk.size() > m_MaxBlockSize)) { - uint64_t ReclaimedSpace = DiskReserveCallback(); - if (Space.Free + ReclaimedSpace < m_MaxBlockSize) + uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed); + + if (NewBlockFile) { - ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}", - m_BlocksBasePath, - m_MaxBlockSize, - NiceBytes(Space.Free + ReclaimedSpace)); - RwLock::ExclusiveLockScope _l(m_InsertLock); + NewBlockFile->Truncate(WriteOffset); + NewBlockFile->Flush(); + NewBlockFile = nullptr; + } + { + ChangeCallback(MovedChunks, {}); + MovedCount += KeepMap.size(); + MovedChunks.clear(); + RwLock::ExclusiveLockScope __(m_InsertLock); Stopwatch Timer; - const auto __ = MakeGuard([&] { + const auto ___ = MakeGuard([&] { uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); ReadBlockTimeUs += ElapsedUs; ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); - m_ChunkBlocks.erase(NextBlockIndex); + if (m_ChunkBlocks.size() == m_MaxBlockCount) + { + ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded", + m_BlocksBasePath, + static_cast(std::numeric_limits::max()) + 1); + return; + } + while (m_ChunkBlocks.contains(NextBlockIndex)) + { + NextBlockIndex = (NextBlockIndex + 1) & (m_MaxBlockCount - 1); + } + std::filesystem::path NewBlockPath = GetBlockPath(m_BlocksBasePath, NextBlockIndex); + NewBlockFile = new BlockStoreFile(NewBlockPath); + m_ChunkBlocks[NextBlockIndex] = NewBlockFile; + } + + std::error_code Error; + DiskSpace Space = DiskSpaceInfo(m_BlocksBasePath, Error); + if (Error) + { + ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_BlocksBasePath, Error.message()); return; } + if (Space.Free < m_MaxBlockSize) + { + uint64_t ReclaimedSpace = DiskReserveCallback(); + if (Space.Free + ReclaimedSpace < m_MaxBlockSize) + { + ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}", + m_BlocksBasePath, + m_MaxBlockSize, + NiceBytes(Space.Free + ReclaimedSpace)); + RwLock::ExclusiveLockScope _l(m_InsertLock); + Stopwatch Timer; + const auto __ = MakeGuard([&] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + m_ChunkBlocks.erase(NextBlockIndex); + return; + } - ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}", - m_BlocksBasePath, - ReclaimedSpace, - NiceBytes(Space.Free + ReclaimedSpace)); + ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}", + m_BlocksBasePath, + ReclaimedSpace, + NiceBytes(Space.Free + ReclaimedSpace)); + } + NewBlockFile->Create(m_MaxBlockSize); + NewBlockIndex = NextBlockIndex; + WriteOffset = 0; } - NewBlockFile->Create(m_MaxBlockSize); - NewBlockIndex = NextBlockIndex; - WriteOffset = 0; - } - NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); - MovedChunks.push_back({ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}}); - WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); - } - Chunk.clear(); - if (NewBlockFile) - { - NewBlockFile->Truncate(WriteOffset); - NewBlockFile->Flush(); - NewBlockFile = {}; - } + NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset); + MovedChunks.push_back({ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}}); + WriteOffset = RoundUp(WriteOffset + Chunk.size(), PayloadAlignment); + } + Chunk.clear(); + if (NewBlockFile) + { + NewBlockFile->Truncate(WriteOffset); + NewBlockFile->Flush(); + NewBlockFile = nullptr; + } - const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; - for (size_t DeleteIndex : DeleteMap) - { - DeletedSize += ChunkLocations[DeleteIndex].Size; - } + const ChunkIndexArray& DeleteMap = BlockDeleteChunks[ChunkMapIndex]; + for (size_t DeleteIndex : DeleteMap) + { + DeletedSize += ChunkLocations[DeleteIndex].Size; + } - ChangeCallback(MovedChunks, DeleteMap); - MovedCount += KeepMap.size(); - DeletedCount += DeleteMap.size(); - MovedChunks.clear(); - { - RwLock::ExclusiveLockScope __(m_InsertLock); - Stopwatch Timer; - const auto ___ = MakeGuard([&] { - uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); - ReadBlockTimeUs += ElapsedUs; - ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); - }); - m_ChunkBlocks[BlockIndex] = nullptr; + ChangeCallback(MovedChunks, DeleteMap); + MovedCount += KeepMap.size(); + DeletedCount += DeleteMap.size(); + MovedChunks.clear(); + { + RwLock::ExclusiveLockScope __(m_InsertLock); + Stopwatch Timer; + const auto ___ = MakeGuard([&] { + uint64_t ElapsedUs = Timer.GetElapsedTimeUs(); + ReadBlockTimeUs += ElapsedUs; + ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); + }); + m_ChunkBlocks[BlockIndex] = nullptr; + } + ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); + std::error_code Ec; + OldBlockFile->MarkAsDeleteOnClose(Ec); + if (Ec) + { + ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); + } + OldBlockFile = nullptr; } - ZEN_DEBUG("marking cas store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) + } + catch (std::exception& ex) + { + ZEN_ERROR("reclaiming space for '{}' failed with: '{}'", m_BlocksBasePath, ex.what()); + if (NewBlockFile) { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); + ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); + std::error_code Ec; + NewBlockFile->MarkAsDeleteOnClose(Ec); + if (Ec) + { + ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", NewBlockFile->GetPath(), Ec.message()); + } } - OldBlockFile = nullptr; } } -- cgit v1.2.3 From 1e3da36ef01f6b823febf22645a314267353a223 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 2 May 2022 17:06:30 +0200 Subject: switched back memory_order for m_TotalSize to relaxed --- zenserver/cache/structuredcachestore.cpp | 12 ++++++------ zenstore/compactcas.cpp | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index a9e9b8f78..ae3b401a5 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -567,7 +567,7 @@ ZenCacheMemoryLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue m_CacheMap.insert_or_assign(HashKey, BucketValue(Value.Value, GcClock::TickCount())); } - m_TotalSize.fetch_add(Value.Value.GetSize(), std::memory_order_seq_cst); + m_TotalSize.fetch_add(Value.Value.GetSize(), std::memory_order::relaxed); } ////////////////////////////////////////////////////////////////////////// @@ -1088,7 +1088,7 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const fs::path& BucketDir, const bool Is for (const auto& Entry : m_Index) { const DiskLocation& Location = Entry.second.Location; - m_TotalSize.fetch_add(Location.Size(), std::memory_order_seq_cst); + m_TotalSize.fetch_add(Location.Size(), std::memory_order::relaxed); if (Location.IsFlagSet(DiskLocation::kStandaloneFile)) { continue; @@ -1554,10 +1554,10 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) } m_SlogFile.Append(DiskIndexEntry{.Key = Key, .Location = RestoreLocation}); m_Index.insert({Key, {Loc, GcClock::TickCount()}}); - m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order_seq_cst); + m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order::relaxed); continue; } - m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order_seq_cst); + m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed); DeletedSize += Entry.Location.Size(); DeletedCount++; } @@ -1815,7 +1815,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c } m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - m_TotalSize.fetch_add(Loc.Size(), std::memory_order_seq_cst); + m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); } void @@ -1836,7 +1836,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, const DiskLocation Location(BlockStoreLocation, m_PayloadAlignment, EntryFlags); const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; m_SlogFile.Append(DiskIndexEntry); - m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order_seq_cst); + m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order::relaxed); RwLock::ExclusiveLockScope __(m_IndexLock); if (auto It = m_Index.find(HashKey); It != m_Index.end()) { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index e0f84e044..22f8ea0c3 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -258,7 +258,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const RwLock::ExclusiveLockScope _(m_LocationMapLock); m_LocationMap.emplace(ChunkHash, DiskLocation); } - m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order_seq_cst); + m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order::relaxed); return CasStore::InsertResult{.New = true}; } @@ -960,7 +960,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) for (const auto& Entry : m_LocationMap) { const BlockStoreDiskLocation& Location = Entry.second; - m_TotalSize.fetch_add(Location.GetSize(), std::memory_order_seq_cst); + m_TotalSize.fetch_add(Location.GetSize(), std::memory_order::relaxed); KnownLocations.push_back(Location.Get(m_PayloadAlignment)); } -- cgit v1.2.3 From af21c74a4cb0cb6f479c4cc15830eee5070d9ae3 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 3 May 2022 09:29:19 +0200 Subject: cleanup --- zenstore/blockstore.cpp | 2 +- zenstore/include/zenstore/blockstore.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 2573863a8..f0a798e36 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -773,7 +773,7 @@ BlockStore::Split(const std::vector& ChunkLocations, ChunkIndexes.push_back(Index); } - std::sort(begin(ChunkIndexes), end(ChunkIndexes), [&ChunkLocations](size_t Lhs, size_t Rhs) { + std::sort(begin(ChunkIndexes), end(ChunkIndexes), [&](size_t Lhs, size_t Rhs) { const BlockStoreLocation& LhsLocation = ChunkLocations[Lhs]; const BlockStoreLocation& RhsLocation = ChunkLocations[Rhs]; return LhsLocation.Offset < RhsLocation.Offset; diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index e153b530e..b40704ada 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -22,10 +22,6 @@ struct BlockStoreLocation inline auto operator<=>(const BlockStoreLocation& Rhs) const = default; }; -constexpr BlockStoreLocation InvalidBlockStoreLocation{.BlockIndex = 0xfffffffful, - .Offset = 0xffffffffffffffffull, - .Size = 0xffffffffffffffffull}; - #pragma pack(push) #pragma pack(1) -- cgit v1.2.3 From 1b3b8b0e6f70129222085eec40b5a58cf9b29b01 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 3 May 2022 11:48:35 +0200 Subject: more tests for block store --- zenstore/blockstore.cpp | 342 +++++++++++++++++++++++++++++++-- zenstore/compactcas.cpp | 4 +- zenstore/include/zenstore/blockstore.h | 5 +- 3 files changed, 334 insertions(+), 17 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index f0a798e36..f469e3746 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -117,6 +117,8 @@ BlockStoreFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::functio m_File.StreamByteRange(FileOffset, Size, std::move(ChunkFun)); } +constexpr uint64_t ScrubSmallChunkWindowSize = 4 * 1024 * 1024; + void BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, @@ -260,7 +262,11 @@ Ref BlockStore::GetChunkBlock(const BlockStoreLocation& Location) { RwLock::SharedLockScope InsertLock(m_InsertLock); - return m_ChunkBlocks[Location.BlockIndex]; + if (auto BlockIt = m_ChunkBlocks.find(Location.BlockIndex); BlockIt != m_ChunkBlocks.end()) + { + return BlockIt->second; + } + return {}; } void @@ -607,8 +613,7 @@ BlockStore::IterateChunks(const std::vector& ChunkLocations, { ChunkIndexArray BigChunks; - const uint64_t WindowSize = 4 * 1024 * 1024; - IoBuffer ReadBuffer{WindowSize}; + IoBuffer ReadBuffer{ScrubSmallChunkWindowSize}; void* BufferBase = ReadBuffer.MutableData(); RwLock::SharedLockScope _(m_InsertLock); @@ -616,15 +621,14 @@ BlockStore::IterateChunks(const std::vector& ChunkLocations, for (const auto& Block : m_ChunkBlocks) { uint64_t WindowStart = 0; - uint64_t WindowEnd = WindowSize; + uint64_t WindowEnd = ScrubSmallChunkWindowSize; uint32_t BlockIndex = Block.first; const Ref& BlockFile = Block.second; - BlockFile->Open(); - const uint64_t FileSize = BlockFile->FileSize(); + const uint64_t FileSize = BlockFile->FileSize(); do { - const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart); + const uint64_t ChunkSize = Min(ScrubSmallChunkWindowSize, FileSize - WindowStart); BlockFile->Read(BufferBase, ChunkSize, WindowStart); // TODO: We could be smarter here if the ChunkLocations were sorted on block index - we could @@ -655,17 +659,16 @@ BlockStore::IterateChunks(const std::vector& ChunkLocations, } } - WindowStart += WindowSize; - WindowEnd += WindowSize; + WindowStart += ScrubSmallChunkWindowSize; + WindowEnd += ScrubSmallChunkWindowSize; } while (WindowStart < FileSize); } - // Deal with large chunks - + // Deal with large chunks and chunks that extend over a ScrubSmallChunkWindowSize border for (size_t ChunkIndex : BigChunks) { - const BlockStoreLocation Location = ChunkLocations[ChunkIndex]; - BasicFile& BlockFile = m_ChunkBlocks[Location.BlockIndex]->GetBasicFile(); + const BlockStoreLocation Location = ChunkLocations[ChunkIndex]; + const Ref& BlockFile = m_ChunkBlocks[Location.BlockIndex]; LargeSizeCallback(ChunkIndex, BlockFile, Location.Offset, Location.Size); } } @@ -1029,6 +1032,319 @@ TEST_CASE("blockstore.blockfile") CHECK(!std::filesystem::exists(RootDirectory / "1")); } +namespace { + BlockStoreLocation WriteStringAsChunk(BlockStore& Store, std::string_view String, size_t PayloadAlignment) + { + BlockStoreLocation Location = Store.WriteChunk(String.data(), String.length(), PayloadAlignment); + CHECK(Location.Size == String.length()); + return Location; + }; + + std::string ReadChunkAsString(BlockStore& Store, const BlockStoreLocation& Location) + { + Ref ChunkBlock(Store.GetChunkBlock(Location)); + if (!ChunkBlock) + { + return ""; + } + IoBuffer ChunkData = ChunkBlock->GetChunk(Location.Offset, Location.Size); + if (!ChunkData) + { + return ""; + } + std::string AsString((const char*)ChunkData.Data(), ChunkData.Size()); + return AsString; + }; + + std::vector GetDirectoryContent(std::filesystem::path RootDir, bool Files, bool Directories) + { + FileSystemTraversal Traversal; + struct Visitor : public FileSystemTraversal::TreeVisitor + { + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t) override + { + if (Files) + { + Items.push_back(Parent / File); + } + } + + virtual bool VisitDirectory(const std::filesystem::path& Parent, const path_view& Dir) override + { + if (Directories) + { + Items.push_back(Parent / Dir); + } + return true; + } + + bool Files; + bool Directories; + std::vector Items; + } Visit; + Visit.Files = Files; + Visit.Directories = Directories; + + Traversal.TraverseFileSystem(RootDir, Visit); + return Visit.Items; + }; + + static IoBuffer CreateChunk(uint64_t Size) + { + static std::random_device rd; + static std::mt19937 g(rd()); + + std::vector Values; + Values.resize(Size); + for (size_t Idx = 0; Idx < Size; ++Idx) + { + Values[Idx] = static_cast(Idx); + } + std::shuffle(Values.begin(), Values.end(), g); + + return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size()); + } +} // namespace + +TEST_CASE("blockstore.chunks") +{ + ScopedTemporaryDirectory TempDir; + auto RootDirectory = TempDir.Path(); + + BlockStore Store; + Store.Initialize(RootDirectory, 128, 1024, {}); + Ref BadChunk = Store.GetChunkBlock({.BlockIndex = 0, .Offset = 0, .Size = 512}); + CHECK(!BadChunk); + + std::string FirstChunkData = "This is the data of the first chunk that we will write"; + BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4); + std::string SecondChunkData = "This is the data for the second chunk that we will write"; + BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); + + CHECK(ReadChunkAsString(Store, FirstChunkLocation) == FirstChunkData); + CHECK(ReadChunkAsString(Store, SecondChunkLocation) == SecondChunkData); + + std::string ThirdChunkData = + "This is a much longer string that will not fit in the first block so it should be placed in the second block"; + BlockStoreLocation ThirdChunkLocation = WriteStringAsChunk(Store, ThirdChunkData, 4); + CHECK(ThirdChunkLocation.BlockIndex != FirstChunkLocation.BlockIndex); + + CHECK(ReadChunkAsString(Store, FirstChunkLocation) == FirstChunkData); + CHECK(ReadChunkAsString(Store, SecondChunkLocation) == SecondChunkData); + CHECK(ReadChunkAsString(Store, ThirdChunkLocation) == ThirdChunkData); +} + +TEST_CASE("blockstore.clean.stray.blocks") +{ + ScopedTemporaryDirectory TempDir; + auto RootDirectory = TempDir.Path(); + + BlockStore Store; + Store.Initialize(RootDirectory / "store", 128, 1024, {}); + + std::string FirstChunkData = "This is the data of the first chunk that we will write"; + BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4); + std::string SecondChunkData = "This is the data for the second chunk that we will write"; + BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); + std::string ThirdChunkData = + "This is a much longer string that will not fit in the first block so it should be placed in the second block"; + WriteStringAsChunk(Store, ThirdChunkData, 4); + + Store.Close(); + + // Not referencing the second block means that we should be deleted + Store.Initialize(RootDirectory / "store", 128, 1024, {FirstChunkLocation, SecondChunkLocation}); + + CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 1); +} + +TEST_CASE("blockstore.flush.forces.new.block") +{ + ScopedTemporaryDirectory TempDir; + auto RootDirectory = TempDir.Path(); + + BlockStore Store; + Store.Initialize(RootDirectory / "store", 128, 1024, {}); + + std::string FirstChunkData = "This is the data of the first chunk that we will write"; + BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4); + Store.Flush(); + std::string SecondChunkData = "This is the data for the second chunk that we will write"; + BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); + Store.Flush(); + std::string ThirdChunkData = + "This is a much longer string that will not fit in the first block so it should be placed in the second block"; + WriteStringAsChunk(Store, ThirdChunkData, 4); + + CHECK(GetDirectoryContent(RootDirectory / "store", true, false).size() == 3); +} + +TEST_CASE("blockstore.iterate.chunks") +{ + ScopedTemporaryDirectory TempDir; + auto RootDirectory = TempDir.Path(); + + BlockStore Store; + Store.Initialize(RootDirectory / "store", 128, 1024, {}); + Ref BadChunk = Store.GetChunkBlock({.BlockIndex = 0, .Offset = 0, .Size = 512}); + CHECK(!BadChunk); + + std::string FirstChunkData = "This is the data of the first chunk that we will write"; + BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4); + + std::string SecondChunkData = "This is the data for the second chunk that we will write"; + BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); + + std::string VeryLargeChunk(ScrubSmallChunkWindowSize * 2, 'L'); + BlockStoreLocation VeryLargeChunkLocation = WriteStringAsChunk(Store, VeryLargeChunk, 4); + + Store.IterateChunks( + {FirstChunkLocation, SecondChunkLocation, VeryLargeChunkLocation}, + [&](size_t ChunkIndex, const void* Data, uint64_t Size) { + CHECK(Data); + CHECK(Size > 0); + std::string AsString((const char*)Data, Size); + switch (ChunkIndex) + { + case 0: + CHECK(AsString == FirstChunkData); + break; + case 1: + CHECK(AsString == SecondChunkData); + break; + default: + CHECK(false); + break; + } + }, + [&](size_t ChunkIndex, Ref BlockFile, uint64_t Offset, uint64_t Size) { + CHECK(BlockFile); + CHECK(ChunkIndex == 2); + CHECK(Offset == VeryLargeChunkLocation.Offset); + CHECK(Size == VeryLargeChunkLocation.Size); + size_t StreamOffset = 0; + BlockFile->StreamByteRange(Offset, Size, [&](const void* Data, size_t Size) { + const char* VeryLargeChunkSection = &(VeryLargeChunk.data()[StreamOffset]); + CHECK(memcmp(VeryLargeChunkSection, Data, Size) == 0); + }); + }); +} + +TEST_CASE("blockstore.reclaim.space") +{ + ScopedTemporaryDirectory TempDir; + auto RootDirectory = TempDir.Path(); + + BlockStore Store; + Store.Initialize(RootDirectory / "store", 512, 1024, {}); + + constexpr size_t ChunkCount = 200; + constexpr size_t Alignment = 8; + std::vector ChunkLocations; + std::vector ChunkHashes; + ChunkLocations.reserve(ChunkCount); + ChunkHashes.reserve(ChunkCount); + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) + { + IoBuffer Chunk = CreateChunk(57 + ChunkIndex); + ChunkLocations.push_back(Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment)); + ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); + } + + std::vector ChunksToKeep; + ChunksToKeep.reserve(ChunkLocations.size()); + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) + { + ChunksToKeep.push_back(ChunkIndex); + } + + Store.Flush(); + BlockStore::ReclaimSnapshotState State1 = Store.GetReclaimSnapshotState(); + Store.ReclaimSpace(State1, ChunkLocations, ChunksToKeep, Alignment, true); + + // If we keep all the chunks we should not get any callbacks on moved/deleted stuff + Store.ReclaimSpace( + State1, + ChunkLocations, + ChunksToKeep, + Alignment, + false, + [](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&) { CHECK(false); }, + []() { + CHECK(false); + return 0; + }); + + size_t DeleteChunkCount = 38; + ChunksToKeep.clear(); + for (size_t ChunkIndex = DeleteChunkCount; ChunkIndex < ChunkCount; ++ChunkIndex) + { + ChunksToKeep.push_back(ChunkIndex); + } + + std::vector NewChunkLocations = ChunkLocations; + size_t MovedChunkCount = 0; + size_t DeletedChunkCount = 0; + Store.ReclaimSpace( + State1, + ChunkLocations, + ChunksToKeep, + Alignment, + false, + [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) { + for (const auto& MovedChunk : MovedChunks) + { + CHECK(MovedChunk.first >= DeleteChunkCount); + NewChunkLocations[MovedChunk.first] = MovedChunk.second; + } + MovedChunkCount += MovedChunks.size(); + for (size_t DeletedIndex : DeletedChunks) + { + CHECK(DeletedIndex < DeleteChunkCount); + } + DeletedChunkCount += DeletedChunks.size(); + }, + []() { + CHECK(false); + return 0; + }); + CHECK(MovedChunkCount <= DeleteChunkCount); + CHECK(DeletedChunkCount == DeleteChunkCount); + ChunkLocations = std::vector(NewChunkLocations.begin() + DeleteChunkCount, NewChunkLocations.end()); + + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) + { + Ref ChunkBlock = Store.GetChunkBlock(NewChunkLocations[ChunkIndex]); + if (ChunkIndex >= DeleteChunkCount) + { + CHECK(ChunkBlock); + IoBuffer VerifyChunk = ChunkBlock->GetChunk(NewChunkLocations[ChunkIndex].Offset, NewChunkLocations[ChunkIndex].Size); + CHECK(VerifyChunk); + IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size()); + CHECK(VerifyHash == ChunkHashes[ChunkIndex]); + } + } + + NewChunkLocations = ChunkLocations; + MovedChunkCount = 0; + DeletedChunkCount = 0; + Store.ReclaimSpace( + State1, + ChunkLocations, + {}, + Alignment, + false, + [&](const BlockStore::MovedChunksArray& MovedChunks, const BlockStore::ChunkIndexArray& DeletedChunks) { + CHECK(MovedChunks.empty()); + DeletedChunkCount += DeletedChunks.size(); + }, + []() { + CHECK(false); + return 0; + }); + CHECK(DeletedChunkCount == ChunkCount - DeleteChunkCount); +} + #endif void diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 22f8ea0c3..7cc742beb 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -353,9 +353,9 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) BadKeys.push_back(ExpectedHash); } }, - [&](size_t ChunkIndex, BasicFile& BlockFile, uint64_t Offset, uint64_t Size) { + [&](size_t ChunkIndex, Ref BlockFile, uint64_t Offset, uint64_t Size) { IoHashStream Hasher; - BlockFile.StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); + BlockFile->StreamByteRange(Offset, Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); IoHash ComputedHash = Hasher.GetHash(); const IoHash& ExpectedHash = ChunkIndexToChunkHash[ChunkIndex]; if (ComputedHash != ExpectedHash) diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index b40704ada..e330cc080 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -120,8 +120,9 @@ public: typedef std::function ReclaimCallback; typedef std::function ClaimDiskReserveCallback; typedef std::function IterateChunksSmallSizeCallback; - typedef std::function IterateChunksLargeSizeCallback; - typedef std::function SplitCallback; + typedef std::function BlockFile, uint64_t Offset, uint64_t Size)> + IterateChunksLargeSizeCallback; + typedef std::function SplitCallback; void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, -- cgit v1.2.3 From 5dddf5f993dff479fbc429d10cbcc93601af90c9 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 3 May 2022 12:15:35 +0200 Subject: threading test for blockstore --- zenstore/blockstore.cpp | 92 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index f469e3746..0992662c2 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -11,6 +11,7 @@ # include # include # include +# include # include # include #endif @@ -209,6 +210,11 @@ BlockStore::Close() BlockStoreLocation BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) { + ZEN_ASSERT(Data != nullptr); + ZEN_ASSERT(Size > 0u); + ZEN_ASSERT(Size <= m_MaxBlockSize); + ZEN_ASSERT(Alignment > 0u); + RwLock::ExclusiveLockScope InsertLock(m_InsertLock); uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire); @@ -1185,7 +1191,7 @@ TEST_CASE("blockstore.iterate.chunks") auto RootDirectory = TempDir.Path(); BlockStore Store; - Store.Initialize(RootDirectory / "store", 128, 1024, {}); + Store.Initialize(RootDirectory / "store", ScrubSmallChunkWindowSize * 2, 1024, {}); Ref BadChunk = Store.GetChunkBlock({.BlockIndex = 0, .Offset = 0, .Size = 512}); CHECK(!BadChunk); @@ -1194,6 +1200,7 @@ TEST_CASE("blockstore.iterate.chunks") std::string SecondChunkData = "This is the data for the second chunk that we will write"; BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); + Store.Flush(); std::string VeryLargeChunk(ScrubSmallChunkWindowSize * 2, 'L'); BlockStoreLocation VeryLargeChunkLocation = WriteStringAsChunk(Store, VeryLargeChunk, 4); @@ -1345,6 +1352,89 @@ TEST_CASE("blockstore.reclaim.space") CHECK(DeletedChunkCount == ChunkCount - DeleteChunkCount); } +TEST_CASE("blockstore.thread.read.write") +{ + ScopedTemporaryDirectory TempDir; + auto RootDirectory = TempDir.Path(); + + BlockStore Store; + Store.Initialize(RootDirectory / "store", 1088, 1024, {}); + + constexpr size_t ChunkCount = 1000; + constexpr size_t Alignment = 8; + std::vector Chunks; + std::vector ChunkHashes; + Chunks.reserve(ChunkCount); + ChunkHashes.reserve(ChunkCount); + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) + { + IoBuffer Chunk = CreateChunk(57 + ChunkIndex / 2); + Chunks.push_back(Chunk); + ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); + } + + std::vector ChunkLocations; + ChunkLocations.resize(ChunkCount); + + WorkerThreadPool WorkerPool(8); + std::atomic WorkCompleted = 0; + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) + { + WorkerPool.ScheduleWork([&Store, ChunkIndex, &Chunks, &ChunkLocations, &WorkCompleted]() { + IoBuffer& Chunk = Chunks[ChunkIndex]; + ChunkLocations[ChunkIndex] = Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment); + WorkCompleted.fetch_add(1); + }); + } + while (WorkCompleted < Chunks.size()) + { + Sleep(1); + } + + WorkCompleted = 0; + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) + { + WorkerPool.ScheduleWork([&Store, ChunkIndex, &ChunkLocations, &ChunkHashes, &WorkCompleted]() { + Ref ChunkBlock = Store.GetChunkBlock(ChunkLocations[ChunkIndex]); + CHECK(ChunkBlock); + IoBuffer VerifyChunk = ChunkBlock->GetChunk(ChunkLocations[ChunkIndex].Offset, ChunkLocations[ChunkIndex].Size); + CHECK(VerifyChunk); + IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size()); + CHECK(VerifyHash == ChunkHashes[ChunkIndex]); + WorkCompleted.fetch_add(1); + }); + } + while (WorkCompleted < Chunks.size()) + { + Sleep(1); + } + + std::vector SecondChunkLocations; + SecondChunkLocations.resize(ChunkCount); + WorkCompleted = 0; + for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) + { + WorkerPool.ScheduleWork([&Store, ChunkIndex, &Chunks, &SecondChunkLocations, &WorkCompleted]() { + IoBuffer& Chunk = Chunks[ChunkIndex]; + SecondChunkLocations[ChunkIndex] = Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment); + WorkCompleted.fetch_add(1); + }); + WorkerPool.ScheduleWork([&Store, ChunkIndex, &ChunkLocations, &ChunkHashes, &WorkCompleted]() { + Ref ChunkBlock = Store.GetChunkBlock(ChunkLocations[ChunkIndex]); + CHECK(ChunkBlock); + IoBuffer VerifyChunk = ChunkBlock->GetChunk(ChunkLocations[ChunkIndex].Offset, ChunkLocations[ChunkIndex].Size); + CHECK(VerifyChunk); + IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size()); + CHECK(VerifyHash == ChunkHashes[ChunkIndex]); + WorkCompleted.fetch_add(1); + }); + } + while (WorkCompleted < Chunks.size() * 2) + { + Sleep(1); + } +} + #endif void -- cgit v1.2.3 From 78e582a60763c0d9499106be0cdfe6a794e26e42 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 3 May 2022 22:14:02 +0200 Subject: macos compilation fix --- zenserver/cache/structuredcachestore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index ae3b401a5..3ba75cd9c 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1025,7 +1025,7 @@ ZenCacheDiskLayer::CacheBucket::MigrateLegacyData(bool CleanSource) OldLocation.Size(), 0, OldLocation.Flags() | LegacyDiskLocation::kTombStone); - LegacyLogEntries.push_back(LegacyDiskIndexEntry(ChunkHash, NewLocation)); + LegacyLogEntries.push_back(LegacyDiskIndexEntry{.Key = ChunkHash, .Location = NewLocation}); } LegacyCasLog.Append(LegacyLogEntries); LegacyCasLog.Flush(); -- cgit v1.2.3 From a50fdca477c04c273c5521020c9faf0441cf696f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 3 May 2022 22:29:48 +0200 Subject: unused variable in test fix --- zenstore/blockstore.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 0992662c2..cb22551b9 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -1172,11 +1172,11 @@ TEST_CASE("blockstore.flush.forces.new.block") BlockStore Store; Store.Initialize(RootDirectory / "store", 128, 1024, {}); - std::string FirstChunkData = "This is the data of the first chunk that we will write"; - BlockStoreLocation FirstChunkLocation = WriteStringAsChunk(Store, FirstChunkData, 4); + std::string FirstChunkData = "This is the data of the first chunk that we will write"; + WriteStringAsChunk(Store, FirstChunkData, 4); Store.Flush(); - std::string SecondChunkData = "This is the data for the second chunk that we will write"; - BlockStoreLocation SecondChunkLocation = WriteStringAsChunk(Store, SecondChunkData, 4); + std::string SecondChunkData = "This is the data for the second chunk that we will write"; + WriteStringAsChunk(Store, SecondChunkData, 4); Store.Flush(); std::string ThirdChunkData = "This is a much longer string that will not fit in the first block so it should be placed in the second block"; -- cgit v1.2.3 From a19eee841d7ce0c9c868dced40a6380f55cdb9bd Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 3 May 2022 23:04:45 +0200 Subject: handle that more than one block can be written to in parallel --- zenserver/cache/structuredcachestore.cpp | 39 ++++++++++++++++---------------- zenstore/blockstore.cpp | 36 +++++++++++++++++++---------- zenstore/compactcas.cpp | 29 ++++++++++-------------- zenstore/include/zenstore/blockstore.h | 16 +++++++------ 4 files changed, 65 insertions(+), 55 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 3ba75cd9c..2869191fd 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1832,25 +1832,26 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, const EntryFlags |= DiskLocation::kCompressed; } - BlockStoreLocation BlockStoreLocation = m_BlockStore.WriteChunk(Value.Value.Data(), Value.Value.Size(), m_PayloadAlignment); - DiskLocation Location(BlockStoreLocation, m_PayloadAlignment, EntryFlags); - const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; - m_SlogFile.Append(DiskIndexEntry); - m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order::relaxed); - RwLock::ExclusiveLockScope __(m_IndexLock); - if (auto It = m_Index.find(HashKey); It != m_Index.end()) - { - // TODO: should check if write is idempotent and bail out if it is? - // this would requiring comparing contents on disk unless we add a - // content hash to the index entry - IndexEntry& Entry = It.value(); - Entry.Location = Location; - Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); - } - else - { - m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); - } + m_BlockStore.WriteChunk(Value.Value.Data(), Value.Value.Size(), m_PayloadAlignment, [&](BlockStoreLocation BlockStoreLocation) { + DiskLocation Location(BlockStoreLocation, m_PayloadAlignment, EntryFlags); + const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; + m_SlogFile.Append(DiskIndexEntry); + m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order::relaxed); + RwLock::ExclusiveLockScope __(m_IndexLock); + if (auto It = m_Index.find(HashKey); It != m_Index.end()) + { + // TODO: should check if write is idempotent and bail out if it is? + // this would requiring comparing contents on disk unless we add a + // content hash to the index entry + IndexEntry& Entry = It.value(); + Entry.Location = Location; + Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); + } + else + { + m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); + } + }); } ////////////////////////////////////////////////////////////////////////// diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index cb22551b9..54a8eb9df 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -207,8 +207,8 @@ BlockStore::Close() m_BlocksBasePath.clear(); } -BlockStoreLocation -BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) +void +BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback) { ZEN_ASSERT(Data != nullptr); ZEN_ASSERT(Size > 0u); @@ -246,11 +246,17 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment) uint64_t InsertOffset = m_CurrentInsertOffset; m_CurrentInsertOffset = RoundUp(InsertOffset + Size, Alignment); Ref WriteBlock = m_WriteBlock; + m_ActiveWriteBlocks.push_back(WriteBlockIndex); InsertLock.ReleaseNow(); WriteBlock->Write(Data, Size, InsertOffset); - return {.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = Size}; + Callback({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = Size}); + + { + RwLock::ExclusiveLockScope _(m_InsertLock); + m_ActiveWriteBlocks.erase(std::find(m_ActiveWriteBlocks.begin(), m_ActiveWriteBlocks.end(), WriteBlockIndex)); + } } BlockStore::ReclaimSnapshotState @@ -258,8 +264,11 @@ BlockStore::GetReclaimSnapshotState() { ReclaimSnapshotState State; RwLock::ExclusiveLockScope _(m_InsertLock); - State.ExcludeBlockIndex = m_WriteBlock ? m_WriteBlockIndex.load(std::memory_order_acquire) : 0xffffffffu; - State.BlockCount = m_ChunkBlocks.size(); + for (uint32_t BlockIndex : m_ActiveWriteBlocks) + { + State.m_ActiveWriteBlocks.insert(BlockIndex); + } + State.BlockCount = m_ChunkBlocks.size(); _.ReleaseNow(); return State; } @@ -357,7 +366,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, { const BlockStoreLocation& Location = ChunkLocations[Index]; OldTotalSize += Location.Size; - if (Location.BlockIndex == Snapshot.ExcludeBlockIndex) + if (Snapshot.m_ActiveWriteBlocks.contains(Location.BlockIndex)) { continue; } @@ -1041,7 +1050,8 @@ TEST_CASE("blockstore.blockfile") namespace { BlockStoreLocation WriteStringAsChunk(BlockStore& Store, std::string_view String, size_t PayloadAlignment) { - BlockStoreLocation Location = Store.WriteChunk(String.data(), String.length(), PayloadAlignment); + BlockStoreLocation Location; + Store.WriteChunk(String.data(), String.length(), PayloadAlignment, [&](const BlockStoreLocation& L) { Location = L; }); CHECK(Location.Size == String.length()); return Location; }; @@ -1254,7 +1264,7 @@ TEST_CASE("blockstore.reclaim.space") for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) { IoBuffer Chunk = CreateChunk(57 + ChunkIndex); - ChunkLocations.push_back(Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment)); + Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment, [&](const BlockStoreLocation& L) { ChunkLocations.push_back(L); }); ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); } @@ -1381,8 +1391,8 @@ TEST_CASE("blockstore.thread.read.write") for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) { WorkerPool.ScheduleWork([&Store, ChunkIndex, &Chunks, &ChunkLocations, &WorkCompleted]() { - IoBuffer& Chunk = Chunks[ChunkIndex]; - ChunkLocations[ChunkIndex] = Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment); + IoBuffer& Chunk = Chunks[ChunkIndex]; + Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment, [&](const BlockStoreLocation& L) { ChunkLocations[ChunkIndex] = L; }); WorkCompleted.fetch_add(1); }); } @@ -1415,8 +1425,10 @@ TEST_CASE("blockstore.thread.read.write") for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) { WorkerPool.ScheduleWork([&Store, ChunkIndex, &Chunks, &SecondChunkLocations, &WorkCompleted]() { - IoBuffer& Chunk = Chunks[ChunkIndex]; - SecondChunkLocations[ChunkIndex] = Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment); + IoBuffer& Chunk = Chunks[ChunkIndex]; + Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment, [&](const BlockStoreLocation& L) { + SecondChunkLocations[ChunkIndex] = L; + }); WorkCompleted.fetch_add(1); }); WorkerPool.ScheduleWork([&Store, ChunkIndex, &ChunkLocations, &ChunkHashes, &WorkCompleted]() { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 7cc742beb..cc0e2241c 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -250,15 +250,16 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const // This should be a rare occasion and the current flow reduces the time we block for // reads, insert and GC. - BlockStoreLocation Location = m_BlockStore.WriteChunk(ChunkData, ChunkSize, m_PayloadAlignment); - BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); - const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; - m_CasLog.Append(IndexEntry); - { - RwLock::ExclusiveLockScope _(m_LocationMapLock); - m_LocationMap.emplace(ChunkHash, DiskLocation); - } - m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order::relaxed); + m_BlockStore.WriteChunk(ChunkData, ChunkSize, m_PayloadAlignment, [&](const BlockStoreLocation& Location) { + BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); + const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; + m_CasLog.Append(IndexEntry); + { + RwLock::ExclusiveLockScope _(m_LocationMapLock); + m_LocationMap.emplace(ChunkHash, DiskLocation); + } + m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order::relaxed); + }); return CasStore::InsertResult{.New = true}; } @@ -1685,7 +1686,7 @@ TEST_CASE("compactcas.legacyconversion") } } -TEST_CASE("compactcas.threadedinsert") // * doctest::skip(true)) +TEST_CASE("compactcas.threadedinsert") { // for (uint32_t i = 0; i < 100; ++i) { @@ -1887,13 +1888,7 @@ TEST_CASE("compactcas.threadedinsert") // * doctest::skip(true)) { ThreadPool.ScheduleWork([&Cas, &WorkCompleted, ChunkHash]() { CHECK(Cas.HaveChunk(ChunkHash)); - if (ChunkHash != IoHash::HashBuffer(Cas.FindChunk(ChunkHash))) - { - IoBuffer Buffer = Cas.FindChunk(ChunkHash); - CHECK(Buffer); - IoHash BufferHash = IoHash::HashBuffer(Buffer); - CHECK(ChunkHash == BufferHash); - } + CHECK(ChunkHash == IoHash::HashBuffer(Cas.FindChunk(ChunkHash))); WorkCompleted.fetch_add(1); }); } diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index e330cc080..9edfc36e8 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -110,8 +110,8 @@ class BlockStore public: struct ReclaimSnapshotState { - size_t ExcludeBlockIndex; - size_t BlockCount; + std::unordered_set m_ActiveWriteBlocks; + size_t BlockCount; }; typedef std::vector> MovedChunksArray; @@ -123,6 +123,7 @@ public: typedef std::function BlockFile, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback; typedef std::function SplitCallback; + typedef std::function WriteChunkCallback; void Initialize(const std::filesystem::path& BlocksBasePath, uint64_t MaxBlockSize, @@ -130,7 +131,7 @@ public: const std::vector& KnownLocations); void Close(); - BlockStoreLocation WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment); + void WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback); Ref GetChunkBlock(const BlockStoreLocation& Location); void Flush(); @@ -164,10 +165,11 @@ public: private: std::unordered_map> m_ChunkBlocks; - RwLock m_InsertLock; // used to serialize inserts - Ref m_WriteBlock; - std::uint64_t m_CurrentInsertOffset = 0; - std::atomic_uint32_t m_WriteBlockIndex{}; + RwLock m_InsertLock; // used to serialize inserts + Ref m_WriteBlock; + std::uint64_t m_CurrentInsertOffset = 0; + std::atomic_uint32_t m_WriteBlockIndex{}; + std::vector m_ActiveWriteBlocks; uint64_t m_MaxBlockSize = 1u << 28; uint64_t m_MaxBlockCount = BlockStoreDiskLocation::MaxBlockIndex + 1; -- cgit v1.2.3 From 013e2c7ab88dc51d92d683e8f8ec488bdb4d08d9 Mon Sep 17 00:00:00 2001 From: Joe Kirchoff Date: Tue, 3 May 2022 15:57:02 -0700 Subject: Initialize upstream apply in background thread (#88) --- zenserver/compute/function.cpp | 23 ++++++++++++----------- zenserver/compute/function.h | 1 + zenserver/upstream/upstreamapply.cpp | 22 ++++++++++++++++++++++ zenserver/upstream/upstreamapply.h | 1 + 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/zenserver/compute/function.cpp b/zenserver/compute/function.cpp index dd31013ef..171c67a6e 100644 --- a/zenserver/compute/function.cpp +++ b/zenserver/compute/function.cpp @@ -39,15 +39,17 @@ HttpFunctionService::HttpFunctionService(CasStore& Store, { m_UpstreamApply = UpstreamApply::Create({}, m_CasStore, m_CidStore); - auto HordeUpstreamEndpoint = UpstreamApplyEndpoint::CreateHordeEndpoint(ComputeOptions, - ComputeAuthConfig, - StorageOptions, - StorageAuthConfig, - m_CasStore, - m_CidStore, - Mgr); - m_UpstreamApply->RegisterEndpoint(std::move(HordeUpstreamEndpoint)); - m_UpstreamApply->Initialize(); + InitializeThread = std::thread{[this, ComputeOptions, StorageOptions, ComputeAuthConfig, StorageAuthConfig, &Mgr] { + auto HordeUpstreamEndpoint = UpstreamApplyEndpoint::CreateHordeEndpoint(ComputeOptions, + ComputeAuthConfig, + StorageOptions, + StorageAuthConfig, + m_CasStore, + m_CidStore, + Mgr); + m_UpstreamApply->RegisterEndpoint(std::move(HordeUpstreamEndpoint)); + m_UpstreamApply->Initialize(); + }}; m_Router.AddPattern("job", "([[:digit:]]+)"); m_Router.AddPattern("worker", "([[:xdigit:]]{40})"); @@ -58,8 +60,7 @@ HttpFunctionService::HttpFunctionService(CasStore& Store, [this](HttpRouterRequest& Req) { HttpServerRequest& HttpReq = Req.ServerRequest(); - // Todo: check upstream health - return HttpReq.WriteResponse(HttpResponseCode::OK); + return HttpReq.WriteResponse(m_UpstreamApply->IsHealthy() ? HttpResponseCode::OK : HttpResponseCode::ServiceUnavailable); }, HttpVerb::kGet); diff --git a/zenserver/compute/function.h b/zenserver/compute/function.h index 2ddddabb4..efabe96ee 100644 --- a/zenserver/compute/function.h +++ b/zenserver/compute/function.h @@ -48,6 +48,7 @@ public: virtual void HandleRequest(HttpServerRequest& Request) override; private: + std::thread InitializeThread; spdlog::logger& Log() { return m_Log; } spdlog::logger& m_Log; HttpRequestRouter m_Router; diff --git a/zenserver/upstream/upstreamapply.cpp b/zenserver/upstream/upstreamapply.cpp index 9758e7565..c397bb141 100644 --- a/zenserver/upstream/upstreamapply.cpp +++ b/zenserver/upstream/upstreamapply.cpp @@ -119,6 +119,22 @@ public: return m_RunState.IsRunning; } + virtual bool IsHealthy() const override + { + if (m_RunState.IsRunning) + { + for (const auto& Endpoint : m_Endpoints) + { + if (Endpoint->IsHealthy()) + { + return true; + } + } + } + + return false; + } + virtual void RegisterEndpoint(std::unique_ptr Endpoint) override { m_Endpoints.emplace_back(std::move(Endpoint)); @@ -429,6 +445,12 @@ private: ////////////////////////////////////////////////////////////////////////// +bool +UpstreamApply::IsHealthy() const +{ + return false; +} + std::unique_ptr UpstreamApply::Create(const UpstreamApplyOptions& Options, CasStore& CasStore, CidStore& CidStore) { diff --git a/zenserver/upstream/upstreamapply.h b/zenserver/upstream/upstreamapply.h index c6e38142c..2edc6dc49 100644 --- a/zenserver/upstream/upstreamapply.h +++ b/zenserver/upstream/upstreamapply.h @@ -167,6 +167,7 @@ public: virtual ~UpstreamApply() = default; virtual bool Initialize() = 0; + virtual bool IsHealthy() const = 0; virtual void RegisterEndpoint(std::unique_ptr Endpoint) = 0; struct EnqueueResult -- cgit v1.2.3 From 5b95a4fba97aa66cec935ef3e0d969893223f9d6 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 4 May 2022 15:25:35 +0200 Subject: Add namespacecachestore layer to allow multiple structured cache namespaces --- zenserver/cache/namespacecachestore.cpp | 207 ++++++++++++++++++++++++++++++++ zenserver/cache/namespacecachestore.h | 49 ++++++++ zenserver/cache/structuredcache.cpp | 50 ++++---- zenserver/cache/structuredcache.h | 14 +-- zenserver/upstream/upstreamcache.cpp | 9 +- zenserver/upstream/upstreamcache.h | 4 +- zenserver/zenserver.cpp | 5 +- 7 files changed, 299 insertions(+), 39 deletions(-) create mode 100644 zenserver/cache/namespacecachestore.cpp create mode 100644 zenserver/cache/namespacecachestore.h diff --git a/zenserver/cache/namespacecachestore.cpp b/zenserver/cache/namespacecachestore.cpp new file mode 100644 index 000000000..82ac40c62 --- /dev/null +++ b/zenserver/cache/namespacecachestore.cpp @@ -0,0 +1,207 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "namespacecachestore.h" +#include "structuredcachestore.h" + +namespace zen { + +const char* NamespaceDirPrefix = "ns_"; + +NamespaceCacheStore::NamespaceCacheStore(std::filesystem::path BasePath, CasGc& Gc) +: GcStorage(Gc) +, GcContributor(Gc) +, m_Log(logging::Get("namespacecachestore")) +, m_BasePath(BasePath) +{ + CreateDirectories(m_BasePath); + std::vector ExistingFolders = FindExistingFolders(); + + std::vector LegacyBuckets; + std::vector Namespaces; + for (const std::string& DirName : ExistingFolders) + { + if (DirName.starts_with(NamespaceDirPrefix)) + { + Namespaces.push_back(DirName.substr(3)); + continue; + } + LegacyBuckets.push_back(DirName); + } + if (Namespaces.empty() && !LegacyBuckets.empty()) + { + // If we find no namespaces, but any unknown folders we assume we have a legacy folder + // and move any existing folders into a default namespace + std::filesystem::path DefaultfNamespaceFolder = m_BasePath / NamespaceDirPrefix; + CreateDirectories(DefaultfNamespaceFolder); + for (const std::string& DirName : LegacyBuckets) + { + std::filesystem::path LegacyFolder = m_BasePath / DirName; + std::filesystem::path NewPath = DefaultfNamespaceFolder / DirName; + std::filesystem::rename(LegacyFolder, NewPath); + } + Namespaces.push_back(""); + } + + for (const std::string& NamespaceName : Namespaces) + { + ZenCacheStore* Store = new ZenCacheStore(Gc, m_BasePath / (NamespaceDirPrefix + NamespaceName)); + m_Namespaces[NamespaceName] = Store; + } +} + +NamespaceCacheStore::~NamespaceCacheStore() +{ + for (const auto& Entry : m_Namespaces) + { + delete Entry.second; + } + m_Namespaces.clear(); +} + +std::vector +NamespaceCacheStore::FindExistingFolders() const +{ + FileSystemTraversal Traversal; + struct Visitor : public FileSystemTraversal::TreeVisitor + { + virtual void VisitFile([[maybe_unused]] const std::filesystem::path& Parent, + [[maybe_unused]] const path_view& File, + [[maybe_unused]] uint64_t FileSize) override + { + } + + virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, const path_view& DirectoryName) override + { + std::string DirName8 = WideToUtf8(DirectoryName); + Dirs.push_back(DirName8); + return false; + } + + std::vector Dirs; + } Visit; + + Traversal.TraverseFileSystem(m_BasePath, Visit); + return Visit.Dirs; +} + +bool +NamespaceCacheStore::Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue) +{ + ZenCacheStore* Store = GetStore(Namespace); + if (!Store) + { + return false; + } + return Store->Get(Bucket, HashKey, OutValue); +} + +void +NamespaceCacheStore::Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value) +{ + ZenCacheStore* Store = GetStore(Namespace); + if (!Store) + { + return; + } + Store->Put(Bucket, HashKey, Value); +} + +bool +NamespaceCacheStore::DropBucket(const std::string& Namespace, std::string_view Bucket) +{ + ZenCacheStore* Store = GetStore(Namespace); + if (!Store) + { + return false; + } + return Store->DropBucket(Bucket); +} + +void +NamespaceCacheStore::Flush() +{ + std::vector Stores; + RwLock::SharedLockScope _(m_NamespacesLock); + Stores.reserve(m_Namespaces.size()); + for (const auto& Entry : m_Namespaces) + { + Stores.push_back(Entry.second); + } + _.ReleaseNow(); + for (ZenCacheStore* Store : Stores) + { + Store->Flush(); + } +} + +void +NamespaceCacheStore::Scrub(ScrubContext& Ctx) +{ + std::vector Stores = GetAllStores(); + for (ZenCacheStore* Store : Stores) + { + Store->Scrub(Ctx); + } +} + +ZenCacheStore* +NamespaceCacheStore::GetStore(const std::string& Namespace) +{ + RwLock::SharedLockScope _(m_NamespacesLock); + if (auto It = m_Namespaces.find(Namespace); It != m_Namespaces.end()) + { + return It->second; + } + return nullptr; +} + +std::vector +NamespaceCacheStore::GetAllStores() const +{ + std::vector Stores; + RwLock::SharedLockScope _(m_NamespacesLock); + Stores.reserve(m_Namespaces.size()); + for (const auto& Entry : m_Namespaces) + { + Stores.push_back(Entry.second); + } + return Stores; +} + +void +NamespaceCacheStore::GatherReferences(GcContext& GcCtx) +{ + std::vector Stores = GetAllStores(); + for (ZenCacheStore* Store : Stores) + { + Store->GatherReferences(GcCtx); + } +} + +void +NamespaceCacheStore::CollectGarbage(GcContext& GcCtx) +{ + std::vector Stores = GetAllStores(); + for (ZenCacheStore* Store : Stores) + { + Store->CollectGarbage(GcCtx); + } +} + +GcStorageSize +NamespaceCacheStore::StorageSize() const +{ + std::vector Stores = GetAllStores(); + GcStorageSize Size; + for (ZenCacheStore* Store : Stores) + { + GcStorageSize StoreSize = Store->StorageSize(); + Size.MemorySize += StoreSize.MemorySize; + Size.DiskSize += StoreSize.DiskSize; + } + return Size; +} + +} // namespace zen diff --git a/zenserver/cache/namespacecachestore.h b/zenserver/cache/namespacecachestore.h new file mode 100644 index 000000000..85f4150bf --- /dev/null +++ b/zenserver/cache/namespacecachestore.h @@ -0,0 +1,49 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include +#include + +#include + +#include + +namespace zen { + +class ScrubContext; +class ZenCacheStore; +struct ZenCacheValue; + +/** NamespaceCache Store + */ +class NamespaceCacheStore : public RefCounted, public GcStorage, public GcContributor +{ +public: + NamespaceCacheStore(std::filesystem::path BasePath, CasGc& Gc); + ~NamespaceCacheStore(); + + bool Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); + void Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value); + bool DropBucket(const std::string& Namespace, std::string_view Bucket); + void Flush(); + void Scrub(ScrubContext& Ctx); + + spdlog::logger& Log() { return m_Log; } + const std::filesystem::path& BasePath() const { return m_BasePath; } + + virtual void GatherReferences(GcContext& GcCtx) override; + virtual void CollectGarbage(GcContext& GcCtx) override; + virtual GcStorageSize StorageSize() const override; + +private: + std::vector FindExistingFolders() const; + ZenCacheStore* GetStore(const std::string& Namespace); + std::vector GetAllStores() const; + spdlog::logger& m_Log; + std::filesystem::path m_BasePath; + mutable RwLock m_NamespacesLock; + std::unordered_map m_Namespaces; +}; + +} // namespace zen diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index e1d9de976..276c99081 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -21,6 +21,7 @@ //#include "cachekey.h" #include "monitoring/httpstats.h" +#include "namespacecachestore.h" #include "structuredcachestore.h" #include "upstream/jupiter.h" #include "upstream/upstreamcache.h" @@ -72,13 +73,13 @@ struct PutRequestData ////////////////////////////////////////////////////////////////////////// -HttpStructuredCacheService::HttpStructuredCacheService(ZenCacheStore& InCacheStore, - CidStore& InCidStore, - HttpStatsService& StatsService, - HttpStatusService& StatusService, - UpstreamCache& UpstreamCache) +HttpStructuredCacheService::HttpStructuredCacheService(NamespaceCacheStore& InNamespaceCacheStore, + CidStore& InCidStore, + HttpStatsService& StatsService, + HttpStatusService& StatusService, + UpstreamCache& UpstreamCache) : m_Log(logging::Get("cache")) -, m_CacheStore(InCacheStore) +, m_CacheStore(InNamespaceCacheStore) , m_StatsService(StatsService) , m_StatusService(StatusService) , m_CidStore(InCidStore) @@ -176,7 +177,7 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, case HttpVerb::kDelete: // Drop bucket - if (m_CacheStore.DropBucket(Bucket)) + if (m_CacheStore.DropBucket("", Bucket)) { return Request.WriteResponse(HttpResponseCode::OK); } @@ -225,7 +226,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request return Request.WriteResponse(HttpResponseCode::OK); } - if (EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryLocal) && m_CacheStore.Get(Ref.BucketSegment, Ref.HashKey, ClientResultValue)) + if (EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryLocal) && m_CacheStore.Get("", Ref.BucketSegment, Ref.HashKey, ClientResultValue)) { Success = true; ZenContentType ContentType = ClientResultValue.Value.GetContentType(); @@ -350,7 +351,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (Success && StoreLocal) { - m_CacheStore.Put(Ref.BucketSegment, Ref.HashKey, ClientResultValue); + m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, ClientResultValue); } } else if (AcceptType == ZenContentType::kCbPackage) @@ -404,7 +405,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (StoreLocal) { - m_CacheStore.Put(Ref.BucketSegment, Ref.HashKey, CacheValue); + m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, CacheValue); } BinaryWriter MemStream; @@ -486,7 +487,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (ContentType == HttpContentType::kBinary || ContentType == HttpContentType::kCompressedBinary) { ZEN_DEBUG("PUT - '{}/{}' {} '{}'", Ref.BucketSegment, Ref.HashKey, NiceBytes(Body.Size()), ToString(ContentType)); - m_CacheStore.Put(Ref.BucketSegment, Ref.HashKey, {.Value = Body}); + m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, {.Value = Body}); if (EnumHasAllFlags(PolicyFromURL, CachePolicy::StoreRemote)) { @@ -528,7 +529,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request ValidAttachments.size()); Body.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put(Ref.BucketSegment, Ref.HashKey, {.Value = Body}); + m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, {.Value = Body}); const bool IsPartialRecord = TotalCount != static_cast(ValidAttachments.size()); @@ -611,7 +612,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request CacheValue.Value = CacheRecord.GetBuffer().AsIoBuffer(); CacheValue.Value.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put(Ref.BucketSegment, Ref.HashKey, CacheValue); + m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, CacheValue); const bool IsPartialRecord = Count.Valid != Count.Total; @@ -1013,7 +1014,7 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack CacheValue.Value = IoBuffer(Record.GetSize()); Record.CopyTo(MutableMemoryView(CacheValue.Value.MutableData(), CacheValue.Value.GetSize())); CacheValue.Value.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put(Request.Key.Bucket, Request.Key.Hash, CacheValue); + m_CacheStore.Put("", Request.Key.Bucket, Request.Key.Hash, CacheValue); const bool IsPartialRecord = Count.Valid != Count.Total; @@ -1098,7 +1099,8 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt bool FoundLocalInvalid = false; ZenCacheValue RecordCacheValue; - if (EnumHasAllFlags(Policy.GetRecordPolicy(), CachePolicy::QueryLocal) && m_CacheStore.Get(Key.Bucket, Key.Hash, RecordCacheValue)) + if (EnumHasAllFlags(Policy.GetRecordPolicy(), CachePolicy::QueryLocal) && + m_CacheStore.Get("", Key.Bucket, Key.Hash, RecordCacheValue)) { Request.RecordCacheValue = std::move(RecordCacheValue.Value); if (Request.RecordCacheValue.GetContentType() != ZenContentType::kCbObject) @@ -1229,7 +1231,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt Request.RecordObject = ObjectBuffer; if (EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::StoreLocal)) { - m_CacheStore.Put(Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); + m_CacheStore.Put("", Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); } ParseValues(Request); Request.UsedUpstream = true; @@ -1386,7 +1388,7 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ { IoBuffer Value = Chunk.GetCompressed().Flatten().AsIoBuffer(); Value.SetContentType(ZenContentType::kCompressedBinary); - m_CacheStore.Put(Key.Bucket, Key.Hash, {.Value = Value}); + m_CacheStore.Put("", Key.Bucket, Key.Hash, {.Value = Value}); TransferredSize = Chunk.GetCompressedSize(); } Succeeded = true; @@ -1400,7 +1402,7 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ else if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { ZenCacheValue ExistingValue; - if (m_CacheStore.Get(Key.Bucket, Key.Hash, ExistingValue) && IsCompressedBinary(ExistingValue.Value.GetContentType())) + if (m_CacheStore.Get("", Key.Bucket, Key.Hash, ExistingValue) && IsCompressedBinary(ExistingValue.Value.GetContentType())) { Succeeded = true; } @@ -1483,7 +1485,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { - if (m_CacheStore.Get(Key.Bucket, Key.Hash, CacheValue) && IsCompressedBinary(CacheValue.Value.GetContentType())) + if (m_CacheStore.Get("", Key.Bucket, Key.Hash, CacheValue) && IsCompressedBinary(CacheValue.Value.GetContentType())) { Result = CompressedBuffer::FromCompressed(SharedBuffer(CacheValue.Value)); } @@ -1547,7 +1549,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http // that we copy data from upstream even when SkipData and !StoreLocal are true means that it is too expensive // for us to keep the data only on the upstream server. // if (EnumHasAllFlags(Policy, CachePolicy::StoreLocal)) - m_CacheStore.Put(Request.Key.Bucket, Request.Key.Hash, ZenCacheValue{Params.Value}); + m_CacheStore.Put("", Request.Key.Bucket, Request.Key.Hash, ZenCacheValue{Params.Value}); ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}' {} ({}) in {}", ChunkRequest.Key.Bucket, ChunkRequest.Key.Hash, @@ -1803,7 +1805,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (!Record.Exists && EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get(RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) + if (m_CacheStore.Get("", RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) { Record.Exists = true; Record.CacheValue = std::move(CacheValue.Value); @@ -1838,7 +1840,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::StoreLocal)) { - m_CacheStore.Put(Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); + m_CacheStore.Put("", Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); } }; m_UpstreamCache.GetCacheRecords(UpstreamRecordRequests, std::move(OnCacheRecordGetComplete)); @@ -1935,7 +1937,7 @@ HttpStructuredCacheService::GetLocalCacheValues(std::vectorExists && EnumHasAllFlags(Request->DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get(Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) + if (m_CacheStore.Get("", Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) { if (IsCompressedBinary(CacheValue.Value.GetContentType())) { @@ -2004,7 +2006,7 @@ HttpStructuredCacheService::GetUpstreamCacheChunks(std::vector #include +#include "cache/namespacecachestore.h" #include "cache/structuredcache.h" #include "cache/structuredcachestore.h" #include "diag/logging.h" @@ -1173,7 +1174,7 @@ namespace detail { class UpstreamCacheImpl final : public UpstreamCache { public: - UpstreamCacheImpl(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore) + UpstreamCacheImpl(const UpstreamCacheOptions& Options, NamespaceCacheStore& CacheStore, CidStore& CidStore) : m_Log(logging::Get("upstream")) , m_Options(Options) , m_CacheStore(CacheStore) @@ -1517,7 +1518,7 @@ private: ZenCacheValue CacheValue; std::vector Payloads; - if (!m_CacheStore.Get(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) + if (!m_CacheStore.Get("", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) { ZEN_WARN("process upstream FAILED, '{}/{}', cache record doesn't exist", CacheRecord.Key.Bucket, CacheRecord.Key.Hash); return; @@ -1687,7 +1688,7 @@ private: spdlog::logger& m_Log; UpstreamCacheOptions m_Options; - ZenCacheStore& m_CacheStore; + NamespaceCacheStore& m_CacheStore; CidStore& m_CidStore; UpstreamQueue m_UpstreamQueue; std::shared_mutex m_EndpointsMutex; @@ -1712,7 +1713,7 @@ UpstreamEndpoint::CreateJupiterEndpoint(const CloudCacheClientOptions& Options, } std::unique_ptr -UpstreamCache::Create(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore) +UpstreamCache::Create(const UpstreamCacheOptions& Options, NamespaceCacheStore& CacheStore, CidStore& CidStore) { return std::make_unique(Options, CacheStore, CidStore); } diff --git a/zenserver/upstream/upstreamcache.h b/zenserver/upstream/upstreamcache.h index 6f18b3119..54386e80d 100644 --- a/zenserver/upstream/upstreamcache.h +++ b/zenserver/upstream/upstreamcache.h @@ -24,7 +24,7 @@ class CbObjectView; class CbPackage; class CbObjectWriter; class CidStore; -class ZenCacheStore; +class NamespaceCacheStore; struct CloudCacheClientOptions; class CloudCacheTokenProvider; struct ZenStructuredCacheClientOptions; @@ -206,7 +206,7 @@ public: virtual void GetStatus(CbObjectWriter& CbO) = 0; - static std::unique_ptr Create(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore); + static std::unique_ptr Create(const UpstreamCacheOptions& Options, NamespaceCacheStore& CacheStore, CidStore& CidStore); }; } // namespace zen diff --git a/zenserver/zenserver.cpp b/zenserver/zenserver.cpp index abaec888a..9b7083312 100644 --- a/zenserver/zenserver.cpp +++ b/zenserver/zenserver.cpp @@ -102,6 +102,7 @@ ZEN_THIRD_PARTY_INCLUDES_END #include "admin/admin.h" #include "auth/authmgr.h" #include "auth/authservice.h" +#include "cache/namespacecachestore.h" #include "cache/structuredcache.h" #include "cache/structuredcachestore.h" #include "compute/function.h" @@ -611,7 +612,7 @@ private: zen::GcScheduler m_GcScheduler{m_CasGc}; std::unique_ptr m_CasStore{zen::CreateCasStore(m_CasGc)}; std::unique_ptr m_CidStore; - std::unique_ptr m_CacheStore; + std::unique_ptr m_CacheStore; zen::CasScrubber m_Scrubber{*m_CasStore}; zen::HttpTestService m_TestService; zen::HttpTestingService m_TestingService; @@ -755,7 +756,7 @@ ZenServer::InitializeStructuredCache(const ZenServerOptions& ServerOptions) using namespace std::literals; ZEN_INFO("instantiating structured cache service"); - m_CacheStore = std::make_unique(m_CasGc, m_DataRoot / "cache"); + m_CacheStore = std::make_unique(m_DataRoot / "cache", m_CasGc); const ZenUpstreamCacheConfig& UpstreamConfig = ServerOptions.UpstreamCacheConfig; -- cgit v1.2.3 From 322731d5fabcd9e5219eb66bd199057ec933f310 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 4 May 2022 15:48:00 +0200 Subject: default namespace fix --- zenserver/cache/namespacecachestore.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/zenserver/cache/namespacecachestore.cpp b/zenserver/cache/namespacecachestore.cpp index 82ac40c62..1cf76b0ae 100644 --- a/zenserver/cache/namespacecachestore.cpp +++ b/zenserver/cache/namespacecachestore.cpp @@ -29,16 +29,17 @@ NamespaceCacheStore::NamespaceCacheStore(std::filesystem::path BasePath, CasGc& } LegacyBuckets.push_back(DirName); } - if (Namespaces.empty() && !LegacyBuckets.empty()) + + if (std::find(Namespaces.begin(), Namespaces.end(), "") == Namespaces.end()) { - // If we find no namespaces, but any unknown folders we assume we have a legacy folder - // and move any existing folders into a default namespace - std::filesystem::path DefaultfNamespaceFolder = m_BasePath / NamespaceDirPrefix; - CreateDirectories(DefaultfNamespaceFolder); + std::filesystem::path DefaultNamespaceFolder = m_BasePath / NamespaceDirPrefix; + CreateDirectories(DefaultNamespaceFolder); + + // Move any non-namespace folders into the default namespace folder for (const std::string& DirName : LegacyBuckets) { std::filesystem::path LegacyFolder = m_BasePath / DirName; - std::filesystem::path NewPath = DefaultfNamespaceFolder / DirName; + std::filesystem::path NewPath = DefaultNamespaceFolder / DirName; std::filesystem::rename(LegacyFolder, NewPath); } Namespaces.push_back(""); -- cgit v1.2.3 From ef12415d287c9307c0c4774aeacff6c91966f693 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 4 May 2022 19:45:57 +0200 Subject: cleanup --- zenserver/cache/namespacecachestore.cpp | 208 ------------------------- zenserver/cache/namespacecachestore.h | 49 ------ zenserver/cache/structuredcache.cpp | 13 +- zenserver/cache/structuredcache.h | 14 +- zenserver/cache/structuredcachestore.cpp | 256 +++++++++++++++++++++++++++---- zenserver/cache/structuredcachestore.h | 33 +++- zenserver/upstream/upstreamapply.h | 2 +- zenserver/upstream/upstreamcache.cpp | 7 +- zenserver/upstream/upstreamcache.h | 4 +- zenserver/zenserver.cpp | 5 +- 10 files changed, 274 insertions(+), 317 deletions(-) delete mode 100644 zenserver/cache/namespacecachestore.cpp delete mode 100644 zenserver/cache/namespacecachestore.h diff --git a/zenserver/cache/namespacecachestore.cpp b/zenserver/cache/namespacecachestore.cpp deleted file mode 100644 index 1cf76b0ae..000000000 --- a/zenserver/cache/namespacecachestore.cpp +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include "namespacecachestore.h" -#include "structuredcachestore.h" - -namespace zen { - -const char* NamespaceDirPrefix = "ns_"; - -NamespaceCacheStore::NamespaceCacheStore(std::filesystem::path BasePath, CasGc& Gc) -: GcStorage(Gc) -, GcContributor(Gc) -, m_Log(logging::Get("namespacecachestore")) -, m_BasePath(BasePath) -{ - CreateDirectories(m_BasePath); - std::vector ExistingFolders = FindExistingFolders(); - - std::vector LegacyBuckets; - std::vector Namespaces; - for (const std::string& DirName : ExistingFolders) - { - if (DirName.starts_with(NamespaceDirPrefix)) - { - Namespaces.push_back(DirName.substr(3)); - continue; - } - LegacyBuckets.push_back(DirName); - } - - if (std::find(Namespaces.begin(), Namespaces.end(), "") == Namespaces.end()) - { - std::filesystem::path DefaultNamespaceFolder = m_BasePath / NamespaceDirPrefix; - CreateDirectories(DefaultNamespaceFolder); - - // Move any non-namespace folders into the default namespace folder - for (const std::string& DirName : LegacyBuckets) - { - std::filesystem::path LegacyFolder = m_BasePath / DirName; - std::filesystem::path NewPath = DefaultNamespaceFolder / DirName; - std::filesystem::rename(LegacyFolder, NewPath); - } - Namespaces.push_back(""); - } - - for (const std::string& NamespaceName : Namespaces) - { - ZenCacheStore* Store = new ZenCacheStore(Gc, m_BasePath / (NamespaceDirPrefix + NamespaceName)); - m_Namespaces[NamespaceName] = Store; - } -} - -NamespaceCacheStore::~NamespaceCacheStore() -{ - for (const auto& Entry : m_Namespaces) - { - delete Entry.second; - } - m_Namespaces.clear(); -} - -std::vector -NamespaceCacheStore::FindExistingFolders() const -{ - FileSystemTraversal Traversal; - struct Visitor : public FileSystemTraversal::TreeVisitor - { - virtual void VisitFile([[maybe_unused]] const std::filesystem::path& Parent, - [[maybe_unused]] const path_view& File, - [[maybe_unused]] uint64_t FileSize) override - { - } - - virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, const path_view& DirectoryName) override - { - std::string DirName8 = WideToUtf8(DirectoryName); - Dirs.push_back(DirName8); - return false; - } - - std::vector Dirs; - } Visit; - - Traversal.TraverseFileSystem(m_BasePath, Visit); - return Visit.Dirs; -} - -bool -NamespaceCacheStore::Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue) -{ - ZenCacheStore* Store = GetStore(Namespace); - if (!Store) - { - return false; - } - return Store->Get(Bucket, HashKey, OutValue); -} - -void -NamespaceCacheStore::Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value) -{ - ZenCacheStore* Store = GetStore(Namespace); - if (!Store) - { - return; - } - Store->Put(Bucket, HashKey, Value); -} - -bool -NamespaceCacheStore::DropBucket(const std::string& Namespace, std::string_view Bucket) -{ - ZenCacheStore* Store = GetStore(Namespace); - if (!Store) - { - return false; - } - return Store->DropBucket(Bucket); -} - -void -NamespaceCacheStore::Flush() -{ - std::vector Stores; - RwLock::SharedLockScope _(m_NamespacesLock); - Stores.reserve(m_Namespaces.size()); - for (const auto& Entry : m_Namespaces) - { - Stores.push_back(Entry.second); - } - _.ReleaseNow(); - for (ZenCacheStore* Store : Stores) - { - Store->Flush(); - } -} - -void -NamespaceCacheStore::Scrub(ScrubContext& Ctx) -{ - std::vector Stores = GetAllStores(); - for (ZenCacheStore* Store : Stores) - { - Store->Scrub(Ctx); - } -} - -ZenCacheStore* -NamespaceCacheStore::GetStore(const std::string& Namespace) -{ - RwLock::SharedLockScope _(m_NamespacesLock); - if (auto It = m_Namespaces.find(Namespace); It != m_Namespaces.end()) - { - return It->second; - } - return nullptr; -} - -std::vector -NamespaceCacheStore::GetAllStores() const -{ - std::vector Stores; - RwLock::SharedLockScope _(m_NamespacesLock); - Stores.reserve(m_Namespaces.size()); - for (const auto& Entry : m_Namespaces) - { - Stores.push_back(Entry.second); - } - return Stores; -} - -void -NamespaceCacheStore::GatherReferences(GcContext& GcCtx) -{ - std::vector Stores = GetAllStores(); - for (ZenCacheStore* Store : Stores) - { - Store->GatherReferences(GcCtx); - } -} - -void -NamespaceCacheStore::CollectGarbage(GcContext& GcCtx) -{ - std::vector Stores = GetAllStores(); - for (ZenCacheStore* Store : Stores) - { - Store->CollectGarbage(GcCtx); - } -} - -GcStorageSize -NamespaceCacheStore::StorageSize() const -{ - std::vector Stores = GetAllStores(); - GcStorageSize Size; - for (ZenCacheStore* Store : Stores) - { - GcStorageSize StoreSize = Store->StorageSize(); - Size.MemorySize += StoreSize.MemorySize; - Size.DiskSize += StoreSize.DiskSize; - } - return Size; -} - -} // namespace zen diff --git a/zenserver/cache/namespacecachestore.h b/zenserver/cache/namespacecachestore.h deleted file mode 100644 index 85f4150bf..000000000 --- a/zenserver/cache/namespacecachestore.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright Epic Games, Inc. All Rights Reserved. - -#pragma once - -#include -#include - -#include - -#include - -namespace zen { - -class ScrubContext; -class ZenCacheStore; -struct ZenCacheValue; - -/** NamespaceCache Store - */ -class NamespaceCacheStore : public RefCounted, public GcStorage, public GcContributor -{ -public: - NamespaceCacheStore(std::filesystem::path BasePath, CasGc& Gc); - ~NamespaceCacheStore(); - - bool Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); - void Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value); - bool DropBucket(const std::string& Namespace, std::string_view Bucket); - void Flush(); - void Scrub(ScrubContext& Ctx); - - spdlog::logger& Log() { return m_Log; } - const std::filesystem::path& BasePath() const { return m_BasePath; } - - virtual void GatherReferences(GcContext& GcCtx) override; - virtual void CollectGarbage(GcContext& GcCtx) override; - virtual GcStorageSize StorageSize() const override; - -private: - std::vector FindExistingFolders() const; - ZenCacheStore* GetStore(const std::string& Namespace); - std::vector GetAllStores() const; - spdlog::logger& m_Log; - std::filesystem::path m_BasePath; - mutable RwLock m_NamespacesLock; - std::unordered_map m_Namespaces; -}; - -} // namespace zen diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 276c99081..8deb958be 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -21,7 +21,6 @@ //#include "cachekey.h" #include "monitoring/httpstats.h" -#include "namespacecachestore.h" #include "structuredcachestore.h" #include "upstream/jupiter.h" #include "upstream/upstreamcache.h" @@ -73,13 +72,13 @@ struct PutRequestData ////////////////////////////////////////////////////////////////////////// -HttpStructuredCacheService::HttpStructuredCacheService(NamespaceCacheStore& InNamespaceCacheStore, - CidStore& InCidStore, - HttpStatsService& StatsService, - HttpStatusService& StatusService, - UpstreamCache& UpstreamCache) +HttpStructuredCacheService::HttpStructuredCacheService(ZenCacheStore& CacheStore, + CidStore& InCidStore, + HttpStatsService& StatsService, + HttpStatusService& StatusService, + UpstreamCache& UpstreamCache) : m_Log(logging::Get("cache")) -, m_CacheStore(InNamespaceCacheStore) +, m_CacheStore(CacheStore) , m_StatsService(StatsService) , m_StatusService(StatusService) , m_CidStore(InCidStore) diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index 7e9847838..c41afef12 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -25,7 +25,7 @@ class CbObjectView; struct PutRequestData; class ScrubContext; class UpstreamCache; -class NamespaceCacheStore; +class ZenCacheStore; enum class CachePolicy : uint32_t; namespace cache::detail { @@ -64,11 +64,11 @@ namespace cache::detail { class HttpStructuredCacheService : public HttpService, public IHttpStatsProvider, public IHttpStatusProvider { public: - HttpStructuredCacheService(NamespaceCacheStore& InNamespaceCacheStore, - CidStore& InCidStore, - HttpStatsService& StatsService, - HttpStatusService& StatusService, - UpstreamCache& UpstreamCache); + HttpStructuredCacheService(ZenCacheStore& CacheStore, + CidStore& InCidStore, + HttpStatsService& StatsService, + HttpStatusService& StatusService, + UpstreamCache& UpstreamCache); ~HttpStructuredCacheService(); virtual const char* BaseUri() const override; @@ -140,7 +140,7 @@ private: spdlog::logger& Log() { return m_Log; } spdlog::logger& m_Log; - NamespaceCacheStore& m_CacheStore; + ZenCacheStore& m_CacheStore; HttpStatsService& m_StatsService; HttpStatusService& m_StatusService; CidStore& m_CidStore; diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 2869191fd..075b7d408 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -232,7 +232,7 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object) WriteFile(Path, Object.GetBuffer().AsIoBuffer()); } -ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir) +ZenCacheNamespace::ZenCacheNamespace(CasGc& Gc, const std::filesystem::path& RootDir) : GcStorage(Gc) , GcContributor(Gc) , m_RootDir(RootDir) @@ -248,12 +248,12 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir) #endif } -ZenCacheStore::~ZenCacheStore() +ZenCacheNamespace::~ZenCacheNamespace() { } bool -ZenCacheStore::Get(std::string_view InBucket, const IoHash& HashKey, ZenCacheValue& OutValue) +ZenCacheNamespace::Get(std::string_view InBucket, const IoHash& HashKey, ZenCacheValue& OutValue) { ZEN_TRACE_CPU("Z$::Get"); @@ -291,7 +291,7 @@ ZenCacheStore::Get(std::string_view InBucket, const IoHash& HashKey, ZenCacheVal } void -ZenCacheStore::Put(std::string_view InBucket, const IoHash& HashKey, const ZenCacheValue& Value) +ZenCacheNamespace::Put(std::string_view InBucket, const IoHash& HashKey, const ZenCacheValue& Value) { ZEN_TRACE_CPU("Z$::Put"); @@ -327,7 +327,7 @@ ZenCacheStore::Put(std::string_view InBucket, const IoHash& HashKey, const ZenCa } bool -ZenCacheStore::DropBucket(std::string_view Bucket) +ZenCacheNamespace::DropBucket(std::string_view Bucket) { ZEN_INFO("dropping bucket '{}'", Bucket); @@ -343,13 +343,13 @@ ZenCacheStore::DropBucket(std::string_view Bucket) } void -ZenCacheStore::Flush() +ZenCacheNamespace::Flush() { m_DiskLayer.Flush(); } void -ZenCacheStore::Scrub(ScrubContext& Ctx) +ZenCacheNamespace::Scrub(ScrubContext& Ctx) { if (m_LastScrubTime == Ctx.ScrubTimestamp()) { @@ -363,7 +363,7 @@ ZenCacheStore::Scrub(ScrubContext& Ctx) } void -ZenCacheStore::GatherReferences(GcContext& GcCtx) +ZenCacheNamespace::GatherReferences(GcContext& GcCtx) { Stopwatch Timer; const auto Guard = @@ -377,14 +377,14 @@ ZenCacheStore::GatherReferences(GcContext& GcCtx) } void -ZenCacheStore::CollectGarbage(GcContext& GcCtx) +ZenCacheNamespace::CollectGarbage(GcContext& GcCtx) { m_MemLayer.Reset(); m_DiskLayer.CollectGarbage(GcCtx); } GcStorageSize -ZenCacheStore::StorageSize() const +ZenCacheNamespace::StorageSize() const { return {.DiskSize = m_DiskLayer.TotalSize(), .MemorySize = m_MemLayer.TotalSize()}; } @@ -2098,6 +2098,200 @@ ZenCacheDiskLayer::TotalSize() const return TotalSize; } +//////////////////////////// ZenCacheStore + +const char* ZenCacheNamespaceDirPrefix = "ns_"; + +namespace { + + std::vector FindExistingFolders(const std::filesystem::path& RootPath) + { + FileSystemTraversal Traversal; + struct Visitor : public FileSystemTraversal::TreeVisitor + { + virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t) override {} + + virtual bool VisitDirectory(const std::filesystem::path&, const path_view& DirectoryName) override + { + std::string DirName8 = WideToUtf8(DirectoryName); + Dirs.push_back(DirName8); + return false; + } + + std::vector Dirs; + } Visit; + + Traversal.TraverseFileSystem(RootPath, Visit); + return Visit.Dirs; + } + +} // namespace + +ZenCacheStore::ZenCacheStore(std::filesystem::path BasePath, CasGc& Gc) : GcStorage(Gc), GcContributor(Gc) +{ + CreateDirectories(BasePath); + std::vector ExistingFolders = FindExistingFolders(BasePath); + + std::vector LegacyBuckets; + std::vector Namespaces; + for (const std::string& DirName : ExistingFolders) + { + if (DirName.starts_with(ZenCacheNamespaceDirPrefix)) + { + Namespaces.push_back(DirName.substr(3)); + continue; + } + LegacyBuckets.push_back(DirName); + } + + ZEN_INFO("Found #{} namespaces in '{}' and #{} legacy buckets", Namespaces.size(), BasePath, LegacyBuckets.size()); + + if (std::find(Namespaces.begin(), Namespaces.end(), "") == Namespaces.end()) + { + ZEN_INFO("Moving #{} legacy buckets to anonymous namespace", LegacyBuckets.size()); + std::filesystem::path DefaultNamespaceFolder = BasePath / ZenCacheNamespaceDirPrefix; + CreateDirectories(DefaultNamespaceFolder); + + // Move any non-namespace folders into the default namespace folder + for (const std::string& DirName : LegacyBuckets) + { + std::filesystem::path LegacyFolder = BasePath / DirName; + std::filesystem::path NewPath = DefaultNamespaceFolder / DirName; + std::filesystem::rename(LegacyFolder, NewPath); + } + Namespaces.push_back(""); + } + + for (const std::string& NamespaceName : Namespaces) + { + Ref Store = new ZenCacheNamespace(Gc, BasePath / (ZenCacheNamespaceDirPrefix + NamespaceName)); + m_Namespaces[NamespaceName] = Store; + } +} + +ZenCacheStore::~ZenCacheStore() +{ + m_Namespaces.clear(); +} + +bool +ZenCacheStore::Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue) +{ + Ref Store = GetStore(Namespace); + if (!Store) + { + return false; + } + return Store->Get(Bucket, HashKey, OutValue); +} + +void +ZenCacheStore::Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value) +{ + Ref Store = GetStore(Namespace); + if (!Store) + { + return; + } + Store->Put(Bucket, HashKey, Value); +} + +bool +ZenCacheStore::DropBucket(const std::string& Namespace, std::string_view Bucket) +{ + Ref Store = GetStore(Namespace); + if (!Store) + { + return false; + } + return Store->DropBucket(Bucket); +} + +void +ZenCacheStore::Flush() +{ + std::vector> Stores; + RwLock::SharedLockScope _(m_NamespacesLock); + Stores.reserve(m_Namespaces.size()); + for (const auto& Entry : m_Namespaces) + { + Stores.push_back(Entry.second); + } + _.ReleaseNow(); + for (const Ref& Store : Stores) + { + Store->Flush(); + } +} + +void +ZenCacheStore::Scrub(ScrubContext& Ctx) +{ + std::vector> Stores = GetAllStores(); + for (const Ref& Store : Stores) + { + Store->Scrub(Ctx); + } +} + +Ref +ZenCacheStore::GetStore(const std::string& Namespace) +{ + RwLock::SharedLockScope _(m_NamespacesLock); + if (auto It = m_Namespaces.find(Namespace); It != m_Namespaces.end()) + { + return It->second; + } + return nullptr; +} + +std::vector> +ZenCacheStore::GetAllStores() const +{ + std::vector> Stores; + RwLock::SharedLockScope _(m_NamespacesLock); + Stores.reserve(m_Namespaces.size()); + for (const auto& Entry : m_Namespaces) + { + Stores.push_back(Entry.second); + } + return Stores; +} + +void +ZenCacheStore::GatherReferences(GcContext& GcCtx) +{ + std::vector> Stores = GetAllStores(); + for (const Ref& Store : Stores) + { + Store->GatherReferences(GcCtx); + } +} + +void +ZenCacheStore::CollectGarbage(GcContext& GcCtx) +{ + std::vector> Stores = GetAllStores(); + for (const Ref& Store : Stores) + { + Store->CollectGarbage(GcCtx); + } +} + +GcStorageSize +ZenCacheStore::StorageSize() const +{ + std::vector> Stores = GetAllStores(); + GcStorageSize Size; + for (const Ref& Store : Stores) + { + GcStorageSize StoreSize = Store->StorageSize(); + Size.MemorySize += StoreSize.MemorySize; + Size.DiskSize += StoreSize.DiskSize; + } + return Size; +} + ////////////////////////////////////////////////////////////////////////// #if ZEN_WITH_TESTS @@ -2136,7 +2330,7 @@ TEST_CASE("z$.store") CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); const int kIterationCount = 100; @@ -2189,8 +2383,8 @@ TEST_CASE("z$.size") GcStorageSize CacheSize; { - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); CbObject CacheValue = CreateCacheValue(Zcs.DiskLayerThreshold() - 256); @@ -2209,8 +2403,8 @@ TEST_CASE("z$.size") } { - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); const GcStorageSize SerializedSize = Zcs.StorageSize(); CHECK_EQ(SerializedSize.MemorySize, 0); @@ -2232,8 +2426,8 @@ TEST_CASE("z$.size") GcStorageSize CacheSize; { - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); CbObject CacheValue = CreateCacheValue(Zcs.DiskLayerThreshold() + 64); @@ -2252,8 +2446,8 @@ TEST_CASE("z$.size") } { - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); const GcStorageSize SerializedSize = Zcs.StorageSize(); CHECK_EQ(SerializedSize.MemorySize, 0); @@ -2290,9 +2484,9 @@ TEST_CASE("z$.gc") }; { - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); - const auto Bucket = "teardrinker"sv; + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); + const auto Bucket = "teardrinker"sv; // Create a cache record const IoHash Key = CreateKey(42); @@ -2328,7 +2522,7 @@ TEST_CASE("z$.gc") // Expect timestamps to be serialized { CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); std::vector Keep; // Collect garbage with 1 hour max cache duration @@ -2349,7 +2543,7 @@ TEST_CASE("z$.gc") { ScopedTemporaryDirectory TempDir; CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); const auto Bucket = "fortysixandtwo"sv; const GcClock::TimePoint CurrentTime = GcClock::Now(); @@ -2397,7 +2591,7 @@ TEST_CASE("z$.gc") { ScopedTemporaryDirectory TempDir; CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); const auto Bucket = "rightintwo"sv; const GcClock::TimePoint CurrentTime = GcClock::Now(); @@ -2490,7 +2684,7 @@ TEST_CASE("z$.legacyconversion") const std::string Bucket = "rightintwo"; { CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path()); + ZenCacheNamespace Zcs(Gc, TempDir.Path()); const GcClock::TimePoint CurrentTime = GcClock::Now(); for (size_t i = 0; i < ChunkCount; i++) @@ -2578,8 +2772,8 @@ TEST_CASE("z$.legacyconversion") std::filesystem::remove(IndexPath); { - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path()); + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path()); for (size_t i = 0; i < ChunkCount; i += 2) { @@ -2639,9 +2833,9 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) CreateDirectories(TempDir.Path()); - WorkerThreadPool ThreadPool(4); - CasGc Gc; - ZenCacheStore Zcs(Gc, TempDir.Path()); + WorkerThreadPool ThreadPool(4); + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path()); { std::atomic WorkCompleted = 0; diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 0c2a7c0b2..a803b0603 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -322,11 +322,11 @@ private: ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete; }; -class ZenCacheStore final : public GcStorage, public GcContributor +class ZenCacheNamespace final : public RefCounted, public GcStorage, public GcContributor { public: - ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir); - ~ZenCacheStore(); + ZenCacheNamespace(CasGc& Gc, const std::filesystem::path& RootDir); + ~ZenCacheNamespace(); bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); void Put(std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value); @@ -349,8 +349,31 @@ private: std::unique_ptr m_AccessTracker; #endif - ZenCacheStore(const ZenCacheStore&) = delete; - ZenCacheStore& operator=(const ZenCacheStore&) = delete; + ZenCacheNamespace(const ZenCacheNamespace&) = delete; + ZenCacheNamespace& operator=(const ZenCacheNamespace&) = delete; +}; + +class ZenCacheStore final : public GcStorage, public GcContributor +{ +public: + ZenCacheStore(std::filesystem::path BasePath, CasGc& Gc); + ~ZenCacheStore(); + + bool Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); + void Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value); + bool DropBucket(const std::string& Namespace, std::string_view Bucket); + void Flush(); + void Scrub(ScrubContext& Ctx); + + virtual void GatherReferences(GcContext& GcCtx) override; + virtual void CollectGarbage(GcContext& GcCtx) override; + virtual GcStorageSize StorageSize() const override; + +private: + Ref GetStore(const std::string& Namespace); + std::vector> GetAllStores() const; + mutable RwLock m_NamespacesLock; + std::unordered_map> m_Namespaces; }; void z$_forcelink(); diff --git a/zenserver/upstream/upstreamapply.h b/zenserver/upstream/upstreamapply.h index 2edc6dc49..1deaf00a5 100644 --- a/zenserver/upstream/upstreamapply.h +++ b/zenserver/upstream/upstreamapply.h @@ -23,7 +23,7 @@ class CbObjectWriter; class CidStore; class CloudCacheTokenProvider; class WorkerThreadPool; -class ZenCacheStore; +class ZenCacheNamespace; struct CloudCacheClientOptions; struct UpstreamAuthConfig; diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp index 49f384774..c89227106 100644 --- a/zenserver/upstream/upstreamcache.cpp +++ b/zenserver/upstream/upstreamcache.cpp @@ -19,7 +19,6 @@ #include #include -#include "cache/namespacecachestore.h" #include "cache/structuredcache.h" #include "cache/structuredcachestore.h" #include "diag/logging.h" @@ -1174,7 +1173,7 @@ namespace detail { class UpstreamCacheImpl final : public UpstreamCache { public: - UpstreamCacheImpl(const UpstreamCacheOptions& Options, NamespaceCacheStore& CacheStore, CidStore& CidStore) + UpstreamCacheImpl(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore) : m_Log(logging::Get("upstream")) , m_Options(Options) , m_CacheStore(CacheStore) @@ -1688,7 +1687,7 @@ private: spdlog::logger& m_Log; UpstreamCacheOptions m_Options; - NamespaceCacheStore& m_CacheStore; + ZenCacheStore& m_CacheStore; CidStore& m_CidStore; UpstreamQueue m_UpstreamQueue; std::shared_mutex m_EndpointsMutex; @@ -1713,7 +1712,7 @@ UpstreamEndpoint::CreateJupiterEndpoint(const CloudCacheClientOptions& Options, } std::unique_ptr -UpstreamCache::Create(const UpstreamCacheOptions& Options, NamespaceCacheStore& CacheStore, CidStore& CidStore) +UpstreamCache::Create(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore) { return std::make_unique(Options, CacheStore, CidStore); } diff --git a/zenserver/upstream/upstreamcache.h b/zenserver/upstream/upstreamcache.h index 54386e80d..6f18b3119 100644 --- a/zenserver/upstream/upstreamcache.h +++ b/zenserver/upstream/upstreamcache.h @@ -24,7 +24,7 @@ class CbObjectView; class CbPackage; class CbObjectWriter; class CidStore; -class NamespaceCacheStore; +class ZenCacheStore; struct CloudCacheClientOptions; class CloudCacheTokenProvider; struct ZenStructuredCacheClientOptions; @@ -206,7 +206,7 @@ public: virtual void GetStatus(CbObjectWriter& CbO) = 0; - static std::unique_ptr Create(const UpstreamCacheOptions& Options, NamespaceCacheStore& CacheStore, CidStore& CidStore); + static std::unique_ptr Create(const UpstreamCacheOptions& Options, ZenCacheStore& CacheStore, CidStore& CidStore); }; } // namespace zen diff --git a/zenserver/zenserver.cpp b/zenserver/zenserver.cpp index 9b7083312..e572e6a49 100644 --- a/zenserver/zenserver.cpp +++ b/zenserver/zenserver.cpp @@ -102,7 +102,6 @@ ZEN_THIRD_PARTY_INCLUDES_END #include "admin/admin.h" #include "auth/authmgr.h" #include "auth/authservice.h" -#include "cache/namespacecachestore.h" #include "cache/structuredcache.h" #include "cache/structuredcachestore.h" #include "compute/function.h" @@ -612,7 +611,7 @@ private: zen::GcScheduler m_GcScheduler{m_CasGc}; std::unique_ptr m_CasStore{zen::CreateCasStore(m_CasGc)}; std::unique_ptr m_CidStore; - std::unique_ptr m_CacheStore; + std::unique_ptr m_CacheStore; zen::CasScrubber m_Scrubber{*m_CasStore}; zen::HttpTestService m_TestService; zen::HttpTestingService m_TestingService; @@ -756,7 +755,7 @@ ZenServer::InitializeStructuredCache(const ZenServerOptions& ServerOptions) using namespace std::literals; ZEN_INFO("instantiating structured cache service"); - m_CacheStore = std::make_unique(m_DataRoot / "cache", m_CasGc); + m_CacheStore = std::make_unique(m_DataRoot / "cache", m_CasGc); const ZenUpstreamCacheConfig& UpstreamConfig = ServerOptions.UpstreamCacheConfig; -- cgit v1.2.3 From 861a92d1ee6c54eeb9035190501baf8ea888591f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 5 May 2022 09:55:09 +0200 Subject: cleanup and review feedback --- zenserver/cache/structuredcache.cpp | 42 ++++++++++++++++++-------------- zenserver/cache/structuredcachestore.cpp | 33 ++++++++++--------------- zenserver/cache/structuredcachestore.h | 10 +++++--- zenserver/upstream/upstreamcache.cpp | 2 +- 4 files changed, 44 insertions(+), 43 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 8deb958be..69ee32bd6 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -176,7 +176,7 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, case HttpVerb::kDelete: // Drop bucket - if (m_CacheStore.DropBucket("", Bucket)) + if (m_CacheStore.DropBucket(ZenCacheStore::DefaultNamespace, Bucket)) { return Request.WriteResponse(HttpResponseCode::OK); } @@ -225,7 +225,8 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request return Request.WriteResponse(HttpResponseCode::OK); } - if (EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryLocal) && m_CacheStore.Get("", Ref.BucketSegment, Ref.HashKey, ClientResultValue)) + if (EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryLocal) && + m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, ClientResultValue)) { Success = true; ZenContentType ContentType = ClientResultValue.Value.GetContentType(); @@ -350,7 +351,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (Success && StoreLocal) { - m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, ClientResultValue); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, ClientResultValue); } } else if (AcceptType == ZenContentType::kCbPackage) @@ -404,7 +405,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (StoreLocal) { - m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, CacheValue); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, CacheValue); } BinaryWriter MemStream; @@ -486,7 +487,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (ContentType == HttpContentType::kBinary || ContentType == HttpContentType::kCompressedBinary) { ZEN_DEBUG("PUT - '{}/{}' {} '{}'", Ref.BucketSegment, Ref.HashKey, NiceBytes(Body.Size()), ToString(ContentType)); - m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, {.Value = Body}); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, {.Value = Body}); if (EnumHasAllFlags(PolicyFromURL, CachePolicy::StoreRemote)) { @@ -528,7 +529,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request ValidAttachments.size()); Body.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, {.Value = Body}); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, {.Value = Body}); const bool IsPartialRecord = TotalCount != static_cast(ValidAttachments.size()); @@ -611,7 +612,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request CacheValue.Value = CacheRecord.GetBuffer().AsIoBuffer(); CacheValue.Value.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put("", Ref.BucketSegment, Ref.HashKey, CacheValue); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, CacheValue); const bool IsPartialRecord = Count.Valid != Count.Total; @@ -1013,7 +1014,7 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack CacheValue.Value = IoBuffer(Record.GetSize()); Record.CopyTo(MutableMemoryView(CacheValue.Value.MutableData(), CacheValue.Value.GetSize())); CacheValue.Value.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put("", Request.Key.Bucket, Request.Key.Hash, CacheValue); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Request.Key.Bucket, Request.Key.Hash, CacheValue); const bool IsPartialRecord = Count.Valid != Count.Total; @@ -1099,7 +1100,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt ZenCacheValue RecordCacheValue; if (EnumHasAllFlags(Policy.GetRecordPolicy(), CachePolicy::QueryLocal) && - m_CacheStore.Get("", Key.Bucket, Key.Hash, RecordCacheValue)) + m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, RecordCacheValue)) { Request.RecordCacheValue = std::move(RecordCacheValue.Value); if (Request.RecordCacheValue.GetContentType() != ZenContentType::kCbObject) @@ -1230,7 +1231,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt Request.RecordObject = ObjectBuffer; if (EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::StoreLocal)) { - m_CacheStore.Put("", Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); } ParseValues(Request); Request.UsedUpstream = true; @@ -1387,7 +1388,7 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ { IoBuffer Value = Chunk.GetCompressed().Flatten().AsIoBuffer(); Value.SetContentType(ZenContentType::kCompressedBinary); - m_CacheStore.Put("", Key.Bucket, Key.Hash, {.Value = Value}); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, {.Value = Value}); TransferredSize = Chunk.GetCompressedSize(); } Succeeded = true; @@ -1401,7 +1402,8 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ else if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { ZenCacheValue ExistingValue; - if (m_CacheStore.Get("", Key.Bucket, Key.Hash, ExistingValue) && IsCompressedBinary(ExistingValue.Value.GetContentType())) + if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, ExistingValue) && + IsCompressedBinary(ExistingValue.Value.GetContentType())) { Succeeded = true; } @@ -1484,7 +1486,8 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { - if (m_CacheStore.Get("", Key.Bucket, Key.Hash, CacheValue) && IsCompressedBinary(CacheValue.Value.GetContentType())) + if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, CacheValue) && + IsCompressedBinary(CacheValue.Value.GetContentType())) { Result = CompressedBuffer::FromCompressed(SharedBuffer(CacheValue.Value)); } @@ -1548,7 +1551,10 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http // that we copy data from upstream even when SkipData and !StoreLocal are true means that it is too expensive // for us to keep the data only on the upstream server. // if (EnumHasAllFlags(Policy, CachePolicy::StoreLocal)) - m_CacheStore.Put("", Request.Key.Bucket, Request.Key.Hash, ZenCacheValue{Params.Value}); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, + Request.Key.Bucket, + Request.Key.Hash, + ZenCacheValue{Params.Value}); ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}' {} ({}) in {}", ChunkRequest.Key.Bucket, ChunkRequest.Key.Hash, @@ -1804,7 +1810,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (!Record.Exists && EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get("", RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) + if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) { Record.Exists = true; Record.CacheValue = std::move(CacheValue.Value); @@ -1839,7 +1845,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::StoreLocal)) { - m_CacheStore.Put("", Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); + m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); } }; m_UpstreamCache.GetCacheRecords(UpstreamRecordRequests, std::move(OnCacheRecordGetComplete)); @@ -1936,7 +1942,7 @@ HttpStructuredCacheService::GetLocalCacheValues(std::vectorExists && EnumHasAllFlags(Request->DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get("", Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) + if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) { if (IsCompressedBinary(CacheValue.Value.GetContentType())) { @@ -2005,7 +2011,7 @@ HttpStructuredCacheService::GetUpstreamCacheChunks(std::vector Store = new ZenCacheNamespace(Gc, BasePath / (ZenCacheNamespaceDirPrefix + NamespaceName)); + Ref Store = new ZenCacheNamespace(Gc, BasePath / fmt::format("{}{}", ZenCacheNamespaceDirPrefix, NamespaceName)); m_Namespaces[NamespaceName] = Store; } } @@ -2175,7 +2175,7 @@ ZenCacheStore::~ZenCacheStore() } bool -ZenCacheStore::Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue) +ZenCacheStore::Get(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue) { Ref Store = GetStore(Namespace); if (!Store) @@ -2186,9 +2186,9 @@ ZenCacheStore::Get(const std::string& Namespace, std::string_view Bucket, const } void -ZenCacheStore::Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value) +ZenCacheStore::Put(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value) { - Ref Store = GetStore(Namespace); + Ref Store = GetStore(std::string(Namespace)); if (!Store) { return; @@ -2197,9 +2197,9 @@ ZenCacheStore::Put(const std::string& Namespace, std::string_view Bucket, const } bool -ZenCacheStore::DropBucket(const std::string& Namespace, std::string_view Bucket) +ZenCacheStore::DropBucket(std::string_view Namespace, std::string_view Bucket) { - Ref Store = GetStore(Namespace); + Ref Store = GetStore(std::string(Namespace)); if (!Store) { return false; @@ -2210,14 +2210,7 @@ ZenCacheStore::DropBucket(const std::string& Namespace, std::string_view Bucket) void ZenCacheStore::Flush() { - std::vector> Stores; - RwLock::SharedLockScope _(m_NamespacesLock); - Stores.reserve(m_Namespaces.size()); - for (const auto& Entry : m_Namespaces) - { - Stores.push_back(Entry.second); - } - _.ReleaseNow(); + std::vector> Stores = GetAllStores(); for (const Ref& Store : Stores) { Store->Flush(); @@ -2235,10 +2228,10 @@ ZenCacheStore::Scrub(ScrubContext& Ctx) } Ref -ZenCacheStore::GetStore(const std::string& Namespace) +ZenCacheStore::GetStore(std::string_view Namespace) { RwLock::SharedLockScope _(m_NamespacesLock); - if (auto It = m_Namespaces.find(Namespace); It != m_Namespaces.end()) + if (auto It = m_Namespaces.find(std::string(Namespace)); It != m_Namespaces.end()) { return It->second; } diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index a803b0603..910c56745 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -356,12 +356,14 @@ private: class ZenCacheStore final : public GcStorage, public GcContributor { public: + static constexpr std::string_view DefaultNamespace = ""; + ZenCacheStore(std::filesystem::path BasePath, CasGc& Gc); ~ZenCacheStore(); - bool Get(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); - void Put(const std::string& Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value); - bool DropBucket(const std::string& Namespace, std::string_view Bucket); + bool Get(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); + void Put(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value); + bool DropBucket(std::string_view Namespace, std::string_view Bucket); void Flush(); void Scrub(ScrubContext& Ctx); @@ -370,7 +372,7 @@ public: virtual GcStorageSize StorageSize() const override; private: - Ref GetStore(const std::string& Namespace); + Ref GetStore(std::string_view Namespace); std::vector> GetAllStores() const; mutable RwLock m_NamespacesLock; std::unordered_map> m_Namespaces; diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp index c89227106..c870e0773 100644 --- a/zenserver/upstream/upstreamcache.cpp +++ b/zenserver/upstream/upstreamcache.cpp @@ -1517,7 +1517,7 @@ private: ZenCacheValue CacheValue; std::vector Payloads; - if (!m_CacheStore.Get("", CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) + if (!m_CacheStore.Get(ZenCacheStore::DefaultNamespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) { ZEN_WARN("process upstream FAILED, '{}/{}', cache record doesn't exist", CacheRecord.Key.Bucket, CacheRecord.Key.Hash); return; -- cgit v1.2.3 From 7b842505d25fcd8f0c52656c608c1a66f45ccf96 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 5 May 2022 10:16:39 +0200 Subject: mac/linux build fix --- zenserver/cache/structuredcachestore.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 7db18a7bb..23ad550c9 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2113,8 +2113,12 @@ namespace { virtual bool VisitDirectory(const std::filesystem::path&, const path_view& DirectoryName) override { - std::string DirName8 = WideToUtf8(DirectoryName); - Dirs.push_back(DirName8); +#if ZEN_PLATFORM_WINDOWS + std::string DirectoryName8 = WideToUtf8(DirectoryName); +#else + std::string DirectoryName8 = std::string(DirectoryName); +#endif + Dirs.push_back(DirectoryName8); return false; } -- cgit v1.2.3 From d484acb3d32662c9e1faf9a99efad543f607732a Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 5 May 2022 10:49:35 +0200 Subject: revert back constructor order for ZenCacheStore --- zenserver/cache/structuredcachestore.cpp | 2 +- zenserver/cache/structuredcachestore.h | 2 +- zenserver/zenserver.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 23ad550c9..a734e9eb1 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2131,7 +2131,7 @@ namespace { } // namespace -ZenCacheStore::ZenCacheStore(std::filesystem::path BasePath, CasGc& Gc) : GcStorage(Gc), GcContributor(Gc) +ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStorage(Gc), GcContributor(Gc) { CreateDirectories(BasePath); std::vector ExistingFolders = FindExistingFolders(BasePath); diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 910c56745..67bb9c7bf 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -358,7 +358,7 @@ class ZenCacheStore final : public GcStorage, public GcContributor public: static constexpr std::string_view DefaultNamespace = ""; - ZenCacheStore(std::filesystem::path BasePath, CasGc& Gc); + ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath); ~ZenCacheStore(); bool Get(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); diff --git a/zenserver/zenserver.cpp b/zenserver/zenserver.cpp index e572e6a49..abaec888a 100644 --- a/zenserver/zenserver.cpp +++ b/zenserver/zenserver.cpp @@ -755,7 +755,7 @@ ZenServer::InitializeStructuredCache(const ZenServerOptions& ServerOptions) using namespace std::literals; ZEN_INFO("instantiating structured cache service"); - m_CacheStore = std::make_unique(m_DataRoot / "cache", m_CasGc); + m_CacheStore = std::make_unique(m_CasGc, m_DataRoot / "cache"); const ZenUpstreamCacheConfig& UpstreamConfig = ServerOptions.UpstreamCacheConfig; -- cgit v1.2.3 From f5273c981f66834d70db680cff5deb47fd8d1274 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 5 May 2022 11:01:44 +0200 Subject: reverted unnecessary changes --- zenserver/cache/structuredcache.cpp | 4 ++-- zenserver/cache/structuredcache.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 69ee32bd6..0f16f6785 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -72,13 +72,13 @@ struct PutRequestData ////////////////////////////////////////////////////////////////////////// -HttpStructuredCacheService::HttpStructuredCacheService(ZenCacheStore& CacheStore, +HttpStructuredCacheService::HttpStructuredCacheService(ZenCacheStore& InCacheStore, CidStore& InCidStore, HttpStatsService& StatsService, HttpStatusService& StatusService, UpstreamCache& UpstreamCache) : m_Log(logging::Get("cache")) -, m_CacheStore(CacheStore) +, m_CacheStore(InCacheStore) , m_StatsService(StatsService) , m_StatusService(StatusService) , m_CidStore(InCidStore) diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index c41afef12..00c4260aa 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -64,7 +64,7 @@ namespace cache::detail { class HttpStructuredCacheService : public HttpService, public IHttpStatsProvider, public IHttpStatusProvider { public: - HttpStructuredCacheService(ZenCacheStore& CacheStore, + HttpStructuredCacheService(ZenCacheStore& InCacheStore, CidStore& InCidStore, HttpStatsService& StatsService, HttpStatusService& StatusService, -- cgit v1.2.3 From e4b96fade542151fca17b5ac61e3eaad263ce92c Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 6 May 2022 11:53:11 +0200 Subject: Added GetDirectoryContent utility --- zencore/filesystem.cpp | 34 ++++++++++++++ zencore/include/zencore/filesystem.h | 11 +++++ zenserver/cache/structuredcachestore.cpp | 78 ++++++-------------------------- zenserver/projectstore.cpp | 51 ++++----------------- zenstore/blockstore.cpp | 40 ++++------------ 5 files changed, 77 insertions(+), 137 deletions(-) diff --git a/zencore/filesystem.cpp b/zencore/filesystem.cpp index 437741161..bd85f5a11 100644 --- a/zencore/filesystem.cpp +++ b/zencore/filesystem.cpp @@ -1022,6 +1022,40 @@ MaximizeOpenFileCount() #endif } +void +GetDirectoryContent(const std::filesystem::path& RootDir, uint8_t Flags, DirectoryContent& OutContent) +{ + FileSystemTraversal Traversal; + struct Visitor : public FileSystemTraversal::TreeVisitor + { + Visitor(uint8_t Flags, DirectoryContent& OutContent) : Flags(Flags), Content(OutContent) {} + + virtual void VisitFile([[maybe_unused]] const std::filesystem::path& Parent, + [[maybe_unused]] const path_view& File, + [[maybe_unused]] uint64_t FileSize) override + { + if (Flags & DirectoryContent::IncludeFilesFlag) + { + Content.Files.push_back(Parent / File); + } + } + + virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, const path_view& DirectoryName) override + { + if (Flags & DirectoryContent::IncludeDirsFlag) + { + Content.Directories.push_back(Parent / DirectoryName); + } + return (Flags & DirectoryContent::RecursiveFlag) != 0; + } + + const uint8_t Flags; + DirectoryContent& Content; + } Visit(Flags, OutContent); + + Traversal.TraverseFileSystem(RootDir, Visit); +} + ////////////////////////////////////////////////////////////////////////// // // Testing related code follows... diff --git a/zencore/include/zencore/filesystem.h b/zencore/include/zencore/filesystem.h index a6e76eaa0..6d07a79b4 100644 --- a/zencore/include/zencore/filesystem.h +++ b/zencore/include/zencore/filesystem.h @@ -169,6 +169,17 @@ public: void TraverseFileSystem(const std::filesystem::path& RootDir, TreeVisitor& Visitor); }; +struct DirectoryContent +{ + static const uint8_t IncludeDirsFlag = 1u << 0; + static const uint8_t IncludeFilesFlag = 1u << 1; + static const uint8_t RecursiveFlag = 1u << 2; + std::vector Files; + std::vector Directories; +}; + +void GetDirectoryContent(const std::filesystem::path& RootDir, uint8_t Flags, DirectoryContent& OutContent); + ////////////////////////////////////////////////////////////////////////// void filesystem_forcelink(); // internal diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index a734e9eb1..da48253e2 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1956,49 +1956,23 @@ ZenCacheDiskLayer::Put(std::string_view InBucket, const IoHash& HashKey, const Z void ZenCacheDiskLayer::DiscoverBuckets() { - FileSystemTraversal Traversal; - struct Visitor : public FileSystemTraversal::TreeVisitor - { - virtual void VisitFile([[maybe_unused]] const std::filesystem::path& Parent, - [[maybe_unused]] const path_view& File, - [[maybe_unused]] uint64_t FileSize) override - { - } - - virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, const path_view& DirectoryName) override - { - Dirs.push_back((decltype(Dirs)::value_type)(DirectoryName)); - return false; - } - - std::vector Dirs; - } Visit; - - Traversal.TraverseFileSystem(m_RootDir, Visit); + DirectoryContent DirContent; + GetDirectoryContent(m_RootDir, DirectoryContent::IncludeDirsFlag, DirContent); // Initialize buckets RwLock::ExclusiveLockScope _(m_Lock); - for (const auto& BucketName : Visit.Dirs) + for (const std::filesystem::path& BucketPath : DirContent.Directories) { + std::string BucketName = PathToUtf8(BucketPath.stem()); // New bucket needs to be created - -#if ZEN_PLATFORM_WINDOWS - std::string BucketName8 = WideToUtf8(BucketName); -#else - const auto& BucketName8 = BucketName; -#endif - - if (auto It = m_Buckets.find(BucketName8); It != m_Buckets.end()) + if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) { } else { - auto InsertResult = m_Buckets.try_emplace(BucketName8, BucketName8); - - std::filesystem::path BucketPath = m_RootDir; - BucketPath /= BucketName8; + auto InsertResult = m_Buckets.try_emplace(BucketName, BucketName); CacheBucket& Bucket = InsertResult.first->second; @@ -2006,11 +1980,11 @@ ZenCacheDiskLayer::DiscoverBuckets() if (Bucket.IsOk()) { - ZEN_INFO("Discovered bucket '{}'", BucketName8); + ZEN_INFO("Discovered bucket '{}'", BucketName); } else { - ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName8, m_RootDir); + ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir); m_Buckets.erase(InsertResult.first); } @@ -2102,44 +2076,18 @@ ZenCacheDiskLayer::TotalSize() const static constexpr std::string_view ZenCacheNamespaceDirPrefix = "ns_"; -namespace { - - std::vector FindExistingFolders(const std::filesystem::path& RootPath) - { - FileSystemTraversal Traversal; - struct Visitor : public FileSystemTraversal::TreeVisitor - { - virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t) override {} - - virtual bool VisitDirectory(const std::filesystem::path&, const path_view& DirectoryName) override - { -#if ZEN_PLATFORM_WINDOWS - std::string DirectoryName8 = WideToUtf8(DirectoryName); -#else - std::string DirectoryName8 = std::string(DirectoryName); -#endif - Dirs.push_back(DirectoryName8); - return false; - } - - std::vector Dirs; - } Visit; - - Traversal.TraverseFileSystem(RootPath, Visit); - return Visit.Dirs; - } - -} // namespace - ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStorage(Gc), GcContributor(Gc) { CreateDirectories(BasePath); - std::vector ExistingFolders = FindExistingFolders(BasePath); + + DirectoryContent DirContent; + GetDirectoryContent(BasePath, DirectoryContent::IncludeDirsFlag, DirContent); std::vector LegacyBuckets; std::vector Namespaces; - for (const std::string& DirName : ExistingFolders) + for (const std::filesystem::path& DirPath : DirContent.Directories) { + std::string DirName = PathToUtf8(DirPath.stem()); if (DirName.starts_with(ZenCacheNamespaceDirPrefix)) { Namespaces.push_back(DirName.substr(3)); diff --git a/zenserver/projectstore.cpp b/zenserver/projectstore.cpp index aceb2df00..d18ae9e1a 100644 --- a/zenserver/projectstore.cpp +++ b/zenserver/projectstore.cpp @@ -804,29 +804,12 @@ ProjectStore::Project::DeleteOplog(std::string_view OplogId) void ProjectStore::Project::DiscoverOplogs() { - FileSystemTraversal Traversal; - struct Visitor : public FileSystemTraversal::TreeVisitor - { - virtual void VisitFile([[maybe_unused]] const std::filesystem::path& Parent, - [[maybe_unused]] const path_view& File, - [[maybe_unused]] uint64_t FileSize) override - { - } - - virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, const path_view& DirectoryName) override - { - Dirs.push_back(PathToUtf8(DirectoryName)); - return false; - } - - std::vector Dirs; - } Visit; - - Traversal.TraverseFileSystem(m_OplogStoragePath, Visit); + DirectoryContent DirContent; + GetDirectoryContent(m_OplogStoragePath, DirectoryContent::IncludeDirsFlag, DirContent); - for (const std::string& Dir : Visit.Dirs) + for (const std::filesystem::path& DirPath : DirContent.Directories) { - OpenOplog(Dir); + OpenOplog(PathToUtf8(DirPath.stem())); } } @@ -900,34 +883,18 @@ ProjectStore::BasePathForProject(std::string_view ProjectId) void ProjectStore::DiscoverProjects() { - FileSystemTraversal Traversal; - struct Visitor : public FileSystemTraversal::TreeVisitor - { - virtual void VisitFile([[maybe_unused]] const std::filesystem::path& Parent, - [[maybe_unused]] const path_view& File, - [[maybe_unused]] uint64_t FileSize) override - { - } - - virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, const path_view& DirectoryName) override - { - Dirs.push_back(PathToUtf8(DirectoryName)); - return false; - } - - std::vector Dirs; - } Visit; - if (!std::filesystem::exists(m_ProjectBasePath)) { return; } - Traversal.TraverseFileSystem(m_ProjectBasePath, Visit); + DirectoryContent DirContent; + GetDirectoryContent(m_ProjectBasePath, DirectoryContent::IncludeDirsFlag, DirContent); - for (const auto& Dir : Visit.Dirs) + for (const std::filesystem::path& DirPath : DirContent.Directories) { - Project* Project = OpenProject(Dir); + std::string DirName = PathToUtf8(DirPath.stem()); + Project* Project = OpenProject(DirName); if (Project) { diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 54a8eb9df..bfd2d63a5 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -162,7 +162,7 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, { continue; } - std::string FileName = Path.stem().string(); + std::string FileName = PathToUtf8(Path.stem()); uint32_t BlockIndex; bool OK = ParseHexNumber(FileName, BlockIndex); if (!OK) @@ -1074,35 +1074,15 @@ namespace { std::vector GetDirectoryContent(std::filesystem::path RootDir, bool Files, bool Directories) { - FileSystemTraversal Traversal; - struct Visitor : public FileSystemTraversal::TreeVisitor - { - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t) override - { - if (Files) - { - Items.push_back(Parent / File); - } - } - - virtual bool VisitDirectory(const std::filesystem::path& Parent, const path_view& Dir) override - { - if (Directories) - { - Items.push_back(Parent / Dir); - } - return true; - } - - bool Files; - bool Directories; - std::vector Items; - } Visit; - Visit.Files = Files; - Visit.Directories = Directories; - - Traversal.TraverseFileSystem(RootDir, Visit); - return Visit.Items; + DirectoryContent DirectoryContent; + GetDirectoryContent(RootDir, + DirectoryContent::RecursiveFlag | (Files ? DirectoryContent::IncludeFilesFlag : 0) | + (Directories ? DirectoryContent::IncludeDirsFlag : 0), + DirectoryContent); + std::vector Result; + Result.insert(Result.end(), DirectoryContent.Directories.begin(), DirectoryContent.Directories.end()); + Result.insert(Result.end(), DirectoryContent.Files.begin(), DirectoryContent.Files.end()); + return Result; }; static IoBuffer CreateChunk(uint64_t Size) -- cgit v1.2.3 From 6db10b5a491297d45c14efae453c420f0d7fa58c Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 6 May 2022 12:12:09 +0200 Subject: review feedback and cleanup --- zenserver/cache/structuredcachestore.cpp | 81 ++++++++++++++------------------ zenserver/cache/structuredcachestore.h | 11 +++-- 2 files changed, 43 insertions(+), 49 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index da48253e2..3ac319961 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2109,7 +2109,12 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor { std::filesystem::path LegacyFolder = BasePath / DirName; std::filesystem::path NewPath = DefaultNamespaceFolder / DirName; - std::filesystem::rename(LegacyFolder, NewPath); + std::error_code Ec; + std::filesystem::rename(LegacyFolder, NewPath, Ec); + if (Ec) + { + ZEN_ERROR("Unable to move '{}' to '{}', reason '{}'", LegacyFolder, NewPath, Ec.message()); + } } Namespaces.push_back(std::string(DefaultNamespace)); } @@ -2129,54 +2134,45 @@ ZenCacheStore::~ZenCacheStore() bool ZenCacheStore::Get(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue) { - Ref Store = GetStore(Namespace); - if (!Store) + if (Ref Store = GetStore(Namespace); Store) { - return false; + return Store->Get(Bucket, HashKey, OutValue); } - return Store->Get(Bucket, HashKey, OutValue); + ZEN_WARN("request for unknown namespace '{}' in ZenCacheStore::Get, bucket '{}', key '{}'", Namespace, Bucket, HashKey.ToHexString()); + return false; } void ZenCacheStore::Put(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value) { - Ref Store = GetStore(std::string(Namespace)); - if (!Store) + if (Ref Store = GetStore(Namespace); Store) { - return; + return Store->Put(Bucket, HashKey, Value); } - Store->Put(Bucket, HashKey, Value); + ZEN_WARN("request for unknown namespace '{}' in ZenCacheStore::Put, bucket '{}', key '{}'", Namespace, Bucket, HashKey.ToHexString()); } bool ZenCacheStore::DropBucket(std::string_view Namespace, std::string_view Bucket) { - Ref Store = GetStore(std::string(Namespace)); - if (!Store) + if (Ref Store = GetStore(Namespace); Store) { - return false; + return Store->DropBucket(Bucket); } - return Store->DropBucket(Bucket); + ZEN_WARN("request for unknown namespace '{}' in ZenCacheStore::Put, bucket '{}'", Namespace, Bucket); + return false; } void ZenCacheStore::Flush() { - std::vector> Stores = GetAllStores(); - for (const Ref& Store : Stores) - { - Store->Flush(); - } + IterateStores([&](const Ref& Store) { Store->Flush(); }); } void ZenCacheStore::Scrub(ScrubContext& Ctx) { - std::vector> Stores = GetAllStores(); - for (const Ref& Store : Stores) - { - Store->Scrub(Ctx); - } + IterateStores([&](const Ref& Store) { Store->Scrub(Ctx); }); } Ref @@ -2190,50 +2186,45 @@ ZenCacheStore::GetStore(std::string_view Namespace) return nullptr; } -std::vector> -ZenCacheStore::GetAllStores() const +void +ZenCacheStore::IterateStores(const std::function& Store)>& Callback) const { std::vector> Stores; - RwLock::SharedLockScope _(m_NamespacesLock); - Stores.reserve(m_Namespaces.size()); - for (const auto& Entry : m_Namespaces) { - Stores.push_back(Entry.second); + RwLock::SharedLockScope _(m_NamespacesLock); + Stores.reserve(m_Namespaces.size()); + for (const auto& Entry : m_Namespaces) + { + Stores.push_back(Entry.second); + } + } + for (const Ref& Store : Stores) + { + Callback(Store); } - return Stores; } void ZenCacheStore::GatherReferences(GcContext& GcCtx) { - std::vector> Stores = GetAllStores(); - for (const Ref& Store : Stores) - { - Store->GatherReferences(GcCtx); - } + IterateStores([&](const Ref& Store) { Store->GatherReferences(GcCtx); }); } void ZenCacheStore::CollectGarbage(GcContext& GcCtx) { - std::vector> Stores = GetAllStores(); - for (const Ref& Store : Stores) - { - Store->CollectGarbage(GcCtx); - } + IterateStores([&](const Ref& Store) { Store->CollectGarbage(GcCtx); }); } GcStorageSize ZenCacheStore::StorageSize() const { - std::vector> Stores = GetAllStores(); - GcStorageSize Size; - for (const Ref& Store : Stores) - { + GcStorageSize Size; + IterateStores([&](const Ref& Store) { GcStorageSize StoreSize = Store->StorageSize(); Size.MemorySize += StoreSize.MemorySize; Size.DiskSize += StoreSize.DiskSize; - } + }); return Size; } diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 67bb9c7bf..10335890f 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -372,10 +372,13 @@ public: virtual GcStorageSize StorageSize() const override; private: - Ref GetStore(std::string_view Namespace); - std::vector> GetAllStores() const; - mutable RwLock m_NamespacesLock; - std::unordered_map> m_Namespaces; + Ref GetStore(std::string_view Namespace); + void IterateStores(const std::function& Store)>& Callback) const; + + typedef std::unordered_map> NameSpaceMap; + + mutable RwLock m_NamespacesLock; + NameSpaceMap m_Namespaces; }; void z$_forcelink(); -- cgit v1.2.3 From 76fd97b9d864ab60d06859befdd4a3a3bf4abd97 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 6 May 2022 12:55:04 +0200 Subject: Fix standalone file lock in CacheBucket Grab sharding lock when deleting files during GC Don't hold sharding lock when sleeping in back-off due to file contention Remove unneeded renaming logic when writing standalone cache values --- zenserver/cache/structuredcachestore.cpp | 170 ++++++++++++++++++------------- 1 file changed, 97 insertions(+), 73 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 2869191fd..163a3f2f2 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1515,6 +1515,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) Path.Reset(); BuildPath(Path, Key); + fs::path FilePath = Path.ToPath(); { RwLock::SharedLockScope __(m_IndexLock); @@ -1530,8 +1531,14 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) ZEN_DEBUG("skipping z$ delete standalone of file '{}' FAILED, it has been added back", Path.ToUtf8()); continue; } - ZEN_DEBUG("deleting standalone cache file '{}'", Path.ToUtf8()); - fs::remove(Path.c_str(), Ec); + __.ReleaseNow(); + + RwLock::ExclusiveLockScope ValueLock(LockForHash(Key)); + if (fs::is_regular_file(FilePath)) + { + ZEN_DEBUG("deleting standalone cache file '{}'", Path.ToUtf8()); + fs::remove(FilePath, Ec); + } } if (Ec) @@ -1722,100 +1729,117 @@ ZenCacheDiskLayer::UpdateAccessTimes(const zen::access_tracking::AccessTimes& Ac void ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value) { - RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); - ExtendablePathBuilder<256> DataFilePath; BuildPath(DataFilePath, HashKey); + std::filesystem::path FsPath{DataFilePath.ToPath()}; - TemporaryFile DataFile; - - std::error_code Ec; - DataFile.CreateTemporary(m_BucketDir.c_str(), Ec); + auto UpdateIndex = [&]() { + uint8_t EntryFlags = DiskLocation::kStandaloneFile; - if (Ec) - { - throw std::system_error(Ec, fmt::format("Failed to open temporary file for put at '{}'", m_BucketDir)); - } + if (Value.Value.GetContentType() == ZenContentType::kCbObject) + { + EntryFlags |= DiskLocation::kStructured; + } + else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) + { + EntryFlags |= DiskLocation::kCompressed; + } - DataFile.WriteAll(Value.Value, Ec); + RwLock::ExclusiveLockScope _(m_IndexLock); - if (Ec) - { - throw std::system_error(Ec, fmt::format("Failed to write payload ({} bytes) to file", NiceBytes(Value.Value.Size()))); - } + DiskLocation Loc(Value.Value.Size(), EntryFlags); + IndexEntry Entry = IndexEntry(Loc, GcClock::TickCount()); - // Move file into place (atomically) + if (auto It = m_Index.find(HashKey); It == m_Index.end()) + { + // Previously unknown object + m_Index.insert({HashKey, Entry}); + } + else + { + // TODO: should check if write is idempotent and bail out if it is? + It.value() = Entry; + } - std::filesystem::path FsPath{DataFilePath.ToPath()}; + m_SlogFile.Append({.Key = HashKey, .Location = Loc}); + m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); + }; - DataFile.MoveTemporaryIntoPlace(FsPath, Ec); + std::error_code Ec; + BasicFile DataFile; - if (Ec) + // Happy path - directory structure exists and nobody is busy reading the file { - int RetryCount = 3; - - do + RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); + DataFile.Open(FsPath, BasicFile::Mode::kTruncate, Ec); + if (!Ec) { - std::filesystem::path ParentPath = FsPath.parent_path(); - CreateDirectories(ParentPath); - - DataFile.MoveTemporaryIntoPlace(FsPath, Ec); - - if (!Ec) - { - break; - } - - std::error_code InnerEc; - const uint64_t ExistingFileSize = std::filesystem::file_size(FsPath, InnerEc); - - if (!InnerEc && ExistingFileSize == Value.Value.Size()) + DataFile.WriteAll(Value.Value, Ec); + if (Ec) { - // Concurrent write of same value? - return; + if (Ec) + { + throw std::system_error(Ec, + fmt::format("Failed to write payload ({} bytes) to file '{}' in '{}'", + NiceBytes(Value.Value.Size()), + FsPath, + m_BucketDir)); + } } + ValueLock.ReleaseNow(); + UpdateIndex(); + return; + } + } - // Semi arbitrary back-off - zen::Sleep(1000 * RetryCount); - } while (RetryCount--); - + std::filesystem::path ParentPath = FsPath.parent_path(); + if (!std::filesystem::is_directory(ParentPath)) + { + Ec.clear(); + std::filesystem::create_directories(ParentPath, Ec); if (Ec) { - throw std::system_error(Ec, fmt::format("Failed to finalize file '{}'", DataFilePath.ToUtf8())); + throw std::system_error( + Ec, + fmt::format("Failed to create parent directory '{}' for file '{}' for put in '{}'", ParentPath, FsPath, m_BucketDir)); } } - // Update index - - uint8_t EntryFlags = DiskLocation::kStandaloneFile; - - if (Value.Value.GetContentType() == ZenContentType::kCbObject) - { - EntryFlags |= DiskLocation::kStructured; - } - else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) + // We retry to open the file since it can be held open for read + // This happens if the server gets a Get request for the file or + // if we are busy sending the file upstream + int RetryCount = 3; + do { - EntryFlags |= DiskLocation::kCompressed; - } - - RwLock::ExclusiveLockScope _(m_IndexLock); - - DiskLocation Loc(Value.Value.Size(), EntryFlags); - IndexEntry Entry = IndexEntry(Loc, GcClock::TickCount()); + Ec.clear(); + RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); + DataFile.Open(FsPath, BasicFile::Mode::kTruncate, Ec); + if (!Ec) + { + DataFile.WriteAll(Value.Value, Ec); + if (Ec) + { + if (Ec) + { + throw std::system_error(Ec, + fmt::format("Failed to write payload ({} bytes) to file '{}' in '{}'", + NiceBytes(Value.Value.Size()), + FsPath, + m_BucketDir)); + } + } + ValueLock.ReleaseNow(); + UpdateIndex(); + return; + } + ZEN_INFO("Failed writing opening file '{}' for writing, pausing and retrying, reason '{}'", FsPath.string(), Ec.message()); + ValueLock.ReleaseNow(); - if (auto It = m_Index.find(HashKey); It == m_Index.end()) - { - // Previously unknown object - m_Index.insert({HashKey, Entry}); - } - else - { - // TODO: should check if write is idempotent and bail out if it is? - It.value() = Entry; - } + // Semi arbitrary back-off + zen::Sleep(200 * (4 - RetryCount)); // Sleep at most for a total of 2 seconds + } while (RetryCount--); - m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); + throw std::system_error(Ec, fmt::format("Failed to finalize file '{}' in '{}'", DataFilePath.ToUtf8(), m_BucketDir)); } void -- cgit v1.2.3 From 1e279eb7700e7bfb35282cbc8acdaec3cb355e23 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 6 May 2022 13:23:52 +0200 Subject: clean up file on failed write --- zenserver/cache/structuredcachestore.cpp | 73 ++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 163a3f2f2..77307bc2d 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1765,27 +1765,41 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); }; - std::error_code Ec; - BasicFile DataFile; + auto WritePayload = [&](BasicFile& File, const IoBuffer& Payload) { + std::error_code Ec; + File.WriteAll(Payload, Ec); + if (Ec) + { + File.Close(); + std::error_code RemoveEc; + std::filesystem::remove(FsPath, RemoveEc); + if (RemoveEc) + { + ZEN_WARN("Failed cleaning up file '{}' after failed write for put in '{}', reason '{}'", + FsPath.string(), + m_BucketDir, + RemoveEc.message()); + } + + throw std::system_error(Ec, + fmt::format("Failed to write payload ({} bytes) to file '{}' for put in '{}'", + NiceBytes(Payload.Size()), + FsPath, + m_BucketDir)); + } + File.Close(); + }; // Happy path - directory structure exists and nobody is busy reading the file { + std::error_code Ec; + BasicFile DataFile; + RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); DataFile.Open(FsPath, BasicFile::Mode::kTruncate, Ec); if (!Ec) { - DataFile.WriteAll(Value.Value, Ec); - if (Ec) - { - if (Ec) - { - throw std::system_error(Ec, - fmt::format("Failed to write payload ({} bytes) to file '{}' in '{}'", - NiceBytes(Value.Value.Size()), - FsPath, - m_BucketDir)); - } - } + WritePayload(DataFile, Value.Value); ValueLock.ReleaseNow(); UpdateIndex(); return; @@ -1795,7 +1809,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c std::filesystem::path ParentPath = FsPath.parent_path(); if (!std::filesystem::is_directory(ParentPath)) { - Ec.clear(); + std::error_code Ec; std::filesystem::create_directories(ParentPath, Ec); if (Ec) { @@ -1805,41 +1819,36 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c } } - // We retry to open the file since it can be held open for read - // This happens if the server gets a Get request for the file or + // We retry to open the file since it can be held open for read. + // This happens if the server processes a Get request for the file or // if we are busy sending the file upstream - int RetryCount = 3; + int RetryCount = 3; + std::error_code Ec; do { + BasicFile DataFile; Ec.clear(); + RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); DataFile.Open(FsPath, BasicFile::Mode::kTruncate, Ec); if (!Ec) { - DataFile.WriteAll(Value.Value, Ec); - if (Ec) - { - if (Ec) - { - throw std::system_error(Ec, - fmt::format("Failed to write payload ({} bytes) to file '{}' in '{}'", - NiceBytes(Value.Value.Size()), - FsPath, - m_BucketDir)); - } - } + WritePayload(DataFile, Value.Value); ValueLock.ReleaseNow(); UpdateIndex(); return; } - ZEN_INFO("Failed writing opening file '{}' for writing, pausing and retrying, reason '{}'", FsPath.string(), Ec.message()); + ZEN_INFO("Failed writing opening file '{}' for writing for put in '{}', pausing and retrying, reason '{}'", + FsPath.string(), + m_BucketDir, + Ec.message()); ValueLock.ReleaseNow(); // Semi arbitrary back-off zen::Sleep(200 * (4 - RetryCount)); // Sleep at most for a total of 2 seconds } while (RetryCount--); - throw std::system_error(Ec, fmt::format("Failed to finalize file '{}' in '{}'", DataFilePath.ToUtf8(), m_BucketDir)); + throw std::system_error(Ec, fmt::format("Failed to finalize file '{}' for put in '{}'", DataFilePath.ToUtf8(), m_BucketDir)); } void -- cgit v1.2.3 From 394e31f10d0ec32bb6dbe7ea9b7f3a1dc4edeec6 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 6 May 2022 17:40:31 +0200 Subject: restore write using rename in PutStandaloneCacheValue --- zenserver/cache/structuredcachestore.cpp | 150 +++++++++++++------------------ 1 file changed, 61 insertions(+), 89 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 77307bc2d..dee4c55f0 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1729,124 +1729,96 @@ ZenCacheDiskLayer::UpdateAccessTimes(const zen::access_tracking::AccessTimes& Ac void ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value) { + TemporaryFile DataFile; + + std::error_code Ec; + DataFile.CreateTemporary(m_BucketDir.c_str(), Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to open temporary file for put at '{}'", m_BucketDir)); + } + + DataFile.WriteAll(Value.Value, Ec); + if (Ec) + { + throw std::system_error(Ec, + fmt::format("Failed to write payload ({} bytes) to temporary file for put in '{}'", + NiceBytes(Value.Value.Size()), + m_BucketDir)); + } + ExtendablePathBuilder<256> DataFilePath; BuildPath(DataFilePath, HashKey); std::filesystem::path FsPath{DataFilePath.ToPath()}; - auto UpdateIndex = [&]() { - uint8_t EntryFlags = DiskLocation::kStandaloneFile; - - if (Value.Value.GetContentType() == ZenContentType::kCbObject) - { - EntryFlags |= DiskLocation::kStructured; - } - else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) + // We retry to open the file since it can be held open for read. + // This happens if the server processes a Get request for the file or + // if we are busy sending the file upstream + int RetryCount = 3; + do + { + Ec.clear(); { - EntryFlags |= DiskLocation::kCompressed; + RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); + DataFile.MoveTemporaryIntoPlace(FsPath, Ec); } - RwLock::ExclusiveLockScope _(m_IndexLock); - - DiskLocation Loc(Value.Value.Size(), EntryFlags); - IndexEntry Entry = IndexEntry(Loc, GcClock::TickCount()); - - if (auto It = m_Index.find(HashKey); It == m_Index.end()) - { - // Previously unknown object - m_Index.insert({HashKey, Entry}); - } - else + if (!Ec) { - // TODO: should check if write is idempotent and bail out if it is? - It.value() = Entry; - } + uint8_t EntryFlags = DiskLocation::kStandaloneFile; - m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); - }; - - auto WritePayload = [&](BasicFile& File, const IoBuffer& Payload) { - std::error_code Ec; - File.WriteAll(Payload, Ec); - if (Ec) - { - File.Close(); - std::error_code RemoveEc; - std::filesystem::remove(FsPath, RemoveEc); - if (RemoveEc) + if (Value.Value.GetContentType() == ZenContentType::kCbObject) { - ZEN_WARN("Failed cleaning up file '{}' after failed write for put in '{}', reason '{}'", - FsPath.string(), - m_BucketDir, - RemoveEc.message()); + EntryFlags |= DiskLocation::kStructured; } + else if (Value.Value.GetContentType() == ZenContentType::kCompressedBinary) + { + EntryFlags |= DiskLocation::kCompressed; + } + DiskLocation Loc(Value.Value.Size(), EntryFlags); + IndexEntry Entry = IndexEntry(Loc, GcClock::TickCount()); - throw std::system_error(Ec, - fmt::format("Failed to write payload ({} bytes) to file '{}' for put in '{}'", - NiceBytes(Payload.Size()), - FsPath, - m_BucketDir)); - } - File.Close(); - }; - - // Happy path - directory structure exists and nobody is busy reading the file - { - std::error_code Ec; - BasicFile DataFile; + RwLock::ExclusiveLockScope _(m_IndexLock); + if (auto It = m_Index.find(HashKey); It == m_Index.end()) + { + // Previously unknown object + m_Index.insert({HashKey, Entry}); + } + else + { + // TODO: should check if write is idempotent and bail out if it is? + It.value() = Entry; + } - RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); - DataFile.Open(FsPath, BasicFile::Mode::kTruncate, Ec); - if (!Ec) - { - WritePayload(DataFile, Value.Value); - ValueLock.ReleaseNow(); - UpdateIndex(); + m_SlogFile.Append({.Key = HashKey, .Location = Loc}); + m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); return; } - } - std::filesystem::path ParentPath = FsPath.parent_path(); - if (!std::filesystem::is_directory(ParentPath)) - { - std::error_code Ec; - std::filesystem::create_directories(ParentPath, Ec); - if (Ec) + std::filesystem::path ParentPath = FsPath.parent_path(); + if (!std::filesystem::is_directory(ParentPath)) { + Ec.clear(); + std::filesystem::create_directories(ParentPath, Ec); + if (!Ec) + { + // Retry without sleep + continue; + } throw std::system_error( Ec, fmt::format("Failed to create parent directory '{}' for file '{}' for put in '{}'", ParentPath, FsPath, m_BucketDir)); } - } - // We retry to open the file since it can be held open for read. - // This happens if the server processes a Get request for the file or - // if we are busy sending the file upstream - int RetryCount = 3; - std::error_code Ec; - do - { - BasicFile DataFile; - Ec.clear(); - - RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); - DataFile.Open(FsPath, BasicFile::Mode::kTruncate, Ec); - if (!Ec) - { - WritePayload(DataFile, Value.Value); - ValueLock.ReleaseNow(); - UpdateIndex(); - return; - } ZEN_INFO("Failed writing opening file '{}' for writing for put in '{}', pausing and retrying, reason '{}'", FsPath.string(), m_BucketDir, Ec.message()); - ValueLock.ReleaseNow(); // Semi arbitrary back-off zen::Sleep(200 * (4 - RetryCount)); // Sleep at most for a total of 2 seconds - } while (RetryCount--); + RetryCount--; + } while (RetryCount > 0); throw std::system_error(Ec, fmt::format("Failed to finalize file '{}' for put in '{}'", DataFilePath.ToUtf8(), m_BucketDir)); } -- cgit v1.2.3 From 2f6461e3ed7851bc5592b6bc9efdfb0d973fe284 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 6 May 2022 23:32:35 +0200 Subject: remove use of Ref<> in ZenCacheStore naming cleanup --- zenserver/cache/structuredcachestore.cpp | 40 ++++++++++++++++---------------- zenserver/cache/structuredcachestore.h | 6 ++--- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 3ac319961..1d43e9591 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2121,8 +2121,8 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor for (const std::string& NamespaceName : Namespaces) { - Ref Store = new ZenCacheNamespace(Gc, BasePath / fmt::format("{}{}", ZenCacheNamespaceDirPrefix, NamespaceName)); - m_Namespaces[NamespaceName] = Store; + m_Namespaces[NamespaceName] = + std::make_unique(Gc, BasePath / fmt::format("{}{}", ZenCacheNamespaceDirPrefix, NamespaceName)); } } @@ -2134,7 +2134,7 @@ ZenCacheStore::~ZenCacheStore() bool ZenCacheStore::Get(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue) { - if (Ref Store = GetStore(Namespace); Store) + if (ZenCacheNamespace* Store = GetNamespace(Namespace); Store) { return Store->Get(Bucket, HashKey, OutValue); } @@ -2145,7 +2145,7 @@ ZenCacheStore::Get(std::string_view Namespace, std::string_view Bucket, const Io void ZenCacheStore::Put(std::string_view Namespace, std::string_view Bucket, const IoHash& HashKey, const ZenCacheValue& Value) { - if (Ref Store = GetStore(Namespace); Store) + if (ZenCacheNamespace* Store = GetNamespace(Namespace); Store) { return Store->Put(Bucket, HashKey, Value); } @@ -2155,7 +2155,7 @@ ZenCacheStore::Put(std::string_view Namespace, std::string_view Bucket, const Io bool ZenCacheStore::DropBucket(std::string_view Namespace, std::string_view Bucket) { - if (Ref Store = GetStore(Namespace); Store) + if (ZenCacheNamespace* Store = GetNamespace(Namespace); Store) { return Store->DropBucket(Bucket); } @@ -2166,62 +2166,62 @@ ZenCacheStore::DropBucket(std::string_view Namespace, std::string_view Bucket) void ZenCacheStore::Flush() { - IterateStores([&](const Ref& Store) { Store->Flush(); }); + IterateNamespaces([&](std::string_view, ZenCacheNamespace& Store) { Store.Flush(); }); } void ZenCacheStore::Scrub(ScrubContext& Ctx) { - IterateStores([&](const Ref& Store) { Store->Scrub(Ctx); }); + IterateNamespaces([&](std::string_view, ZenCacheNamespace& Store) { Store.Scrub(Ctx); }); } -Ref -ZenCacheStore::GetStore(std::string_view Namespace) +ZenCacheNamespace* +ZenCacheStore::GetNamespace(std::string_view Namespace) { RwLock::SharedLockScope _(m_NamespacesLock); if (auto It = m_Namespaces.find(std::string(Namespace)); It != m_Namespaces.end()) { - return It->second; + return It->second.get(); } return nullptr; } void -ZenCacheStore::IterateStores(const std::function& Store)>& Callback) const +ZenCacheStore::IterateNamespaces(const std::function& Callback) const { - std::vector> Stores; + std::vector > Namespaces; { RwLock::SharedLockScope _(m_NamespacesLock); - Stores.reserve(m_Namespaces.size()); + Namespaces.reserve(m_Namespaces.size()); for (const auto& Entry : m_Namespaces) { - Stores.push_back(Entry.second); + Namespaces.push_back({Entry.first, *Entry.second}); } } - for (const Ref& Store : Stores) + for (auto& Entry : Namespaces) { - Callback(Store); + Callback(Entry.first, Entry.second); } } void ZenCacheStore::GatherReferences(GcContext& GcCtx) { - IterateStores([&](const Ref& Store) { Store->GatherReferences(GcCtx); }); + IterateNamespaces([&](std::string_view, ZenCacheNamespace& Store) { Store.GatherReferences(GcCtx); }); } void ZenCacheStore::CollectGarbage(GcContext& GcCtx) { - IterateStores([&](const Ref& Store) { Store->CollectGarbage(GcCtx); }); + IterateNamespaces([&](std::string_view, ZenCacheNamespace& Store) { Store.CollectGarbage(GcCtx); }); } GcStorageSize ZenCacheStore::StorageSize() const { GcStorageSize Size; - IterateStores([&](const Ref& Store) { - GcStorageSize StoreSize = Store->StorageSize(); + IterateNamespaces([&](std::string_view, ZenCacheNamespace& Store) { + GcStorageSize StoreSize = Store.StorageSize(); Size.MemorySize += StoreSize.MemorySize; Size.DiskSize += StoreSize.DiskSize; }); diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 10335890f..881285bc9 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -372,10 +372,10 @@ public: virtual GcStorageSize StorageSize() const override; private: - Ref GetStore(std::string_view Namespace); - void IterateStores(const std::function& Store)>& Callback) const; + ZenCacheNamespace* GetNamespace(std::string_view Namespace); + void IterateNamespaces(const std::function& Callback) const; - typedef std::unordered_map> NameSpaceMap; + typedef std::unordered_map> NameSpaceMap; mutable RwLock m_NamespacesLock; NameSpaceMap m_Namespaces; -- cgit v1.2.3 From 308d60e0289b2adc5c0738fe25273176e780735f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Sun, 8 May 2022 00:11:11 +0200 Subject: Make sure blockstore owner and block store state does not get out of sync when fetching a chunk Move MarkAsDeleteOnClose() to IoBuffer(ExtendedCore) and set it on close, SetFileInformationByHandle sometimes fails if done in parallel with FileMapping --- zencore/include/zencore/iobuffer.h | 3 ++ zencore/iobuffer.cpp | 30 +++++++++++++ zenserver/cache/structuredcachestore.cpp | 48 ++++++++++---------- zenstore/basicfile.cpp | 25 ----------- zenstore/blockstore.cpp | 75 +++++++++++--------------------- zenstore/compactcas.cpp | 14 ++---- zenstore/include/zenstore/basicfile.h | 12 ++--- zenstore/include/zenstore/blockstore.h | 6 +-- 8 files changed, 93 insertions(+), 120 deletions(-) diff --git a/zencore/include/zencore/iobuffer.h b/zencore/include/zencore/iobuffer.h index bc8cfdc0f..5d9daa1c7 100644 --- a/zencore/include/zencore/iobuffer.h +++ b/zencore/include/zencore/iobuffer.h @@ -276,12 +276,14 @@ struct IoBufferExtendedCore : public IoBufferCore void Materialize() const; bool GetFileReference(IoBufferFileReference& OutRef) const; + void MarkAsDeleteOnClose(); private: void* m_FileHandle = nullptr; uint64_t m_FileOffset = 0; mutable void* m_MmapHandle = nullptr; mutable void* m_MappedPointer = nullptr; + bool m_DeleteOnClose = false; }; inline IoBufferExtendedCore* @@ -377,6 +379,7 @@ public: inline void SetContentType(ZenContentType ContentType) { m_Core->SetContentType(ContentType); } [[nodiscard]] inline ZenContentType GetContentType() const { return m_Core->GetContentType(); } [[nodiscard]] ZENCORE_API bool GetFileReference(IoBufferFileReference& OutRef) const; + void MarkAsDeleteOnClose(); inline MemoryView GetView() const { return MemoryView(m_Core->DataPointer(), m_Core->DataBytes()); } inline MutableMemoryView GetMutableView() { return MutableMemoryView(m_Core->MutableDataPointer(), m_Core->DataBytes()); } diff --git a/zencore/iobuffer.cpp b/zencore/iobuffer.cpp index 46b9ab336..c4b7f7bdf 100644 --- a/zencore/iobuffer.cpp +++ b/zencore/iobuffer.cpp @@ -211,6 +211,18 @@ IoBufferExtendedCore::~IoBufferExtendedCore() if (LocalFlags & kOwnsFile) { + if (m_DeleteOnClose) + { +#if ZEN_PLATFORM_WINDOWS + // Mark file for deletion when final handle is closed + FILE_DISPOSITION_INFO Fdi{.DeleteFile = TRUE}; + + SetFileInformationByHandle(m_FileHandle, FileDispositionInfo, &Fdi, sizeof Fdi); +#else + std::filesystem::path FilePath = zen::PathFromHandle(m_FileHandle); + unlink(FilePath.c_str()); +#endif + } #if ZEN_PLATFORM_WINDOWS BOOL Success = CloseHandle(m_FileHandle); #else @@ -298,6 +310,9 @@ IoBufferExtendedCore::Materialize() const if (MappedBase == nullptr) { +#if ZEN_PLATFORM_WINDOWS + CloseHandle(NewMmapHandle); +#endif // ZEN_PLATFORM_WINDOWS throw std::system_error(std::error_code(zen::GetLastError(), std::system_category()), fmt::format("MapViewOfFile failed (offset {:#x}, size {:#x}) file: '{}'", MapOffset, @@ -327,6 +342,12 @@ IoBufferExtendedCore::GetFileReference(IoBufferFileReference& OutRef) const return true; } +void +IoBufferExtendedCore::MarkAsDeleteOnClose() +{ + m_DeleteOnClose = true; +} + ////////////////////////////////////////////////////////////////////////// IoBuffer::IoBuffer(size_t InSize) : m_Core(new IoBufferCore(InSize)) @@ -389,6 +410,15 @@ IoBuffer::GetFileReference(IoBufferFileReference& OutRef) const return false; } +void +IoBuffer::MarkAsDeleteOnClose() +{ + if (IoBufferExtendedCore* ExtCore = m_Core->ExtendedCore()) + { + ExtCore->MarkAsDeleteOnClose(); + } +} + ////////////////////////////////////////////////////////////////////////// IoBuffer diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 2869191fd..a929284b9 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1126,13 +1126,11 @@ ZenCacheDiskLayer::CacheBucket::GetInlineCacheValue(const DiskLocation& Loc, Zen { BlockStoreLocation Location = Loc.GetBlockLocation(m_PayloadAlignment); - Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); - if (!ChunkBlock) + OutValue.Value = m_BlockStore.TryGetChunk(Location); + if (!OutValue.Value) { return false; } - - OutValue.Value = ChunkBlock->GetChunk(Location.Offset, Location.Size); OutValue.Value.SetContentType(Loc.GetContentType()); return true; @@ -1166,22 +1164,21 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal } RwLock::SharedLockScope _(m_IndexLock); - - if (auto It = m_Index.find(HashKey); It != m_Index.end()) + auto It = m_Index.find(HashKey); + if (It == m_Index.end()) + { + return false; + } + IndexEntry& Entry = It.value(); + Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); + DiskLocation Location = Entry.Location; + if (Location.IsFlagSet(DiskLocation::kStandaloneFile)) { - IndexEntry& Entry = It.value(); - Entry.LastAccess.store(GcClock::TickCount(), std::memory_order_relaxed); - DiskLocation Location = Entry.Location; + // We don't need to hold the index lock when we read a standalone file _.ReleaseNow(); - - if (Location.IsFlagSet(DiskLocation::kStandaloneFile)) - { - return GetStandaloneCacheValue(Location, HashKey, OutValue); - } - return GetInlineCacheValue(Location, OutValue); + return GetStandaloneCacheValue(Location, HashKey, OutValue); } - - return false; + return GetInlineCacheValue(Location, OutValue); } void @@ -1470,14 +1467,13 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx) WriteBlockTimeUs += ElapsedUs; WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs); }); + if (m_Index.empty()) { - if (m_Index.empty()) - { - ZEN_INFO("garbage collect SKIPPED, for '{}', container is empty", m_BucketDir / m_BucketName); - return; - } - BlockStoreState = m_BlockStore.GetReclaimSnapshotState(); + ZEN_INFO("garbage collect SKIPPED, for '{}', container is empty", m_BucketDir / m_BucketName); + return; } + BlockStoreState = m_BlockStore.GetReclaimSnapshotState(); + SaveManifest(); Index = m_Index; @@ -1832,12 +1828,11 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, const EntryFlags |= DiskLocation::kCompressed; } - m_BlockStore.WriteChunk(Value.Value.Data(), Value.Value.Size(), m_PayloadAlignment, [&](BlockStoreLocation BlockStoreLocation) { + m_BlockStore.WriteChunk(Value.Value.Data(), Value.Value.Size(), m_PayloadAlignment, [&](const BlockStoreLocation& BlockStoreLocation) { DiskLocation Location(BlockStoreLocation, m_PayloadAlignment, EntryFlags); const DiskIndexEntry DiskIndexEntry{.Key = HashKey, .Location = Location}; m_SlogFile.Append(DiskIndexEntry); - m_TotalSize.fetch_add(BlockStoreLocation.Size, std::memory_order::relaxed); - RwLock::ExclusiveLockScope __(m_IndexLock); + RwLock::ExclusiveLockScope _(m_IndexLock); if (auto It = m_Index.find(HashKey); It != m_Index.end()) { // TODO: should check if write is idempotent and bail out if it is? @@ -1852,6 +1847,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, const m_Index.insert({HashKey, {Location, GcClock::TickCount()}}); } }); + m_TotalSize.fetch_add(Value.Value.Size(), std::memory_order::relaxed); } ////////////////////////////////////////////////////////////////////////// diff --git a/zenstore/basicfile.cpp b/zenstore/basicfile.cpp index 8eb172a1c..e5a2adc41 100644 --- a/zenstore/basicfile.cpp +++ b/zenstore/basicfile.cpp @@ -373,31 +373,6 @@ BasicFile::SetFileSize(uint64_t FileSize) #endif } -void -BasicFile::MarkAsDeleteOnClose(std::error_code& Ec) -{ - Ec.clear(); -#if ZEN_PLATFORM_WINDOWS - FILE_DISPOSITION_INFO Fdi{}; - Fdi.DeleteFile = TRUE; - BOOL Success = SetFileInformationByHandle(m_FileHandle, FileDispositionInfo, &Fdi, sizeof Fdi); - if (!Success) - { - Ec = MakeErrorCodeFromLastError(); - } -#elif ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC - std::filesystem::path SourcePath = PathFromHandle(m_FileHandle); - if (unlink(SourcePath.c_str()) < 0) - { - int UnlinkError = zen::GetLastError(); - if (UnlinkError != ENOENT) - { - Ec = MakeErrorCode(UnlinkError); - } - } -#endif -} - void* BasicFile::Detach() { diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp index 54a8eb9df..1946169c4 100644 --- a/zenstore/blockstore.cpp +++ b/zenstore/blockstore.cpp @@ -71,9 +71,9 @@ BlockStoreFile::FileSize() } void -BlockStoreFile::MarkAsDeleteOnClose(std::error_code& Ec) +BlockStoreFile::MarkAsDeleteOnClose() { - m_File.MarkAsDeleteOnClose(Ec); + m_IoBuffer.MarkAsDeleteOnClose(); } IoBuffer @@ -262,26 +262,28 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, Writ BlockStore::ReclaimSnapshotState BlockStore::GetReclaimSnapshotState() { - ReclaimSnapshotState State; - RwLock::ExclusiveLockScope _(m_InsertLock); + ReclaimSnapshotState State; + RwLock::SharedLockScope _(m_InsertLock); for (uint32_t BlockIndex : m_ActiveWriteBlocks) { State.m_ActiveWriteBlocks.insert(BlockIndex); } State.BlockCount = m_ChunkBlocks.size(); - _.ReleaseNow(); return State; } -Ref -BlockStore::GetChunkBlock(const BlockStoreLocation& Location) +IoBuffer +BlockStore::TryGetChunk(const BlockStoreLocation& Location) { RwLock::SharedLockScope InsertLock(m_InsertLock); if (auto BlockIt = m_ChunkBlocks.find(Location.BlockIndex); BlockIt != m_ChunkBlocks.end()) { - return BlockIt->second; + if (const Ref& Block = BlockIt->second; Block) + { + return Block->GetChunk(Location.Offset, Location.Size); + } } - return {}; + return IoBuffer(); } void @@ -465,13 +467,8 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); m_ChunkBlocks[BlockIndex] = nullptr; - } - ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); + ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); + OldBlockFile->MarkAsDeleteOnClose(); } continue; } @@ -589,15 +586,9 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs); }); m_ChunkBlocks[BlockIndex] = nullptr; + ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); + OldBlockFile->MarkAsDeleteOnClose(); } - ZEN_DEBUG("marking cas block store file '{}' for delete, block #{}", OldBlockFile->GetPath(), BlockIndex); - std::error_code Ec; - OldBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message()); - } - OldBlockFile = nullptr; } } catch (std::exception& ex) @@ -606,12 +597,7 @@ BlockStore::ReclaimSpace(const ReclaimSnapshotState& Snapshot, if (NewBlockFile) { ZEN_DEBUG("dropping incomplete cas block store file '{}'", NewBlockFile->GetPath()); - std::error_code Ec; - NewBlockFile->MarkAsDeleteOnClose(Ec); - if (Ec) - { - ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", NewBlockFile->GetPath(), Ec.message()); - } + NewBlockFile->MarkAsDeleteOnClose(); } } } @@ -1032,9 +1018,7 @@ TEST_CASE("blockstore.blockfile") { BlockStoreFile File1(RootDirectory / "1"); File1.Open(); - std::error_code Ec; - File1.MarkAsDeleteOnClose(Ec); - CHECK(!Ec); + File1.MarkAsDeleteOnClose(); DataChunk = File1.GetChunk(0, 5); BoopChunk = File1.GetChunk(5, 5); } @@ -1058,12 +1042,7 @@ namespace { std::string ReadChunkAsString(BlockStore& Store, const BlockStoreLocation& Location) { - Ref ChunkBlock(Store.GetChunkBlock(Location)); - if (!ChunkBlock) - { - return ""; - } - IoBuffer ChunkData = ChunkBlock->GetChunk(Location.Offset, Location.Size); + IoBuffer ChunkData = Store.TryGetChunk(Location); if (!ChunkData) { return ""; @@ -1129,7 +1108,7 @@ TEST_CASE("blockstore.chunks") BlockStore Store; Store.Initialize(RootDirectory, 128, 1024, {}); - Ref BadChunk = Store.GetChunkBlock({.BlockIndex = 0, .Offset = 0, .Size = 512}); + IoBuffer BadChunk = Store.TryGetChunk({.BlockIndex = 0, .Offset = 0, .Size = 512}); CHECK(!BadChunk); std::string FirstChunkData = "This is the data of the first chunk that we will write"; @@ -1202,7 +1181,7 @@ TEST_CASE("blockstore.iterate.chunks") BlockStore Store; Store.Initialize(RootDirectory / "store", ScrubSmallChunkWindowSize * 2, 1024, {}); - Ref BadChunk = Store.GetChunkBlock({.BlockIndex = 0, .Offset = 0, .Size = 512}); + IoBuffer BadChunk = Store.TryGetChunk({.BlockIndex = 0, .Offset = 0, .Size = 512}); CHECK(!BadChunk); std::string FirstChunkData = "This is the data of the first chunk that we will write"; @@ -1264,6 +1243,7 @@ TEST_CASE("blockstore.reclaim.space") for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) { IoBuffer Chunk = CreateChunk(57 + ChunkIndex); + Store.WriteChunk(Chunk.Data(), Chunk.Size(), Alignment, [&](const BlockStoreLocation& L) { ChunkLocations.push_back(L); }); ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size())); } @@ -1331,11 +1311,10 @@ TEST_CASE("blockstore.reclaim.space") for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) { - Ref ChunkBlock = Store.GetChunkBlock(NewChunkLocations[ChunkIndex]); + IoBuffer ChunkBlock = Store.TryGetChunk(NewChunkLocations[ChunkIndex]); if (ChunkIndex >= DeleteChunkCount) { - CHECK(ChunkBlock); - IoBuffer VerifyChunk = ChunkBlock->GetChunk(NewChunkLocations[ChunkIndex].Offset, NewChunkLocations[ChunkIndex].Size); + IoBuffer VerifyChunk = Store.TryGetChunk(NewChunkLocations[ChunkIndex]); CHECK(VerifyChunk); IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size()); CHECK(VerifyHash == ChunkHashes[ChunkIndex]); @@ -1405,9 +1384,7 @@ TEST_CASE("blockstore.thread.read.write") for (size_t ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex) { WorkerPool.ScheduleWork([&Store, ChunkIndex, &ChunkLocations, &ChunkHashes, &WorkCompleted]() { - Ref ChunkBlock = Store.GetChunkBlock(ChunkLocations[ChunkIndex]); - CHECK(ChunkBlock); - IoBuffer VerifyChunk = ChunkBlock->GetChunk(ChunkLocations[ChunkIndex].Offset, ChunkLocations[ChunkIndex].Size); + IoBuffer VerifyChunk = Store.TryGetChunk(ChunkLocations[ChunkIndex]); CHECK(VerifyChunk); IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size()); CHECK(VerifyHash == ChunkHashes[ChunkIndex]); @@ -1432,9 +1409,7 @@ TEST_CASE("blockstore.thread.read.write") WorkCompleted.fetch_add(1); }); WorkerPool.ScheduleWork([&Store, ChunkIndex, &ChunkLocations, &ChunkHashes, &WorkCompleted]() { - Ref ChunkBlock = Store.GetChunkBlock(ChunkLocations[ChunkIndex]); - CHECK(ChunkBlock); - IoBuffer VerifyChunk = ChunkBlock->GetChunk(ChunkLocations[ChunkIndex].Offset, ChunkLocations[ChunkIndex].Size); + IoBuffer VerifyChunk = Store.TryGetChunk(ChunkLocations[ChunkIndex]); CHECK(VerifyChunk); IoHash VerifyHash = IoHash::HashBuffer(VerifyChunk.Data(), VerifyChunk.Size()); CHECK(VerifyHash == ChunkHashes[ChunkIndex]); diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index cc0e2241c..2d48265f7 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -258,8 +258,8 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const RwLock::ExclusiveLockScope _(m_LocationMapLock); m_LocationMap.emplace(ChunkHash, DiskLocation); } - m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order::relaxed); }); + m_TotalSize.fetch_add(static_cast(ChunkSize), std::memory_order::relaxed); return CasStore::InsertResult{.New = true}; } @@ -279,16 +279,10 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) { return IoBuffer(); } - BlockStoreLocation Location = KeyIt->second.Get(m_PayloadAlignment); - _.ReleaseNow(); - - Ref ChunkBlock = m_BlockStore.GetChunkBlock(Location); - if (!ChunkBlock) - { - return IoBuffer(); - } + const BlockStoreLocation& Location = KeyIt->second.Get(m_PayloadAlignment); - return ChunkBlock->GetChunk(Location.Offset, Location.Size); + IoBuffer Chunk = m_BlockStore.TryGetChunk(Location); + return Chunk; } bool diff --git a/zenstore/include/zenstore/basicfile.h b/zenstore/include/zenstore/basicfile.h index 5a500c65f..ce9988776 100644 --- a/zenstore/include/zenstore/basicfile.h +++ b/zenstore/include/zenstore/basicfile.h @@ -33,11 +33,12 @@ public: enum class Mode : uint32_t { - kRead = 0, // Opens a existing file for read only - kWrite = 1, // Opens (or creates) a file for read and write - kTruncate = 2, // Opens (or creates) a file for read and write and sets the size to zero - kDelete = 3, // Opens (or creates) a file for read and write enabling MarkAsDeleteOnClose() - kTruncateDelete = 4 // Opens (or creates) a file for read and write and sets the size to zero enabling MarkAsDeleteOnClose() + kRead = 0, // Opens a existing file for read only + kWrite = 1, // Opens (or creates) a file for read and write + kTruncate = 2, // Opens (or creates) a file for read and write and sets the size to zero + kDelete = 3, // Opens (or creates) a file for read and write allowing .DeleteFile file disposition to be set + kTruncateDelete = + 4 // Opens (or creates) a file for read and write and sets the size to zero allowing .DeleteFile file disposition to be set }; void Open(const std::filesystem::path& FileName, Mode Mode); @@ -55,7 +56,6 @@ public: void SetFileSize(uint64_t FileSize); IoBuffer ReadAll(); void WriteAll(IoBuffer Data, std::error_code& Ec); - void MarkAsDeleteOnClose(std::error_code& Ec); void* Detach(); inline void* Handle() { return m_FileHandle; } diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h index 9edfc36e8..34c475fb6 100644 --- a/zenstore/include/zenstore/blockstore.h +++ b/zenstore/include/zenstore/blockstore.h @@ -89,7 +89,7 @@ struct BlockStoreFile : public RefCounted const std::filesystem::path& GetPath() const; void Open(); void Create(uint64_t InitialSize); - void MarkAsDeleteOnClose(std::error_code& Ec); + void MarkAsDeleteOnClose(); uint64_t FileSize(); IoBuffer GetChunk(uint64_t Offset, uint64_t Size); void Read(void* Data, uint64_t Size, uint64_t FileOffset); @@ -133,8 +133,8 @@ public: void WriteChunk(const void* Data, uint64_t Size, uint64_t Alignment, WriteChunkCallback Callback); - Ref GetChunkBlock(const BlockStoreLocation& Location); - void Flush(); + IoBuffer TryGetChunk(const BlockStoreLocation& Location); + void Flush(); ReclaimSnapshotState GetReclaimSnapshotState(); void ReclaimSpace( -- cgit v1.2.3 From e177d1005c73512112b2ff6ab6f34e5d327c127b Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 9 May 2022 08:31:46 +0200 Subject: fix exception message/logging --- zenserver/cache/structuredcachestore.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index dee4c55f0..f69ffd82c 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1735,15 +1735,16 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c DataFile.CreateTemporary(m_BucketDir.c_str(), Ec); if (Ec) { - throw std::system_error(Ec, fmt::format("Failed to open temporary file for put at '{}'", m_BucketDir)); + throw std::system_error(Ec, fmt::format("Failed to open temporary file for put in '{}'", m_BucketDir)); } DataFile.WriteAll(Value.Value, Ec); if (Ec) { throw std::system_error(Ec, - fmt::format("Failed to write payload ({} bytes) to temporary file for put in '{}'", + fmt::format("Failed to write payload ({} bytes) to temporary file '{}' for put in '{}'", NiceBytes(Value.Value.Size()), + DataFile.GetPath().string(), m_BucketDir)); } @@ -1775,6 +1776,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c { EntryFlags |= DiskLocation::kCompressed; } + DiskLocation Loc(Value.Value.Size(), EntryFlags); IndexEntry Entry = IndexEntry(Loc, GcClock::TickCount()); @@ -1810,7 +1812,8 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c fmt::format("Failed to create parent directory '{}' for file '{}' for put in '{}'", ParentPath, FsPath, m_BucketDir)); } - ZEN_INFO("Failed writing opening file '{}' for writing for put in '{}', pausing and retrying, reason '{}'", + ZEN_INFO("Failed renaming temporary file '{}' to '{}' for put in '{}', pausing and retrying, reason '{}'", + DataFile.GetPath().string(), FsPath.string(), m_BucketDir, Ec.message()); -- cgit v1.2.3 From 5ef2b317ef1965121ab0090d86962d3eea4a357e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 9 May 2022 22:03:45 +0200 Subject: Make sure CacheBucket::PutStandaloneCacheValue cleans up the temp file if we fail to move the it into place --- zenserver/cache/structuredcachestore.cpp | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 05c80c5bf..411717e61 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1819,6 +1819,14 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c RetryCount--; } while (RetryCount > 0); + // Once we have called MoveTemporaryIntoPlace we no longer will automatically clean up the temp file + // as the file handle has already been closed + std::filesystem::remove(DataFile.GetPath(), Ec); + if (Ec) + { + ZEN_WARN("Failed to clean up temporary file '{}' for put in '{}', reason '{}'", DataFile.GetPath(), m_BucketDir, Ec.message()); + } + throw std::system_error(Ec, fmt::format("Failed to finalize file '{}' for put in '{}'", DataFilePath.ToUtf8(), m_BucketDir)); } @@ -2971,6 +2979,46 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } +# if ZEN_PLATFORM_WINDOWS +TEST_CASE("z$.blocked.disklayer.put") +{ + // On Windows platform we can't overwrite a standalone file that + // is open for read at the same time. + // Make sure the retry path runs and we get an exception + + ScopedTemporaryDirectory TempDir; + + GcStorageSize CacheSize; + + const auto CreateCacheValue = [](size_t Size) -> CbObject { + std::vector Buf; + Buf.resize(Size); + + CbObjectWriter Writer; + Writer.AddBinary("Binary"sv, Buf.data(), Buf.size()); + return Writer.Save(); + }; + + CasGc Gc; + ZenCacheNamespace Zcs(Gc, TempDir.Path() / "cache"); + + CbObject CacheValue = CreateCacheValue(64 * 1024 + 64); + + IoBuffer Buffer = CacheValue.GetBuffer().AsIoBuffer(); + Buffer.SetContentType(ZenContentType::kCbObject); + + size_t Key = Buffer.Size(); + IoHash HashKey = IoHash::HashBuffer(&Key, sizeof(uint32_t)); + Zcs.Put("test_bucket", HashKey, {.Value = Buffer}); + + ZenCacheValue BufferGet; + CHECK(Zcs.Get("test_bucket", HashKey, BufferGet)); + + MemoryView ValueView = BufferGet.Value.GetView(); + CHECK_THROWS(Zcs.Put("test_bucket", HashKey, {.Value = Buffer})); +} +# endif + #endif void -- cgit v1.2.3 From 5a872e2c699b439e3e5e95fe1c1882c8a0ca92dd Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 9 May 2022 22:32:17 +0200 Subject: Restore logic where we accept failed overwrite if resulting size is the same Correctly calculate the m_TotalSize difference when overwriting file --- zenserver/cache/structuredcachestore.cpp | 47 ++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 411717e61..b6fd44742 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1748,6 +1748,8 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c BuildPath(DataFilePath, HashKey); std::filesystem::path FsPath{DataFilePath.ToPath()}; + uint64_t OldFileSize = 0; + // We retry to open the file since it can be held open for read. // This happens if the server processes a Get request for the file or // if we are busy sending the file upstream @@ -1757,7 +1759,27 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c Ec.clear(); { RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); + + std::error_code ExistingEc; + OldFileSize = std::filesystem::file_size(FsPath, ExistingEc); + if (ExistingEc) + { + OldFileSize = 0; + } + DataFile.MoveTemporaryIntoPlace(FsPath, Ec); + + if (Ec && (!ExistingEc) && (OldFileSize == Value.Value.Size())) + { + ZEN_INFO( + "Failed to move temporary file '{}' to '{}'. Target file has same size, assuming concurrent write of same value, move " + "failed with reason '{}'", + DataFile.GetPath(), + FsPath.string(), + m_BucketDir, + Ec.message()); + return; + } } if (!Ec) @@ -1789,7 +1811,15 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c } m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - m_TotalSize.fetch_add(Loc.Size(), std::memory_order::relaxed); + uint64_t NewFileSize = Loc.Size(); + if (OldFileSize <= NewFileSize) + { + m_TotalSize.fetch_add(NewFileSize - OldFileSize, std::memory_order::relaxed); + } + else + { + m_TotalSize.fetch_sub(OldFileSize - NewFileSize, std::memory_order::relaxed); + } return; } @@ -3014,8 +3044,19 @@ TEST_CASE("z$.blocked.disklayer.put") ZenCacheValue BufferGet; CHECK(Zcs.Get("test_bucket", HashKey, BufferGet)); - MemoryView ValueView = BufferGet.Value.GetView(); - CHECK_THROWS(Zcs.Put("test_bucket", HashKey, {.Value = Buffer})); + // Overwriting with a value of same size should go fine + Zcs.Put("test_bucket", HashKey, {.Value = Buffer}); + + CbObject CacheValue2 = CreateCacheValue(64 * 1024 + 64 + 1); + IoBuffer Buffer2 = CacheValue2.GetBuffer().AsIoBuffer(); + Buffer2.SetContentType(ZenContentType::kCbObject); + // Overwriting with different size should throw exception if file is held open + CHECK_THROWS(Zcs.Put("test_bucket", HashKey, {.Value = Buffer2})); + + BufferGet = ZenCacheValue{}; + + // Read access has been removed, we should now be able to overwrite it + Zcs.Put("test_bucket", HashKey, {.Value = Buffer2}); } # endif -- cgit v1.2.3 From 239e09c1df23e080c5d88cfb5d6af8eb63c232f9 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 9 May 2022 22:38:27 +0200 Subject: make test run on more platforms --- zenserver/cache/structuredcachestore.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index b6fd44742..a4cab881f 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -3009,13 +3009,8 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } -# if ZEN_PLATFORM_WINDOWS TEST_CASE("z$.blocked.disklayer.put") { - // On Windows platform we can't overwrite a standalone file that - // is open for read at the same time. - // Make sure the retry path runs and we get an exception - ScopedTemporaryDirectory TempDir; GcStorageSize CacheSize; @@ -3050,15 +3045,20 @@ TEST_CASE("z$.blocked.disklayer.put") CbObject CacheValue2 = CreateCacheValue(64 * 1024 + 64 + 1); IoBuffer Buffer2 = CacheValue2.GetBuffer().AsIoBuffer(); Buffer2.SetContentType(ZenContentType::kCbObject); - // Overwriting with different size should throw exception if file is held open +# if ZEN_PLATFORM_WINDOWS + // On Windows platform, overwriting with different size while we have + // it open for read should throw exception if file is held open CHECK_THROWS(Zcs.Put("test_bucket", HashKey, {.Value = Buffer2})); +# else + // Other platforms should handle overwrite just fine + Zcs.Put("test_bucket", HashKey, {.Value = Buffer2}); +# endif BufferGet = ZenCacheValue{}; // Read access has been removed, we should now be able to overwrite it Zcs.Put("test_bucket", HashKey, {.Value = Buffer2}); } -# endif #endif -- cgit v1.2.3 From e67a43514bfba97fae4bc4ccf42ca312ba1d01bb Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 9 May 2022 23:31:29 +0200 Subject: happy path should be minimal work --- zenserver/cache/structuredcachestore.cpp | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index a4cab881f..c3904d40a 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1725,6 +1725,8 @@ ZenCacheDiskLayer::UpdateAccessTimes(const zen::access_tracking::AccessTimes& Ac void ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value) { + uint64_t NewFileSize = Value.Value.Size(); + TemporaryFile DataFile; std::error_code Ec; @@ -1739,7 +1741,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c { throw std::system_error(Ec, fmt::format("Failed to write payload ({} bytes) to temporary file '{}' for put in '{}'", - NiceBytes(Value.Value.Size()), + NiceBytes(NewFileSize), DataFile.GetPath().string(), m_BucketDir)); } @@ -1748,9 +1750,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c BuildPath(DataFilePath, HashKey); std::filesystem::path FsPath{DataFilePath.ToPath()}; - uint64_t OldFileSize = 0; - - // We retry to open the file since it can be held open for read. + // We retry to move the file since it can be held open for read. // This happens if the server processes a Get request for the file or // if we are busy sending the file upstream int RetryCount = 3; @@ -1760,25 +1760,24 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c { RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); - std::error_code ExistingEc; - OldFileSize = std::filesystem::file_size(FsPath, ExistingEc); - if (ExistingEc) - { - OldFileSize = 0; - } - DataFile.MoveTemporaryIntoPlace(FsPath, Ec); - if (Ec && (!ExistingEc) && (OldFileSize == Value.Value.Size())) + if (Ec) { - ZEN_INFO( - "Failed to move temporary file '{}' to '{}'. Target file has same size, assuming concurrent write of same value, move " - "failed with reason '{}'", - DataFile.GetPath(), - FsPath.string(), - m_BucketDir, - Ec.message()); - return; + std::error_code ExistingEc; + uint64_t OldFileSize = std::filesystem::file_size(FsPath, ExistingEc); + if (!ExistingEc && (OldFileSize == NewFileSize)) + { + ZEN_INFO( + "Failed to move temporary file '{}' to '{}'. Target file has same size, assuming concurrent write of same value, " + "move " + "failed with reason '{}'", + DataFile.GetPath(), + FsPath.string(), + m_BucketDir, + Ec.message()); + return; + } } } @@ -1795,9 +1794,10 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c EntryFlags |= DiskLocation::kCompressed; } - DiskLocation Loc(Value.Value.Size(), EntryFlags); + DiskLocation Loc(NewFileSize, EntryFlags); IndexEntry Entry = IndexEntry(Loc, GcClock::TickCount()); + uint64_t OldFileSize = 0; RwLock::ExclusiveLockScope _(m_IndexLock); if (auto It = m_Index.find(HashKey); It == m_Index.end()) { @@ -1807,11 +1807,11 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c else { // TODO: should check if write is idempotent and bail out if it is? - It.value() = Entry; + OldFileSize = It.value().Location.Size(); + It.value() = Entry; } m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - uint64_t NewFileSize = Loc.Size(); if (OldFileSize <= NewFileSize) { m_TotalSize.fetch_add(NewFileSize - OldFileSize, std::memory_order::relaxed); -- cgit v1.2.3 From 5d15fa59655c79a0c8ad1b4c5d44b657aa07c29e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 10 May 2022 10:08:31 +0200 Subject: Make sure we clean up temp file in all scenarios --- zenserver/cache/structuredcachestore.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index c3904d40a..ce55b24b6 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1736,6 +1736,22 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c throw std::system_error(Ec, fmt::format("Failed to open temporary file for put in '{}'", m_BucketDir)); } + bool CleanUpTempFile = false; + auto __ = MakeGuard([&] { + if (CleanUpTempFile) + { + std::error_code Ec; + std::filesystem::remove(DataFile.GetPath(), Ec); + if (Ec) + { + ZEN_WARN("Failed to clean up temporary file '{}' for put in '{}', reason '{}'", + DataFile.GetPath(), + m_BucketDir, + Ec.message()); + } + } + }); + DataFile.WriteAll(Value.Value, Ec); if (Ec) { @@ -1762,6 +1778,10 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c DataFile.MoveTemporaryIntoPlace(FsPath, Ec); + // Once we have called MoveTemporaryIntoPlace automatic clean up the temp file + // will be disabled as the file handle has already been closed + CleanUpTempFile = Ec ? true : false; + if (Ec) { std::error_code ExistingEc; @@ -1849,14 +1869,6 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c RetryCount--; } while (RetryCount > 0); - // Once we have called MoveTemporaryIntoPlace we no longer will automatically clean up the temp file - // as the file handle has already been closed - std::filesystem::remove(DataFile.GetPath(), Ec); - if (Ec) - { - ZEN_WARN("Failed to clean up temporary file '{}' for put in '{}', reason '{}'", DataFile.GetPath(), m_BucketDir, Ec.message()); - } - throw std::system_error(Ec, fmt::format("Failed to finalize file '{}' for put in '{}'", DataFilePath.ToUtf8(), m_BucketDir)); } -- cgit v1.2.3 From 33fa76a35a96cad1865854068e60c1ca0b53864e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Fri, 6 May 2022 16:42:27 +0200 Subject: parameterize namespace for upstream (first hack) --- zenserver-test/zenserver-test.cpp | 39 ++--- zenserver/cache/structuredcache.cpp | 240 +++++++++++++++++++------------ zenserver/cache/structuredcache.h | 3 +- zenserver/upstream/hordecompute.cpp | 20 +-- zenserver/upstream/jupiter.cpp | 102 +++++++------ zenserver/upstream/jupiter.h | 50 +++---- zenserver/upstream/upstreamcache.cpp | 66 ++++++--- zenutil/include/zenutil/cache/cachekey.h | 18 ++- 8 files changed, 323 insertions(+), 215 deletions(-) diff --git a/zenserver-test/zenserver-test.cpp b/zenserver-test/zenserver-test.cpp index 7e9a36a81..0f4858bd5 100644 --- a/zenserver-test/zenserver-test.cpp +++ b/zenserver-test/zenserver-test.cpp @@ -1451,16 +1451,18 @@ TEST_CASE("zcache.rpc") return zen::IoBuffer(zen::IoBuffer::Clone, MemStream.Data(), MemStream.Size()); }; - auto PutCacheRecords = - [&AppendCacheRecord, - &ToIoBuffer](std::string_view BaseUri, std::string_view Bucket, size_t Num, size_t PayloadSize = 1024) -> std::vector { + auto PutCacheRecords = [&AppendCacheRecord, &ToIoBuffer](std::string_view BaseUri, + std::string_view Namespace, + std::string_view Bucket, + size_t Num, + size_t PayloadSize = 1024) -> std::vector { std::vector OutKeys; for (uint32_t Key = 1; Key <= Num; ++Key) { zen::IoHash KeyHash; ((uint32_t*)(KeyHash.Hash))[0] = Key; - const zen::CacheKey CacheKey = zen::CacheKey::Create(Bucket, KeyHash); + const zen::CacheKey CacheKey = zen::CacheKey::Create(Namespace, Bucket, KeyHash); CbPackage Package; CbWriter Writer; @@ -1561,7 +1563,9 @@ TEST_CASE("zcache.rpc") auto LoadKey = [](zen::CbFieldView KeyView) -> zen::CacheKey { if (zen::CbObjectView KeyObj = KeyView.AsObjectView()) { - return CacheKey::Create(KeyObj["Bucket"sv].AsString(), KeyObj["Hash"].AsHash()); + return CacheKey::Create(KeyObj["Namespace"sv] ? KeyObj["Namespace"sv].AsString() : ""sv, + KeyObj["Bucket"sv].AsString(), + KeyObj["Hash"].AsHash()); } return CacheKey::Empty; }; @@ -1578,7 +1582,7 @@ TEST_CASE("zcache.rpc") Inst.WaitUntilReady(); CachePolicy Policy = CachePolicy::Default; - std::vector Keys = PutCacheRecords(BaseUri, "mastodon"sv, 128); + std::vector Keys = PutCacheRecords(BaseUri, ""sv, "mastodon"sv, 128); GetCacheRecordResult Result = GetCacheRecords(BaseUri, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1589,9 +1593,11 @@ TEST_CASE("zcache.rpc") CbObjectView RecordObj = RecordView.AsObjectView(); CbObjectView KeyObj = RecordObj["Key"sv].AsObjectView(); - const CacheKey Key = CacheKey::Create(KeyObj["Bucket"sv].AsString(), KeyObj["Hash"].AsHash()); - IoHash AttachmentHash; - size_t NumValues = 0; + const CacheKey Key = CacheKey::Create(KeyObj["Namespace"sv] ? KeyObj["Namespace"sv].AsString() : ""sv, + KeyObj["Bucket"sv].AsString(), + KeyObj["Hash"].AsHash()); + IoHash AttachmentHash; + size_t NumValues = 0; for (CbFieldView Value : RecordObj["Values"sv]) { AttachmentHash = Value.AsObjectView()["RawHash"sv].AsHash(); @@ -1617,13 +1623,13 @@ TEST_CASE("zcache.rpc") Inst.WaitUntilReady(); CachePolicy Policy = CachePolicy::Default; - std::vector ExistingKeys = PutCacheRecords(BaseUri, "mastodon"sv, 128); + std::vector ExistingKeys = PutCacheRecords(BaseUri, ""sv, "mastodon"sv, 128); std::vector Keys; for (const zen::CacheKey& Key : ExistingKeys) { Keys.push_back(Key); - Keys.push_back(CacheKey::Create("missing"sv, IoHash::Zero)); + Keys.push_back(CacheKey::Create("missing"sv, "missing"sv, IoHash::Zero)); } GetCacheRecordResult Result = GetCacheRecords(BaseUri, Keys, Policy); @@ -1671,7 +1677,7 @@ TEST_CASE("zcache.rpc") SpawnServer(UpstreamServer, UpstreamCfg); SpawnServer(LocalServer, LocalCfg); - std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "mastodon"sv, 4); + std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, ""sv, "mastodon"sv, 4); CachePolicy Policy = CachePolicy::QueryLocal; GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, Keys, Policy); @@ -1696,7 +1702,7 @@ TEST_CASE("zcache.rpc") SpawnServer(UpstreamServer, UpstreamCfg); SpawnServer(LocalServer, LocalCfg); - std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "mastodon"sv, 4); + std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, ""sv, "mastodon"sv, 4); CachePolicy Policy = (CachePolicy::QueryLocal | CachePolicy::QueryRemote); GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, Keys, Policy); @@ -1728,8 +1734,9 @@ TEST_CASE("zcache.rpc.allpolicies") SpawnServer(UpstreamServer, UpstreamCfg); SpawnServer(LocalServer, LocalCfg); - std::string_view TestVersion = "F72150A02AE34B57A9EC91D36BA1CE08"sv; - std::string_view TestBucket = "allpoliciestest"sv; + std::string_view TestVersion = "F72150A02AE34B57A9EC91D36BA1CE08"sv; + std::string_view TestBucket = "allpoliciestest"sv; + std::string_view TestNamespace = ""sv; // NumKeys = (2 Value vs Record)*(2 SkipData vs Default)*(2 ForceMiss vs Not)*(2 use local) // *(2 use remote)*(2 UseValue Policy vs not)*(4 cases per type) @@ -1831,7 +1838,7 @@ TEST_CASE("zcache.rpc.allpolicies") IoHash KeyHash = KeyWriter.GetHash(); KeyData& KeyData = KeyDatas[KeyIndex]; - KeyData.Key = CacheKey::Create(TestBucket, KeyHash); + KeyData.Key = CacheKey::Create(TestNamespace, TestBucket, KeyHash); KeyData.KeyIndex = KeyIndex; KeyData.GetRequestsData = (KeyIndex & (1 << 1)) == 0; KeyData.UseValueAPI = (KeyIndex & (1 << 2)) != 0; diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 0f16f6785..74438ac2e 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -141,7 +141,7 @@ HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) { // Bucket reference - return HandleCacheBucketRequest(Request, Key); + return HandleCacheBucketRequest(Request, ZenCacheStore::DefaultNamespace, Key); } return Request.WriteResponse(HttpResponseCode::BadRequest); // invalid URL @@ -162,7 +162,7 @@ HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) } void -HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Bucket) +HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket) { switch (Request.RequestVerb()) { @@ -176,7 +176,7 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, case HttpVerb::kDelete: // Drop bucket - if (m_CacheStore.DropBucket(ZenCacheStore::DefaultNamespace, Bucket)) + if (m_CacheStore.DropBucket(Namespace, Bucket)) { return Request.WriteResponse(HttpResponseCode::OK); } @@ -226,7 +226,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request } if (EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryLocal) && - m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, ClientResultValue)) + m_CacheStore.Get(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ClientResultValue)) { Success = true; ZenContentType ContentType = ClientResultValue.Value.GetContentType(); @@ -287,7 +287,8 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (Success) { - ZEN_DEBUG("HIT - '{}/{}' {} '{}' (LOCAL)", + ZEN_DEBUG("HIT - '{}/{}/{}' {} '{}' (LOCAL)", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, NiceBytes(ClientResultValue.Value.Size()), @@ -306,7 +307,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request } else if (!EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryRemote)) { - ZEN_DEBUG("MISS - '{}/{}' '{}'", Ref.BucketSegment, Ref.HashKey, ToString(AcceptType)); + ZEN_DEBUG("MISS - '{}/{}/{}' '{}'", Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ToString(AcceptType)); m_CacheStats.MissCount++; return Request.WriteResponse(HttpResponseCode::NotFound); } @@ -324,7 +325,8 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request metrics::OperationTiming::Scope $(m_UpstreamGetRequestTiming); - if (GetUpstreamCacheResult UpstreamResult = m_UpstreamCache.GetCacheRecord({Ref.BucketSegment, Ref.HashKey}, AcceptType); + if (GetUpstreamCacheResult UpstreamResult = + m_UpstreamCache.GetCacheRecord({Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, AcceptType); UpstreamResult.Success) { Success = true; @@ -340,7 +342,8 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (ValidationResult != CbValidateError::None) { Success = false; - ZEN_WARN("Get - '{}/{}' '{}' FAILED, invalid compact binary object from upstream", + ZEN_WARN("Get - '{}/{}/{}' '{}' FAILED, invalid compact binary object from upstream", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ToString(AcceptType)); @@ -351,7 +354,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (Success && StoreLocal) { - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, ClientResultValue); + m_CacheStore.Put(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ClientResultValue); } } else if (AcceptType == ZenContentType::kCbPackage) @@ -405,7 +408,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request if (StoreLocal) { - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, CacheValue); + m_CacheStore.Put(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, CacheValue); } BinaryWriter MemStream; @@ -434,14 +437,19 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request else { Success = false; - ZEN_WARN("Get - '{}/{}' '{}' FAILED, invalid upstream package", Ref.BucketSegment, Ref.HashKey, ToString(AcceptType)); + ZEN_WARN("Get - '{}/{}/{}' '{}' FAILED, invalid upstream package", + Ref.Namespace, + Ref.BucketSegment, + Ref.HashKey, + ToString(AcceptType)); } } } if (Success) { - ZEN_DEBUG("HIT - '{}/{}' {} '{}' (UPSTREAM)", + ZEN_DEBUG("HIT - '{}/{}/{}' {} '{}' (UPSTREAM)", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, NiceBytes(ClientResultValue.Value.Size()), @@ -463,7 +471,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request } else { - ZEN_DEBUG("MISS - '{}/{}' '{}'", Ref.BucketSegment, Ref.HashKey, ToString(AcceptType)); + ZEN_DEBUG("MISS - '{}/{}/{}' '{}'", Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ToString(AcceptType)); m_CacheStats.MissCount++; AsyncRequest.WriteResponse(HttpResponseCode::NotFound); } @@ -486,12 +494,12 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (ContentType == HttpContentType::kBinary || ContentType == HttpContentType::kCompressedBinary) { - ZEN_DEBUG("PUT - '{}/{}' {} '{}'", Ref.BucketSegment, Ref.HashKey, NiceBytes(Body.Size()), ToString(ContentType)); - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, {.Value = Body}); + ZEN_DEBUG("PUT - '{}/{}/{}' {} '{}'", Ref.Namespace, Ref.BucketSegment, Ref.HashKey, NiceBytes(Body.Size()), ToString(ContentType)); + m_CacheStore.Put(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, {.Value = Body}); if (EnumHasAllFlags(PolicyFromURL, CachePolicy::StoreRemote)) { - m_UpstreamCache.EnqueueUpstream({.Type = ContentType, .Key = {Ref.BucketSegment, Ref.HashKey}}); + m_UpstreamCache.EnqueueUpstream({.Type = ContentType, .Key = {Ref.Namespace, Ref.BucketSegment, Ref.HashKey}}); } Request.WriteResponse(HttpResponseCode::Created); @@ -502,7 +510,11 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (ValidationResult != CbValidateError::None) { - ZEN_WARN("PUT - '{}/{}' '{}' FAILED, invalid compact binary", Ref.BucketSegment, Ref.HashKey, ToString(ContentType)); + ZEN_WARN("PUT - '{}/{}/{}' '{}' FAILED, invalid compact binary", + Ref.Namespace, + Ref.BucketSegment, + Ref.HashKey, + ToString(ContentType)); return Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "Compact binary validation failed"sv); } @@ -520,7 +532,8 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request TotalCount++; }); - ZEN_DEBUG("PUT - '{}/{}' {} '{}' attachments '{}/{}' (valid/total)", + ZEN_DEBUG("PUT - '{}/{}/{}' {} '{}' attachments '{}/{}' (valid/total)", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, NiceBytes(Body.Size()), @@ -529,14 +542,14 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request ValidAttachments.size()); Body.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, {.Value = Body}); + m_CacheStore.Put(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, {.Value = Body}); const bool IsPartialRecord = TotalCount != static_cast(ValidAttachments.size()); if (EnumHasAllFlags(Policy, CachePolicy::StoreRemote) && !IsPartialRecord) { m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCbObject, - .Key = {Ref.BucketSegment, Ref.HashKey}, + .Key = {Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, .ValueContentIds = std::move(ValidAttachments)}); } @@ -548,7 +561,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (!Package.TryLoad(Body)) { - ZEN_WARN("PUT - '{}/{}' '{}' FAILED, invalid package", Ref.BucketSegment, Ref.HashKey, ToString(ContentType)); + ZEN_WARN("PUT - '{}/{}/{}' '{}' FAILED, invalid package", Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ToString(ContentType)); return Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "Invalid package"sv); } CachePolicy Policy = PolicyFromURL; @@ -578,7 +591,8 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request } else { - ZEN_WARN("PUT - '{}/{}' '{}' FAILED, attachment '{}' is not compressed", + ZEN_WARN("PUT - '{}/{}/{}' '{}' FAILED, attachment '{}' is not compressed", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ToString(HttpContentType::kCbPackage), @@ -599,7 +613,8 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request return Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "Invalid attachment(s)"sv); } - ZEN_DEBUG("PUT - '{}/{}' {} '{}', attachments '{}/{}/{}' (new/valid/total)", + ZEN_DEBUG("PUT - '{}/{}/{}' {} '{}', attachments '{}/{}/{}' (new/valid/total)", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, NiceBytes(Body.GetSize()), @@ -612,14 +627,14 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request CacheValue.Value = CacheRecord.GetBuffer().AsIoBuffer(); CacheValue.Value.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Ref.BucketSegment, Ref.HashKey, CacheValue); + m_CacheStore.Put(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, CacheValue); const bool IsPartialRecord = Count.Valid != Count.Total; if (EnumHasAllFlags(Policy, CachePolicy::StoreRemote) && !IsPartialRecord) { m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCbPackage, - .Key = {Ref.BucketSegment, Ref.HashKey}, + .Key = {Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, .ValueContentIds = std::move(ValidAttachments)}); } @@ -661,7 +676,7 @@ HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, if (QueryUpstream) { - if (auto UpstreamResult = m_UpstreamCache.GetCacheValue({Ref.BucketSegment, Ref.HashKey}, Ref.ValueContentId); + if (auto UpstreamResult = m_UpstreamCache.GetCacheValue({Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, Ref.ValueContentId); UpstreamResult.Success) { if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(UpstreamResult.Value))) @@ -679,7 +694,8 @@ HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, if (!Value) { - ZEN_DEBUG("MISS - '{}/{}/{}' '{}' in {}", + ZEN_DEBUG("MISS - '{}/{}/{}/{}' '{}' in {}", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, Ref.ValueContentId, @@ -689,7 +705,8 @@ HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, return Request.WriteResponse(HttpResponseCode::NotFound); } - ZEN_DEBUG("HIT - '{}/{}/{}' {} '{}' ({}) in {}", + ZEN_DEBUG("HIT - '{}/{}/{}/{}' {} '{}' ({}) in {}", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, Ref.ValueContentId, @@ -747,7 +764,8 @@ HttpStructuredCacheService::HandlePutCacheValue(zen::HttpServerRequest& Request, CidStore::InsertResult Result = m_CidStore.AddChunk(Compressed); - ZEN_DEBUG("PUT - '{}/{}/{}' {} '{}' ({}) in {}", + ZEN_DEBUG("PUT - '{}/{}/{}/{}' {} '{}' ({}) in {}", + Ref.Namespace, Ref.BucketSegment, Ref.HashKey, Ref.ValueContentId, @@ -772,8 +790,14 @@ HttpStructuredCacheService::ValidateKeyUri(HttpServerRequest& Request, CacheRef& return false; } + OutRef.Namespace = ToLower(""); // TODO: Should we add namespace to URI? OutRef.BucketSegment = ToLower(Key.substr(0, BucketSplitOffset)); + if (!std::all_of(begin(OutRef.Namespace), end(OutRef.Namespace), [](const char c) { return std::isalnum(c); })) + { + return false; + } + if (!std::all_of(begin(OutRef.BucketSegment), end(OutRef.BucketSegment), [](const char c) { return std::isalnum(c); })) { return false; @@ -907,12 +931,15 @@ HttpStructuredCacheService::HandleRpcPutCacheRecords(zen::HttpServerRequest& Req std::vector Results; for (CbFieldView RequestField : Params["Requests"sv]) { - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView RecordObject = RequestObject["Record"sv].AsObjectView(); - CbObjectView KeyView = RecordObject["Key"sv].AsObjectView(); - CbFieldView BucketField = KeyView["Bucket"sv]; - CbFieldView HashField = KeyView["Hash"sv]; - CacheKey Key = CacheKey::Create(BucketField.AsString(), HashField.AsHash()); + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView RecordObject = RequestObject["Record"sv].AsObjectView(); + CbObjectView KeyView = RecordObject["Key"sv].AsObjectView(); + CbFieldView NamespaceField = KeyView["Namespace"sv]; + CbFieldView BucketField = KeyView["Bucket"sv]; + CbFieldView HashField = KeyView["Hash"sv]; + CacheKey Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, + BucketField.AsString(), + HashField.AsHash()); if (BucketField.HasError() || HashField.HasError() || Key.Bucket.empty()) { return Request.WriteResponse(HttpResponseCode::BadRequest); @@ -981,7 +1008,8 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack } else { - ZEN_WARN("PUT - '{}/{}' '{}' FAILED, attachment '{}' is not compressed", + ZEN_WARN("PUT - '{}/{}/{}' '{}' FAILED, attachment '{}' is not compressed", + Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash, ToString(HttpContentType::kCbPackage), @@ -1002,7 +1030,8 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack return PutResult::Invalid; } - ZEN_DEBUG("PUT - '{}/{}' {}, attachments '{}/{}/{}' (new/valid/total)", + ZEN_DEBUG("PUT - '{}/{}/{}' {}, attachments '{}/{}/{}' (new/valid/total)", + Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash, NiceBytes(TransferredSize), @@ -1014,7 +1043,7 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack CacheValue.Value = IoBuffer(Record.GetSize()); Record.CopyTo(MutableMemoryView(CacheValue.Value.MutableData(), CacheValue.Value.GetSize())); CacheValue.Value.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Request.Key.Bucket, Request.Key.Hash, CacheValue); + m_CacheStore.Put(Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash, CacheValue); const bool IsPartialRecord = Count.Valid != Count.Total; @@ -1080,13 +1109,16 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt for (CbFieldView RequestField : RequestsArray) { - RecordRequestData& Request = Requests.emplace_back(); - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CbFieldView BucketField = KeyObject["Bucket"sv]; - CbFieldView HashField = KeyObject["Hash"sv]; - CacheKey& Key = Request.Upstream.Key; - Key = CacheKey::Create(BucketField.AsString(), HashField.AsHash()); + RecordRequestData& Request = Requests.emplace_back(); + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); + CbFieldView NamespaceField = KeyObject["Namespace"sv]; + CbFieldView BucketField = KeyObject["Bucket"sv]; + CbFieldView HashField = KeyObject["Hash"sv]; + CacheKey& Key = Request.Upstream.Key; + Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, + BucketField.AsString(), + HashField.AsHash()); if (HashField.HasError() || Key.Bucket.empty()) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); @@ -1100,7 +1132,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt ZenCacheValue RecordCacheValue; if (EnumHasAllFlags(Policy.GetRecordPolicy(), CachePolicy::QueryLocal) && - m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, RecordCacheValue)) + m_CacheStore.Get(Key.Namespace, Key.Bucket, Key.Hash, RecordCacheValue)) { Request.RecordCacheValue = std::move(RecordCacheValue.Value); if (Request.RecordCacheValue.GetContentType() != ZenContentType::kCbObject) @@ -1231,7 +1263,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt Request.RecordObject = ObjectBuffer; if (EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::StoreLocal)) { - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); + m_CacheStore.Put(Key.Namespace, Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); } ParseValues(Request); Request.UsedUpstream = true; @@ -1269,7 +1301,11 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt } else { - ZEN_DEBUG("Uncompressed value '{}' from upstream cache record '{}/{}'", Value.ContentId, Key.Bucket, Key.Hash); + ZEN_DEBUG("Uncompressed value '{}' from upstream cache record '{}/{}/{}'", + Value.ContentId, + Key.Namespace, + Key.Bucket, + Key.Hash); } } if (!Value.Exists && !EnumHasAllFlags(ValuePolicy, CachePolicy::SkipData)) @@ -1306,7 +1342,8 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt } } - ZEN_DEBUG("HIT - '{}/{}' {}{}{}", + ZEN_DEBUG("HIT - '{}/{}/{}' {}{}{}", + Key.Namespace, Key.Bucket, Key.Hash, NiceBytes(Request.RecordCacheValue.Size()), @@ -1322,11 +1359,11 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt if (!EnumHasAnyFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::Query)) { // If they requested no query, do not record this as a miss - ZEN_DEBUG("DISABLEDQUERY - '{}/{}'", Key.Bucket, Key.Hash); + ZEN_DEBUG("DISABLEDQUERY - '{}/{}/{}'", Key.Namespace, Key.Bucket, Key.Hash); } else { - ZEN_DEBUG("MISS - '{}/{}' {}", Key.Bucket, Key.Hash, Request.RecordObject ? ""sv : "(PARTIAL)"sv); + ZEN_DEBUG("MISS - '{}/{}/{}' {}", Key.Namespace, Key.Bucket, Key.Hash, Request.RecordObject ? ""sv : "(PARTIAL)"sv); m_CacheStats.MissCount++; } } @@ -1357,11 +1394,14 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ std::vector Results; for (CbFieldView RequestField : Params["Requests"sv]) { - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView KeyView = RequestObject["Key"sv].AsObjectView(); - CbFieldView BucketField = KeyView["Bucket"sv]; - CbFieldView HashField = KeyView["Hash"sv]; - CacheKey Key = CacheKey::Create(BucketField.AsString(), HashField.AsHash()); + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView KeyView = RequestObject["Key"sv].AsObjectView(); + CbFieldView NamespaceField = KeyView["Namespace"sv]; + CbFieldView BucketField = KeyView["Bucket"sv]; + CbFieldView HashField = KeyView["Hash"sv]; + CacheKey Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, + BucketField.AsString(), + HashField.AsHash()); if (BucketField.HasError() || HashField.HasError() || Key.Bucket.empty()) { return Request.WriteResponse(HttpResponseCode::BadRequest); @@ -1388,21 +1428,21 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ { IoBuffer Value = Chunk.GetCompressed().Flatten().AsIoBuffer(); Value.SetContentType(ZenContentType::kCompressedBinary); - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, {.Value = Value}); + m_CacheStore.Put(Key.Namespace, Key.Bucket, Key.Hash, {.Value = Value}); TransferredSize = Chunk.GetCompressedSize(); } Succeeded = true; } else { - ZEN_WARN("PUTCACHEVALUES - '{}/{}/{}' FAILED, value is not compressed", Key.Bucket, Key.Hash, RawHash); + ZEN_WARN("PUTCACHEVALUES - '{}/{}/{}/{}' FAILED, value is not compressed", Key.Namespace, Key.Bucket, Key.Hash, RawHash); return Request.WriteResponse(HttpResponseCode::BadRequest); } } else if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { ZenCacheValue ExistingValue; - if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, ExistingValue) && + if (m_CacheStore.Get(Key.Namespace, Key.Bucket, Key.Hash, ExistingValue) && IsCompressedBinary(ExistingValue.Value.GetContentType())) { Succeeded = true; @@ -1416,7 +1456,12 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCompressedBinary, .Key = Key}); } Results.push_back(Succeeded); - ZEN_DEBUG("PUTCACHEVALUES - '{}/{}' {}, '{}'", Key.Bucket, Key.Hash, NiceBytes(TransferredSize), Succeeded ? "Added"sv : "Invalid"); + ZEN_DEBUG("PUTCACHEVALUES - '{}/{}/{}' {}, '{}'", + Key.Namespace, + Key.Bucket, + Key.Hash, + NiceBytes(TransferredSize), + Succeeded ? "Added"sv : "Invalid"); } if (Results.empty()) { @@ -1466,12 +1511,15 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http { Stopwatch Timer; - RequestData& Request = Requests.emplace_back(); - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CbFieldView BucketField = KeyObject["Bucket"sv]; - CbFieldView HashField = KeyObject["Hash"sv]; - Request.Key = CacheKey::Create(BucketField.AsString(), HashField.AsHash()); + RequestData& Request = Requests.emplace_back(); + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); + CbFieldView NamespaceField = KeyObject["Namespace"sv]; + CbFieldView BucketField = KeyObject["Bucket"sv]; + CbFieldView HashField = KeyObject["Hash"sv]; + Request.Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, + BucketField.AsString(), + HashField.AsHash()); if (BucketField.HasError() || HashField.HasError() || Request.Key.Bucket.empty()) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); @@ -1486,15 +1534,15 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { - if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, CacheValue) && - IsCompressedBinary(CacheValue.Value.GetContentType())) + if (m_CacheStore.Get(Key.Namespace, Key.Bucket, Key.Hash, CacheValue) && IsCompressedBinary(CacheValue.Value.GetContentType())) { Result = CompressedBuffer::FromCompressed(SharedBuffer(CacheValue.Value)); } } if (Result) { - ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}' {} ({}) in {}", + ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}/{}' {} ({}) in {}", + Key.Namespace, Key.Bucket, Key.Hash, NiceBytes(Result.GetCompressed().GetSize()), @@ -1509,11 +1557,12 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http else if (!EnumHasAnyFlags(Policy, CachePolicy::Query)) { // If they requested no query, do not record this as a miss - ZEN_DEBUG("GETCACHEVALUES DISABLEDQUERY - '{}/{}'", Key.Bucket, Key.Hash); + ZEN_DEBUG("GETCACHEVALUES DISABLEDQUERY - '{}/{}/{}'", Key.Namespace, Key.Bucket, Key.Hash); } else { - ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}' ({}) in {}", + ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}/{}' ({}) in {}", + Key.Namespace, Key.Bucket, Key.Hash, "LOCAL"sv, @@ -1531,7 +1580,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http for (size_t Index : RemoteRequestIndexes) { RequestData& Request = Requests[Index]; - RequestedRecordsData.push_back({{Request.Key.Bucket, Request.Key.Hash}}); + RequestedRecordsData.push_back({{Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash}}); CacheChunkRequests.push_back(&RequestedRecordsData.back()); } Stopwatch Timer; @@ -1551,11 +1600,9 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http // that we copy data from upstream even when SkipData and !StoreLocal are true means that it is too expensive // for us to keep the data only on the upstream server. // if (EnumHasAllFlags(Policy, CachePolicy::StoreLocal)) - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, - Request.Key.Bucket, - Request.Key.Hash, - ZenCacheValue{Params.Value}); - ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}' {} ({}) in {}", + m_CacheStore.Put(Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash, ZenCacheValue{Params.Value}); + ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}/{}' {} ({}) in {}", + ChunkRequest.Key.Namespace, ChunkRequest.Key.Bucket, ChunkRequest.Key.Hash, NiceBytes(Request.Result.GetCompressed().GetSize()), @@ -1566,7 +1613,8 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http return; } } - ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}' ({}) in {}", + ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}/{}' ({}) in {}", + ChunkRequest.Key.Namespace, ChunkRequest.Key.Bucket, ChunkRequest.Key.Hash, "UPSTREAM"sv, @@ -1723,9 +1771,12 @@ HttpStructuredCacheService::ParseGetCacheChunksRequest(std::vectorKey.Namespace, PreviousRecordKey->Key.Bucket, PreviousRecordKey->Key.Hash); return false; @@ -1810,7 +1863,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (!Record.Exists && EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) + if (m_CacheStore.Get(RecordKey.Key.Namespace, RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) { Record.Exists = true; Record.CacheValue = std::move(CacheValue.Value); @@ -1845,7 +1898,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::StoreLocal)) { - m_CacheStore.Put(ZenCacheStore::DefaultNamespace, Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); + m_CacheStore.Put(Key.Namespace, Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); } }; m_UpstreamCache.GetCacheRecords(UpstreamRecordRequests, std::move(OnCacheRecordGetComplete)); @@ -1942,7 +1995,7 @@ HttpStructuredCacheService::GetLocalCacheValues(std::vectorExists && EnumHasAllFlags(Request->DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get(ZenCacheStore::DefaultNamespace, Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) + if (m_CacheStore.Get(Request->Key->Key.Namespace, Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) { if (IsCompressedBinary(CacheValue.Value.GetContentType())) { @@ -2011,7 +2064,7 @@ HttpStructuredCacheService::GetUpstreamCacheChunks(std::vectorKey.Namespace, Request.Key->Key.Bucket, Request.Key->Key.Hash, Request.Key->ValueId, @@ -2069,11 +2123,19 @@ HttpStructuredCacheService::WriteGetCacheChunksResponse(std::vectorKey.Bucket, Request.Key->Key.Hash, Request.Key->ValueId); + ZEN_DEBUG("SKIP - '{}/{}/{}/{}'", + Request.Key->Key.Namespace, + Request.Key->Key.Bucket, + Request.Key->Key.Hash, + Request.Key->ValueId); } else { - ZEN_DEBUG("MISS - '{}/{}/{}'", Request.Key->Key.Bucket, Request.Key->Key.Hash, Request.Key->ValueId); + ZEN_DEBUG("MISS - '{}/{}/{}/{}'", + Request.Key->Key.Namespace, + Request.Key->Key.Bucket, + Request.Key->Key.Hash, + Request.Key->ValueId); m_CacheStats.MissCount++; } } diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index 00c4260aa..66ef034a5 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -80,6 +80,7 @@ public: private: struct CacheRef { + std::string Namespace; std::string BucketSegment; IoHash HashKey; IoHash ValueContentId; @@ -111,7 +112,7 @@ private: void HandleRpcPutCacheValues(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); void HandleRpcGetCacheValues(zen::HttpServerRequest& Request, CbObjectView BatchRequest); void HandleRpcGetCacheChunks(zen::HttpServerRequest& Request, CbObjectView BatchRequest); - void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Bucket); + void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket); virtual void HandleStatsRequest(zen::HttpServerRequest& Request) override; virtual void HandleStatusRequest(zen::HttpServerRequest& Request) override; PutResult PutCacheRecord(PutRequestData& Request, const CbPackage* Package); diff --git a/zenserver/upstream/hordecompute.cpp b/zenserver/upstream/hordecompute.cpp index dbf86cc13..4d502a193 100644 --- a/zenserver/upstream/hordecompute.cpp +++ b/zenserver/upstream/hordecompute.cpp @@ -198,7 +198,8 @@ namespace detail { } { - PutRefResult RefResult = StorageSession.PutRef("requests"sv, + PutRefResult RefResult = StorageSession.PutRef(StorageSession.Client().BlobStoreNamespace(), + "requests"sv, UpstreamData.TaskId, UpstreamData.Objects[UpstreamData.TaskId].GetBuffer().AsIoBuffer(), ZenContentType::kCbObject); @@ -292,7 +293,7 @@ namespace detail { std::set Keys; std::transform(Blobs.begin(), Blobs.end(), std::inserter(Keys, Keys.end()), [](const auto& It) { return It.first; }); - CloudCacheExistsResult ExistsResult = Session.BlobExists(Keys); + CloudCacheExistsResult ExistsResult = Session.BlobExists(Session.Client().BlobStoreNamespace(), Keys); Log().debug("Queried {} missing blobs Need={} Duration={}s Result={}", Keys.size(), ExistsResult.Needs.size(), @@ -309,7 +310,7 @@ namespace detail { for (const auto& Hash : ExistsResult.Needs) { - CloudCacheResult Result = Session.PutBlob(Hash, Blobs.at(Hash)); + CloudCacheResult Result = Session.PutBlob(Session.Client().BlobStoreNamespace(), Hash, Blobs.at(Hash)); Log().debug("Put blob {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success); Bytes += Result.Bytes; ElapsedSeconds += Result.ElapsedSeconds; @@ -339,7 +340,7 @@ namespace detail { std::set Keys; std::transform(Objects.begin(), Objects.end(), std::inserter(Keys, Keys.end()), [](const auto& It) { return It.first; }); - CloudCacheExistsResult ExistsResult = Session.ObjectExists(Keys); + CloudCacheExistsResult ExistsResult = Session.ObjectExists(Session.Client().BlobStoreNamespace(), Keys); Log().debug("Queried {} missing objects Need={} Duration={}s Result={}", Keys.size(), ExistsResult.Needs.size(), @@ -356,7 +357,8 @@ namespace detail { for (const auto& Hash : ExistsResult.Needs) { - CloudCacheResult Result = Session.PutObject(Hash, Objects.at(Hash).GetBuffer().AsIoBuffer()); + CloudCacheResult Result = + Session.PutObject(Session.Client().BlobStoreNamespace(), Hash, Objects.at(Hash).GetBuffer().AsIoBuffer()); Log().debug("Put object {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success); Bytes += Result.Bytes; ElapsedSeconds += Result.ElapsedSeconds; @@ -691,7 +693,8 @@ namespace detail { std::map BinaryData; { - CloudCacheResult ObjectRefResult = Session.GetRef("responses"sv, ResultHash, ZenContentType::kCbObject); + CloudCacheResult ObjectRefResult = + Session.GetRef(Session.Client().BlobStoreNamespace(), "responses"sv, ResultHash, ZenContentType::kCbObject); Log().debug("Get ref {} Bytes={} Duration={}s Result={}", ResultHash, ObjectRefResult.Bytes, @@ -718,7 +721,8 @@ namespace detail { std::set NeededData; if (OutputHash != IoHash::Zero) { - GetObjectReferencesResult ObjectReferenceResult = Session.GetObjectReferences(OutputHash); + GetObjectReferencesResult ObjectReferenceResult = + Session.GetObjectReferences(Session.Client().BlobStoreNamespace(), OutputHash); Log().debug("Get object references {} References={} Bytes={} Duration={}s Result={}", ResultHash, ObjectReferenceResult.References.size(), @@ -748,7 +752,7 @@ namespace detail { { continue; } - CloudCacheResult BlobResult = Session.GetBlob(Hash); + CloudCacheResult BlobResult = Session.GetBlob(Session.Client().BlobStoreNamespace(), Hash); Log().debug("Get blob {} Bytes={} Duration={}s Result={}", Hash, BlobResult.Bytes, diff --git a/zenserver/upstream/jupiter.cpp b/zenserver/upstream/jupiter.cpp index 4bec41a29..881798c1f 100644 --- a/zenserver/upstream/jupiter.cpp +++ b/zenserver/upstream/jupiter.cpp @@ -83,12 +83,12 @@ CloudCacheSession::Authenticate() } CloudCacheResult -CloudCacheSession::GetDerivedData(std::string_view BucketId, std::string_view Key) +CloudCacheSession::GetDerivedData(std::string_view Namespace, std::string_view BucketId, std::string_view Key) { ZEN_TRACE_CPU("HordeClient::GetDerivedData"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/c/ddc/" << m_CacheClient->DdcNamespace() << "/" << BucketId << "/" << Key; + Uri << m_CacheClient->ServiceUrl() << "/api/v1/c/ddc/" << Namespace << "/" << BucketId << "/" << Key; cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -115,19 +115,18 @@ CloudCacheSession::GetDerivedData(std::string_view BucketId, std::string_view Ke } CloudCacheResult -CloudCacheSession::GetDerivedData(std::string_view BucketId, const IoHash& Key) +CloudCacheSession::GetDerivedData(std::string_view Namespace, std::string_view BucketId, const IoHash& Key) { - return GetDerivedData(BucketId, Key.ToHexString()); + return GetDerivedData(Namespace, BucketId, Key.ToHexString()); } CloudCacheResult -CloudCacheSession::GetRef(std::string_view BucketId, const IoHash& Key, ZenContentType RefType) +CloudCacheSession::GetRef(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, ZenContentType RefType) { const std::string ContentType = RefType == ZenContentType::kCbObject ? "application/x-ue-cb" : "application/octet-stream"; ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << m_CacheClient->BlobStoreNamespace() << "/" << BucketId << "/" - << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << Namespace << "/" << BucketId << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -155,10 +154,10 @@ CloudCacheSession::GetRef(std::string_view BucketId, const IoHash& Key, ZenConte } CloudCacheResult -CloudCacheSession::GetBlob(const IoHash& Key) +CloudCacheSession::GetBlob(std::string_view Namespace, const IoHash& Key) { ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/blobs/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/blobs/" << Namespace << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -187,12 +186,12 @@ CloudCacheSession::GetBlob(const IoHash& Key) } CloudCacheResult -CloudCacheSession::GetCompressedBlob(const IoHash& Key) +CloudCacheSession::GetCompressedBlob(std::string_view Namespace, const IoHash& Key) { ZEN_TRACE_CPU("HordeClient::GetCompressedBlob"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/compressed-blobs/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/compressed-blobs/" << Namespace << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -220,12 +219,12 @@ CloudCacheSession::GetCompressedBlob(const IoHash& Key) } CloudCacheResult -CloudCacheSession::GetObject(const IoHash& Key) +CloudCacheSession::GetObject(std::string_view Namespace, const IoHash& Key) { ZEN_TRACE_CPU("HordeClient::GetObject"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/objects/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/objects/" << Namespace << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -253,14 +252,14 @@ CloudCacheSession::GetObject(const IoHash& Key) } CloudCacheResult -CloudCacheSession::PutDerivedData(std::string_view BucketId, std::string_view Key, IoBuffer DerivedData) +CloudCacheSession::PutDerivedData(std::string_view Namespace, std::string_view BucketId, std::string_view Key, IoBuffer DerivedData) { ZEN_TRACE_CPU("HordeClient::PutDerivedData"); IoHash Hash = IoHash::HashBuffer(DerivedData.Data(), DerivedData.Size()); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/c/ddc/" << m_CacheClient->DdcNamespace() << "/" << BucketId << "/" << Key; + Uri << m_CacheClient->ServiceUrl() << "/api/v1/c/ddc/" << Namespace << "/" << BucketId << "/" << Key; cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -289,13 +288,13 @@ CloudCacheSession::PutDerivedData(std::string_view BucketId, std::string_view Ke } CloudCacheResult -CloudCacheSession::PutDerivedData(std::string_view BucketId, const IoHash& Key, IoBuffer DerivedData) +CloudCacheSession::PutDerivedData(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, IoBuffer DerivedData) { - return PutDerivedData(BucketId, Key.ToHexString(), DerivedData); + return PutDerivedData(Namespace, BucketId, Key.ToHexString(), DerivedData); } PutRefResult -CloudCacheSession::PutRef(std::string_view BucketId, const IoHash& Key, IoBuffer Ref, ZenContentType RefType) +CloudCacheSession::PutRef(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, IoBuffer Ref, ZenContentType RefType) { ZEN_TRACE_CPU("HordeClient::PutRef"); @@ -304,8 +303,7 @@ CloudCacheSession::PutRef(std::string_view BucketId, const IoHash& Key, IoBuffer const std::string ContentType = RefType == ZenContentType::kCbObject ? "application/x-ue-cb" : "application/octet-stream"; ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << m_CacheClient->BlobStoreNamespace() << "/" << BucketId << "/" - << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << Namespace << "/" << BucketId << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -356,13 +354,13 @@ CloudCacheSession::PutRef(std::string_view BucketId, const IoHash& Key, IoBuffer } FinalizeRefResult -CloudCacheSession::FinalizeRef(std::string_view BucketId, const IoHash& Key, const IoHash& RefHash) +CloudCacheSession::FinalizeRef(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, const IoHash& RefHash) { ZEN_TRACE_CPU("HordeClient::FinalizeRef"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << m_CacheClient->BlobStoreNamespace() << "/" << BucketId << "/" - << Key.ToHexString() << "/finalize/" << RefHash.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << Namespace << "/" << BucketId << "/" << Key.ToHexString() << "/finalize/" + << RefHash.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -414,12 +412,12 @@ CloudCacheSession::FinalizeRef(std::string_view BucketId, const IoHash& Key, con } CloudCacheResult -CloudCacheSession::PutBlob(const IoHash& Key, IoBuffer Blob) +CloudCacheSession::PutBlob(std::string_view Namespace, const IoHash& Key, IoBuffer Blob) { ZEN_TRACE_CPU("HordeClient::PutBlob"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/blobs/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/blobs/" << Namespace << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -446,12 +444,12 @@ CloudCacheSession::PutBlob(const IoHash& Key, IoBuffer Blob) } CloudCacheResult -CloudCacheSession::PutCompressedBlob(const IoHash& Key, IoBuffer Blob) +CloudCacheSession::PutCompressedBlob(std::string_view Namespace, const IoHash& Key, IoBuffer Blob) { ZEN_TRACE_CPU("HordeClient::PutCompressedBlob"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/compressed-blobs/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/compressed-blobs/" << Namespace << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -478,12 +476,12 @@ CloudCacheSession::PutCompressedBlob(const IoHash& Key, IoBuffer Blob) } CloudCacheResult -CloudCacheSession::PutObject(const IoHash& Key, IoBuffer Object) +CloudCacheSession::PutObject(std::string_view Namespace, const IoHash& Key, IoBuffer Object) { ZEN_TRACE_CPU("HordeClient::PutObject"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/objects/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/objects/" << Namespace << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -510,13 +508,12 @@ CloudCacheSession::PutObject(const IoHash& Key, IoBuffer Object) } CloudCacheResult -CloudCacheSession::RefExists(std::string_view BucketId, const IoHash& Key) +CloudCacheSession::RefExists(std::string_view Namespace, std::string_view BucketId, const IoHash& Key) { ZEN_TRACE_CPU("HordeClient::RefExists"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << m_CacheClient->BlobStoreNamespace() << "/" << BucketId << "/" - << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/refs/" << Namespace << "/" << BucketId << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -541,13 +538,12 @@ CloudCacheSession::RefExists(std::string_view BucketId, const IoHash& Key) } GetObjectReferencesResult -CloudCacheSession::GetObjectReferences(const IoHash& Key) +CloudCacheSession::GetObjectReferences(std::string_view Namespace, const IoHash& Key) { ZEN_TRACE_CPU("HordeClient::GetObjectReferences"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/objects/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString() - << "/references"; + Uri << m_CacheClient->ServiceUrl() << "/api/v1/objects/" << Namespace << "/" << Key.ToHexString() << "/references"; cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -585,39 +581,39 @@ CloudCacheSession::GetObjectReferences(const IoHash& Key) } CloudCacheResult -CloudCacheSession::BlobExists(const IoHash& Key) +CloudCacheSession::BlobExists(std::string_view Namespace, const IoHash& Key) { - return CacheTypeExists("blobs"sv, Key); + return CacheTypeExists(Namespace, "blobs"sv, Key); } CloudCacheResult -CloudCacheSession::CompressedBlobExists(const IoHash& Key) +CloudCacheSession::CompressedBlobExists(std::string_view Namespace, const IoHash& Key) { - return CacheTypeExists("compressed-blobs"sv, Key); + return CacheTypeExists(Namespace, "compressed-blobs"sv, Key); } CloudCacheResult -CloudCacheSession::ObjectExists(const IoHash& Key) +CloudCacheSession::ObjectExists(std::string_view Namespace, const IoHash& Key) { - return CacheTypeExists("objects"sv, Key); + return CacheTypeExists(Namespace, "objects"sv, Key); } CloudCacheExistsResult -CloudCacheSession::BlobExists(const std::set& Keys) +CloudCacheSession::BlobExists(std::string_view Namespace, const std::set& Keys) { - return CacheTypeExists("blobs"sv, Keys); + return CacheTypeExists(Namespace, "blobs"sv, Keys); } CloudCacheExistsResult -CloudCacheSession::CompressedBlobExists(const std::set& Keys) +CloudCacheSession::CompressedBlobExists(std::string_view Namespace, const std::set& Keys) { - return CacheTypeExists("compressed-blobs"sv, Keys); + return CacheTypeExists(Namespace, "compressed-blobs"sv, Keys); } CloudCacheExistsResult -CloudCacheSession::ObjectExists(const std::set& Keys) +CloudCacheSession::ObjectExists(std::string_view Namespace, const std::set& Keys) { - return CacheTypeExists("objects"sv, Keys); + return CacheTypeExists(Namespace, "objects"sv, Keys); } CloudCacheResult @@ -685,11 +681,11 @@ CloudCacheSession::GetComputeUpdates(std::string_view ChannelId, const uint32_t } std::vector -CloudCacheSession::Filter(std::string_view BucketId, const std::vector& ChunkHashes) +CloudCacheSession::Filter(std::string_view Namespace, std::string_view BucketId, const std::vector& ChunkHashes) { ExtendableStringBuilder<256> Uri; Uri << m_CacheClient->ServiceUrl(); - Uri << "/api/v1/s/" << m_CacheClient->DdcNamespace(); + Uri << "/api/v1/s/" << Namespace; ZEN_UNUSED(BucketId, ChunkHashes); @@ -715,12 +711,12 @@ CloudCacheSession::VerifyAccessToken(long StatusCode) } CloudCacheResult -CloudCacheSession::CacheTypeExists(std::string_view TypeId, const IoHash& Key) +CloudCacheSession::CacheTypeExists(std::string_view Namespace, std::string_view TypeId, const IoHash& Key) { ZEN_TRACE_CPU("HordeClient::CacheTypeExists"); ExtendableStringBuilder<256> Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/" << TypeId << "/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString(); + Uri << m_CacheClient->ServiceUrl() << "/api/v1/" << TypeId << "/" << Namespace << "/" << Key.ToHexString(); cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); @@ -745,7 +741,7 @@ CloudCacheSession::CacheTypeExists(std::string_view TypeId, const IoHash& Key) } CloudCacheExistsResult -CloudCacheSession::CacheTypeExists(std::string_view TypeId, const std::set& Keys) +CloudCacheSession::CacheTypeExists(std::string_view Namespace, std::string_view TypeId, const std::set& Keys) { ZEN_TRACE_CPU("HordeClient::CacheTypeExists"); @@ -758,7 +754,7 @@ CloudCacheSession::CacheTypeExists(std::string_view TypeId, const std::set Uri; - Uri << m_CacheClient->ServiceUrl() << "/api/v1/" << TypeId << "/" << m_CacheClient->BlobStoreNamespace() << "/exist"; + Uri << m_CacheClient->ServiceUrl() << "/api/v1/" << TypeId << "/" << Namespace << "/exist"; cpr::Session& Session = GetSession(); const CloudCacheAccessToken& AccessToken = GetAccessToken(); diff --git a/zenserver/upstream/jupiter.h b/zenserver/upstream/jupiter.h index cff9a9ef1..a74d4d81d 100644 --- a/zenserver/upstream/jupiter.h +++ b/zenserver/upstream/jupiter.h @@ -95,38 +95,40 @@ public: ~CloudCacheSession(); CloudCacheResult Authenticate(); - CloudCacheResult GetDerivedData(std::string_view BucketId, std::string_view Key); - CloudCacheResult GetDerivedData(std::string_view BucketId, const IoHash& Key); - CloudCacheResult GetRef(std::string_view BucketId, const IoHash& Key, ZenContentType RefType); - CloudCacheResult GetBlob(const IoHash& Key); - CloudCacheResult GetCompressedBlob(const IoHash& Key); - CloudCacheResult GetObject(const IoHash& Key); + CloudCacheResult GetDerivedData(std::string_view Namespace, std::string_view BucketId, std::string_view Key); + CloudCacheResult GetDerivedData(std::string_view Namespace, std::string_view BucketId, const IoHash& Key); + CloudCacheResult GetRef(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, ZenContentType RefType); + CloudCacheResult GetBlob(std::string_view Namespace, const IoHash& Key); + CloudCacheResult GetCompressedBlob(std::string_view Namespace, const IoHash& Key); + CloudCacheResult GetObject(std::string_view Namespace, const IoHash& Key); - CloudCacheResult PutDerivedData(std::string_view BucketId, std::string_view Key, IoBuffer DerivedData); - CloudCacheResult PutDerivedData(std::string_view BucketId, const IoHash& Key, IoBuffer DerivedData); - PutRefResult PutRef(std::string_view BucketId, const IoHash& Key, IoBuffer Ref, ZenContentType RefType); - CloudCacheResult PutBlob(const IoHash& Key, IoBuffer Blob); - CloudCacheResult PutCompressedBlob(const IoHash& Key, IoBuffer Blob); - CloudCacheResult PutObject(const IoHash& Key, IoBuffer Object); + CloudCacheResult PutDerivedData(std::string_view Namespace, std::string_view BucketId, std::string_view Key, IoBuffer DerivedData); + CloudCacheResult PutDerivedData(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, IoBuffer DerivedData); + PutRefResult PutRef(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, IoBuffer Ref, ZenContentType RefType); + CloudCacheResult PutBlob(std::string_view Namespace, const IoHash& Key, IoBuffer Blob); + CloudCacheResult PutCompressedBlob(std::string_view Namespace, const IoHash& Key, IoBuffer Blob); + CloudCacheResult PutObject(std::string_view Namespace, const IoHash& Key, IoBuffer Object); - FinalizeRefResult FinalizeRef(std::string_view BucketId, const IoHash& Key, const IoHash& RefHah); + FinalizeRefResult FinalizeRef(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, const IoHash& RefHah); - CloudCacheResult RefExists(std::string_view BucketId, const IoHash& Key); + CloudCacheResult RefExists(std::string_view Namespace, std::string_view BucketId, const IoHash& Key); - GetObjectReferencesResult GetObjectReferences(const IoHash& Key); + GetObjectReferencesResult GetObjectReferences(std::string_view Namespace, const IoHash& Key); - CloudCacheResult BlobExists(const IoHash& Key); - CloudCacheResult CompressedBlobExists(const IoHash& Key); - CloudCacheResult ObjectExists(const IoHash& Key); + CloudCacheResult BlobExists(std::string_view Namespace, const IoHash& Key); + CloudCacheResult CompressedBlobExists(std::string_view Namespace, const IoHash& Key); + CloudCacheResult ObjectExists(std::string_view Namespace, const IoHash& Key); - CloudCacheExistsResult BlobExists(const std::set& Keys); - CloudCacheExistsResult CompressedBlobExists(const std::set& Keys); - CloudCacheExistsResult ObjectExists(const std::set& Keys); + CloudCacheExistsResult BlobExists(std::string_view Namespace, const std::set& Keys); + CloudCacheExistsResult CompressedBlobExists(std::string_view Namespace, const std::set& Keys); + CloudCacheExistsResult ObjectExists(std::string_view Namespace, const std::set& Keys); CloudCacheResult PostComputeTasks(IoBuffer TasksData); CloudCacheResult GetComputeUpdates(std::string_view ChannelId, const uint32_t WaitSeconds = 0); - std::vector Filter(std::string_view BucketId, const std::vector& ChunkHashes); + std::vector Filter(std::string_view Namespace, std::string_view BucketId, const std::vector& ChunkHashes); + + CloudCacheClient& Client() { return *m_CacheClient; }; private: inline spdlog::logger& Log() { return m_Log; } @@ -134,9 +136,9 @@ private: CloudCacheAccessToken GetAccessToken(bool RefreshToken = false); bool VerifyAccessToken(long StatusCode); - CloudCacheResult CacheTypeExists(std::string_view TypeId, const IoHash& Key); + CloudCacheResult CacheTypeExists(std::string_view Namespace, std::string_view TypeId, const IoHash& Key); - CloudCacheExistsResult CacheTypeExists(std::string_view TypeId, const std::set& Keys); + CloudCacheExistsResult CacheTypeExists(std::string_view Namespace, std::string_view TypeId, const std::set& Keys); spdlog::logger& m_Log; RefPtr m_CacheClient; diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp index c870e0773..2b52b8efa 100644 --- a/zenserver/upstream/upstreamcache.cpp +++ b/zenserver/upstream/upstreamcache.cpp @@ -175,11 +175,15 @@ namespace detail { if (m_UseLegacyDdc && Type == ZenContentType::kBinary) { - Result = Session.GetDerivedData(CacheKey.Bucket, CacheKey.Hash); + Result = + Session.GetDerivedData(CacheKey.Namespace, CacheKey.Bucket, CacheKey.Hash); // Session.Client().DdcNamespace(), } else if (Type == ZenContentType::kCompressedBinary) { - Result = Session.GetRef(CacheKey.Bucket, CacheKey.Hash, ZenContentType::kCbObject); + Result = Session.GetRef(CacheKey.Namespace, + CacheKey.Bucket, + CacheKey.Hash, + ZenContentType::kCbObject); // Session.Client().BlobStoreNamespace(), if (Result.Success) { @@ -192,7 +196,8 @@ namespace detail { CacheRecord.IterateAttachments( [&Session, &Result, &ContentBuffer, &NumAttachments](CbFieldView AttachmentHash) { - CloudCacheResult AttachmentResult = Session.GetCompressedBlob(AttachmentHash.AsHash()); + CloudCacheResult AttachmentResult = + Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), AttachmentHash.AsHash()); Result.Bytes += AttachmentResult.Bytes; Result.ElapsedSeconds += AttachmentResult.ElapsedSeconds; Result.ErrorCode = AttachmentResult.ErrorCode; @@ -217,7 +222,10 @@ namespace detail { else { const ZenContentType AcceptType = Type == ZenContentType::kCbPackage ? ZenContentType::kCbObject : Type; - Result = Session.GetRef(CacheKey.Bucket, CacheKey.Hash, AcceptType); + Result = Session.GetRef(CacheKey.Namespace, + CacheKey.Bucket, + CacheKey.Hash, + AcceptType); // Session.Client().BlobStoreNamespace() if (Result.Success && Type == ZenContentType::kCbPackage) { @@ -229,7 +237,8 @@ namespace detail { CbObject CacheRecord = LoadCompactBinaryObject(Result.Response); CacheRecord.IterateAttachments([&Session, &Result, &Package](CbFieldView AttachmentHash) { - CloudCacheResult AttachmentResult = Session.GetCompressedBlob(AttachmentHash.AsHash()); + CloudCacheResult AttachmentResult = + Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), AttachmentHash.AsHash()); Result.Bytes += AttachmentResult.Bytes; Result.ElapsedSeconds += AttachmentResult.ElapsedSeconds; Result.ErrorCode = AttachmentResult.ErrorCode; @@ -294,7 +303,10 @@ namespace detail { if (!Result.Error) { - CloudCacheResult RefResult = Session.GetRef(CacheKey.Bucket, CacheKey.Hash, ZenContentType::kCbObject); + CloudCacheResult RefResult = Session.GetRef(CacheKey.Namespace, + CacheKey.Bucket, + CacheKey.Hash, + ZenContentType::kCbObject); // Session.Client().BlobStoreNamespace() AppendResult(RefResult, Result); m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason); @@ -306,7 +318,8 @@ namespace detail { { Record = LoadCompactBinaryObject(RefResult.Response); Record.IterateAttachments([this, &Session, &Result, &Package](CbFieldView AttachmentHash) { - CloudCacheResult BlobResult = Session.GetCompressedBlob(AttachmentHash.AsHash()); + CloudCacheResult BlobResult = + Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), AttachmentHash.AsHash()); AppendResult(BlobResult, Result); m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason); @@ -336,7 +349,7 @@ namespace detail { try { CloudCacheSession Session(m_Client); - const CloudCacheResult Result = Session.GetCompressedBlob(ValueContentId); + const CloudCacheResult Result = Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), ValueContentId); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); @@ -376,7 +389,7 @@ namespace detail { CompressedBuffer Compressed; if (!Result.Error) { - const CloudCacheResult BlobResult = Session.GetCompressedBlob(Request.ChunkId); + const CloudCacheResult BlobResult = Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), Request.ChunkId); Payload = BlobResult.Response; AppendResult(BlobResult, Result); @@ -424,11 +437,18 @@ namespace detail { { if (m_UseLegacyDdc) { - Result = Session.PutDerivedData(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue); + Result = Session.PutDerivedData(CacheRecord.Key.Namespace, // Session.Client().BlobStoreNamespace(), + CacheRecord.Key.Bucket, + CacheRecord.Key.Hash, + RecordValue); } else { - Result = Session.PutRef(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, ZenContentType::kBinary); + Result = Session.PutRef(CacheRecord.Key.Namespace, // Session.Client().BlobStoreNamespace(), + CacheRecord.Key.Bucket, + CacheRecord.Key.Hash, + RecordValue, + ZenContentType::kBinary); } } @@ -539,7 +559,7 @@ namespace detail { CloudCacheResult BlobResult; for (int32_t Attempt = 0; Attempt < MaxAttempts && !BlobResult.Success; Attempt++) { - BlobResult = Session.PutCompressedBlob(ValueContentId, BlobBuffer); + BlobResult = Session.PutCompressedBlob(Session.Client().BlobStoreNamespace(), ValueContentId, BlobBuffer); } m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason); @@ -560,7 +580,11 @@ namespace detail { PutRefResult RefResult; for (int32_t Attempt = 0; Attempt < MaxAttempts && !RefResult.Success; Attempt++) { - RefResult = Session.PutRef(Key.Bucket, Key.Hash, ObjectBuffer, ZenContentType::kCbObject); + RefResult = Session.PutRef(Key.Namespace, // Seesion.Client().BlobStoreNamespace(), + Key.Bucket, + Key.Hash, + ObjectBuffer, + ZenContentType::kCbObject); } m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason); @@ -580,8 +604,9 @@ namespace detail { return {.Reason = std::move(Reason), .Success = false}; } - const IoHash RefHash = IoHash::HashBuffer(ObjectBuffer); - FinalizeRefResult FinalizeResult = Session.FinalizeRef(Key.Bucket, Key.Hash, RefHash); + const IoHash RefHash = IoHash::HashBuffer(ObjectBuffer); + FinalizeRefResult FinalizeResult = + Session.FinalizeRef(Key.Namespace, Key.Bucket, Key.Hash, RefHash); // Session.Client().BlobStoreNamespace(), m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason); @@ -599,7 +624,8 @@ namespace detail { return {.Reason = std::move(Reason), .Success = false}; } - FinalizeResult = Session.FinalizeRef(Key.Bucket, Key.Hash, RefHash); + FinalizeResult = + Session.FinalizeRef(Key.Namespace, Key.Bucket, Key.Hash, RefHash); // Session.Client().BlobStoreNamespace(), m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason); @@ -761,6 +787,7 @@ namespace detail { const CacheKey& Key = Request->Key; BatchRequest.BeginObject("Key"sv); { + BatchRequest << "Namespace"sv << Key.Namespace; BatchRequest << "Bucket"sv << Key.Bucket; BatchRequest << "Hash"sv << Key.Hash; } @@ -871,6 +898,7 @@ namespace detail { BatchRequest.BeginObject(); { BatchRequest.BeginObject("Key"sv); + BatchRequest << "Namespace"sv << Request.Key.Namespace; BatchRequest << "Bucket"sv << Request.Key.Bucket; BatchRequest << "Hash"sv << Request.Key.Hash; BatchRequest.EndObject(); @@ -1042,6 +1070,7 @@ namespace detail { const CacheKey& Key = CacheRecord.Key; BatchWriter.BeginObject("Key"sv); { + BatchWriter << "Namespace"sv << Key.Namespace; BatchWriter << "Bucket"sv << Key.Bucket; BatchWriter << "Hash"sv << Key.Hash; } @@ -1517,7 +1546,7 @@ private: ZenCacheValue CacheValue; std::vector Payloads; - if (!m_CacheStore.Get(ZenCacheStore::DefaultNamespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) + if (!m_CacheStore.Get(CacheRecord.Key.Namespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) { ZEN_WARN("process upstream FAILED, '{}/{}', cache record doesn't exist", CacheRecord.Key.Bucket, CacheRecord.Key.Hash); return; @@ -1531,7 +1560,8 @@ private: } else { - ZEN_WARN("process upstream FAILED, '{}/{}/{}', ValueContentId doesn't exist in CAS", + ZEN_WARN("process upstream FAILED, '{}/{}/{}/{}', ValueContentId doesn't exist in CAS", + CacheRecord.Key.Namespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, ValueContentId); diff --git a/zenutil/include/zenutil/cache/cachekey.h b/zenutil/include/zenutil/cache/cachekey.h index aa649b4dc..e6110a522 100644 --- a/zenutil/include/zenutil/cache/cachekey.h +++ b/zenutil/include/zenutil/cache/cachekey.h @@ -12,10 +12,14 @@ namespace zen { struct CacheKey { + std::string Namespace; std::string Bucket; IoHash Hash; - static CacheKey Create(std::string_view Bucket, const IoHash& Hash) { return {.Bucket = ToLower(Bucket), .Hash = Hash}; } + static CacheKey Create(std::string_view Namespace, std::string_view Bucket, const IoHash& Hash) + { + return {.Namespace = ToLower(Namespace), .Bucket = ToLower(Bucket), .Hash = Hash}; + } static const CacheKey Empty; }; @@ -23,21 +27,23 @@ struct CacheKey inline bool operator==(const CacheKey& A, const CacheKey& B) { - return A.Bucket == B.Bucket && A.Hash == B.Hash; + return A.Namespace == B.Namespace && A.Bucket == B.Bucket && A.Hash == B.Hash; } inline bool operator!=(const CacheKey& A, const CacheKey& B) { - return A.Bucket != B.Bucket || A.Hash != B.Hash; + return A.Namespace != B.Namespace || A.Bucket != B.Bucket || A.Hash != B.Hash; } inline bool operator<(const CacheKey& A, const CacheKey& B) { - const std::string& BucketA = A.Bucket; - const std::string& BucketB = B.Bucket; - return BucketA == BucketB ? A.Hash < B.Hash : BucketA < BucketB; + const std::string& NamespaceA = A.Namespace; + const std::string& NamespaceB = B.Namespace; + const std::string& BucketA = A.Bucket; + const std::string& BucketB = B.Bucket; + return NamespaceA == NamespaceB ? (BucketA == BucketB ? A.Hash < B.Hash : BucketA < BucketB) : NamespaceA < NamespaceB; } struct CacheChunkRequest -- cgit v1.2.3 From 41aa73ae51fd251969c6abf9ffafa5b40aad22e6 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 10 May 2022 09:28:38 +0200 Subject: Use configured namespace in Jupiter if not explicit namespace is given DdcNamespace -> DefaultDdcNamespace BlobStoreNamespace -> DefaultBlobStoreNamespace --- zenserver/upstream/hordecompute.cpp | 16 ++--- zenserver/upstream/jupiter.cpp | 4 +- zenserver/upstream/jupiter.h | 8 +-- zenserver/upstream/upstreamcache.cpp | 120 ++++++++++++++++++----------------- 4 files changed, 76 insertions(+), 72 deletions(-) diff --git a/zenserver/upstream/hordecompute.cpp b/zenserver/upstream/hordecompute.cpp index 4d502a193..2ec24b303 100644 --- a/zenserver/upstream/hordecompute.cpp +++ b/zenserver/upstream/hordecompute.cpp @@ -198,7 +198,7 @@ namespace detail { } { - PutRefResult RefResult = StorageSession.PutRef(StorageSession.Client().BlobStoreNamespace(), + PutRefResult RefResult = StorageSession.PutRef(StorageSession.Client().DefaultBlobStoreNamespace(), "requests"sv, UpstreamData.TaskId, UpstreamData.Objects[UpstreamData.TaskId].GetBuffer().AsIoBuffer(), @@ -293,7 +293,7 @@ namespace detail { std::set Keys; std::transform(Blobs.begin(), Blobs.end(), std::inserter(Keys, Keys.end()), [](const auto& It) { return It.first; }); - CloudCacheExistsResult ExistsResult = Session.BlobExists(Session.Client().BlobStoreNamespace(), Keys); + CloudCacheExistsResult ExistsResult = Session.BlobExists(Session.Client().DefaultBlobStoreNamespace(), Keys); Log().debug("Queried {} missing blobs Need={} Duration={}s Result={}", Keys.size(), ExistsResult.Needs.size(), @@ -310,7 +310,7 @@ namespace detail { for (const auto& Hash : ExistsResult.Needs) { - CloudCacheResult Result = Session.PutBlob(Session.Client().BlobStoreNamespace(), Hash, Blobs.at(Hash)); + CloudCacheResult Result = Session.PutBlob(Session.Client().DefaultBlobStoreNamespace(), Hash, Blobs.at(Hash)); Log().debug("Put blob {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success); Bytes += Result.Bytes; ElapsedSeconds += Result.ElapsedSeconds; @@ -340,7 +340,7 @@ namespace detail { std::set Keys; std::transform(Objects.begin(), Objects.end(), std::inserter(Keys, Keys.end()), [](const auto& It) { return It.first; }); - CloudCacheExistsResult ExistsResult = Session.ObjectExists(Session.Client().BlobStoreNamespace(), Keys); + CloudCacheExistsResult ExistsResult = Session.ObjectExists(Session.Client().DefaultBlobStoreNamespace(), Keys); Log().debug("Queried {} missing objects Need={} Duration={}s Result={}", Keys.size(), ExistsResult.Needs.size(), @@ -358,7 +358,7 @@ namespace detail { for (const auto& Hash : ExistsResult.Needs) { CloudCacheResult Result = - Session.PutObject(Session.Client().BlobStoreNamespace(), Hash, Objects.at(Hash).GetBuffer().AsIoBuffer()); + Session.PutObject(Session.Client().DefaultBlobStoreNamespace(), Hash, Objects.at(Hash).GetBuffer().AsIoBuffer()); Log().debug("Put object {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success); Bytes += Result.Bytes; ElapsedSeconds += Result.ElapsedSeconds; @@ -694,7 +694,7 @@ namespace detail { { CloudCacheResult ObjectRefResult = - Session.GetRef(Session.Client().BlobStoreNamespace(), "responses"sv, ResultHash, ZenContentType::kCbObject); + Session.GetRef(Session.Client().DefaultBlobStoreNamespace(), "responses"sv, ResultHash, ZenContentType::kCbObject); Log().debug("Get ref {} Bytes={} Duration={}s Result={}", ResultHash, ObjectRefResult.Bytes, @@ -722,7 +722,7 @@ namespace detail { if (OutputHash != IoHash::Zero) { GetObjectReferencesResult ObjectReferenceResult = - Session.GetObjectReferences(Session.Client().BlobStoreNamespace(), OutputHash); + Session.GetObjectReferences(Session.Client().DefaultBlobStoreNamespace(), OutputHash); Log().debug("Get object references {} References={} Bytes={} Duration={}s Result={}", ResultHash, ObjectReferenceResult.References.size(), @@ -752,7 +752,7 @@ namespace detail { { continue; } - CloudCacheResult BlobResult = Session.GetBlob(Session.Client().BlobStoreNamespace(), Hash); + CloudCacheResult BlobResult = Session.GetBlob(Session.Client().DefaultBlobStoreNamespace(), Hash); Log().debug("Get blob {} Bytes={} Duration={}s Result={}", Hash, BlobResult.Bytes, diff --git a/zenserver/upstream/jupiter.cpp b/zenserver/upstream/jupiter.cpp index 881798c1f..ddc6c49d2 100644 --- a/zenserver/upstream/jupiter.cpp +++ b/zenserver/upstream/jupiter.cpp @@ -890,8 +890,8 @@ CloudCacheTokenProvider::CreateFromCallback(std::function TokenProvider) : m_Log(zen::logging::Get("jupiter")) , m_ServiceUrl(Options.ServiceUrl) -, m_DdcNamespace(Options.DdcNamespace) -, m_BlobStoreNamespace(Options.BlobStoreNamespace) +, m_DefaultDdcNamespace(Options.DdcNamespace) +, m_DefaultBlobStoreNamespace(Options.BlobStoreNamespace) , m_ComputeCluster(Options.ComputeCluster) , m_ConnectTimeout(Options.ConnectTimeout) , m_Timeout(Options.Timeout) diff --git a/zenserver/upstream/jupiter.h b/zenserver/upstream/jupiter.h index a74d4d81d..3d9e6ea7b 100644 --- a/zenserver/upstream/jupiter.h +++ b/zenserver/upstream/jupiter.h @@ -191,8 +191,8 @@ public: ~CloudCacheClient(); CloudCacheAccessToken AcquireAccessToken(); - std::string_view DdcNamespace() const { return m_DdcNamespace; } - std::string_view BlobStoreNamespace() const { return m_BlobStoreNamespace; } + std::string_view DefaultDdcNamespace() const { return m_DefaultDdcNamespace; } + std::string_view DefaultBlobStoreNamespace() const { return m_DefaultBlobStoreNamespace; } std::string_view ComputeCluster() const { return m_ComputeCluster; } std::string_view ServiceUrl() const { return m_ServiceUrl; } @@ -201,8 +201,8 @@ public: private: spdlog::logger& m_Log; std::string m_ServiceUrl; - std::string m_DdcNamespace; - std::string m_BlobStoreNamespace; + std::string m_DefaultDdcNamespace; + std::string m_DefaultBlobStoreNamespace; std::string m_ComputeCluster; std::chrono::milliseconds m_ConnectTimeout{}; std::chrono::milliseconds m_Timeout{}; diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp index 2b52b8efa..cbb32b13e 100644 --- a/zenserver/upstream/upstreamcache.cpp +++ b/zenserver/upstream/upstreamcache.cpp @@ -160,6 +160,24 @@ namespace detail { } } + std::string_view GetActualDdcNamespace(CloudCacheSession& Session, const std::string Namespace) + { + if (Namespace == ZenCacheStore::DefaultNamespace) + { + return Session.Client().DefaultDdcNamespace(); + } + return Namespace; + } + + std::string_view GetActualBlobStoreNamespace(CloudCacheSession& Session, const std::string Namespace) + { + if (Namespace == ZenCacheStore::DefaultNamespace) + { + return Session.Client().DefaultBlobStoreNamespace(); + } + return Namespace; + } + virtual UpstreamEndpointState GetState() override { return m_Status.EndpointState(); } virtual UpstreamEndpointStatus GetStatus() override { return m_Status.EndpointStatus(); } @@ -173,17 +191,16 @@ namespace detail { CloudCacheSession Session(m_Client); CloudCacheResult Result; + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheKey.Namespace); + if (m_UseLegacyDdc && Type == ZenContentType::kBinary) { - Result = - Session.GetDerivedData(CacheKey.Namespace, CacheKey.Bucket, CacheKey.Hash); // Session.Client().DdcNamespace(), + std::string_view DdcNamespace = GetActualDdcNamespace(Session, CacheKey.Namespace); + Result = Session.GetDerivedData(DdcNamespace, CacheKey.Bucket, CacheKey.Hash); } else if (Type == ZenContentType::kCompressedBinary) { - Result = Session.GetRef(CacheKey.Namespace, - CacheKey.Bucket, - CacheKey.Hash, - ZenContentType::kCbObject); // Session.Client().BlobStoreNamespace(), + Result = Session.GetRef(BlobStoreNamespace, CacheKey.Bucket, CacheKey.Hash, ZenContentType::kCbObject); if (Result.Success) { @@ -194,24 +211,22 @@ namespace detail { IoBuffer ContentBuffer; int NumAttachments = 0; - CacheRecord.IterateAttachments( - [&Session, &Result, &ContentBuffer, &NumAttachments](CbFieldView AttachmentHash) { - CloudCacheResult AttachmentResult = - Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), AttachmentHash.AsHash()); - Result.Bytes += AttachmentResult.Bytes; - Result.ElapsedSeconds += AttachmentResult.ElapsedSeconds; - Result.ErrorCode = AttachmentResult.ErrorCode; + CacheRecord.IterateAttachments([&](CbFieldView AttachmentHash) { + CloudCacheResult AttachmentResult = Session.GetCompressedBlob(BlobStoreNamespace, AttachmentHash.AsHash()); + Result.Bytes += AttachmentResult.Bytes; + Result.ElapsedSeconds += AttachmentResult.ElapsedSeconds; + Result.ErrorCode = AttachmentResult.ErrorCode; - if (CompressedBuffer Chunk = CompressedBuffer::FromCompressed(SharedBuffer(AttachmentResult.Response))) - { - Result.Response = AttachmentResult.Response; - ++NumAttachments; - } - else - { - Result.Success = false; - } - }); + if (CompressedBuffer Chunk = CompressedBuffer::FromCompressed(SharedBuffer(AttachmentResult.Response))) + { + Result.Response = AttachmentResult.Response; + ++NumAttachments; + } + else + { + Result.Success = false; + } + }); if (NumAttachments != 1) { Result.Success = false; @@ -222,10 +237,7 @@ namespace detail { else { const ZenContentType AcceptType = Type == ZenContentType::kCbPackage ? ZenContentType::kCbObject : Type; - Result = Session.GetRef(CacheKey.Namespace, - CacheKey.Bucket, - CacheKey.Hash, - AcceptType); // Session.Client().BlobStoreNamespace() + Result = Session.GetRef(BlobStoreNamespace, CacheKey.Bucket, CacheKey.Hash, AcceptType); if (Result.Success && Type == ZenContentType::kCbPackage) { @@ -236,9 +248,8 @@ namespace detail { { CbObject CacheRecord = LoadCompactBinaryObject(Result.Response); - CacheRecord.IterateAttachments([&Session, &Result, &Package](CbFieldView AttachmentHash) { - CloudCacheResult AttachmentResult = - Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), AttachmentHash.AsHash()); + CacheRecord.IterateAttachments([&](CbFieldView AttachmentHash) { + CloudCacheResult AttachmentResult = Session.GetCompressedBlob(BlobStoreNamespace, AttachmentHash.AsHash()); Result.Bytes += AttachmentResult.Bytes; Result.ElapsedSeconds += AttachmentResult.ElapsedSeconds; Result.ErrorCode = AttachmentResult.ErrorCode; @@ -303,10 +314,9 @@ namespace detail { if (!Result.Error) { - CloudCacheResult RefResult = Session.GetRef(CacheKey.Namespace, - CacheKey.Bucket, - CacheKey.Hash, - ZenContentType::kCbObject); // Session.Client().BlobStoreNamespace() + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheKey.Namespace); + CloudCacheResult RefResult = + Session.GetRef(BlobStoreNamespace, CacheKey.Bucket, CacheKey.Hash, ZenContentType::kCbObject); AppendResult(RefResult, Result); m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason); @@ -317,9 +327,8 @@ namespace detail { if (ValidationResult == CbValidateError::None) { Record = LoadCompactBinaryObject(RefResult.Response); - Record.IterateAttachments([this, &Session, &Result, &Package](CbFieldView AttachmentHash) { - CloudCacheResult BlobResult = - Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), AttachmentHash.AsHash()); + Record.IterateAttachments([&](CbFieldView AttachmentHash) { + CloudCacheResult BlobResult = Session.GetCompressedBlob(BlobStoreNamespace, AttachmentHash.AsHash()); AppendResult(BlobResult, Result); m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason); @@ -342,14 +351,15 @@ namespace detail { return Result; } - virtual GetUpstreamCacheResult GetCacheValue(const CacheKey&, const IoHash& ValueContentId) override + virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& ValueContentId) override { ZEN_TRACE_CPU("Upstream::Horde::GetSingleCacheValue"); try { CloudCacheSession Session(m_Client); - const CloudCacheResult Result = Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), ValueContentId); + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheKey.Namespace); + const CloudCacheResult Result = Session.GetCompressedBlob(BlobStoreNamespace, ValueContentId); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); @@ -389,8 +399,9 @@ namespace detail { CompressedBuffer Compressed; if (!Result.Error) { - const CloudCacheResult BlobResult = Session.GetCompressedBlob(Session.Client().BlobStoreNamespace(), Request.ChunkId); - Payload = BlobResult.Response; + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Request.Key.Namespace); + const CloudCacheResult BlobResult = Session.GetCompressedBlob(BlobStoreNamespace, Request.ChunkId); + Payload = BlobResult.Response; AppendResult(BlobResult, Result); @@ -435,16 +446,14 @@ namespace detail { CloudCacheResult Result; for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheRecord.Key.Namespace); if (m_UseLegacyDdc) { - Result = Session.PutDerivedData(CacheRecord.Key.Namespace, // Session.Client().BlobStoreNamespace(), - CacheRecord.Key.Bucket, - CacheRecord.Key.Hash, - RecordValue); + Result = Session.PutDerivedData(BlobStoreNamespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue); } else { - Result = Session.PutRef(CacheRecord.Key.Namespace, // Session.Client().BlobStoreNamespace(), + Result = Session.PutRef(BlobStoreNamespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, @@ -547,7 +556,8 @@ namespace detail { int64_t TotalBytes = 0ull; double TotalElapsedSeconds = 0.0; - const auto PutBlobs = [&](std::span ValueContentIds, std::string& OutReason) -> bool { + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Key.Namespace); + const auto PutBlobs = [&](std::span ValueContentIds, std::string& OutReason) -> bool { for (const IoHash& ValueContentId : ValueContentIds) { IoBuffer BlobBuffer; @@ -559,7 +569,7 @@ namespace detail { CloudCacheResult BlobResult; for (int32_t Attempt = 0; Attempt < MaxAttempts && !BlobResult.Success; Attempt++) { - BlobResult = Session.PutCompressedBlob(Session.Client().BlobStoreNamespace(), ValueContentId, BlobBuffer); + BlobResult = Session.PutCompressedBlob(BlobStoreNamespace, ValueContentId, BlobBuffer); } m_Status.SetFromErrorCode(BlobResult.ErrorCode, BlobResult.Reason); @@ -580,11 +590,7 @@ namespace detail { PutRefResult RefResult; for (int32_t Attempt = 0; Attempt < MaxAttempts && !RefResult.Success; Attempt++) { - RefResult = Session.PutRef(Key.Namespace, // Seesion.Client().BlobStoreNamespace(), - Key.Bucket, - Key.Hash, - ObjectBuffer, - ZenContentType::kCbObject); + RefResult = Session.PutRef(BlobStoreNamespace, Key.Bucket, Key.Hash, ObjectBuffer, ZenContentType::kCbObject); } m_Status.SetFromErrorCode(RefResult.ErrorCode, RefResult.Reason); @@ -604,9 +610,8 @@ namespace detail { return {.Reason = std::move(Reason), .Success = false}; } - const IoHash RefHash = IoHash::HashBuffer(ObjectBuffer); - FinalizeRefResult FinalizeResult = - Session.FinalizeRef(Key.Namespace, Key.Bucket, Key.Hash, RefHash); // Session.Client().BlobStoreNamespace(), + const IoHash RefHash = IoHash::HashBuffer(ObjectBuffer); + FinalizeRefResult FinalizeResult = Session.FinalizeRef(BlobStoreNamespace, Key.Bucket, Key.Hash, RefHash); m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason); @@ -624,8 +629,7 @@ namespace detail { return {.Reason = std::move(Reason), .Success = false}; } - FinalizeResult = - Session.FinalizeRef(Key.Namespace, Key.Bucket, Key.Hash, RefHash); // Session.Client().BlobStoreNamespace(), + FinalizeResult = Session.FinalizeRef(BlobStoreNamespace, Key.Bucket, Key.Hash, RefHash); m_Status.SetFromErrorCode(FinalizeResult.ErrorCode, FinalizeResult.Reason); -- cgit v1.2.3 From 1b681b96424e0c25e26a2dbc24b42038539ac5af Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 10 May 2022 09:53:00 +0200 Subject: cleanup --- zenserver/cache/structuredcache.cpp | 52 ++++++++++++++++++------------------- zenserver/cache/structuredcache.h | 2 +- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 74438ac2e..98272722c 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -140,8 +140,7 @@ HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) if (std::all_of(begin(Key), end(Key), [](const char c) { return std::isalnum(c); })) { // Bucket reference - - return HandleCacheBucketRequest(Request, ZenCacheStore::DefaultNamespace, Key); + return HandleCacheBucketRequest(Request, Key); } return Request.WriteResponse(HttpResponseCode::BadRequest); // invalid URL @@ -162,7 +161,7 @@ HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) } void -HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket) +HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Key) { switch (Request.RequestVerb()) { @@ -175,14 +174,19 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, case HttpVerb::kDelete: // Drop bucket - - if (m_CacheStore.DropBucket(Namespace, Bucket)) - { - return Request.WriteResponse(HttpResponseCode::OK); - } - else { - return Request.WriteResponse(HttpResponseCode::NotFound); + // TODO: Should add namespace to URI and handle if the namespace is missing for backwards compatability + std::string_view Namespace = ZenCacheStore::DefaultNamespace; + std::string_view Bucket = Key; + + if (m_CacheStore.DropBucket(Namespace, Bucket)) + { + return Request.WriteResponse(HttpResponseCode::OK); + } + else + { + return Request.WriteResponse(HttpResponseCode::NotFound); + } } break; @@ -790,7 +794,8 @@ HttpStructuredCacheService::ValidateKeyUri(HttpServerRequest& Request, CacheRef& return false; } - OutRef.Namespace = ToLower(""); // TODO: Should we add namespace to URI? + OutRef.Namespace = ToLower(ZenCacheStore::DefaultNamespace); // TODO: Should add namespace to URI and handle if the namespace is + // missing for backwards compatability OutRef.BucketSegment = ToLower(Key.substr(0, BucketSplitOffset)); if (!std::all_of(begin(OutRef.Namespace), end(OutRef.Namespace), [](const char c) { return std::isalnum(c); })) @@ -937,9 +942,8 @@ HttpStructuredCacheService::HandleRpcPutCacheRecords(zen::HttpServerRequest& Req CbFieldView NamespaceField = KeyView["Namespace"sv]; CbFieldView BucketField = KeyView["Bucket"sv]; CbFieldView HashField = KeyView["Hash"sv]; - CacheKey Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, - BucketField.AsString(), - HashField.AsHash()); + CacheKey Key = + CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); if (BucketField.HasError() || HashField.HasError() || Key.Bucket.empty()) { return Request.WriteResponse(HttpResponseCode::BadRequest); @@ -1116,9 +1120,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt CbFieldView BucketField = KeyObject["Bucket"sv]; CbFieldView HashField = KeyObject["Hash"sv]; CacheKey& Key = Request.Upstream.Key; - Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, - BucketField.AsString(), - HashField.AsHash()); + Key = CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); if (HashField.HasError() || Key.Bucket.empty()) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); @@ -1399,9 +1401,8 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ CbFieldView NamespaceField = KeyView["Namespace"sv]; CbFieldView BucketField = KeyView["Bucket"sv]; CbFieldView HashField = KeyView["Hash"sv]; - CacheKey Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, - BucketField.AsString(), - HashField.AsHash()); + CacheKey Key = + CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); if (BucketField.HasError() || HashField.HasError() || Key.Bucket.empty()) { return Request.WriteResponse(HttpResponseCode::BadRequest); @@ -1517,9 +1518,8 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http CbFieldView NamespaceField = KeyObject["Namespace"sv]; CbFieldView BucketField = KeyObject["Bucket"sv]; CbFieldView HashField = KeyObject["Hash"sv]; - Request.Key = CacheKey::Create(NamespaceField ? NamespaceField.AsString() : ZenCacheStore::DefaultNamespace, - BucketField.AsString(), - HashField.AsHash()); + Request.Key = + CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); if (BucketField.HasError() || HashField.HasError() || Request.Key.Bucket.empty()) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); @@ -1774,9 +1774,9 @@ HttpStructuredCacheService::ParseGetCacheChunksRequest(std::vector Date: Tue, 10 May 2022 13:45:06 +0200 Subject: Add namespace test --- zenserver/cache/structuredcachestore.cpp | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index ce55b24b6..c21945702 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -3021,6 +3021,59 @@ TEST_CASE("z$.threadedinsert") // * doctest::skip(true)) } } +TEST_CASE("z$.namespaces") +{ + using namespace testutils; + + const auto CreateCacheValue = [](size_t Size) -> CbObject { + std::vector Buf; + Buf.resize(Size); + + CbObjectWriter Writer; + Writer.AddBinary("Binary"sv, Buf.data(), Buf.size()); + return Writer.Save(); + }; + + ScopedTemporaryDirectory TempDir; + CreateDirectories(TempDir.Path()); + + { + CasGc Gc; + ZenCacheStore Zcs(Gc, TempDir.Path() / "cache"); + const auto Bucket = "teardrinker"sv; + const auto CustomNamespace = "mynamespace"sv; + + // Create a cache record + const IoHash Key = CreateKey(42); + CbObject CacheValue = CreateCacheValue(4096); + + IoBuffer Buffer = CacheValue.GetBuffer().AsIoBuffer(); + Buffer.SetContentType(ZenContentType::kCbObject); + + ZenCacheValue PutValue = {.Value = Buffer}; + Zcs.Put(ZenCacheStore::DefaultNamespace, Bucket, Key, PutValue); + + ZenCacheValue GetValue; + CHECK(Zcs.Get(ZenCacheStore::DefaultNamespace, Bucket, Key, GetValue)); + + CHECK(!Zcs.Get(CustomNamespace, Bucket, Key, GetValue)); + + // This should just be dropped for now until we decide how we add namespaces + Zcs.Put(CustomNamespace, Bucket, Key, PutValue); + CHECK(!Zcs.Get(CustomNamespace, Bucket, Key, GetValue)); + + const IoHash Key2 = CreateKey(43); + CbObject CacheValue2 = CreateCacheValue(4096); + + IoBuffer Buffer2 = CacheValue2.GetBuffer().AsIoBuffer(); + Buffer2.SetContentType(ZenContentType::kCbObject); + ZenCacheValue PutValue2 = {.Value = Buffer2}; + Zcs.Put(CustomNamespace, Bucket, Key2, PutValue2); + + CHECK(!Zcs.Get(ZenCacheStore::DefaultNamespace, Bucket, Key2, GetValue)); + } +} + TEST_CASE("z$.blocked.disklayer.put") { ScopedTemporaryDirectory TempDir; -- cgit v1.2.3 From 68f7940692332ac648d249212064a090fe2b72f4 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 10 May 2022 15:57:04 +0200 Subject: use spaceship operator for CacheKey comparisons --- zenutil/include/zenutil/cache/cachekey.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/zenutil/include/zenutil/cache/cachekey.h b/zenutil/include/zenutil/cache/cachekey.h index e6110a522..9d0a4ba6d 100644 --- a/zenutil/include/zenutil/cache/cachekey.h +++ b/zenutil/include/zenutil/cache/cachekey.h @@ -21,31 +21,11 @@ struct CacheKey return {.Namespace = ToLower(Namespace), .Bucket = ToLower(Bucket), .Hash = Hash}; } + auto operator<=>(const CacheKey&) const = default; + static const CacheKey Empty; }; -inline bool -operator==(const CacheKey& A, const CacheKey& B) -{ - return A.Namespace == B.Namespace && A.Bucket == B.Bucket && A.Hash == B.Hash; -} - -inline bool -operator!=(const CacheKey& A, const CacheKey& B) -{ - return A.Namespace != B.Namespace || A.Bucket != B.Bucket || A.Hash != B.Hash; -} - -inline bool -operator<(const CacheKey& A, const CacheKey& B) -{ - const std::string& NamespaceA = A.Namespace; - const std::string& NamespaceB = B.Namespace; - const std::string& BucketA = A.Bucket; - const std::string& BucketB = B.Bucket; - return NamespaceA == NamespaceB ? (BucketA == BucketB ? A.Hash < B.Hash : BucketA < BucketB) : NamespaceA < NamespaceB; -} - struct CacheChunkRequest { CacheKey Key; -- cgit v1.2.3 From 19066beb8e31296b1d4b542b8c4414f29b1f47c8 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 10:08:12 +0200 Subject: revert Bucket - Key parameter change --- zenserver/cache/structuredcache.cpp | 3 +-- zenserver/cache/structuredcache.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 98272722c..eed7a4420 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -161,7 +161,7 @@ HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) } void -HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Key) +HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Bucket) { switch (Request.RequestVerb()) { @@ -177,7 +177,6 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, { // TODO: Should add namespace to URI and handle if the namespace is missing for backwards compatability std::string_view Namespace = ZenCacheStore::DefaultNamespace; - std::string_view Bucket = Key; if (m_CacheStore.DropBucket(Namespace, Bucket)) { diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index c20856dff..8285d517d 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -112,7 +112,7 @@ private: void HandleRpcPutCacheValues(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); void HandleRpcGetCacheValues(zen::HttpServerRequest& Request, CbObjectView BatchRequest); void HandleRpcGetCacheChunks(zen::HttpServerRequest& Request, CbObjectView BatchRequest); - void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Key); + void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Bucket); virtual void HandleStatsRequest(zen::HttpServerRequest& Request) override; virtual void HandleStatusRequest(zen::HttpServerRequest& Request) override; PutResult PutCacheRecord(PutRequestData& Request, const CbPackage* Package); -- cgit v1.2.3 From 4e2e93d4e0cab545ac1273d1c04dfa99fb47a980 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 10:20:19 +0200 Subject: manual <=> calls for strings in CacheKey --- zenutil/include/zenutil/cache/cachekey.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/zenutil/include/zenutil/cache/cachekey.h b/zenutil/include/zenutil/cache/cachekey.h index 9d0a4ba6d..569a31441 100644 --- a/zenutil/include/zenutil/cache/cachekey.h +++ b/zenutil/include/zenutil/cache/cachekey.h @@ -21,7 +21,14 @@ struct CacheKey return {.Namespace = ToLower(Namespace), .Bucket = ToLower(Bucket), .Hash = Hash}; } - auto operator<=>(const CacheKey&) const = default; + auto operator<=>(const CacheKey& that) const + { + if (auto cmp = Namespace <=> that.Namespace; cmp != 0) + return cmp; + if (auto cmp = Bucket <=> that.Bucket; cmp != 0) + return cmp; + return Hash <=> that.Hash; + } static const CacheKey Empty; }; -- cgit v1.2.3 From 992f21133f83f6e0a2a99136906cb2e07d6c0842 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 10:40:31 +0200 Subject: Add caseSensitiveCompareStrings and manual <=> and == operator for CacheKey MacOS clang compiler does not implement a default <=> operator for string --- zencore/include/zencore/string.h | 23 +++++++++++++++++++++++ zenutil/include/zenutil/cache/cachekey.h | 14 ++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/zencore/include/zencore/string.h b/zencore/include/zencore/string.h index 012ee73ee..fe838ac19 100644 --- a/zencore/include/zencore/string.h +++ b/zencore/include/zencore/string.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -795,6 +796,28 @@ StrCaseCompare(const char* Lhs, const char* Rhs, int64_t Length = -1) #endif } +/** + * @brief + * Helper function to implement case sensitive spaceship operator for strings. + * MacOS clang version we use does not implement <=> for std::string + * @param Lhs string + * @param Rhs string + * @return std::strong_ordering indicating relationship between Lhs and Rhs + */ +inline auto +caseSensitiveCompareStrings(const std::string& Lhs, const std::string& Rhs) +{ + if (Lhs == Rhs) + { + return std::strong_ordering::equal; + } + if (Lhs < Rhs) + { + return std::strong_ordering::less; + } + return std::strong_ordering::greater; +} + ////////////////////////////////////////////////////////////////////////// /** diff --git a/zenutil/include/zenutil/cache/cachekey.h b/zenutil/include/zenutil/cache/cachekey.h index 569a31441..427c99435 100644 --- a/zenutil/include/zenutil/cache/cachekey.h +++ b/zenutil/include/zenutil/cache/cachekey.h @@ -23,13 +23,19 @@ struct CacheKey auto operator<=>(const CacheKey& that) const { - if (auto cmp = Namespace <=> that.Namespace; cmp != 0) - return cmp; - if (auto cmp = Bucket <=> that.Bucket; cmp != 0) - return cmp; + if (auto n = caseSensitiveCompareStrings(Namespace, that.Namespace); n != std::strong_ordering::equal) + { + return n; + } + if (auto b = caseSensitiveCompareStrings(Bucket, that.Bucket); b != std::strong_ordering::equal) + { + return b; + } return Hash <=> that.Hash; } + auto operator==(const CacheKey& that) const { return (*this <=> that) == std::strong_ordering::equal; } + static const CacheKey Empty; }; -- cgit v1.2.3 From d418b9794dddb550bf90c55a4353f2ed1a764168 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 10:43:41 +0200 Subject: string_view vs string lifetime fix --- zenserver/upstream/upstreamcache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp index cbb32b13e..52513abe9 100644 --- a/zenserver/upstream/upstreamcache.cpp +++ b/zenserver/upstream/upstreamcache.cpp @@ -160,7 +160,7 @@ namespace detail { } } - std::string_view GetActualDdcNamespace(CloudCacheSession& Session, const std::string Namespace) + std::string_view GetActualDdcNamespace(CloudCacheSession& Session, std::string_view Namespace) { if (Namespace == ZenCacheStore::DefaultNamespace) { @@ -169,7 +169,7 @@ namespace detail { return Namespace; } - std::string_view GetActualBlobStoreNamespace(CloudCacheSession& Session, const std::string Namespace) + std::string_view GetActualBlobStoreNamespace(CloudCacheSession& Session, std::string_view Namespace) { if (Namespace == ZenCacheStore::DefaultNamespace) { -- cgit v1.2.3 From e45d87b8834d204a130f09daf9009b85f3cd32ef Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 12:16:03 +0200 Subject: use string::compare in caseSensitiveCompareStrings --- zencore/include/zencore/string.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/zencore/include/zencore/string.h b/zencore/include/zencore/string.h index fe838ac19..92f567dae 100644 --- a/zencore/include/zencore/string.h +++ b/zencore/include/zencore/string.h @@ -807,15 +807,8 @@ StrCaseCompare(const char* Lhs, const char* Rhs, int64_t Length = -1) inline auto caseSensitiveCompareStrings(const std::string& Lhs, const std::string& Rhs) { - if (Lhs == Rhs) - { - return std::strong_ordering::equal; - } - if (Lhs < Rhs) - { - return std::strong_ordering::less; - } - return std::strong_ordering::greater; + int r = Lhs.compare(Rhs); + return r == 0 ? std::strong_ordering::equal : r < 0 ? std::strong_ordering::less : std::strong_ordering::greater; } ////////////////////////////////////////////////////////////////////////// -- cgit v1.2.3 From 1b235fc947589dfbac0d09024947c37171c5dc7f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 13:04:59 +0200 Subject: Add support for /api/v2/ URI requests with namespace support --- zenserver/cache/structuredcache.cpp | 177 +++++++++++++++++++++++++++++++----- zenserver/cache/structuredcache.h | 5 +- 2 files changed, 155 insertions(+), 27 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index eed7a4420..691da36fa 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -121,47 +121,148 @@ HttpStructuredCacheService::Scrub(ScrubContext& Ctx) m_CacheStore.Scrub(Ctx); } -void -HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) +static constexpr std::string_view HttpZCacheAPIV2Prefix = "api/v2/"sv; +static constexpr std::string_view HttpZCacheRPCPrefix = "$rpc"sv; + +struct HttpRequestData { - CacheRef Ref; + std::optional Namespace; + std::optional Bucket; + std::optional HashKey; + std::optional ValueContentId; +}; - metrics::OperationTiming::Scope $(m_HttpRequests); +static bool +HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) +{ + std::string_view Namespace = ZenCacheStore::DefaultNamespace; + if (Key.starts_with(HttpZCacheAPIV2Prefix)) + { + std::string_view::size_type NamespaceSplitOffset = Key.find_first_of('/', HttpZCacheAPIV2Prefix.length()); + if (NamespaceSplitOffset == std::string_view::npos) + { + // Namespace reference + if (!std::all_of(begin(Key), end(Key), [](const char c) { return std::isalnum(c); })) + { + return false; + } + Data.Namespace = ToLower(Key); + return true; + } + Data.Namespace = Key.substr(0, NamespaceSplitOffset); + Key = Key.substr(NamespaceSplitOffset + 1); + } + + std::string_view::size_type BucketSplitOffset = Key.find_first_of('/'); + if (BucketSplitOffset == std::string_view::npos) + { + if (!std::all_of(begin(Key), end(Key), [](const char c) { return std::isalnum(c); })) + { + return false; + } + Data.Bucket = ToLower(Key); + return true; + } + + std::string_view HashSegment; + std::string_view ValueSegment; + + std::string_view::size_type ValueSplitOffset = Key.find_last_of('/'); + if (ValueSplitOffset == BucketSplitOffset) + { + // Basic cache record lookup + HashSegment = Key.substr(BucketSplitOffset + 1); + } + else + { + // Cache record + valueid lookup + HashSegment = Key.substr(BucketSplitOffset + 1, ValueSplitOffset - BucketSplitOffset - 1); + ValueSegment = Key.substr(ValueSplitOffset + 1); + } - if (!ValidateKeyUri(Request, /* out */ Ref)) + if (HashSegment.size() != IoHash::StringLength) { - std::string_view Key = Request.RelativeUri(); + return false; + } + + IoHash KeyHash; + if (!ParseHexBytes(HashSegment.data(), HashSegment.size(), KeyHash.Hash)) + { + return false; + } - if (Key == "$rpc") + Data.HashKey = KeyHash; + + if (!ValueSegment.empty()) + { + if (ValueSegment.size() != IoHash::StringLength) { - return HandleRpcRequest(Request); + return false; } - if (std::all_of(begin(Key), end(Key), [](const char c) { return std::isalnum(c); })) + IoHash ValueHash; + if (!ParseHexBytes(ValueSegment.data(), ValueSegment.size(), ValueHash.Hash)) { - // Bucket reference - return HandleCacheBucketRequest(Request, Key); + return false; } + Data.ValueContentId = ValueHash; + } + + return true; +} + +void +HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) +{ + metrics::OperationTiming::Scope $(m_HttpRequests); + + std::string_view Key = Request.RelativeUri(); + if (Key == HttpZCacheRPCPrefix) + { + return HandleRpcRequest(Request); + } + HttpRequestData RequestData; + if (!HttpRequestParseRelativeUri(Key, RequestData)) + { return Request.WriteResponse(HttpResponseCode::BadRequest); // invalid URL } - CachePolicy PolicyFromURL = ParseCachePolicy(Request.GetQueryParams()); + if (RequestData.ValueContentId.has_value()) + { + ZEN_ASSERT(RequestData.Namespace.has_value()); + ZEN_ASSERT(RequestData.Bucket.has_value()); + ZEN_ASSERT(RequestData.HashKey.has_value()); + CacheRef Ref = {.Namespace = RequestData.Namespace.value(), + .BucketSegment = RequestData.Bucket.value(), + .HashKey = RequestData.HashKey.value(), + .ValueContentId = RequestData.ValueContentId.value()}; + return HandleCacheValueRequest(Request, Ref, ParseCachePolicy(Request.GetQueryParams())); + } - if (Ref.ValueContentId == IoHash::Zero) + if (RequestData.HashKey.has_value()) { - return HandleCacheRecordRequest(Request, Ref, PolicyFromURL); + ZEN_ASSERT(RequestData.Namespace.has_value()); + ZEN_ASSERT(RequestData.Bucket.has_value()); + CacheRef Ref = {.Namespace = RequestData.Namespace.value(), + .BucketSegment = RequestData.Bucket.value(), + .HashKey = RequestData.HashKey.value(), + .ValueContentId = IoHash::Zero}; + return HandleCacheRecordRequest(Request, Ref, ParseCachePolicy(Request.GetQueryParams())); } - else + + if (RequestData.Bucket.has_value()) { - return HandleCacheValueRequest(Request, Ref, PolicyFromURL); + ZEN_ASSERT(RequestData.Namespace.has_value()); + return HandleCacheBucketRequest(Request, RequestData.Namespace.value(), RequestData.Bucket.value()); } - return; + ZEN_ASSERT(RequestData.Namespace.has_value()); + return HandleCacheNamespaceRequest(Request, RequestData.Namespace.value()); } void -HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Bucket) +HttpStructuredCacheService::HandleCacheNamespaceRequest(zen::HttpServerRequest& Request, std::string_view) { switch (Request.RequestVerb()) { @@ -173,11 +274,39 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, break; case HttpVerb::kDelete: - // Drop bucket + // Drop namespace { - // TODO: Should add namespace to URI and handle if the namespace is missing for backwards compatability - std::string_view Namespace = ZenCacheStore::DefaultNamespace; + // if (m_CacheStore.DropNamespace(Namespace)) + // { + // return Request.WriteResponse(HttpResponseCode::OK); + // } + // else + // { + // return Request.WriteResponse(HttpResponseCode::NotFound); + // } + } + break; + default: + break; + } +} + +void +HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket) +{ + switch (Request.RequestVerb()) + { + case HttpVerb::kHead: + case HttpVerb::kGet: + { + // Query stats + } + break; + + case HttpVerb::kDelete: + // Drop bucket + { if (m_CacheStore.DropBucket(Namespace, Bucket)) { return Request.WriteResponse(HttpResponseCode::OK); @@ -783,9 +912,8 @@ HttpStructuredCacheService::HandlePutCacheValue(zen::HttpServerRequest& Request, } bool -HttpStructuredCacheService::ValidateKeyUri(HttpServerRequest& Request, CacheRef& OutRef) +HttpStructuredCacheService::ValidateKeyUri(std::string_view Namespace, std::string_view Key, CacheRef& OutRef) { - std::string_view Key = Request.RelativeUri(); std::string_view::size_type BucketSplitOffset = Key.find_first_of('/'); if (BucketSplitOffset == std::string_view::npos) @@ -793,8 +921,7 @@ HttpStructuredCacheService::ValidateKeyUri(HttpServerRequest& Request, CacheRef& return false; } - OutRef.Namespace = ToLower(ZenCacheStore::DefaultNamespace); // TODO: Should add namespace to URI and handle if the namespace is - // missing for backwards compatability + OutRef.Namespace = ToLower(Namespace); OutRef.BucketSegment = ToLower(Key.substr(0, BucketSplitOffset)); if (!std::all_of(begin(OutRef.Namespace), end(OutRef.Namespace), [](const char c) { return std::isalnum(c); })) diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index 8285d517d..40e92c675 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -99,7 +99,7 @@ private: Invalid, }; - [[nodiscard]] bool ValidateKeyUri(zen::HttpServerRequest& Request, CacheRef& OutRef); + [[nodiscard]] bool ValidateKeyUri(std::string_view Namespace, std::string_view Key, CacheRef& OutRef); void HandleCacheRecordRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); void HandleGetCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); void HandlePutCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); @@ -112,7 +112,8 @@ private: void HandleRpcPutCacheValues(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); void HandleRpcGetCacheValues(zen::HttpServerRequest& Request, CbObjectView BatchRequest); void HandleRpcGetCacheChunks(zen::HttpServerRequest& Request, CbObjectView BatchRequest); - void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Bucket); + void HandleCacheNamespaceRequest(zen::HttpServerRequest& Request, std::string_view Namespace); + void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket); virtual void HandleStatsRequest(zen::HttpServerRequest& Request) override; virtual void HandleStatusRequest(zen::HttpServerRequest& Request) override; PutResult PutCacheRecord(PutRequestData& Request, const CbPackage* Package); -- cgit v1.2.3 From 1274f92cf7ce890b7aa1fc9354503e2508c185eb Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 13:48:45 +0200 Subject: Tests for HttpRequestParseRelativeUri --- zenserver/cache/structuredcache.cpp | 168 ++++++++++++++++++++++++++----- zenserver/cache/structuredcache.h | 2 + zenserver/cache/structuredcachestore.cpp | 11 +- zenserver/cache/structuredcachestore.h | 1 + zenserver/zenserver.cpp | 1 + 5 files changed, 151 insertions(+), 32 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 691da36fa..9299911cf 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -35,6 +35,11 @@ #include +#if ZEN_WITH_TESTS +# include +# include +#endif + namespace zen { using namespace std::literals; @@ -135,32 +140,51 @@ struct HttpRequestData static bool HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) { - std::string_view Namespace = ZenCacheStore::DefaultNamespace; if (Key.starts_with(HttpZCacheAPIV2Prefix)) { - std::string_view::size_type NamespaceSplitOffset = Key.find_first_of('/', HttpZCacheAPIV2Prefix.length()); + Key = Key.substr(HttpZCacheAPIV2Prefix.length()); + // Namespace reference + if (!Key.starts_with(ZenCacheStore::NamespacePrefix)) + { + return false; + } + Key = Key.substr(ZenCacheStore::NamespacePrefix.length()); + std::string_view::size_type NamespaceSplitOffset = Key.find_first_of('/'); + + std::string_view Namespace = Key.substr(0, NamespaceSplitOffset); + if (!std::all_of(begin(Namespace), end(Namespace), [](const char c) { return std::isalnum(c); })) + { + return false; + } + Data.Namespace = ToLower(Namespace); + if (NamespaceSplitOffset == std::string_view::npos) { - // Namespace reference - if (!std::all_of(begin(Key), end(Key), [](const char c) { return std::isalnum(c); })) - { - return false; - } - Data.Namespace = ToLower(Key); return true; } - Data.Namespace = Key.substr(0, NamespaceSplitOffset); - Key = Key.substr(NamespaceSplitOffset + 1); + + Key = Key.substr(NamespaceSplitOffset + 1); + } + else + { + Data.Namespace = ZenCacheStore::DefaultNamespace; } std::string_view::size_type BucketSplitOffset = Key.find_first_of('/'); + std::string_view Bucket = Key.substr(0, BucketSplitOffset); + if (Bucket.empty()) + { + return false; + } + if (!std::all_of(begin(Bucket), end(Bucket), [](const char c) { return std::isalnum(c); })) + { + return false; + } + Data.Bucket = ToLower(Bucket); + if (BucketSplitOffset == std::string_view::npos) { - if (!std::all_of(begin(Key), end(Key), [](const char c) { return std::isalnum(c); })) - { - return false; - } - Data.Bucket = ToLower(Key); + // Bucket reference return true; } @@ -193,20 +217,22 @@ HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) Data.HashKey = KeyHash; - if (!ValueSegment.empty()) + if (ValueSegment.empty()) { - if (ValueSegment.size() != IoHash::StringLength) - { - return false; - } + return true; + } - IoHash ValueHash; - if (!ParseHexBytes(ValueSegment.data(), ValueSegment.size(), ValueHash.Hash)) - { - return false; - } - Data.ValueContentId = ValueHash; + if (ValueSegment.size() != IoHash::StringLength) + { + return false; + } + + IoHash ValueHash; + if (!ParseHexBytes(ValueSegment.data(), ValueSegment.size(), ValueHash.Hash)) + { + return false; } + Data.ValueContentId = ValueHash; return true; } @@ -2330,4 +2356,94 @@ HttpStructuredCacheService::HandleStatusRequest(zen::HttpServerRequest& Request) Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); } +#if ZEN_WITH_TESTS + +TEST_CASE("z$service.parse.relative.Uri") +{ + HttpRequestData LegacyBucketRequest; + CHECK(HttpRequestParseRelativeUri("test", LegacyBucketRequest)); + CHECK(LegacyBucketRequest.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(LegacyBucketRequest.Bucket == "test"sv); + CHECK(!LegacyBucketRequest.HashKey.has_value()); + CHECK(!LegacyBucketRequest.ValueContentId.has_value()); + + HttpRequestData LegacyHashKeyRequest; + CHECK(HttpRequestParseRelativeUri("test/0123456789abcdef12340123456789abcdef1234", LegacyHashKeyRequest)); + CHECK(LegacyHashKeyRequest.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(LegacyHashKeyRequest.Bucket == "test"sv); + CHECK(LegacyHashKeyRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); + CHECK(!LegacyHashKeyRequest.ValueContentId.has_value()); + + HttpRequestData LegacyValueContentIdRequest; + CHECK(HttpRequestParseRelativeUri("test/0123456789abcdef12340123456789abcdef1234/56789abcdef12345678956789abcdef123456789", + LegacyValueContentIdRequest)); + CHECK(LegacyValueContentIdRequest.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(LegacyValueContentIdRequest.Bucket == "test"sv); + CHECK(LegacyValueContentIdRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); + CHECK(LegacyValueContentIdRequest.ValueContentId == IoHash::FromHexString("56789abcdef12345678956789abcdef123456789"sv)); + + HttpRequestData V2DefaultNamespaceRequest; + CHECK(HttpRequestParseRelativeUri("api/v2/ns_", V2DefaultNamespaceRequest)); + CHECK(V2DefaultNamespaceRequest.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(!V2DefaultNamespaceRequest.Bucket.has_value()); + CHECK(!V2DefaultNamespaceRequest.HashKey.has_value()); + CHECK(!V2DefaultNamespaceRequest.ValueContentId.has_value()); + + HttpRequestData V2NamespaceRequest; + CHECK(HttpRequestParseRelativeUri("api/v2/ns_nicenamespace", V2NamespaceRequest)); + CHECK(V2NamespaceRequest.Namespace == "nicenamespace"sv); + CHECK(!V2NamespaceRequest.Bucket.has_value()); + CHECK(!V2NamespaceRequest.HashKey.has_value()); + CHECK(!V2NamespaceRequest.ValueContentId.has_value()); + + HttpRequestData V2BucketRequestWithDefaultNamespace; + CHECK(HttpRequestParseRelativeUri("api/v2/ns_/test", V2BucketRequestWithDefaultNamespace)); + CHECK(V2BucketRequestWithDefaultNamespace.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(V2BucketRequestWithDefaultNamespace.Bucket == "test"sv); + CHECK(!V2BucketRequestWithDefaultNamespace.HashKey.has_value()); + CHECK(!V2BucketRequestWithDefaultNamespace.ValueContentId.has_value()); + + HttpRequestData V2BucketRequestWithNamespace; + CHECK(HttpRequestParseRelativeUri("api/v2/ns_nicenamespace/test", V2BucketRequestWithNamespace)); + CHECK(V2BucketRequestWithNamespace.Namespace == "nicenamespace"sv); + CHECK(V2BucketRequestWithNamespace.Bucket == "test"sv); + CHECK(!V2BucketRequestWithNamespace.HashKey.has_value()); + CHECK(!V2BucketRequestWithNamespace.ValueContentId.has_value()); + + HttpRequestData V2HashKeyRequest; + CHECK(HttpRequestParseRelativeUri("api/v2/ns_/test/0123456789abcdef12340123456789abcdef1234", V2HashKeyRequest)); + CHECK(V2HashKeyRequest.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(V2HashKeyRequest.Bucket == "test"); + CHECK(V2HashKeyRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); + CHECK(!V2HashKeyRequest.ValueContentId.has_value()); + + HttpRequestData V2ValueContentIdRequest; + CHECK(HttpRequestParseRelativeUri( + "api/v2/ns_nicenamespace/test/0123456789abcdef12340123456789abcdef1234/56789abcdef12345678956789abcdef123456789", + V2ValueContentIdRequest)); + CHECK(V2ValueContentIdRequest.Namespace == "nicenamespace"sv); + CHECK(V2ValueContentIdRequest.Bucket == "test"sv); + CHECK(V2ValueContentIdRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); + CHECK(V2ValueContentIdRequest.ValueContentId == IoHash::FromHexString("56789abcdef12345678956789abcdef123456789"sv)); + + HttpRequestData Invalid; + CHECK(!HttpRequestParseRelativeUri("api/v2/bla", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2//", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/ns_bad\2_namespace", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/ns_nice/\2\1bucket", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789a", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/pppppppp89abcdef12340123456789abcdef1234", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcd", Invalid)); + CHECK(!HttpRequestParseRelativeUri( + "api/v2/ns_namespace/bucket/0123456789abcdef12340123456789abcdef1234/ppppppppdef12345678956789abcdef123456789", + Invalid)); +} + +#endif + +void +z$service_forcelink() +{ +} + } // namespace zen diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index 40e92c675..ba02b3b14 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -161,4 +161,6 @@ IsCompressedBinary(ZenContentType Type) return Type == ZenContentType::kBinary || Type == ZenContentType::kCompressedBinary; } +void z$service_forcelink(); + } // namespace zen diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index c21945702..5d1d39c50 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2128,8 +2128,6 @@ ZenCacheDiskLayer::TotalSize() const //////////////////////////// ZenCacheStore -static constexpr std::string_view ZenCacheNamespaceDirPrefix = "ns_"; - ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStorage(Gc), GcContributor(Gc) { CreateDirectories(BasePath); @@ -2142,7 +2140,7 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor for (const std::filesystem::path& DirPath : DirContent.Directories) { std::string DirName = PathToUtf8(DirPath.stem()); - if (DirName.starts_with(ZenCacheNamespaceDirPrefix)) + if (DirName.starts_with(NamespacePrefix)) { Namespaces.push_back(DirName.substr(3)); continue; @@ -2155,7 +2153,7 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor if (std::find(Namespaces.begin(), Namespaces.end(), DefaultNamespace) == Namespaces.end()) { ZEN_INFO("Moving #{} legacy buckets to anonymous namespace", LegacyBuckets.size()); - std::filesystem::path DefaultNamespaceFolder = BasePath / fmt::format("{}{}", ZenCacheNamespaceDirPrefix, DefaultNamespace); + std::filesystem::path DefaultNamespaceFolder = BasePath / fmt::format("{}{}", NamespacePrefix, DefaultNamespace); CreateDirectories(DefaultNamespaceFolder); // Move any non-namespace folders into the default namespace folder @@ -2176,7 +2174,7 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor for (const std::string& NamespaceName : Namespaces) { m_Namespaces[NamespaceName] = - std::make_unique(Gc, BasePath / fmt::format("{}{}", ZenCacheNamespaceDirPrefix, NamespaceName)); + std::make_unique(Gc, BasePath / fmt::format("{}{}", NamespacePrefix, NamespaceName)); } } @@ -2284,8 +2282,9 @@ ZenCacheStore::StorageSize() const ////////////////////////////////////////////////////////////////////////// +} // namespace zen + #if ZEN_WITH_TESTS -} namespace zen { diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 881285bc9..b6d06432c 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -357,6 +357,7 @@ class ZenCacheStore final : public GcStorage, public GcContributor { public: static constexpr std::string_view DefaultNamespace = ""; + static constexpr std::string_view NamespacePrefix = "ns_"; ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath); ~ZenCacheStore(); diff --git a/zenserver/zenserver.cpp b/zenserver/zenserver.cpp index abaec888a..9e6c67d34 100644 --- a/zenserver/zenserver.cpp +++ b/zenserver/zenserver.cpp @@ -1155,6 +1155,7 @@ test_main(int argc, char** argv) zen::zenhttp_forcelinktests(); zen::zenstore_forcelinktests(); zen::z$_forcelink(); + zen::z$service_forcelink(); zen::logging::InitializeLogging(); spdlog::set_level(spdlog::level::debug); -- cgit v1.2.3 From 4090ba3269bbebeb7dc772bd15b632560a7202b8 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 15:23:57 +0200 Subject: Add validation to namespace names that follows Jupiters rules. Add unified validation of Namespace, Bucket and Hash for rpc requests. cleanup --- zenserver/cache/structuredcache.cpp | 417 +++++++++++++++++------------------- zenserver/cache/structuredcache.h | 35 ++- 2 files changed, 209 insertions(+), 243 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 9299911cf..06114ed1e 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -75,6 +75,169 @@ struct PutRequestData CacheRecordPolicy Policy; }; +namespace { + static constexpr std::string_view HttpZCacheAPIV2Prefix = "api/v2/"sv; + static constexpr std::string_view HttpZCacheRPCPrefix = "$rpc"sv; + + struct HttpRequestData + { + std::optional Namespace; + std::optional Bucket; + std::optional HashKey; + std::optional ValueContentId; + }; + + bool IsValidNamespaceName(std::string_view Name) + { + if (Name == ZenCacheStore::DefaultNamespace) + { + return true; + } + if (Name.empty()) + { + return false; + } + return Name.find_first_not_of("abcdefghijklmnopqrstuvwxyz0123456789-_.") == std::string::npos; + } + + bool IsValidBucketName(std::string_view Name) + { + if (Name.empty()) + { + return false; + } + return Name.find_first_not_of("abcdefghijklmnopqrstuvwxyz0123456789-_.") == std::string::npos; + } + + bool HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) + { + if (Key.starts_with(HttpZCacheAPIV2Prefix)) + { + Key = Key.substr(HttpZCacheAPIV2Prefix.length()); + // Namespace reference + if (!Key.starts_with(ZenCacheStore::NamespacePrefix)) + { + return false; + } + Key = Key.substr(ZenCacheStore::NamespacePrefix.length()); + std::string_view::size_type NamespaceSplitOffset = Key.find_first_of('/'); + + std::string Namespace = ToLower(Key.substr(0, NamespaceSplitOffset)); + if (!IsValidNamespaceName(Namespace)) + { + return false; + } + + Data.Namespace = Namespace; + + if (NamespaceSplitOffset == std::string_view::npos) + { + return true; + } + Key = Key.substr(NamespaceSplitOffset + 1); + } + else + { + Data.Namespace = ZenCacheStore::DefaultNamespace; + } + + std::string_view::size_type BucketSplitOffset = Key.find_first_of('/'); + std::string Bucket = ToLower(Key.substr(0, BucketSplitOffset)); + if (!IsValidBucketName(Bucket)) + { + return false; + } + Data.Bucket = Bucket; + + if (BucketSplitOffset == std::string_view::npos) + { + // Bucket reference + return true; + } + + std::string_view HashSegment; + std::string_view ValueSegment; + + std::string_view::size_type ValueSplitOffset = Key.find_last_of('/'); + if (ValueSplitOffset == BucketSplitOffset) + { + // Basic cache record lookup + HashSegment = Key.substr(BucketSplitOffset + 1); + } + else + { + // Cache record + valueid lookup + HashSegment = Key.substr(BucketSplitOffset + 1, ValueSplitOffset - BucketSplitOffset - 1); + ValueSegment = Key.substr(ValueSplitOffset + 1); + } + + if (HashSegment.size() != IoHash::StringLength) + { + return false; + } + + IoHash KeyHash; + if (!ParseHexBytes(HashSegment.data(), HashSegment.size(), KeyHash.Hash)) + { + return false; + } + + Data.HashKey = KeyHash; + + if (ValueSegment.empty()) + { + return true; + } + + if (ValueSegment.size() != IoHash::StringLength) + { + return false; + } + + IoHash ValueHash; + if (!ParseHexBytes(ValueSegment.data(), ValueSegment.size(), ValueHash.Hash)) + { + return false; + } + Data.ValueContentId = ValueHash; + + return true; + } + + bool GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) + { + CbFieldView NamespaceField = KeyView["Namespace"sv]; + std::string_view Namespace = NamespaceField.AsString(ZenCacheStore::DefaultNamespace); + CbFieldView BucketField = KeyView["Bucket"sv]; + if (BucketField.HasError()) + { + return false; + } + if (!BucketField.IsString()) + { + return false; + } + std::string_view Bucket = BucketField.AsString(); + if (!IsValidBucketName(Bucket)) + { + return false; + } + CbFieldView HashField = KeyView["Hash"sv]; + if (HashField.HasError()) + { + return false; + } + if (!HashField.IsHash()) + { + return false; + } + IoHash Hash = HashField.AsHash(); + Key = CacheKey::Create(Namespace, Bucket, Hash); + return true; + } + +} // namespace + ////////////////////////////////////////////////////////////////////////// HttpStructuredCacheService::HttpStructuredCacheService(ZenCacheStore& InCacheStore, @@ -126,117 +289,6 @@ HttpStructuredCacheService::Scrub(ScrubContext& Ctx) m_CacheStore.Scrub(Ctx); } -static constexpr std::string_view HttpZCacheAPIV2Prefix = "api/v2/"sv; -static constexpr std::string_view HttpZCacheRPCPrefix = "$rpc"sv; - -struct HttpRequestData -{ - std::optional Namespace; - std::optional Bucket; - std::optional HashKey; - std::optional ValueContentId; -}; - -static bool -HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) -{ - if (Key.starts_with(HttpZCacheAPIV2Prefix)) - { - Key = Key.substr(HttpZCacheAPIV2Prefix.length()); - // Namespace reference - if (!Key.starts_with(ZenCacheStore::NamespacePrefix)) - { - return false; - } - Key = Key.substr(ZenCacheStore::NamespacePrefix.length()); - std::string_view::size_type NamespaceSplitOffset = Key.find_first_of('/'); - - std::string_view Namespace = Key.substr(0, NamespaceSplitOffset); - if (!std::all_of(begin(Namespace), end(Namespace), [](const char c) { return std::isalnum(c); })) - { - return false; - } - Data.Namespace = ToLower(Namespace); - - if (NamespaceSplitOffset == std::string_view::npos) - { - return true; - } - - Key = Key.substr(NamespaceSplitOffset + 1); - } - else - { - Data.Namespace = ZenCacheStore::DefaultNamespace; - } - - std::string_view::size_type BucketSplitOffset = Key.find_first_of('/'); - std::string_view Bucket = Key.substr(0, BucketSplitOffset); - if (Bucket.empty()) - { - return false; - } - if (!std::all_of(begin(Bucket), end(Bucket), [](const char c) { return std::isalnum(c); })) - { - return false; - } - Data.Bucket = ToLower(Bucket); - - if (BucketSplitOffset == std::string_view::npos) - { - // Bucket reference - return true; - } - - std::string_view HashSegment; - std::string_view ValueSegment; - - std::string_view::size_type ValueSplitOffset = Key.find_last_of('/'); - if (ValueSplitOffset == BucketSplitOffset) - { - // Basic cache record lookup - HashSegment = Key.substr(BucketSplitOffset + 1); - } - else - { - // Cache record + valueid lookup - HashSegment = Key.substr(BucketSplitOffset + 1, ValueSplitOffset - BucketSplitOffset - 1); - ValueSegment = Key.substr(ValueSplitOffset + 1); - } - - if (HashSegment.size() != IoHash::StringLength) - { - return false; - } - - IoHash KeyHash; - if (!ParseHexBytes(HashSegment.data(), HashSegment.size(), KeyHash.Hash)) - { - return false; - } - - Data.HashKey = KeyHash; - - if (ValueSegment.empty()) - { - return true; - } - - if (ValueSegment.size() != IoHash::StringLength) - { - return false; - } - - IoHash ValueHash; - if (!ParseHexBytes(ValueSegment.data(), ValueSegment.size(), ValueHash.Hash)) - { - return false; - } - Data.ValueContentId = ValueHash; - - return true; -} - void HttpStructuredCacheService::HandleRequest(HttpServerRequest& Request) { @@ -937,77 +989,6 @@ HttpStructuredCacheService::HandlePutCacheValue(zen::HttpServerRequest& Request, Request.WriteResponse(ResponseCode); } -bool -HttpStructuredCacheService::ValidateKeyUri(std::string_view Namespace, std::string_view Key, CacheRef& OutRef) -{ - std::string_view::size_type BucketSplitOffset = Key.find_first_of('/'); - - if (BucketSplitOffset == std::string_view::npos) - { - return false; - } - - OutRef.Namespace = ToLower(Namespace); - OutRef.BucketSegment = ToLower(Key.substr(0, BucketSplitOffset)); - - if (!std::all_of(begin(OutRef.Namespace), end(OutRef.Namespace), [](const char c) { return std::isalnum(c); })) - { - return false; - } - - if (!std::all_of(begin(OutRef.BucketSegment), end(OutRef.BucketSegment), [](const char c) { return std::isalnum(c); })) - { - return false; - } - - std::string_view HashSegment; - std::string_view ValueSegment; - - std::string_view::size_type ValueSplitOffset = Key.find_last_of('/'); - - // We know there is a slash so no need to check for npos return - - if (ValueSplitOffset == BucketSplitOffset) - { - // Basic cache record lookup - HashSegment = Key.substr(BucketSplitOffset + 1); - } - else - { - // Cache record + valueid lookup - HashSegment = Key.substr(BucketSplitOffset + 1, ValueSplitOffset - BucketSplitOffset - 1); - ValueSegment = Key.substr(ValueSplitOffset + 1); - } - - if (HashSegment.size() != IoHash::StringLength) - { - return false; - } - - if (!ValueSegment.empty() && ValueSegment.size() == IoHash::StringLength) - { - const bool IsOk = ParseHexBytes(ValueSegment.data(), ValueSegment.size(), OutRef.ValueContentId.Hash); - - if (!IsOk) - { - return false; - } - } - else - { - OutRef.ValueContentId = IoHash::Zero; - } - - const bool IsOk = ParseHexBytes(HashSegment.data(), HashSegment.size(), OutRef.HashKey.Hash); - - if (!IsOk) - { - return false; - } - - return true; -} - void HttpStructuredCacheService::HandleRpcRequest(zen::HttpServerRequest& Request) { @@ -1088,15 +1069,12 @@ HttpStructuredCacheService::HandleRpcPutCacheRecords(zen::HttpServerRequest& Req std::vector Results; for (CbFieldView RequestField : Params["Requests"sv]) { - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView RecordObject = RequestObject["Record"sv].AsObjectView(); - CbObjectView KeyView = RecordObject["Key"sv].AsObjectView(); - CbFieldView NamespaceField = KeyView["Namespace"sv]; - CbFieldView BucketField = KeyView["Bucket"sv]; - CbFieldView HashField = KeyView["Hash"sv]; - CacheKey Key = - CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); - if (BucketField.HasError() || HashField.HasError() || Key.Bucket.empty()) + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView RecordObject = RequestObject["Record"sv].AsObjectView(); + CbObjectView KeyView = RecordObject["Key"sv].AsObjectView(); + + CacheKey Key; + if (!GetRpcRequestCacheKey(KeyView, Key)) { return Request.WriteResponse(HttpResponseCode::BadRequest); } @@ -1265,18 +1243,16 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt for (CbFieldView RequestField : RequestsArray) { - RecordRequestData& Request = Requests.emplace_back(); - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CbFieldView NamespaceField = KeyObject["Namespace"sv]; - CbFieldView BucketField = KeyObject["Bucket"sv]; - CbFieldView HashField = KeyObject["Hash"sv]; - CacheKey& Key = Request.Upstream.Key; - Key = CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); - if (HashField.HasError() || Key.Bucket.empty()) + RecordRequestData& Request = Requests.emplace_back(); + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); + + CacheKey& Key = Request.Upstream.Key; + if (!GetRpcRequestCacheKey(KeyObject, Key)) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); } + Request.DownstreamPolicy = LoadCacheRecordPolicy(RequestObject["Policy"sv].AsObjectView(), DefaultPolicy); const CacheRecordPolicy& Policy = Request.DownstreamPolicy; @@ -1548,17 +1524,15 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ std::vector Results; for (CbFieldView RequestField : Params["Requests"sv]) { - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView KeyView = RequestObject["Key"sv].AsObjectView(); - CbFieldView NamespaceField = KeyView["Namespace"sv]; - CbFieldView BucketField = KeyView["Bucket"sv]; - CbFieldView HashField = KeyView["Hash"sv]; - CacheKey Key = - CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); - if (BucketField.HasError() || HashField.HasError() || Key.Bucket.empty()) + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView KeyView = RequestObject["Key"sv].AsObjectView(); + + CacheKey Key; + if (!GetRpcRequestCacheKey(KeyView, Key)) { return Request.WriteResponse(HttpResponseCode::BadRequest); } + PolicyText = RequestObject["Policy"sv].AsString(); CachePolicy Policy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : DefaultPolicy; IoHash RawHash = RequestObject["RawHash"sv].AsBinaryAttachment(); @@ -1664,18 +1638,15 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http { Stopwatch Timer; - RequestData& Request = Requests.emplace_back(); - CbObjectView RequestObject = RequestField.AsObjectView(); - CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CbFieldView NamespaceField = KeyObject["Namespace"sv]; - CbFieldView BucketField = KeyObject["Bucket"sv]; - CbFieldView HashField = KeyObject["Hash"sv]; - Request.Key = - CacheKey::Create(NamespaceField.AsString(ZenCacheStore::DefaultNamespace), BucketField.AsString(), HashField.AsHash()); - if (BucketField.HasError() || HashField.HasError() || Request.Key.Bucket.empty()) + RequestData& Request = Requests.emplace_back(); + CbObjectView RequestObject = RequestField.AsObjectView(); + CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); + + if (!GetRpcRequestCacheKey(KeyObject, Request.Key)) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); } + PolicyText = RequestObject["Policy"sv].AsString(); Request.Policy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : DefaultPolicy; @@ -1921,15 +1892,10 @@ HttpStructuredCacheService::ParseGetCacheChunksRequest(std::vectorKey)) { ZEN_WARN("GetCacheChunks: Invalid key in ChunkRequest."); return false; @@ -2432,6 +2398,7 @@ TEST_CASE("z$service.parse.relative.Uri") CHECK(!HttpRequestParseRelativeUri("api/v2/ns_bad\2_namespace", Invalid)); CHECK(!HttpRequestParseRelativeUri("api/v2/ns_nice/\2\1bucket", Invalid)); CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789a", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcdef1234", Invalid)); CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/pppppppp89abcdef12340123456789abcdef1234", Invalid)); CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcd", Invalid)); CHECK(!HttpRequestParseRelativeUri( diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index ba02b3b14..4b3b8fa52 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -99,24 +99,23 @@ private: Invalid, }; - [[nodiscard]] bool ValidateKeyUri(std::string_view Namespace, std::string_view Key, CacheRef& OutRef); - void HandleCacheRecordRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandleGetCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); - void HandlePutCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandleCacheValueRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandleGetCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandlePutCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandleRpcRequest(zen::HttpServerRequest& Request); - void HandleRpcPutCacheRecords(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); - void HandleRpcGetCacheRecords(zen::HttpServerRequest& Request, CbObjectView BatchRequest); - void HandleRpcPutCacheValues(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); - void HandleRpcGetCacheValues(zen::HttpServerRequest& Request, CbObjectView BatchRequest); - void HandleRpcGetCacheChunks(zen::HttpServerRequest& Request, CbObjectView BatchRequest); - void HandleCacheNamespaceRequest(zen::HttpServerRequest& Request, std::string_view Namespace); - void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket); - virtual void HandleStatsRequest(zen::HttpServerRequest& Request) override; - virtual void HandleStatusRequest(zen::HttpServerRequest& Request) override; - PutResult PutCacheRecord(PutRequestData& Request, const CbPackage* Package); + void HandleCacheRecordRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); + void HandleGetCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); + void HandlePutCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); + void HandleCacheValueRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); + void HandleGetCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); + void HandlePutCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); + void HandleRpcRequest(zen::HttpServerRequest& Request); + void HandleRpcPutCacheRecords(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); + void HandleRpcGetCacheRecords(zen::HttpServerRequest& Request, CbObjectView BatchRequest); + void HandleRpcPutCacheValues(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); + void HandleRpcGetCacheValues(zen::HttpServerRequest& Request, CbObjectView BatchRequest); + void HandleRpcGetCacheChunks(zen::HttpServerRequest& Request, CbObjectView BatchRequest); + void HandleCacheNamespaceRequest(zen::HttpServerRequest& Request, std::string_view Namespace); + void HandleCacheBucketRequest(zen::HttpServerRequest& Request, std::string_view Namespace, std::string_view Bucket); + virtual void HandleStatsRequest(zen::HttpServerRequest& Request) override; + virtual void HandleStatusRequest(zen::HttpServerRequest& Request) override; + PutResult PutCacheRecord(PutRequestData& Request, const CbPackage* Package); /** HandleRpcGetCacheChunks Helper: Parse the Body object into RecordValue Requests and Value Requests. */ bool ParseGetCacheChunksRequest(std::vector& RecordKeys, -- cgit v1.2.3 From c9a2cc8afa11f3dfea05b91f0758ba3d0cae0784 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 15:33:18 +0200 Subject: keep compatability for valid bucket names --- zenserver/cache/structuredcache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 06114ed1e..83f2f41d9 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -106,7 +106,7 @@ namespace { { return false; } - return Name.find_first_not_of("abcdefghijklmnopqrstuvwxyz0123456789-_.") == std::string::npos; + return std::all_of(begin(Name), end(Name), [](const char c) { return std::isalnum(c); }); } bool HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) -- cgit v1.2.3 From e9466f9684d5479029d73fd2a60327d1daa89192 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 12 May 2022 15:33:58 +0200 Subject: Validate max length for namespace name --- zenserver/cache/structuredcache.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 83f2f41d9..02cecf3e1 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -97,6 +97,10 @@ namespace { { return false; } + if (Name.length() > 64) + { + return false; + } return Name.find_first_not_of("abcdefghijklmnopqrstuvwxyz0123456789-_.") == std::string::npos; } -- cgit v1.2.3 From 9f8d957167fd965bfc3b64bd64ec5b66394d45f4 Mon Sep 17 00:00:00 2001 From: Alex Dunn Date: Thu, 12 May 2022 13:20:54 -0700 Subject: Adding PR update support. --- .github/workflows/self_host_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/self_host_build.yml b/.github/workflows/self_host_build.yml index 2645f9738..3864151ce 100644 --- a/.github/workflows/self_host_build.yml +++ b/.github/workflows/self_host_build.yml @@ -2,7 +2,7 @@ name: Validate Build on: pull_request: - types: [opened, reopened] + types: [opened, reopened, synchronize, reopened] branches: [ main ] jobs: -- cgit v1.2.3 From f488fdd311d09a2fb69e348ef88f0a896fd34c20 Mon Sep 17 00:00:00 2001 From: Alex Dunn Date: Fri, 13 May 2022 09:41:14 -0700 Subject: Testing version increment and label filtering for release build --- .github/workflows/update_release.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update_release.yml b/.github/workflows/update_release.yml index 62568d1c0..734480134 100644 --- a/.github/workflows/update_release.yml +++ b/.github/workflows/update_release.yml @@ -3,12 +3,13 @@ name: Build release on: # push pull_request: - types: [closed] + types: [closed, labeled] branches: [ main ] jobs: windows-build: # if: github.event.pull_request.merged == true + if: contains( github.event.pull_request.labels.*.name, 'release') name: Build Windows runs-on: [self-hosted, windows, x64] strategy: @@ -75,7 +76,15 @@ jobs: $latest = (Invoke-WebRequest -Headers @{"Accept"="application/vnd.github.v3+json";"Authorization"="token ${{ secrets.GITHUB_TOKEN }}"} $releases | ConvertFrom-Json)[0] $current_version_tag = [version]$latest.tag_name.replace('v','') echo "Current version" $current_version_tag - $new_version_tag = [version]::New($current_version_tag.Major,$current_version_tag.Minor,$current_version_tag.Build,$current_version_tag.Revision+1).toString() + if ($current_version_tag.Revision >= 9) { + if ($current_version_tag.Build >= 9) { + $new_version_tag = [version]::New($current_version_tag.Major,$current_version_tag.Minor+1,0,0).toString() + }else { + $new_version_tag = [version]::New($current_version_tag.Major,$current_version_tag.Minor,$current_version_tag.Build+1,0).toString() + } + }else { + $new_version_tag = [version]::New($current_version_tag.Major,$current_version_tag.Minor,$current_version_tag.Build,$current_version_tag.Revision+1).toString() + } echo $new_version_tag echo "new_version_tag=$new_version_tag" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append -- cgit v1.2.3 From e0d3333df269715d7d1480a859e083539a339ff7 Mon Sep 17 00:00:00 2001 From: Alex Dunn Date: Fri, 13 May 2022 09:57:42 -0700 Subject: Testing label trigger --- .github/workflows/update_release.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update_release.yml b/.github/workflows/update_release.yml index 734480134..ec0fcba52 100644 --- a/.github/workflows/update_release.yml +++ b/.github/workflows/update_release.yml @@ -3,13 +3,14 @@ name: Build release on: # push pull_request: - types: [closed, labeled] + types: [closed] branches: [ main ] jobs: windows-build: - # if: github.event.pull_request.merged == true - if: contains( github.event.pull_request.labels.*.name, 'release') + if: >- + github.event.pull_request.merged == true && + contains( github.event.pull_request.labels.*.name, 'release') name: Build Windows runs-on: [self-hosted, windows, x64] strategy: -- cgit v1.2.3 From d332746e3300fe5a32ee6d79d1755e761e7a0ef7 Mon Sep 17 00:00:00 2001 From: Alex Dunn Date: Fri, 13 May 2022 10:40:35 -0700 Subject: Testing label trigger changes --- .github/workflows/update_release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update_release.yml b/.github/workflows/update_release.yml index ec0fcba52..27d5e2783 100644 --- a/.github/workflows/update_release.yml +++ b/.github/workflows/update_release.yml @@ -77,8 +77,8 @@ jobs: $latest = (Invoke-WebRequest -Headers @{"Accept"="application/vnd.github.v3+json";"Authorization"="token ${{ secrets.GITHUB_TOKEN }}"} $releases | ConvertFrom-Json)[0] $current_version_tag = [version]$latest.tag_name.replace('v','') echo "Current version" $current_version_tag - if ($current_version_tag.Revision >= 9) { - if ($current_version_tag.Build >= 9) { + if ($current_version_tag.Revision.Equals(9)) { + if ($current_version_tag.Build.Equals(9)) { $new_version_tag = [version]::New($current_version_tag.Major,$current_version_tag.Minor+1,0,0).toString() }else { $new_version_tag = [version]::New($current_version_tag.Major,$current_version_tag.Minor,$current_version_tag.Build+1,0).toString() -- cgit v1.2.3 From 4be7a0cb5b83732e780a9247636a27172f2fb3e1 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 16 May 2022 12:56:14 +0200 Subject: use "default" as the default namespace remove ns_ prefix for namespaces on disk and in requests --- zenserver/cache/structuredcache.cpp | 28 ++++++++++++++-------------- zenserver/cache/structuredcachestore.h | 4 ++-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 02cecf3e1..9f0ff7408 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -2353,35 +2353,35 @@ TEST_CASE("z$service.parse.relative.Uri") CHECK(LegacyValueContentIdRequest.ValueContentId == IoHash::FromHexString("56789abcdef12345678956789abcdef123456789"sv)); HttpRequestData V2DefaultNamespaceRequest; - CHECK(HttpRequestParseRelativeUri("api/v2/ns_", V2DefaultNamespaceRequest)); + CHECK(HttpRequestParseRelativeUri("api/v2/default", V2DefaultNamespaceRequest)); CHECK(V2DefaultNamespaceRequest.Namespace == ZenCacheStore::DefaultNamespace); CHECK(!V2DefaultNamespaceRequest.Bucket.has_value()); CHECK(!V2DefaultNamespaceRequest.HashKey.has_value()); CHECK(!V2DefaultNamespaceRequest.ValueContentId.has_value()); HttpRequestData V2NamespaceRequest; - CHECK(HttpRequestParseRelativeUri("api/v2/ns_nicenamespace", V2NamespaceRequest)); + CHECK(HttpRequestParseRelativeUri("api/v2/nicenamespace", V2NamespaceRequest)); CHECK(V2NamespaceRequest.Namespace == "nicenamespace"sv); CHECK(!V2NamespaceRequest.Bucket.has_value()); CHECK(!V2NamespaceRequest.HashKey.has_value()); CHECK(!V2NamespaceRequest.ValueContentId.has_value()); HttpRequestData V2BucketRequestWithDefaultNamespace; - CHECK(HttpRequestParseRelativeUri("api/v2/ns_/test", V2BucketRequestWithDefaultNamespace)); + CHECK(HttpRequestParseRelativeUri("api/v2/default/test", V2BucketRequestWithDefaultNamespace)); CHECK(V2BucketRequestWithDefaultNamespace.Namespace == ZenCacheStore::DefaultNamespace); CHECK(V2BucketRequestWithDefaultNamespace.Bucket == "test"sv); CHECK(!V2BucketRequestWithDefaultNamespace.HashKey.has_value()); CHECK(!V2BucketRequestWithDefaultNamespace.ValueContentId.has_value()); HttpRequestData V2BucketRequestWithNamespace; - CHECK(HttpRequestParseRelativeUri("api/v2/ns_nicenamespace/test", V2BucketRequestWithNamespace)); + CHECK(HttpRequestParseRelativeUri("api/v2/nicenamespace/test", V2BucketRequestWithNamespace)); CHECK(V2BucketRequestWithNamespace.Namespace == "nicenamespace"sv); CHECK(V2BucketRequestWithNamespace.Bucket == "test"sv); CHECK(!V2BucketRequestWithNamespace.HashKey.has_value()); CHECK(!V2BucketRequestWithNamespace.ValueContentId.has_value()); HttpRequestData V2HashKeyRequest; - CHECK(HttpRequestParseRelativeUri("api/v2/ns_/test/0123456789abcdef12340123456789abcdef1234", V2HashKeyRequest)); + CHECK(HttpRequestParseRelativeUri("api/v2/default/test/0123456789abcdef12340123456789abcdef1234", V2HashKeyRequest)); CHECK(V2HashKeyRequest.Namespace == ZenCacheStore::DefaultNamespace); CHECK(V2HashKeyRequest.Bucket == "test"); CHECK(V2HashKeyRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); @@ -2389,7 +2389,7 @@ TEST_CASE("z$service.parse.relative.Uri") HttpRequestData V2ValueContentIdRequest; CHECK(HttpRequestParseRelativeUri( - "api/v2/ns_nicenamespace/test/0123456789abcdef12340123456789abcdef1234/56789abcdef12345678956789abcdef123456789", + "api/v2/nicenamespace/test/0123456789abcdef12340123456789abcdef1234/56789abcdef12345678956789abcdef123456789", V2ValueContentIdRequest)); CHECK(V2ValueContentIdRequest.Namespace == "nicenamespace"sv); CHECK(V2ValueContentIdRequest.Bucket == "test"sv); @@ -2397,16 +2397,16 @@ TEST_CASE("z$service.parse.relative.Uri") CHECK(V2ValueContentIdRequest.ValueContentId == IoHash::FromHexString("56789abcdef12345678956789abcdef123456789"sv)); HttpRequestData Invalid; - CHECK(!HttpRequestParseRelativeUri("api/v2/bla", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/", Invalid)); CHECK(!HttpRequestParseRelativeUri("api/v2//", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/ns_bad\2_namespace", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/ns_nice/\2\1bucket", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789a", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcdef1234", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/pppppppp89abcdef12340123456789abcdef1234", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/ns_namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcd", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/bad\2_namespace", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/nice/\2\1bucket", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/0123456789a", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcdef1234", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/pppppppp89abcdef12340123456789abcdef1234", Invalid)); + CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcd", Invalid)); CHECK(!HttpRequestParseRelativeUri( - "api/v2/ns_namespace/bucket/0123456789abcdef12340123456789abcdef1234/ppppppppdef12345678956789abcdef123456789", + "api/v2/namespace/bucket/0123456789abcdef12340123456789abcdef1234/ppppppppdef12345678956789abcdef123456789", Invalid)); } diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index b6d06432c..787cf2187 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -356,8 +356,8 @@ private: class ZenCacheStore final : public GcStorage, public GcContributor { public: - static constexpr std::string_view DefaultNamespace = ""; - static constexpr std::string_view NamespacePrefix = "ns_"; + static constexpr std::string_view DefaultNamespace = "default"; + static constexpr std::string_view NamespacePrefix = ""; ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath); ~ZenCacheStore(); -- cgit v1.2.3 From 2f69a30c936bc475bf85daded3706dc67f2e8a0f Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 16 May 2022 13:10:10 +0200 Subject: use ns_ prefix on disk only --- zenserver/cache/structuredcache.cpp | 5 ----- zenserver/cache/structuredcachestore.cpp | 10 ++++++---- zenserver/cache/structuredcachestore.h | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 9f0ff7408..6d3211c0b 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -119,11 +119,6 @@ namespace { { Key = Key.substr(HttpZCacheAPIV2Prefix.length()); // Namespace reference - if (!Key.starts_with(ZenCacheStore::NamespacePrefix)) - { - return false; - } - Key = Key.substr(ZenCacheStore::NamespacePrefix.length()); std::string_view::size_type NamespaceSplitOffset = Key.find_first_of('/'); std::string Namespace = ToLower(Key.substr(0, NamespaceSplitOffset)); diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 5d1d39c50..16130a98b 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2140,9 +2140,9 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor for (const std::filesystem::path& DirPath : DirContent.Directories) { std::string DirName = PathToUtf8(DirPath.stem()); - if (DirName.starts_with(NamespacePrefix)) + if (DirName.starts_with(NamespaceDiskPrefix)) { - Namespaces.push_back(DirName.substr(3)); + Namespaces.push_back(DirName.substr(NamespaceDiskPrefix.length())); continue; } LegacyBuckets.push_back(DirName); @@ -2153,7 +2153,8 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor if (std::find(Namespaces.begin(), Namespaces.end(), DefaultNamespace) == Namespaces.end()) { ZEN_INFO("Moving #{} legacy buckets to anonymous namespace", LegacyBuckets.size()); - std::filesystem::path DefaultNamespaceFolder = BasePath / fmt::format("{}{}", NamespacePrefix, DefaultNamespace); + + std::filesystem::path DefaultNamespaceFolder = BasePath / fmt::format("{}{}", NamespaceDiskPrefix, DefaultNamespace); CreateDirectories(DefaultNamespaceFolder); // Move any non-namespace folders into the default namespace folder @@ -2168,13 +2169,14 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor ZEN_ERROR("Unable to move '{}' to '{}', reason '{}'", LegacyFolder, NewPath, Ec.message()); } } + Namespaces.push_back(std::string(DefaultNamespace)); } for (const std::string& NamespaceName : Namespaces) { m_Namespaces[NamespaceName] = - std::make_unique(Gc, BasePath / fmt::format("{}{}", NamespacePrefix, NamespaceName)); + std::make_unique(Gc, BasePath / fmt::format("{}{}", NamespaceDiskPrefix, NamespaceName)); } } diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h index 787cf2187..232e8b9a8 100644 --- a/zenserver/cache/structuredcachestore.h +++ b/zenserver/cache/structuredcachestore.h @@ -356,8 +356,8 @@ private: class ZenCacheStore final : public GcStorage, public GcContributor { public: - static constexpr std::string_view DefaultNamespace = "default"; - static constexpr std::string_view NamespacePrefix = ""; + static constexpr std::string_view DefaultNamespace = "default"; + static constexpr std::string_view NamespaceDiskPrefix = "ns_"; ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath); ~ZenCacheStore(); -- cgit v1.2.3 From c442837aff6212c711e959a44fba7c070bcdcaf1 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 16 May 2022 15:55:07 +0200 Subject: drop api/v2 prefix for non-legacy requests --- zenserver/cache/structuredcache.cpp | 284 ++++++++++++++++++++++-------------- 1 file changed, 172 insertions(+), 112 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 6d3211c0b..b6b5b218e 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -87,127 +87,188 @@ namespace { std::optional ValueContentId; }; - bool IsValidNamespaceName(std::string_view Name) + const char* ValidNameCharacters = "abcdefghijklmnopqrstuvwxyz0123456789-_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + std::optional GetValidNamespaceName(std::string_view Name) { - if (Name == ZenCacheStore::DefaultNamespace) - { - return true; - } if (Name.empty()) { - return false; + return {}; } if (Name.length() > 64) { - return false; + return {}; } - return Name.find_first_not_of("abcdefghijklmnopqrstuvwxyz0123456789-_.") == std::string::npos; - } - bool IsValidBucketName(std::string_view Name) - { - if (Name.empty()) + if (Name.find_first_not_of(ValidNameCharacters) != std::string::npos) { - return false; + return {}; } - return std::all_of(begin(Name), end(Name), [](const char c) { return std::isalnum(c); }); + return ToLower(Name); } - bool HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) + std::optional GetValidBucketName(std::string_view Name) { - if (Key.starts_with(HttpZCacheAPIV2Prefix)) + if (Name.empty()) { - Key = Key.substr(HttpZCacheAPIV2Prefix.length()); - // Namespace reference - std::string_view::size_type NamespaceSplitOffset = Key.find_first_of('/'); - - std::string Namespace = ToLower(Key.substr(0, NamespaceSplitOffset)); - if (!IsValidNamespaceName(Namespace)) - { - return false; - } - - Data.Namespace = Namespace; - - if (NamespaceSplitOffset == std::string_view::npos) - { - return true; - } - Key = Key.substr(NamespaceSplitOffset + 1); + return {}; } - else + if (Name.find_first_not_of(ValidNameCharacters) != std::string::npos) { - Data.Namespace = ZenCacheStore::DefaultNamespace; + return {}; } + return ToLower(Name); + } - std::string_view::size_type BucketSplitOffset = Key.find_first_of('/'); - std::string Bucket = ToLower(Key.substr(0, BucketSplitOffset)); - if (!IsValidBucketName(Bucket)) + std::optional GetValidIoHash(std::string_view Hash) + { + if (Hash.length() != IoHash::StringLength) { - return false; + return {}; } - Data.Bucket = Bucket; - if (BucketSplitOffset == std::string_view::npos) + IoHash KeyHash; + if (!ParseHexBytes(Hash.data(), Hash.size(), KeyHash.Hash)) { - // Bucket reference - return true; + return {}; } + return KeyHash; + } - std::string_view HashSegment; - std::string_view ValueSegment; + bool HttpRequestParseRelativeUri(std::string_view Key, HttpRequestData& Data) + { + std::vector Tokens; + uint32_t TokenCount = zen::ForEachStrTok(Key, '/', [&](const std::string_view& Token) { + Tokens.push_back(Token); + return true; + }); - std::string_view::size_type ValueSplitOffset = Key.find_last_of('/'); - if (ValueSplitOffset == BucketSplitOffset) - { - // Basic cache record lookup - HashSegment = Key.substr(BucketSplitOffset + 1); - } - else + switch (TokenCount) { - // Cache record + valueid lookup - HashSegment = Key.substr(BucketSplitOffset + 1, ValueSplitOffset - BucketSplitOffset - 1); - ValueSegment = Key.substr(ValueSplitOffset + 1); - } + case 1: + Data.Namespace = GetValidNamespaceName(Tokens[0]); + return Data.Namespace.has_value(); + case 2: + { + std::optional PossibleHashKey = GetValidIoHash(Tokens[1]); + if (PossibleHashKey.has_value()) + { + // Legacy bucket/key request + Data.Bucket = GetValidBucketName(Tokens[0]); + if (!Data.Bucket.has_value()) + { + return false; + } + Data.HashKey = PossibleHashKey; + Data.Namespace = ZenCacheStore::DefaultNamespace; + return true; + } + Data.Namespace = GetValidNamespaceName(Tokens[0]); + if (!Data.Namespace.has_value()) + { + return false; + } + Data.Bucket = GetValidBucketName(Tokens[1]); + if (!Data.Bucket.has_value()) + { + return false; + } + return true; + } + case 3: + { + std::optional PossibleHashKey = GetValidIoHash(Tokens[1]); + if (PossibleHashKey.has_value()) + { + // Legacy bucket/key/valueid request + Data.Bucket = GetValidBucketName(Tokens[0]); + if (!Data.Bucket.has_value()) + { + return false; + } + Data.HashKey = PossibleHashKey; + Data.ValueContentId = GetValidIoHash(Tokens[2]); + if (!Data.ValueContentId.has_value()) + { + return false; + } + Data.Namespace = ZenCacheStore::DefaultNamespace; + return true; + } + Data.Namespace = GetValidNamespaceName(Tokens[0]); + if (!Data.Namespace.has_value()) + { + return false; + } + Data.Bucket = GetValidBucketName(Tokens[1]); + if (!Data.Bucket.has_value()) + { + return false; + } + Data.HashKey = GetValidIoHash(Tokens[2]); + if (!Data.HashKey) + { + return false; + } + return true; + } + case 4: + { + Data.Namespace = GetValidNamespaceName(Tokens[0]); + if (!Data.Namespace.has_value()) + { + return false; + } - if (HashSegment.size() != IoHash::StringLength) - { - return false; - } + Data.Bucket = GetValidBucketName(Tokens[1]); + if (!Data.Bucket.has_value()) + { + return false; + } - IoHash KeyHash; - if (!ParseHexBytes(HashSegment.data(), HashSegment.size(), KeyHash.Hash)) - { - return false; - } + Data.HashKey = GetValidIoHash(Tokens[2]); + if (!Data.HashKey.has_value()) + { + return false; + } - Data.HashKey = KeyHash; + Data.ValueContentId = GetValidIoHash(Tokens[3]); + if (!Data.ValueContentId.has_value()) + { + return false; + } + return true; + } + default: + return false; + } + } - if (ValueSegment.empty()) + bool GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) + { + CbFieldView NamespaceField = KeyView["Namespace"sv]; + std::optional Namespace; + if (!NamespaceField) { - return true; + Namespace = ZenCacheStore::DefaultNamespace; } - - if (ValueSegment.size() != IoHash::StringLength) + else { - return false; + if (NamespaceField.HasError()) + { + return false; + } + if (!NamespaceField.IsString()) + { + return false; + } + Namespace = GetValidNamespaceName(NamespaceField.AsString()); } - - IoHash ValueHash; - if (!ParseHexBytes(ValueSegment.data(), ValueSegment.size(), ValueHash.Hash)) + if (!Namespace.has_value()) { return false; } - Data.ValueContentId = ValueHash; - - return true; - } - - bool GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) - { - CbFieldView NamespaceField = KeyView["Namespace"sv]; - std::string_view Namespace = NamespaceField.AsString(ZenCacheStore::DefaultNamespace); - CbFieldView BucketField = KeyView["Bucket"sv]; + CbFieldView BucketField = KeyView["Bucket"sv]; if (BucketField.HasError()) { return false; @@ -216,8 +277,8 @@ namespace { { return false; } - std::string_view Bucket = BucketField.AsString(); - if (!IsValidBucketName(Bucket)) + std::optional Bucket = GetValidBucketName(BucketField.AsString()); + if (!Bucket.has_value()) { return false; } @@ -231,7 +292,7 @@ namespace { return false; } IoHash Hash = HashField.AsHash(); - Key = CacheKey::Create(Namespace, Bucket, Hash); + Key = CacheKey::Create(*Namespace, *Bucket, Hash); return true; } @@ -2325,12 +2386,12 @@ HttpStructuredCacheService::HandleStatusRequest(zen::HttpServerRequest& Request) TEST_CASE("z$service.parse.relative.Uri") { - HttpRequestData LegacyBucketRequest; - CHECK(HttpRequestParseRelativeUri("test", LegacyBucketRequest)); - CHECK(LegacyBucketRequest.Namespace == ZenCacheStore::DefaultNamespace); - CHECK(LegacyBucketRequest.Bucket == "test"sv); - CHECK(!LegacyBucketRequest.HashKey.has_value()); - CHECK(!LegacyBucketRequest.ValueContentId.has_value()); + HttpRequestData LegacyBucketRequestBecomesNamespaceRequest; + CHECK(HttpRequestParseRelativeUri("test", LegacyBucketRequestBecomesNamespaceRequest)); + CHECK(LegacyBucketRequestBecomesNamespaceRequest.Namespace == "test"sv); + CHECK(!LegacyBucketRequestBecomesNamespaceRequest.Bucket.has_value()); + CHECK(!LegacyBucketRequestBecomesNamespaceRequest.HashKey.has_value()); + CHECK(!LegacyBucketRequestBecomesNamespaceRequest.ValueContentId.has_value()); HttpRequestData LegacyHashKeyRequest; CHECK(HttpRequestParseRelativeUri("test/0123456789abcdef12340123456789abcdef1234", LegacyHashKeyRequest)); @@ -2348,61 +2409,60 @@ TEST_CASE("z$service.parse.relative.Uri") CHECK(LegacyValueContentIdRequest.ValueContentId == IoHash::FromHexString("56789abcdef12345678956789abcdef123456789"sv)); HttpRequestData V2DefaultNamespaceRequest; - CHECK(HttpRequestParseRelativeUri("api/v2/default", V2DefaultNamespaceRequest)); + CHECK(HttpRequestParseRelativeUri("default", V2DefaultNamespaceRequest)); CHECK(V2DefaultNamespaceRequest.Namespace == ZenCacheStore::DefaultNamespace); CHECK(!V2DefaultNamespaceRequest.Bucket.has_value()); CHECK(!V2DefaultNamespaceRequest.HashKey.has_value()); CHECK(!V2DefaultNamespaceRequest.ValueContentId.has_value()); HttpRequestData V2NamespaceRequest; - CHECK(HttpRequestParseRelativeUri("api/v2/nicenamespace", V2NamespaceRequest)); + CHECK(HttpRequestParseRelativeUri("nicenamespace", V2NamespaceRequest)); CHECK(V2NamespaceRequest.Namespace == "nicenamespace"sv); CHECK(!V2NamespaceRequest.Bucket.has_value()); CHECK(!V2NamespaceRequest.HashKey.has_value()); CHECK(!V2NamespaceRequest.ValueContentId.has_value()); HttpRequestData V2BucketRequestWithDefaultNamespace; - CHECK(HttpRequestParseRelativeUri("api/v2/default/test", V2BucketRequestWithDefaultNamespace)); + CHECK(HttpRequestParseRelativeUri("default/test", V2BucketRequestWithDefaultNamespace)); CHECK(V2BucketRequestWithDefaultNamespace.Namespace == ZenCacheStore::DefaultNamespace); CHECK(V2BucketRequestWithDefaultNamespace.Bucket == "test"sv); CHECK(!V2BucketRequestWithDefaultNamespace.HashKey.has_value()); CHECK(!V2BucketRequestWithDefaultNamespace.ValueContentId.has_value()); HttpRequestData V2BucketRequestWithNamespace; - CHECK(HttpRequestParseRelativeUri("api/v2/nicenamespace/test", V2BucketRequestWithNamespace)); + CHECK(HttpRequestParseRelativeUri("nicenamespace/test", V2BucketRequestWithNamespace)); CHECK(V2BucketRequestWithNamespace.Namespace == "nicenamespace"sv); CHECK(V2BucketRequestWithNamespace.Bucket == "test"sv); CHECK(!V2BucketRequestWithNamespace.HashKey.has_value()); CHECK(!V2BucketRequestWithNamespace.ValueContentId.has_value()); HttpRequestData V2HashKeyRequest; - CHECK(HttpRequestParseRelativeUri("api/v2/default/test/0123456789abcdef12340123456789abcdef1234", V2HashKeyRequest)); + CHECK(HttpRequestParseRelativeUri("default/test/0123456789abcdef12340123456789abcdef1234", V2HashKeyRequest)); CHECK(V2HashKeyRequest.Namespace == ZenCacheStore::DefaultNamespace); CHECK(V2HashKeyRequest.Bucket == "test"); CHECK(V2HashKeyRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); CHECK(!V2HashKeyRequest.ValueContentId.has_value()); HttpRequestData V2ValueContentIdRequest; - CHECK(HttpRequestParseRelativeUri( - "api/v2/nicenamespace/test/0123456789abcdef12340123456789abcdef1234/56789abcdef12345678956789abcdef123456789", - V2ValueContentIdRequest)); + CHECK( + HttpRequestParseRelativeUri("nicenamespace/test/0123456789abcdef12340123456789abcdef1234/56789abcdef12345678956789abcdef123456789", + V2ValueContentIdRequest)); CHECK(V2ValueContentIdRequest.Namespace == "nicenamespace"sv); CHECK(V2ValueContentIdRequest.Bucket == "test"sv); CHECK(V2ValueContentIdRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); CHECK(V2ValueContentIdRequest.ValueContentId == IoHash::FromHexString("56789abcdef12345678956789abcdef123456789"sv)); HttpRequestData Invalid; - CHECK(!HttpRequestParseRelativeUri("api/v2/", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2//", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/bad\2_namespace", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/nice/\2\1bucket", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/0123456789a", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcdef1234", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/pppppppp89abcdef12340123456789abcdef1234", Invalid)); - CHECK(!HttpRequestParseRelativeUri("api/v2/namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcd", Invalid)); - CHECK(!HttpRequestParseRelativeUri( - "api/v2/namespace/bucket/0123456789abcdef12340123456789abcdef1234/ppppppppdef12345678956789abcdef123456789", - Invalid)); + CHECK(!HttpRequestParseRelativeUri("", Invalid)); + CHECK(!HttpRequestParseRelativeUri("/", Invalid)); + CHECK(!HttpRequestParseRelativeUri("bad\2_namespace", Invalid)); + CHECK(!HttpRequestParseRelativeUri("nice/\2\1bucket", Invalid)); + CHECK(!HttpRequestParseRelativeUri("namespace/bucket/0123456789a", Invalid)); + CHECK(!HttpRequestParseRelativeUri("namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcdef1234", Invalid)); + CHECK(!HttpRequestParseRelativeUri("namespace/bucket/pppppppp89abcdef12340123456789abcdef1234", Invalid)); + CHECK(!HttpRequestParseRelativeUri("namespace/bucket/0123456789abcdef12340123456789abcdef1234/56789abcd", Invalid)); + CHECK(!HttpRequestParseRelativeUri("namespace/bucket/0123456789abcdef12340123456789abcdef1234/ppppppppdef12345678956789abcdef123456789", + Invalid)); } #endif -- cgit v1.2.3 From d0440e215136a3b4dbb417f4fe317b8bb223e094 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Mon, 16 May 2022 17:01:45 +0200 Subject: review feedback --- zenserver/cache/structuredcache.cpp | 53 ++++++++++++++++++------------------- zenserver/cache/structuredcache.h | 10 +++---- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index b6b5b218e..c457c59b2 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -76,8 +76,7 @@ struct PutRequestData }; namespace { - static constexpr std::string_view HttpZCacheAPIV2Prefix = "api/v2/"sv; - static constexpr std::string_view HttpZCacheRPCPrefix = "$rpc"sv; + static constexpr std::string_view HttpZCacheRPCPrefix = "$rpc"sv; struct HttpRequestData { @@ -462,19 +461,19 @@ HttpStructuredCacheService::HandleCacheBucketRequest(HttpServerRequest& Request, } void -HttpStructuredCacheService::HandleCacheRecordRequest(HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL) +HttpStructuredCacheService::HandleCacheRecordRequest(HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl) { switch (Request.RequestVerb()) { case HttpVerb::kHead: case HttpVerb::kGet: { - HandleGetCacheRecord(Request, Ref, PolicyFromURL); + HandleGetCacheRecord(Request, Ref, PolicyFromUrl); } break; case HttpVerb::kPut: - HandlePutCacheRecord(Request, Ref, PolicyFromURL); + HandlePutCacheRecord(Request, Ref, PolicyFromUrl); break; default: break; @@ -482,20 +481,20 @@ HttpStructuredCacheService::HandleCacheRecordRequest(HttpServerRequest& Request, } void -HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL) +HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl) { const ZenContentType AcceptType = Request.AcceptContentType(); - const bool SkipData = EnumHasAllFlags(PolicyFromURL, CachePolicy::SkipData); - const bool PartialRecord = EnumHasAllFlags(PolicyFromURL, CachePolicy::PartialRecord); + const bool SkipData = EnumHasAllFlags(PolicyFromUrl, CachePolicy::SkipData); + const bool PartialRecord = EnumHasAllFlags(PolicyFromUrl, CachePolicy::PartialRecord); bool Success = false; ZenCacheValue ClientResultValue; - if (!EnumHasAnyFlags(PolicyFromURL, CachePolicy::Query)) + if (!EnumHasAnyFlags(PolicyFromUrl, CachePolicy::Query)) { return Request.WriteResponse(HttpResponseCode::OK); } - if (EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryLocal) && + if (EnumHasAllFlags(PolicyFromUrl, CachePolicy::QueryLocal) && m_CacheStore.Get(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ClientResultValue)) { Success = true; @@ -575,7 +574,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request return Request.WriteResponse(HttpResponseCode::OK, ClientResultValue.Value.GetContentType(), ClientResultValue.Value); } } - else if (!EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryRemote)) + else if (!EnumHasAllFlags(PolicyFromUrl, CachePolicy::QueryRemote)) { ZEN_DEBUG("MISS - '{}/{}/{}' '{}'", Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ToString(AcceptType)); m_CacheStats.MissCount++; @@ -585,12 +584,12 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request // Issue upstream query asynchronously in order to keep requests flowing without // hogging I/O servicing threads with blocking work - Request.WriteResponseAsync([this, AcceptType, PolicyFromURL, Ref](HttpServerRequest& AsyncRequest) { + Request.WriteResponseAsync([this, AcceptType, PolicyFromUrl, Ref](HttpServerRequest& AsyncRequest) { bool Success = false; - const bool PartialRecord = EnumHasAllFlags(PolicyFromURL, CachePolicy::PartialRecord); - const bool QueryLocal = EnumHasAllFlags(PolicyFromURL, CachePolicy::QueryLocal); - const bool StoreLocal = EnumHasAllFlags(PolicyFromURL, CachePolicy::StoreLocal); - const bool SkipData = EnumHasAllFlags(PolicyFromURL, CachePolicy::SkipData); + const bool PartialRecord = EnumHasAllFlags(PolicyFromUrl, CachePolicy::PartialRecord); + const bool QueryLocal = EnumHasAllFlags(PolicyFromUrl, CachePolicy::QueryLocal); + const bool StoreLocal = EnumHasAllFlags(PolicyFromUrl, CachePolicy::StoreLocal); + const bool SkipData = EnumHasAllFlags(PolicyFromUrl, CachePolicy::SkipData); ZenCacheValue ClientResultValue; metrics::OperationTiming::Scope $(m_UpstreamGetRequestTiming); @@ -749,7 +748,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request } void -HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL) +HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl) { IoBuffer Body = Request.ReadPayload(); @@ -767,7 +766,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request ZEN_DEBUG("PUT - '{}/{}/{}' {} '{}'", Ref.Namespace, Ref.BucketSegment, Ref.HashKey, NiceBytes(Body.Size()), ToString(ContentType)); m_CacheStore.Put(Ref.Namespace, Ref.BucketSegment, Ref.HashKey, {.Value = Body}); - if (EnumHasAllFlags(PolicyFromURL, CachePolicy::StoreRemote)) + if (EnumHasAllFlags(PolicyFromUrl, CachePolicy::StoreRemote)) { m_UpstreamCache.EnqueueUpstream({.Type = ContentType, .Key = {Ref.Namespace, Ref.BucketSegment, Ref.HashKey}}); } @@ -788,7 +787,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request return Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "Compact binary validation failed"sv); } - CachePolicy Policy = PolicyFromURL; + CachePolicy Policy = PolicyFromUrl; CbObjectView CacheRecord(Body.Data()); std::vector ValidAttachments; int32_t TotalCount = 0; @@ -834,7 +833,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request ZEN_WARN("PUT - '{}/{}/{}' '{}' FAILED, invalid package", Ref.Namespace, Ref.BucketSegment, Ref.HashKey, ToString(ContentType)); return Request.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "Invalid package"sv); } - CachePolicy Policy = PolicyFromURL; + CachePolicy Policy = PolicyFromUrl; CbObject CacheRecord = Package.GetObject(); AttachmentCount Count; @@ -917,16 +916,16 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request } void -HttpStructuredCacheService::HandleCacheValueRequest(HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL) +HttpStructuredCacheService::HandleCacheValueRequest(HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl) { switch (Request.RequestVerb()) { case HttpVerb::kHead: case HttpVerb::kGet: - HandleGetCacheValue(Request, Ref, PolicyFromURL); + HandleGetCacheValue(Request, Ref, PolicyFromUrl); break; case HttpVerb::kPut: - HandlePutCacheValue(Request, Ref, PolicyFromURL); + HandlePutCacheValue(Request, Ref, PolicyFromUrl); break; default: break; @@ -934,13 +933,13 @@ HttpStructuredCacheService::HandleCacheValueRequest(HttpServerRequest& Request, } void -HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL) +HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl) { Stopwatch Timer; IoBuffer Value = m_CidStore.FindChunkByCid(Ref.ValueContentId); bool InUpstreamCache = false; - CachePolicy Policy = PolicyFromURL; + CachePolicy Policy = PolicyFromUrl; { const bool QueryUpstream = !Value && EnumHasAllFlags(Policy, CachePolicy::QueryRemote); @@ -1002,10 +1001,10 @@ HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, } void -HttpStructuredCacheService::HandlePutCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL) +HttpStructuredCacheService::HandlePutCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl) { // Note: Individual cacherecord values are not propagated upstream until a valid cache record has been stored - ZEN_UNUSED(PolicyFromURL); + ZEN_UNUSED(PolicyFromUrl); Stopwatch Timer; diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index 4b3b8fa52..5f248edd1 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -99,12 +99,12 @@ private: Invalid, }; - void HandleCacheRecordRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); + void HandleCacheRecordRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); void HandleGetCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); - void HandlePutCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandleCacheValueRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandleGetCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); - void HandlePutCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromURL); + void HandlePutCacheRecord(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); + void HandleCacheValueRequest(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); + void HandleGetCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); + void HandlePutCacheValue(zen::HttpServerRequest& Request, const CacheRef& Ref, CachePolicy PolicyFromUrl); void HandleRpcRequest(zen::HttpServerRequest& Request); void HandleRpcPutCacheRecords(zen::HttpServerRequest& Request, const CbPackage& BatchRequest); void HandleRpcGetCacheRecords(zen::HttpServerRequest& Request, CbObjectView BatchRequest); -- cgit v1.2.3 From a8f84317c4caec04ede7744356da1b7b2f15c545 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Tue, 17 May 2022 15:22:54 +0200 Subject: fix release build, misplaced namespace brackets --- zenserver/cache/structuredcachestore.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 16130a98b..6b7b73dcf 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2284,12 +2284,8 @@ ZenCacheStore::StorageSize() const ////////////////////////////////////////////////////////////////////////// -} // namespace zen - #if ZEN_WITH_TESTS -namespace zen { - using namespace std::literals; namespace testutils { -- cgit v1.2.3 From 3f996c64b7ebc3fc39c2320c2d0a2a49496e2249 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Wed, 18 May 2022 09:37:29 +0200 Subject: align bucket naming rules with UE code base --- zenserver/cache/structuredcache.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index c457c59b2..bc6f31dd3 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -86,7 +86,8 @@ namespace { std::optional ValueContentId; }; - const char* ValidNameCharacters = "abcdefghijklmnopqrstuvwxyz0123456789-_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const char* ValidNamespaceNameCharacters = "abcdefghijklmnopqrstuvwxyz0123456789-_.ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const char* ValidBucketNameCharacters = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; std::optional GetValidNamespaceName(std::string_view Name) { @@ -99,7 +100,7 @@ namespace { return {}; } - if (Name.find_first_not_of(ValidNameCharacters) != std::string::npos) + if (Name.find_first_not_of(ValidNamespaceNameCharacters) != std::string::npos) { return {}; } @@ -112,7 +113,7 @@ namespace { { return {}; } - if (Name.find_first_not_of(ValidNameCharacters) != std::string::npos) + if (Name.find_first_not_of(ValidBucketNameCharacters) != std::string::npos) { return {}; } -- cgit v1.2.3 From a9130d34b5318b0da5d3547c432a8734213fbe9b Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 19 May 2022 11:37:25 +0200 Subject: Keep Namespace out of CacheKey and store it on request level RPC requests now has a Namespace field under Params instead of one Namespace per cache key Fall back to legacy upstream HTTP URI format if default namespace is requested --- zenserver-test/zenserver-test.cpp | 49 ++++--- zenserver/cache/structuredcache.cpp | 221 +++++++++++++++++-------------- zenserver/cache/structuredcache.h | 17 ++- zenserver/upstream/upstreamcache.cpp | 103 +++++++++----- zenserver/upstream/upstreamcache.h | 26 ++-- zenserver/upstream/zen.cpp | 47 +++++-- zenserver/upstream/zen.h | 16 ++- zenutil/include/zenutil/cache/cachekey.h | 10 +- 8 files changed, 301 insertions(+), 188 deletions(-) diff --git a/zenserver-test/zenserver-test.cpp b/zenserver-test/zenserver-test.cpp index 0f4858bd5..c79b540ea 100644 --- a/zenserver-test/zenserver-test.cpp +++ b/zenserver-test/zenserver-test.cpp @@ -1462,7 +1462,7 @@ TEST_CASE("zcache.rpc") { zen::IoHash KeyHash; ((uint32_t*)(KeyHash.Hash))[0] = Key; - const zen::CacheKey CacheKey = zen::CacheKey::Create(Namespace, Bucket, KeyHash); + const zen::CacheKey CacheKey = zen::CacheKey::Create(Bucket, KeyHash); CbPackage Package; CbWriter Writer; @@ -1474,6 +1474,7 @@ TEST_CASE("zcache.rpc") { CachePolicy BatchDefaultPolicy = CachePolicy::Default; Writer << "DefaultPolicy"sv << WriteToString<128>(BatchDefaultPolicy); + Writer << "Namespace"sv << Namespace; Writer.BeginArray("Requests"sv); { AppendCacheRecord(Package, Writer, CacheKey, PayloadSize, BatchDefaultPolicy, CachePolicy::Default); @@ -1505,7 +1506,10 @@ TEST_CASE("zcache.rpc") bool Success; }; - auto GetCacheRecords = [](std::string_view BaseUri, std::span Keys, zen::CachePolicy Policy) -> GetCacheRecordResult { + auto GetCacheRecords = [](std::string_view BaseUri, + std::string_view Namespace, + std::span Keys, + zen::CachePolicy Policy) -> GetCacheRecordResult { using namespace zen; CbObjectWriter Request; @@ -1514,6 +1518,8 @@ TEST_CASE("zcache.rpc") Request.BeginObject("Params"sv); { Request << "DefaultPolicy"sv << WriteToString<128>(Policy); + Request << "Namespace"sv << Namespace; + Request.BeginArray("Requests"sv); for (const CacheKey& Key : Keys) { @@ -1563,9 +1569,7 @@ TEST_CASE("zcache.rpc") auto LoadKey = [](zen::CbFieldView KeyView) -> zen::CacheKey { if (zen::CbObjectView KeyObj = KeyView.AsObjectView()) { - return CacheKey::Create(KeyObj["Namespace"sv] ? KeyObj["Namespace"sv].AsString() : ""sv, - KeyObj["Bucket"sv].AsString(), - KeyObj["Hash"].AsHash()); + return CacheKey::Create(KeyObj["Bucket"sv].AsString(), KeyObj["Hash"].AsHash()); } return CacheKey::Empty; }; @@ -1582,8 +1586,8 @@ TEST_CASE("zcache.rpc") Inst.WaitUntilReady(); CachePolicy Policy = CachePolicy::Default; - std::vector Keys = PutCacheRecords(BaseUri, ""sv, "mastodon"sv, 128); - GetCacheRecordResult Result = GetCacheRecords(BaseUri, Keys, Policy); + std::vector Keys = PutCacheRecords(BaseUri, "default"sv, "mastodon"sv, 128); + GetCacheRecordResult Result = GetCacheRecords(BaseUri, "default"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1593,11 +1597,9 @@ TEST_CASE("zcache.rpc") CbObjectView RecordObj = RecordView.AsObjectView(); CbObjectView KeyObj = RecordObj["Key"sv].AsObjectView(); - const CacheKey Key = CacheKey::Create(KeyObj["Namespace"sv] ? KeyObj["Namespace"sv].AsString() : ""sv, - KeyObj["Bucket"sv].AsString(), - KeyObj["Hash"].AsHash()); - IoHash AttachmentHash; - size_t NumValues = 0; + const CacheKey Key = CacheKey::Create(KeyObj["Bucket"sv].AsString(), KeyObj["Hash"].AsHash()); + IoHash AttachmentHash; + size_t NumValues = 0; for (CbFieldView Value : RecordObj["Values"sv]) { AttachmentHash = Value.AsObjectView()["RawHash"sv].AsHash(); @@ -1623,16 +1625,16 @@ TEST_CASE("zcache.rpc") Inst.WaitUntilReady(); CachePolicy Policy = CachePolicy::Default; - std::vector ExistingKeys = PutCacheRecords(BaseUri, ""sv, "mastodon"sv, 128); + std::vector ExistingKeys = PutCacheRecords(BaseUri, "default"sv, "mastodon"sv, 128); std::vector Keys; for (const zen::CacheKey& Key : ExistingKeys) { Keys.push_back(Key); - Keys.push_back(CacheKey::Create("missing"sv, "missing"sv, IoHash::Zero)); + Keys.push_back(CacheKey::Create("missing"sv, IoHash::Zero)); } - GetCacheRecordResult Result = GetCacheRecords(BaseUri, Keys, Policy); + GetCacheRecordResult Result = GetCacheRecords(BaseUri, "default"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1677,10 +1679,10 @@ TEST_CASE("zcache.rpc") SpawnServer(UpstreamServer, UpstreamCfg); SpawnServer(LocalServer, LocalCfg); - std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, ""sv, "mastodon"sv, 4); + std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "default"sv, "mastodon"sv, 4); CachePolicy Policy = CachePolicy::QueryLocal; - GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, Keys, Policy); + GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, "default"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1702,10 +1704,10 @@ TEST_CASE("zcache.rpc") SpawnServer(UpstreamServer, UpstreamCfg); SpawnServer(LocalServer, LocalCfg); - std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, ""sv, "mastodon"sv, 4); + std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "default"sv, "mastodon"sv, 4); CachePolicy Policy = (CachePolicy::QueryLocal | CachePolicy::QueryRemote); - GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, Keys, Policy); + GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, "default"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1736,7 +1738,7 @@ TEST_CASE("zcache.rpc.allpolicies") std::string_view TestVersion = "F72150A02AE34B57A9EC91D36BA1CE08"sv; std::string_view TestBucket = "allpoliciestest"sv; - std::string_view TestNamespace = ""sv; + std::string_view TestNamespace = "default"sv; // NumKeys = (2 Value vs Record)*(2 SkipData vs Default)*(2 ForceMiss vs Not)*(2 use local) // *(2 use remote)*(2 UseValue Policy vs not)*(4 cases per type) @@ -1838,7 +1840,7 @@ TEST_CASE("zcache.rpc.allpolicies") IoHash KeyHash = KeyWriter.GetHash(); KeyData& KeyData = KeyDatas[KeyIndex]; - KeyData.Key = CacheKey::Create(TestNamespace, TestBucket, KeyHash); + KeyData.Key = CacheKey::Create(TestBucket, KeyHash); KeyData.KeyIndex = KeyIndex; KeyData.GetRequestsData = (KeyIndex & (1 << 1)) == 0; KeyData.UseValueAPI = (KeyIndex & (1 << 2)) != 0; @@ -1937,6 +1939,7 @@ TEST_CASE("zcache.rpc.allpolicies") { CachePolicy BatchDefaultPolicy = CachePolicy::Default; Writer << "DefaultPolicy"sv << WriteToString<128>(BatchDefaultPolicy); + Writer << "Namespace"sv << TestNamespace; Writer.BeginArray("Requests"sv); for (CachePutRequest& Request : PutRequests) { @@ -1992,6 +1995,7 @@ TEST_CASE("zcache.rpc.allpolicies") { CachePolicy BatchDefaultPolicy = CachePolicy::Default; Writer << "DefaultPolicy"sv << WriteToString<128>(BatchDefaultPolicy); + Writer << "Namespace"sv << TestNamespace; Writer.BeginArray("Requests"sv); for (CachePutValueRequest& Request : PutValueRequests) { @@ -2047,6 +2051,7 @@ TEST_CASE("zcache.rpc.allpolicies") { CachePolicy BatchDefaultPolicy = CachePolicy::Default; Writer << "DefaultPolicy"sv << WriteToString<128>(BatchDefaultPolicy); + Writer << "Namespace"sv << TestNamespace; Writer.BeginArray("Requests"sv); for (CacheGetRequest& Request : GetRequests) { @@ -2158,6 +2163,7 @@ TEST_CASE("zcache.rpc.allpolicies") { CachePolicy BatchDefaultPolicy = CachePolicy::Default; Writer << "DefaultPolicy"sv << WriteToString<128>(BatchDefaultPolicy); + Writer << "Namespace"sv << TestNamespace; Writer.BeginArray("Requests"sv); for (CacheGetValueRequest& Request : GetValueRequests) { @@ -2252,6 +2258,7 @@ TEST_CASE("zcache.rpc.allpolicies") { CachePolicy BatchDefaultPolicy = CachePolicy::Default; Writer << "DefaultPolicy"sv << WriteToString<128>(BatchDefaultPolicy); + Writer << "Namespace"sv << TestNamespace; Writer.BeginArray("ChunkRequests"sv); for (CacheGetChunkRequest& Request : ChunkRequests) { diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index bc6f31dd3..a349f13e1 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -70,6 +70,7 @@ struct AttachmentCount struct PutRequestData { + std::string Namespace; CacheKey Key; CbObjectView RecordObject; CacheRecordPolicy Policy; @@ -244,30 +245,27 @@ namespace { } } - bool GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) + std::optional GetRpcRequestNamespace(const CbObjectView Params) { - CbFieldView NamespaceField = KeyView["Namespace"sv]; - std::optional Namespace; + CbFieldView NamespaceField = Params["Namespace"sv]; if (!NamespaceField) { - Namespace = ZenCacheStore::DefaultNamespace; + return std::string(ZenCacheStore::DefaultNamespace); } - else + + if (NamespaceField.HasError()) { - if (NamespaceField.HasError()) - { - return false; - } - if (!NamespaceField.IsString()) - { - return false; - } - Namespace = GetValidNamespaceName(NamespaceField.AsString()); + return {}; } - if (!Namespace.has_value()) + if (!NamespaceField.IsString()) { - return false; + return {}; } + return GetValidNamespaceName(NamespaceField.AsString()); + } + + bool GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) + { CbFieldView BucketField = KeyView["Bucket"sv]; if (BucketField.HasError()) { @@ -292,7 +290,7 @@ namespace { return false; } IoHash Hash = HashField.AsHash(); - Key = CacheKey::Create(*Namespace, *Bucket, Hash); + Key = CacheKey::Create(*Bucket, Hash); return true; } @@ -596,7 +594,7 @@ HttpStructuredCacheService::HandleGetCacheRecord(zen::HttpServerRequest& Request metrics::OperationTiming::Scope $(m_UpstreamGetRequestTiming); if (GetUpstreamCacheResult UpstreamResult = - m_UpstreamCache.GetCacheRecord({Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, AcceptType); + m_UpstreamCache.GetCacheRecord(Ref.Namespace, {Ref.BucketSegment, Ref.HashKey}, AcceptType); UpstreamResult.Success) { Success = true; @@ -769,7 +767,7 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (EnumHasAllFlags(PolicyFromUrl, CachePolicy::StoreRemote)) { - m_UpstreamCache.EnqueueUpstream({.Type = ContentType, .Key = {Ref.Namespace, Ref.BucketSegment, Ref.HashKey}}); + m_UpstreamCache.EnqueueUpstream({.Type = ContentType, .Namespace = Ref.Namespace, .Key = {Ref.BucketSegment, Ref.HashKey}}); } Request.WriteResponse(HttpResponseCode::Created); @@ -819,7 +817,8 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (EnumHasAllFlags(Policy, CachePolicy::StoreRemote) && !IsPartialRecord) { m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCbObject, - .Key = {Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, + .Namespace = Ref.Namespace, + .Key = {Ref.BucketSegment, Ref.HashKey}, .ValueContentIds = std::move(ValidAttachments)}); } @@ -904,7 +903,8 @@ HttpStructuredCacheService::HandlePutCacheRecord(zen::HttpServerRequest& Request if (EnumHasAllFlags(Policy, CachePolicy::StoreRemote) && !IsPartialRecord) { m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCbPackage, - .Key = {Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, + .Namespace = Ref.Namespace, + .Key = {Ref.BucketSegment, Ref.HashKey}, .ValueContentIds = std::move(ValidAttachments)}); } @@ -946,7 +946,7 @@ HttpStructuredCacheService::HandleGetCacheValue(zen::HttpServerRequest& Request, if (QueryUpstream) { - if (auto UpstreamResult = m_UpstreamCache.GetCacheValue({Ref.Namespace, Ref.BucketSegment, Ref.HashKey}, Ref.ValueContentId); + if (auto UpstreamResult = m_UpstreamCache.GetCacheValue(Ref.Namespace, {Ref.BucketSegment, Ref.HashKey}, Ref.ValueContentId); UpstreamResult.Success) { if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(UpstreamResult.Value))) @@ -1124,8 +1124,13 @@ HttpStructuredCacheService::HandleRpcPutCacheRecords(zen::HttpServerRequest& Req ZEN_ASSERT(BatchObject["Method"sv].AsString() == "PutCacheRecords"sv); - std::string_view PolicyText = Params["DefaultPolicy"].AsString(); - DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; + std::string_view PolicyText = Params["DefaultPolicy"].AsString(); + std::optional Namespace = GetRpcRequestNamespace(Params); + if (!Namespace) + { + return Request.WriteResponse(HttpResponseCode::BadRequest); + } + DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; std::vector Results; for (CbFieldView RequestField : Params["Requests"sv]) { @@ -1139,7 +1144,7 @@ HttpStructuredCacheService::HandleRpcPutCacheRecords(zen::HttpServerRequest& Req return Request.WriteResponse(HttpResponseCode::BadRequest); } CacheRecordPolicy Policy = LoadCacheRecordPolicy(RequestObject["Policy"sv].AsObjectView(), DefaultPolicy); - PutRequestData PutRequest{std::move(Key), RecordObject, std::move(Policy)}; + PutRequestData PutRequest{*Namespace, std::move(Key), RecordObject, std::move(Policy)}; PutResult Result = PutCacheRecord(PutRequest, &BatchRequest); @@ -1203,7 +1208,7 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack else { ZEN_WARN("PUT - '{}/{}/{}' '{}' FAILED, attachment '{}' is not compressed", - Request.Key.Namespace, + Request.Namespace, Request.Key.Bucket, Request.Key.Hash, ToString(HttpContentType::kCbPackage), @@ -1225,7 +1230,7 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack } ZEN_DEBUG("PUT - '{}/{}/{}' {}, attachments '{}/{}/{}' (new/valid/total)", - Request.Key.Namespace, + Request.Namespace, Request.Key.Bucket, Request.Key.Hash, NiceBytes(TransferredSize), @@ -1237,14 +1242,16 @@ HttpStructuredCacheService::PutCacheRecord(PutRequestData& Request, const CbPack CacheValue.Value = IoBuffer(Record.GetSize()); Record.CopyTo(MutableMemoryView(CacheValue.Value.MutableData(), CacheValue.Value.GetSize())); CacheValue.Value.SetContentType(ZenContentType::kCbObject); - m_CacheStore.Put(Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash, CacheValue); + m_CacheStore.Put(Request.Namespace, Request.Key.Bucket, Request.Key.Hash, CacheValue); const bool IsPartialRecord = Count.Valid != Count.Total; if (EnumHasAllFlags(Request.Policy.GetRecordPolicy(), CachePolicy::StoreRemote) && !IsPartialRecord) { - m_UpstreamCache.EnqueueUpstream( - {.Type = ZenContentType::kCbPackage, .Key = Request.Key, .ValueContentIds = std::move(ValidAttachments)}); + m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCbPackage, + .Namespace = Request.Namespace, + .Key = Request.Key, + .ValueContentIds = std::move(ValidAttachments)}); } return PutResult::Success; } @@ -1277,8 +1284,13 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt bool UsedUpstream = false; }; - std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); - CachePolicy DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; + std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); + CachePolicy DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; + std::optional Namespace = GetRpcRequestNamespace(Params); + if (!Namespace) + { + return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); + } std::vector Requests; std::vector UpstreamIndexes; CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); @@ -1322,7 +1334,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt ZenCacheValue RecordCacheValue; if (EnumHasAllFlags(Policy.GetRecordPolicy(), CachePolicy::QueryLocal) && - m_CacheStore.Get(Key.Namespace, Key.Bucket, Key.Hash, RecordCacheValue)) + m_CacheStore.Get(*Namespace, Key.Bucket, Key.Hash, RecordCacheValue)) { Request.RecordCacheValue = std::move(RecordCacheValue.Value); if (Request.RecordCacheValue.GetContentType() != ZenContentType::kCbObject) @@ -1436,7 +1448,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt } } - const auto OnCacheRecordGetComplete = [this, &ParseValues](CacheRecordGetCompleteParams&& Params) { + const auto OnCacheRecordGetComplete = [this, Namespace, &ParseValues](CacheRecordGetCompleteParams&& Params) { if (!Params.Record) { return; @@ -1453,7 +1465,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt Request.RecordObject = ObjectBuffer; if (EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::StoreLocal)) { - m_CacheStore.Put(Key.Namespace, Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); + m_CacheStore.Put(*Namespace, Key.Bucket, Key.Hash, {.Value = {Request.RecordCacheValue}}); } ParseValues(Request); Request.UsedUpstream = true; @@ -1493,7 +1505,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt { ZEN_DEBUG("Uncompressed value '{}' from upstream cache record '{}/{}/{}'", Value.ContentId, - Key.Namespace, + *Namespace, Key.Bucket, Key.Hash); } @@ -1510,7 +1522,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt } }; - m_UpstreamCache.GetCacheRecords(UpstreamRequests, std::move(OnCacheRecordGetComplete)); + m_UpstreamCache.GetCacheRecords(*Namespace, UpstreamRequests, std::move(OnCacheRecordGetComplete)); } CbPackage ResponsePackage; @@ -1533,7 +1545,7 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt } ZEN_DEBUG("HIT - '{}/{}/{}' {}{}{}", - Key.Namespace, + *Namespace, Key.Bucket, Key.Hash, NiceBytes(Request.RecordCacheValue.Size()), @@ -1549,11 +1561,11 @@ HttpStructuredCacheService::HandleRpcGetCacheRecords(zen::HttpServerRequest& Htt if (!EnumHasAnyFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::Query)) { // If they requested no query, do not record this as a miss - ZEN_DEBUG("DISABLEDQUERY - '{}/{}/{}'", Key.Namespace, Key.Bucket, Key.Hash); + ZEN_DEBUG("DISABLEDQUERY - '{}/{}/{}'", *Namespace, Key.Bucket, Key.Hash); } else { - ZEN_DEBUG("MISS - '{}/{}/{}' {}", Key.Namespace, Key.Bucket, Key.Hash, Request.RecordObject ? ""sv : "(PARTIAL)"sv); + ZEN_DEBUG("MISS - '{}/{}/{}' {}", *Namespace, Key.Bucket, Key.Hash, Request.RecordObject ? ""sv : "(PARTIAL)"sv); m_CacheStats.MissCount++; } } @@ -1579,8 +1591,13 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ ZEN_ASSERT(BatchObject["Method"sv].AsString() == "PutCacheValues"sv); - std::string_view PolicyText = Params["DefaultPolicy"].AsString(); - CachePolicy DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; + std::string_view PolicyText = Params["DefaultPolicy"].AsString(); + CachePolicy DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; + std::optional Namespace = GetRpcRequestNamespace(Params); + if (!Namespace) + { + return Request.WriteResponse(HttpResponseCode::BadRequest); + } std::vector Results; for (CbFieldView RequestField : Params["Requests"sv]) { @@ -1615,21 +1632,21 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ { IoBuffer Value = Chunk.GetCompressed().Flatten().AsIoBuffer(); Value.SetContentType(ZenContentType::kCompressedBinary); - m_CacheStore.Put(Key.Namespace, Key.Bucket, Key.Hash, {.Value = Value}); + m_CacheStore.Put(*Namespace, Key.Bucket, Key.Hash, {.Value = Value}); TransferredSize = Chunk.GetCompressedSize(); } Succeeded = true; } else { - ZEN_WARN("PUTCACHEVALUES - '{}/{}/{}/{}' FAILED, value is not compressed", Key.Namespace, Key.Bucket, Key.Hash, RawHash); + ZEN_WARN("PUTCACHEVALUES - '{}/{}/{}/{}' FAILED, value is not compressed", *Namespace, Key.Bucket, Key.Hash, RawHash); return Request.WriteResponse(HttpResponseCode::BadRequest); } } else if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { ZenCacheValue ExistingValue; - if (m_CacheStore.Get(Key.Namespace, Key.Bucket, Key.Hash, ExistingValue) && + if (m_CacheStore.Get(*Namespace, Key.Bucket, Key.Hash, ExistingValue) && IsCompressedBinary(ExistingValue.Value.GetContentType())) { Succeeded = true; @@ -1640,11 +1657,11 @@ HttpStructuredCacheService::HandleRpcPutCacheValues(zen::HttpServerRequest& Requ if (Succeeded && EnumHasAllFlags(Policy, CachePolicy::StoreRemote)) { - m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCompressedBinary, .Key = Key}); + m_UpstreamCache.EnqueueUpstream({.Type = ZenContentType::kCompressedBinary, .Namespace = *Namespace, .Key = Key}); } Results.push_back(Succeeded); ZEN_DEBUG("PUTCACHEVALUES - '{}/{}/{}' {}, '{}'", - Key.Namespace, + *Namespace, Key.Bucket, Key.Hash, NiceBytes(TransferredSize), @@ -1679,9 +1696,15 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http { ZEN_TRACE_CPU("Z$::RpcGetCacheValues"); - CbObjectView Params = RpcRequest["Params"sv].AsObjectView(); - std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); - CachePolicy DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; + CbObjectView Params = RpcRequest["Params"sv].AsObjectView(); + std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); + CachePolicy DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; + std::optional Namespace = GetRpcRequestNamespace(Params); + if (!Namespace) + { + return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); + } + struct RequestData { CacheKey Key; @@ -1717,7 +1740,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { - if (m_CacheStore.Get(Key.Namespace, Key.Bucket, Key.Hash, CacheValue) && IsCompressedBinary(CacheValue.Value.GetContentType())) + if (m_CacheStore.Get(*Namespace, Key.Bucket, Key.Hash, CacheValue) && IsCompressedBinary(CacheValue.Value.GetContentType())) { Result = CompressedBuffer::FromCompressed(SharedBuffer(CacheValue.Value)); } @@ -1725,7 +1748,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http if (Result) { ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}/{}' {} ({}) in {}", - Key.Namespace, + *Namespace, Key.Bucket, Key.Hash, NiceBytes(Result.GetCompressed().GetSize()), @@ -1740,12 +1763,12 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http else if (!EnumHasAnyFlags(Policy, CachePolicy::Query)) { // If they requested no query, do not record this as a miss - ZEN_DEBUG("GETCACHEVALUES DISABLEDQUERY - '{}/{}/{}'", Key.Namespace, Key.Bucket, Key.Hash); + ZEN_DEBUG("GETCACHEVALUES DISABLEDQUERY - '{}/{}/{}'", *Namespace, Key.Bucket, Key.Hash); } else { ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}/{}' ({}) in {}", - Key.Namespace, + *Namespace, Key.Bucket, Key.Hash, "LOCAL"sv, @@ -1763,13 +1786,14 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http for (size_t Index : RemoteRequestIndexes) { RequestData& Request = Requests[Index]; - RequestedRecordsData.push_back({{Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash}}); + RequestedRecordsData.push_back({Request.Key.Bucket, Request.Key.Hash}); CacheChunkRequests.push_back(&RequestedRecordsData.back()); } Stopwatch Timer; m_UpstreamCache.GetCacheValues( + *Namespace, CacheChunkRequests, - [this, &RequestedRecordsData, &Requests, &RemoteRequestIndexes, &Timer](CacheValueGetCompleteParams&& Params) { + [this, Namespace, &RequestedRecordsData, &Requests, &RemoteRequestIndexes, &Timer](CacheValueGetCompleteParams&& Params) { CacheChunkRequest& ChunkRequest = Params.Request; if (Params.Value) { @@ -1783,9 +1807,9 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http // that we copy data from upstream even when SkipData and !StoreLocal are true means that it is too expensive // for us to keep the data only on the upstream server. // if (EnumHasAllFlags(Policy, CachePolicy::StoreLocal)) - m_CacheStore.Put(Request.Key.Namespace, Request.Key.Bucket, Request.Key.Hash, ZenCacheValue{Params.Value}); + m_CacheStore.Put(*Namespace, Request.Key.Bucket, Request.Key.Hash, ZenCacheValue{Params.Value}); ZEN_DEBUG("GETCACHEVALUES HIT - '{}/{}/{}' {} ({}) in {}", - ChunkRequest.Key.Namespace, + *Namespace, ChunkRequest.Key.Bucket, ChunkRequest.Key.Hash, NiceBytes(Request.Result.GetCompressed().GetSize()), @@ -1797,7 +1821,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http } } ZEN_DEBUG("GETCACHEVALUES MISS - '{}/{}/{}' ({}) in {}", - ChunkRequest.Key.Namespace, + *Namespace, ChunkRequest.Key.Bucket, ChunkRequest.Key.Hash, "UPSTREAM"sv, @@ -1888,6 +1912,7 @@ HttpStructuredCacheService::HandleRpcGetCacheChunks(zen::HttpServerRequest& Http ZEN_TRACE_CPU("Z$::RpcGetCacheChunks"); + std::string Namespace; std::vector RecordKeys; // Data about a Record necessary to identify it to the upstream std::vector Records; // Scratch-space data about a Record when fulfilling RecordRequests std::vector RequestKeys; // Data about a ChunkRequest necessary to identify it to the upstream @@ -1897,27 +1922,28 @@ HttpStructuredCacheService::HandleRpcGetCacheChunks(zen::HttpServerRequest& Http std::vector UpstreamChunks; // ChunkRequests that we need to send to the upstream // Parse requests from the CompactBinary body of the RpcRequest and divide it into RecordRequests and ValueRequests - if (!ParseGetCacheChunksRequest(RecordKeys, Records, RequestKeys, Requests, RecordRequests, ValueRequests, RpcRequest)) + if (!ParseGetCacheChunksRequest(Namespace, RecordKeys, Records, RequestKeys, Requests, RecordRequests, ValueRequests, RpcRequest)) { return HttpRequest.WriteResponse(HttpResponseCode::BadRequest); } // For each Record request, load the Record if necessary to find the Chunk's ContentId, load its Payloads if we // have it locally, and otherwise append a request for the payload to UpstreamChunks - GetLocalCacheRecords(RecordKeys, Records, RecordRequests, UpstreamChunks); + GetLocalCacheRecords(Namespace, RecordKeys, Records, RecordRequests, UpstreamChunks); // For each Value request, load the Value if we have it locally and otherwise append a request for the payload to UpstreamChunks - GetLocalCacheValues(ValueRequests, UpstreamChunks); + GetLocalCacheValues(Namespace, ValueRequests, UpstreamChunks); // Call GetCacheChunks on the upstream for any payloads we do not have locally - GetUpstreamCacheChunks(UpstreamChunks, RequestKeys, Requests); + GetUpstreamCacheChunks(Namespace, UpstreamChunks, RequestKeys, Requests); // Send the payload and descriptive data about each chunk to the client - WriteGetCacheChunksResponse(Requests, HttpRequest); + WriteGetCacheChunksResponse(Namespace, Requests, HttpRequest); } bool -HttpStructuredCacheService::ParseGetCacheChunksRequest(std::vector& RecordKeys, +HttpStructuredCacheService::ParseGetCacheChunksRequest(std::string& Namespace, + std::vector& RecordKeys, std::vector& Records, std::vector& RequestKeys, std::vector& Requests, @@ -1929,11 +1955,20 @@ HttpStructuredCacheService::ParseGetCacheChunksRequest(std::vector(ChunkRequestsArray.Num()); + CbObjectView Params = RpcRequest["Params"sv].AsObjectView(); + std::string_view DefaultPolicyText = Params["DefaultPolicy"sv].AsString(); + CachePolicy DefaultPolicy = !DefaultPolicyText.empty() ? ParseCachePolicy(DefaultPolicyText) : CachePolicy::Default; + + std::optional NamespaceText = GetRpcRequestNamespace(Params); + if (!NamespaceText) + { + ZEN_WARN("GetCacheChunks: Invalid namespace in ChunkRequest."); + return false; + } + Namespace = *NamespaceText; + + CbArrayView ChunkRequestsArray = Params["ChunkRequests"sv].AsArrayView(); + size_t NumRequests = static_cast(ChunkRequestsArray.Num()); // Note that these reservations allow us to take pointers to the elements while populating them. If the reservation is removed, // we will need to change the pointers to indexes to handle reallocations. @@ -1996,11 +2031,9 @@ HttpStructuredCacheService::ParseGetCacheChunksRequest(std::vectorKey.Namespace, PreviousRecordKey->Key.Bucket, PreviousRecordKey->Key.Hash); return false; @@ -2022,7 +2055,8 @@ HttpStructuredCacheService::ParseGetCacheChunksRequest(std::vector& RecordKeys, +HttpStructuredCacheService::GetLocalCacheRecords(std::string_view Namespace, + std::vector& RecordKeys, std::vector& Records, std::vector& RecordRequests, std::vector& OutUpstreamChunks) @@ -2041,7 +2075,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (!Record.Exists && EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get(RecordKey.Key.Namespace, RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) + if (m_CacheStore.Get(Namespace, RecordKey.Key.Bucket, RecordKey.Key.Hash, CacheValue)) { Record.Exists = true; Record.CacheValue = std::move(CacheValue.Value); @@ -2058,7 +2092,7 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (!UpstreamRecordRequests.empty()) { - const auto OnCacheRecordGetComplete = [this, &RecordKeys, &Records](CacheRecordGetCompleteParams&& Params) { + const auto OnCacheRecordGetComplete = [this, Namespace, &RecordKeys, &Records](CacheRecordGetCompleteParams&& Params) { if (!Params.Record) { return; @@ -2076,10 +2110,10 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& if (EnumHasAllFlags(Record.DownstreamPolicy, CachePolicy::StoreLocal)) { - m_CacheStore.Put(Key.Namespace, Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); + m_CacheStore.Put(Namespace, Key.Bucket, Key.Hash, {.Value = Record.CacheValue}); } }; - m_UpstreamCache.GetCacheRecords(UpstreamRecordRequests, std::move(OnCacheRecordGetComplete)); + m_UpstreamCache.GetCacheRecords(Namespace, UpstreamRecordRequests, std::move(OnCacheRecordGetComplete)); } std::vector UpstreamPayloadRequests; @@ -2163,7 +2197,8 @@ HttpStructuredCacheService::GetLocalCacheRecords(std::vector& } void -HttpStructuredCacheService::GetLocalCacheValues(std::vector& ValueRequests, +HttpStructuredCacheService::GetLocalCacheValues(std::string_view Namespace, + std::vector& ValueRequests, std::vector& OutUpstreamChunks) { using namespace cache::detail; @@ -2173,7 +2208,7 @@ HttpStructuredCacheService::GetLocalCacheValues(std::vectorExists && EnumHasAllFlags(Request->DownstreamPolicy, CachePolicy::QueryLocal)) { ZenCacheValue CacheValue; - if (m_CacheStore.Get(Request->Key->Key.Namespace, Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) + if (m_CacheStore.Get(Namespace, Request->Key->Key.Bucket, Request->Key->Key.Hash, CacheValue)) { if (IsCompressedBinary(CacheValue.Value.GetContentType())) { @@ -2207,7 +2242,8 @@ HttpStructuredCacheService::GetLocalCacheValues(std::vector& UpstreamChunks, +HttpStructuredCacheService::GetUpstreamCacheChunks(std::string_view Namespace, + std::vector& UpstreamChunks, std::vector& RequestKeys, std::vector& Requests) { @@ -2215,7 +2251,7 @@ HttpStructuredCacheService::GetUpstreamCacheChunks(std::vector& Requests, +HttpStructuredCacheService::WriteGetCacheChunksResponse(std::string_view Namespace, + std::vector& Requests, zen::HttpServerRequest& HttpRequest) { using namespace cache::detail; @@ -2290,7 +2327,7 @@ HttpStructuredCacheService::WriteGetCacheChunksResponse(std::vectorKey.Namespace, + Namespace, Request.Key->Key.Bucket, Request.Key->Key.Hash, Request.Key->ValueId, @@ -2301,19 +2338,11 @@ HttpStructuredCacheService::WriteGetCacheChunksResponse(std::vectorKey.Namespace, - Request.Key->Key.Bucket, - Request.Key->Key.Hash, - Request.Key->ValueId); + ZEN_DEBUG("SKIP - '{}/{}/{}/{}'", Namespace, Request.Key->Key.Bucket, Request.Key->Key.Hash, Request.Key->ValueId); } else { - ZEN_DEBUG("MISS - '{}/{}/{}/{}'", - Request.Key->Key.Namespace, - Request.Key->Key.Bucket, - Request.Key->Key.Hash, - Request.Key->ValueId); + ZEN_DEBUG("MISS - '{}/{}/{}/{}'", Namespace, Request.Key->Key.Bucket, Request.Key->Key.Hash, Request.Key->ValueId); m_CacheStats.MissCount++; } } diff --git a/zenserver/cache/structuredcache.h b/zenserver/cache/structuredcache.h index 5f248edd1..890a2ebab 100644 --- a/zenserver/cache/structuredcache.h +++ b/zenserver/cache/structuredcache.h @@ -118,7 +118,8 @@ private: PutResult PutCacheRecord(PutRequestData& Request, const CbPackage* Package); /** HandleRpcGetCacheChunks Helper: Parse the Body object into RecordValue Requests and Value Requests. */ - bool ParseGetCacheChunksRequest(std::vector& RecordKeys, + bool ParseGetCacheChunksRequest(std::string& Namespace, + std::vector& RecordKeys, std::vector& Records, std::vector& RequestKeys, std::vector& Requests, @@ -126,18 +127,24 @@ private: std::vector& ValueRequests, CbObjectView RpcRequest); /** HandleRpcGetCacheChunks Helper: Load records to get ContentId for RecordRequests, and load their payloads if they exist locally. */ - void GetLocalCacheRecords(std::vector& RecordKeys, + void GetLocalCacheRecords(std::string_view Namespace, + std::vector& RecordKeys, std::vector& Records, std::vector& RecordRequests, std::vector& OutUpstreamChunks); /** HandleRpcGetCacheChunks Helper: For ValueRequests, load their payloads if they exist locally. */ - void GetLocalCacheValues(std::vector& ValueRequests, std::vector& OutUpstreamChunks); + void GetLocalCacheValues(std::string_view Namespace, + std::vector& ValueRequests, + std::vector& OutUpstreamChunks); /** HandleRpcGetCacheChunks Helper: Load payloads from upstream that did not exist locally. */ - void GetUpstreamCacheChunks(std::vector& UpstreamChunks, + void GetUpstreamCacheChunks(std::string_view Namespace, + std::vector& UpstreamChunks, std::vector& RequestKeys, std::vector& Requests); /** HandleRpcGetCacheChunks Helper: Send response message containing all chunk results. */ - void WriteGetCacheChunksResponse(std::vector& Requests, zen::HttpServerRequest& HttpRequest); + void WriteGetCacheChunksResponse(std::string_view Namespace, + std::vector& Requests, + zen::HttpServerRequest& HttpRequest); spdlog::logger& Log() { return m_Log; } spdlog::logger& m_Log; diff --git a/zenserver/upstream/upstreamcache.cpp b/zenserver/upstream/upstreamcache.cpp index 52513abe9..98b4439c7 100644 --- a/zenserver/upstream/upstreamcache.cpp +++ b/zenserver/upstream/upstreamcache.cpp @@ -182,7 +182,7 @@ namespace detail { virtual UpstreamEndpointStatus GetStatus() override { return m_Status.EndpointStatus(); } - virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override + virtual GetUpstreamCacheResult GetCacheRecord(std::string_view Namespace, const CacheKey& CacheKey, ZenContentType Type) override { ZEN_TRACE_CPU("Upstream::Horde::GetSingleCacheRecord"); @@ -191,11 +191,11 @@ namespace detail { CloudCacheSession Session(m_Client); CloudCacheResult Result; - std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheKey.Namespace); + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Namespace); if (m_UseLegacyDdc && Type == ZenContentType::kBinary) { - std::string_view DdcNamespace = GetActualDdcNamespace(Session, CacheKey.Namespace); + std::string_view DdcNamespace = GetActualDdcNamespace(Session, Namespace); Result = Session.GetDerivedData(DdcNamespace, CacheKey.Bucket, CacheKey.Hash); } else if (Type == ZenContentType::kCompressedBinary) @@ -299,7 +299,9 @@ namespace detail { } } - virtual GetUpstreamCacheResult GetCacheRecords(std::span Requests, OnCacheRecordGetComplete&& OnComplete) override + virtual GetUpstreamCacheResult GetCacheRecords(std::string_view Namespace, + std::span Requests, + OnCacheRecordGetComplete&& OnComplete) override { ZEN_TRACE_CPU("Upstream::Horde::GetCacheRecords"); @@ -314,7 +316,7 @@ namespace detail { if (!Result.Error) { - std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheKey.Namespace); + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Namespace); CloudCacheResult RefResult = Session.GetRef(BlobStoreNamespace, CacheKey.Bucket, CacheKey.Hash, ZenContentType::kCbObject); AppendResult(RefResult, Result); @@ -351,14 +353,14 @@ namespace detail { return Result; } - virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& ValueContentId) override + virtual GetUpstreamCacheResult GetCacheValue(std::string_view Namespace, const CacheKey&, const IoHash& ValueContentId) override { ZEN_TRACE_CPU("Upstream::Horde::GetSingleCacheValue"); try { CloudCacheSession Session(m_Client); - std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheKey.Namespace); + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Namespace); const CloudCacheResult Result = Session.GetCompressedBlob(BlobStoreNamespace, ValueContentId); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); @@ -383,7 +385,8 @@ namespace detail { } } - virtual GetUpstreamCacheResult GetCacheValues(std::span CacheChunkRequests, + virtual GetUpstreamCacheResult GetCacheValues(std::string_view Namespace, + std::span CacheChunkRequests, OnCacheValueGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::Horde::GetCacheValues"); @@ -399,7 +402,7 @@ namespace detail { CompressedBuffer Compressed; if (!Result.Error) { - std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Request.Key.Namespace); + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Namespace); const CloudCacheResult BlobResult = Session.GetCompressedBlob(BlobStoreNamespace, Request.ChunkId); Payload = BlobResult.Response; @@ -446,7 +449,7 @@ namespace detail { CloudCacheResult Result; for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { - std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheRecord.Key.Namespace); + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, CacheRecord.Namespace); if (m_UseLegacyDdc) { Result = Session.PutDerivedData(BlobStoreNamespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue); @@ -484,6 +487,7 @@ namespace detail { return PerformStructuredPut( Session, + CacheRecord.Namespace, CacheRecord.Key, ReferencingObject.Save().GetBuffer().AsIoBuffer(), MaxAttempts, @@ -503,6 +507,7 @@ namespace detail { { return PerformStructuredPut( Session, + CacheRecord.Namespace, CacheRecord.Key, RecordValue, MaxAttempts, @@ -548,6 +553,7 @@ namespace detail { PutUpstreamCacheResult PerformStructuredPut( CloudCacheSession& Session, + std::string_view Namespace, const CacheKey& Key, IoBuffer ObjectBuffer, const int32_t MaxAttempts, @@ -556,7 +562,7 @@ namespace detail { int64_t TotalBytes = 0ull; double TotalElapsedSeconds = 0.0; - std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Key.Namespace); + std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Session, Namespace); const auto PutBlobs = [&](std::span ValueContentIds, std::string& OutReason) -> bool { for (const IoHash& ValueContentId : ValueContentIds) { @@ -738,14 +744,14 @@ namespace detail { virtual UpstreamEndpointStatus GetStatus() override { return m_Status.EndpointStatus(); } - virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override + virtual GetUpstreamCacheResult GetCacheRecord(std::string_view Namespace, const CacheKey& CacheKey, ZenContentType Type) override { ZEN_TRACE_CPU("Upstream::Zen::GetSingleCacheRecord"); try { ZenStructuredCacheSession Session(*m_Client); - const ZenCacheResult Result = Session.GetCacheRecord(CacheKey.Bucket, CacheKey.Hash, Type); + const ZenCacheResult Result = Session.GetCacheRecord(Namespace, CacheKey.Bucket, CacheKey.Hash, Type); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); @@ -769,20 +775,24 @@ namespace detail { } } - virtual GetUpstreamCacheResult GetCacheRecords(std::span Requests, OnCacheRecordGetComplete&& OnComplete) override + virtual GetUpstreamCacheResult GetCacheRecords(std::string_view Namespace, + std::span Requests, + OnCacheRecordGetComplete&& OnComplete) override { ZEN_TRACE_CPU("Upstream::Zen::GetCacheRecords"); ZEN_ASSERT(Requests.size() > 0); CbObjectWriter BatchRequest; BatchRequest << "Method"sv - << "GetCacheRecords"; + << "GetCacheRecords"sv; BatchRequest.BeginObject("Params"sv); { CachePolicy DefaultPolicy = Requests[0]->Policy.GetRecordPolicy(); BatchRequest << "DefaultPolicy"sv << WriteToString<128>(DefaultPolicy); + BatchRequest << "Namespace"sv << Namespace; + BatchRequest.BeginArray("Requests"sv); for (CacheKeyRequest* Request : Requests) { @@ -791,7 +801,6 @@ namespace detail { const CacheKey& Key = Request->Key; BatchRequest.BeginObject("Key"sv); { - BatchRequest << "Namespace"sv << Key.Namespace; BatchRequest << "Bucket"sv << Key.Bucket; BatchRequest << "Hash"sv << Key.Hash; } @@ -848,14 +857,16 @@ namespace detail { return {.Error{.ErrorCode = Result.ErrorCode, .Reason = std::move(Result.Reason)}}; } - virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& ValueContentId) override + virtual GetUpstreamCacheResult GetCacheValue(std::string_view Namespace, + const CacheKey& CacheKey, + const IoHash& ValueContentId) override { ZEN_TRACE_CPU("Upstream::Zen::GetSingleCacheValue"); try { ZenStructuredCacheSession Session(*m_Client); - const ZenCacheResult Result = Session.GetCacheValue(CacheKey.Bucket, CacheKey.Hash, ValueContentId); + const ZenCacheResult Result = Session.GetCacheValue(Namespace, CacheKey.Bucket, CacheKey.Hash, ValueContentId); m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); @@ -879,7 +890,8 @@ namespace detail { } } - virtual GetUpstreamCacheResult GetCacheValues(std::span CacheChunkRequests, + virtual GetUpstreamCacheResult GetCacheValues(std::string_view Namespace, + std::span CacheChunkRequests, OnCacheValueGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::Zen::GetCacheValues"); @@ -887,12 +899,16 @@ namespace detail { CbObjectWriter BatchRequest; BatchRequest << "Method"sv - << "GetCacheChunks"; + << "GetCacheChunks"sv; + BatchRequest << "Namespace"sv << Namespace; BatchRequest.BeginObject("Params"sv); { CachePolicy DefaultPolicy = CacheChunkRequests[0]->Policy; BatchRequest << "DefaultPolicy"sv << WriteToString<128>(DefaultPolicy).ToView(); + + BatchRequest << "Namespace"sv << Namespace; + BatchRequest.BeginArray("ChunkRequests"sv); { for (CacheChunkRequest* RequestPtr : CacheChunkRequests) @@ -902,7 +918,6 @@ namespace detail { BatchRequest.BeginObject(); { BatchRequest.BeginObject("Key"sv); - BatchRequest << "Namespace"sv << Request.Key.Namespace; BatchRequest << "Bucket"sv << Request.Key.Bucket; BatchRequest << "Hash"sv << Request.Key.Hash; BatchRequest.EndObject(); @@ -1042,7 +1057,11 @@ namespace detail { for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { - Result = Session.PutCacheRecord(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, PackagePayload, CacheRecord.Type); + Result = Session.PutCacheRecord(CacheRecord.Namespace, + CacheRecord.Key.Bucket, + CacheRecord.Key.Hash, + PackagePayload, + CacheRecord.Type); } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); @@ -1061,12 +1080,14 @@ namespace detail { CbPackage BatchPackage; CbObjectWriter BatchWriter; BatchWriter << "Method"sv - << "PutCacheValues"; + << "PutCacheValues"sv; BatchWriter.BeginObject("Params"sv); { // DefaultPolicy unspecified and expected to be Default + BatchWriter << "Namespace"sv << CacheRecord.Namespace; + BatchWriter.BeginArray("Requests"sv); { BatchWriter.BeginObject(); @@ -1074,7 +1095,6 @@ namespace detail { const CacheKey& Key = CacheRecord.Key; BatchWriter.BeginObject("Key"sv); { - BatchWriter << "Namespace"sv << Key.Namespace; BatchWriter << "Bucket"sv << Key.Bucket; BatchWriter << "Hash"sv << Key.Hash; } @@ -1108,7 +1128,8 @@ namespace detail { Result.Success = false; for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { - Result = Session.PutCacheValue(CacheRecord.Key.Bucket, + Result = Session.PutCacheValue(CacheRecord.Namespace, + CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheRecord.ValueContentIds[Idx], Values[Idx]); @@ -1131,7 +1152,11 @@ namespace detail { Result.Success = false; for (uint32_t Attempt = 0; Attempt < MaxAttempts && !Result.Success; Attempt++) { - Result = Session.PutCacheRecord(CacheRecord.Key.Bucket, CacheRecord.Key.Hash, RecordValue, CacheRecord.Type); + Result = Session.PutCacheRecord(CacheRecord.Namespace, + CacheRecord.Key.Bucket, + CacheRecord.Key.Hash, + RecordValue, + CacheRecord.Type); } m_Status.SetFromErrorCode(Result.ErrorCode, Result.Reason); @@ -1259,7 +1284,7 @@ public: } } - virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) override + virtual GetUpstreamCacheResult GetCacheRecord(std::string_view Namespace, const CacheKey& CacheKey, ZenContentType Type) override { ZEN_TRACE_CPU("Upstream::GetCacheRecord"); @@ -1278,7 +1303,7 @@ public: GetUpstreamCacheResult Result; { metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming); - Result = Endpoint->GetCacheRecord(CacheKey, Type); + Result = Endpoint->GetCacheRecord(Namespace, CacheKey, Type); } Stats.CacheGetCount.Increment(1); @@ -1306,7 +1331,9 @@ public: return {}; } - virtual void GetCacheRecords(std::span Requests, OnCacheRecordGetComplete&& OnComplete) override final + virtual void GetCacheRecords(std::string_view Namespace, + std::span Requests, + OnCacheRecordGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::GetCacheRecords"); @@ -1334,7 +1361,7 @@ public: { metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming); - Result = Endpoint->GetCacheRecords(RemainingKeys, [&](CacheRecordGetCompleteParams&& Params) { + Result = Endpoint->GetCacheRecords(Namespace, RemainingKeys, [&](CacheRecordGetCompleteParams&& Params) { if (Params.Record) { OnComplete(std::forward(Params)); @@ -1371,7 +1398,9 @@ public: } } - virtual void GetCacheValues(std::span CacheChunkRequests, OnCacheValueGetComplete&& OnComplete) override final + virtual void GetCacheValues(std::string_view Namespace, + std::span CacheChunkRequests, + OnCacheValueGetComplete&& OnComplete) override final { ZEN_TRACE_CPU("Upstream::GetCacheValues"); @@ -1399,7 +1428,7 @@ public: { metrics::OperationTiming::Scope Scope(Endpoint->Stats().CacheGetRequestTiming); - Result = Endpoint->GetCacheValues(RemainingKeys, [&](CacheValueGetCompleteParams&& Params) { + Result = Endpoint->GetCacheValues(Namespace, RemainingKeys, [&](CacheValueGetCompleteParams&& Params) { if (Params.RawHash != Params.RawHash.Zero) { OnComplete(std::forward(Params)); @@ -1436,7 +1465,9 @@ public: } } - virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& ValueContentId) override + virtual GetUpstreamCacheResult GetCacheValue(std::string_view Namespace, + const CacheKey& CacheKey, + const IoHash& ValueContentId) override { ZEN_TRACE_CPU("Upstream::GetCacheValue"); @@ -1454,7 +1485,7 @@ public: { metrics::OperationTiming::Scope Scope(Stats.CacheGetRequestTiming); - Result = Endpoint->GetCacheValue(CacheKey, ValueContentId); + Result = Endpoint->GetCacheValue(Namespace, CacheKey, ValueContentId); } Stats.CacheGetCount.Increment(1); @@ -1550,7 +1581,7 @@ private: ZenCacheValue CacheValue; std::vector Payloads; - if (!m_CacheStore.Get(CacheRecord.Key.Namespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) + if (!m_CacheStore.Get(CacheRecord.Namespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, CacheValue)) { ZEN_WARN("process upstream FAILED, '{}/{}', cache record doesn't exist", CacheRecord.Key.Bucket, CacheRecord.Key.Hash); return; @@ -1565,7 +1596,7 @@ private: else { ZEN_WARN("process upstream FAILED, '{}/{}/{}/{}', ValueContentId doesn't exist in CAS", - CacheRecord.Key.Namespace, + CacheRecord.Namespace, CacheRecord.Key.Bucket, CacheRecord.Key.Hash, ValueContentId); diff --git a/zenserver/upstream/upstreamcache.h b/zenserver/upstream/upstreamcache.h index 6f18b3119..13548efc8 100644 --- a/zenserver/upstream/upstreamcache.h +++ b/zenserver/upstream/upstreamcache.h @@ -32,6 +32,7 @@ struct ZenStructuredCacheClientOptions; struct UpstreamCacheRecord { ZenContentType Type = ZenContentType::kBinary; + std::string Namespace; CacheKey Key; std::vector ValueContentIds; }; @@ -163,12 +164,15 @@ public: virtual UpstreamEndpointState GetState() = 0; virtual UpstreamEndpointStatus GetStatus() = 0; - virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) = 0; - virtual GetUpstreamCacheResult GetCacheRecords(std::span Requests, OnCacheRecordGetComplete&& OnComplete) = 0; + virtual GetUpstreamCacheResult GetCacheRecord(std::string_view Namespace, const CacheKey& CacheKey, ZenContentType Type) = 0; + virtual GetUpstreamCacheResult GetCacheRecords(std::string_view Namespace, + std::span Requests, + OnCacheRecordGetComplete&& OnComplete) = 0; - virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& PayloadId) = 0; - virtual GetUpstreamCacheResult GetCacheValues(std::span CacheChunkRequests, - OnCacheValueGetComplete&& OnComplete) = 0; + virtual GetUpstreamCacheResult GetCacheValue(std::string_view Namespace, const CacheKey& CacheKey, const IoHash& PayloadId) = 0; + virtual GetUpstreamCacheResult GetCacheValues(std::string_view Namespace, + std::span CacheChunkRequests, + OnCacheValueGetComplete&& OnComplete) = 0; virtual PutUpstreamCacheResult PutCacheRecord(const UpstreamCacheRecord& CacheRecord, IoBuffer RecordValue, @@ -196,11 +200,15 @@ public: virtual void RegisterEndpoint(std::unique_ptr Endpoint) = 0; virtual void IterateEndpoints(std::function&& Fn) = 0; - virtual GetUpstreamCacheResult GetCacheRecord(CacheKey CacheKey, ZenContentType Type) = 0; - virtual void GetCacheRecords(std::span Requests, OnCacheRecordGetComplete&& OnComplete) = 0; + virtual GetUpstreamCacheResult GetCacheRecord(std::string_view Namespace, const CacheKey& CacheKey, ZenContentType Type) = 0; + virtual void GetCacheRecords(std::string_view Namespace, + std::span Requests, + OnCacheRecordGetComplete&& OnComplete) = 0; - virtual GetUpstreamCacheResult GetCacheValue(const CacheKey& CacheKey, const IoHash& ValueContentId) = 0; - virtual void GetCacheValues(std::span CacheChunkRequests, OnCacheValueGetComplete&& OnComplete) = 0; + virtual GetUpstreamCacheResult GetCacheValue(std::string_view Namespace, const CacheKey& CacheKey, const IoHash& ValueContentId) = 0; + virtual void GetCacheValues(std::string_view Namespace, + std::span CacheChunkRequests, + OnCacheValueGetComplete&& OnComplete) = 0; virtual void EnqueueUpstream(UpstreamCacheRecord CacheRecord) = 0; diff --git a/zenserver/upstream/zen.cpp b/zenserver/upstream/zen.cpp index 1ac4afe5c..efc75b5b4 100644 --- a/zenserver/upstream/zen.cpp +++ b/zenserver/upstream/zen.cpp @@ -408,10 +408,15 @@ ZenStructuredCacheSession::CheckHealth() } ZenCacheResult -ZenStructuredCacheSession::GetCacheRecord(std::string_view BucketId, const IoHash& Key, ZenContentType Type) +ZenStructuredCacheSession::GetCacheRecord(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, ZenContentType Type) { ExtendableStringBuilder<256> Uri; - Uri << m_Client.ServiceUrl() << "/z$/" << BucketId << "/" << Key.ToHexString(); + Uri << m_Client.ServiceUrl() << "/z$/"; + if (Namespace != ZenCacheStore::DefaultNamespace) + { + Uri << Namespace << "/"; + } + Uri << BucketId << "/" << Key.ToHexString(); cpr::Session& Session = m_SessionState->GetSession(); @@ -432,10 +437,18 @@ ZenStructuredCacheSession::GetCacheRecord(std::string_view BucketId, const IoHas } ZenCacheResult -ZenStructuredCacheSession::GetCacheValue(std::string_view BucketId, const IoHash& Key, const IoHash& ValueContentId) +ZenStructuredCacheSession::GetCacheValue(std::string_view Namespace, + std::string_view BucketId, + const IoHash& Key, + const IoHash& ValueContentId) { ExtendableStringBuilder<256> Uri; - Uri << m_Client.ServiceUrl() << "/z$/" << BucketId << "/" << Key.ToHexString() << "/" << ValueContentId.ToHexString(); + Uri << m_Client.ServiceUrl() << "/z$/"; + if (Namespace != ZenCacheStore::DefaultNamespace) + { + Uri << Namespace << "/"; + } + Uri << BucketId << "/" << Key.ToHexString() << "/" << ValueContentId.ToHexString(); cpr::Session& Session = m_SessionState->GetSession(); @@ -457,10 +470,19 @@ ZenStructuredCacheSession::GetCacheValue(std::string_view BucketId, const IoHash } ZenCacheResult -ZenStructuredCacheSession::PutCacheRecord(std::string_view BucketId, const IoHash& Key, IoBuffer Value, ZenContentType Type) +ZenStructuredCacheSession::PutCacheRecord(std::string_view Namespace, + std::string_view BucketId, + const IoHash& Key, + IoBuffer Value, + ZenContentType Type) { ExtendableStringBuilder<256> Uri; - Uri << m_Client.ServiceUrl() << "/z$/" << BucketId << "/" << Key.ToHexString(); + Uri << m_Client.ServiceUrl() << "/z$/"; + if (Namespace != ZenCacheStore::DefaultNamespace) + { + Uri << Namespace << "/"; + } + Uri << BucketId << "/" << Key.ToHexString(); cpr::Session& Session = m_SessionState->GetSession(); @@ -485,10 +507,19 @@ ZenStructuredCacheSession::PutCacheRecord(std::string_view BucketId, const IoHas } ZenCacheResult -ZenStructuredCacheSession::PutCacheValue(std::string_view BucketId, const IoHash& Key, const IoHash& ValueContentId, IoBuffer Payload) +ZenStructuredCacheSession::PutCacheValue(std::string_view Namespace, + std::string_view BucketId, + const IoHash& Key, + const IoHash& ValueContentId, + IoBuffer Payload) { ExtendableStringBuilder<256> Uri; - Uri << m_Client.ServiceUrl() << "/z$/" << BucketId << "/" << Key.ToHexString() << "/" << ValueContentId.ToHexString(); + Uri << m_Client.ServiceUrl() << "/z$/"; + if (Namespace != ZenCacheStore::DefaultNamespace) + { + Uri << Namespace << "/"; + } + Uri << BucketId << "/" << Key.ToHexString() << "/" << ValueContentId.ToHexString(); cpr::Session& Session = m_SessionState->GetSession(); diff --git a/zenserver/upstream/zen.h b/zenserver/upstream/zen.h index f70d9d06f..e8590f940 100644 --- a/zenserver/upstream/zen.h +++ b/zenserver/upstream/zen.h @@ -128,10 +128,18 @@ public: ~ZenStructuredCacheSession(); ZenCacheResult CheckHealth(); - ZenCacheResult GetCacheRecord(std::string_view BucketId, const IoHash& Key, ZenContentType Type); - ZenCacheResult GetCacheValue(std::string_view BucketId, const IoHash& Key, const IoHash& ValueContentId); - ZenCacheResult PutCacheRecord(std::string_view BucketId, const IoHash& Key, IoBuffer Value, ZenContentType Type); - ZenCacheResult PutCacheValue(std::string_view BucketId, const IoHash& Key, const IoHash& ValueContentId, IoBuffer Payload); + ZenCacheResult GetCacheRecord(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, ZenContentType Type); + ZenCacheResult GetCacheValue(std::string_view Namespace, std::string_view BucketId, const IoHash& Key, const IoHash& ValueContentId); + ZenCacheResult PutCacheRecord(std::string_view Namespace, + std::string_view BucketId, + const IoHash& Key, + IoBuffer Value, + ZenContentType Type); + ZenCacheResult PutCacheValue(std::string_view Namespace, + std::string_view BucketId, + const IoHash& Key, + const IoHash& ValueContentId, + IoBuffer Payload); ZenCacheResult InvokeRpc(const CbObjectView& Request); ZenCacheResult InvokeRpc(const CbPackage& Package); diff --git a/zenutil/include/zenutil/cache/cachekey.h b/zenutil/include/zenutil/cache/cachekey.h index 427c99435..9adde8fc7 100644 --- a/zenutil/include/zenutil/cache/cachekey.h +++ b/zenutil/include/zenutil/cache/cachekey.h @@ -12,21 +12,13 @@ namespace zen { struct CacheKey { - std::string Namespace; std::string Bucket; IoHash Hash; - static CacheKey Create(std::string_view Namespace, std::string_view Bucket, const IoHash& Hash) - { - return {.Namespace = ToLower(Namespace), .Bucket = ToLower(Bucket), .Hash = Hash}; - } + static CacheKey Create(std::string_view Bucket, const IoHash& Hash) { return {.Bucket = ToLower(Bucket), .Hash = Hash}; } auto operator<=>(const CacheKey& that) const { - if (auto n = caseSensitiveCompareStrings(Namespace, that.Namespace); n != std::strong_ordering::equal) - { - return n; - } if (auto b = caseSensitiveCompareStrings(Bucket, that.Bucket); b != std::strong_ordering::equal) { return b; -- cgit v1.2.3 From b373a645f787f28a4f7a831c161553df6fd4d72c Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 19 May 2022 15:11:35 +0200 Subject: migrate legacy cache folders to ue4.ddc namespace map default namespace to at runtime ue4.ddc use a non-valid name for the default namespace so we avoid any collision or accidental creation of folder for that --- zenserver/cache/structuredcachestore.cpp | 24 +++++++++++++++++++----- zenserver/cache/structuredcachestore.h | 3 ++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 6b7b73dcf..9218c2cbb 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -2128,6 +2128,8 @@ ZenCacheDiskLayer::TotalSize() const //////////////////////////// ZenCacheStore +static constexpr std::string_view UE4DDCNamespaceName = "ue4.ddc"; + ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStorage(Gc), GcContributor(Gc) { CreateDirectories(BasePath); @@ -2150,11 +2152,13 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor ZEN_INFO("Found #{} namespaces in '{}' and #{} legacy buckets", Namespaces.size(), BasePath, LegacyBuckets.size()); - if (std::find(Namespaces.begin(), Namespaces.end(), DefaultNamespace) == Namespaces.end()) + if (std::find(Namespaces.begin(), Namespaces.end(), UE4DDCNamespaceName) == Namespaces.end()) { - ZEN_INFO("Moving #{} legacy buckets to anonymous namespace", LegacyBuckets.size()); + // default (unspecified) and ue4-ddc namespace points to the same namespace instance + + ZEN_INFO("Moving #{} legacy buckets to '{}' namespace", LegacyBuckets.size(), UE4DDCNamespaceName); - std::filesystem::path DefaultNamespaceFolder = BasePath / fmt::format("{}{}", NamespaceDiskPrefix, DefaultNamespace); + std::filesystem::path DefaultNamespaceFolder = BasePath / fmt::format("{}{}", NamespaceDiskPrefix, UE4DDCNamespaceName); CreateDirectories(DefaultNamespaceFolder); // Move any non-namespace folders into the default namespace folder @@ -2169,8 +2173,7 @@ ZenCacheStore::ZenCacheStore(CasGc& Gc, std::filesystem::path BasePath) : GcStor ZEN_ERROR("Unable to move '{}' to '{}', reason '{}'", LegacyFolder, NewPath, Ec.message()); } } - - Namespaces.push_back(std::string(DefaultNamespace)); + Namespaces.push_back(std::string(UE4DDCNamespaceName)); } for (const std::string& NamespaceName : Namespaces) @@ -2237,6 +2240,13 @@ ZenCacheStore::GetNamespace(std::string_view Namespace) { return It->second.get(); } + if (Namespace == DefaultNamespace) + { + if (auto It = m_Namespaces.find(std::string(UE4DDCNamespaceName)); It != m_Namespaces.end()) + { + return It->second.get(); + } + } return nullptr; } @@ -2249,6 +2259,10 @@ ZenCacheStore::IterateNamespaces(const std::function Date: Thu, 19 May 2022 15:42:04 +0200 Subject: Fix and retry count and add an extra iteration to give more time for success --- zenserver/cache/structuredcachestore.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 6b7b73dcf..4edc13b4a 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -1769,7 +1769,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c // We retry to move the file since it can be held open for read. // This happens if the server processes a Get request for the file or // if we are busy sending the file upstream - int RetryCount = 3; + int RetryCount = 4; do { Ec.clear(); @@ -1789,7 +1789,8 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c if (!ExistingEc && (OldFileSize == NewFileSize)) { ZEN_INFO( - "Failed to move temporary file '{}' to '{}'. Target file has same size, assuming concurrent write of same value, " + "Failed to move temporary file '{}' to '{}' for '{}'. Target file has same size, assuming concurrent write of same " + "value, " "move " "failed with reason '{}'", DataFile.GetPath(), @@ -1865,9 +1866,8 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c Ec.message()); // Semi arbitrary back-off - zen::Sleep(200 * (4 - RetryCount)); // Sleep at most for a total of 2 seconds - RetryCount--; - } while (RetryCount > 0); + zen::Sleep(200 * (5 - RetryCount)); // Sleep at most for a total of 3 seconds + } while (RetryCount-- > 0); throw std::system_error(Ec, fmt::format("Failed to finalize file '{}' for put in '{}'", DataFilePath.ToUtf8(), m_BucketDir)); } -- cgit v1.2.3 From bbfd45ff267f03d36e9f842876dedd3f7852f45e Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 19 May 2022 16:02:27 +0200 Subject: fix tests --- zenserver-test/zenserver-test.cpp | 18 +++++++++--------- zenserver/cache/structuredcache.cpp | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/zenserver-test/zenserver-test.cpp b/zenserver-test/zenserver-test.cpp index c79b540ea..a6e89e702 100644 --- a/zenserver-test/zenserver-test.cpp +++ b/zenserver-test/zenserver-test.cpp @@ -1586,8 +1586,8 @@ TEST_CASE("zcache.rpc") Inst.WaitUntilReady(); CachePolicy Policy = CachePolicy::Default; - std::vector Keys = PutCacheRecords(BaseUri, "default"sv, "mastodon"sv, 128); - GetCacheRecordResult Result = GetCacheRecords(BaseUri, "default"sv, Keys, Policy); + std::vector Keys = PutCacheRecords(BaseUri, "ue4.ddc"sv, "mastodon"sv, 128); + GetCacheRecordResult Result = GetCacheRecords(BaseUri, "ue4.ddc"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1625,7 +1625,7 @@ TEST_CASE("zcache.rpc") Inst.WaitUntilReady(); CachePolicy Policy = CachePolicy::Default; - std::vector ExistingKeys = PutCacheRecords(BaseUri, "default"sv, "mastodon"sv, 128); + std::vector ExistingKeys = PutCacheRecords(BaseUri, "ue4.ddc"sv, "mastodon"sv, 128); std::vector Keys; for (const zen::CacheKey& Key : ExistingKeys) @@ -1634,7 +1634,7 @@ TEST_CASE("zcache.rpc") Keys.push_back(CacheKey::Create("missing"sv, IoHash::Zero)); } - GetCacheRecordResult Result = GetCacheRecords(BaseUri, "default"sv, Keys, Policy); + GetCacheRecordResult Result = GetCacheRecords(BaseUri, "ue4.ddc"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1679,10 +1679,10 @@ TEST_CASE("zcache.rpc") SpawnServer(UpstreamServer, UpstreamCfg); SpawnServer(LocalServer, LocalCfg); - std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "default"sv, "mastodon"sv, 4); + std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "ue4.ddc"sv, "mastodon"sv, 4); CachePolicy Policy = CachePolicy::QueryLocal; - GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, "default"sv, Keys, Policy); + GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, "ue4.ddc"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1704,10 +1704,10 @@ TEST_CASE("zcache.rpc") SpawnServer(UpstreamServer, UpstreamCfg); SpawnServer(LocalServer, LocalCfg); - std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "default"sv, "mastodon"sv, 4); + std::vector Keys = PutCacheRecords(UpstreamCfg.BaseUri, "ue4.ddc"sv, "mastodon"sv, 4); CachePolicy Policy = (CachePolicy::QueryLocal | CachePolicy::QueryRemote); - GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, "default"sv, Keys, Policy); + GetCacheRecordResult Result = GetCacheRecords(LocalCfg.BaseUri, "ue4.ddc"sv, Keys, Policy); CHECK(Result.Records.size() == Keys.size()); @@ -1738,7 +1738,7 @@ TEST_CASE("zcache.rpc.allpolicies") std::string_view TestVersion = "F72150A02AE34B57A9EC91D36BA1CE08"sv; std::string_view TestBucket = "allpoliciestest"sv; - std::string_view TestNamespace = "default"sv; + std::string_view TestNamespace = "ue4.ddc"sv; // NumKeys = (2 Value vs Record)*(2 SkipData vs Default)*(2 ForceMiss vs Not)*(2 use local) // *(2 use remote)*(2 UseValue Policy vs not)*(4 cases per type) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index a349f13e1..135572d07 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -2438,8 +2438,8 @@ TEST_CASE("z$service.parse.relative.Uri") CHECK(LegacyValueContentIdRequest.ValueContentId == IoHash::FromHexString("56789abcdef12345678956789abcdef123456789"sv)); HttpRequestData V2DefaultNamespaceRequest; - CHECK(HttpRequestParseRelativeUri("default", V2DefaultNamespaceRequest)); - CHECK(V2DefaultNamespaceRequest.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(HttpRequestParseRelativeUri("ue4.ddc", V2DefaultNamespaceRequest)); + CHECK(V2DefaultNamespaceRequest.Namespace == "ue4.ddc"); CHECK(!V2DefaultNamespaceRequest.Bucket.has_value()); CHECK(!V2DefaultNamespaceRequest.HashKey.has_value()); CHECK(!V2DefaultNamespaceRequest.ValueContentId.has_value()); @@ -2452,8 +2452,8 @@ TEST_CASE("z$service.parse.relative.Uri") CHECK(!V2NamespaceRequest.ValueContentId.has_value()); HttpRequestData V2BucketRequestWithDefaultNamespace; - CHECK(HttpRequestParseRelativeUri("default/test", V2BucketRequestWithDefaultNamespace)); - CHECK(V2BucketRequestWithDefaultNamespace.Namespace == ZenCacheStore::DefaultNamespace); + CHECK(HttpRequestParseRelativeUri("ue4.ddc/test", V2BucketRequestWithDefaultNamespace)); + CHECK(V2BucketRequestWithDefaultNamespace.Namespace == "ue4.ddc"); CHECK(V2BucketRequestWithDefaultNamespace.Bucket == "test"sv); CHECK(!V2BucketRequestWithDefaultNamespace.HashKey.has_value()); CHECK(!V2BucketRequestWithDefaultNamespace.ValueContentId.has_value()); @@ -2466,7 +2466,7 @@ TEST_CASE("z$service.parse.relative.Uri") CHECK(!V2BucketRequestWithNamespace.ValueContentId.has_value()); HttpRequestData V2HashKeyRequest; - CHECK(HttpRequestParseRelativeUri("default/test/0123456789abcdef12340123456789abcdef1234", V2HashKeyRequest)); + CHECK(HttpRequestParseRelativeUri("test/0123456789abcdef12340123456789abcdef1234", V2HashKeyRequest)); CHECK(V2HashKeyRequest.Namespace == ZenCacheStore::DefaultNamespace); CHECK(V2HashKeyRequest.Bucket == "test"); CHECK(V2HashKeyRequest.HashKey == IoHash::FromHexString("0123456789abcdef12340123456789abcdef1234"sv)); -- cgit v1.2.3 From fadaf985c6a1afeede661e2eb14136b45cb1c6f0 Mon Sep 17 00:00:00 2001 From: Dan Engelbrecht Date: Thu, 19 May 2022 22:37:07 +0200 Subject: fix mac compilation error --- zenserver/cache/structuredcache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenserver/cache/structuredcache.cpp b/zenserver/cache/structuredcache.cpp index 135572d07..e11499289 100644 --- a/zenserver/cache/structuredcache.cpp +++ b/zenserver/cache/structuredcache.cpp @@ -1786,7 +1786,7 @@ HttpStructuredCacheService::HandleRpcGetCacheValues(zen::HttpServerRequest& Http for (size_t Index : RemoteRequestIndexes) { RequestData& Request = Requests[Index]; - RequestedRecordsData.push_back({Request.Key.Bucket, Request.Key.Hash}); + RequestedRecordsData.push_back({.Key = {Request.Key.Bucket, Request.Key.Hash}}); CacheChunkRequests.push_back(&RequestedRecordsData.back()); } Stopwatch Timer; -- cgit v1.2.3