diff options
| author | Dan Engelbrecht <[email protected]> | 2024-09-17 15:05:40 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2024-09-17 15:05:40 +0200 |
| commit | d020b6522b2d962db67f8a66410e74d61cf3da24 (patch) | |
| tree | 44985edb66e5405df2d3a57c291490b2a0485337 /src/zenstore/cache/cachedisklayer.cpp | |
| parent | Running the public github release mirroring as part of creating the release (... (diff) | |
| download | zen-d020b6522b2d962db67f8a66410e74d61cf3da24.tar.xz zen-d020b6522b2d962db67f8a66410e74d61cf3da24.zip | |
gc performance improvements (#160)
* optimized ValidateCbUInt
* optimized IoHash comparison
* replace unordered set/map with tsl/robin set/map in blockstore
* increase max buffer size when writing cache bucket sidecar
* only store metadata for files < 4 GB
* faster ReadAttachmentsFromMetaData
* remove memcpy call in BlockStoreDiskLocation
* only write cache bucket state to disk if GC deleted anything
Diffstat (limited to 'src/zenstore/cache/cachedisklayer.cpp')
| -rw-r--r-- | src/zenstore/cache/cachedisklayer.cpp | 129 |
1 file changed, 78 insertions, 51 deletions
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index f2ec5ddda..110acba9e 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -18,6 +18,7 @@ #include <zenutil/workerpools.h> #include <future> +#include <limits> ////////////////////////////////////////////////////////////////////////// @@ -72,8 +73,8 @@ namespace { struct BucketMetaHeader { static constexpr uint32_t ExpectedMagic = 0x61'74'65'6d; // 'meta'; - static constexpr uint32_t Version1 = 1; - static constexpr uint32_t CurrentVersion = Version1; + static constexpr uint32_t Version2 = 2; + static constexpr uint32_t CurrentVersion = Version2; uint32_t Magic = ExpectedMagic; uint32_t Version = CurrentVersion; @@ -299,17 +300,18 @@ private: uint64_t m_ManifestEntryCount = 0; +#pragma pack(push) +#pragma pack(4) struct ManifestData { - IoHash Key; // 20 + uint32_t RawSize; // 4 AccessTime Timestamp; // 4 IoHash RawHash; // 20 - uint32_t Padding_0; // 4 - size_t RawSize; // 8 - uint64_t Padding_1; // 8 + IoHash Key; // 20 }; +#pragma pack(pop) - static_assert(sizeof(ManifestData) == 64); + static_assert(sizeof(ManifestData) == 48); }; void @@ -375,10 +377,12 @@ BucketManifestSerializer::ParseManifest(RwLock::ExclusiveLockScope& Buck { uint64_t RawSize = RawSizeIt.AsUInt64(); IoHash RawHash = RawHashIt.AsHash(); - if (RawSize != 0 || RawHash != IoHash::Zero) + if ((RawSize != 0 || RawHash != IoHash::Zero) && RawSize <= std::numeric_limits<std::uint32_t>::max()) { BucketPayload& Payload = Payloads[KeyIndex]; - Bucket.SetMetaData(BucketLock, Payload, BucketMetaData{.RawSize = RawSize, .RawHash = RawHash}); + Bucket.SetMetaData(BucketLock, + Payload, + BucketMetaData{.RawSize = static_cast<std::uint32_t>(RawSize), .RawHash = RawHash}); } } @@ -547,7 +551,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B return false; } - if (Header.Version != BucketMetaHeader::Version1) + if (Header.Version != 
BucketMetaHeader::Version2) { ZEN_WARN("Failed to read sidecar file '{}'. Unsupported version: {}", SidecarPath, Header.Version); return false; @@ -635,7 +639,7 @@ BucketManifestSerializer::WriteSidecarFile(RwLock::SharedLockScope&, // BasicFileWriter SidecarWriter(SidecarFile, 128 * 1024); std::vector<ManifestData> ManifestDataBuffer; - const size_t MaxManifestDataBufferCount = Min(Index.size(), 4096u); // 256 Kb + const size_t MaxManifestDataBufferCount = Min(Index.size(), 8192u); // 512 Kb ManifestDataBuffer.reserve(MaxManifestDataBufferCount); for (auto& Kv : Index) { @@ -643,7 +647,7 @@ BucketManifestSerializer::WriteSidecarFile(RwLock::SharedLockScope&, const PayloadIndex PlIndex = Kv.second; IoHash RawHash = IoHash::Zero; - uint64_t RawSize = 0; + uint32_t RawSize = 0; if (const MetaDataIndex MetaIndex = Payloads[PlIndex].MetaData) { @@ -651,12 +655,8 @@ BucketManifestSerializer::WriteSidecarFile(RwLock::SharedLockScope&, RawSize = MetaDatas[MetaIndex].RawSize; } - ManifestDataBuffer.emplace_back(ManifestData{.Key = Key, - .Timestamp = AccessTimes[PlIndex], - .RawHash = RawHash, - .Padding_0 = 0, - .RawSize = RawSize, - .Padding_1 = 0}); + ManifestDataBuffer.emplace_back( + ManifestData{.RawSize = RawSize, .Timestamp = AccessTimes[PlIndex], .RawHash = RawHash, .Key = Key}); if (ManifestDataBuffer.size() == MaxManifestDataBufferCount) { const uint64_t WriteSize = sizeof(ManifestData) * ManifestDataBuffer.size(); @@ -1464,6 +1464,10 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept OutValue.RawHash = IoHash::HashBuffer(OutValue.Value); OutValue.RawSize = OutValue.Value.GetSize(); } + if (OutValue.RawSize > std::numeric_limits<std::uint32_t>::max()) + { + SetMetaInfo = false; + } } if (SetMetaInfo || AddToMemCache) @@ -1478,7 +1482,9 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept // Only update if it has not already been updated by other thread if (!Payload.MetaData && SetMetaInfo) { - 
SetMetaData(UpdateIndexLock, Payload, {.RawSize = OutValue.RawSize, .RawHash = OutValue.RawHash}); + SetMetaData(UpdateIndexLock, + Payload, + {.RawSize = gsl::narrow<uint32_t>(OutValue.RawSize), .RawHash = OutValue.RawHash}); } if (!Payload.MemCached && AddToMemCache) { @@ -1576,6 +1582,7 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept OutValue.RawSize = OutValue.Value.GetSize(); } + if (OutValue.RawSize <= std::numeric_limits<std::uint32_t>::max()) { RwLock::ExclusiveLockScope UpdateIndexLock(m_IndexLock); { @@ -1586,7 +1593,9 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept // Only update if it has not already been updated by other thread if (!Payload.MetaData) { - SetMetaData(UpdateIndexLock, Payload, {.RawSize = OutValue.RawSize, .RawHash = OutValue.RawHash}); + SetMetaData(UpdateIndexLock, + Payload, + {.RawSize = static_cast<std::uint32_t>(OutValue.RawSize), .RawHash = OutValue.RawHash}); } } } @@ -1724,15 +1733,20 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal OutValue.RawHash = IoHash::HashBuffer(OutValue.Value); OutValue.RawSize = OutValue.Value.GetSize(); } - RwLock::ExclusiveLockScope UpdateIndexLock(m_IndexLock); - if (auto WriteIt = m_Index.find(HashKey); WriteIt != m_Index.end()) + if (OutValue.RawSize <= std::numeric_limits<std::uint32_t>::max()) { - BucketPayload& WritePayload = m_Payloads[WriteIt.value()]; - - // Only set if no other path has already updated the meta data - if (!WritePayload.MetaData) + RwLock::ExclusiveLockScope UpdateIndexLock(m_IndexLock); + if (auto WriteIt = m_Index.find(HashKey); WriteIt != m_Index.end()) { - SetMetaData(UpdateIndexLock, WritePayload, {.RawSize = OutValue.RawSize, .RawHash = OutValue.RawHash}); + BucketPayload& WritePayload = m_Payloads[WriteIt.value()]; + + // Only set if no other path has already updated the meta data + if (!WritePayload.MetaData) + { + SetMetaData(UpdateIndexLock, + WritePayload, + {.RawSize 
= static_cast<std::uint32_t>(OutValue.RawSize), .RawHash = OutValue.RawHash}); + } } } } @@ -2972,9 +2986,9 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c RemoveMemCachedData(IndexLock, Payload); m_StandaloneSize.fetch_sub(OldSize, std::memory_order::relaxed); } - if (Value.RawSize != 0 || Value.RawHash != IoHash::Zero) + if ((Value.RawSize != 0 || Value.RawHash != IoHash::Zero) && Value.RawSize <= std::numeric_limits<std::uint32_t>::max()) { - SetMetaData(IndexLock, m_Payloads[EntryIndex], {.RawSize = Value.RawSize, .RawHash = Value.RawHash}); + SetMetaData(IndexLock, m_Payloads[EntryIndex], {.RawSize = static_cast<std::uint32_t>(Value.RawSize), .RawHash = Value.RawHash}); } else { @@ -3280,8 +3294,9 @@ public: uint64_t ChunkSize = RoundUp(Loc.Size(), m_Bucket.m_Configuration.PayloadAlignment); if (auto It = BlockUsage.find(BlockIndex); It != BlockUsage.end()) { - It->second.EntryCount++; - It->second.DiskUsage += ChunkSize; + BlockStore::BlockUsageInfo& Info = It.value(); + Info.EntryCount++; + Info.DiskUsage += ChunkSize; } else { @@ -3411,20 +3426,23 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } - bool Expected = false; - if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true)) + if (Stats.DeletedCount > 0) { - return; - } - auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); }); + bool Expected = false; + if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true)) + { + return; + } + auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); }); - try - { - SaveSnapshot([]() { return 0; }); - } - catch (const std::exception& Ex) - { - ZEN_WARN("Failed to write index and manifest after RemoveExpiredData in '{}'. 
Reason: '{}'", m_BucketDir, Ex.what()); + try + { + SaveSnapshot([]() { return 0; }); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed to write index and manifest after RemoveExpiredData in '{}'. Reason: '{}'", m_BucketDir, Ex.what()); + } } }); @@ -3529,7 +3547,7 @@ ZenCacheDiskLayer::CacheBucket::ReadAttachmentsFromMetaData(uint32_t BlockI IoBuffer MetaDataPayload = m_BlockStore.GetMetaData(BlockIndex); if (MetaDataPayload) { - std::unordered_set<IoHash, IoHash::Hasher> WantedKeys; + tsl::robin_set<IoHash, IoHash::Hasher> WantedKeys; WantedKeys.reserve(ChunkIndexes.size()); for (const size_t ChunkIndex : ChunkIndexes) { @@ -3540,20 +3558,29 @@ ZenCacheDiskLayer::CacheBucket::ReadAttachmentsFromMetaData(uint32_t BlockI MetaDataPayload, BlockMetaDataExpectedMagic, [&](std::span<const IoHash> Keys, std::span<const uint32_t> AttachmentCounts, std::span<const IoHash> Attachments) { - OutReferences.reserve(OutReferences.capacity() + Attachments.size()); - auto AttachmentStart = Attachments.begin(); - for (uint32_t Index = 0; Index < Keys.size(); Index++) + auto AttachmentReadIt = Attachments.begin(); + OutReferences.resize(OutReferences.size() + Attachments.size()); + auto OutReferencesWriteIt = OutReferences.end() - Attachments.size(); + auto KeyIt = Keys.begin(); + for (uint32_t AttachmentCount : AttachmentCounts) { - uint32_t AttachmentCount = AttachmentCounts[Index]; if (AttachmentCount > 0) { - if (WantedKeys.contains(Keys[Index])) + if (WantedKeys.contains(*KeyIt)) + { + for (uint32_t It = 0u; It < AttachmentCount; It++) + { + *OutReferencesWriteIt++ = *AttachmentReadIt++; + } + } + else { - OutReferences.insert(OutReferences.end(), AttachmentStart, AttachmentStart + AttachmentCount); + AttachmentReadIt += AttachmentCount; } - AttachmentStart += AttachmentCount; } + KeyIt++; } + OutReferences.erase(OutReferencesWriteIt, OutReferences.end()); }); } return false; |