diff options
| author | Martin Ridgers <[email protected]> | 2021-10-07 08:29:50 +0200 |
|---|---|---|
| committer | Martin Ridgers <[email protected]> | 2021-10-07 08:29:50 +0200 |
| commit | 03232621d183f22e12e798a753e4a606763e63d6 (patch) | |
| tree | 5701d202392dd4ab947139e4046a44ab9bc6cdf7 /zenserver/cache/structuredcachestore.cpp | |
| parent | Merged main (diff) | |
| parent | Only enable the MSVC debug output sink for sessions when the --debug mode is ... (diff) | |
| download | zen-03232621d183f22e12e798a753e4a606763e63d6.tar.xz zen-03232621d183f22e12e798a753e4a606763e63d6.zip | |
Merged main
Diffstat (limited to 'zenserver/cache/structuredcachestore.cpp')
| -rw-r--r-- | zenserver/cache/structuredcachestore.cpp | 275 |
1 files changed, 195 insertions, 80 deletions
diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp index 5e93ebaa9..580446473 100644 --- a/zenserver/cache/structuredcachestore.cpp +++ b/zenserver/cache/structuredcachestore.cpp @@ -32,6 +32,8 @@ ZenCacheStore::ZenCacheStore(CasStore& Cas, const std::filesystem::path& RootDir { ZEN_INFO("initializing structured cache at '{}'", RootDir); CreateDirectories(RootDir); + + m_DiskLayer.DiscoverBuckets(); } ZenCacheStore::~ZenCacheStore() @@ -116,6 +118,13 @@ ZenCacheStore::Scrub(ScrubContext& Ctx) m_DiskLayer.Scrub(Ctx); m_MemLayer.Scrub(Ctx); } + +void +ZenCacheStore::GarbageCollect(GcContext& GcCtx) +{ + ZEN_UNUSED(GcCtx); +} + ////////////////////////////////////////////////////////////////////////// ZenCacheMemoryLayer::ZenCacheMemoryLayer() @@ -142,6 +151,10 @@ ZenCacheMemoryLayer::Get(std::string_view InBucket, const IoHash& HashKey, ZenCa _.ReleaseNow(); + // There's a race here. Since the lock is released early to allow + // inserts, the bucket delete path could end up deleting the + // underlying data structure + return Bucket->Get(HashKey, OutValue); } @@ -195,13 +208,21 @@ ZenCacheMemoryLayer::Scrub(ScrubContext& Ctx) } void +ZenCacheMemoryLayer::GarbageCollect(GcContext& GcCtx) +{ + ZEN_UNUSED(GcCtx); +} + +void ZenCacheMemoryLayer::CacheBucket::Scrub(ScrubContext& Ctx) { + RwLock::SharedLockScope _(m_bucketLock); + std::vector<IoHash> BadHashes; for (auto& Kv : m_cacheMap) { - if (Kv.first != IoHash::HashBuffer(Kv.second)) + if (Kv.first != IoHash::HashBuffer(Kv.second.Payload)) { BadHashes.push_back(Kv.first); } @@ -209,10 +230,16 @@ ZenCacheMemoryLayer::CacheBucket::Scrub(ScrubContext& Ctx) if (!BadHashes.empty()) { - Ctx.ReportBadChunks(BadHashes); + Ctx.ReportBadCasChunks(BadHashes); } } +void +ZenCacheMemoryLayer::CacheBucket::GarbageCollect(GcContext& GcCtx) +{ + ZEN_UNUSED(GcCtx); +} + bool ZenCacheMemoryLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutValue) { @@ -224,18 +251,26 @@ ZenCacheMemoryLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutV } else { - OutValue.Value = bucketIt->second; + BucketValue& Value = bucketIt.value(); + OutValue.Value = Value.Payload; + Value.LastAccess = GetCurrentTimeStamp(); return true; } } +uint64_t +ZenCacheMemoryLayer::CacheBucket::GetCurrentTimeStamp() +{ + return GetLofreqTimerValue(); +} + void ZenCacheMemoryLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& Value) { RwLock::ExclusiveLockScope _(m_bucketLock); - m_cacheMap[HashKey] = Value.Value; + m_cacheMap.insert_or_assign(HashKey, BucketValue{.LastAccess = GetCurrentTimeStamp(), .Payload = Value.Value}); } ////////////////////////////////////////////////////////////////////////// @@ -245,11 +280,17 @@ ZenCacheMemoryLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue struct DiskLocation { - uint64_t OffsetAndFlags; - uint32_t Size; - uint32_t IndexDataSize; + inline DiskLocation() = default; + + inline DiskLocation(uint64_t Offset, uint64_t ValueSize, uint32_t IndexSize, uint64_t Flags) + : OffsetAndFlags(CombineOffsetAndFlags(Offset, Flags)) + , LowerSize(ValueSize & 0xFFFFffff) + , IndexDataSize(IndexSize) + { + } - static const uint64_t kOffsetMask = 0x00FF'ffFF'ffFF'ffFFull; + static const uint64_t kOffsetMask = 0x0000'ffFF'ffFF'ffFFull; + static const uint64_t kSizeMask = 0x00FF'0000'0000'0000ull; static const uint64_t kFlagsMask = 0xff00'0000'0000'0000ull; static const uint64_t kStandaloneFile = 0x8000'0000'0000'0000ull; static const uint64_t kStructured = 0x4000'0000'0000'0000ull; @@ -257,6 +298,7 @@ struct DiskLocation static uint64_t CombineOffsetAndFlags(uint64_t Offset, uint64_t Flags) { return Offset | Flags; } inline uint64_t Offset() const { return OffsetAndFlags & kOffsetMask; } + inline uint64_t Size() const { return LowerSize; } inline uint64_t IsFlagSet(uint64_t Flag) const { return OffsetAndFlags & Flag; } inline ZenContentType GetContentType() const { @@ -269,6 +311,11 @@ struct DiskLocation return ContentType; } + +private: + uint64_t OffsetAndFlags = 0; + uint32_t LowerSize = 0; + uint32_t IndexDataSize = 0; }; struct DiskIndexEntry @@ -286,7 +333,7 @@ struct ZenCacheDiskLayer::CacheBucket CacheBucket(CasStore& Cas); ~CacheBucket(); - void OpenOrCreate(std::filesystem::path BucketDir); + void OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); static bool Delete(std::filesystem::path BucketDir); bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); @@ -294,14 +341,15 @@ struct ZenCacheDiskLayer::CacheBucket void Drop(); void Flush(); void Scrub(ScrubContext& Ctx); + void GarbageCollect(GcContext& GcCtx); - inline bool IsOk() const { return m_Ok; } + inline bool IsOk() const { return m_IsOk; } private: CasStore& m_CasStore; std::filesystem::path m_BucketDir; Oid m_BucketId; - bool m_Ok = false; + bool m_IsOk = false; uint64_t m_LargeObjectThreshold = 64 * 1024; // These files are used to manage storage of small objects for this bucket @@ -314,9 +362,19 @@ private: uint64_t m_WriteCursor = 0; void BuildPath(WideStringBuilderBase& Path, const IoHash& HashKey); - void PutLargeObject(const IoHash& HashKey, const ZenCacheValue& Value); - bool GetStandaloneCacheValue(const IoHash& HashKey, ZenCacheValue& OutValue, const DiskLocation& Loc); + void PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value); + bool GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey, ZenCacheValue& OutValue); bool GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue); + + // These locks are here to avoid contention on file creation, therefore it's sufficient + // that we take the same lock for the same hash + // + // These locks are small and should really be spaced out so they don't share cache lines, + // but we don't currently access them at particularly high frequency so it should not be + // an issue in practice + + RwLock m_ShardedLocks[256]; + inline RwLock& LockForHash(const IoHash& Hash) { return m_ShardedLocks[Hash.Hash[19]]; } }; ZenCacheDiskLayer::CacheBucket::CacheBucket(CasStore& Cas) : m_CasStore(Cas) @@ -341,7 +399,7 @@ ZenCacheDiskLayer::CacheBucket::Delete(std::filesystem::path BucketDir) } void -ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir) +ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate) { CreateDirectories(BucketDir); @@ -368,17 +426,23 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir) { ManifestFile.Read(&m_BucketId, sizeof(Oid), 0); - m_Ok = true; + m_IsOk = true; } - if (!m_Ok) + if (!m_IsOk) { ManifestFile.Close(); } } - if (!m_Ok) + if (!m_IsOk) { + if (AllowCreate == false) + { + // Invalid bucket + return; + } + // No manifest file found, this is a new bucket ManifestFile.Open(ManifestPath, /* IsCreate */ true, Ec); @@ -410,13 +474,13 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir) m_SlogFile.Replay([&](const DiskIndexEntry& Record) { m_Index[Record.Key] = Record.Location; - MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset() + Record.Location.Size); + MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset() + Record.Location.Size()); }); m_WriteCursor = (MaxFileOffset + 15) & ~15; } - m_Ok = true; + m_IsOk = true; } void @@ -437,23 +501,25 @@ ZenCacheDiskLayer::CacheBucket::BuildPath(WideStringBuilderBase& Path, const IoH bool ZenCacheDiskLayer::CacheBucket::GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue) { - if (!Loc.IsFlagSet(DiskLocation::kStandaloneFile)) + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) { - OutValue.Value = IoBufferBuilder::MakeFromFileHandle(m_SobsFile.Handle(), Loc.Offset(), Loc.Size); - OutValue.Value.SetContentType(Loc.GetContentType()); - - return true; + return false; } - return false; + OutValue.Value = IoBufferBuilder::MakeFromFileHandle(m_SobsFile.Handle(), Loc.Offset(), Loc.Size()); + OutValue.Value.SetContentType(Loc.GetContentType()); + + return true; } bool -ZenCacheDiskLayer::CacheBucket::GetStandaloneCacheValue(const IoHash& HashKey, ZenCacheValue& OutValue, const DiskLocation& Loc) +ZenCacheDiskLayer::CacheBucket::GetStandaloneCacheValue(const DiskLocation& Loc, const IoHash& HashKey, ZenCacheValue& OutValue) { WideStringBuilder<128> DataFilePath; BuildPath(DataFilePath, HashKey); + RwLock::SharedLockScope ValueLock(LockForHash(HashKey)); + if (IoBuffer Data = IoBufferBuilder::MakeFromFile(DataFilePath.c_str())) { OutValue.Value = Data; @@ -468,7 +534,7 @@ ZenCacheDiskLayer::CacheBucket::GetStandaloneCacheValue(const IoHash& HashKey, Z bool ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutValue) { - if (!m_Ok) + if (!m_IsOk) { return false; } @@ -486,7 +552,7 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal _.ReleaseNow(); - return GetStandaloneCacheValue(HashKey, OutValue, Loc); + return GetStandaloneCacheValue(Loc, HashKey, OutValue); } return false; @@ -495,14 +561,14 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal void ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& Value) { - if (!m_Ok) + if (!m_IsOk) { return; } if (Value.Value.Size() >= m_LargeObjectThreshold) { - return PutLargeObject(HashKey, Value); + return PutStandaloneCacheValue(HashKey, Value); } else { @@ -517,10 +583,9 @@ ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& RwLock::ExclusiveLockScope _(m_IndexLock); - DiskLocation Loc{.OffsetAndFlags = DiskLocation::CombineOffsetAndFlags(m_WriteCursor, EntryFlags), - .Size = gsl::narrow<uint32_t>(Value.Value.Size())}; + DiskLocation Loc(m_WriteCursor, Value.Value.Size(), 0, EntryFlags); - m_WriteCursor = RoundUp(m_WriteCursor + Loc.Size, 16); + m_WriteCursor = RoundUp(m_WriteCursor + Loc.Size(), 16); if (auto it = m_Index.find(HashKey); it == m_Index.end()) { @@ -530,11 +595,13 @@ ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue& else { // TODO: should check if write is idempotent and bail out if it is? + // this would requiring comparing contents on disk unless we add a + // content hash to the index entry it.value() = Loc; } m_SlogFile.Append({.Key = HashKey, .Location = Loc}); - m_SobsFile.Write(Value.Value.Data(), Loc.Size, Loc.Offset()); + m_SobsFile.Write(Value.Value.Data(), Loc.Size(), Loc.Offset()); } } @@ -558,61 +625,69 @@ ZenCacheDiskLayer::CacheBucket::Flush() void ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx) { - std::vector<DiskIndexEntry> StandaloneFiles; + std::atomic<uint64_t> ScrubbedChunks{0}, ScrubbedBytes{0}; - std::vector<IoHash> BadChunks; - std::vector<IoBuffer> BadStandaloneChunks; + std::vector<IoHash> BadChunks; { RwLock::SharedLockScope _(m_IndexLock); for (auto& Kv : m_Index) { - const IoHash& Hash = Kv.first; - const DiskLocation& Loc = Kv.second; + const IoHash& HashKey = Kv.first; + const DiskLocation& Loc = Kv.second; ZenCacheValue Value; - if (!GetInlineCacheValue(Loc, Value)) + if (GetInlineCacheValue(Loc, Value)) { - ZEN_ASSERT(Loc.IsFlagSet(DiskLocation::kStandaloneFile)); - StandaloneFiles.push_back({.Key = Hash, .Location = Loc}); + // Validate contents } else { - if (GetStandaloneCacheValue(Hash, Value, Loc)) + if (Loc.IsFlagSet(DiskLocation::kStandaloneFile)) { - // Hash contents - - const IoHash ComputedHash = HashBuffer(Value.Value); - - if (ComputedHash != Hash) + if (GetStandaloneCacheValue(Loc, HashKey, Value)) { - BadChunks.push_back(Hash); + // Note: we cannot currently validate contents since we don't + // have a content hash! + } + else + { + // Value not found + BadChunks.push_back(HashKey); } - } - else - { - // Non-existent } } } } - if (Ctx.RunRecovery()) + Ctx.ReportScrubbed(ScrubbedChunks, ScrubbedBytes); + + if (BadChunks.empty()) { - // Clean out bad chunks + return; } - if (!BadChunks.empty()) + Ctx.ReportBadCasChunks(BadChunks); + + if (Ctx.RunRecovery()) { - Ctx.ReportBadChunks(BadChunks); + // Clean out bad data } } void -ZenCacheDiskLayer::CacheBucket::PutLargeObject(const IoHash& HashKey, const ZenCacheValue& Value) +ZenCacheDiskLayer::CacheBucket::GarbageCollect(GcContext& GcCtx) { + ZEN_UNUSED(GcCtx); +} + +void +ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, const ZenCacheValue& Value) +{ + RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); + WideStringBuilder<128> DataFilePath; BuildPath(DataFilePath, HashKey); @@ -661,7 +736,7 @@ ZenCacheDiskLayer::CacheBucket::PutLargeObject(const IoHash& HashKey, const ZenC RwLock::ExclusiveLockScope _(m_IndexLock); - DiskLocation Loc{.OffsetAndFlags = DiskLocation::CombineOffsetAndFlags(0, EntryFlags), .Size = 0}; + DiskLocation Loc(/* Offset */ 0, Value.Value.Size(), 0, EntryFlags); if (auto it = m_Index.find(HashKey); it == m_Index.end()) { @@ -719,7 +794,7 @@ ZenCacheDiskLayer::Get(std::string_view InBucket, const IoHash& HashKey, ZenCach std::filesystem::path BucketPath = m_RootDir; BucketPath /= std::string(InBucket); - Bucket->OpenOrCreate(BucketPath.c_str()); + Bucket->OpenOrCreate(BucketPath); } } @@ -762,7 +837,7 @@ ZenCacheDiskLayer::Put(std::string_view InBucket, const IoHash& HashKey, const Z std::filesystem::path bucketPath = m_RootDir; bucketPath /= std::string(InBucket); - Bucket->OpenOrCreate(bucketPath.c_str()); + Bucket->OpenOrCreate(bucketPath); } } @@ -774,6 +849,63 @@ ZenCacheDiskLayer::Put(std::string_view InBucket, const IoHash& HashKey, const Z } } +void +ZenCacheDiskLayer::DiscoverBuckets() +{ + FileSystemTraversal Traversal; + struct Visitor : public FileSystemTraversal::TreeVisitor + { + virtual void VisitFile([[maybe_unused]] const std::filesystem::path& Parent, + [[maybe_unused]] const path_view& File, + [[maybe_unused]] uint64_t FileSize) override + { + } + + virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, const path_view& DirectoryName) override + { + Dirs.push_back(std::wstring(DirectoryName)); + return false; + } + + std::vector<std::wstring> Dirs; + } Visit; + + Traversal.TraverseFileSystem(m_RootDir, Visit); + + // Initialize buckets + + RwLock::ExclusiveLockScope _(m_Lock); + + for (const std::wstring& BucketName : Visit.Dirs) + { + // New bucket needs to be created + + std::string BucketName8 = WideToUtf8(BucketName); + + if (auto It = m_Buckets.find(BucketName8); It != m_Buckets.end()) + { + } + else + { + auto InsertResult = m_Buckets.try_emplace(BucketName8, m_CasStore); + + std::filesystem::path BucketPath = m_RootDir; + BucketPath /= BucketName8; + + CacheBucket& Bucket = InsertResult.first->second; + + Bucket.OpenOrCreate(BucketPath, /* AllowCreate */ false); + + if (!Bucket.IsOk()) + { + ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName8, m_RootDir); + + m_Buckets.erase(InsertResult.first); + } + } + } +} + bool ZenCacheDiskLayer::DropBucket(std::string_view InBucket) { @@ -830,27 +962,10 @@ ZenCacheDiskLayer::Scrub(ScrubContext& Ctx) } } -////////////////////////////////////////////////////////////////////////// - -ZenCacheTracker::ZenCacheTracker(ZenCacheStore& CacheStore) -{ - ZEN_UNUSED(CacheStore); -} - -ZenCacheTracker::~ZenCacheTracker() -{ -} - -void -ZenCacheTracker::TrackAccess(std::string_view Bucket, const IoHash& HashKey) -{ - ZEN_UNUSED(Bucket); - ZEN_UNUSED(HashKey); -} - void -ZenCacheTracker::Flush() +ZenCacheDiskLayer::GarbageCollect(GcContext& GcCtx) { + ZEN_UNUSED(GcCtx); } } // namespace zen |