about summary refs log tree commit diff
diff options
context:
space:
mode:
author    Dan Engelbrecht <[email protected]>  2026-02-03 16:31:40 +0100
committer GitHub Enterprise <[email protected]>  2026-02-03 16:31:40 +0100
commit    932b15e7318ed65973373aa3c4a94714cd688d7b (patch)
tree      fce75256111f07469cd34b2381b89aa9a0a491b7
parent    add command line option for scrub timeslice (#742) (diff)
download  zen-932b15e7318ed65973373aa3c4a94714cd688d7b.tar.xz
          zen-932b15e7318ed65973373aa3c4a94714cd688d7b.zip
reduce blocking in scrub (#743)
* reduce held locks while performing scrub operation
-rw-r--r--  CHANGELOG.md                            |   1
-rw-r--r--  src/zenstore/cache/cachedisklayer.cpp   | 142
-rw-r--r--  src/zenstore/compactcas.cpp             |  23
-rw-r--r--  src/zenstore/projectstore.cpp           |   2
4 files changed, 101 insertions(+), 67 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d5cbae95..da3e9c81c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
- Improvement: Revise logic for enabling work backlog during `zen builds download`
- Improvement: Added `--maxtimeslice` option to `zen scrub` command to control how long a scrub operation may run
- Improvement: Increased the default scrub timeslice from 1 min 40 sec to 5 min.
+- Improvement: Reduce lock contention when performing a scrub operation
- Bugfix: Restore `/health/log` and `/health/info` endpoint functionality
## 5.7.19
diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp
index b2e045632..ead7e4f3a 100644
--- a/src/zenstore/cache/cachedisklayer.cpp
+++ b/src/zenstore/cache/cachedisklayer.cpp
@@ -2410,74 +2410,95 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
try
{
- std::vector<BlockStoreLocation> ChunkLocations;
- std::vector<IoHash> ChunkIndexToChunkHash;
+ std::vector<DiskLocation> ChunkLocations;
+ std::vector<IoHash> ChunkIndexToChunkHash;
+ std::vector<DiskLocation> StandaloneLocations;
+ std::vector<IoHash> StandaloneIndexToKeysHash;
- RwLock::SharedLockScope _(m_IndexLock);
+ {
+ RwLock::SharedLockScope _(m_IndexLock);
- const size_t BlockChunkInitialCount = m_Index.size() / 4;
- ChunkLocations.reserve(BlockChunkInitialCount);
- ChunkIndexToChunkHash.reserve(BlockChunkInitialCount);
+ const size_t InitialCount = m_Index.size() / 4;
+ ChunkLocations.reserve(InitialCount);
+ ChunkIndexToChunkHash.reserve(InitialCount);
+ StandaloneLocations.reserve(InitialCount);
+ StandaloneIndexToKeysHash.reserve(InitialCount);
- // Do a pass over the index and verify any standalone file values straight away
- // all other storage classes are gathered and verified in bulk in order to enable
- // more efficient I/O scheduling
+ for (auto& Kv : m_Index)
+ {
+ const IoHash& HashKey = Kv.first;
+ const BucketPayload& Payload = m_Payloads[Kv.second];
+ const DiskLocation& Loc = Payload.Location;
- for (auto& Kv : m_Index)
- {
- const IoHash& HashKey = Kv.first;
- const BucketPayload& Payload = m_Payloads[Kv.second];
- const DiskLocation& Loc = Payload.Location;
+ Ctx.ThrowIfDeadlineExpired();
+ if (Loc.IsFlagSet(DiskLocation::kStandaloneFile))
+ {
+ StandaloneLocations.push_back(Loc);
+ StandaloneIndexToKeysHash.push_back(HashKey);
+ }
+ else
+ {
+ ChunkLocations.push_back(Loc);
+ ChunkIndexToChunkHash.push_back(HashKey);
+ }
+ }
+ }
+
+ for (size_t StandaloneKeyIndex = 0; StandaloneKeyIndex < StandaloneIndexToKeysHash.size(); StandaloneKeyIndex++)
+ {
Ctx.ThrowIfDeadlineExpired();
- if (Loc.IsFlagSet(DiskLocation::kStandaloneFile))
- {
- ChunkCount.fetch_add(1);
- VerifiedChunkBytes.fetch_add(Loc.Size());
+ const IoHash& HashKey = StandaloneIndexToKeysHash[StandaloneKeyIndex];
+ const DiskLocation& Loc = StandaloneLocations[StandaloneKeyIndex];
- if (Loc.GetContentType() == ZenContentType::kBinary)
- {
- // Blob cache value, not much we can do about data integrity checking
- // here since there's no hash available
- ExtendablePathBuilder<256> DataFilePath;
- BuildPath(DataFilePath, HashKey);
+ ChunkCount.fetch_add(1);
+ VerifiedChunkBytes.fetch_add(Loc.Size());
- RwLock::SharedLockScope ValueLock(LockForHash(HashKey));
+ if (Loc.GetContentType() == ZenContentType::kBinary)
+ {
+ // Blob cache value, not much we can do about data integrity checking
+ // here since there's no hash available
+ ExtendablePathBuilder<256> DataFilePath;
+ BuildPath(DataFilePath, HashKey);
- std::error_code Ec;
- uintmax_t size = FileSizeFromPath(DataFilePath.ToPath(), Ec);
- if (Ec)
- {
- ReportBadKey(HashKey);
- }
- if (size != Loc.Size())
- {
- ReportBadKey(HashKey);
- }
- continue;
+ RwLock::SharedLockScope ValueLock(LockForHash(HashKey));
+
+ std::error_code Ec;
+ uintmax_t Size = FileSizeFromPath(DataFilePath.ToPath(), Ec);
+ if (Ec)
+ {
+ ReportBadKey(HashKey);
}
- else
+ ValueLock.ReleaseNow();
+
+ if (Size != Loc.Size())
{
- // Structured cache value
- IoBuffer Buffer = GetStandaloneCacheValue(Loc, HashKey);
- if (!Buffer)
+ // Make sure we verify that size hasn't changed behind our back...
+ RwLock::SharedLockScope _(m_IndexLock);
+ if (auto It = m_Index.find(HashKey); It != m_Index.end())
{
- ReportBadKey(HashKey);
- continue;
- }
- if (!ValidateIoBuffer(Loc.GetContentType(), std::move(Buffer)))
- {
- ReportBadKey(HashKey);
- continue;
+ const BucketPayload& Payload = m_Payloads[It->second];
+ const DiskLocation& CurrentLoc = Payload.Location;
+ if (Size != CurrentLoc.Size())
+ {
+ ReportBadKey(HashKey);
+ }
}
}
}
else
{
- ChunkLocations.emplace_back(Loc.GetBlockLocation(m_Configuration.PayloadAlignment));
- ChunkIndexToChunkHash.push_back(HashKey);
- continue;
+ // Structured cache value
+ IoBuffer Buffer = GetStandaloneCacheValue(Loc, HashKey);
+ if (!Buffer)
+ {
+ ReportBadKey(HashKey);
+ }
+ else if (!ValidateIoBuffer(Loc.GetContentType(), std::move(Buffer)))
+ {
+ ReportBadKey(HashKey);
+ }
}
}
@@ -2502,8 +2523,9 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
ReportBadKey(Hash);
return true;
}
- const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)];
- ZenContentType ContentType = Payload.Location.GetContentType();
+
+ const DiskLocation& Loc = ChunkLocations[ChunkIndex];
+ ZenContentType ContentType = Loc.GetContentType();
Buffer.SetContentType(ContentType);
if (!ValidateIoBuffer(ContentType, std::move(Buffer)))
{
@@ -2525,8 +2547,8 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
ReportBadKey(Hash);
return true;
}
- const BucketPayload& Payload = m_Payloads[m_Index.at(Hash)];
- ZenContentType ContentType = Payload.Location.GetContentType();
+ const DiskLocation& Loc = ChunkLocations[ChunkIndex];
+ ZenContentType ContentType = Loc.GetContentType();
Buffer.SetContentType(ContentType);
if (!ValidateIoBuffer(ContentType, std::move(Buffer)))
{
@@ -2536,8 +2558,16 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
return true;
};
- m_BlockStore.IterateChunks(ChunkLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) {
- return m_BlockStore.IterateBlock(ChunkLocations, ChunkIndexes, ValidateSmallChunk, ValidateLargeChunk, 0);
+ std::vector<BlockStoreLocation> ChunkBlockLocations;
+ ChunkBlockLocations.reserve(ChunkLocations.size());
+
+ for (const DiskLocation& Loc : ChunkLocations)
+ {
+ ChunkBlockLocations.push_back(Loc.GetBlockLocation(m_Configuration.PayloadAlignment));
+ }
+
+ m_BlockStore.IterateChunks(ChunkBlockLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) {
+ return m_BlockStore.IterateBlock(ChunkBlockLocations, ChunkIndexes, ValidateSmallChunk, ValidateLargeChunk, 0);
});
}
catch (ScrubDeadlineExpiredException&)
diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp
index 37a8c36b8..097102a1d 100644
--- a/src/zenstore/compactcas.cpp
+++ b/src/zenstore/compactcas.cpp
@@ -565,20 +565,21 @@ CasContainerStrategy::ScrubStorage(ScrubContext& Ctx)
try
{
- RwLock::SharedLockScope _(m_LocationMapLock);
-
- uint64_t TotalChunkCount = m_LocationMap.size();
- ChunkLocations.reserve(TotalChunkCount);
- ChunkIndexToChunkHash.reserve(TotalChunkCount);
{
- for (const auto& Entry : m_LocationMap)
+ uint64_t TotalChunkCount = m_LocationMap.size();
+ ChunkLocations.reserve(TotalChunkCount);
+ ChunkIndexToChunkHash.reserve(TotalChunkCount);
+ RwLock::SharedLockScope _(m_LocationMapLock);
{
- const IoHash& ChunkHash = Entry.first;
- const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second];
- BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment);
+ for (const auto& Entry : m_LocationMap)
+ {
+ const IoHash& ChunkHash = Entry.first;
+ const BlockStoreDiskLocation& DiskLocation = m_Locations[Entry.second];
+ BlockStoreLocation Location = DiskLocation.Get(m_PayloadAlignment);
- ChunkLocations.push_back(Location);
- ChunkIndexToChunkHash.push_back(ChunkHash);
+ ChunkLocations.push_back(Location);
+ ChunkIndexToChunkHash.push_back(ChunkHash);
+ }
}
}
diff --git a/src/zenstore/projectstore.cpp b/src/zenstore/projectstore.cpp
index c5b27c1ea..e6c8d624a 100644
--- a/src/zenstore/projectstore.cpp
+++ b/src/zenstore/projectstore.cpp
@@ -3932,6 +3932,7 @@ ProjectStore::Project::Scrub(ScrubContext& Ctx)
{
for (const std::string& OpLogId : OpLogs)
{
+ Ctx.ThrowIfDeadlineExpired();
Ref<ProjectStore::Oplog> OpLog;
{
if (auto OpIt = m_Oplogs.find(OpLogId); OpIt != m_Oplogs.end())
@@ -4358,6 +4359,7 @@ ProjectStore::ScrubStorage(ScrubContext& Ctx)
}
for (const Ref<Project>& Project : Projects)
{
+ Ctx.ThrowIfDeadlineExpired();
Project->Scrub(Ctx);
}
}