aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/buildstore/buildstore.cpp
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2025-04-03 14:28:51 +0200
committerGitHub Enterprise <[email protected]>2025-04-03 14:28:51 +0200
commit64cd6e328bee3a94bc0bf10441b4057f7be34d1c (patch)
treec1656ae98110ed24b88ac82e32ad3d849bea3ae9 /src/zenstore/buildstore/buildstore.cpp
parent`zen oplog-export`, `zen oplog-import` for `--url` (cloud) and `--builds` (bu... (diff)
downloadzen-64cd6e328bee3a94bc0bf10441b4057f7be34d1c.tar.xz
zen-64cd6e328bee3a94bc0bf10441b4057f7be34d1c.zip
build store save access times (#341)v5.6.3-pre0
* save payload size in log for buildstore * read/write access times and manifest for buildstore * use retry when removing temporary files
Diffstat (limited to 'src/zenstore/buildstore/buildstore.cpp')
-rw-r--r--src/zenstore/buildstore/buildstore.cpp283
1 files changed, 264 insertions, 19 deletions
diff --git a/src/zenstore/buildstore/buildstore.cpp b/src/zenstore/buildstore/buildstore.cpp
index f26901458..d6d727aa9 100644
--- a/src/zenstore/buildstore/buildstore.cpp
+++ b/src/zenstore/buildstore/buildstore.cpp
@@ -2,6 +2,7 @@
#include <zenstore/buildstore/buildstore.h>
+#include <zencore/compactbinarybuilder.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
#include <zencore/memory/llm.h>
@@ -36,9 +37,18 @@ using namespace std::literals;
namespace blobstore::impl {
- const std::string BaseName = "builds";
- const char* IndexExtension = ".uidx";
- const char* LogExtension = ".slog";
+    // Building blocks for the names of the files the build store keeps in its root directory.
+    const std::string BaseName          = "builds";
+    const std::string ManifestExtension = ".cbo";
+    // The pointers themselves are const: a plain `const char*` at namespace scope is a
+    // writable global with external linkage, which is not what a file-name constant should be.
+    const char* const IndexExtension      = ".uidx";
+    const char* const LogExtension        = ".slog";
+    const char* const AccessTimeExtension = ".zacs";
+
+    // Version stamped into the manifest. The constructor treats a manifest carrying any other
+    // value as invalid and wipes the stored state. Packed as three fields shifted by 16/8/0 bits
+    // (presumably major.minor.patch - confirm).
+    const uint32_t ManifestVersion = (1 << 16) | (0 << 8) | (0);
+
+    // Builds the path of the manifest file: <RootDirectory>/<BaseName><ManifestExtension>.
+    std::filesystem::path GetManifestPath(const std::filesystem::path& RootDirectory)
+    {
+        const std::string ManifestFileName = BaseName + ManifestExtension;
+        return RootDirectory / ManifestFileName;
+    }
std::filesystem::path GetBlobIndexPath(const std::filesystem::path& RootDirectory)
{
@@ -56,10 +66,47 @@ namespace blobstore::impl {
{
return RootDirectory / (BaseName + "_meta" + LogExtension);
}
+
+    // Builds the path of the access-times file: <RootDirectory>/<BaseName><AccessTimeExtension>.
+    std::filesystem::path GetAccessTimesPath(const std::filesystem::path& RootDirectory)
+    {
+        const std::string AccessTimesFileName = BaseName + AccessTimeExtension;
+        return RootDirectory / AccessTimesFileName;
+    }
+
+    // One entry of the access-times file: a blob key and its last access time in seconds.
+    // ReadAccessTimes/WriteAccessTimes move arrays of this struct to and from disk as raw
+    // bytes, so its size is part of the file format.
+    struct AccessTimeRecord
+    {
+        IoHash   Key;
+        uint32_t SecondsSinceEpoch = 0;
+    };
+
+    static_assert(sizeof(AccessTimeRecord) == 24, "AccessTimeRecord is stored on disk as-is and must stay 24 bytes");
+
+// Byte-packed so the in-memory layout is exactly the 16 bytes stored at the start of the file.
+#pragma pack(push, 1)
+    // Fixed-size header at offset 0 of the access-times file.
+    struct AccessTimesHeader
+    {
+        static constexpr uint32_t ExpectedMagic  = 0x7363617a;  // 'zacs';
+        static constexpr uint32_t CurrentVersion = 1;
+        // The record array starts at the header size rounded up to this alignment.
+        static constexpr uint64_t DataAlignment = 8;
+
+        uint32_t Magic           = ExpectedMagic;
+        uint32_t Version         = CurrentVersion;
+        uint32_t AccessTimeCount = 0;  // number of AccessTimeRecord entries following the header
+        uint32_t Checksum        = 0;  // XXH32 over the fields above, see ComputeChecksum
+
+        // Hashes every header byte in front of the trailing Checksum field.
+        static uint32_t ComputeChecksum(const AccessTimesHeader& Header)
+        {
+            constexpr size_t HashedByteCount = sizeof(AccessTimesHeader) - sizeof(uint32_t);
+            return XXH32(&Header.Magic, HashedByteCount, 0xC0C0'BABA);
+        }
+    };
+#pragma pack(pop)
+
+    static_assert(sizeof(AccessTimesHeader) == 16, "AccessTimesHeader is stored on disk as-is and must stay 16 bytes");
+
} // namespace blobstore::impl
BuildStore::BuildStore(const BuildStoreConfig& Config, GcManager& Gc)
-: m_Config(Config)
+: m_Log(logging::Get("builds"))
+, m_Config(Config)
, m_Gc(Gc)
, m_LargeBlobStore(m_Gc)
, m_SmallBlobStore(Gc)
@@ -69,14 +116,57 @@ BuildStore::BuildStore(const BuildStoreConfig& Config, GcManager& Gc)
ZEN_MEMSCOPE(GetBuildstoreTag());
try
{
- std::filesystem::path BlobLogPath = blobstore::impl::GetBlobLogPath(Config.RootDirectory);
- std::filesystem::path MetaLogPath = blobstore::impl::GetMetaLogPath(Config.RootDirectory);
- bool IsNew = !(IsFile(BlobLogPath) && IsFile(MetaLogPath));
+ bool IsNew = true;
+ Stopwatch Timer;
+ const auto _ = MakeGuard([&] {
+ ZEN_INFO("{} build store at {} in {}",
+ IsNew ? "Initialized" : "Read",
+ m_Config.RootDirectory,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+
+ std::filesystem::path BlobLogPath = blobstore::impl::GetBlobLogPath(Config.RootDirectory);
+ std::filesystem::path MetaLogPath = blobstore::impl::GetMetaLogPath(Config.RootDirectory);
+ std::filesystem::path ManifestPath = blobstore::impl::GetManifestPath(Config.RootDirectory);
+ std::filesystem::path AccessTimesPath = blobstore::impl::GetAccessTimesPath(Config.RootDirectory);
+ if (IsFile(ManifestPath) && IsFile(BlobLogPath) && IsFile(MetaLogPath))
+ {
+ IsNew = false;
+ }
if (!IsNew)
{
- m_BlobLogFlushPosition = ReadPayloadLog(RwLock::ExclusiveLockScope(m_Lock), BlobLogPath, 0);
- m_MetaLogFlushPosition = ReadMetadataLog(RwLock::ExclusiveLockScope(m_Lock), MetaLogPath, 0);
+ RwLock::ExclusiveLockScope Lock(m_Lock);
+
+ CbObject ManifestReader = LoadCompactBinaryObject(ReadFile(ManifestPath).Flatten());
+ Oid ManifestId = ManifestReader["id"].AsObjectId();
+ uint32_t Version = ManifestReader["version"].AsUInt32();
+ DateTime CreationDate = ManifestReader["createdAt"].AsDateTime();
+ ZEN_UNUSED(CreationDate);
+ if (ManifestId == Oid::Zero || Version != blobstore::impl::ManifestVersion)
+ {
+ ZEN_WARN("Invalid manifest at {}, wiping state", ManifestPath);
+ IsNew = true;
+ }
+ else
+ {
+ m_BlobLogFlushPosition = ReadPayloadLog(Lock, BlobLogPath, 0);
+ m_MetaLogFlushPosition = ReadMetadataLog(Lock, MetaLogPath, 0);
+ if (IsFile(AccessTimesPath))
+ {
+ ReadAccessTimes(Lock, AccessTimesPath);
+ }
+ }
+ }
+
+ if (IsNew)
+ {
+ CleanDirectory(Config.RootDirectory, false);
+ CbObjectWriter ManifestWriter;
+ ManifestWriter.AddObjectId("id", Oid::NewOid());
+ ManifestWriter.AddInteger("version", blobstore::impl::ManifestVersion);
+ ManifestWriter.AddDateTime("createdAt", DateTime::Now());
+ TemporaryFile::SafeWriteFile(ManifestPath, ManifestWriter.Save().GetBuffer().AsIoBuffer());
}
m_LargeBlobStore.Initialize(Config.RootDirectory / "file_cas", IsNew);
m_SmallBlobStore.Initialize(Config.RootDirectory,
@@ -147,18 +237,19 @@ BuildStore::PutBlob(const IoHash& BlobHash, const IoBuffer& Payload)
}
}
+ uint64_t PayloadSize = Payload.GetSize();
PayloadEntry Entry;
if (Payload.GetSize() > m_Config.SmallBlobBlockStoreMaxBlockEmbedSize)
{
CasStore::InsertResult Result = m_LargeBlobStore.InsertChunk(Payload, BlobHash);
ZEN_UNUSED(Result);
- Entry = {.Flags = PayloadEntry::kStandalone};
+ Entry = PayloadEntry(PayloadEntry::kStandalone, PayloadSize);
}
else
{
CasStore::InsertResult Result = m_SmallBlobStore.InsertChunk(Payload, BlobHash);
ZEN_UNUSED(Result);
- Entry = {.Flags = 0};
+ Entry = PayloadEntry(0, PayloadSize);
}
m_PayloadlogFile.Append(PayloadDiskEntry{.Entry = Entry, .BlobHash = BlobHash});
@@ -188,6 +279,7 @@ BuildStore::PutBlob(const IoHash& BlobHash, const IoBuffer& Payload)
m_BlobEntries.push_back(BlobEntry{.Payload = NewPayloadIndex, .LastAccessTime = AccessTime(GcClock::TickCount())});
m_BlobLookup.insert({BlobHash, NewBlobIndex});
}
+ m_LastAccessTimeUpdateCount++;
}
IoBuffer
@@ -204,7 +296,7 @@ BuildStore::GetBlob(const IoHash& BlobHash)
if (Blob.Payload)
{
const PayloadEntry& Entry = m_PayloadEntries[Blob.Payload];
- const bool IsStandalone = (Entry.Flags & PayloadEntry::kStandalone) != 0;
+ const bool IsStandalone = (Entry.GetFlags() & PayloadEntry::kStandalone) != 0;
Lock.ReleaseNow();
IoBuffer Chunk;
@@ -299,6 +391,7 @@ BuildStore::PutMetadatas(std::span<const IoHash> BlobHashes, std::span<const IoB
m_BlobEntries.push_back(BlobEntry{.Metadata = NewMetadataIndex, .LastAccessTime = AccessTime(GcClock::TickCount())});
m_BlobLookup.insert({BlobHash, NewBlobIndex});
}
+ m_LastAccessTimeUpdateCount++;
WriteBlobIndex++;
if (m_TrackedCacheKeys)
{
@@ -341,6 +434,7 @@ BuildStore::GetMetadatas(std::span<const IoHash> BlobHashes, WorkerThreadPool* O
ResultContentTypes[Index] = ExistingMetadataEntry.ContentType;
}
ExistingBlobEntry.LastAccessTime = AccessTime(GcClock::TickCount());
+ m_LastAccessTimeUpdateCount++;
}
}
}
@@ -434,12 +528,23 @@ BuildStore::Flush()
ZEN_TRACE_CPU("BuildStore::Flush");
try
{
+ Stopwatch Timer;
+ const auto _ = MakeGuard(
+ [&] { ZEN_INFO("Flushed build store at {} in {}", m_Config.RootDirectory, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+
m_LargeBlobStore.Flush();
m_SmallBlobStore.Flush();
m_MetadataBlockStore.Flush(false);
m_PayloadlogFile.Flush();
m_MetadatalogFile.Flush();
+
+ if (uint64_t LastAccessTimeUpdateCount = m_LastAccessTimeUpdateCount.load(); LastAccessTimeUpdateCount > 0)
+ {
+ m_LastAccessTimeUpdateCount -= LastAccessTimeUpdateCount;
+ RwLock::ExclusiveLockScope UpdateLock(m_Lock);
+ WriteAccessTimes(UpdateLock, blobstore::impl::GetAccessTimesPath(m_Config.RootDirectory));
+ }
}
catch (const std::exception& Ex)
{
@@ -447,6 +552,35 @@ BuildStore::Flush()
}
}
+#if ZEN_WITH_TESTS
+// Test-only accessor: returns the last access time recorded for Key, or an empty optional
+// when the key is not present in the blob lookup. Reads under a shared lock.
+std::optional<AccessTime>
+BuildStore::GetLastAccessTime(const IoHash& Key) const
+{
+    RwLock::SharedLockScope _(m_Lock);
+    const auto              It = m_BlobLookup.find(Key);
+    if (It == m_BlobLookup.end())
+    {
+        return {};
+    }
+    return m_BlobEntries[It->second].LastAccessTime;
+}
+
+// Test-only mutator: overwrites the last access time of Key.
+// Returns true when the key was found and updated, false when it is unknown.
+// NOTE(review): the entry is written while holding only a shared lock - this relies on
+// AccessTime assignment being safe for concurrent use; confirm against its definition.
+bool
+BuildStore::SetLastAccessTime(const IoHash& Key, const AccessTime& Time)
+{
+    RwLock::SharedLockScope _(m_Lock);
+    const auto              It = m_BlobLookup.find(Key);
+    if (It == m_BlobLookup.end())
+    {
+        return false;
+    }
+    m_BlobEntries[It->second].LastAccessTime = Time;
+    return true;
+}
+#endif // ZEN_WITH_TESTS
+
void
BuildStore::CompactState()
{
@@ -540,7 +674,7 @@ BuildStore::ReadPayloadLog(const RwLock::ExclusiveLockScope&, const std::filesys
CasLog.Replay(
[&](const PayloadDiskEntry& Record) {
std::string InvalidEntryReason;
- if (Record.Entry.Flags & PayloadEntry::kTombStone)
+ if (Record.Entry.GetFlags() & PayloadEntry::kTombStone)
{
// Note: this leaves m_BlobLookup and other arrays with 'holes' in them, this will get clean up in compact gc operation
if (auto ExistingIt = m_BlobLookup.find(Record.BlobHash); ExistingIt != m_BlobLookup.end())
@@ -702,6 +836,114 @@ BuildStore::ReadMetadataLog(const RwLock::ExclusiveLockScope&, const std::filesy
return LogEntryCount;
}
+// Loads persisted access times from AccessTimesPath and applies them to the blob entries
+// that are currently in m_BlobLookup. The caller must hold the exclusive lock.
+//
+// Whenever the file cannot be used (too short, bad magic/version/checksum, truncated record
+// array) or a record refers to a key that is no longer known, m_LastAccessTimeUpdateCount is
+// bumped; Flush() rewrites the file when that counter is non-zero.
+void
+BuildStore::ReadAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath)
+{
+    ZEN_TRACE_CPU("BuildStore::ReadAccessTimes");
+
+    using namespace blobstore::impl;
+
+    BasicFile AccessTimesFile;
+    AccessTimesFile.Open(AccessTimesPath, BasicFile::Mode::kRead);
+
+    // Not even room for a header.
+    const uint64_t Size = AccessTimesFile.FileSize();
+    if (Size < sizeof(AccessTimesHeader))
+    {
+        m_LastAccessTimeUpdateCount++;
+        return;
+    }
+
+    AccessTimesHeader Header;
+    AccessTimesFile.Read(&Header, sizeof(Header), 0);
+
+    // Records start right after the header, rounded up to the data alignment.
+    uint64_t RecordsOffset = sizeof(AccessTimesHeader);
+    RecordsOffset          = RoundUp(RecordsOffset, AccessTimesHeader::DataAlignment);
+
+    const bool HeaderIsValid = (Header.Magic == AccessTimesHeader::ExpectedMagic) &&
+                               (Header.Version == AccessTimesHeader::CurrentVersion) &&
+                               (Header.Checksum == AccessTimesHeader::ComputeChecksum(Header));
+    if (!HeaderIsValid)
+    {
+        m_LastAccessTimeUpdateCount++;
+        return;
+    }
+
+    // The header promises more records than the file actually holds.
+    const uint64_t RecordsSize = sizeof(AccessTimeRecord) * Header.AccessTimeCount;
+    if (AccessTimesFile.FileSize() < RecordsOffset + RecordsSize)
+    {
+        m_LastAccessTimeUpdateCount++;
+        return;
+    }
+
+    std::vector<AccessTimeRecord> AccessRecords(Header.AccessTimeCount);
+    AccessTimesFile.Read(AccessRecords.data(), RecordsSize, RecordsOffset);
+    for (const AccessTimeRecord& Record : AccessRecords)
+    {
+        const auto It = m_BlobLookup.find(Record.Key);
+        if (It == m_BlobLookup.end())
+        {
+            // Stale record for a blob we no longer track.
+            m_LastAccessTimeUpdateCount++;
+            continue;
+        }
+        m_BlobEntries[It->second].LastAccessTime.SetSecondsSinceEpoch(Record.SecondsSinceEpoch);
+    }
+}
+
+// Persists the last access time of every blob in m_BlobLookup to AccessTimesPath.
+// The caller must hold the exclusive lock.
+//
+// File layout: an AccessTimesHeader at offset 0, then (at the header size rounded up to
+// AccessTimesHeader::DataAlignment) a raw array of Header.AccessTimeCount AccessTimeRecord
+// entries. The data is written to a temporary file in the target directory which is then
+// moved into place.
+//
+// Throws std::runtime_error when the temporary file cannot be created or moved into place.
+// gsl::narrow is applied to the entry count when it is stored as a uint32_t.
+void
+BuildStore::WriteAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath)
+{
+    ZEN_TRACE_CPU("BuildStore::WriteAccessTimes");
+
+    using namespace blobstore::impl;
+
+    uint32_t          Count  = gsl::narrow<uint32_t>(m_BlobLookup.size());
+    AccessTimesHeader Header = {.AccessTimeCount = Count};
+    Header.Checksum          = AccessTimesHeader::ComputeChecksum(Header);
+
+    TemporaryFile   TempFile;
+    std::error_code Ec;
+    if (TempFile.CreateTemporary(AccessTimesPath.parent_path(), Ec); Ec)
+    {
+        throw std::runtime_error(fmt::format("Failed to create temporary file {} to write access times. Reason ({}) {}",
+                                             TempFile.GetPath(),
+                                             Ec.value(),
+                                             Ec.message()));
+    }
+    {
+        uint64_t Offset = 0;
+        TempFile.Write(&Header, sizeof(AccessTimesHeader), Offset);
+        Offset += sizeof(AccessTimesHeader);
+        Offset = RoundUp(Offset, AccessTimesHeader::DataAlignment);
+
+        std::vector<AccessTimeRecord> AccessRecords;
+        AccessRecords.reserve(Header.AccessTimeCount);
+
+        // Iterate by reference: copying each key/value pair per element is needless work.
+        for (const auto& It : m_BlobLookup)
+        {
+            const IoHash&    Key               = It.first;
+            const BlobIndex  Index             = It.second;
+            const BlobEntry& Entry             = m_BlobEntries[Index];
+            const uint32_t   SecondsSinceEpoch = Entry.LastAccessTime.GetSecondsSinceEpoch();
+            AccessRecords.emplace_back(AccessTimeRecord{.Key = Key, .SecondsSinceEpoch = SecondsSinceEpoch});
+        }
+
+        // Single bulk write of the whole record array; nothing follows it in the file,
+        // so the offset does not need to be advanced afterwards.
+        const uint64_t RecordsSize = sizeof(AccessTimeRecord) * Header.AccessTimeCount;
+        TempFile.Write(AccessRecords.data(), RecordsSize, Offset);
+    }
+    if (TempFile.MoveTemporaryIntoPlace(AccessTimesPath, Ec); Ec)
+    {
+        throw std::runtime_error(fmt::format("Failed to move temporary file {} to {} when write access times. Reason ({}) {}",
+                                             TempFile.GetPath(),
+                                             AccessTimesPath,
+                                             Ec.value(),
+                                             Ec.message()));
+    }
+}
+
bool
BuildStore::ValidatePayloadDiskEntry(const PayloadDiskEntry& Entry, std::string& OutReason)
{
@@ -710,18 +952,18 @@ BuildStore::ValidatePayloadDiskEntry(const PayloadDiskEntry& Entry, std::string&
OutReason = fmt::format("Invalid blob hash {}", Entry.BlobHash.ToHexString());
return false;
}
- if (Entry.Entry.Flags & ~(PayloadEntry::kTombStone | PayloadEntry::kStandalone))
+ if (Entry.Entry.GetFlags() & ~(PayloadEntry::kTombStone | PayloadEntry::kStandalone))
{
- OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Entry.Flags, Entry.BlobHash.ToHexString());
+ OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Entry.GetFlags(), Entry.BlobHash.ToHexString());
return false;
}
- if (Entry.Entry.Flags & PayloadEntry::kTombStone)
+ if (Entry.Entry.GetFlags() & PayloadEntry::kTombStone)
{
return true;
}
- if (Entry.Entry.Reserved1 != 0 || Entry.Entry.Reserved2 != 0 || Entry.Entry.Reserved3 != 0)
+ if (Entry.Entry.GetSize() == 0 || Entry.Entry.GetSize() == 0x00ffffffffffffffu)
{
- OutReason = fmt::format("Invalid reserved fields for meta entry {}", Entry.BlobHash.ToHexString());
+ OutReason = fmt::format("Invalid size field {} for meta entry {}", Entry.Entry.GetSize(), Entry.BlobHash.ToHexString());
return false;
}
return true;
@@ -869,6 +1111,8 @@ public:
NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
});
+ const auto __ = MakeGuard([&] { m_Store.Flush(); });
+
if (!m_RemovedBlobs.empty())
{
if (Ctx.Settings.CollectSmallObjects)
@@ -1080,7 +1324,7 @@ BuildStore::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats)
{
RemovedPayloads.push_back(
PayloadDiskEntry{.Entry = m_PayloadEntries[ReadBlobEntry.Payload], .BlobHash = ExpiredBlob});
- RemovedPayloads.back().Entry.Flags |= PayloadEntry::kTombStone;
+ RemovedPayloads.back().Entry.AddFlag(PayloadEntry::kTombStone);
m_PayloadEntries[ReadBlobEntry.Payload] = {};
m_BlobEntries[ReadBlobIndex].Payload = {};
}
@@ -1094,6 +1338,7 @@ BuildStore::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats)
}
m_BlobLookup.erase(It);
+ m_LastAccessTimeUpdateCount++;
RemovedBlobs.push_back(ExpiredBlob);
Stats.DeletedCount++;