aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author: Per Larsson <[email protected]> 2021-09-20 08:54:34 +0200
committer: Per Larsson <[email protected]> 2021-09-20 08:54:34 +0200
commit: e25b4b20d8a5696aa7055c9c167fa47b3739bc7e (patch)
tree: 049654b87096a22e1bf696a385db608a75f229fa /zenstore/compactcas.cpp
parent: Probe upstream Zen server when initializing upstream cache. (diff)
parent: Fixed unused variable warnings exposed by xmake build (unclear why I do not r... (diff)
downloadzen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.tar.xz
zen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.zip
Merge branch 'main' of https://github.com/EpicGames/zen
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r-- zenstore/compactcas.cpp 142
1 files changed, 131 insertions, 11 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 4407d8b08..070ca1503 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -10,18 +10,22 @@
#include <zencore/thread.h>
#include <zencore/uid.h>
-#include <gsl/gsl-lite.hpp>
-
-#include <functional>
-
-struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive-
-#include <atlfile.h>
#include <filesystem>
+#include <functional>
+#include <gsl/gsl-lite.hpp>
//////////////////////////////////////////////////////////////////////////
namespace zen {
+// Constructs the strategy, initializing m_Config from the supplied
+// configuration. No other work is done here.
+CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config)
+: m_Config(Config)
+{
+}
+
+// Out-of-line destructor with no custom teardown logic.
+CasContainerStrategy::~CasContainerStrategy() = default;
+
void
CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore)
{
@@ -43,7 +47,9 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6
uint64_t MaxFileOffset = 0;
{
- // This is not technically necessary but may help future static analysis
+ // This is not technically necessary (nobody should be accessing us from
+ // another thread at this stage) but may help static analysis
+
zen::RwLock::ExclusiveLockScope _(m_LocationMapLock);
m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
@@ -103,9 +109,8 @@ IoBuffer
CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- auto KeyIt = m_LocationMap.find(ChunkHash);
- if (KeyIt != m_LocationMap.end())
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
const CasDiskLocation& Location = KeyIt->second;
return zen::IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size);
@@ -120,9 +125,8 @@ bool
CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- auto KeyIt = m_LocationMap.find(ChunkHash);
- if (KeyIt != m_LocationMap.end())
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
return true;
}
@@ -133,6 +137,13 @@ CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
void
CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks)
{
+ // This implementation is good enough for relatively small
+ // chunk sets (in terms of chunk identifiers), but would
+ // benefit from a better implementation which removes
+ // items incrementally for large sets, especially when
+ // we're likely to already have a large proportion of the
+ // chunks in the set
+
std::unordered_set<IoHash> HaveSet;
for (const IoHash& Hash : InOutChunks.GetChunkSet())
@@ -157,4 +168,113 @@ CasContainerStrategy::Flush()
m_SmallObjectFile.Flush();
}
+// Verifies the integrity of every chunk referenced by m_LocationMap by
+// re-hashing its payload bytes from the small object file and comparing
+// the result against the chunk's key.
+//
+// Chunks whose computed hash does not match their key are removed from
+// m_LocationMap and their hashes are handed to Ctx.ReportBadChunks().
+//
+// Ctx - scrub context which receives the list of bad chunk hashes
+void
+CasContainerStrategy::Scrub(ScrubContext& Ctx)
+{
+    const uint64_t WindowSize  = 4 * 1024 * 1024;
+    uint64_t       WindowStart = 0;
+    uint64_t       WindowEnd   = WindowSize;
+    const uint64_t FileSize    = m_SmallObjectFile.FileSize();
+
+    std::vector<CasDiskIndexEntry> BigChunks;
+    std::vector<CasDiskIndexEntry> BadChunks;
+
+    // We do a read sweep through the payloads file and validate
+    // any entries that are contained within each segment, with
+    // the assumption that most entries will be checked in this
+    // pass. An alternative strategy would be to use memory mapping.
+
+    {
+        IoBuffer ReadBuffer{WindowSize};
+        void*    BufferBase = ReadBuffer.MutableData();
+
+        RwLock::SharedLockScope _(m_LocationMapLock);
+
+        do
+        {
+            const uint64_t ChunkSize = zen::Min(WindowSize, FileSize - WindowStart);
+            m_SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart);
+
+            for (auto& Entry : m_LocationMap)
+            {
+                const uint64_t EntryOffset = Entry.second.Offset;
+
+                if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
+                {
+                    const uint64_t EntryEnd = EntryOffset + Entry.second.Size;
+
+                    if (EntryEnd >= WindowEnd)
+                    {
+                        // Entry is not fully contained in the current window,
+                        // defer it to the streaming pass below
+
+                        BigChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+
+                        continue;
+                    }
+
+                    // The window buffer holds file bytes starting at WindowStart, so
+                    // the entry's payload lives at (EntryOffset - WindowStart) within
+                    // the buffer, not at the start of the buffer
+
+                    const uint8_t* EntryData = static_cast<const uint8_t*>(BufferBase) + (EntryOffset - WindowStart);
+                    const IoHash   ComputedHash = IoHash::HashBuffer(EntryData, Entry.second.Size);
+
+                    if (Entry.first != ComputedHash)
+                    {
+                        // Hash mismatch
+
+                        BadChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+                    }
+                }
+            }
+
+            WindowStart += WindowSize;
+            WindowEnd += WindowSize;
+        } while (WindowStart < FileSize);
+    }
+
+    // Deal with large chunks (anything which straddles a window boundary)
+    // by streaming the byte range through an incremental hasher
+
+    for (const CasDiskIndexEntry& Entry : BigChunks)
+    {
+        IoHashStream Hasher;
+        m_SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) {
+            Hasher.Append(Data, Size);
+        });
+        IoHash ComputedHash = Hasher.GetHash();
+
+        if (Entry.Key != ComputedHash)
+        {
+            BadChunks.push_back(Entry);
+        }
+    }
+
+    // Deal with bad chunks by removing them from our lookup map
+
+    std::vector<IoHash> BadChunkHashes;
+    BadChunkHashes.reserve(BadChunks.size());
+
+    {
+        // The shared lock taken for the read sweep has been released at this
+        // point; erasing from the map is a mutation and must be done under
+        // the exclusive lock so concurrent readers are not affected
+
+        RwLock::ExclusiveLockScope _(m_LocationMapLock);
+
+        for (const CasDiskIndexEntry& Entry : BadChunks)
+        {
+            BadChunkHashes.push_back(Entry.Key);
+            m_LocationMap.erase(Entry.Key);
+        }
+    }
+
+    // Let whoever is interested know about the bad chunks. This could
+    // be used to invalidate higher level data structures more efficiently
+    // than a full validation pass might be able to do
+
+    Ctx.ReportBadChunks(BadChunkHashes);
+}
+
+// Writes the current contents of m_LocationMap to m_SmallObjectIndex as a
+// flat array of CasDiskIndexEntry records starting at offset 0. The map is
+// held under a shared lock for the duration of the copy and the write.
+void
+CasContainerStrategy::MakeSnapshot()
+{
+    RwLock::SharedLockScope _(m_LocationMapLock);
+
+    // Size the vector up front (parentheses select the count constructor)
+    // and fill the slots in map iteration order
+
+    std::vector<CasDiskIndexEntry> Snapshot(m_LocationMap.size());
+
+    auto SlotIt = Snapshot.begin();
+    for (const auto& [ChunkHash, DiskLocation] : m_LocationMap)
+    {
+        SlotIt->Key      = ChunkHash;
+        SlotIt->Location = DiskLocation;
+        ++SlotIt;
+    }
+
+    const auto SnapshotBytes = Snapshot.size() * sizeof(CasDiskIndexEntry);
+    m_SmallObjectIndex.Write(Snapshot.data(), SnapshotBytes, 0);
+}
+
} // namespace zen