aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPer Larsson <[email protected]>2021-09-23 13:56:56 +0200
committerPer Larsson <[email protected]>2021-09-23 13:56:56 +0200
commit0bbb51bc88d4a1b0fa7762d133037a728af24767 (patch)
tree8c8a43350d103031bb9af1806b79cf70dac2faf8
parentRespect Jupiter auth token expiration time. (diff)
parentcidstore: added some implementation notes (diff)
downloadzen-0bbb51bc88d4a1b0fa7762d133037a728af24767.tar.xz
zen-0bbb51bc88d4a1b0fa7762d133037a728af24767.zip
Merge branch 'main' of https://github.com/EpicGames/zen
-rw-r--r--zencore/include/zencore/iobuffer.h3
-rw-r--r--zencore/iobuffer.cpp8
-rw-r--r--zenserver/cache/structuredcachestore.cpp201
-rw-r--r--zenserver/cache/structuredcachestore.h1
-rw-r--r--zenstore/CAS.cpp2
-rw-r--r--zenstore/cidstore.cpp30
-rw-r--r--zenstore/include/zenstore/cidstore.h5
7 files changed, 193 insertions, 57 deletions
diff --git a/zencore/include/zencore/iobuffer.h b/zencore/include/zencore/iobuffer.h
index 517cc7b69..6ee40e468 100644
--- a/zencore/include/zencore/iobuffer.h
+++ b/zencore/include/zencore/iobuffer.h
@@ -9,6 +9,7 @@
namespace zen {
+struct IoHash;
struct IoBufferExtendedCore;
enum class ZenContentType : uint8_t
@@ -348,6 +349,8 @@ public:
inline static IoBuffer MakeCloneFromMemory(const void* Ptr, size_t Sz) { return IoBuffer(IoBuffer::Clone, Ptr, Sz); }
};
+IoHash HashBuffer(IoBuffer& Buffer);
+
void iobuffer_forcelink();
} // namespace zen
diff --git a/zencore/iobuffer.cpp b/zencore/iobuffer.cpp
index bcecc768f..dc998d5ea 100644
--- a/zencore/iobuffer.cpp
+++ b/zencore/iobuffer.cpp
@@ -4,6 +4,7 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
+#include <zencore/iohash.h>
#include <zencore/logging.h>
#include <zencore/memory.h>
#include <zencore/testing.h>
@@ -381,6 +382,13 @@ IoBufferBuilder::MakeFromTemporaryFile(const wchar_t* FileName)
return {};
}
+IoHash
+HashBuffer(IoBuffer& Buffer)
+{
+ // TODO: handle disk buffers with special path
+ return IoHash::HashBuffer(Buffer.Data(), Buffer.Size());
+}
+
//////////////////////////////////////////////////////////////////////////
#if ZEN_WITH_TESTS
diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp
index 3d80bb14c..4f16b81f2 100644
--- a/zenserver/cache/structuredcachestore.cpp
+++ b/zenserver/cache/structuredcachestore.cpp
@@ -131,23 +131,18 @@ ZenCacheMemoryLayer::~ZenCacheMemoryLayer()
bool
ZenCacheMemoryLayer::Get(std::string_view InBucket, const IoHash& HashKey, ZenCacheValue& OutValue)
{
- CacheBucket* Bucket = nullptr;
-
- {
- RwLock::SharedLockScope _(m_Lock);
+ RwLock::SharedLockScope _(m_Lock);
- auto it = m_Buckets.find(std::string(InBucket));
+ auto it = m_Buckets.find(std::string(InBucket));
- if (it != m_Buckets.end())
- {
- Bucket = &it->second;
- }
+ if (it == m_Buckets.end())
+ {
+ return false;
}
- if (Bucket == nullptr)
- return false;
+ CacheBucket* Bucket = Bucket = &it->second;
- ZEN_ASSERT(Bucket != nullptr);
+ _.ReleaseNow();
return Bucket->Get(HashKey, OutValue);
}
@@ -177,8 +172,6 @@ ZenCacheMemoryLayer::Put(std::string_view InBucket, const IoHash& HashKey, const
Bucket = &m_Buckets[std::string(InBucket)];
}
- ZEN_ASSERT(Bucket != nullptr);
-
// Note that since the underlying IoBuffer is retained, the content type is also
Bucket->Put(HashKey, Value);
@@ -195,7 +188,31 @@ ZenCacheMemoryLayer::DropBucket(std::string_view Bucket)
void
ZenCacheMemoryLayer::Scrub(ScrubContext& Ctx)
{
- ZEN_UNUSED(Ctx);
+ RwLock::SharedLockScope _(m_Lock);
+
+ for (auto& Kv : m_Buckets)
+ {
+ Kv.second.Scrub(Ctx);
+ }
+}
+
+void
+ZenCacheMemoryLayer::CacheBucket::Scrub(ScrubContext& Ctx)
+{
+ std::vector<IoHash> BadHashes;
+
+ for (auto& Kv : m_cacheMap)
+ {
+ if (Kv.first != IoHash::HashBuffer(Kv.second))
+ {
+ BadHashes.push_back(Kv.first);
+ }
+ }
+
+ if (!BadHashes.empty())
+ {
+ Ctx.ReportBadChunks(BadHashes);
+ }
}
bool
@@ -203,16 +220,16 @@ ZenCacheMemoryLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutV
{
RwLock::SharedLockScope _(m_bucketLock);
- auto bucketIt = m_cacheMap.find(HashKey);
-
- if (bucketIt == m_cacheMap.end())
+ if (auto bucketIt = m_cacheMap.find(HashKey); bucketIt == m_cacheMap.end())
{
return false;
}
+ else
+ {
+ OutValue.Value = bucketIt->second;
- OutValue.Value = bucketIt->second;
-
- return true;
+ return true;
+ }
}
void
@@ -241,8 +258,19 @@ struct DiskLocation
static uint64_t CombineOffsetAndFlags(uint64_t Offset, uint64_t Flags) { return Offset | Flags; }
- inline uint64_t Offset() const { return OffsetAndFlags & kOffsetMask; }
- inline uint64_t IsFlagSet(uint64_t Flag) const { return OffsetAndFlags & Flag; }
+ inline uint64_t Offset() const { return OffsetAndFlags & kOffsetMask; }
+ inline uint64_t IsFlagSet(uint64_t Flag) const { return OffsetAndFlags & Flag; }
+ inline ZenContentType GetContentType() const
+ {
+ ZenContentType ContentType = ZenContentType::kBinary;
+
+ if (IsFlagSet(DiskLocation::kStructured))
+ {
+ ContentType = ZenContentType::kCbObject;
+ }
+
+ return ContentType;
+ }
};
struct DiskIndexEntry
@@ -264,9 +292,14 @@ struct ZenCacheDiskLayer::CacheBucket
static bool Delete(std::filesystem::path BucketDir);
bool Get(const IoHash& HashKey, ZenCacheValue& OutValue);
+
+ bool GetStandaloneCacheValue(const IoHash& HashKey, ZenCacheValue& OutValue, const DiskLocation& Loc);
+
+ bool GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue);
void Put(const IoHash& HashKey, const ZenCacheValue& Value);
void Drop();
void Flush();
+ void Scrub(ScrubContext& Ctx);
inline bool IsOk() const { return m_Ok; }
@@ -407,6 +440,37 @@ ZenCacheDiskLayer::CacheBucket::BuildPath(WideStringBuilderBase& Path, const IoH
}
bool
+ZenCacheDiskLayer::CacheBucket::GetInlineCacheValue(const DiskLocation& Loc, ZenCacheValue& OutValue)
+{
+ if (!Loc.IsFlagSet(DiskLocation::kStandaloneFile))
+ {
+ OutValue.Value = IoBufferBuilder::MakeFromFileHandle(m_SobsFile.Handle(), Loc.Offset(), Loc.Size);
+ OutValue.Value.SetContentType(Loc.GetContentType());
+
+ return true;
+ }
+
+ return false;
+}
+
+bool
+ZenCacheDiskLayer::CacheBucket::GetStandaloneCacheValue(const IoHash& HashKey, ZenCacheValue& OutValue, const DiskLocation& Loc)
+{
+ WideStringBuilder<128> DataFilePath;
+ BuildPath(DataFilePath, HashKey);
+
+ if (IoBuffer Data = IoBufferBuilder::MakeFromFile(DataFilePath.c_str()))
+ {
+ OutValue.Value = Data;
+ OutValue.Value.SetContentType(Loc.GetContentType());
+
+ return true;
+ }
+
+ return false;
+}
+
+bool
ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutValue)
{
if (!m_Ok)
@@ -420,35 +484,14 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal
{
const DiskLocation& Loc = it->second;
- ZenContentType ContentType = ZenContentType::kBinary;
-
- if (Loc.IsFlagSet(DiskLocation::kStructured))
- {
- ContentType = ZenContentType::kCbObject;
- }
-
- if (!Loc.IsFlagSet(DiskLocation::kStandaloneFile))
+ if (GetInlineCacheValue(Loc, OutValue))
{
- OutValue.Value = IoBufferBuilder::MakeFromFileHandle(m_SobsFile.Handle(), Loc.Offset(), Loc.Size);
- OutValue.Value.SetContentType(ContentType);
-
return true;
}
- else
- {
- _.ReleaseNow();
- WideStringBuilder<128> DataFilePath;
- BuildPath(DataFilePath, HashKey);
+ _.ReleaseNow();
- if (IoBuffer Data = IoBufferBuilder::MakeFromFile(DataFilePath.c_str()))
- {
- OutValue.Value = Data;
- OutValue.Value.SetContentType(ContentType);
-
- return true;
- }
- }
+ return GetStandaloneCacheValue(HashKey, OutValue, Loc);
}
return false;
@@ -518,9 +561,58 @@ ZenCacheDiskLayer::CacheBucket::Flush()
}
void
-ZenCacheDiskLayer::Scrub(ScrubContext& Ctx)
+ZenCacheDiskLayer::CacheBucket::Scrub(ScrubContext& Ctx)
{
- ZEN_UNUSED(Ctx);
+ std::vector<DiskIndexEntry> StandaloneFiles;
+
+ std::vector<IoHash> BadChunks;
+ std::vector<IoBuffer> BadStandaloneChunks;
+
+ {
+ RwLock::SharedLockScope _(m_IndexLock);
+
+ for (auto& Kv : m_Index)
+ {
+ const IoHash& Hash = Kv.first;
+ const DiskLocation& Loc = Kv.second;
+
+ ZenCacheValue Value;
+
+ if (!GetInlineCacheValue(Loc, Value))
+ {
+ ZEN_ASSERT(Loc.IsFlagSet(DiskLocation::kStandaloneFile));
+ StandaloneFiles.push_back({.Key = Hash, .Location = Loc});
+ }
+ else
+ {
+ if (GetStandaloneCacheValue(Hash, Value, Loc))
+ {
+ // Hash contents
+
+ const IoHash ComputedHash = HashBuffer(Value.Value);
+
+ if (ComputedHash != Hash)
+ {
+ BadChunks.push_back(Hash);
+ }
+ }
+ else
+ {
+ // Non-existent
+ }
+ }
+ }
+ }
+
+ if (Ctx.RunRecovery())
+ {
+ // Clean out bad chunks
+ }
+
+ if (!BadChunks.empty())
+ {
+ Ctx.ReportBadChunks(BadChunks);
+ }
}
void
@@ -729,6 +821,17 @@ ZenCacheDiskLayer::Flush()
}
}
+void
+ZenCacheDiskLayer::Scrub(ScrubContext& Ctx)
+{
+ RwLock::SharedLockScope _(m_Lock);
+
+ for (auto& Kv : m_Buckets)
+ {
+ Kv.second.Scrub(Ctx);
+ }
+}
+
//////////////////////////////////////////////////////////////////////////
ZenCacheTracker::ZenCacheTracker(ZenCacheStore& CacheStore)
diff --git a/zenserver/cache/structuredcachestore.h b/zenserver/cache/structuredcachestore.h
index 2cc3abb53..f96757409 100644
--- a/zenserver/cache/structuredcachestore.h
+++ b/zenserver/cache/structuredcachestore.h
@@ -65,6 +65,7 @@ private:
bool Get(const IoHash& HashKey, ZenCacheValue& OutValue);
void Put(const IoHash& HashKey, const ZenCacheValue& Value);
+ void Scrub(ScrubContext& Ctx);
};
RwLock m_Lock;
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp
index eaf72cb41..1db2b50bf 100644
--- a/zenstore/CAS.cpp
+++ b/zenstore/CAS.cpp
@@ -50,7 +50,7 @@ CasChunkSet::RemoveChunksIf(std::function<bool(const IoHash& CandidateHash)>&& P
void
CasChunkSet::IterateChunks(std::function<void(const IoHash& ChunkHash)>&& Callback)
{
- for (auto It = begin(m_ChunkSet), ItEnd = end(m_ChunkSet); It != ItEnd;)
+ for (auto It = begin(m_ChunkSet), ItEnd = end(m_ChunkSet); It != ItEnd; ++It)
{
Callback(*It);
}
diff --git a/zenstore/cidstore.cpp b/zenstore/cidstore.cpp
index 08a3192ff..d76058bd1 100644
--- a/zenstore/cidstore.cpp
+++ b/zenstore/cidstore.cpp
@@ -45,11 +45,23 @@ struct CidStore::Impl
ZEN_ASSERT(Compressed != IoHash::Zero);
RwLock::ExclusiveLockScope _(m_Lock);
- m_CidMap.insert_or_assign(DecompressedId, Compressed);
- // TODO: it's pretty wasteful to log even idempotent updates
- // however we can't simply use the boolean returned by insert_or_assign
- // since there's not a 1:1 mapping between compressed and uncompressed
- // so if we want a last-write-wins policy then we have to log each update
+
+ auto It = m_CidMap.try_emplace(DecompressedId, Compressed);
+ if (!It.second)
+ {
+ if (It.first.value() != Compressed)
+ {
+ It.first.value() = Compressed;
+ }
+ else
+ {
+ // No point logging an update that won't change anything
+ return;
+ }
+ }
+
+ // It's not ideal to do this while holding the lock in case
+ // we end up in blocking I/O but that's for later
LogMapping(DecompressedId, Compressed);
}
@@ -68,6 +80,10 @@ struct CidStore::Impl
{
CompressedHash = It->second;
}
+ else
+ {
+ return {};
+ }
}
ZEN_ASSERT(CompressedHash != IoHash::Zero);
@@ -84,7 +100,7 @@ struct CidStore::Impl
if (It == m_CidMap.end())
{
- // Not in map, or tombstone
+ // Not in map
return false;
}
@@ -171,7 +187,7 @@ struct CidStore::Impl
const IoHash& BadHash = It->first;
// Log a tombstone record
- m_LogFile.Append({.Uncompressed = BadHash, .Compressed = IoHash::Zero});
+ LogMapping(BadHash, IoHash::Zero);
BadChunks.push_back(BadHash);
diff --git a/zenstore/include/zenstore/cidstore.h b/zenstore/include/zenstore/cidstore.h
index f4439e083..2eea04164 100644
--- a/zenstore/include/zenstore/cidstore.h
+++ b/zenstore/include/zenstore/cidstore.h
@@ -25,6 +25,11 @@ class IoBuffer;
* be used to deal with other kinds of indirections in the future. For example, if we want
* to support chunking then a CID may represent a list of chunks which could be concatenated
* to form the referenced chunk.
+ *
+ * It would likely be possible to implement this mapping in a more efficient way if we
+ * integrate it into the CAS store itself, so we can avoid maintaining copies of large
+ * hashes in multiple locations. This would also allow us to consolidate commit logs etc
+ * which would be more resilient than the current split log scheme
*
*/
class CidStore