aboutsummaryrefslogtreecommitdiff
path: root/zenstore
diff options
context:
space:
mode:
authorPer Larsson <[email protected]>2021-09-20 08:54:34 +0200
committerPer Larsson <[email protected]>2021-09-20 08:54:34 +0200
commite25b4b20d8a5696aa7055c9c167fa47b3739bc7e (patch)
tree049654b87096a22e1bf696a385db608a75f229fa /zenstore
parentProbe upstream Zen server when initializing upstream cache. (diff)
parentFixed unused variable warnings exposed by xmake build (unclear why I do not r... (diff)
downloadzen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.tar.xz
zen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.zip
Merge branch 'main' of https://github.com/EpicGames/zen
Diffstat (limited to 'zenstore')
-rw-r--r--zenstore/CAS.cpp98
-rw-r--r--zenstore/basicfile.cpp119
-rw-r--r--zenstore/compactcas.cpp142
-rw-r--r--zenstore/compactcas.h16
-rw-r--r--zenstore/filecas.cpp104
-rw-r--r--zenstore/filecas.h14
-rw-r--r--zenstore/include/zenstore/CAS.h22
-rw-r--r--zenstore/include/zenstore/basicfile.h28
-rw-r--r--zenstore/include/zenstore/caslog.h2
-rw-r--r--zenstore/include/zenstore/cidstore.h2
-rw-r--r--zenstore/include/zenstore/scrub.h2
-rw-r--r--zenstore/include/zenstore/zenstore.h13
-rw-r--r--zenstore/zenstore.cpp17
-rw-r--r--zenstore/zenstore.vcxproj5
-rw-r--r--zenstore/zenstore.vcxproj.filters5
15 files changed, 492 insertions, 97 deletions
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp
index e77c0ed64..a143230d3 100644
--- a/zenstore/CAS.cpp
+++ b/zenstore/CAS.cpp
@@ -11,6 +11,7 @@
#include <zencore/logging.h>
#include <zencore/memory.h>
#include <zencore/string.h>
+#include <zencore/testutils.h>
#include <zencore/thread.h>
#include <zencore/uid.h>
@@ -20,15 +21,23 @@
#include <functional>
#include <unordered_map>
-struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive-
-#include <atlfile.h>
-
//////////////////////////////////////////////////////////////////////////
namespace zen {
+void
+ScrubContext::ReportBadChunks(std::span<IoHash> BadChunks)
+{
+ ZEN_UNUSED(BadChunks);
+}
+
/**
- * Slightly less naive CAS store
+ * CAS store implementation
+ *
+ * Uses a basic strategy of splitting payloads by size, to improve ability to reclaim space
+ * quickly for unused large chunks and to maintain locality for small chunks which are
+ * frequently accessed together.
+ *
*/
class CasImpl : public CasStore
{
@@ -41,16 +50,15 @@ public:
virtual IoBuffer FindChunk(const IoHash& ChunkHash) override;
virtual void FilterChunks(CasChunkSet& InOutChunks) override;
virtual void Flush() override;
+ virtual void Scrub(ScrubContext& Ctx) override;
private:
- void PickDefaultDirectory();
-
CasContainerStrategy m_TinyStrategy;
CasContainerStrategy m_SmallStrategy;
FileCasStrategy m_LargeStrategy;
};
-CasImpl::CasImpl() : m_TinyStrategy(m_Config, m_Stats), m_SmallStrategy(m_Config, m_Stats), m_LargeStrategy(m_Config, m_Stats)
+CasImpl::CasImpl() : m_TinyStrategy(m_Config), m_SmallStrategy(m_Config), m_LargeStrategy(m_Config)
{
}
@@ -63,13 +71,16 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig)
{
m_Config = InConfig;
- ZEN_INFO("initializing CAS pool at {}", m_Config.RootDirectory);
+ ZEN_INFO("initializing CAS pool at '{}'", m_Config.RootDirectory);
// Ensure root directory exists - create if it doesn't exist already
std::filesystem::create_directories(m_Config.RootDirectory);
// Open or create manifest
+ //
+ // The manifest is not currently fully implemented. The goal is to
+ // use it for recovery and configuration
bool IsNewStore = false;
@@ -77,23 +88,22 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig)
std::filesystem::path ManifestPath = m_Config.RootDirectory;
ManifestPath /= ".ucas_root";
- CAtlFile marker;
- HRESULT hRes = marker.Create(ManifestPath.c_str(), GENERIC_READ, 0, OPEN_EXISTING);
+ std::error_code Ec;
+ BasicFile Marker;
+ Marker.Open(ManifestPath.c_str(), /* IsCreate */ false, Ec);
- if (FAILED(hRes))
+ if (Ec)
{
IsNewStore = true;
ExtendableStringBuilder<128> manifest;
- manifest.Append("#CAS_ROOT\n"); // TODO: should write something meaningful here
+ manifest.Append("#CAS_ROOT\n");
manifest.Append("ID=");
zen::Oid id = zen::Oid::NewOid();
id.ToString(manifest);
- hRes = marker.Create(ManifestPath.c_str(), GENERIC_WRITE, 0, CREATE_ALWAYS);
-
- if (SUCCEEDED(hRes))
- marker.Write(manifest.c_str(), (DWORD)manifest.Size());
+ Marker.Open(ManifestPath.c_str(), /* IsCreate */ true);
+ Marker.Write(manifest.c_str(), (DWORD)manifest.Size(), 0);
}
}
@@ -101,6 +111,9 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig)
m_TinyStrategy.Initialize("tobs", 16, IsNewStore);
m_SmallStrategy.Initialize("sobs", 4096, IsNewStore);
+
+ ScrubContext Ctx;
+ Scrub(Ctx);
}
CasStore::InsertResult
@@ -160,6 +173,14 @@ CasImpl::Flush()
m_LargeStrategy.Flush();
}
+void
+CasImpl::Scrub(ScrubContext& Ctx)
+{
+ m_SmallStrategy.Scrub(Ctx);
+ m_TinyStrategy.Scrub(Ctx);
+ m_LargeStrategy.Scrub(Ctx);
+}
+
//////////////////////////////////////////////////////////////////////////
CasStore*
@@ -173,18 +194,47 @@ CreateCasStore()
// Testing related code follows...
//
-void
-CAS_forcelink()
-{
-}
-
TEST_CASE("CasStore")
{
+ ScopedTemporaryDirectory TempDir;
+
zen::CasStoreConfiguration config;
- config.RootDirectory = "c:\\temp\\test";
+ config.RootDirectory = TempDir.Path();
+
+ std::unique_ptr<zen::CasStore> Store{CreateCasStore()};
+ Store->Initialize(config);
+
+ ScrubContext Ctx;
+ Store->Scrub(Ctx);
+
+ IoBuffer Value1{16};
+ memcpy(Value1.MutableData(), "1234567890123456", 16);
+ IoHash Hash1 = IoHash::HashBuffer(Value1.Data(), Value1.Size());
+ CasStore::InsertResult Result1 = Store->InsertChunk(Value1, Hash1);
+ CHECK(Result1.New);
+
+ IoBuffer Value2{16};
+ memcpy(Value2.MutableData(), "ABCDEFGHIJKLMNOP", 16);
+ IoHash Hash2 = IoHash::HashBuffer(Value2.Data(), Value2.Size());
+ CasStore::InsertResult Result2 = Store->InsertChunk(Value2, Hash2);
+ CHECK(Result2.New);
- std::unique_ptr<zen::CasStore> store{CreateCasStore()};
- store->Initialize(config);
+ CasChunkSet ChunkSet;
+ ChunkSet.AddChunk(Hash1);
+ ChunkSet.AddChunk(Hash2);
+
+ Store->FilterChunks(ChunkSet);
+ CHECK(ChunkSet.GetChunkSet().size() == 0);
+
+ IoBuffer Lookup1 = Store->FindChunk(Hash1);
+ CHECK(Lookup1);
+ IoBuffer Lookup2 = Store->FindChunk(Hash2);
+ CHECK(Lookup2);
+}
+
+void
+CAS_forcelink()
+{
}
} // namespace zen
diff --git a/zenstore/basicfile.cpp b/zenstore/basicfile.cpp
index 35ccdd042..0b92a8979 100644
--- a/zenstore/basicfile.cpp
+++ b/zenstore/basicfile.cpp
@@ -5,7 +5,9 @@
#include <zencore/except.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
+#include <zencore/testutils.h>
+#include <doctest/doctest.h>
#include <fmt/format.h>
#include <gsl/gsl-lite.hpp>
@@ -13,16 +15,54 @@ namespace zen {
using namespace fmt::literals;
+BasicFile::~BasicFile()
+{
+ Close();
+}
+
void
-BasicFile::Open(std::filesystem::path FileName, bool isCreate)
+BasicFile::Open(std::filesystem::path FileName, bool IsCreate)
{
- const DWORD dwCreationDisposition = isCreate ? CREATE_ALWAYS : OPEN_EXISTING;
+ std::error_code Ec;
+ Open(FileName, IsCreate, Ec);
- HRESULT hRes = m_File.Create(FileName.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, dwCreationDisposition);
+ if (Ec)
+ {
+ throw std::system_error(Ec, "failed to open file '{}'"_format(FileName));
+ }
+}
- if (FAILED(hRes))
+void
+BasicFile::Open(std::filesystem::path FileName, bool IsCreate, std::error_code& Ec)
+{
+ const DWORD dwCreationDisposition = IsCreate ? CREATE_ALWAYS : OPEN_EXISTING;
+ const DWORD dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
+ const DWORD dwShareMode = FILE_SHARE_READ;
+ const DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
+ HANDLE hTemplateFile = nullptr;
+
+ HANDLE FileHandle = CreateFile(FileName.c_str(),
+ dwDesiredAccess,
+ dwShareMode,
+ /* lpSecurityAttributes */ nullptr,
+ dwCreationDisposition,
+ dwFlagsAndAttributes,
+ hTemplateFile);
+
+ if (FileHandle == INVALID_HANDLE_VALUE)
{
- ThrowSystemException(hRes, "Failed to open bucket sobs file '{}'"_format(FileName));
+ Ec = zen::MakeErrorCodeFromLastError();
+ }
+
+ m_FileHandle = FileHandle;
+}
+
+void
+BasicFile::Close()
+{
+ if (m_FileHandle)
+ {
+ ::CloseHandle(m_FileHandle);
}
}
@@ -34,11 +74,14 @@ BasicFile::Read(void* Data, uint64_t Size, uint64_t Offset)
Ovl.Offset = DWORD(Offset & 0xffff'ffffu);
Ovl.OffsetHigh = DWORD(Offset >> 32);
- HRESULT hRes = m_File.Read(Data, gsl::narrow<DWORD>(Size), &Ovl);
+ DWORD dwNumberOfBytesToRead = gsl::narrow<DWORD>(Size);
+ DWORD dwNumberOfBytesRead = 0;
+
+ BOOL Success = ::ReadFile(m_FileHandle, Data, dwNumberOfBytesToRead, &dwNumberOfBytesRead, &Ovl);
- if (FAILED(hRes))
+ if (!Success)
{
- ThrowSystemException(hRes, "Failed to read from file '{}'"_format(zen::PathFromHandle(m_File)));
+ ThrowLastError("Failed to read from file '{}'"_format(zen::PathFromHandle(m_FileHandle)));
}
}
@@ -53,6 +96,35 @@ BasicFile::ReadAll()
}
void
+BasicFile::StreamFile(std::function<void(const void* Data, uint64_t Size)>&& ChunkFun)
+{
+ StreamByteRange(0, FileSize(), std::move(ChunkFun));
+}
+
+void
+BasicFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun)
+{
+ const uint64_t ChunkSize = 128 * 1024;
+ IoBuffer ReadBuffer{ChunkSize};
+ void* BufferPtr = ReadBuffer.MutableData();
+
+ uint64_t RemainBytes = Size;
+ uint64_t CurrentOffset = FileOffset;
+
+ while (RemainBytes)
+ {
+ const uint64_t ThisChunkBytes = zen::Min(ChunkSize, RemainBytes);
+
+ Read(BufferPtr, ThisChunkBytes, CurrentOffset);
+
+ ChunkFun(BufferPtr, ThisChunkBytes);
+
+ CurrentOffset += ThisChunkBytes;
+ RemainBytes -= ThisChunkBytes;
+ }
+}
+
+void
BasicFile::Write(const void* Data, uint64_t Size, uint64_t Offset)
{
OVERLAPPED Ovl{};
@@ -60,33 +132,46 @@ BasicFile::Write(const void* Data, uint64_t Size, uint64_t Offset)
Ovl.Offset = DWORD(Offset & 0xffff'ffffu);
Ovl.OffsetHigh = DWORD(Offset >> 32);
- HRESULT hRes = m_File.Write(Data, gsl::narrow<DWORD>(Size), &Ovl);
+ DWORD dwNumberOfBytesToWrite = gsl::narrow<DWORD>(Size);
+ DWORD dwNumberOfBytesWritten = 0;
- if (FAILED(hRes))
+ BOOL Success = ::WriteFile(m_FileHandle, Data, dwNumberOfBytesToWrite, &dwNumberOfBytesWritten, &Ovl);
+
+ if (!Success)
{
- ThrowSystemException(hRes, "Failed to write to file '{}'"_format(zen::PathFromHandle(m_File)));
+ ThrowLastError("Failed to write to file '{}'"_format(zen::PathFromHandle(m_FileHandle)));
}
}
void
BasicFile::Flush()
{
- m_File.Flush();
+ FlushFileBuffers(m_FileHandle);
}
uint64_t
BasicFile::FileSize()
{
- ULONGLONG Sz;
- m_File.GetSize(Sz);
+ ULARGE_INTEGER liFileSize;
+ liFileSize.LowPart = ::GetFileSize(m_FileHandle, &liFileSize.HighPart);
- return uint64_t(Sz);
+ return uint64_t(liFileSize.QuadPart);
+}
+
+TEST_CASE("BasicFile")
+{
+ ScopedCurrentDirectoryChange _;
+
+ BasicFile File1;
+ CHECK_THROWS(File1.Open("zonk", false));
+ CHECK_NOTHROW(File1.Open("zonk", true));
+ CHECK_NOTHROW(File1.Write("abcd", 4, 0));
+ CHECK(File1.FileSize() == 4);
}
void
-BasicFile::Close()
+basicfile_forcelink()
{
- m_File.Close();
}
} // namespace zen
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 4407d8b08..070ca1503 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -10,18 +10,22 @@
#include <zencore/thread.h>
#include <zencore/uid.h>
-#include <gsl/gsl-lite.hpp>
-
-#include <functional>
-
-struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive-
-#include <atlfile.h>
#include <filesystem>
+#include <functional>
+#include <gsl/gsl-lite.hpp>
//////////////////////////////////////////////////////////////////////////
namespace zen {
+CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config) : m_Config(Config)
+{
+}
+
+CasContainerStrategy::~CasContainerStrategy()
+{
+}
+
void
CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore)
{
@@ -43,7 +47,9 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6
uint64_t MaxFileOffset = 0;
{
- // This is not technically necessary but may help future static analysis
+ // This is not technically necessary (nobody should be accessing us from
+ // another thread at this stage) but may help static analysis
+
zen::RwLock::ExclusiveLockScope _(m_LocationMapLock);
m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
@@ -103,9 +109,8 @@ IoBuffer
CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- auto KeyIt = m_LocationMap.find(ChunkHash);
- if (KeyIt != m_LocationMap.end())
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
const CasDiskLocation& Location = KeyIt->second;
return zen::IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size);
@@ -120,9 +125,8 @@ bool
CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- auto KeyIt = m_LocationMap.find(ChunkHash);
- if (KeyIt != m_LocationMap.end())
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
return true;
}
@@ -133,6 +137,13 @@ CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
void
CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks)
{
+ // This implementation is good enough for relatively small
+ // chunk sets (in terms of chunk identifiers), but would
+ // benefit from a better implementation which removes
+ // items incrementally for large sets, especially when
+ // we're likely to already have a large proportion of the
+ // chunks in the set
+
std::unordered_set<IoHash> HaveSet;
for (const IoHash& Hash : InOutChunks.GetChunkSet())
@@ -157,4 +168,113 @@ CasContainerStrategy::Flush()
m_SmallObjectFile.Flush();
}
+void
+CasContainerStrategy::Scrub(ScrubContext& Ctx)
+{
+ const uint64_t WindowSize = 4 * 1024 * 1024;
+ uint64_t WindowStart = 0;
+ uint64_t WindowEnd = WindowSize;
+ const uint64_t FileSize = m_SmallObjectFile.FileSize();
+
+ std::vector<CasDiskIndexEntry> BigChunks;
+ std::vector<CasDiskIndexEntry> BadChunks;
+
+ // We do a read sweep through the payloads file and validate
+ // any entries that are contained within each segment, with
+ // the assumption that most entries will be checked in this
+ // pass. An alternative strategy would be to use memory mapping.
+
+ {
+ IoBuffer ReadBuffer{WindowSize};
+ void* BufferBase = ReadBuffer.MutableData();
+
+ RwLock::SharedLockScope _(m_LocationMapLock);
+
+ do
+ {
+ const uint64_t ChunkSize = zen::Min(WindowSize, FileSize - WindowStart);
+ m_SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart);
+
+ for (auto& Entry : m_LocationMap)
+ {
+ const uint64_t EntryOffset = Entry.second.Offset;
+
+ if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
+ {
+ const uint64_t EntryEnd = EntryOffset + Entry.second.Size;
+
+ if (EntryEnd >= WindowEnd)
+ {
+ BigChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+
+ continue;
+ }
+
+ const IoHash ComputedHash = IoHash::HashBuffer(BufferBase, Entry.second.Size);
+
+ if (Entry.first != ComputedHash)
+ {
+ // Hash mismatch
+
+ BadChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+ }
+ }
+ }
+
+ WindowStart += WindowSize;
+ WindowEnd += WindowSize;
+ } while (WindowStart < FileSize);
+ }
+
+ // Deal with large chunks
+
+ for (const CasDiskIndexEntry& Entry : BigChunks)
+ {
+ IoHashStream Hasher;
+ m_SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) {
+ Hasher.Append(Data, Size);
+ });
+ IoHash ComputedHash = Hasher.GetHash();
+
+ if (Entry.Key != ComputedHash)
+ {
+ BadChunks.push_back(Entry);
+ }
+ }
+
+ // Deal with bad chunks by removing them from our lookup map
+
+ std::vector<IoHash> BadChunkHashes;
+
+ for (const CasDiskIndexEntry& Entry : BadChunks)
+ {
+ BadChunkHashes.push_back(Entry.Key);
+ m_LocationMap.erase(Entry.Key);
+ }
+
+ // Let whomever it concerns know about the bad chunks. This could
+ // be used to invalidate higher level data structures more efficiently
+ // than a full validation pass might be able to do
+
+ Ctx.ReportBadChunks(BadChunkHashes);
+}
+
+void
+CasContainerStrategy::MakeSnapshot()
+{
+ RwLock::SharedLockScope _(m_LocationMapLock);
+
+ std::vector<CasDiskIndexEntry> Entries{m_LocationMap.size()};
+
+ uint64_t EntryIndex = 0;
+ for (auto& Entry : m_LocationMap)
+ {
+ CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++];
+ IndexEntry.Key = Entry.first;
+ IndexEntry.Location = Entry.second;
+ }
+
+ m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0);
+}
+
} // namespace zen
diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h
index 05bbf81f6..101e6b1b7 100644
--- a/zenstore/compactcas.h
+++ b/zenstore/compactcas.h
@@ -14,9 +14,6 @@
#include <zenstore/cas.h>
#include <zenstore/caslog.h>
-#include <atlfile.h>
-#include <functional>
-
namespace zen {
//////////////////////////////////////////////////////////////////////////
@@ -27,7 +24,10 @@ namespace zen {
struct CasDiskLocation
{
uint64_t Offset;
- uint32_t Size; // TODO: Make this more like the IoStore index so we can store larger chunks (should be five bytes)
+ // If we wanted to be able to store larger chunks using this storage mechanism then
+ // we could make this more like the IoStore index so we can store larger chunks.
+ // I.e use five bytes for size and seven for offset
+ uint32_t Size;
};
struct CasDiskIndexEntry
@@ -50,7 +50,9 @@ static_assert(sizeof(CasDiskIndexEntry) == 32);
struct CasContainerStrategy
{
- CasContainerStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {}
+ CasContainerStrategy(const CasStoreConfiguration& Config);
+ ~CasContainerStrategy();
+
CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash);
IoBuffer FindChunk(const IoHash& ChunkHash);
@@ -58,10 +60,10 @@ struct CasContainerStrategy
void FilterChunks(CasChunkSet& InOutChunks);
void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore);
void Flush();
+ void Scrub(ScrubContext& Ctx);
private:
const CasStoreConfiguration& m_Config;
- CasStore::Stats& m_Stats;
uint64_t m_PayloadAlignment = 1 << 4;
bool m_IsInitialized = false;
BasicFile m_SmallObjectFile;
@@ -73,6 +75,8 @@ private:
RwLock m_InsertLock; // used to serialize inserts
std::atomic<uint64_t> m_CurrentInsertOffset = 0;
std::atomic<uint64_t> m_CurrentIndexOffset = 0;
+
+ void MakeSnapshot();
};
} // namespace zen
diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp
index 170f13875..31991a43e 100644
--- a/zenstore/filecas.cpp
+++ b/zenstore/filecas.cpp
@@ -10,6 +10,7 @@
#include <zencore/string.h>
#include <zencore/thread.h>
#include <zencore/uid.h>
+#include <zenstore/basicfile.h>
#include <gsl/gsl-lite.hpp>
@@ -17,6 +18,7 @@
#include <functional>
#include <unordered_map>
+// clang-format off
#include <zencore/prewindows.h>
struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive-
@@ -24,13 +26,19 @@ struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax erro
#include <zencore/postwindows.h>
// clang-format on
-//
-//////////////////////////////////////////////////////////////////////////
namespace zen {
using namespace fmt::literals;
+FileCasStrategy::FileCasStrategy(const CasStoreConfiguration& Config) : m_Config(Config)
+{
+}
+
+FileCasStrategy::~FileCasStrategy()
+{
+}
+
WideStringBuilderBase&
FileCasStrategy::MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHash& ChunkHash, size_t& OutShard2len)
{
@@ -56,7 +64,7 @@ FileCasStrategy::MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHas
OutShard2len = ShardedPath.Size();
ShardedPath.Append('\\');
- ShardedPath.AppendAsciiRange(str + 6, str + 64);
+ ShardedPath.AppendAsciiRange(str + 5, str + 64);
return ShardedPath;
}
@@ -259,12 +267,9 @@ FileCasStrategy::InsertChunk(const void* const ChunkData, const size_t ChunkSize
}
// We cannot rely on RAII to close the file handle since it would be closed
- // *after* the lock is released due to the initialization order.
+ // *after* the lock is released due to the initialization order
PayloadFile.Close();
- AtomicIncrement(m_Stats.PutCount);
- AtomicAdd(m_Stats.PutBytes, ChunkSize);
-
return {.New = true};
}
@@ -279,15 +284,7 @@ FileCasStrategy::FindChunk(const IoHash& ChunkHash)
RwLock::SharedLockScope _(LockForHash(ChunkHash));
- auto Chunk = IoBufferBuilder::MakeFromFile(ShardedPath.c_str());
-
- if (Chunk)
- {
- AtomicIncrement(m_Stats.GetCount);
- AtomicAdd(m_Stats.GetBytes, Chunk.Size());
- }
-
- return Chunk;
+ return IoBufferBuilder::MakeFromFile(ShardedPath.c_str());
}
bool
@@ -338,6 +335,62 @@ FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks)
}
void
+FileCasStrategy::IterateChunks(std::function<void(const IoHash& Hash, BasicFile& PayloadFile)>&& Callback)
+{
+ struct Visitor : public FileSystemTraversal::TreeVisitor
+ {
+ Visitor(const std::filesystem::path& RootDir) : RootDirectory(RootDir) {}
+ virtual void VisitFile(const std::filesystem::path& Parent, const std::wstring_view& File, uint64_t FileSize) override
+ {
+ ZEN_UNUSED(FileSize);
+
+ std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory);
+
+ std::wstring PathString = RelPath.native();
+
+ if ((PathString.size() == (3 + 2 + 1)) && (File.size() == (40 - 3 - 2)))
+ {
+ if (PathString.at(3) == std::filesystem::path::preferred_separator)
+ {
+ PathString.erase(3, 1);
+ }
+ PathString.append(File);
+
+ StringBuilder<64> Utf8;
+ WideToUtf8(PathString, Utf8);
+
+ // TODO: should validate that we're actually dealing with a valid hex string here
+
+ IoHash NameHash = IoHash::FromHexString({Utf8.Data(), Utf8.Size()});
+
+ BasicFile PayloadFile;
+ std::error_code Ec;
+ PayloadFile.Open(Parent / File, false, Ec);
+
+ if (!Ec)
+ {
+ Callback(NameHash, PayloadFile);
+ }
+ }
+ }
+
+ virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent,
+ [[maybe_unused]] const std::wstring_view& DirectoryName)
+ {
+ return true;
+ }
+
+ const std::filesystem::path& RootDirectory;
+ std::function<void(const IoHash& Hash, BasicFile& PayloadFile)> Callback;
+ } CasVisitor{m_Config.RootDirectory};
+
+ CasVisitor.Callback = std::move(Callback);
+
+ FileSystemTraversal Traversal;
+ Traversal.TraverseFileSystem(m_Config.RootDirectory, CasVisitor);
+}
+
+void
FileCasStrategy::Flush()
{
// Since we don't keep files open after writing there's nothing specific
@@ -353,6 +406,25 @@ FileCasStrategy::Flush()
}
void
+FileCasStrategy::Scrub(ScrubContext& Ctx)
+{
+ std::vector<IoHash> BadHashes;
+
+ IterateChunks([&](const IoHash& Hash, BasicFile& Payload) {
+ IoHashStream Hasher;
+ Payload.StreamFile([&](const void* Data, size_t Size) { Hasher.Append(Data, Size); });
+ IoHash ComputedHash = Hasher.GetHash();
+
+ if (ComputedHash != Hash)
+ {
+ BadHashes.push_back(Hash);
+ }
+ });
+
+ Ctx.ReportBadChunks(BadHashes);
+}
+
+void
FileCasStrategy::GarbageCollect(GcContext& GcCtx)
{
ZEN_UNUSED(GcCtx);
diff --git a/zenstore/filecas.h b/zenstore/filecas.h
index 448d1a05f..885973810 100644
--- a/zenstore/filecas.h
+++ b/zenstore/filecas.h
@@ -10,11 +10,20 @@
#include <zencore/thread.h>
#include <zenstore/cas.h>
+#include <functional>
+
namespace zen {
+class BasicFile;
+
+/** CAS storage strategy using a file-per-chunk storage strategy
+*/
+
struct FileCasStrategy
{
- FileCasStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {}
+ FileCasStrategy(const CasStoreConfiguration& Config);
+ ~FileCasStrategy();
+
CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash);
IoBuffer FindChunk(const IoHash& ChunkHash);
@@ -22,15 +31,16 @@ struct FileCasStrategy
void FilterChunks(CasChunkSet& InOutChunks);
void Flush();
void GarbageCollect(GcContext& GcCtx);
+ void Scrub(ScrubContext& Ctx);
private:
const CasStoreConfiguration& m_Config;
- CasStore::Stats& m_Stats;
RwLock m_Lock;
RwLock m_ShardLocks[256]; // TODO: these should be spaced out so they don't share cache lines
inline RwLock& LockForHash(const IoHash& Hash) { return m_ShardLocks[Hash.Hash[19]]; }
static WideStringBuilderBase& MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHash& ChunkHash, size_t& OutShard2len);
+ void IterateChunks(std::function<void(const IoHash& Hash, BasicFile& PayloadFile)>&& Callback);
};
} // namespace zen
diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h
index b4de533dd..bb310b179 100644
--- a/zenstore/include/zenstore/CAS.h
+++ b/zenstore/include/zenstore/CAS.h
@@ -2,7 +2,7 @@
#pragma once
-#include <zencore/zencore.h>
+#include "zenstore.h"
#include <zencore/blake3.h>
#include <zencore/iobuffer.h>
@@ -37,6 +37,14 @@ public:
private:
};
+class ScrubContext
+{
+public:
+ virtual void ReportBadChunks(std::span<IoHash> BadChunks);
+
+private:
+};
+
class CasChunkSet
{
public:
@@ -54,17 +62,7 @@ class CasStore
public:
virtual ~CasStore() = default;
- struct Stats
- {
- uint64_t PutBytes = 0;
- uint64_t PutCount = 0;
-
- uint64_t GetBytes = 0;
- uint64_t GetCount = 0;
- };
-
const CasStoreConfiguration& Config() { return m_Config; }
- const Stats& GetStats() const { return m_Stats; }
struct InsertResult
{
@@ -76,10 +74,10 @@ public:
virtual IoBuffer FindChunk(const IoHash& ChunkHash) = 0;
virtual void FilterChunks(CasChunkSet& InOutChunks) = 0;
virtual void Flush() = 0;
+ virtual void Scrub(ScrubContext& Ctx) = 0;
protected:
CasStoreConfiguration m_Config;
- Stats m_Stats;
};
ZENCORE_API CasStore* CreateCasStore();
diff --git a/zenstore/include/zenstore/basicfile.h b/zenstore/include/zenstore/basicfile.h
index c6f61d466..d4d65b366 100644
--- a/zenstore/include/zenstore/basicfile.h
+++ b/zenstore/include/zenstore/basicfile.h
@@ -2,34 +2,46 @@
#pragma once
-#include <zencore/iobuffer.h>
-#include <zencore/zencore.h>
+#include "zenstore.h"
+#include <zencore/iobuffer.h>
#include <zencore/windows.h>
-#include <atlfile.h>
#include <filesystem>
+#include <functional>
namespace zen {
/**
* Probably the most basic file abstraction in the universe
+ *
+ * One thing of note is that there is no notion of a "current file position"
+ * in this API -- all reads and writes are done from explicit offsets in
+ * the file. This avoids concurrency issues which can occur otherwise.
+ *
*/
class BasicFile
{
public:
+ BasicFile() = default;
+ ~BasicFile();
void Open(std::filesystem::path FileName, bool IsCreate);
- void Read(void* Data, uint64_t Size, uint64_t Offset);
- void Write(const void* Data, uint64_t Size, uint64_t Offset);
+ void Open(std::filesystem::path FileName, bool IsCreate, std::error_code& Ec);
+ void Close();
+ void Read(void* Data, uint64_t Size, uint64_t FileOffset);
+ void StreamFile(std::function<void(const void* Data, uint64_t Size)>&& ChunkFun);
+ void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun);
+ void Write(const void* Data, uint64_t Size, uint64_t FileOffset);
void Flush();
uint64_t FileSize();
- void* Handle() { return m_File; }
- void Close();
+ void* Handle() { return m_FileHandle; }
IoBuffer ReadAll();
private:
- CAtlFile m_File;
+ void* m_FileHandle = nullptr; // This is either null or valid
};
+ZENCORE_API void basicfile_forcelink();
+
} // namespace zen
diff --git a/zenstore/include/zenstore/caslog.h b/zenstore/include/zenstore/caslog.h
index aea855e4c..3d558bee0 100644
--- a/zenstore/include/zenstore/caslog.h
+++ b/zenstore/include/zenstore/caslog.h
@@ -2,7 +2,7 @@
#pragma once
-#include <zencore/zencore.h>
+#include "zenstore.h"
#include <zencore/iobuffer.h>
#include <zencore/string.h>
diff --git a/zenstore/include/zenstore/cidstore.h b/zenstore/include/zenstore/cidstore.h
index 76a33c915..f023ada40 100644
--- a/zenstore/include/zenstore/cidstore.h
+++ b/zenstore/include/zenstore/cidstore.h
@@ -2,6 +2,8 @@
#pragma once
+#include "zenstore.h"
+
#include <tsl/robin_map.h>
#include <zencore/iohash.h>
#include <zenstore/CAS.h>
diff --git a/zenstore/include/zenstore/scrub.h b/zenstore/include/zenstore/scrub.h
index 5a34d4860..4948afcd5 100644
--- a/zenstore/include/zenstore/scrub.h
+++ b/zenstore/include/zenstore/scrub.h
@@ -2,6 +2,8 @@
#pragma once
+#include "zenstore.h"
+
#include <zencore/iohash.h>
#include <span>
diff --git a/zenstore/include/zenstore/zenstore.h b/zenstore/include/zenstore/zenstore.h
new file mode 100644
index 000000000..46d62029d
--- /dev/null
+++ b/zenstore/include/zenstore/zenstore.h
@@ -0,0 +1,13 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/zencore.h>
+
+#define ZENSTORE_API
+
+namespace zen {
+
+ZENSTORE_API void zenstore_forcelinktests();
+
+}
diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp
new file mode 100644
index 000000000..cd16e5634
--- /dev/null
+++ b/zenstore/zenstore.cpp
@@ -0,0 +1,17 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "zenstore/zenstore.h"
+
+#include <zenstore/CAS.h>
+#include <zenstore/basicfile.h>
+
+namespace zen {
+
+void
+zenstore_forcelinktests()
+{
+ basicfile_forcelink();
+ CAS_forcelink();
+}
+
+} // namespace zen
diff --git a/zenstore/zenstore.vcxproj b/zenstore/zenstore.vcxproj
index 8d665f2c3..eb2ecd02b 100644
--- a/zenstore/zenstore.vcxproj
+++ b/zenstore/zenstore.vcxproj
@@ -19,6 +19,7 @@
<ClCompile Include="filecas.cpp" />
<ClCompile Include="gc.cpp" />
<ClCompile Include="scrub.cpp" />
+ <ClCompile Include="zenstore.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="compactcas.h" />
@@ -29,12 +30,16 @@
<ClInclude Include="include\zenstore\scrub.h" />
<ClInclude Include="include\zenstore\CAS.h" />
<ClInclude Include="include\zenstore\caslog.h" />
+ <ClInclude Include="include\zenstore\zenstore.h" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\zencore\zencore.vcxproj">
<Project>{d75bf9ab-c61e-4fff-ad59-1563430f05e2}</Project>
</ProjectReference>
</ItemGroup>
+ <ItemGroup>
+ <None Include="xmake.lua" />
+ </ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
diff --git a/zenstore/zenstore.vcxproj.filters b/zenstore/zenstore.vcxproj.filters
index 3dfb89dbf..8a52c69f6 100644
--- a/zenstore/zenstore.vcxproj.filters
+++ b/zenstore/zenstore.vcxproj.filters
@@ -9,6 +9,7 @@
<ClCompile Include="scrub.cpp" />
<ClCompile Include="basicfile.cpp" />
<ClCompile Include="cidstore.cpp" />
+ <ClCompile Include="zenstore.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="compactcas.h" />
@@ -19,5 +20,9 @@
<ClInclude Include="include\zenstore\scrub.h" />
<ClInclude Include="include\zenstore\basicfile.h" />
<ClInclude Include="include\zenstore\cidstore.h" />
+ <ClInclude Include="include\zenstore\zenstore.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="xmake.lua" />
</ItemGroup>
</Project> \ No newline at end of file