diff options
| author | Per Larsson <[email protected]> | 2021-09-20 08:54:34 +0200 |
|---|---|---|
| committer | Per Larsson <[email protected]> | 2021-09-20 08:54:34 +0200 |
| commit | e25b4b20d8a5696aa7055c9c167fa47b3739bc7e (patch) | |
| tree | 049654b87096a22e1bf696a385db608a75f229fa /zenstore | |
| parent | Probe upstream Zen server when initializing upstream cache. (diff) | |
| parent | Fixed unused variable warnings exposed by xmake build (unclear why I do not r... (diff) | |
| download | zen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.tar.xz zen-e25b4b20d8a5696aa7055c9c167fa47b3739bc7e.zip | |
Merge branch 'main' of https://github.com/EpicGames/zen
Diffstat (limited to 'zenstore')
| -rw-r--r-- | zenstore/CAS.cpp | 98 | ||||
| -rw-r--r-- | zenstore/basicfile.cpp | 119 | ||||
| -rw-r--r-- | zenstore/compactcas.cpp | 142 | ||||
| -rw-r--r-- | zenstore/compactcas.h | 16 | ||||
| -rw-r--r-- | zenstore/filecas.cpp | 104 | ||||
| -rw-r--r-- | zenstore/filecas.h | 14 | ||||
| -rw-r--r-- | zenstore/include/zenstore/CAS.h | 22 | ||||
| -rw-r--r-- | zenstore/include/zenstore/basicfile.h | 28 | ||||
| -rw-r--r-- | zenstore/include/zenstore/caslog.h | 2 | ||||
| -rw-r--r-- | zenstore/include/zenstore/cidstore.h | 2 | ||||
| -rw-r--r-- | zenstore/include/zenstore/scrub.h | 2 | ||||
| -rw-r--r-- | zenstore/include/zenstore/zenstore.h | 13 | ||||
| -rw-r--r-- | zenstore/zenstore.cpp | 17 | ||||
| -rw-r--r-- | zenstore/zenstore.vcxproj | 5 | ||||
| -rw-r--r-- | zenstore/zenstore.vcxproj.filters | 5 |
15 files changed, 492 insertions, 97 deletions
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp index e77c0ed64..a143230d3 100644 --- a/zenstore/CAS.cpp +++ b/zenstore/CAS.cpp @@ -11,6 +11,7 @@ #include <zencore/logging.h> #include <zencore/memory.h> #include <zencore/string.h> +#include <zencore/testutils.h> #include <zencore/thread.h> #include <zencore/uid.h> @@ -20,15 +21,23 @@ #include <functional> #include <unordered_map> -struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- -#include <atlfile.h> - ////////////////////////////////////////////////////////////////////////// namespace zen { +void +ScrubContext::ReportBadChunks(std::span<IoHash> BadChunks) +{ + ZEN_UNUSED(BadChunks); +} + /** - * Slightly less naive CAS store + * CAS store implementation + * + * Uses a basic strategy of splitting payloads by size, to improve ability to reclaim space + * quickly for unused large chunks and to maintain locality for small chunks which are + * frequently accessed together. + * */ class CasImpl : public CasStore { @@ -41,16 +50,15 @@ public: virtual IoBuffer FindChunk(const IoHash& ChunkHash) override; virtual void FilterChunks(CasChunkSet& InOutChunks) override; virtual void Flush() override; + virtual void Scrub(ScrubContext& Ctx) override; private: - void PickDefaultDirectory(); - CasContainerStrategy m_TinyStrategy; CasContainerStrategy m_SmallStrategy; FileCasStrategy m_LargeStrategy; }; -CasImpl::CasImpl() : m_TinyStrategy(m_Config, m_Stats), m_SmallStrategy(m_Config, m_Stats), m_LargeStrategy(m_Config, m_Stats) +CasImpl::CasImpl() : m_TinyStrategy(m_Config), m_SmallStrategy(m_Config), m_LargeStrategy(m_Config) { } @@ -63,13 +71,16 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig) { m_Config = InConfig; - ZEN_INFO("initializing CAS pool at {}", m_Config.RootDirectory); + ZEN_INFO("initializing CAS pool at '{}'", m_Config.RootDirectory); // Ensure root directory exists - create if it doesn't exist already std::filesystem::create_directories(m_Config.RootDirectory); // Open or create manifest + // + // The manifest is not currently fully implemented. The goal is to + // use it for recovery and configuration bool IsNewStore = false; @@ -77,23 +88,22 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig) std::filesystem::path ManifestPath = m_Config.RootDirectory; ManifestPath /= ".ucas_root"; - CAtlFile marker; - HRESULT hRes = marker.Create(ManifestPath.c_str(), GENERIC_READ, 0, OPEN_EXISTING); + std::error_code Ec; + BasicFile Marker; + Marker.Open(ManifestPath.c_str(), /* IsCreate */ false, Ec); - if (FAILED(hRes)) + if (Ec) { IsNewStore = true; ExtendableStringBuilder<128> manifest; - manifest.Append("#CAS_ROOT\n"); // TODO: should write something meaningful here + manifest.Append("#CAS_ROOT\n"); manifest.Append("ID="); zen::Oid id = zen::Oid::NewOid(); id.ToString(manifest); - hRes = marker.Create(ManifestPath.c_str(), GENERIC_WRITE, 0, CREATE_ALWAYS); - - if (SUCCEEDED(hRes)) - marker.Write(manifest.c_str(), (DWORD)manifest.Size()); + Marker.Open(ManifestPath.c_str(), /* IsCreate */ true); + Marker.Write(manifest.c_str(), (DWORD)manifest.Size(), 0); } } @@ -101,6 +111,9 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig) m_TinyStrategy.Initialize("tobs", 16, IsNewStore); m_SmallStrategy.Initialize("sobs", 4096, IsNewStore); + + ScrubContext Ctx; + Scrub(Ctx); } CasStore::InsertResult @@ -160,6 +173,14 @@ CasImpl::Flush() m_LargeStrategy.Flush(); } +void +CasImpl::Scrub(ScrubContext& Ctx) +{ + m_SmallStrategy.Scrub(Ctx); + m_TinyStrategy.Scrub(Ctx); + m_LargeStrategy.Scrub(Ctx); +} + ////////////////////////////////////////////////////////////////////////// CasStore* @@ -173,18 +194,47 @@ CreateCasStore() // Testing related code follows... // -void -CAS_forcelink() -{ -} - TEST_CASE("CasStore") { + ScopedTemporaryDirectory TempDir; + zen::CasStoreConfiguration config; - config.RootDirectory = "c:\\temp\\test"; + config.RootDirectory = TempDir.Path(); + + std::unique_ptr<zen::CasStore> Store{CreateCasStore()}; + Store->Initialize(config); + + ScrubContext Ctx; + Store->Scrub(Ctx); + + IoBuffer Value1{16}; + memcpy(Value1.MutableData(), "1234567890123456", 16); + IoHash Hash1 = IoHash::HashBuffer(Value1.Data(), Value1.Size()); + CasStore::InsertResult Result1 = Store->InsertChunk(Value1, Hash1); + CHECK(Result1.New); + + IoBuffer Value2{16}; + memcpy(Value2.MutableData(), "ABCDEFGHIJKLMNOP", 16); + IoHash Hash2 = IoHash::HashBuffer(Value2.Data(), Value2.Size()); + CasStore::InsertResult Result2 = Store->InsertChunk(Value2, Hash2); + CHECK(Result2.New); - std::unique_ptr<zen::CasStore> store{CreateCasStore()}; - store->Initialize(config); + CasChunkSet ChunkSet; + ChunkSet.AddChunk(Hash1); + ChunkSet.AddChunk(Hash2); + + Store->FilterChunks(ChunkSet); + CHECK(ChunkSet.GetChunkSet().size() == 0); + + IoBuffer Lookup1 = Store->FindChunk(Hash1); + CHECK(Lookup1); + IoBuffer Lookup2 = Store->FindChunk(Hash2); + CHECK(Lookup2); +} + +void +CAS_forcelink() +{ } } // namespace zen diff --git a/zenstore/basicfile.cpp b/zenstore/basicfile.cpp index 35ccdd042..0b92a8979 100644 --- a/zenstore/basicfile.cpp +++ b/zenstore/basicfile.cpp @@ -5,7 +5,9 @@ #include <zencore/except.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> +#include <zencore/testutils.h> +#include <doctest/doctest.h> #include <fmt/format.h> #include <gsl/gsl-lite.hpp> @@ -13,16 +15,54 @@ namespace zen { using namespace fmt::literals; +BasicFile::~BasicFile() +{ + Close(); +} + void -BasicFile::Open(std::filesystem::path FileName, bool isCreate) +BasicFile::Open(std::filesystem::path FileName, bool IsCreate) { - const DWORD dwCreationDisposition = isCreate ? CREATE_ALWAYS : OPEN_EXISTING; + std::error_code Ec; + Open(FileName, IsCreate, Ec); - HRESULT hRes = m_File.Create(FileName.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, dwCreationDisposition); + if (Ec) + { + throw std::system_error(Ec, "failed to open file '{}'"_format(FileName)); + } +} - if (FAILED(hRes)) +void +BasicFile::Open(std::filesystem::path FileName, bool IsCreate, std::error_code& Ec) +{ + const DWORD dwCreationDisposition = IsCreate ? CREATE_ALWAYS : OPEN_EXISTING; + const DWORD dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; + const DWORD dwShareMode = FILE_SHARE_READ; + const DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; + HANDLE hTemplateFile = nullptr; + + HANDLE FileHandle = CreateFile(FileName.c_str(), + dwDesiredAccess, + dwShareMode, + /* lpSecurityAttributes */ nullptr, + dwCreationDisposition, + dwFlagsAndAttributes, + hTemplateFile); + + if (FileHandle == INVALID_HANDLE_VALUE) { - ThrowSystemException(hRes, "Failed to open bucket sobs file '{}'"_format(FileName)); + Ec = zen::MakeErrorCodeFromLastError(); + } + + m_FileHandle = FileHandle; +} + +void +BasicFile::Close() +{ + if (m_FileHandle) + { + ::CloseHandle(m_FileHandle); } } @@ -34,11 +74,14 @@ BasicFile::Read(void* Data, uint64_t Size, uint64_t Offset) Ovl.Offset = DWORD(Offset & 0xffff'ffffu); Ovl.OffsetHigh = DWORD(Offset >> 32); - HRESULT hRes = m_File.Read(Data, gsl::narrow<DWORD>(Size), &Ovl); + DWORD dwNumberOfBytesToRead = gsl::narrow<DWORD>(Size); + DWORD dwNumberOfBytesRead = 0; + + BOOL Success = ::ReadFile(m_FileHandle, Data, dwNumberOfBytesToRead, &dwNumberOfBytesRead, &Ovl); - if (FAILED(hRes)) + if (!Success) { - ThrowSystemException(hRes, "Failed to read from file '{}'"_format(zen::PathFromHandle(m_File))); + ThrowLastError("Failed to read from file '{}'"_format(zen::PathFromHandle(m_FileHandle))); } } @@ -53,6 +96,35 @@ BasicFile::ReadAll() } void +BasicFile::StreamFile(std::function<void(const void* Data, uint64_t Size)>&& ChunkFun) +{ + StreamByteRange(0, FileSize(), std::move(ChunkFun)); +} + +void +BasicFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun) +{ + const uint64_t ChunkSize = 128 * 1024; + IoBuffer ReadBuffer{ChunkSize}; + void* BufferPtr = ReadBuffer.MutableData(); + + uint64_t RemainBytes = Size; + uint64_t CurrentOffset = FileOffset; + + while (RemainBytes) + { + const uint64_t ThisChunkBytes = zen::Min(ChunkSize, RemainBytes); + + Read(BufferPtr, ThisChunkBytes, CurrentOffset); + + ChunkFun(BufferPtr, ThisChunkBytes); + + CurrentOffset += ThisChunkBytes; + RemainBytes -= ThisChunkBytes; + } +} + +void BasicFile::Write(const void* Data, uint64_t Size, uint64_t Offset) { OVERLAPPED Ovl{}; @@ -60,33 +132,46 @@ BasicFile::Write(const void* Data, uint64_t Size, uint64_t Offset) Ovl.Offset = DWORD(Offset & 0xffff'ffffu); Ovl.OffsetHigh = DWORD(Offset >> 32); - HRESULT hRes = m_File.Write(Data, gsl::narrow<DWORD>(Size), &Ovl); + DWORD dwNumberOfBytesToWrite = gsl::narrow<DWORD>(Size); + DWORD dwNumberOfBytesWritten = 0; - if (FAILED(hRes)) + BOOL Success = ::WriteFile(m_FileHandle, Data, dwNumberOfBytesToWrite, &dwNumberOfBytesWritten, &Ovl); + + if (!Success) { - ThrowSystemException(hRes, "Failed to write to file '{}'"_format(zen::PathFromHandle(m_File))); + ThrowLastError("Failed to write to file '{}'"_format(zen::PathFromHandle(m_FileHandle))); } } void BasicFile::Flush() { - m_File.Flush(); + FlushFileBuffers(m_FileHandle); } uint64_t BasicFile::FileSize() { - ULONGLONG Sz; - m_File.GetSize(Sz); + ULARGE_INTEGER liFileSize; + liFileSize.LowPart = ::GetFileSize(m_FileHandle, &liFileSize.HighPart); - return uint64_t(Sz); + return uint64_t(liFileSize.QuadPart); +} + +TEST_CASE("BasicFile") +{ + ScopedCurrentDirectoryChange _; + + BasicFile File1; + CHECK_THROWS(File1.Open("zonk", false)); + CHECK_NOTHROW(File1.Open("zonk", true)); + CHECK_NOTHROW(File1.Write("abcd", 4, 0)); + CHECK(File1.FileSize() == 4); } void -BasicFile::Close() +basicfile_forcelink() { - m_File.Close(); } } // namespace zen diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 4407d8b08..070ca1503 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -10,18 +10,22 @@ #include <zencore/thread.h> #include <zencore/uid.h> -#include <gsl/gsl-lite.hpp> - -#include <functional> - -struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- -#include <atlfile.h> #include <filesystem> +#include <functional> +#include <gsl/gsl-lite.hpp> ////////////////////////////////////////////////////////////////////////// namespace zen { +CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config) : m_Config(Config) +{ +} + +CasContainerStrategy::~CasContainerStrategy() +{ +} + void CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore) { @@ -43,7 +47,9 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6 uint64_t MaxFileOffset = 0; { - // This is not technically necessary but may help future static analysis + // This is not technically necessary (nobody should be accessing us from + // another thread at this stage) but may help static analysis + zen::RwLock::ExclusiveLockScope _(m_LocationMapLock); m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { @@ -103,9 +109,8 @@ IoBuffer CasContainerStrategy::FindChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt != m_LocationMap.end()) + if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { const CasDiskLocation& Location = KeyIt->second; return zen::IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size); @@ -120,9 +125,8 @@ bool CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - auto KeyIt = m_LocationMap.find(ChunkHash); - if (KeyIt != m_LocationMap.end()) + if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { return true; } @@ -133,6 +137,13 @@ CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) void CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks) { + // This implementation is good enough for relatively small + // chunk sets (in terms of chunk identifiers), but would + // benefit from a better implementation which removes + // items incrementally for large sets, especially when + // we're likely to already have a large proportion of the + // chunks in the set + std::unordered_set<IoHash> HaveSet; for (const IoHash& Hash : InOutChunks.GetChunkSet()) @@ -157,4 +168,113 @@ CasContainerStrategy::Flush() m_SmallObjectFile.Flush(); } +void +CasContainerStrategy::Scrub(ScrubContext& Ctx) +{ + const uint64_t WindowSize = 4 * 1024 * 1024; + uint64_t WindowStart = 0; + uint64_t WindowEnd = WindowSize; + const uint64_t FileSize = m_SmallObjectFile.FileSize(); + + std::vector<CasDiskIndexEntry> BigChunks; + std::vector<CasDiskIndexEntry> BadChunks; + + // We do a read sweep through the payloads file and validate + // any entries that are contained within each segment, with + // the assumption that most entries will be checked in this + // pass. An alternative strategy would be to use memory mapping. + + { + IoBuffer ReadBuffer{WindowSize}; + void* BufferBase = ReadBuffer.MutableData(); + + RwLock::SharedLockScope _(m_LocationMapLock); + + do + { + const uint64_t ChunkSize = zen::Min(WindowSize, FileSize - WindowStart); + m_SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart); + + for (auto& Entry : m_LocationMap) + { + const uint64_t EntryOffset = Entry.second.Offset; + + if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) + { + const uint64_t EntryEnd = EntryOffset + Entry.second.Size; + + if (EntryEnd >= WindowEnd) + { + BigChunks.push_back({.Key = Entry.first, .Location = Entry.second}); + + continue; + } + + const IoHash ComputedHash = IoHash::HashBuffer(BufferBase, Entry.second.Size); + + if (Entry.first != ComputedHash) + { + // Hash mismatch + + BadChunks.push_back({.Key = Entry.first, .Location = Entry.second}); + } + } + } + + WindowStart += WindowSize; + WindowEnd += WindowSize; + } while (WindowStart < FileSize); + } + + // Deal with large chunks + + for (const CasDiskIndexEntry& Entry : BigChunks) + { + IoHashStream Hasher; + m_SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) { + Hasher.Append(Data, Size); + }); + IoHash ComputedHash = Hasher.GetHash(); + + if (Entry.Key != ComputedHash) + { + BadChunks.push_back(Entry); + } + } + + // Deal with bad chunks by removing them from our lookup map + + std::vector<IoHash> BadChunkHashes; + + for (const CasDiskIndexEntry& Entry : BadChunks) + { + BadChunkHashes.push_back(Entry.Key); + m_LocationMap.erase(Entry.Key); + } + + // Let whomever it concerns know about the bad chunks. This could + // be used to invalidate higher level data structures more efficiently + // than a full validation pass might be able to do + + Ctx.ReportBadChunks(BadChunkHashes); +} + +void +CasContainerStrategy::MakeSnapshot() +{ + RwLock::SharedLockScope _(m_LocationMapLock); + + std::vector<CasDiskIndexEntry> Entries{m_LocationMap.size()}; + + uint64_t EntryIndex = 0; + for (auto& Entry : m_LocationMap) + { + CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++]; + IndexEntry.Key = Entry.first; + IndexEntry.Location = Entry.second; + } + + m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0); +} + } // namespace zen diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h index 05bbf81f6..101e6b1b7 100644 --- a/zenstore/compactcas.h +++ b/zenstore/compactcas.h @@ -14,9 +14,6 @@ #include <zenstore/cas.h> #include <zenstore/caslog.h> -#include <atlfile.h> -#include <functional> - namespace zen { ////////////////////////////////////////////////////////////////////////// @@ -27,7 +24,10 @@ namespace zen { struct CasDiskLocation { uint64_t Offset; - uint32_t Size; // TODO: Make this more like the IoStore index so we can store larger chunks (should be five bytes) + // If we wanted to be able to store larger chunks using this storage mechanism then + // we could make this more like the IoStore index so we can store larger chunks. + // I.e use five bytes for size and seven for offset + uint32_t Size; }; struct CasDiskIndexEntry @@ -50,7 +50,9 @@ static_assert(sizeof(CasDiskIndexEntry) == 32); struct CasContainerStrategy { - CasContainerStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {} + CasContainerStrategy(const CasStoreConfiguration& Config); + ~CasContainerStrategy(); + CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash); CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash); IoBuffer FindChunk(const IoHash& ChunkHash); @@ -58,10 +60,10 @@ struct CasContainerStrategy void FilterChunks(CasChunkSet& InOutChunks); void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore); void Flush(); + void Scrub(ScrubContext& Ctx); private: const CasStoreConfiguration& m_Config; - CasStore::Stats& m_Stats; uint64_t m_PayloadAlignment = 1 << 4; bool m_IsInitialized = false; BasicFile m_SmallObjectFile; @@ -73,6 +75,8 @@ private: RwLock m_InsertLock; // used to serialize inserts std::atomic<uint64_t> m_CurrentInsertOffset = 0; std::atomic<uint64_t> m_CurrentIndexOffset = 0; + + void MakeSnapshot(); }; } // namespace zen diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp index 170f13875..31991a43e 100644 --- a/zenstore/filecas.cpp +++ b/zenstore/filecas.cpp @@ -10,6 +10,7 @@ #include <zencore/string.h> #include <zencore/thread.h> #include <zencore/uid.h> +#include <zenstore/basicfile.h> #include <gsl/gsl-lite.hpp> @@ -17,6 +18,7 @@ #include <functional> #include <unordered_map> +// clang-format off #include <zencore/prewindows.h> struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- @@ -24,13 +26,19 @@ struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax erro #include <zencore/postwindows.h> // clang-format on -// -////////////////////////////////////////////////////////////////////////// namespace zen { using namespace fmt::literals; +FileCasStrategy::FileCasStrategy(const CasStoreConfiguration& Config) : m_Config(Config) +{ +} + +FileCasStrategy::~FileCasStrategy() +{ +} + WideStringBuilderBase& FileCasStrategy::MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHash& ChunkHash, size_t& OutShard2len) { @@ -56,7 +64,7 @@ FileCasStrategy::MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHas OutShard2len = ShardedPath.Size(); ShardedPath.Append('\\'); - ShardedPath.AppendAsciiRange(str + 6, str + 64); + ShardedPath.AppendAsciiRange(str + 5, str + 64); return ShardedPath; } @@ -259,12 +267,9 @@ FileCasStrategy::InsertChunk(const void* const ChunkData, const size_t ChunkSize } // We cannot rely on RAII to close the file handle since it would be closed - // *after* the lock is released due to the initialization order. + // *after* the lock is released due to the initialization order PayloadFile.Close(); - AtomicIncrement(m_Stats.PutCount); - AtomicAdd(m_Stats.PutBytes, ChunkSize); - return {.New = true}; } @@ -279,15 +284,7 @@ FileCasStrategy::FindChunk(const IoHash& ChunkHash) RwLock::SharedLockScope _(LockForHash(ChunkHash)); - auto Chunk = IoBufferBuilder::MakeFromFile(ShardedPath.c_str()); - - if (Chunk) - { - AtomicIncrement(m_Stats.GetCount); - AtomicAdd(m_Stats.GetBytes, Chunk.Size()); - } - - return Chunk; + return IoBufferBuilder::MakeFromFile(ShardedPath.c_str()); } bool @@ -338,6 +335,62 @@ FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks) } void +FileCasStrategy::IterateChunks(std::function<void(const IoHash& Hash, BasicFile& PayloadFile)>&& Callback) +{ + struct Visitor : public FileSystemTraversal::TreeVisitor + { + Visitor(const std::filesystem::path& RootDir) : RootDirectory(RootDir) {} + virtual void VisitFile(const std::filesystem::path& Parent, const std::wstring_view& File, uint64_t FileSize) override + { + ZEN_UNUSED(FileSize); + + std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory); + + std::wstring PathString = RelPath.native(); + + if ((PathString.size() == (3 + 2 + 1)) && (File.size() == (40 - 3 - 2))) + { + if (PathString.at(3) == std::filesystem::path::preferred_separator) + { + PathString.erase(3, 1); + } + PathString.append(File); + + StringBuilder<64> Utf8; + WideToUtf8(PathString, Utf8); + + // TODO: should validate that we're actually dealing with a valid hex string here + + IoHash NameHash = IoHash::FromHexString({Utf8.Data(), Utf8.Size()}); + + BasicFile PayloadFile; + std::error_code Ec; + PayloadFile.Open(Parent / File, false, Ec); + + if (!Ec) + { + Callback(NameHash, PayloadFile); + } + } + } + + virtual bool VisitDirectory([[maybe_unused]] const std::filesystem::path& Parent, + [[maybe_unused]] const std::wstring_view& DirectoryName) + { + return true; + } + + const std::filesystem::path& RootDirectory; + std::function<void(const IoHash& Hash, BasicFile& PayloadFile)> Callback; + } CasVisitor{m_Config.RootDirectory}; + + CasVisitor.Callback = std::move(Callback); + + FileSystemTraversal Traversal; + Traversal.TraverseFileSystem(m_Config.RootDirectory, CasVisitor); +} + +void FileCasStrategy::Flush() { // Since we don't keep files open after writing there's nothing specific @@ -353,6 +406,25 @@ FileCasStrategy::Flush() } void +FileCasStrategy::Scrub(ScrubContext& Ctx) +{ + std::vector<IoHash> BadHashes; + + IterateChunks([&](const IoHash& Hash, BasicFile& Payload) { + IoHashStream Hasher; + Payload.StreamFile([&](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + IoHash ComputedHash = Hasher.GetHash(); + + if (ComputedHash != Hash) + { + BadHashes.push_back(Hash); + } + }); + + Ctx.ReportBadChunks(BadHashes); +} + +void FileCasStrategy::GarbageCollect(GcContext& GcCtx) { ZEN_UNUSED(GcCtx); diff --git a/zenstore/filecas.h b/zenstore/filecas.h index 448d1a05f..885973810 100644 --- a/zenstore/filecas.h +++ b/zenstore/filecas.h @@ -10,11 +10,20 @@ #include <zencore/thread.h> #include <zenstore/cas.h> +#include <functional> + namespace zen { +class BasicFile; + +/** CAS storage strategy using a file-per-chunk storage strategy +*/ + struct FileCasStrategy { - FileCasStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {} + FileCasStrategy(const CasStoreConfiguration& Config); + ~FileCasStrategy(); + CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash); CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash); IoBuffer FindChunk(const IoHash& ChunkHash); @@ -22,15 +31,16 @@ struct FileCasStrategy void FilterChunks(CasChunkSet& InOutChunks); void Flush(); void GarbageCollect(GcContext& GcCtx); + void Scrub(ScrubContext& Ctx); private: const CasStoreConfiguration& m_Config; - CasStore::Stats& m_Stats; RwLock m_Lock; RwLock m_ShardLocks[256]; // TODO: these should be spaced out so they don't share cache lines inline RwLock& LockForHash(const IoHash& Hash) { return m_ShardLocks[Hash.Hash[19]]; } static WideStringBuilderBase& MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHash& ChunkHash, size_t& OutShard2len); + void IterateChunks(std::function<void(const IoHash& Hash, BasicFile& PayloadFile)>&& Callback); }; } // namespace zen diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h index b4de533dd..bb310b179 100644 --- a/zenstore/include/zenstore/CAS.h +++ b/zenstore/include/zenstore/CAS.h @@ -2,7 +2,7 @@ #pragma once -#include <zencore/zencore.h> +#include "zenstore.h" #include <zencore/blake3.h> #include <zencore/iobuffer.h> @@ -37,6 +37,14 @@ public: private: }; +class ScrubContext +{ +public: + virtual void ReportBadChunks(std::span<IoHash> BadChunks); + +private: +}; + class CasChunkSet { public: @@ -54,17 +62,7 @@ class CasStore public: virtual ~CasStore() = default; - struct Stats - { - uint64_t PutBytes = 0; - uint64_t PutCount = 0; - - uint64_t GetBytes = 0; - uint64_t GetCount = 0; - }; - const CasStoreConfiguration& Config() { return m_Config; } - const Stats& GetStats() const { return m_Stats; } struct InsertResult { @@ -76,10 +74,10 @@ public: virtual IoBuffer FindChunk(const IoHash& ChunkHash) = 0; virtual void FilterChunks(CasChunkSet& InOutChunks) = 0; virtual void Flush() = 0; + virtual void Scrub(ScrubContext& Ctx) = 0; protected: CasStoreConfiguration m_Config; - Stats m_Stats; }; ZENCORE_API CasStore* CreateCasStore(); diff --git a/zenstore/include/zenstore/basicfile.h b/zenstore/include/zenstore/basicfile.h index c6f61d466..d4d65b366 100644 --- a/zenstore/include/zenstore/basicfile.h +++ b/zenstore/include/zenstore/basicfile.h @@ -2,34 +2,46 @@ #pragma once -#include <zencore/iobuffer.h> -#include <zencore/zencore.h> +#include "zenstore.h" +#include <zencore/iobuffer.h> #include <zencore/windows.h> -#include <atlfile.h> #include <filesystem> +#include <functional> namespace zen { /** * Probably the most basic file abstraction in the universe + * + * One thing of note is that there is no notion of a "current file position" + * in this API -- all reads and writes are done from explicit offsets in + * the file. This avoids concurrency issues which can occur otherwise. + * */ class BasicFile { public: + BasicFile() = default; + ~BasicFile(); void Open(std::filesystem::path FileName, bool IsCreate); - void Read(void* Data, uint64_t Size, uint64_t Offset); - void Write(const void* Data, uint64_t Size, uint64_t Offset); + void Open(std::filesystem::path FileName, bool IsCreate, std::error_code& Ec); + void Close(); + void Read(void* Data, uint64_t Size, uint64_t FileOffset); + void StreamFile(std::function<void(const void* Data, uint64_t Size)>&& ChunkFun); + void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun); + void Write(const void* Data, uint64_t Size, uint64_t FileOffset); void Flush(); uint64_t FileSize(); - void* Handle() { return m_File; } - void Close(); + void* Handle() { return m_FileHandle; } IoBuffer ReadAll(); private: - CAtlFile m_File; + void* m_FileHandle = nullptr; // This is either null or valid }; +ZENCORE_API void basicfile_forcelink(); + } // namespace zen diff --git a/zenstore/include/zenstore/caslog.h b/zenstore/include/zenstore/caslog.h index aea855e4c..3d558bee0 100644 --- a/zenstore/include/zenstore/caslog.h +++ b/zenstore/include/zenstore/caslog.h @@ -2,7 +2,7 @@ #pragma once -#include <zencore/zencore.h> +#include "zenstore.h" #include <zencore/iobuffer.h> #include <zencore/string.h> diff --git a/zenstore/include/zenstore/cidstore.h b/zenstore/include/zenstore/cidstore.h index 76a33c915..f023ada40 100644 --- a/zenstore/include/zenstore/cidstore.h +++ b/zenstore/include/zenstore/cidstore.h @@ -2,6 +2,8 @@ #pragma once +#include "zenstore.h" + #include <tsl/robin_map.h> #include <zencore/iohash.h> #include <zenstore/CAS.h> diff --git a/zenstore/include/zenstore/scrub.h b/zenstore/include/zenstore/scrub.h index 5a34d4860..4948afcd5 100644 --- a/zenstore/include/zenstore/scrub.h +++ b/zenstore/include/zenstore/scrub.h @@ -2,6 +2,8 @@ #pragma once +#include "zenstore.h" + #include <zencore/iohash.h> #include <span> diff --git a/zenstore/include/zenstore/zenstore.h b/zenstore/include/zenstore/zenstore.h new file mode 100644 index 000000000..46d62029d --- /dev/null +++ b/zenstore/include/zenstore/zenstore.h @@ -0,0 +1,13 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> + +#define ZENSTORE_API + +namespace zen { + +ZENSTORE_API void zenstore_forcelinktests(); + +} diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp new file mode 100644 index 000000000..cd16e5634 --- /dev/null +++ b/zenstore/zenstore.cpp @@ -0,0 +1,17 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "zenstore/zenstore.h" + +#include <zenstore/CAS.h> +#include <zenstore/basicfile.h> + +namespace zen { + +void +zenstore_forcelinktests() +{ + basicfile_forcelink(); + CAS_forcelink(); +} + +} // namespace zen diff --git a/zenstore/zenstore.vcxproj b/zenstore/zenstore.vcxproj index 8d665f2c3..eb2ecd02b 100644 --- a/zenstore/zenstore.vcxproj +++ b/zenstore/zenstore.vcxproj @@ -19,6 +19,7 @@ <ClCompile Include="filecas.cpp" /> <ClCompile Include="gc.cpp" /> <ClCompile Include="scrub.cpp" /> + <ClCompile Include="zenstore.cpp" /> </ItemGroup> <ItemGroup> <ClInclude Include="compactcas.h" /> @@ -29,12 +30,16 @@ <ClInclude Include="include\zenstore\scrub.h" /> <ClInclude Include="include\zenstore\CAS.h" /> <ClInclude Include="include\zenstore\caslog.h" /> + <ClInclude Include="include\zenstore\zenstore.h" /> </ItemGroup> <ItemGroup> <ProjectReference Include="..\zencore\zencore.vcxproj"> <Project>{d75bf9ab-c61e-4fff-ad59-1563430f05e2}</Project> </ProjectReference> </ItemGroup> + <ItemGroup> + <None Include="xmake.lua" /> + </ItemGroup> <PropertyGroup Label="Globals"> <VCProjectVersion>16.0</VCProjectVersion> <Keyword>Win32Proj</Keyword> diff --git a/zenstore/zenstore.vcxproj.filters b/zenstore/zenstore.vcxproj.filters index 3dfb89dbf..8a52c69f6 100644 --- a/zenstore/zenstore.vcxproj.filters +++ b/zenstore/zenstore.vcxproj.filters @@ -9,6 +9,7 @@ <ClCompile Include="scrub.cpp" /> <ClCompile Include="basicfile.cpp" /> <ClCompile Include="cidstore.cpp" /> + <ClCompile Include="zenstore.cpp" /> </ItemGroup> <ItemGroup> <ClInclude Include="compactcas.h" /> @@ -19,5 +20,9 @@ <ClInclude Include="include\zenstore\scrub.h" /> <ClInclude Include="include\zenstore\basicfile.h" /> <ClInclude Include="include\zenstore\cidstore.h" /> + <ClInclude Include="include\zenstore\zenstore.h" /> + </ItemGroup> + <ItemGroup> + <None Include="xmake.lua" /> </ItemGroup> </Project>
\ No newline at end of file |