diff options
| author | Stefan Boberg <[email protected]> | 2021-05-11 13:05:39 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2021-05-11 13:05:39 +0200 |
| commit | f8d9ac5d13dd37b8b57af0478e77ba1e75c813aa (patch) | |
| tree | 1daf7621e110d48acd5e12e3073ce48ef0dd11b2 /zenstore | |
| download | zen-f8d9ac5d13dd37b8b57af0478e77ba1e75c813aa.tar.xz zen-f8d9ac5d13dd37b8b57af0478e77ba1e75c813aa.zip | |
Adding zenservice code
Diffstat (limited to 'zenstore')
| -rw-r--r-- | zenstore/CAS.cpp | 192 | ||||
| -rw-r--r-- | zenstore/caslog.cpp | 220 | ||||
| -rw-r--r-- | zenstore/compactcas.cpp | 119 | ||||
| -rw-r--r-- | zenstore/compactcas.h | 66 | ||||
| -rw-r--r-- | zenstore/filecas.cpp | 237 | ||||
| -rw-r--r-- | zenstore/filecas.h | 32 | ||||
| -rw-r--r-- | zenstore/gc.cpp | 26 | ||||
| -rw-r--r-- | zenstore/include/zenstore/CAS.h | 66 | ||||
| -rw-r--r-- | zenstore/include/zenstore/caslog.h | 96 | ||||
| -rw-r--r-- | zenstore/include/zenstore/gc.h | 28 | ||||
| -rw-r--r-- | zenstore/include/zenstore/scrub.h | 24 | ||||
| -rw-r--r-- | zenstore/scrub.cpp | 15 | ||||
| -rw-r--r-- | zenstore/zenstore.vcxproj | 121 | ||||
| -rw-r--r-- | zenstore/zenstore.vcxproj.filters | 15 |
14 files changed, 1257 insertions, 0 deletions
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp new file mode 100644 index 000000000..8d81fc5cb --- /dev/null +++ b/zenstore/CAS.cpp @@ -0,0 +1,192 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenstore/cas.h> + +#include "compactcas.h" +#include "filecas.h" + +#include <doctest/doctest.h> +#include <zencore/except.h> +#include <zencore/fmtutils.h> +#include <zencore/memory.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/uid.h> + +#include <spdlog/spdlog.h> + +#include <gsl/gsl-lite.hpp> + +#include <filesystem> +#include <functional> +#include <unordered_map> + +struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- +#include <atlfile.h> + +////////////////////////////////////////////////////////////////////////// + +namespace zen { + +/** + * Slightly less naive CAS store + */ +class CasImpl : public CasStore +{ +public: + CasImpl(); + virtual ~CasImpl(); + + virtual void Initialize(const CasStoreConfiguration& InConfig) override; + virtual CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash) override; + virtual CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) override; + virtual IoBuffer FindChunk(const IoHash& ChunkHash) override; + +private: + void PickDefaultDirectory(); + + CasContainerStrategy m_TinyStrategy; + CasContainerStrategy m_SmallStrategy; + FileCasStrategy m_LargeStrategy; +}; + +CasImpl::CasImpl() : m_TinyStrategy(m_Config, m_Stats), m_SmallStrategy(m_Config, m_Stats), m_LargeStrategy(m_Config, m_Stats) +{ +} + +CasImpl::~CasImpl() +{ +} + +void +CasImpl::Initialize(const CasStoreConfiguration& InConfig) +{ + m_Config = InConfig; + + spdlog::info("initializing CAS pool at {}", m_Config.RootDirectory); + + // Ensure root directory exists - create if it doesn't exist already + + std::filesystem::create_directories(m_Config.RootDirectory); + + // Open or create manifest + + bool IsNewStore = false; + + { + std::filesystem::path ManifestPath = m_Config.RootDirectory; + ManifestPath /= ".ucas_root"; + + CAtlFile marker; + HRESULT hRes = marker.Create(ManifestPath.c_str(), GENERIC_READ, 0, OPEN_EXISTING); + + if (FAILED(hRes)) + { + IsNewStore = true; + + ExtendableStringBuilder<128> manifest; + manifest.Append("#CAS_ROOT\n"); // TODO: should write something meaningful here + manifest.Append("ID="); + zen::Oid id = zen::Oid::NewOid(); + id.ToString(manifest); + + hRes = marker.Create(ManifestPath.c_str(), GENERIC_WRITE, 0, CREATE_ALWAYS); + + if (SUCCEEDED(hRes)) + marker.Write(manifest.c_str(), (DWORD)manifest.Size()); + } + } + + // Initialize payload storage + + m_TinyStrategy.Initialize("tobs", 16, IsNewStore); + m_SmallStrategy.Initialize("sobs", 4096, IsNewStore); +} + +CasStore::InsertResult +CasImpl::InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash) +{ + if (ChunkSize < m_Config.TinyValueThreshold) + { + return m_TinyStrategy.InsertChunk(ChunkData, ChunkSize, ChunkHash); + } + else if (ChunkSize >= m_Config.HugeValueThreshold) + { + return m_LargeStrategy.InsertChunk(ChunkData, ChunkSize, ChunkHash); + } + else + { + return m_SmallStrategy.InsertChunk(ChunkData, ChunkSize, ChunkHash); + } +} + +CasStore::InsertResult +CasImpl::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) +{ + const uint64_t ChunkSize = Chunk.Size(); + + if (ChunkSize < m_Config.TinyValueThreshold) + { + return m_TinyStrategy.InsertChunk(Chunk, ChunkHash); + } + else if (Chunk.Size() >= m_Config.HugeValueThreshold) + { + return m_LargeStrategy.InsertChunk(Chunk, ChunkHash); + } + else + { + return m_SmallStrategy.InsertChunk(Chunk, ChunkHash); + } +} + +IoBuffer +CasImpl::FindChunk(const IoHash& ChunkHash) +{ + if (IoBuffer Found = m_SmallStrategy.FindChunk(ChunkHash)) + { + return Found; + } + + if (IoBuffer Found = m_TinyStrategy.FindChunk(ChunkHash)) + { + return Found; + } + + if (IoBuffer Found = m_LargeStrategy.FindChunk(ChunkHash)) + { + return Found; + } + + // Not found + return IoBuffer{}; +} + +////////////////////////////////////////////////////////////////////////// + +CasStore* +CreateCasStore() +{ + return new CasImpl(); + // return new FileCasImpl(); +} + +////////////////////////////////////////////////////////////////////////// +// +// Testing related code follows... +// + +void +CAS_forcelink() +{ +} + +TEST_CASE("CasStore") +{ + zen::CasStoreConfiguration config; + config.RootDirectory = "c:\\temp\\test"; + + std::unique_ptr<zen::CasStore> store{CreateCasStore()}; + store->Initialize(config); +} + +} // namespace zen diff --git a/zenstore/caslog.cpp b/zenstore/caslog.cpp new file mode 100644 index 000000000..0f918bfd8 --- /dev/null +++ b/zenstore/caslog.cpp @@ -0,0 +1,220 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenstore/cas.h> + +#include "CompactCas.h" + +#include <zencore/except.h> +#include <zencore/memory.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/uid.h> + +#include <xxhash.h> + +#include <gsl/gsl-lite.hpp> + +#include <functional> + +struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- +#include <atlfile.h> +#include <filesystem> + +////////////////////////////////////////////////////////////////////////// + +namespace zen { + +uint32_t +CasLogFile::FileHeader::ComputeChecksum() +{ + return XXH32(&this->Magic, sizeof(FileHeader) - 4, 0xC0C0'BABA); +} + +CasLogFile::CasLogFile() +{ +} + +CasLogFile::~CasLogFile() +{ +} + +void +CasLogFile::Open(std::filesystem::path FileName, size_t RecordSize, bool IsCreate) +{ + m_RecordSize = RecordSize; + + const DWORD dwCreationDisposition = IsCreate ? CREATE_ALWAYS : OPEN_EXISTING; + + HRESULT hRes = m_File.Create(FileName.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, dwCreationDisposition); + + if (FAILED(hRes)) + { + throw std::system_error(GetLastError(), std::system_category(), "Failed to open log file" /* TODO: add path */); + } + + uint64_t AppendOffset = 0; + + if (IsCreate) + { + // Initialize log by writing header + FileHeader Header = {.RecordSize = gsl::narrow<uint32_t>(RecordSize), .LogId = zen::Oid::NewOid(), .ValidatedTail = 0}; + memcpy(Header.Magic, FileHeader::MagicSequence, sizeof Header.Magic); + Header.Finalize(); + + m_File.Write(&Header, sizeof Header); + + AppendOffset = sizeof(FileHeader); + + m_Header = Header; + } + else + { + // Validate header and log contents and prepare for appending/replay + FileHeader Header; + m_File.Read(&Header, sizeof Header); + + if ((0 != memcmp(Header.Magic, FileHeader::MagicSequence, sizeof Header.Magic)) || (Header.Checksum != Header.ComputeChecksum())) + { + // TODO: provide more context! + throw std::exception("Mangled log header"); + } + + ULONGLONG Sz; + m_File.GetSize(Sz); + AppendOffset = Sz; + + m_Header = Header; + } + + m_AppendOffset = AppendOffset; +} + +void +CasLogFile::Close() +{ + // TODO: update header and maybe add trailer + + Flush(); +} + +void +CasLogFile::Replay(std::function<void(const void*)>&& Handler) +{ + ULONGLONG LogFileSize; + m_File.GetSize(LogFileSize); + + // Ensure we end up on a clean boundary + const uint64_t LogBaseOffset = sizeof(FileHeader); + const size_t LogEntryCount = (LogFileSize - LogBaseOffset) / m_RecordSize; + + if (LogEntryCount == 0) + { + return; + } + + const uint64_t LogDataSize = LogEntryCount * m_RecordSize; + + std::vector<uint8_t> ReadBuffer; + ReadBuffer.resize(LogDataSize); + + m_File.Seek(LogBaseOffset, FILE_BEGIN); + HRESULT hRes = m_File.Read(ReadBuffer.data(), gsl::narrow<DWORD>(LogDataSize)); + + zen::ThrowIfFailed(hRes, "Failed to read log file"); + + for (int i = 0; i < LogEntryCount; ++i) + { + Handler(ReadBuffer.data() + (i * m_RecordSize)); + } +} + +void +CasLogFile::Append(const void* DataPointer, uint64_t DataSize) +{ + HRESULT hRes = m_File.Write(DataPointer, gsl::narrow<DWORD>(DataSize)); + + if (FAILED(hRes)) + { + throw std::system_error(GetLastError(), std::system_category(), "Failed to write to log file" /* TODO: add context */); + } +} + +void +CasLogFile::Flush() +{ + m_File.Flush(); +} + +////////////////////////////////////////////////////////////////////////// + +void +CasBlobFile::Open(std::filesystem::path FileName, bool isCreate) +{ + const DWORD dwCreationDisposition = isCreate ? CREATE_ALWAYS : OPEN_EXISTING; + + HRESULT hRes = m_File.Create(FileName.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, dwCreationDisposition); + + if (FAILED(hRes)) + { + throw std::system_error(GetLastError(), std::system_category(), "Failed to open bucket sobs file"); + } +} + +void +CasBlobFile::Read(void* Data, uint64_t Size, uint64_t Offset) +{ + OVERLAPPED Ovl{}; + + Ovl.Offset = DWORD(Offset & 0xffff'ffffu); + Ovl.OffsetHigh = DWORD(Offset >> 32); + + HRESULT hRes = m_File.Read(Data, gsl::narrow<DWORD>(Size), &Ovl); + + if (FAILED(hRes)) + { + throw std::system_error(GetLastError(), std::system_category(), "Failed to read from file" /* TODO: add context */); + } +} + +IoBuffer +CasBlobFile::ReadAll() +{ + IoBuffer Buffer(FileSize()); + + Read((void*)Buffer.Data(), Buffer.Size(), 0); + + return Buffer; +} + +void +CasBlobFile::Write(const void* Data, uint64_t Size, uint64_t Offset) +{ + OVERLAPPED Ovl{}; + + Ovl.Offset = DWORD(Offset & 0xffff'ffffu); + Ovl.OffsetHigh = DWORD(Offset >> 32); + + HRESULT hRes = m_File.Write(Data, gsl::narrow<DWORD>(Size), &Ovl); + + if (FAILED(hRes)) + { + throw std::system_error(GetLastError(), std::system_category(), "Failed to write to file" /* TODO: add context */); + } +} + +void +CasBlobFile::Flush() +{ + m_File.Flush(); +} + +uint64_t +CasBlobFile::FileSize() +{ + ULONGLONG Sz; + m_File.GetSize(Sz); + + return uint64_t(Sz); +} + +} // namespace zen diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp new file mode 100644 index 000000000..416943b77 --- /dev/null +++ b/zenstore/compactcas.cpp @@ -0,0 +1,119 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenstore/cas.h> + +#include "CompactCas.h" + +#include <zencore/except.h> +#include <zencore/memory.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/uid.h> + +#include <gsl/gsl-lite.hpp> + +#include <functional> + +struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- +#include <atlfile.h> +#include <filesystem> + +////////////////////////////////////////////////////////////////////////// + +namespace zen { + +void +CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore) +{ + ZEN_ASSERT(IsPow2(Alignment)); + ZEN_ASSERT(!m_IsInitialized); + + m_PayloadAlignment = Alignment; + std::string BaseName(ContainerBaseName); + std::filesystem::path SobsPath = m_Config.RootDirectory / (BaseName + ".ucas"); + std::filesystem::path SidxPath = m_Config.RootDirectory / (BaseName + ".uidx"); + std::filesystem::path SlogPath = m_Config.RootDirectory / (BaseName + ".ulog"); + + m_SmallObjectFile.Open(SobsPath, IsNewStore); + m_SmallObjectIndex.Open(SidxPath, IsNewStore); + m_CasLog.Open(SlogPath, IsNewStore); + + // TODO: should validate integrity of container files here + + uint64_t MaxFileOffset = 0; + + { + // This is not technically necessary but may help future static analysis + zen::RwLock::ExclusiveLockScope _(m_LocationMapLock); + + m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { + m_LocationMap[Record.Key] = Record.Location; + + MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset + Record.Location.Size); + }); + } + + m_CurrentInsertOffset = (MaxFileOffset + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); + m_CurrentIndexOffset = m_SmallObjectIndex.FileSize(); + m_IsInitialized = true; +} + +CasStore::InsertResult +CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash) +{ + { + RwLock::SharedLockScope _(m_LocationMapLock); + auto KeyIt = m_LocationMap.find(ChunkHash); + + if (KeyIt != m_LocationMap.end()) + { + return CasStore::InsertResult{.New = false}; + } + } + + // New entry + + RwLock::ExclusiveLockScope _(m_InsertLock); + + const uint64_t InsertOffset = m_CurrentInsertOffset; + m_SmallObjectFile.Write(ChunkData, ChunkSize, InsertOffset); + + m_CurrentInsertOffset = (m_CurrentInsertOffset + ChunkSize + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1); + + RwLock::ExclusiveLockScope __(m_LocationMapLock); + + CasDiskLocation Location{.Offset = InsertOffset, .Size = /* TODO FIX */ uint32_t(ChunkSize)}; + + m_LocationMap[ChunkHash] = Location; + + CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location}; + + m_CasLog.Append(IndexEntry); + + return CasStore::InsertResult{.New = true}; +} + +CasStore::InsertResult +CasContainerStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) +{ + return InsertChunk(Chunk.Data(), Chunk.Size(), ChunkHash); +} + +IoBuffer +CasContainerStrategy::FindChunk(const IoHash& ChunkHash) +{ + RwLock::SharedLockScope _(m_LocationMapLock); + auto KeyIt = m_LocationMap.find(ChunkHash); + + if (KeyIt != m_LocationMap.end()) + { + const CasDiskLocation& Location = KeyIt->second; + return zen::IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size); + } + + // Not found + + return IoBuffer(); +} + +} // namespace zen diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h new file mode 100644 index 000000000..4d318c2e2 --- /dev/null +++ b/zenstore/compactcas.h @@ -0,0 +1,66 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> + +#include <zencore/iobuffer.h> +#include <zencore/iohash.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/uid.h> +#include <zencore/windows.h> +#include <zenstore/cas.h> +#include <zenstore/caslog.h> + +#include <atlfile.h> +#include <functional> + +namespace zen { + +////////////////////////////////////////////////////////////////////////// + +#pragma pack(push) +#pragma pack(1) + +struct CasDiskLocation +{ + uint64_t Offset; + uint32_t Size; // TODO: Make this more like the IoStore index so we can store larger chunks (should be five bytes) +}; + +struct CasDiskIndexEntry +{ + IoHash Key; + CasDiskLocation Location; +}; + +#pragma pack(pop) + +static_assert(sizeof(CasDiskIndexEntry) == 32); + +struct CasContainerStrategy +{ + CasContainerStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {} + CasStore::InsertResult InsertChunk(const void* chunkData, size_t chunkSize, const IoHash& chunkHash); + CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash); + IoBuffer FindChunk(const IoHash& chunkHash); + void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore); + +private: + const CasStoreConfiguration& m_Config; + CasStore::Stats& m_Stats; + uint64_t m_PayloadAlignment = 1 << 4; + bool m_IsInitialized = false; + CasBlobFile m_SmallObjectFile; + CasBlobFile m_SmallObjectIndex; + TCasLogFile<CasDiskIndexEntry> m_CasLog; + + RwLock m_LocationMapLock; + std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher> m_LocationMap; + RwLock m_InsertLock; // used to serialize inserts + std::atomic<uint64_t> m_CurrentInsertOffset = 0; + std::atomic<uint64_t> m_CurrentIndexOffset = 0; +}; + +} // namespace zen diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp new file mode 100644 index 000000000..84a06c3be --- /dev/null +++ b/zenstore/filecas.cpp @@ -0,0 +1,237 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "FileCas.h" + +#include <zencore/except.h> +#include <zencore/memory.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/uid.h> + +#include <gsl/gsl-lite.hpp> + +#include <functional> +#include <unordered_map> + +struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- +#include <atlfile.h> +#include <filesystem> + +// Used for getting My Documents for default CAS +#include <ShlObj.h> +#pragma comment(lib, "shell32.lib") + +////////////////////////////////////////////////////////////////////////// + +namespace zen { + +WideStringBuilderBase& +FileCasStrategy::MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHash& ChunkHash, size_t& OutShard2len) +{ + ExtendableStringBuilder<96> HashString; + ChunkHash.ToHexString(HashString); + + const char* str = HashString.c_str(); + + // Shard into a path with two directory levels containing 12 bits and 8 bits + // respectively. + // + // This results in a maximum of 4096 * 256 directories + // + // The numbers have been chosen somewhat arbitrarily but are large to scale + // to very large chunk repositories. It may or may not make sense to make + // this a configurable policy, and it would probably be a good idea to + // measure performance for different policies and chunk counts + + ShardedPath.AppendAsciiRange(str, str + 3); + + ShardedPath.Append('\\'); + ShardedPath.AppendAsciiRange(str + 3, str + 5); + OutShard2len = ShardedPath.Size(); + + ShardedPath.Append('\\'); + ShardedPath.AppendAsciiRange(str + 6, str + 64); + + return ShardedPath; +} + +CasStore::InsertResult +FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) +{ + return InsertChunk(Chunk.Data(), Chunk.Size(), ChunkHash); +} + +CasStore::InsertResult +FileCasStrategy::InsertChunk(const void* const ChunkData, const size_t ChunkSize, const IoHash& ChunkHash) +{ + size_t Shard2len = 0; + ExtendableWideStringBuilder<128> ShardedPath; + ShardedPath.Append(m_Config.RootDirectory.c_str()); + ShardedPath.Append(std::filesystem::path::preferred_separator); + MakeShardedPath(ShardedPath, ChunkHash, /* out */ Shard2len); + + // See if file already exists + // + // Future improvement: maintain Bloom filter to avoid expensive file system probes? + + CAtlFile PayloadFile; + + HRESULT hRes = PayloadFile.Create(ShardedPath.c_str(), GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING); + + if (SUCCEEDED(hRes)) + { + // If we succeeded in opening the file then we don't need to do anything else because it already exists and should contain the + // content we were about to insert + return CasStore::InsertResult{.New = false}; + } + + PayloadFile.Close(); + + RwLock::ExclusiveLockScope _(LockForHash(ChunkHash)); + + // For now, use double-checked locking to see if someone else was first + + hRes = PayloadFile.Create(ShardedPath.c_str(), GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING); + + if (SUCCEEDED(hRes)) + { + // If we succeeded in opening the file then we don't need to do anything + // else because someone else managed to create the file before we did. Just return. + return {.New = false}; + } + + auto InternalCreateFile = [&] { return PayloadFile.Create(ShardedPath.c_str(), GENERIC_WRITE, FILE_SHARE_DELETE, CREATE_ALWAYS); }; + + hRes = InternalCreateFile(); + + if (hRes == HRESULT_FROM_WIN32(ERROR_PATH_NOT_FOUND)) + { + // Ensure parent directories exist + + std::filesystem::create_directories(std::wstring_view(ShardedPath.c_str(), Shard2len)); + + hRes = InternalCreateFile(); + } + + if (FAILED(hRes)) + { + throw WindowsException(hRes, "Failed to open shard file"); + } + + size_t ChunkRemain = ChunkSize; + auto ChunkCursor = reinterpret_cast<const uint8_t*>(ChunkData); + + while (ChunkRemain != 0) + { + uint32_t ByteCount = uint32_t(std::min<size_t>(1024 * 1024ull, ChunkRemain)); + + PayloadFile.Write(ChunkCursor, ByteCount); + + ChunkCursor += ByteCount; + ChunkRemain -= ByteCount; + } + + AtomicIncrement(m_Stats.PutCount); + AtomicAdd(m_Stats.PutBytes, ChunkSize); + + return {.New = true}; +} + +IoBuffer +FileCasStrategy::FindChunk(const IoHash& ChunkHash) +{ + size_t Shard2len = 0; + ExtendableWideStringBuilder<128> ShardedPath; + ShardedPath.Append(m_Config.RootDirectory.c_str()); + ShardedPath.Append(std::filesystem::path::preferred_separator); + MakeShardedPath(ShardedPath, ChunkHash, /* out */ Shard2len); + + RwLock::SharedLockScope _(LockForHash(ChunkHash)); + + auto Chunk = IoBufferBuilder::MakeFromFile(ShardedPath.c_str()); + + if (Chunk) + { + AtomicIncrement(m_Stats.GetCount); + AtomicAdd(m_Stats.GetBytes, Chunk.Size()); + } + + return Chunk; +} + +/** + * Straightforward file-per-chunk CAS store implementation + */ +class FileCasImpl : public CasStore +{ +public: + FileCasImpl() : m_Strategy(m_Config, m_Stats) {} + virtual ~FileCasImpl() = default; + + void PickDefaultDirectory() + { + if (m_Config.RootDirectory.empty()) + { + // Pick sensible default + + WCHAR myDocumentsDir[MAX_PATH]; + HRESULT hRes = SHGetFolderPathW(NULL, + CSIDL_PERSONAL /* My Documents */, + NULL, + SHGFP_TYPE_CURRENT, + /* out */ myDocumentsDir); + + if (SUCCEEDED(hRes)) + { + wcscat_s(myDocumentsDir, L"\\zen\\DefaultCAS"); + + m_Config.RootDirectory = myDocumentsDir; + } + } + } + + virtual void Initialize(const CasStoreConfiguration& InConfig) override + { + m_Config = InConfig; + + if (m_Config.RootDirectory.empty()) + { + PickDefaultDirectory(); + } + + // Ensure root directory exists - create if it doesn't exist already + + std::filesystem::create_directories(m_Config.RootDirectory); + + std::filesystem::path filepath = m_Config.RootDirectory; + filepath /= ".cas_root"; + + CAtlFile marker; + HRESULT hRes = marker.Create(filepath.c_str(), GENERIC_READ, 0, OPEN_EXISTING); + + if (FAILED(hRes)) + { + ExtendableStringBuilder<128> manifest; + manifest.Append("CAS_ROOT"); + hRes = marker.Create(filepath.c_str(), GENERIC_WRITE, 0, CREATE_ALWAYS); + + if (SUCCEEDED(hRes)) + marker.Write(manifest.c_str(), (DWORD)manifest.Size()); + } + } + + virtual CasStore::InsertResult InsertChunk(const void* chunkData, size_t chunkSize, const IoHash& chunkHash) override + { + return m_Strategy.InsertChunk(chunkData, chunkSize, chunkHash); + } + virtual CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash) override + { + return m_Strategy.InsertChunk(Chunk, chunkHash); + } + virtual IoBuffer FindChunk(const IoHash& chunkHash) override { return m_Strategy.FindChunk(chunkHash); } + +private: + FileCasStrategy m_Strategy; +}; + +} // namespace zen diff --git a/zenstore/filecas.h b/zenstore/filecas.h new file mode 100644 index 000000000..21ad8ba7c --- /dev/null +++ b/zenstore/filecas.h @@ -0,0 +1,32 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> + +#include <zencore/iobuffer.h> +#include <zencore/iohash.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zenstore/cas.h> + +namespace zen { + +struct FileCasStrategy +{ + FileCasStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {} + CasStore::InsertResult InsertChunk(const void* chunkData, size_t chunkSize, const IoHash& chunkHash); + CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash); + IoBuffer FindChunk(const IoHash& chunkHash); + +private: + const CasStoreConfiguration& m_Config; + CasStore::Stats& m_Stats; + RwLock m_Lock; + RwLock m_ShardLocks[256]; // TODO: these should be spaced out so they don't share cache lines + + inline RwLock& LockForHash(const IoHash& Hash) { return m_ShardLocks[Hash.Hash[19]]; } + static WideStringBuilderBase& MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHash& ChunkHash, size_t& OutShard2len); +}; + +} // namespace zen diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp new file mode 100644 index 000000000..bfb8f015e --- /dev/null +++ b/zenstore/gc.cpp @@ -0,0 +1,26 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenstore/gc.h> + +namespace zen { + +CasGc::CasGc(CasStore& Store) : m_CasStore(Store) +{ +} + +CasGc::~CasGc() +{ +} + +void +CasGc::CollectGarbage() +{ +} + +void +CasGc::OnNewReferences(std::span<IoHash> Hashes) +{ + ZEN_UNUSED(Hashes); +} + +} // namespace zen diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h new file mode 100644 index 000000000..8b9a66e3f --- /dev/null +++ b/zenstore/include/zenstore/CAS.h @@ -0,0 +1,66 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> + +#include <zencore/blake3.h> +#include <zencore/iobuffer.h> +#include <zencore/iohash.h> +#include <zencore/refcount.h> +#include <atomic> +#include <filesystem> +#include <memory> +#include <string> + +namespace zen { + +struct CasStoreConfiguration +{ + // Root directory for CAS store -- if not specified a default folder will be assigned in 'Documents\zen' + std::filesystem::path RootDirectory; + + // Threshold below which values are considered 'tiny' and managed using the 'tiny values' strategy + uint64_t TinyValueThreshold = 1024; + + // Threshold above which values are considered 'tiny' and managed using the 'huge values' strategy + uint64_t HugeValueThreshold = 1024 * 1024; +}; + +class CasStore +{ +public: + virtual ~CasStore() = default; + + struct Stats + { + uint64_t PutBytes = 0; + uint64_t PutCount = 0; + + uint64_t GetBytes = 0; + uint64_t GetCount = 0; + }; + + const CasStoreConfiguration& Config() { return m_Config; } + const Stats& GetStats() const { return m_Stats; } + + struct InsertResult + { + bool New = false; + }; + + virtual void Initialize(const CasStoreConfiguration& Config) = 0; + virtual InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash) = 0; + virtual InsertResult InsertChunk(IoBuffer Data, const IoHash& ChunkHash) = 0; + virtual IoBuffer FindChunk(const IoHash& ChunkHash) = 0; + +protected: + CasStoreConfiguration m_Config; + Stats m_Stats; +}; + +ZENCORE_API CasStore* CreateCasStore(); + +void CAS_forcelink(); + +} // namespace zen diff --git a/zenstore/include/zenstore/caslog.h b/zenstore/include/zenstore/caslog.h new file mode 100644 index 000000000..b318577d7 --- /dev/null +++ b/zenstore/include/zenstore/caslog.h @@ -0,0 +1,96 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> + +#include <zencore/iobuffer.h> +#include <zencore/string.h> +#include <zencore/thread.h> +#include <zencore/uid.h> +#include <zencore/windows.h> +#include <zenstore/cas.h> + +#include <atlfile.h> +#include <functional> + +namespace zen { + +class CasLogFile +{ +public: + CasLogFile(); + ~CasLogFile(); + + void Open(std::filesystem::path FileName, size_t RecordSize, bool isCreate); + void Append(const void* DataPointer, uint64_t DataSize); + void Replay(std::function<void(const void*)>&& Handler); + void Flush(); + void Close(); + +private: + struct FileHeader + { + uint8_t Magic[16]; + uint32_t RecordSize = 0; + zen::Oid LogId; + uint32_t ValidatedTail = 0; + uint32_t Pad[6]; + uint32_t Checksum = 0; + + static const inline uint8_t MagicSequence[16] = {'.', '-', '=', ' ', 'C', 'A', 'S', 'L', 'O', 'G', 'v', '1', ' ', '=', '-', '.'}; + + ZENCORE_API uint32_t ComputeChecksum(); + void Finalize() { Checksum = ComputeChecksum(); } + }; + + static_assert(sizeof(FileHeader) == 64); + +private: + CAtlFile m_File; + FileHeader m_Header; + size_t m_RecordSize = 1; + uint64_t m_AppendOffset = 0; +}; + +template<typename T> +class TCasLogFile : public CasLogFile +{ +public: + // This should be called before the Replay() is called to do some basic sanity checking + bool Initialize() { return true; } + + void Replay(std::invocable<const T&> auto Handler) + { + CasLogFile::Replay([&](const void* VoidPtr) { + const T& Record = *reinterpret_cast<const T*>(VoidPtr); + + Handler(Record); + }); + } + + void Append(const T& Record) { CasLogFile::Append(&Record, sizeof Record); } + void Open(std::filesystem::path FileName, bool IsCreate) { CasLogFile::Open(FileName, sizeof(T), IsCreate); } +}; + +////////////////////////////////////////////////////////////////////////// +// +// This should go in its own header +// + +class CasBlobFile +{ +public: + void Open(std::filesystem::path FileName, bool IsCreate); + void Read(void* Data, uint64_t Size, uint64_t Offset); + void Write(const void* Data, uint64_t Size, uint64_t Offset); + void Flush(); + uint64_t FileSize(); + void* Handle() { return m_File; } + IoBuffer ReadAll(); + +private: + CAtlFile m_File; +}; + +} // namespace zen diff --git a/zenstore/include/zenstore/gc.h b/zenstore/include/zenstore/gc.h new file mode 100644 index 000000000..055843547 --- /dev/null +++ b/zenstore/include/zenstore/gc.h @@ -0,0 +1,28 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/iohash.h> + +#include <span> + +namespace zen { + +class CasStore; +struct IoHash; + +class CasGc +{ +public: + CasGc(CasStore& Store); + ~CasGc(); + + void CollectGarbage(); + + void OnNewReferences(std::span<IoHash> Hashes); + +private: + CasStore& m_CasStore; +}; + +} // namespace zen diff --git a/zenstore/include/zenstore/scrub.h b/zenstore/include/zenstore/scrub.h new file mode 100644 index 000000000..5a34d4860 --- /dev/null +++ b/zenstore/include/zenstore/scrub.h @@ -0,0 +1,24 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/iohash.h> + +#include <span> + +namespace zen { + +class CasStore; +struct IoHash; + +class CasScrubber +{ +public: + CasScrubber(CasStore& Store); + ~CasScrubber(); + +private: + CasStore& m_CasStore; +}; + +} // namespace zen diff --git a/zenstore/scrub.cpp b/zenstore/scrub.cpp new file mode 100644 index 000000000..4df337349 --- /dev/null +++ b/zenstore/scrub.cpp @@ -0,0 +1,15 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenstore/scrub.h> + +namespace zen { + +CasScrubber::CasScrubber(CasStore& Store) : m_CasStore(Store) +{ +} + +CasScrubber::~CasScrubber() +{ +} + +} // namespace zen diff --git a/zenstore/zenstore.vcxproj b/zenstore/zenstore.vcxproj new file mode 100644 index 000000000..4a39e826d --- /dev/null +++ b/zenstore/zenstore.vcxproj @@ -0,0 +1,121 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <ItemGroup> + <ClCompile Include="CAS.cpp" /> + <ClCompile Include="caslog.cpp" /> + <ClCompile Include="compactcas.cpp" /> + <ClCompile Include="filecas.cpp" /> + <ClCompile Include="gc.cpp" /> + <ClCompile Include="scrub.cpp" /> + </ItemGroup> + <ItemGroup> + <ClInclude Include="compactcas.h" /> + <ClInclude Include="filecas.h" /> + <ClInclude Include="include\zenstore\gc.h" /> + <ClInclude Include="include\zenstore\scrub.h" /> + <ClInclude Include="include\zenstore\CAS.h" /> + <ClInclude Include="include\zenstore\caslog.h" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\zencore\zencore.vcxproj"> + <Project>{d75bf9ab-c61e-4fff-ad59-1563430f05e2}</Project> + </ProjectReference> + </ItemGroup> + <PropertyGroup Label="Globals"> + <VCProjectVersion>16.0</VCProjectVersion> + <Keyword>Win32Proj</Keyword> + <ProjectGuid>{26cbbaeb-14c1-4efc-877d-80f48215651c}</ProjectGuid> + <RootNamespace>zenstore</RootNamespace> + <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseDebugLibraries>true</UseDebugLibraries> + <PlatformToolset>v142</PlatformToolset> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>StaticLibrary</ConfigurationType> + <UseDebugLibraries>false</UseDebugLibraries> + <PlatformToolset>v142</PlatformToolset> + <WholeProgramOptimization>true</WholeProgramOptimization> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="Shared"> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="..\zenfs_common.props" /> + <Import Project="..\zen_base_debug.props" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + <Import Project="..\zenfs_common.props" /> + <Import Project="..\zen_base_release.props" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <LinkIncremental>true</LinkIncremental> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <LinkIncremental>false</LinkIncremental> + </PropertyGroup> + <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <VcpkgEnableManifest>true</VcpkgEnableManifest> + <VcpkgUseStatic>true</VcpkgUseStatic> + </PropertyGroup> + <PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <VcpkgEnableManifest>true</VcpkgEnableManifest> + <VcpkgUseStatic>true</VcpkgUseStatic> + </PropertyGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <ConformanceMode>true</ConformanceMode> + <AdditionalIncludeDirectories>../zencore/include;./include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem> + </SubSystem> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <FunctionLevelLinking>true</FunctionLevelLinking> + <IntrinsicFunctions>true</IntrinsicFunctions> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <ConformanceMode>true</ConformanceMode> + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile> + <AdditionalIncludeDirectories>../zencore/include;./include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem> + </SubSystem> + <EnableCOMDATFolding>true</EnableCOMDATFolding> + <OptimizeReferences>true</OptimizeReferences> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project>
\ No newline at end of file diff --git a/zenstore/zenstore.vcxproj.filters b/zenstore/zenstore.vcxproj.filters new file mode 100644 index 000000000..6ab5a7cb2 --- /dev/null +++ b/zenstore/zenstore.vcxproj.filters @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <ClCompile Include="CAS.cpp" /> + <ClCompile Include="caslog.cpp" /> + <ClCompile Include="compactcas.cpp" /> + <ClCompile Include="filecas.cpp" /> + </ItemGroup> + <ItemGroup> + <ClInclude Include="compactcas.h" /> + <ClInclude Include="filecas.h" /> + <ClInclude Include="include\zenstore\CAS.h" /> + <ClInclude Include="include\zenstore\caslog.h" /> + </ItemGroup> +</Project>
\ No newline at end of file |