diff options
| author | Stefan Boberg <[email protected]> | 2021-09-19 19:30:16 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2021-09-19 19:30:16 +0200 |
| commit | 8f82467ea5e8e90e459d78d603c67a7938ae8ead (patch) | |
| tree | a7d1a3e2d897e9a53e075485c437c440f3684ce2 | |
| parent | Added zenstore.h and made headers use it (diff) | |
| download | zen-8f82467ea5e8e90e459d78d603c67a7938ae8ead.tar.xz zen-8f82467ea5e8e90e459d78d603c67a7938ae8ead.zip | |
Changed some code over from ATL to BasicFile and added Scrub() stubs.
| -rw-r--r-- | zenstore/CAS.cpp | 85 | ||||
| -rw-r--r-- | zenstore/compactcas.cpp | 43 | ||||
| -rw-r--r-- | zenstore/compactcas.h | 11 | ||||
| -rw-r--r-- | zenstore/filecas.cpp | 5 | ||||
| -rw-r--r-- | zenstore/filecas.h | 1 | ||||
| -rw-r--r-- | zenstore/include/zenstore/CAS.h | 3 |
6 files changed, 113 insertions, 35 deletions
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp index e77c0ed64..af0fcc609 100644 --- a/zenstore/CAS.cpp +++ b/zenstore/CAS.cpp @@ -11,6 +11,7 @@ #include <zencore/logging.h> #include <zencore/memory.h> #include <zencore/string.h> +#include <zencore/testutils.h> #include <zencore/thread.h> #include <zencore/uid.h> @@ -20,15 +21,17 @@ #include <functional> #include <unordered_map> -struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- -#include <atlfile.h> - ////////////////////////////////////////////////////////////////////////// namespace zen { /** - * Slightly less naive CAS store + * CAS store implementation + * + * Uses a basic strategy of splitting payloads by size, to improve ability to reclaim space + * quickly for unused large chunks and to maintain locality for small chunks which are + * frequently accessed together. + * */ class CasImpl : public CasStore { @@ -41,10 +44,9 @@ public: virtual IoBuffer FindChunk(const IoHash& ChunkHash) override; virtual void FilterChunks(CasChunkSet& InOutChunks) override; virtual void Flush() override; + virtual void Scrub() override; private: - void PickDefaultDirectory(); - CasContainerStrategy m_TinyStrategy; CasContainerStrategy m_SmallStrategy; FileCasStrategy m_LargeStrategy; @@ -63,13 +65,16 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig) { m_Config = InConfig; - ZEN_INFO("initializing CAS pool at {}", m_Config.RootDirectory); + ZEN_INFO("initializing CAS pool at '{}'", m_Config.RootDirectory); // Ensure root directory exists - create if it doesn't exist already std::filesystem::create_directories(m_Config.RootDirectory); // Open or create manifest + // + // The manifest is not currently fully implemented. The goal is to + // use it for recovery and configuration bool IsNewStore = false; @@ -77,23 +82,22 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig) std::filesystem::path ManifestPath = m_Config.RootDirectory; ManifestPath /= ".ucas_root"; - CAtlFile marker; - HRESULT hRes = marker.Create(ManifestPath.c_str(), GENERIC_READ, 0, OPEN_EXISTING); + std::error_code Ec; + BasicFile Marker; + Marker.Open(ManifestPath.c_str(), /* IsCreate */ false, Ec); - if (FAILED(hRes)) + if (Ec) { IsNewStore = true; ExtendableStringBuilder<128> manifest; - manifest.Append("#CAS_ROOT\n"); // TODO: should write something meaningful here + manifest.Append("#CAS_ROOT\n"); manifest.Append("ID="); zen::Oid id = zen::Oid::NewOid(); id.ToString(manifest); - hRes = marker.Create(ManifestPath.c_str(), GENERIC_WRITE, 0, CREATE_ALWAYS); - - if (SUCCEEDED(hRes)) - marker.Write(manifest.c_str(), (DWORD)manifest.Size()); + Marker.Open(ManifestPath.c_str(), /* IsCreate */ true); + Marker.Write(manifest.c_str(), (DWORD)manifest.Size(), 0); } } @@ -160,6 +164,14 @@ CasImpl::Flush() m_LargeStrategy.Flush(); } +void +CasImpl::Scrub() +{ + m_SmallStrategy.Scrub(); + m_TinyStrategy.Scrub(); + m_LargeStrategy.Scrub(); +} + ////////////////////////////////////////////////////////////////////////// CasStore* @@ -173,18 +185,45 @@ CreateCasStore() // Testing related code follows... // -void -CAS_forcelink() -{ -} - TEST_CASE("CasStore") { + ScopedTemporaryDirectory TempDir; + zen::CasStoreConfiguration config; - config.RootDirectory = "c:\\temp\\test"; + config.RootDirectory = TempDir.Path(); + + std::unique_ptr<zen::CasStore> Store{CreateCasStore()}; + Store->Initialize(config); + Store->Scrub(); + + IoBuffer Value1{16}; + memcpy(Value1.MutableData(), "1234567890123456", 16); + IoHash Hash1 = IoHash::HashBuffer(Value1.Data(), Value1.Size()); + CasStore::InsertResult Result1 = Store->InsertChunk(Value1, Hash1); + CHECK(Result1.New); + + IoBuffer Value2{16}; + memcpy(Value2.MutableData(), "ABCDEFGHIJKLMNOP", 16); + IoHash Hash2 = IoHash::HashBuffer(Value2.Data(), Value2.Size()); + CasStore::InsertResult Result2 = Store->InsertChunk(Value2, Hash2); + CHECK(Result2.New); + + CasChunkSet ChunkSet; + ChunkSet.AddChunk(Hash1); + ChunkSet.AddChunk(Hash2); + + Store->FilterChunks(ChunkSet); + CHECK(ChunkSet.GetChunkSet().size() == 0); + + IoBuffer Lookup1 = Store->FindChunk(Hash1); + CHECK(Lookup1); + IoBuffer Lookup2 = Store->FindChunk(Hash2); + CHECK(Lookup2); +} - std::unique_ptr<zen::CasStore> store{CreateCasStore()}; - store->Initialize(config); +void +CAS_forcelink() +{ } } // namespace zen diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index 4407d8b08..71d52e56a 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -10,13 +10,9 @@ #include <zencore/thread.h> #include <zencore/uid.h> -#include <gsl/gsl-lite.hpp> - -#include <functional> - -struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- -#include <atlfile.h> #include <filesystem> +#include <functional> +#include <gsl/gsl-lite.hpp> ////////////////////////////////////////////////////////////////////////// @@ -43,7 +39,9 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6 uint64_t MaxFileOffset = 0; { - // This is not technically necessary but may help future static analysis + // This is not technically necessary (nobody should be accessing us from + // another thread at this stage) but may help static analysis + zen::RwLock::ExclusiveLockScope _(m_LocationMapLock); m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { @@ -133,6 +131,13 @@ CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) void CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks) { + // This implementation is good enough for relatively small + // chunk sets (in terms of chunk identifiers), but would + // benefit from a better implementation which removes + // items incrementally for large sets, especially when + // we're likely to already have a large proportion of the + // chunks in the set + std::unordered_set<IoHash> HaveSet; for (const IoHash& Hash : InOutChunks.GetChunkSet()) @@ -157,4 +162,28 @@ CasContainerStrategy::Flush() m_SmallObjectFile.Flush(); } +void +CasContainerStrategy::Scrub() +{ + RwLock::SharedLockScope _(m_LocationMapLock); +} + +void +CasContainerStrategy::MakeSnapshot() +{ + RwLock::SharedLockScope _(m_LocationMapLock); + + std::vector<CasDiskIndexEntry> Entries{m_LocationMap.size()}; + + uint64_t EntryIndex = 0; + for (auto& Entry : m_LocationMap) + { + CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++]; + IndexEntry.Key = Entry.first; + IndexEntry.Location = Entry.second; + } + + m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0); +} + } // namespace zen diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h index 05bbf81f6..63d8f8511 100644 --- a/zenstore/compactcas.h +++ b/zenstore/compactcas.h @@ -14,9 +14,6 @@ #include <zenstore/cas.h> #include <zenstore/caslog.h> -#include <atlfile.h> -#include <functional> - namespace zen { ////////////////////////////////////////////////////////////////////////// @@ -27,7 +24,10 @@ namespace zen { struct CasDiskLocation { uint64_t Offset; - uint32_t Size; // TODO: Make this more like the IoStore index so we can store larger chunks (should be five bytes) + // If we wanted to be able to store larger chunks using this storage mechanism then + // we could make this more like the IoStore index so we can store larger chunks. + // I.e use five bytes for size and seven for offset + uint32_t Size; }; struct CasDiskIndexEntry @@ -58,6 +58,7 @@ struct CasContainerStrategy void FilterChunks(CasChunkSet& InOutChunks); void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore); void Flush(); + void Scrub(); private: const CasStoreConfiguration& m_Config; @@ -73,6 +74,8 @@ private: RwLock m_InsertLock; // used to serialize inserts std::atomic<uint64_t> m_CurrentInsertOffset = 0; std::atomic<uint64_t> m_CurrentIndexOffset = 0; + + void MakeSnapshot(); }; } // namespace zen diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp index 170f13875..5fdf505d4 100644 --- a/zenstore/filecas.cpp +++ b/zenstore/filecas.cpp @@ -353,6 +353,11 @@ FileCasStrategy::Flush() } void +FileCasStrategy::Scrub() +{ +} + +void FileCasStrategy::GarbageCollect(GcContext& GcCtx) { ZEN_UNUSED(GcCtx); diff --git a/zenstore/filecas.h b/zenstore/filecas.h index 448d1a05f..c7cd9d7ca 100644 --- a/zenstore/filecas.h +++ b/zenstore/filecas.h @@ -22,6 +22,7 @@ struct FileCasStrategy void FilterChunks(CasChunkSet& InOutChunks); void Flush(); void GarbageCollect(GcContext& GcCtx); + void Scrub(); private: const CasStoreConfiguration& m_Config; diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h index b4de533dd..c6c919593 100644 --- a/zenstore/include/zenstore/CAS.h +++ b/zenstore/include/zenstore/CAS.h @@ -2,7 +2,7 @@ #pragma once -#include <zencore/zencore.h> +#include "zenstore.h" #include <zencore/blake3.h> #include <zencore/iobuffer.h> @@ -76,6 +76,7 @@ public: virtual IoBuffer FindChunk(const IoHash& ChunkHash) = 0; virtual void FilterChunks(CasChunkSet& InOutChunks) = 0; virtual void Flush() = 0; + virtual void Scrub() = 0; protected: CasStoreConfiguration m_Config; |