diff options
| author | Stefan Boberg <[email protected]> | 2021-10-22 23:17:32 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2021-10-22 23:18:25 +0200 |
| commit | 6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f (patch) | |
| tree | a53ecfc80256d3fceea89b44e06c514b9d916884 | |
| parent | z$: Removed CasStore (diff) | |
| download | zen-6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f.tar.xz zen-6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f.zip | |
compactcas: Changed CasDiskLocation implementation so it can store chunks > 4GB (max is now 1TB)
The offset and size members are now five bytes each, leaving two bytes that are intended for content-type and flags but are currently unused
Also added some unit tests for compactcas
This change bumps the global storage schema version in order to wipe any existing disk state
| -rw-r--r-- | zenserver/zenserver.cpp | 2 | ||||
| -rw-r--r-- | zenstore/compactcas.cpp | 101 | ||||
| -rw-r--r-- | zenstore/compactcas.h | 39 | ||||
| -rw-r--r-- | zenstore/zenstore.cpp | 2 |
4 files changed, 129 insertions, 15 deletions
diff --git a/zenserver/zenserver.cpp b/zenserver/zenserver.cpp index 73a66bdac..f2442907b 100644 --- a/zenserver/zenserver.cpp +++ b/zenserver/zenserver.cpp @@ -44,7 +44,7 @@ ZEN_THIRD_PARTY_INCLUDES_END # define BUILD_VERSION ("dev-build") #endif -#define ZEN_SCHEMA_VERSION 1 +#define ZEN_SCHEMA_VERSION 2 /* latest change by: stefan boberg */ ////////////////////////////////////////////////////////////////////////// // We don't have any doctest code in this file but this is needed to bring diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index ee027b261..bb2f92858 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -4,13 +4,19 @@ #include "CompactCas.h" +#include <zencore/compactbinarybuilder.h> #include <zencore/except.h> +#include <zencore/filesystem.h> #include <zencore/logging.h> #include <zencore/memory.h> #include <zencore/string.h> +#include <zencore/testing.h> +#include <zencore/testutils.h> #include <zencore/thread.h> #include <zencore/uid.h> +#include <zenstore/gc.h> + #include <filesystem> #include <functional> #include <gsl/gsl-lite.hpp> @@ -58,7 +64,7 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6 m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { m_LocationMap[Record.Key] = Record.Location; - MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset + Record.Location.Size); + MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.GetOffset() + Record.Location.GetSize()); }); } @@ -91,7 +97,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const RwLock::ExclusiveLockScope __(m_LocationMapLock); - CasDiskLocation Location{.Offset = InsertOffset, .Size = /* TODO FIX */ uint32_t(ChunkSize)}; + const CasDiskLocation Location{InsertOffset, ChunkSize}; m_LocationMap[ChunkHash] = Location; @@ -116,7 +122,8 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != 
m_LocationMap.end()) { const CasDiskLocation& Location = KeyIt->second; - return IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size); + + return IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.GetOffset(), Location.GetSize()); } // Not found @@ -187,11 +194,11 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) for (auto& Entry : m_LocationMap) { - const uint64_t EntryOffset = Entry.second.Offset; + const uint64_t EntryOffset = Entry.second.GetOffset(); if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) { - const uint64_t EntryEnd = EntryOffset + Entry.second.Size; + const uint64_t EntryEnd = EntryOffset + Entry.second.GetSize(); if (EntryEnd >= WindowEnd) { @@ -201,7 +208,8 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } const IoHash ComputedHash = - IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.Offset - WindowStart, Entry.second.Size); + IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.GetOffset() - WindowStart, + Entry.second.GetSize()); if (Entry.first != ComputedHash) { @@ -222,7 +230,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) for (const CasDiskIndexEntry& Entry : BigChunks) { IoHashStream Hasher; - m_SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) { + m_SmallObjectFile.StreamByteRange(Entry.Location.GetOffset(), Entry.Location.GetSize(), [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); IoHash ComputedHash = Hasher.GetHash(); @@ -258,7 +266,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } void -CasContainerStrategy::GarbageCollect(GcContext& GcCtx) +CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { } @@ -280,4 +288,81 @@ CasContainerStrategy::MakeSnapshot() m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0); } +////////////////////////////////////////////////////////////////////////// + 
+#if ZEN_WITH_TESTS + +TEST_CASE("cas.compact.gc") +{ + ScopedTemporaryDirectory TempDir; + + CasStoreConfiguration CasConfig; + CasConfig.RootDirectory = TempDir.Path(); + + CreateDirectories(CasConfig.RootDirectory); + + const int kIterationCount = 1000; + + std::vector<IoHash> Keys(kIterationCount); + + { + CasContainerStrategy Cas(CasConfig); + Cas.Initialize("test", 16, true); + + for (int i = 0; i < kIterationCount; ++i) + { + CbObjectWriter Cbo; + Cbo << "id" << i; + CbObject Obj = Cbo.Save(); + + IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer(); + const IoHash Hash = HashBuffer(ObjBuffer); + + Cas.InsertChunk(ObjBuffer, Hash); + + Keys[i] = Hash; + } + + for (int i = 0; i < kIterationCount; ++i) + { + IoBuffer Chunk = Cas.FindChunk(Keys[i]); + + CHECK(!!Chunk); + + CbObject Value = LoadCompactBinaryObject(Chunk); + + CHECK_EQ(Value["id"].AsInt32(), i); + } + } + + // Validate that we can still read the inserted data after closing + // the original cas store + + { + CasContainerStrategy Cas(CasConfig); + Cas.Initialize("test", 16, false); + + for (int i = 0; i < kIterationCount; ++i) + { + IoBuffer Chunk = Cas.FindChunk(Keys[i]); + + CHECK(!!Chunk); + + CbObject Value = LoadCompactBinaryObject(Chunk); + + CHECK_EQ(Value["id"].AsInt32(), i); + } + + GcContext Ctx; + Cas.CollectGarbage(Ctx); + } +} + +#endif + +void +compactcas_forcelink() +{ +} + } // namespace zen diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h index 8f7c0213b..a3f3121e6 100644 --- a/zenstore/compactcas.h +++ b/zenstore/compactcas.h @@ -23,17 +23,42 @@ namespace zen { struct CasDiskLocation { - uint64_t Offset; - // If we wanted to be able to store larger chunks using this storage mechanism then - // we could make this more like the IoStore index so we can store larger chunks. 
- // I.e use five bytes for size and seven for offset - uint32_t Size; + CasDiskLocation(uint64_t InOffset, uint64_t InSize) + { + ZEN_ASSERT(InOffset <= 0xff'ffff'ffff); + ZEN_ASSERT(InSize <= 0xff'ffff'ffff); + + memcpy(&m_Offset[0], &InOffset, sizeof m_Offset); + memcpy(&m_Size[0], &InSize, sizeof m_Size); + } + + CasDiskLocation() = default; + + inline uint64_t GetOffset() const + { + uint64_t Offset = 0; + memcpy(&Offset, &m_Offset, sizeof m_Offset); + return Offset; + } + + inline uint64_t GetSize() const + { + uint64_t Size = 0; + memcpy(&Size, &m_Size, sizeof m_Size); + return Size; + } + +private: + uint8_t m_Offset[5]; + uint8_t m_Size[5]; }; struct CasDiskIndexEntry { IoHash Key; CasDiskLocation Location; + ZenContentType ContentType = ZenContentType::kUnknownContentType; + uint8_t Flags = 0; }; #pragma pack(pop) @@ -61,7 +86,7 @@ struct CasContainerStrategy void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore); void Flush(); void Scrub(ScrubContext& Ctx); - void GarbageCollect(GcContext& GcCtx); + void CollectGarbage(GcContext& GcCtx); private: const CasStoreConfiguration& m_Config; @@ -81,4 +106,6 @@ private: void MakeSnapshot(); }; +void compactcas_forcelink(); + } // namespace zen diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp index d852fa64b..de0f43cb3 100644 --- a/zenstore/zenstore.cpp +++ b/zenstore/zenstore.cpp @@ -5,6 +5,7 @@ #include <zenstore/CAS.h> #include <zenstore/basicfile.h> #include "filecas.h" +#include "compactcas.h" namespace zen { @@ -14,6 +15,7 @@ zenstore_forcelinktests() basicfile_forcelink(); CAS_forcelink(); filecas_forcelink(); + compactcas_forcelink(); } } // namespace zen |