diff options
| author | Stefan Boberg <[email protected]> | 2021-10-22 23:17:32 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2021-10-22 23:18:25 +0200 |
| commit | 6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f (patch) | |
| tree | a53ecfc80256d3fceea89b44e06c514b9d916884 /zenstore | |
| parent | z$: Removed CasStore (diff) | |
| download | zen-6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f.tar.xz zen-6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f.zip | |
compactcas: Changed CasDiskLocation implementation so it can store chunks > 4GB (max is now 1TB)
The offset and size members are now five bytes each, which leaves two bytes which are intended to be used for content-type and flags but are currently unused
Also added some unit tests for compactcas
This change bumps the global storage schema in order to wipe any existing disk state
Diffstat (limited to 'zenstore')
| -rw-r--r-- | zenstore/compactcas.cpp | 101 | ||||
| -rw-r--r-- | zenstore/compactcas.h | 39 | ||||
| -rw-r--r-- | zenstore/zenstore.cpp | 2 |
3 files changed, 128 insertions, 14 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index ee027b261..bb2f92858 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -4,13 +4,19 @@ #include "CompactCas.h" +#include <zencore/compactbinarybuilder.h> #include <zencore/except.h> +#include <zencore/filesystem.h> #include <zencore/logging.h> #include <zencore/memory.h> #include <zencore/string.h> +#include <zencore/testing.h> +#include <zencore/testutils.h> #include <zencore/thread.h> #include <zencore/uid.h> +#include <zenstore/gc.h> + #include <filesystem> #include <functional> #include <gsl/gsl-lite.hpp> @@ -58,7 +64,7 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6 m_CasLog.Replay([&](const CasDiskIndexEntry& Record) { m_LocationMap[Record.Key] = Record.Location; - MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset + Record.Location.Size); + MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.GetOffset() + Record.Location.GetSize()); }); } @@ -91,7 +97,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const RwLock::ExclusiveLockScope __(m_LocationMapLock); - CasDiskLocation Location{.Offset = InsertOffset, .Size = /* TODO FIX */ uint32_t(ChunkSize)}; + const CasDiskLocation Location{InsertOffset, ChunkSize}; m_LocationMap[ChunkHash] = Location; @@ -116,7 +122,8 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) { const CasDiskLocation& Location = KeyIt->second; - return IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size); + + return IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.GetOffset(), Location.GetSize()); } // Not found @@ -187,11 +194,11 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) for (auto& Entry : m_LocationMap) { - const uint64_t EntryOffset = Entry.second.Offset; + const uint64_t EntryOffset = Entry.second.GetOffset(); if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd)) { - const uint64_t EntryEnd = EntryOffset + Entry.second.Size; + const uint64_t EntryEnd = EntryOffset + Entry.second.GetSize(); if (EntryEnd >= WindowEnd) { @@ -201,7 +208,8 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } const IoHash ComputedHash = - IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.Offset - WindowStart, Entry.second.Size); + IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.GetOffset() - WindowStart, + Entry.second.GetSize()); if (Entry.first != ComputedHash) { @@ -222,7 +230,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) for (const CasDiskIndexEntry& Entry : BigChunks) { IoHashStream Hasher; - m_SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) { + m_SmallObjectFile.StreamByteRange(Entry.Location.GetOffset(), Entry.Location.GetSize(), [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); }); IoHash ComputedHash = Hasher.GetHash(); @@ -258,7 +266,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx) } void -CasContainerStrategy::GarbageCollect(GcContext& GcCtx) +CasContainerStrategy::CollectGarbage(GcContext& GcCtx) { } @@ -280,4 +288,81 @@ CasContainerStrategy::MakeSnapshot() m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0); } +////////////////////////////////////////////////////////////////////////// + +#if ZEN_WITH_TESTS + +TEST_CASE("cas.compact.gc") +{ + ScopedTemporaryDirectory TempDir; + + CasStoreConfiguration CasConfig; + CasConfig.RootDirectory = TempDir.Path(); + + CreateDirectories(CasConfig.RootDirectory); + + const int kIterationCount = 1000; + + std::vector<IoHash> Keys(kIterationCount); + + { + CasContainerStrategy Cas(CasConfig); + Cas.Initialize("test", 16, true); + + for (int i = 0; i < kIterationCount; ++i) + { + CbObjectWriter Cbo; + Cbo << "id" << i; + CbObject Obj = Cbo.Save(); + + IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer(); + const IoHash Hash = HashBuffer(ObjBuffer); + + Cas.InsertChunk(ObjBuffer, Hash); + + Keys[i] = Hash; + } + + for (int i = 0; i < kIterationCount; ++i) + { + IoBuffer Chunk = Cas.FindChunk(Keys[i]); + + CHECK(!!Chunk); + + CbObject Value = LoadCompactBinaryObject(Chunk); + + CHECK_EQ(Value["id"].AsInt32(), i); + } + } + + // Validate that we can still read the inserted data after closing + // the original cas store + + { + CasContainerStrategy Cas(CasConfig); + Cas.Initialize("test", 16, false); + + for (int i = 0; i < kIterationCount; ++i) + { + IoBuffer Chunk = Cas.FindChunk(Keys[i]); + + CHECK(!!Chunk); + + CbObject Value = LoadCompactBinaryObject(Chunk); + + CHECK_EQ(Value["id"].AsInt32(), i); + } + + GcContext Ctx; + Cas.CollectGarbage(Ctx); + } +} + +#endif + +void +compactcas_forcelink() +{ +} + } // namespace zen diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h index 8f7c0213b..a3f3121e6 100644 --- a/zenstore/compactcas.h +++ b/zenstore/compactcas.h @@ -23,17 +23,42 @@ namespace zen { struct CasDiskLocation { - uint64_t Offset; - // If we wanted to be able to store larger chunks using this storage mechanism then - // we could make this more like the IoStore index so we can store larger chunks. - // I.e use five bytes for size and seven for offset - uint32_t Size; + CasDiskLocation(uint64_t InOffset, uint64_t InSize) + { + ZEN_ASSERT(InOffset <= 0xff'ffff'ffff); + ZEN_ASSERT(InSize <= 0xff'ffff'ffff); + + memcpy(&m_Offset[0], &InOffset, sizeof m_Offset); + memcpy(&m_Size[0], &InSize, sizeof m_Size); + } + + CasDiskLocation() = default; + + inline uint64_t GetOffset() const + { + uint64_t Offset = 0; + memcpy(&Offset, &m_Offset, sizeof m_Offset); + return Offset; + } + + inline uint64_t GetSize() const + { + uint64_t Size = 0; + memcpy(&Size, &m_Size, sizeof m_Size); + return Size; + } + +private: + uint8_t m_Offset[5]; + uint8_t m_Size[5]; }; struct CasDiskIndexEntry { IoHash Key; CasDiskLocation Location; + ZenContentType ContentType = ZenContentType::kUnknownContentType; + uint8_t Flags = 0; }; #pragma pack(pop) @@ -61,7 +86,7 @@ struct CasContainerStrategy void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore); void Flush(); void Scrub(ScrubContext& Ctx); - void GarbageCollect(GcContext& GcCtx); + void CollectGarbage(GcContext& GcCtx); private: const CasStoreConfiguration& m_Config; @@ -81,4 +106,6 @@ private: void MakeSnapshot(); }; +void compactcas_forcelink(); + } // namespace zen diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp index d852fa64b..de0f43cb3 100644 --- a/zenstore/zenstore.cpp +++ b/zenstore/zenstore.cpp @@ -5,6 +5,7 @@ #include <zenstore/CAS.h> #include <zenstore/basicfile.h> #include "filecas.h" +#include "compactcas.h" namespace zen { @@ -14,6 +15,7 @@ zenstore_forcelinktests() basicfile_forcelink(); CAS_forcelink(); filecas_forcelink(); + compactcas_forcelink(); } } // namespace zen |