aboutsummaryrefslogtreecommitdiff
path: root/zenstore
diff options
context:
space:
mode:
author: Stefan Boberg <[email protected]> 2021-10-22 23:17:32 +0200
committer: Stefan Boberg <[email protected]> 2021-10-22 23:18:25 +0200
commit: 6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f (patch)
tree: a53ecfc80256d3fceea89b44e06c514b9d916884 /zenstore
parent: z$: Removed CasStore (diff)
download: zen-6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f.tar.xz
zen-6abc1975643bbf4c2ecf0e85fceca56f23fc5b6f.zip
compactcas: Changed CasDiskLocation implementation so it can store chunks > 4GB (max is now 1TB)
The offset and size members are now five bytes each, which leaves two bytes that are intended to be used for content-type and flags but are currently unused. Also added some unit tests for compactcas. This change bumps the global storage schema in order to wipe any existing disk state.
Diffstat (limited to 'zenstore')
-rw-r--r-- zenstore/compactcas.cpp 101
-rw-r--r-- zenstore/compactcas.h 39
-rw-r--r-- zenstore/zenstore.cpp 2
3 files changed, 128 insertions, 14 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index ee027b261..bb2f92858 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -4,13 +4,19 @@
#include "CompactCas.h"
+#include <zencore/compactbinarybuilder.h>
#include <zencore/except.h>
+#include <zencore/filesystem.h>
#include <zencore/logging.h>
#include <zencore/memory.h>
#include <zencore/string.h>
+#include <zencore/testing.h>
+#include <zencore/testutils.h>
#include <zencore/thread.h>
#include <zencore/uid.h>
+#include <zenstore/gc.h>
+
#include <filesystem>
#include <functional>
#include <gsl/gsl-lite.hpp>
@@ -58,7 +64,7 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6
m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
m_LocationMap[Record.Key] = Record.Location;
- MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.Offset + Record.Location.Size);
+ MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.GetOffset() + Record.Location.GetSize());
});
}
@@ -91,7 +97,7 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
RwLock::ExclusiveLockScope __(m_LocationMapLock);
- CasDiskLocation Location{.Offset = InsertOffset, .Size = /* TODO FIX */ uint32_t(ChunkSize)};
+ const CasDiskLocation Location{InsertOffset, ChunkSize};
m_LocationMap[ChunkHash] = Location;
@@ -116,7 +122,8 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
{
const CasDiskLocation& Location = KeyIt->second;
- return IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.Offset, Location.Size);
+
+ return IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.GetOffset(), Location.GetSize());
}
// Not found
@@ -187,11 +194,11 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (auto& Entry : m_LocationMap)
{
- const uint64_t EntryOffset = Entry.second.Offset;
+ const uint64_t EntryOffset = Entry.second.GetOffset();
if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
{
- const uint64_t EntryEnd = EntryOffset + Entry.second.Size;
+ const uint64_t EntryEnd = EntryOffset + Entry.second.GetSize();
if (EntryEnd >= WindowEnd)
{
@@ -201,7 +208,8 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
}
const IoHash ComputedHash =
- IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.Offset - WindowStart, Entry.second.Size);
+ IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.GetOffset() - WindowStart,
+ Entry.second.GetSize());
if (Entry.first != ComputedHash)
{
@@ -222,7 +230,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
for (const CasDiskIndexEntry& Entry : BigChunks)
{
IoHashStream Hasher;
- m_SmallObjectFile.StreamByteRange(Entry.Location.Offset, Entry.Location.Size, [&](const void* Data, uint64_t Size) {
+ m_SmallObjectFile.StreamByteRange(Entry.Location.GetOffset(), Entry.Location.GetSize(), [&](const void* Data, uint64_t Size) {
Hasher.Append(Data, Size);
});
IoHash ComputedHash = Hasher.GetHash();
@@ -258,7 +266,7 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
}
void
-CasContainerStrategy::GarbageCollect(GcContext& GcCtx)
+CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
}
@@ -280,4 +288,81 @@ CasContainerStrategy::MakeSnapshot()
m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0);
}
+//////////////////////////////////////////////////////////////////////////
+
+#if ZEN_WITH_TESTS
+
+TEST_CASE("cas.compact.gc")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
+
+ CreateDirectories(CasConfig.RootDirectory);
+
+ const int kIterationCount = 1000;
+
+ std::vector<IoHash> Keys(kIterationCount);
+
+ {
+ CasContainerStrategy Cas(CasConfig);
+ Cas.Initialize("test", 16, true);
+
+ for (int i = 0; i < kIterationCount; ++i)
+ {
+ CbObjectWriter Cbo;
+ Cbo << "id" << i;
+ CbObject Obj = Cbo.Save();
+
+ IoBuffer ObjBuffer = Obj.GetBuffer().AsIoBuffer();
+ const IoHash Hash = HashBuffer(ObjBuffer);
+
+ Cas.InsertChunk(ObjBuffer, Hash);
+
+ Keys[i] = Hash;
+ }
+
+ for (int i = 0; i < kIterationCount; ++i)
+ {
+ IoBuffer Chunk = Cas.FindChunk(Keys[i]);
+
+ CHECK(!!Chunk);
+
+ CbObject Value = LoadCompactBinaryObject(Chunk);
+
+ CHECK_EQ(Value["id"].AsInt32(), i);
+ }
+ }
+
+ // Validate that we can still read the inserted data after closing
+ // the original cas store
+
+ {
+ CasContainerStrategy Cas(CasConfig);
+ Cas.Initialize("test", 16, false);
+
+ for (int i = 0; i < kIterationCount; ++i)
+ {
+ IoBuffer Chunk = Cas.FindChunk(Keys[i]);
+
+ CHECK(!!Chunk);
+
+ CbObject Value = LoadCompactBinaryObject(Chunk);
+
+ CHECK_EQ(Value["id"].AsInt32(), i);
+ }
+
+ GcContext Ctx;
+ Cas.CollectGarbage(Ctx);
+ }
+}
+
+#endif
+
+void
+compactcas_forcelink()
+{
+}
+
} // namespace zen
diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h
index 8f7c0213b..a3f3121e6 100644
--- a/zenstore/compactcas.h
+++ b/zenstore/compactcas.h
@@ -23,17 +23,42 @@ namespace zen {
struct CasDiskLocation
{
- uint64_t Offset;
- // If we wanted to be able to store larger chunks using this storage mechanism then
- // we could make this more like the IoStore index so we can store larger chunks.
- // I.e use five bytes for size and seven for offset
- uint32_t Size;
+ CasDiskLocation(uint64_t InOffset, uint64_t InSize)
+ {
+ ZEN_ASSERT(InOffset <= 0xff'ffff'ffff);
+ ZEN_ASSERT(InSize <= 0xff'ffff'ffff);
+
+ memcpy(&m_Offset[0], &InOffset, sizeof m_Offset);
+ memcpy(&m_Size[0], &InSize, sizeof m_Size);
+ }
+
+ CasDiskLocation() = default;
+
+ inline uint64_t GetOffset() const
+ {
+ uint64_t Offset = 0;
+ memcpy(&Offset, &m_Offset, sizeof m_Offset);
+ return Offset;
+ }
+
+ inline uint64_t GetSize() const
+ {
+ uint64_t Size = 0;
+ memcpy(&Size, &m_Size, sizeof m_Size);
+ return Size;
+ }
+
+private:
+ uint8_t m_Offset[5];
+ uint8_t m_Size[5];
};
struct CasDiskIndexEntry
{
IoHash Key;
CasDiskLocation Location;
+ ZenContentType ContentType = ZenContentType::kUnknownContentType;
+ uint8_t Flags = 0;
};
#pragma pack(pop)
@@ -61,7 +86,7 @@ struct CasContainerStrategy
void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore);
void Flush();
void Scrub(ScrubContext& Ctx);
- void GarbageCollect(GcContext& GcCtx);
+ void CollectGarbage(GcContext& GcCtx);
private:
const CasStoreConfiguration& m_Config;
@@ -81,4 +106,6 @@ private:
void MakeSnapshot();
};
+void compactcas_forcelink();
+
} // namespace zen
diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp
index d852fa64b..de0f43cb3 100644
--- a/zenstore/zenstore.cpp
+++ b/zenstore/zenstore.cpp
@@ -5,6 +5,7 @@
#include <zenstore/CAS.h>
#include <zenstore/basicfile.h>
#include "filecas.h"
+#include "compactcas.h"
namespace zen {
@@ -14,6 +15,7 @@ zenstore_forcelinktests()
basicfile_forcelink();
CAS_forcelink();
filecas_forcelink();
+ compactcas_forcelink();
}
} // namespace zen