aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Stefan Boberg <[email protected]> 2021-08-23 19:12:14 +0200
committer: Stefan Boberg <[email protected]> 2021-08-23 19:12:14 +0200
commit: 9497ba8cba4347112e4335ca02d143aec8a45f24 (patch)
tree: 7c625a69063fc6aecbc4eb6384d5a561ce94e3ad
parent: Improved ZenCacheStore::DropBucket logic and added logging (diff)
download: zen-9497ba8cba4347112e4335ca02d143aec8a45f24.tar.xz
          zen-9497ba8cba4347112e4335ca02d143aec8a45f24.zip
Implemented more formalised CAS chunk filtering (with plenty of room for optimization)
-rw-r--r-- zenstore/CAS.cpp 9
-rw-r--r-- zenstore/compactcas.cpp 33
-rw-r--r-- zenstore/compactcas.h 14
-rw-r--r-- zenstore/filecas.cpp 39
-rw-r--r-- zenstore/filecas.h 8
-rw-r--r-- zenstore/include/zenstore/CAS.h 14
6 files changed, 112 insertions, 5 deletions
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp
index 36cf85549..d91e8cb2c 100644
--- a/zenstore/CAS.cpp
+++ b/zenstore/CAS.cpp
@@ -40,6 +40,7 @@ public:
virtual void Initialize(const CasStoreConfiguration& InConfig) override;
virtual CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) override;
virtual IoBuffer FindChunk(const IoHash& ChunkHash) override;
+ virtual void FilterChunks(CasChunkSet& InOutChunks) override;
virtual void Flush() override;
private:
@@ -144,6 +145,14 @@ CasImpl::FindChunk(const IoHash& ChunkHash)
return IoBuffer{};
}
+/** Runs InOutChunks through the FilterChunks of each storage strategy in turn
+ *
+ * The same set instance is handed to every strategy, so whatever one strategy
+ * removes is no longer seen by the ones that follow.
+ */
+void
+CasImpl::FilterChunks(CasChunkSet& InOutChunks)
+{
+    const auto FilterWith = [&InOutChunks](auto& Strategy) { Strategy.FilterChunks(InOutChunks); };
+
+    // Order is unchanged: small, then tiny, then large
+    FilterWith(m_SmallStrategy);
+    FilterWith(m_TinyStrategy);
+    FilterWith(m_LargeStrategy);
+}
+
void
CasImpl::Flush()
{
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index b658425e7..4407d8b08 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -116,6 +116,39 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
return IoBuffer();
}
+/** Reports whether ChunkHash has an entry in the location map
+ *
+ * Only the in-memory map is consulted; no chunk data is read.
+ *
+ * @return true when the hash is found in m_LocationMap, false otherwise
+ */
+bool
+CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
+{
+    // Shared lock scope is held for the duration of the lookup
+    RwLock::SharedLockScope _(m_LocationMapLock);
+
+    return m_LocationMap.find(ChunkHash) != m_LocationMap.end();
+}
+
+/** Removes from InOutChunks every hash for which HaveChunk() returns true
+ *
+ * Works in two passes: matches are collected first and erased afterwards,
+ * because the set being iterated is the one RemoveIfPresent() erases from.
+ */
+void
+CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks)
+{
+    std::unordered_set<IoHash> PresentHashes;
+
+    // Pass 1: note which of the requested hashes this strategy has
+    for (const IoHash& Candidate : InOutChunks.GetChunkSet())
+    {
+        if (!HaveChunk(Candidate))
+        {
+            continue;
+        }
+
+        PresentHashes.insert(Candidate);
+    }
+
+    // Pass 2: drop them from the caller's set
+    for (const IoHash& Present : PresentHashes)
+    {
+        InOutChunks.RemoveIfPresent(Present);
+    }
+}
+
void
CasContainerStrategy::Flush()
{
diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h
index c65af0435..05bbf81f6 100644
--- a/zenstore/compactcas.h
+++ b/zenstore/compactcas.h
@@ -40,12 +40,22 @@ struct CasDiskIndexEntry
static_assert(sizeof(CasDiskIndexEntry) == 32);
+/** This implements a storage strategy for small CAS values
+ *
+ * New chunks are simply appended to a small object file, and an index is
+ * maintained to allow chunks to be looked up within the active small object
+ * files
+ *
+ */
+
struct CasContainerStrategy
{
CasContainerStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {}
- CasStore::InsertResult InsertChunk(const void* chunkData, size_t chunkSize, const IoHash& chunkHash);
+ CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash);
- IoBuffer FindChunk(const IoHash& chunkHash);
+ IoBuffer FindChunk(const IoHash& ChunkHash);
+ bool HaveChunk(const IoHash& ChunkHash);
+ void FilterChunks(CasChunkSet& InOutChunks);
void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore);
void Flush();
diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp
index 04a6f7aa0..cddf22503 100644
--- a/zenstore/filecas.cpp
+++ b/zenstore/filecas.cpp
@@ -279,6 +279,45 @@ FileCasStrategy::FindChunk(const IoHash& ChunkHash)
return Chunk;
}
+/** Reports whether a file exists at the sharded path derived from ChunkHash
+ *
+ * The path is the configured root directory, a separator, and whatever
+ * MakeShardedPath() appends for the hash. Any error from the existence
+ * check is discarded and reported as "not present".
+ *
+ * @return true when std::filesystem::exists() finds the path, false otherwise
+ */
+bool
+FileCasStrategy::HaveChunk(const IoHash& ChunkHash)
+{
+    size_t Shard2Length = 0;  // out parameter of MakeShardedPath, not used here
+
+    ExtendableWideStringBuilder<128> ChunkPath;
+    ChunkPath.Append(m_Config.RootDirectory.c_str());
+    ChunkPath.Append(std::filesystem::path::preferred_separator);
+    MakeShardedPath(ChunkPath, ChunkHash, /* out */ Shard2Length);
+
+    // Shared lock scope for this hash is taken only after the path is built
+    RwLock::SharedLockScope _(LockForHash(ChunkHash));
+
+    // The error_code overload yields false on failure instead of throwing
+    std::error_code IgnoredError;
+    return std::filesystem::exists(ChunkPath.c_str(), IgnoredError);
+}
+
+/** Removes from InOutChunks every hash for which HaveChunk() returns true
+ *
+ * Works in two passes: matches are collected first and erased afterwards,
+ * because the set being iterated is the one RemoveIfPresent() erases from.
+ */
+void
+FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks)
+{
+    std::unordered_set<IoHash> PresentHashes;
+
+    // Pass 1: note which of the requested hashes this strategy has
+    for (const IoHash& Candidate : InOutChunks.GetChunkSet())
+    {
+        if (!HaveChunk(Candidate))
+        {
+            continue;
+        }
+
+        PresentHashes.insert(Candidate);
+    }
+
+    // Pass 2: drop them from the caller's set
+    for (const IoHash& Present : PresentHashes)
+    {
+        InOutChunks.RemoveIfPresent(Present);
+    }
+}
+
void
FileCasStrategy::Flush()
{
diff --git a/zenstore/filecas.h b/zenstore/filecas.h
index da89b927c..0213b52c3 100644
--- a/zenstore/filecas.h
+++ b/zenstore/filecas.h
@@ -15,9 +15,11 @@ namespace zen {
struct FileCasStrategy
{
FileCasStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {}
- CasStore::InsertResult InsertChunk(const void* chunkData, size_t chunkSize, const IoHash& chunkHash);
- CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash);
- IoBuffer FindChunk(const IoHash& chunkHash);
+ CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
+ CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash);
+ IoBuffer FindChunk(const IoHash& ChunkHash);
+ bool HaveChunk(const IoHash& ChunkHash);
+ void FilterChunks(CasChunkSet& InOutChunks);
void Flush();
private:
diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h
index f01b562cb..ec594af8b 100644
--- a/zenstore/include/zenstore/CAS.h
+++ b/zenstore/include/zenstore/CAS.h
@@ -12,6 +12,7 @@
#include <filesystem>
#include <memory>
#include <string>
+#include <unordered_set>
namespace zen {
@@ -27,6 +28,18 @@ struct CasStoreConfiguration
uint64_t HugeValueThreshold = 1024 * 1024;
};
+/** A set of chunk hashes, used as the in/out argument of FilterChunks()
+ *
+ * Thin wrapper around std::unordered_set<IoHash>; it adds no locking of its own.
+ */
+class CasChunkSet
+{
+public:
+    /** Adds HashToAdd to the set; adding a hash already in the set changes nothing */
+    void AddChunk(const IoHash& HashToAdd)
+    {
+        m_ChunkSet.insert(HashToAdd);
+    }
+
+    /** Erases HashToRemove from the set
+     *
+     * @return true when an element was actually erased
+     */
+    bool RemoveIfPresent(const IoHash& HashToRemove)
+    {
+        return m_ChunkSet.erase(HashToRemove) > 0;
+    }
+
+    /** Read-only reference to the underlying set; it aliases the live container */
+    const std::unordered_set<IoHash>& GetChunkSet() const
+    {
+        return m_ChunkSet;
+    }
+
+    /** True when the set holds no hashes */
+    bool IsEmpty() const
+    {
+        return m_ChunkSet.empty();
+    }
+
+private:
+    std::unordered_set<IoHash> m_ChunkSet;
+};
+
class CasStore
{
public:
@@ -52,6 +65,7 @@ public:
virtual void Initialize(const CasStoreConfiguration& Config) = 0;
virtual InsertResult InsertChunk(IoBuffer Data, const IoHash& ChunkHash) = 0;
virtual IoBuffer FindChunk(const IoHash& ChunkHash) = 0;
+ virtual void FilterChunks(CasChunkSet& InOutChunks) = 0;
virtual void Flush() = 0;
protected: