diff options
| author | Stefan Boberg <[email protected]> | 2021-08-23 19:12:14 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2021-08-23 19:12:14 +0200 |
| commit | 9497ba8cba4347112e4335ca02d143aec8a45f24 (patch) | |
| tree | 7c625a69063fc6aecbc4eb6384d5a561ce94e3ad | |
| parent | Improved ZenCacheStore::DropBucket logic and added logging (diff) | |
| download | zen-9497ba8cba4347112e4335ca02d143aec8a45f24.tar.xz zen-9497ba8cba4347112e4335ca02d143aec8a45f24.zip | |
Implemented more formalised CAS chunk filtering (with plenty of room for optimization)
| -rw-r--r-- | zenstore/CAS.cpp | 9 | ||||
| -rw-r--r-- | zenstore/compactcas.cpp | 33 | ||||
| -rw-r--r-- | zenstore/compactcas.h | 14 | ||||
| -rw-r--r-- | zenstore/filecas.cpp | 39 | ||||
| -rw-r--r-- | zenstore/filecas.h | 8 | ||||
| -rw-r--r-- | zenstore/include/zenstore/CAS.h | 14 |
6 files changed, 112 insertions, 5 deletions
diff --git a/zenstore/CAS.cpp b/zenstore/CAS.cpp index 36cf85549..d91e8cb2c 100644 --- a/zenstore/CAS.cpp +++ b/zenstore/CAS.cpp @@ -40,6 +40,7 @@ public: virtual void Initialize(const CasStoreConfiguration& InConfig) override; virtual CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) override; virtual IoBuffer FindChunk(const IoHash& ChunkHash) override; + virtual void FilterChunks(CasChunkSet& InOutChunks) override; virtual void Flush() override; private: @@ -144,6 +145,14 @@ CasImpl::FindChunk(const IoHash& ChunkHash) return IoBuffer{}; } +void +CasImpl::FilterChunks(CasChunkSet& InOutChunks) +{ + m_SmallStrategy.FilterChunks(InOutChunks); + m_TinyStrategy.FilterChunks(InOutChunks); + m_LargeStrategy.FilterChunks(InOutChunks); +} + void CasImpl::Flush() { diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp index b658425e7..4407d8b08 100644 --- a/zenstore/compactcas.cpp +++ b/zenstore/compactcas.cpp @@ -116,6 +116,39 @@ CasContainerStrategy::FindChunk(const IoHash& ChunkHash) return IoBuffer(); } +bool +CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) +{ + RwLock::SharedLockScope _(m_LocationMapLock); + auto KeyIt = m_LocationMap.find(ChunkHash); + + if (KeyIt != m_LocationMap.end()) + { + return true; + } + + return false; +} + +void +CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks) +{ + std::unordered_set<IoHash> HaveSet; + + for (const IoHash& Hash : InOutChunks.GetChunkSet()) + { + if (HaveChunk(Hash)) + { + HaveSet.insert(Hash); + } + } + + for (const IoHash& Hash : HaveSet) + { + InOutChunks.RemoveIfPresent(Hash); + } +} + void CasContainerStrategy::Flush() { diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h index c65af0435..05bbf81f6 100644 --- a/zenstore/compactcas.h +++ b/zenstore/compactcas.h @@ -40,12 +40,22 @@ struct CasDiskIndexEntry static_assert(sizeof(CasDiskIndexEntry) == 32); +/** This implements a storage strategy for small CAS values + * + * New chunks are simply appended to a small object file, and an index is + * maintained to allow chunks to be looked up within the active small object + * files + * + */ + struct CasContainerStrategy { CasContainerStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {} - CasStore::InsertResult InsertChunk(const void* chunkData, size_t chunkSize, const IoHash& chunkHash); + CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash); CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash); - IoBuffer FindChunk(const IoHash& chunkHash); + IoBuffer FindChunk(const IoHash& ChunkHash); + bool HaveChunk(const IoHash& ChunkHash); + void FilterChunks(CasChunkSet& InOutChunks); void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore); void Flush(); diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp index 04a6f7aa0..cddf22503 100644 --- a/zenstore/filecas.cpp +++ b/zenstore/filecas.cpp @@ -279,6 +279,45 @@ FileCasStrategy::FindChunk(const IoHash& ChunkHash) return Chunk; } +bool +FileCasStrategy::HaveChunk(const IoHash& ChunkHash) +{ + size_t Shard2len = 0; + ExtendableWideStringBuilder<128> ShardedPath; + ShardedPath.Append(m_Config.RootDirectory.c_str()); + ShardedPath.Append(std::filesystem::path::preferred_separator); + MakeShardedPath(ShardedPath, ChunkHash, /* out */ Shard2len); + + RwLock::SharedLockScope _(LockForHash(ChunkHash)); + + std::error_code Ec; + if (std::filesystem::exists(ShardedPath.c_str(), Ec)) + { + return true; + } + + return false; +} + +void +FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks) +{ + std::unordered_set<IoHash> HaveSet; + + for (const IoHash& Hash : InOutChunks.GetChunkSet()) + { + if (HaveChunk(Hash)) + { + HaveSet.insert(Hash); + } + } + + for (const IoHash& Hash : HaveSet) + { + InOutChunks.RemoveIfPresent(Hash); + } +} + void FileCasStrategy::Flush() { diff --git a/zenstore/filecas.h b/zenstore/filecas.h index da89b927c..0213b52c3 100644 --- a/zenstore/filecas.h +++ b/zenstore/filecas.h @@ -15,9 +15,11 @@ namespace zen { struct FileCasStrategy { FileCasStrategy(const CasStoreConfiguration& Config, CasStore::Stats& Stats) : m_Config(Config), m_Stats(Stats) {} - CasStore::InsertResult InsertChunk(const void* chunkData, size_t chunkSize, const IoHash& chunkHash); - CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& chunkHash); - IoBuffer FindChunk(const IoHash& chunkHash); + CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash); + CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash); + IoBuffer FindChunk(const IoHash& ChunkHash); + bool HaveChunk(const IoHash& ChunkHash); + void FilterChunks(CasChunkSet& InOutChunks); void Flush(); private: diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h index f01b562cb..ec594af8b 100644 --- a/zenstore/include/zenstore/CAS.h +++ b/zenstore/include/zenstore/CAS.h @@ -12,6 +12,7 @@ #include <filesystem> #include <memory> #include <string> +#include <unordered_set> namespace zen { @@ -27,6 +28,18 @@ struct CasStoreConfiguration uint64_t HugeValueThreshold = 1024 * 1024; }; +class CasChunkSet +{ +public: + void AddChunk(const IoHash& HashToAdd) { m_ChunkSet.insert(HashToAdd); } + bool RemoveIfPresent(const IoHash& HashToRemove) { return 0 != m_ChunkSet.erase(HashToRemove); } + const std::unordered_set<IoHash>& GetChunkSet() const { return m_ChunkSet; } + bool IsEmpty() const { return m_ChunkSet.empty(); } + +private: + std::unordered_set<IoHash> m_ChunkSet; +}; + class CasStore { public: @@ -52,6 +65,7 @@ public: virtual void Initialize(const CasStoreConfiguration& Config) = 0; virtual InsertResult InsertChunk(IoBuffer Data, const IoHash& ChunkHash) = 0; virtual IoBuffer FindChunk(const IoHash& ChunkHash) = 0; + virtual void FilterChunks(CasChunkSet& InOutChunks) = 0; virtual void Flush() = 0; protected: |