aboutsummaryrefslogtreecommitdiff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2022-03-11 22:15:39 +0100
committer: Dan Engelbrecht <[email protected]> 2022-03-31 11:28:31 +0200
commit: 7b9f4bd105c7db6cb94d1129fe7db5e0a323c50d (patch)
tree: b0078b524adab6b874440db392916f2ffe10baee /zenstore/compactcas.cpp
parent: More tests (diff)
download: zen-7b9f4bd105c7db6cb94d1129fe7db5e0a323c50d.tar.xz
zen-7b9f4bd105c7db6cb94d1129fe7db5e0a323c50d.zip
Simplified logic of last chunk move
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r-- zenstore/compactcas.cpp 173
1 files changed, 74 insertions, 99 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 020fd6dbc..806cea69f 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -75,12 +75,6 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const
const uint64_t InsertOffset = m_CurrentInsertOffset;
m_SmallObjectFile.Write(ChunkData, ChunkSize, InsertOffset);
- auto VerifyChunkHash = IoHash::HashBuffer(IoBuffer(IoBuffer::Wrap, ChunkData, ChunkSize));
- if (VerifyChunkHash != ChunkHash)
- {
- ZEN_ASSERT(false);
- }
-
m_CurrentInsertOffset = (m_CurrentInsertOffset + ChunkSize + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
RwLock::ExclusiveLockScope __(m_LocationMapLock);
@@ -262,11 +256,8 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName);
- std::vector<CasDiskLocation> ChunkLocations; // Sorted by position
- std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations
- std::unordered_set<IoHash, IoHash::Hasher> ChunksToKeep;
- const uint64_t ChunkCount = m_LocationMap.size();
- uint64_t TotalSize{};
+ const uint64_t TotalChunkCount = m_LocationMap.size();
+ uint64_t TotalSize = m_TotalSize.load();
RwLock::ExclusiveLockScope _i(m_InsertLock);
RwLock::ExclusiveLockScope _l(m_LocationMapLock);
@@ -279,66 +270,83 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
return;
}
- ChunkLocations.reserve(m_LocationMap.size());
- ChunkHashes.reserve(m_LocationMap.size());
-
+ std::vector<IoHash> TotalChunkHashes;
+ TotalChunkHashes.reserve(m_LocationMap.size());
for (auto& Entry : m_LocationMap)
{
- ChunkHashes.push_back(Entry.first);
- TotalSize += Entry.second.GetSize();
+ TotalChunkHashes.push_back(Entry.first);
}
- GcCtx.FilterCas(ChunkHashes, [&ChunksToKeep](const IoHash& Hash, bool Keep) {
+ std::vector<IoHash> DeletedChunks;
+ std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations
+ ChunkHashes.reserve(m_LocationMap.size());
+
+ const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
+
+ GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
if (Keep)
{
- ChunksToKeep.insert(Hash);
+ ChunkHashes.push_back(ChunkHash);
+ }
+ else
+ {
+ DeletedChunks.push_back(ChunkHash);
}
});
- if (ChunksToKeep.size() == m_LocationMap.size())
+ if (ChunkHashes.size() == TotalChunkCount)
{
ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete",
- ChunkCount,
+ TotalChunkCount,
NiceBytes(TotalSize),
m_Config.RootDirectory / m_ContainerBaseName);
return;
}
+ const uint64_t ChunkCount = ChunkHashes.size();
+
std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) {
auto LhsKeyIt = m_LocationMap.find(Lhs);
auto RhsKeyIt = m_LocationMap.find(Rhs);
return LhsKeyIt->second.GetOffset() < RhsKeyIt->second.GetOffset();
});
+ uint64_t NewTotalSize = 0;
+ std::vector<CasDiskLocation> ChunkLocations; // Sorted by position
+ ChunkLocations.reserve(ChunkHashes.size());
for (auto Entry : ChunkHashes)
{
- auto KeyIt = m_LocationMap.find(Entry);
- ChunkLocations.push_back(KeyIt->second);
+ auto KeyIt = m_LocationMap.find(Entry);
+ const auto& ChunkLocation = KeyIt->second;
+ ChunkLocations.push_back(ChunkLocation);
+ NewTotalSize += ChunkLocation.GetSize();
}
- const uint64_t NewChunkCount = ChunksToKeep.size();
- uint64_t NewTotalSize = 0;
-
- for (const IoHash& Key : ChunksToKeep)
- {
- const CasDiskLocation& Loc = m_LocationMap[Key];
- NewTotalSize += Loc.GetSize();
- }
-
- const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
-
if (!CollectSmallObjects)
{
ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}",
m_Config.RootDirectory / m_ContainerBaseName,
- ChunkCount - NewChunkCount,
+ TotalChunkCount - ChunkCount,
NiceBytes(TotalSize - NewTotalSize),
- ChunkCount,
+ TotalChunkCount,
NiceBytes(TotalSize));
return;
}
- std::vector<IoHash> DeletedChunks;
+ for (auto ChunkHash : DeletedChunks)
+ {
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ const auto& ChunkLocation = KeyIt->second;
+ uint64_t NextChunkOffset = ChunkLocation.GetOffset() + ChunkLocation.GetSize();
+ m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone});
+ m_LocationMap.erase(ChunkHash);
+ if (m_CurrentInsertOffset == NextChunkOffset)
+ {
+ m_CurrentInsertOffset = ChunkLocation.GetOffset();
+ }
+ m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.GetSize()));
+ }
+
std::vector<IoHash> MovedChunks;
uint64_t WriteOffset{};
@@ -347,24 +355,9 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
IoHash ChunkHash = ChunkHashes[ChunkIndex];
const auto& ChunkLocation = ChunkLocations[ChunkIndex];
- bool KeepChunk = ChunksToKeep.end() != ChunksToKeep.find(ChunkHash);
-
- uint64_t NextWriteOffset = ChunkLocation.GetOffset() + ChunkLocation.GetSize();
- NextWriteOffset = (NextWriteOffset + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
- if (!KeepChunk)
- {
- DeletedChunks.push_back(ChunkHash);
- m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone});
- m_LocationMap.erase(ChunkHash);
- if (m_CurrentInsertOffset == NextWriteOffset)
- {
- m_CurrentInsertOffset = WriteOffset;
- }
- m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.GetSize()));
- ChunkIndex++;
- continue;
- }
+ uint64_t NextChunkOffset =
+ (ChunkLocation.GetOffset() + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
uint64_t FreeChunkSize = ChunkLocation.GetOffset() - WriteOffset;
@@ -373,17 +366,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
// We should move as many keep chunk at the end as we can possibly fit
uint64_t LastKeepChunkIndex = ChunkHashes.size() - 1;
- while (LastKeepChunkIndex > ChunkIndex)
- {
- IoHash LastChunkHash = ChunkHashes[LastKeepChunkIndex];
- bool LastKeepChunk = ChunksToKeep.end() != ChunksToKeep.find(LastChunkHash);
- if (LastKeepChunk)
- {
- break;
- }
- LastKeepChunkIndex--;
- }
-
if (LastKeepChunkIndex == ChunkIndex)
{
break;
@@ -400,11 +382,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::vector<uint8_t> Chunk;
Chunk.resize(LastChunkLocation.GetSize());
m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), LastChunkLocation.GetOffset());
- auto VerifyChunkHash = IoHash::HashBuffer(IoBuffer(IoBuffer::Wrap, Chunk.data(), Chunk.size()));
- if (VerifyChunkHash != LastChunkHash)
- {
- ZEN_ASSERT(false);
- }
CasDiskLocation NewChunkLocation(WriteOffset, LastChunkLocation.GetSize());
m_SmallObjectFile.Write(Chunk.data(), Chunk.size(), NewChunkLocation.GetOffset());
@@ -413,17 +390,17 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
m_LocationMap[LastChunkHash] = NewChunkLocation;
ChunkHashes.pop_back();
- uint64_t OldNextChunkWritePos = LastChunkLocation.GetOffset() + Chunk.size();
- OldNextChunkWritePos = (OldNextChunkWritePos + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
-
- if (m_CurrentInsertOffset == OldNextChunkWritePos)
- {
- m_CurrentInsertOffset = LastChunkLocation.GetOffset();
- }
+ uint64_t LastChunkNextChunkOffset =
+ (LastChunkLocation.GetOffset() + Chunk.size() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
WriteOffset = (WriteOffset + NewChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
FreeChunkSize = ChunkLocation.GetOffset() - WriteOffset;
MovedChunks.push_back(LastChunkHash);
+
+ if (m_CurrentInsertOffset == LastChunkNextChunkOffset)
+ {
+ m_CurrentInsertOffset = WriteOffset;
+ }
}
// TODO: We could keep some wiggle room here, don't move chunk if we only move it a very small amount
@@ -432,11 +409,6 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
std::vector<uint8_t> Chunk;
Chunk.resize(ChunkLocation.GetSize());
m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset());
- auto VerifyChunkHash = IoHash::HashBuffer(IoBuffer(IoBuffer::Wrap, Chunk.data(), Chunk.size()));
- if (VerifyChunkHash != ChunkHash)
- {
- ZEN_ASSERT(false);
- }
CasDiskLocation NewChunkLocation(WriteOffset, ChunkLocation.GetSize());
m_SmallObjectFile.Write(Chunk.data(), Chunk.size(), NewChunkLocation.GetOffset());
@@ -444,34 +416,37 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
m_CasLog.Append(IndexEntry);
m_LocationMap[ChunkHash] = NewChunkLocation;
- uint64_t ChunkEnd = ChunkLocation.GetOffset() + ChunkLocation.GetSize();
- uint64_t NextChunkWritePos = (ChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
- // Update insert location if this is the last chunk in the file
-
- if (m_CurrentInsertOffset == NextChunkWritePos)
- {
- uint64_t NewChunkEnd = NewChunkLocation.GetOffset() + NewChunkLocation.GetSize();
- m_CurrentInsertOffset = (NewChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
- }
-
MovedChunks.push_back(ChunkHash);
- WriteOffset = (WriteOffset + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
+ WriteOffset = (NewChunkLocation.GetOffset() + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
}
else
{
- WriteOffset = NextWriteOffset;
+ WriteOffset = NextChunkOffset;
}
+
+ // Update insert location if this is the last chunk in the file
+ if (m_CurrentInsertOffset == NextChunkOffset)
+ {
+ m_CurrentInsertOffset = WriteOffset;
+ }
+
ChunkIndex++;
}
- uint64_t CurrentSize = m_SmallObjectFile.FileSize();
- if (CurrentSize > m_CurrentInsertOffset)
+ if (ChunkCount == 0)
{
- ZEN_INFO("new write position '{}', from {} to {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- NiceBytes(CurrentSize),
- NiceBytes(m_CurrentInsertOffset));
+ m_CurrentInsertOffset = 0;
}
+
+ uint64_t CurrentSize = m_SmallObjectFile.FileSize();
+ ZEN_INFO("garbage collection complete '{}', space {} to {}, moved {} and delete {} chunks",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ NiceBytes(CurrentSize),
+ NiceBytes(m_CurrentInsertOffset),
+ MovedChunks.size(),
+ DeletedChunks.size());
+
+ // TODO: Should we truncate the file or just keep the size of the file and reuse the space?
}
void