about | summary | refs | log | tree | commit | diff
path: root/zenstore/compactcas.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2022-03-11 12:17:46 +0100
committer: Dan Engelbrecht <[email protected]> 2022-03-31 11:28:31 +0200
commit: 769dc100be423f43df52e24b78b210a969520478 (patch)
tree: f2eec935b3fa51d97899945e0ff5ea484c69800c /zenstore/compactcas.cpp
parent: Fix gc shutdown stalling if shutting down while gc is running (diff)
download: zen-769dc100be423f43df52e24b78b210a969520478.tar.xz
zen-769dc100be423f43df52e24b78b210a969520478.zip
WIP
Diffstat (limited to 'zenstore/compactcas.cpp')
-rw-r--r-- zenstore/compactcas.cpp 203
1 files changed, 94 insertions, 109 deletions
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 3bf0c70df..a2142b497 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -261,43 +261,52 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName);
- RwLock::ExclusiveLockScope _(m_LocationMapLock);
+ std::vector<CasDiskLocation> ChunkLocations; // Sorted by position
+ std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations
+ std::unordered_set<IoHash, IoHash::Hasher> ChunksToKeep;
+ const uint64_t ChunkCount = m_LocationMap.size();
+ uint64_t TotalSize{};
+ {
+ RwLock::ExclusiveLockScope _(m_LocationMapLock);
- Flush();
+ Flush();
- std::vector<IoHash> Candidates;
- std::vector<IoHash> ChunksToKeep;
- std::vector<IoHash> ChunksToDelete;
- const uint64_t ChunkCount = m_LocationMap.size();
- uint64_t TotalSize{};
+ ChunkLocations.reserve(m_LocationMap.size());
+ ChunkHashes.reserve(m_LocationMap.size());
- Candidates.reserve(m_LocationMap.size());
+ for (auto& Entry : m_LocationMap)
+ {
+ ChunkHashes.push_back(Entry.first);
+ TotalSize += Entry.second.GetSize();
+ }
- for (auto& Entry : m_LocationMap)
- {
- Candidates.push_back(Entry.first);
- TotalSize += Entry.second.GetSize();
- }
+ GcCtx.FilterCas(ChunkHashes, [&ChunksToKeep](const IoHash& Hash, bool Keep) {
+ if (Keep)
+ {
+ ChunksToKeep.insert(Hash);
+ }
+ });
- ChunksToKeep.reserve(Candidates.size());
- GcCtx.FilterCas(Candidates, [&ChunksToKeep, &ChunksToDelete](const IoHash& Hash, bool Keep) {
- if (Keep)
+ if (m_LocationMap.empty() || ChunksToKeep.size() == m_LocationMap.size())
{
- ChunksToKeep.push_back(Hash);
+ ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete",
+ ChunkCount,
+ NiceBytes(TotalSize),
+ m_Config.RootDirectory / m_ContainerBaseName);
+ return;
}
- else
+
+ std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) {
+ auto LhsKeyIt = m_LocationMap.find(Lhs);
+ auto RhsKeyIt = m_LocationMap.find(Rhs);
+ return LhsKeyIt->second.GetOffset() < RhsKeyIt->second.GetOffset();
+ });
+
+ for (auto Entry : ChunkHashes)
{
- ChunksToDelete.push_back(Hash);
+ auto KeyIt = m_LocationMap.find(Entry);
+ ChunkLocations.push_back(KeyIt->second);
}
- });
-
- if (m_LocationMap.empty() || ChunksToKeep.size() == m_LocationMap.size())
- {
- ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete",
- ChunkCount,
- NiceBytes(TotalSize),
- m_Config.RootDirectory / m_ContainerBaseName);
- return;
}
const uint64_t NewChunkCount = ChunksToKeep.size();
@@ -309,109 +318,84 @@ CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
NewTotalSize += Loc.GetSize();
}
- std::error_code Error;
- DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error);
- if (Error)
- {
- ZEN_ERROR("get disk space FAILED, reason '{}'", Error.message());
- return;
- }
-
- if (Space.Free < NewTotalSize + (64 << 20))
- {
- ZEN_INFO("garbage collect from '{}' FAILED, required disk space {}, free {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- NiceBytes(NewTotalSize),
- NiceBytes(Space.Free));
- return;
- }
-
const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
if (!CollectSmallObjects)
{
ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- ChunkCount - NewChunkCount,
- NiceBytes(TotalSize - NewTotalSize),
- ChunkCount,
- NiceBytes(TotalSize));
+ m_Config.RootDirectory / m_ContainerBaseName,
+ ChunkCount - NewChunkCount,
+ NiceBytes(TotalSize - NewTotalSize),
+ ChunkCount,
+ NiceBytes(TotalSize));
return;
}
- fs::path TmpSobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ucas");
- fs::path TmpSlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ulog");
-
+ std::vector<IoHash> DeletedChunks;
+ std::vector<IoHash> MovedChunks;
+ uint64_t WriteOffset{};
+ for (uint64_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++)
{
- ZEN_DEBUG("creating temporary container cas '{}'...", TmpSobsPath);
-
- TCasLogFile<CasDiskIndexEntry> TmpLog;
- BasicFile TmpObjectFile;
- bool IsNew = true;
-
- TmpLog.Open(TmpSlogPath, IsNew);
- TmpObjectFile.Open(TmpSobsPath, IsNew);
-
- std::vector<uint8_t> Chunk;
- uint64_t NextInsertOffset{};
-
- for (const IoHash& Key : ChunksToKeep)
+ IoHash ChunkHash = ChunkHashes[ChunkIndex];
+ const auto& ChunkLocation = ChunkLocations[ChunkIndex];
+ uint64_t ChunkEnd = ChunkLocation.GetOffset() + ChunkLocation.GetSize();
+ uint64_t NextChunkWritePos = (ChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
+ bool KeepChunk = ChunksToKeep.end() != ChunksToKeep.find(ChunkHash);
+ if (KeepChunk && WriteOffset == ChunkLocation.GetOffset())
{
- const auto Entry = m_LocationMap.find(Key);
- const auto& Loc = Entry->second;
-
- Chunk.resize(Loc.GetSize());
- m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), Loc.GetOffset());
+ WriteOffset = ChunkLocation.GetOffset() + ChunkLocation.GetSize();
+ continue;
+ }
- const uint64_t InsertOffset = NextInsertOffset;
- TmpObjectFile.Write(Chunk.data(), Chunk.size(), InsertOffset);
- TmpLog.Append({.Key = Key, .Location = {InsertOffset, Chunk.size()}});
+ RwLock::ExclusiveLockScope _(m_LocationMapLock);
- NextInsertOffset = (NextInsertOffset + Chunk.size() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
+ // Verify if we should still keep the chunk
+ std::vector<IoHash> Chunks;
+ GcCtx.FilterCas(ChunkHashes, [&KeepChunk](const IoHash& /*Hash*/, bool Keep) { KeepChunk = Keep; });
+ if (!KeepChunk)
+ {
+ DeletedChunks.push_back(ChunkHash);
+ m_CasLog.Append({.Key = ChunkHash, .Location = ChunkLocation, .Flags = CasDiskIndexEntry::kTombstone});
+ m_LocationMap.erase(ChunkHash);
+ if (m_CurrentInsertOffset == NextChunkWritePos)
+ {
+ m_CurrentInsertOffset = WriteOffset;
+ }
+ m_TotalSize.fetch_sub(static_cast<uint64_t>(ChunkLocation.GetSize()));
+ continue;
}
- }
- try
- {
- CloseContainer();
+ // Move the chunk
+ std::vector<uint8_t> Chunk;
+ m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), ChunkLocation.GetOffset());
+ CasDiskLocation NewChunkLocation(WriteOffset, ChunkLocation.GetSize());
+ m_SmallObjectFile.Write(Chunk.data(), Chunk.size(), NewChunkLocation.GetOffset());
- fs::path SobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas");
- fs::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx");
- fs::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog");
+ CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = NewChunkLocation};
+ m_CasLog.Append(IndexEntry);
- fs::remove(SobsPath);
- fs::remove(SidxPath);
- fs::remove(SlogPath);
+ MovedChunks.push_back(ChunkHash);
+ m_LocationMap[ChunkHash] = NewChunkLocation;
- fs::rename(TmpSobsPath, SobsPath);
- fs::rename(TmpSlogPath, SlogPath);
+ WriteOffset = (WriteOffset + ChunkLocation.GetSize() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
+ // Update insert location if this is the last chunk in the file
+ if (m_CurrentInsertOffset == NextChunkWritePos)
{
- // Create a new empty index file
- BasicFile SidxFile;
- SidxFile.Open(SidxPath, true);
+ uint64_t NewChunkEnd = NewChunkLocation.GetOffset() + NewChunkLocation.GetSize();
+ m_CurrentInsertOffset = (NewChunkEnd + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
}
-
- OpenContainer(false /* IsNewStore */);
-
- GcCtx.DeletedCas(ChunksToDelete);
-
- ZEN_INFO("garbage collect from '{}' DONE, collected #{} {} chunks of total #{} {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- ChunkCount - NewChunkCount,
- NiceBytes(TotalSize - NewTotalSize),
- ChunkCount,
- NiceBytes(TotalSize));
}
- catch (std::exception& Err)
{
- ZEN_ERROR("garbage collection FAILED, reason '{}'", Err.what());
-
- // Something went wrong, try create a new container
- OpenContainer(true /* IsNewStore */);
-
- GcCtx.DeletedCas(ChunksToDelete);
- GcCtx.DeletedCas(ChunksToKeep);
+ RwLock::ExclusiveLockScope _(m_LocationMapLock);
+ uint64_t CurrentSize = m_SmallObjectFile.FileSize();
+ if (CurrentSize > m_CurrentInsertOffset)
+ {
+ ZEN_INFO("truncate '{}' DISABLED, from {} to {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ NiceBytes(CurrentSize),
+ NiceBytes(m_CurrentInsertOffset));
+ }
}
}
@@ -458,6 +442,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore)
if (Record.Flags & CasDiskIndexEntry::kTombstone)
{
m_TotalSize.fetch_sub(Record.Location.GetSize());
+ m_LocationMap.erase(Record.Key);
}
else
{