aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Engelbrecht <[email protected]>2022-03-14 23:50:38 +0100
committerDan Engelbrecht <[email protected]>2022-03-31 11:28:31 +0200
commit481e3cfad99792ac8b2de733ae3a97db08ceb666 (patch)
treee70dc226cf4c638acc86c91277164f0230775d4a
parentSplit chunkbundler into size-limited blocks (diff)
downloadzen-481e3cfad99792ac8b2de733ae3a97db08ceb666.tar.xz
zen-481e3cfad99792ac8b2de733ae3a97db08ceb666.zip
block files
-rw-r--r--zenstore/chunkbundler.cpp88
-rw-r--r--zenstore/chunkbundler.h12
2 files changed, 78 insertions, 22 deletions
diff --git a/zenstore/chunkbundler.cpp b/zenstore/chunkbundler.cpp
index 87b71df09..2c1924303 100644
--- a/zenstore/chunkbundler.cpp
+++ b/zenstore/chunkbundler.cpp
@@ -167,6 +167,7 @@ ChunkBundler::InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash&
uint64_t CurrentBlockSize = m_CurrentBlock.lock()->FileSize();
if (CurrentBlockSize + m_CurrentInsertOffset > m_MaxBlockSize)
{
+ RwLock::ExclusiveLockScope __(m_LocationMapLock);
m_CurrentFileIndex++;
std::filesystem::path path = m_RootDirectory / "ucas" / (std::to_string(m_CurrentFileIndex) + ".ucas");
auto SmallObjectFile = std::make_shared<BasicFile>();
@@ -221,12 +222,7 @@ ChunkBundler::HaveChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
- if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
- {
- return true;
- }
-
- return false;
+ return m_LocationMap.contains(ChunkHash);
}
void
@@ -245,6 +241,7 @@ ChunkBundler::FilterChunks(CasChunkSet& InOutChunks)
void
ChunkBundler::Flush()
{
+ RwLock::ExclusiveLockScope _l(m_InsertLock);
m_OpLog.Flush();
m_CurrentBlock.lock()->Flush();
}
@@ -386,13 +383,15 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx)
// added between each move of a block.
ZEN_INFO("collecting garbage from '{}'", m_RootDirectory / m_ContainerBaseName);
+ std::unordered_map<uint16_t, std::unordered_map<IoHash, CompactDiskLocation, IoHash::Hasher>> KeepChunksPerBlock;
std::vector<IoHash> DeletedChunks;
- std::unordered_set<int> BlocksToReWrite;
+ std::unordered_set<uint16_t> BlocksToReWrite;
{
RwLock::ExclusiveLockScope _i(m_InsertLock);
RwLock::ExclusiveLockScope _l(m_LocationMapLock);
- Flush();
+ m_OpLog.Flush();
+ m_CurrentBlock.lock()->Flush();
BlocksToReWrite.reserve(m_OpenBlocks.size());
@@ -407,27 +406,31 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx)
std::vector<IoHash> TotalChunkHashes;
TotalChunkHashes.reserve(m_LocationMap.size());
- for (auto& Entry : m_LocationMap)
+ for (const auto& Entry : m_LocationMap)
{
TotalChunkHashes.push_back(Entry.first);
}
- std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations
- ChunkHashes.reserve(m_LocationMap.size());
+ //std::vector<IoHash> ChunkHashes; // Same sort order as ChunkLocations
+ //ChunkHashes.reserve(m_LocationMap.size());
const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
+ uint64_t NewTotalSize = 0;
GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
if (Keep)
{
- ChunkHashes.push_back(ChunkHash);
+ auto KeyIt = m_LocationMap.find(ChunkHash);
+ const auto& ChunkLocation = KeyIt->second;
+ KeepChunksPerBlock[ChunkLocation.BlockIndex][ChunkHash] = ChunkLocation;
+ NewTotalSize += ChunkLocation.Size;
}
else
{
DeletedChunks.push_back(ChunkHash);
}
});
-
+ /*
if (ChunkHashes.size() == TotalChunkCount)
{
ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete",
@@ -455,19 +458,19 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx)
ChunkLocations.push_back(ChunkLocation);
NewTotalSize += ChunkLocation.Size;
}
-
+ */
if (!CollectSmallObjects)
{
ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}",
m_RootDirectory / m_ContainerBaseName,
- TotalChunkCount - ChunkCount,
+ DeletedChunks.size(),
NiceBytes(TotalSize - NewTotalSize),
TotalChunkCount,
NiceBytes(TotalSize));
return;
}
- for (auto ChunkHash : DeletedChunks)
+ for (const auto& ChunkHash : DeletedChunks)
{
auto KeyIt = m_LocationMap.find(ChunkHash);
const auto& ChunkLocation = KeyIt->second;
@@ -491,6 +494,59 @@ ChunkBundler::CollectGarbage(GcContext& GcCtx)
{
// Rewrite all BlocksToReWrite
+ for (auto BlockIndex : BlocksToReWrite)
+ {
+ std::shared_ptr<BasicFile> BlockFile;
+ {
+ RwLock::ExclusiveLockScope _i(m_InsertLock);
+ BlockFile = m_OpenBlocks[BlockIndex];
+ }
+ std::filesystem::path BlockPath = m_RootDirectory / "ucas" / (std::to_string(BlockIndex) + ".ucas");
+ auto& KeepChunksForBlock = KeepChunksPerBlock[BlockIndex];
+ if (KeepChunksForBlock.empty())
+ {
+ RwLock::ExclusiveLockScope _i(m_InsertLock);
+ BlockFile = m_OpenBlocks[BlockIndex];
+ BlockFile->Close(); // TODO: We can't know that someone isn't holding a IoBuffer for this block at this point!
+ m_OpenBlocks.erase(BlockIndex);
+ fs::remove(BlockPath);
+ }
+ else
+ {
+ std::filesystem::path TmpBlockPath = m_RootDirectory / "ucas" / (std::to_string(BlockIndex) + ".gc.ucas");
+ auto TmpBlock = std::make_shared<BasicFile>();
+ TmpBlock->Open(TmpBlockPath, true);
+ std::vector<uint8_t> Chunk;
+ uint64_t WriteOffset = 0;
+
+ for (auto& Entry : KeepChunksForBlock)
+ {
+ const CompactDiskLocation& ChunkLocation = Entry.second;
+ Chunk.resize(ChunkLocation.Size);
+ BlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset);
+ CompactDiskLocation NewChunkLocation(ChunkLocation.BlockIndex,
+ gsl::narrow<uint32_t>(WriteOffset),
+ gsl::narrow<uint32_t>(Chunk.size()));
+ TmpBlock->Write(Chunk.data(), Chunk.size(), NewChunkLocation.Offset);
+ Entry.second = NewChunkLocation;
+ WriteOffset = AlignPositon(WriteOffset + Chunk.size(), m_PayloadAlignment);
+ }
+ TmpBlock->Close();
+
+ RwLock::ExclusiveLockScope _i(m_InsertLock);
+ RwLock::ExclusiveLockScope _l(m_LocationMapLock);
+ for (const auto& Entry : KeepChunksForBlock)
+ {
+ m_LocationMap[Entry.first] = Entry.second;
+ m_OpLog.Append({.Key = Entry.first, .Location = Entry.second});
+ }
+ BlockFile->Close(); // TODO: We can't know that someone isn't holding a IoBuffer for this block at this point!
+ fs::remove(BlockPath);
+ fs::rename(TmpBlockPath, BlockPath);
+ BlockFile->Open(BlockPath, false);
+ }
+ }
+
#if 0
// We can break here if we only want to remove items without compacting of space
diff --git a/zenstore/chunkbundler.h b/zenstore/chunkbundler.h
index d84ee9627..4b24ec958 100644
--- a/zenstore/chunkbundler.h
+++ b/zenstore/chunkbundler.h
@@ -85,18 +85,18 @@ private:
std::string m_ContainerBaseName;
RwLock m_LocationMapLock;
- RwLock m_InsertLock; // used to serialize inserts
std::unordered_map<IoHash, CompactDiskLocation, IoHash::Hasher> m_LocationMap;
+ std::unordered_map<uint16_t, std::shared_ptr<BasicFile>> m_OpenBlocks;
+ uint16_t m_CurrentFileIndex = 0;
+
+ RwLock m_InsertLock; // used to serialize inserts
+ std::weak_ptr<BasicFile> m_CurrentBlock;
std::atomic_uint32_t m_CurrentInsertOffset{};
+
std::atomic_uint64_t m_CurrentIndexOffset{};
std::atomic_uint64_t m_TotalSize{};
void MakeIndexSnapshot();
-
- // Reserve one block of 1Gb
- std::unordered_map<uint16_t, std::shared_ptr<BasicFile>> m_OpenBlocks;
- std::weak_ptr<BasicFile> m_CurrentBlock;
- uint16_t m_CurrentFileIndex = 0;
};
//////////////////////////////////////////////////////////////////////////