diff options
| author | Dan Engelbrecht <[email protected]> | 2023-09-12 09:11:48 -0400 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-09-12 15:11:48 +0200 |
| commit | c190ff256f13645d2905bd8bd744699559d5c5f6 (patch) | |
| tree | 85e5d01e0df562b11ddfdc77d702b901c516dacb /src/zenserver/projectstore/remoteprojectstore.cpp | |
| parent | Make sure error logging or destructors don't throw exception when trying to g... (diff) | |
| download | zen-c190ff256f13645d2905bd8bd744699559d5c5f6.tar.xz zen-c190ff256f13645d2905bd8bd744699559d5c5f6.zip | |
incremental oplog upload for block-based targets (#392)
* add option for base container for oplog export
read base oplog and fetch known blocks
* reuse a known block if more than 80% of its chunks are used by the oplog
* changelog
* better logging and added base to remotestore descriptions
Diffstat (limited to 'src/zenserver/projectstore/remoteprojectstore.cpp')
| -rw-r--r-- | src/zenserver/projectstore/remoteprojectstore.cpp | 90 |
1 file changed, 90 insertions, 0 deletions
diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index 080517a8d..235166659 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -195,6 +195,7 @@ BuildContainer(CidStore& ChunkStore, size_t MaxBlockSize, size_t MaxChunkEmbedSize, bool BuildBlocks, + const std::vector<Block>& KnownBlocks, WorkerThreadPool& WorkerPool, const std::function<void(CompressedBuffer&&, const IoHash&)>& AsyncOnBlock, const std::function<void(const IoHash&)>& OnLargeAttachment, @@ -383,6 +384,53 @@ BuildContainer(CidStore& ChunkStore, OpCount++; }); + if (!Attachments.empty() && !KnownBlocks.empty()) + { + size_t ReusedBlockCount = 0; + ZEN_INFO("Checking {} known blocks for reuse", KnownBlocks.size()); + for (const Block& KnownBlock : KnownBlocks) + { + size_t BlockAttachmentCount = KnownBlock.ChunksInBlock.size(); + if (BlockAttachmentCount == 0) + { + continue; + } + size_t FoundAttachmentCount = 0; + for (const IoHash& KnownHash : KnownBlock.ChunksInBlock) + { + if (Attachments.contains(KnownHash)) + { + FoundAttachmentCount++; + } + } + + size_t ReusePercent = (FoundAttachmentCount * 100) / BlockAttachmentCount; + // TODO: Configure reuse-level + if (ReusePercent > 80) + { + ZEN_DEBUG("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + for (const IoHash& KnownHash : KnownBlock.ChunksInBlock) + { + Attachments.erase(KnownHash); + } + + BlocksLock.WithExclusiveLock([&]() { Blocks.push_back(KnownBlock); }); + ReusedBlockCount++; + } + else if (FoundAttachmentCount > 0) + { + ZEN_DEBUG("Skipping block {}. 
{} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + } + } + ZEN_INFO("Reusing {} out of {} known blocks", ReusedBlockCount, KnownBlocks.size()); + } + ZEN_INFO("Sorting {} attachments from {} ops", Attachments.size(), OpLSNToKey.size()); // Sort attachments so we get predictable blocks for the same oplog upload @@ -649,6 +697,7 @@ BuildContainer(CidStore& ChunkStore, MaxBlockSize, MaxChunkEmbedSize, BuildBlocks, + {}, WorkerPool, AsyncOnBlock, OnLargeAttachment, @@ -778,6 +827,46 @@ SaveOplog(CidStore& ChunkStore, OnBlock = UploadBlock; } + std::vector<Block> KnownBlocks; + + if (BuildBlocks) + { + ZEN_INFO("Loading oplog base container"); + RemoteProjectStore::LoadContainerResult BaseContainerResult = RemoteStore.LoadBaseContainer(); + if (BaseContainerResult.ErrorCode != static_cast<int>(HttpResponseCode::NoContent)) + { + if (BaseContainerResult.ErrorCode) + { + ZEN_WARN("Failed to load oplog base container, reason: '{}', error code: {}", + BaseContainerResult.Reason, + BaseContainerResult.ErrorCode); + } + else + { + CbArrayView BlocksArray = BaseContainerResult.ContainerObject["blocks"sv].AsArrayView(); + for (CbFieldView BlockField : BlocksArray) + { + CbObjectView BlockView = BlockField.AsObjectView(); + IoHash BlockHash = BlockView["rawhash"sv].AsBinaryAttachment(); + + std::vector<IoHash> ChunksInBlock; + CbArrayView ChunksArray = BlockView["chunks"sv].AsArrayView(); + if (BlockHash == IoHash::Zero) + { + continue; + } + + ChunksInBlock.reserve(ChunksArray.Num()); + for (CbFieldView ChunkField : ChunksArray) + { + ChunksInBlock.push_back(ChunkField.AsHash()); + } + KnownBlocks.push_back({.BlockHash = BlockHash, .ChunksInBlock = std::move(ChunksInBlock)}); + }; + } + } + } + tsl::robin_map<IoHash, IoBuffer, IoHash::Hasher> TempAttachments; CbObject OplogContainerObject = BuildContainer(ChunkStore, Project, @@ -785,6 +874,7 @@ SaveOplog(CidStore& ChunkStore, MaxBlockSize, MaxChunkEmbedSize, BuildBlocks, 
+ KnownBlocks, WorkerPool, OnBlock, OnLargeAttachment, |