aboutsummaryrefslogtreecommitdiff
path: root/src/zenserver/projectstore/remoteprojectstore.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2023-09-12 09:11:48 -0400
committer: GitHub <[email protected]> 2023-09-12 15:11:48 +0200
commit: c190ff256f13645d2905bd8bd744699559d5c5f6 (patch)
tree: 85e5d01e0df562b11ddfdc77d702b901c516dacb /src/zenserver/projectstore/remoteprojectstore.cpp
parent: Make sure error logging or destructors don't throw exception when trying to g... (diff)
download: zen-c190ff256f13645d2905bd8bd744699559d5c5f6.tar.xz
          zen-c190ff256f13645d2905bd8bd744699559d5c5f6.zip
incremental oplog upload for block-based targets (#392)
* add option for base container for oplog export; read base oplog and fetch known blocks
* reuse blocks if a known block has 80+ % usage
* changelog
* better logging and added base to remotestore descriptions
Diffstat (limited to 'src/zenserver/projectstore/remoteprojectstore.cpp')
-rw-r--r-- src/zenserver/projectstore/remoteprojectstore.cpp 90
1 files changed, 90 insertions, 0 deletions
diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp
index 080517a8d..235166659 100644
--- a/src/zenserver/projectstore/remoteprojectstore.cpp
+++ b/src/zenserver/projectstore/remoteprojectstore.cpp
@@ -195,6 +195,7 @@ BuildContainer(CidStore& ChunkStore,
size_t MaxBlockSize,
size_t MaxChunkEmbedSize,
bool BuildBlocks,
+ const std::vector<Block>& KnownBlocks,
WorkerThreadPool& WorkerPool,
const std::function<void(CompressedBuffer&&, const IoHash&)>& AsyncOnBlock,
const std::function<void(const IoHash&)>& OnLargeAttachment,
@@ -383,6 +384,53 @@ BuildContainer(CidStore& ChunkStore,
OpCount++;
});
+ if (!Attachments.empty() && !KnownBlocks.empty())
+ {
+ size_t ReusedBlockCount = 0;
+ ZEN_INFO("Checking {} known blocks for reuse", KnownBlocks.size());
+ for (const Block& KnownBlock : KnownBlocks)
+ {
+ size_t BlockAttachmentCount = KnownBlock.ChunksInBlock.size();
+ if (BlockAttachmentCount == 0)
+ {
+ continue;
+ }
+ size_t FoundAttachmentCount = 0;
+ for (const IoHash& KnownHash : KnownBlock.ChunksInBlock)
+ {
+ if (Attachments.contains(KnownHash))
+ {
+ FoundAttachmentCount++;
+ }
+ }
+
+ size_t ReusePercent = (FoundAttachmentCount * 100) / BlockAttachmentCount;
+ // TODO: Configure reuse-level
+ if (ReusePercent > 80)
+ {
+ ZEN_DEBUG("Reusing block {}. {} attachments found, usage level: {}%",
+ KnownBlock.BlockHash,
+ FoundAttachmentCount,
+ ReusePercent);
+ for (const IoHash& KnownHash : KnownBlock.ChunksInBlock)
+ {
+ Attachments.erase(KnownHash);
+ }
+
+ BlocksLock.WithExclusiveLock([&]() { Blocks.push_back(KnownBlock); });
+ ReusedBlockCount++;
+ }
+ else if (FoundAttachmentCount > 0)
+ {
+ ZEN_DEBUG("Skipping block {}. {} attachments found, usage level: {}%",
+ KnownBlock.BlockHash,
+ FoundAttachmentCount,
+ ReusePercent);
+ }
+ }
+ ZEN_INFO("Reusing {} out of {} known blocks", ReusedBlockCount, KnownBlocks.size());
+ }
+
ZEN_INFO("Sorting {} attachments from {} ops", Attachments.size(), OpLSNToKey.size());
// Sort attachments so we get predictable blocks for the same oplog upload
@@ -649,6 +697,7 @@ BuildContainer(CidStore& ChunkStore,
MaxBlockSize,
MaxChunkEmbedSize,
BuildBlocks,
+ {},
WorkerPool,
AsyncOnBlock,
OnLargeAttachment,
@@ -778,6 +827,46 @@ SaveOplog(CidStore& ChunkStore,
OnBlock = UploadBlock;
}
+ std::vector<Block> KnownBlocks;
+
+ if (BuildBlocks)
+ {
+ ZEN_INFO("Loading oplog base container");
+ RemoteProjectStore::LoadContainerResult BaseContainerResult = RemoteStore.LoadBaseContainer();
+ if (BaseContainerResult.ErrorCode != static_cast<int>(HttpResponseCode::NoContent))
+ {
+ if (BaseContainerResult.ErrorCode)
+ {
+ ZEN_WARN("Failed to load oplog base container, reason: '{}', error code: {}",
+ BaseContainerResult.Reason,
+ BaseContainerResult.ErrorCode);
+ }
+ else
+ {
+ CbArrayView BlocksArray = BaseContainerResult.ContainerObject["blocks"sv].AsArrayView();
+ for (CbFieldView BlockField : BlocksArray)
+ {
+ CbObjectView BlockView = BlockField.AsObjectView();
+ IoHash BlockHash = BlockView["rawhash"sv].AsBinaryAttachment();
+
+ std::vector<IoHash> ChunksInBlock;
+ CbArrayView ChunksArray = BlockView["chunks"sv].AsArrayView();
+ if (BlockHash == IoHash::Zero)
+ {
+ continue;
+ }
+
+ ChunksInBlock.reserve(ChunksArray.Num());
+ for (CbFieldView ChunkField : ChunksArray)
+ {
+ ChunksInBlock.push_back(ChunkField.AsHash());
+ }
+ KnownBlocks.push_back({.BlockHash = BlockHash, .ChunksInBlock = std::move(ChunksInBlock)});
+ };
+ }
+ }
+ }
+
tsl::robin_map<IoHash, IoBuffer, IoHash::Hasher> TempAttachments;
CbObject OplogContainerObject = BuildContainer(ChunkStore,
Project,
@@ -785,6 +874,7 @@ SaveOplog(CidStore& ChunkStore,
MaxBlockSize,
MaxChunkEmbedSize,
BuildBlocks,
+ KnownBlocks,
WorkerPool,
OnBlock,
OnLargeAttachment,