diff options
| author | Dan Engelbrecht <[email protected]> | 2025-10-14 13:13:59 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-10-14 13:13:59 +0200 |
| commit | 9b7580230798d83d9bb36d40150913af69a13929 (patch) | |
| tree | 73552ec1d3e9d955ce391cad894c637b74be91d4 /src/zenremotestore/include | |
| parent | move all storage-related services into storage tree (#571) (diff) | |
| download | zen-9b7580230798d83d9bb36d40150913af69a13929.tar.xz zen-9b7580230798d83d9bb36d40150913af69a13929.zip | |
refactor builds cmd part2 (#572)
* fix metadata info in filebuildstorage GetBuild
* move MakeSafeAbsolutePathÍnPlace to filesystem.h/cpp
* add BuildsOperationUploadFolder op moving code from builds_cmd.cpp
Diffstat (limited to 'src/zenremotestore/include')
| -rw-r--r-- | src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h | 245 |
1 files changed, 240 insertions, 5 deletions
diff --git a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h index 0e719edc6..8ba32127a 100644 --- a/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h +++ b/src/zenremotestore/include/zenremotestore/builds/buildstorageoperations.h @@ -22,6 +22,7 @@ class HttpClient; class ParallelWork; class WorkerThreadPool; class FilteredRate; +class ReadFileCache; class BufferedWriteFileCache; struct ChunkBlockDescription; @@ -192,8 +193,8 @@ public: bool PrimeCacheOnly = false; bool EnableOtherDownloadsScavenging = true; bool EnableTargetFolderScavenging = true; - std::vector<std::string> DefaultExcludeFolders; - std::vector<std::string> DefaultExcludeExtensions; + std::vector<std::string> ExcludeFolders; + std::vector<std::string> ExcludeExtensions; }; BuildsOperationUpdateFolder(BuildOpLogOutput& LogOutput, @@ -210,11 +211,11 @@ public: const ChunkedContentLookup& RemoteLookup, const std::vector<ChunkBlockDescription>& BlockDescriptions, const std::vector<IoHash>& LooseChunkHashes, - const Options& Options, - DiskStatistics& DiskStats); + const Options& Options); void Execute(FolderContent& OutLocalFolderState); + DiskStatistics m_DiskStats; CacheMappingStatistics m_CacheMappingStats; GetFolderContentStatistics m_ScavengedFolderScanStats; DownloadStatistics m_DownloadStats; @@ -372,10 +373,244 @@ private: const std::vector<ChunkBlockDescription>& m_BlockDescriptions; const std::vector<IoHash>& m_LooseChunkHashes; const Options m_Options; - DiskStatistics& m_DiskStats; const std::filesystem::path m_CacheFolderPath; }; +struct FindBlocksStatistics +{ + uint64_t FindBlockTimeMS = 0; + uint64_t PotentialChunkCount = 0; + uint64_t PotentialChunkByteCount = 0; + uint64_t FoundBlockCount = 0; + uint64_t FoundBlockChunkCount = 0; + uint64_t FoundBlockByteCount = 0; + uint64_t AcceptedBlockCount = 0; + uint64_t AcceptedChunkCount = 0; + uint64_t 
AcceptedByteCount = 0; + uint64_t AcceptedRawByteCount = 0; + uint64_t RejectedBlockCount = 0; + uint64_t RejectedChunkCount = 0; + uint64_t RejectedByteCount = 0; + uint64_t AcceptedReduntantChunkCount = 0; + uint64_t AcceptedReduntantByteCount = 0; + uint64_t NewBlocksCount = 0; + uint64_t NewBlocksChunkCount = 0; + uint64_t NewBlocksChunkByteCount = 0; +}; + +struct UploadStatistics +{ + std::atomic<uint64_t> BlockCount = 0; + std::atomic<uint64_t> BlocksBytes = 0; + std::atomic<uint64_t> ChunkCount = 0; + std::atomic<uint64_t> ChunksBytes = 0; + std::atomic<uint64_t> ReadFromDiskBytes = 0; + std::atomic<uint64_t> MultipartAttachmentCount = 0; + uint64_t ElapsedWallTimeUS = 0; + + UploadStatistics& operator+=(const UploadStatistics& Rhs) + { + BlockCount += Rhs.BlockCount; + BlocksBytes += Rhs.BlocksBytes; + ChunkCount += Rhs.ChunkCount; + ChunksBytes += Rhs.ChunksBytes; + ReadFromDiskBytes += Rhs.ReadFromDiskBytes; + MultipartAttachmentCount += Rhs.MultipartAttachmentCount; + ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + return *this; + } +}; + +struct LooseChunksStatistics +{ + uint64_t ChunkCount = 0; + uint64_t ChunkByteCount = 0; + std::atomic<uint64_t> CompressedChunkCount = 0; + std::atomic<uint64_t> CompressedChunkRawBytes = 0; + std::atomic<uint64_t> CompressedChunkBytes = 0; + uint64_t CompressChunksElapsedWallTimeUS = 0; + + LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs) + { + ChunkCount += Rhs.ChunkCount; + ChunkByteCount += Rhs.ChunkByteCount; + CompressedChunkCount += Rhs.CompressedChunkCount; + CompressedChunkRawBytes += Rhs.CompressedChunkRawBytes; + CompressedChunkBytes += Rhs.CompressedChunkBytes; + CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS; + return *this; + } +}; + +struct GenerateBlocksStatistics +{ + std::atomic<uint64_t> GeneratedBlockByteCount = 0; + std::atomic<uint64_t> GeneratedBlockCount = 0; + uint64_t GenerateBlocksElapsedWallTimeUS = 0; + + GenerateBlocksStatistics& operator+=(const 
GenerateBlocksStatistics& Rhs) + { + GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount; + GeneratedBlockCount += Rhs.GeneratedBlockCount; + GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS; + return *this; + } +}; + +class BuildsOperationUploadFolder +{ +public: + struct ChunksBlockParameters + { + size_t MaxBlockSize = 64u * 1024u * 1024u; + size_t MaxChunksPerBlock = 4u * 1000u; + size_t MaxChunkEmbedSize = 3u * 512u * 1024u; + }; + + struct Options + { + bool IsQuiet = false; + bool IsVerbose = false; + + const uint64_t FindBlockMaxCount = 10000; + const uint8_t BlockReuseMinPercentLimit = 85; + bool AllowMultiparts = true; + bool IgnoreExistingBlocks = false; + ChunksBlockParameters BlockParameters; + + uint32_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + const uint64_t MinimumSizeForCompressInBlock = 2u * 1024u; + + std::filesystem::path TempDir; + std::vector<std::string> ExcludeFolders; + std::vector<std::string> ExcludeExtensions; + std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; + }; + BuildsOperationUploadFolder(BuildOpLogOutput& LogOutput, + StorageInstance& Storage, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag, + WorkerThreadPool& IOWorkerPool, + WorkerThreadPool& NetworkPool, + const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath, + bool CreateBuild, + const CbObject& MetaData, + const Options& Options); + + void Execute(); + + DiskStatistics m_DiskStats; + GetFolderContentStatistics m_LocalFolderScanStats; + ChunkingStatistics m_ChunkingStats; + FindBlocksStatistics m_FindBlocksStats; + UploadStatistics m_UploadStats; + GenerateBlocksStatistics m_GenerateBlocksStats; + LooseChunksStatistics m_LooseChunksStats; + +private: + std::vector<size_t> FindReuseBlocks(const std::vector<ChunkBlockDescription>& KnownBlocks, + std::span<const IoHash> ChunkHashes, + 
std::span<const uint32_t> ChunkIndexes, + std::vector<uint32_t>& OutUnusedChunkIndexes); + void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::vector<uint32_t>& ChunkIndexes, + std::vector<std::vector<uint32_t>>& OutBlocks); + struct GeneratedBlocks + { + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<uint64_t> BlockSizes; + std::vector<CompositeBuffer> BlockHeaders; + std::vector<CbObject> BlockMetaDatas; + std::vector<uint8_t> + MetaDataHasBeenUploaded; // NOTE: Do not use std::vector<bool> here as this vector is modified by multiple threads + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockHashToBlockIndex; + }; + + void GenerateBuildBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& OutBlocks); + + std::vector<uint32_t> CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes, + const std::span<const uint32_t> LocalChunkOrder, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex, + const std::span<const uint32_t>& LooseChunkIndexes, + const std::span<const ChunkBlockDescription>& BlockDescriptions); + void CalculateLocalChunkOrders(const std::span<const uint32_t>& AbsoluteChunkOrders, + const std::span<const IoHash> LooseChunkHashes, + const std::span<const uint64_t> LooseChunkRawSizes, + const std::span<const ChunkBlockDescription>& BlockDescriptions, + std::vector<IoHash>& OutLocalChunkHashes, + std::vector<uint64_t>& OutLocalChunkRawSizes, + std::vector<uint32_t>& OutLocalChunkOrders); + + void WriteBuildContentToCompactBinary(CbObjectWriter& PartManifestWriter, + const SourcePlatform Platform, + std::span<const std::filesystem::path> Paths, + std::span<const IoHash> RawHashes, + std::span<const uint64_t> RawSizes, + std::span<const uint32_t> Attributes, + std::span<const IoHash> SequenceRawHashes, + std::span<const 
uint32_t> ChunkCounts, + std::span<const IoHash> LocalChunkHashes, + std::span<const uint64_t> LocalChunkRawSizes, + const std::vector<uint32_t>& AbsoluteChunkOrders, + const std::span<const uint32_t> LooseLocalChunkIndexes, + const std::span<IoHash> BlockHashes); + + CompositeBuffer FetchChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const IoHash& ChunkHash, + ReadFileCache& OpenFileCache); + + CompressedBuffer GenerateBlock(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<uint32_t>& ChunksInBlock, + ChunkBlockDescription& OutBlockDescription); + + CompressedBuffer RebuildBlock(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + CompositeBuffer&& HeaderBuffer, + const std::vector<uint32_t>& ChunksInBlock); + + void UploadPartBlobs(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span<IoHash> RawHashes, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + const std::uint64_t LargeAttachmentSize, + UploadStatistics& TempUploadStats, + LooseChunksStatistics& TempLooseChunksStats); + + CompositeBuffer CompressChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex, + LooseChunksStatistics& TempLooseChunksStats); + + BuildOpLogOutput& m_LogOutput; + StorageInstance& m_Storage; + std::atomic<bool>& m_AbortFlag; + std::atomic<bool>& m_PauseFlag; + WorkerThreadPool& m_IOWorkerPool; + WorkerThreadPool& m_NetworkPool; + const Oid m_BuildId; + const Oid m_BuildPartId; + const std::string m_BuildPartName; + + const std::filesystem::path m_Path; + const std::filesystem::path m_ManifestPath; + const bool m_CreateBuild; // ?? Member? + const CbObject m_MetaData; // ?? 
Member + const Options m_Options; +}; + void DownloadLargeBlob(BuildStorage& Storage, const std::filesystem::path& DownloadFolder, const Oid& BuildId, |