diff options
| author | Dan Engelbrecht <[email protected]> | 2025-03-18 09:48:52 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-03-18 09:48:52 +0100 |
| commit | 0295cd45276c9b99f72dd0f35f3b5d5ffe7cb0df (patch) | |
| tree | 007d94ef784fe20644e2a3a88a879ff406a3d07f /src | |
| parent | Reduced disk I/O when writing out chunk blocks during download (#309) (diff) | |
| download | zen-0295cd45276c9b99f72dd0f35f3b5d5ffe7cb0df.tar.xz zen-0295cd45276c9b99f72dd0f35f3b5d5ffe7cb0df.zip | |
collapse local writes (#310)
* collapse read/writes during local data copy
Diffstat (limited to 'src')
| -rw-r--r-- | src/zen/cmds/builds_cmd.cpp | 349 | ||||
| -rw-r--r-- | src/zencore/basicfile.cpp | 6 |
2 files changed, 199 insertions, 156 deletions
diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp index 8e3d50790..8566f1540 100644 --- a/src/zen/cmds/builds_cmd.cpp +++ b/src/zen/cmds/builds_cmd.cpp @@ -1520,7 +1520,6 @@ namespace { } WorkerThreadPool& NetworkPool = GetSmallWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // - WorkerThreadPool& ReadPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // WorkerThreadPool& VerifyPool = GetMediumWorkerPool(EWorkloadType::Burst); // GetSyncWorkerPool(); // ParallellWork Work(AbortFlag); @@ -1545,7 +1544,7 @@ namespace { for (const IoHash& ChunkAttachment : ChunkAttachments) { Work.ScheduleWork( - ReadPool, + NetworkPool, [&, ChunkAttachment](std::atomic<bool>&) { if (!AbortFlag) { @@ -5043,160 +5042,6 @@ namespace { } } - for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) - { - if (AbortFlag) - { - break; - } - - Work.ScheduleWork( - WritePool, // GetSyncWorkerPool(),// - [&, CopyDataIndex](std::atomic<bool>&) { - if (!AbortFlag) - { - ZEN_TRACE_CPU("UpdateFolder_CopyLocal"); - - FilteredWrittenBytesPerSecond.Start(); - const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; - const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex]; - const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); - ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); - - uint64_t CacheLocalFileBytesRead = 0; - - size_t TargetStart = 0; - const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets( - CopyData.TargetChunkLocationPtrs); - - struct WriteOp - { - const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; - uint64_t CacheFileOffset = (uint64_t)-1; - uint32_t ChunkIndex = (uint32_t)-1; - }; - - std::vector<WriteOp> WriteOps; - - if (!AbortFlag) - { - ZEN_TRACE_CPU("Sort"); - WriteOps.reserve(AllTargets.size()); - for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) - { - std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange = - AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); - for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) - { - WriteOps.push_back(WriteOp{.Target = Target, - .CacheFileOffset = ChunkTarget.CacheFileOffset, - .ChunkIndex = ChunkTarget.RemoteChunkIndex}); - } - TargetStart += ChunkTarget.TargetChunkLocationCount; - } - - std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { - if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) - { - return true; - } - else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) - { - return false; - } - if (Lhs.Target->Offset < Rhs.Target->Offset) - { - return true; - } - return false; - }); - } - - if (!AbortFlag) - { - ZEN_TRACE_CPU("Write"); - - tsl::robin_set<uint32_t> ChunkIndexesWritten; - - BufferedOpenFile SourceFile(LocalFilePath, DiskStats); - WriteFileCache OpenFileCache(DiskStats); - for (const WriteOp& Op : WriteOps) - { - if (AbortFlag) - { - break; - } - const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() <= - RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]); - ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() > 0); - const uint32_t RemotePathIndex = RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; - const uint64_t ChunkSize = RemoteContent.ChunkedContent.ChunkRawSizes[Op.ChunkIndex]; - CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ChunkSize); - - ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); - - OpenFileCache.WriteToFile<CompositeBuffer>( - RemoteSequenceIndex, - [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { - return GetTempChunkedSequenceFileName( - CacheFolderPath, - RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); - }, - ChunkSource, - Op.Target->Offset, - RemoteContent.RawSizes[RemotePathIndex]); - - if (ChunkIndexesWritten.insert(Op.ChunkIndex).second) - { - WriteChunkStats.ChunkCountWritten++; - WriteChunkStats.ChunkBytesWritten += ChunkSize; - } - - CacheLocalFileBytesRead += ChunkSize; // TODO: This should be the sum of unique chunk sizes? - } - } - if (!AbortFlag) - { - // Write tracking, updating this must be done without any files open (WriteFileCache) - for (const WriteOp& Op : WriteOps) - { - const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; - if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) - { - const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; - { - ZEN_TRACE_CPU("VerifyHash"); - const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( - GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - if (VerifyChunkHash != SequenceRawHash) - { - throw std::runtime_error( - fmt::format("Written chunk sequence {} hash does not match expected hash {}", - VerifyChunkHash, - SequenceRawHash)); - } - } - - ZEN_TRACE_CPU("rename"); - ZEN_ASSERT_SLOW( - !std::filesystem::exists(GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); - std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), - GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); - } - } - ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), LocalContent.Paths[LocalPathIndex]); - } - WritePartsComplete++; - if (WritePartsComplete == TotalPartWriteCount) - { - FilteredWrittenBytesPerSecond.Stop(); - } - } - }, - Work.DefaultErrorFunction()); - } - for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++) { if (AbortFlag) @@ -5386,6 +5231,198 @@ namespace { Work.DefaultErrorFunction()); } + for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) + { + if (AbortFlag) + { + break; + } + + Work.ScheduleWork( + WritePool, // GetSyncWorkerPool(),// + [&, CopyDataIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_CopyLocal"); + + FilteredWrittenBytesPerSecond.Start(); + const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; + const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.LocalSequenceIndex]; + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); + + uint64_t CacheLocalFileBytesRead = 0; + + size_t TargetStart = 0; + const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets( + CopyData.TargetChunkLocationPtrs); + + struct WriteOp + { + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + uint64_t CacheFileOffset = (uint64_t)-1; + uint32_t ChunkIndex = (uint32_t)-1; + }; + + std::vector<WriteOp> WriteOps; + + if (!AbortFlag) + { + ZEN_TRACE_CPU("Sort"); + WriteOps.reserve(AllTargets.size()); + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) + { + WriteOps.push_back(WriteOp{.Target = Target, + .CacheFileOffset = ChunkTarget.CacheFileOffset, + .ChunkIndex = ChunkTarget.RemoteChunkIndex}); + } + TargetStart += ChunkTarget.TargetChunkLocationCount; + } + + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } + return false; + }); + } + + if (!AbortFlag) + { + ZEN_TRACE_CPU("Write"); + + tsl::robin_set<uint32_t> ChunkIndexesWritten; + + BufferedOpenFile SourceFile(LocalFilePath, DiskStats); + WriteFileCache OpenFileCache(DiskStats); + for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();) + { + if (AbortFlag) + { + break; + } + const WriteOp& Op = WriteOps[WriteOpIndex]; + + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() > 0); + const uint32_t RemotePathIndex = RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t ChunkSize = RemoteContent.ChunkedContent.ChunkRawSizes[Op.ChunkIndex]; + + uint64_t ReadLength = ChunkSize; + size_t WriteCount = 1; + uint64_t OpSourceEnd = Op.CacheFileOffset + ChunkSize; + uint64_t OpTargetEnd = Op.Target->Offset + ChunkSize; + while ((WriteOpIndex + WriteCount) < WriteOps.size()) + { + const WriteOp& NextOp = WriteOps[WriteOpIndex + WriteCount]; + if (NextOp.Target->SequenceIndex != Op.Target->SequenceIndex) + { + break; + } + if (NextOp.Target->Offset != OpTargetEnd) + { + break; + } + if (NextOp.CacheFileOffset != OpSourceEnd) + { + break; + } + const uint64_t NextChunkLength = RemoteContent.ChunkedContent.ChunkRawSizes[NextOp.ChunkIndex]; + if (ReadLength + NextChunkLength > 512u * 1024u) + { + break; + } + ReadLength += NextChunkLength; + OpSourceEnd += NextChunkLength; + OpTargetEnd += NextChunkLength; + WriteCount++; + } + + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ReadLength); + ZEN_ASSERT(Op.Target->Offset + ChunkSource.GetSize() <= RemoteContent.RawSizes[RemotePathIndex]); + + OpenFileCache.WriteToFile<CompositeBuffer>( + RemoteSequenceIndex, + [&CacheFolderPath, &RemoteContent](uint32_t SequenceIndex) { + return GetTempChunkedSequenceFileName( + CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + }, + ChunkSource, + Op.Target->Offset, + RemoteContent.RawSizes[RemotePathIndex]); + for (size_t WrittenOpIndex = WriteOpIndex; WrittenOpIndex < WriteOpIndex + WriteCount; WrittenOpIndex++) + { + const WriteOp& WrittenOp = WriteOps[WrittenOpIndex]; + if (ChunkIndexesWritten.insert(WrittenOp.ChunkIndex).second) + { + WriteChunkStats.ChunkCountWritten++; + WriteChunkStats.ChunkBytesWritten += + RemoteContent.ChunkedContent.ChunkRawSizes[WrittenOp.ChunkIndex]; + } + } + + CacheLocalFileBytesRead += ReadLength; // TODO: This should be the sum of unique chunk sizes? + + WriteOpIndex += WriteCount; + } + } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (WriteFileCache) + for (const WriteOp& Op : WriteOps) + { + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + if (SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1) == 1) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + { + ZEN_TRACE_CPU("VerifyHash"); + const IoHash VerifyChunkHash = IoHash::HashBuffer(IoBufferBuilder::MakeFromFile( + GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", + VerifyChunkHash, + SequenceRawHash)); + } + } + + ZEN_TRACE_CPU("rename"); + ZEN_ASSERT_SLOW( + !std::filesystem::exists(GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash))); + std::filesystem::rename(GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash), + GetFinalChunkedSequenceFileName(CacheFolderPath, SequenceRawHash)); + } + } + ZEN_DEBUG("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), LocalContent.Paths[LocalPathIndex]); + } + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }, + Work.DefaultErrorFunction()); + } + for (uint32_t BlockIndex : CachedChunkBlockIndexes) { if (AbortFlag) diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index 95876cff4..a181bbd66 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -796,6 +796,12 @@ BasicFileWriter::Write(const void* Data, uint64_t Size, uint64_t FileOffset) { if (m_Buffer == nullptr || (Size >= m_BufferSize)) { + if (FileOffset == m_BufferEnd) + { + Flush(); + m_BufferStart = m_BufferEnd = FileOffset + Size; + } + m_Base.Write(Data, Size, FileOffset); return; } |