aboutsummaryrefslogtreecommitdiff
path: root/src/zenstore/caslog.cpp
diff options
context:
space:
mode:
author   Stefan Boberg <[email protected]>  2023-11-27 14:32:19 +0100
committer   GitHub <[email protected]>  2023-11-27 14:32:19 +0100
commit   4d95b578350ebfbbf6d54407c9403547b01cac4c (patch)
tree   9f8df5d934a6a62fdcebeac94dffe52139d3ea6b /src/zenstore/caslog.cpp
parent   gc stop command (#569) (diff)
download   zen-4d95b578350ebfbbf6d54407c9403547b01cac4c.tar.xz
   zen-4d95b578350ebfbbf6d54407c9403547b01cac4c.zip
optimized index snapshot reading/writing (#561)
The previous implementation of in-memory index snapshots serialised data to memory before writing to disk, and vice versa when reading. This led to memory spikes which ended up pushing useful data out of the system cache, and also caused stalls on I/O operations. This change moves more code to a streaming serialisation approach, which scales better from a memory-usage perspective and also performs much better.
Diffstat (limited to 'src/zenstore/caslog.cpp')
-rw-r--r--   src/zenstore/caslog.cpp   26
1 file changed, 18 insertions, 8 deletions
diff --git a/src/zenstore/caslog.cpp b/src/zenstore/caslog.cpp
index c04324fbc..cf3bd76da 100644
--- a/src/zenstore/caslog.cpp
+++ b/src/zenstore/caslog.cpp
@@ -188,20 +188,30 @@ CasLogFile::Replay(std::function<void(const void*)>&& Handler, uint64_t SkipEntr
LogBaseOffset += SkipEntryCount * m_RecordSize;
LogEntryCount -= SkipEntryCount;
- // This should really be streaming the data rather than just
- // reading it into memory, though we don't tend to get very
- // large logs so it may not matter
+ const uint64_t LogDataSize = LogEntryCount * m_RecordSize;
+ uint64_t LogDataRemain = LogDataSize;
- const uint64_t LogDataSize = LogEntryCount * m_RecordSize;
+ const uint64_t MaxBufferSize = 1024 * 1024;
std::vector<uint8_t> ReadBuffer;
- ReadBuffer.resize(LogDataSize);
+ ReadBuffer.resize((Min(LogDataSize, MaxBufferSize) / m_RecordSize) * m_RecordSize);
- m_File.Read(ReadBuffer.data(), LogDataSize, LogBaseOffset);
+ uint64_t ReadOffset = 0;
- for (int i = 0; i < int(LogEntryCount); ++i)
+ while (LogDataRemain)
{
- Handler(ReadBuffer.data() + (i * m_RecordSize));
+ const uint64_t BytesToRead = Min(ReadBuffer.size(), LogDataRemain);
+ const uint64_t EntriesToRead = BytesToRead / m_RecordSize;
+
+ m_File.Read(ReadBuffer.data(), BytesToRead, LogBaseOffset + ReadOffset);
+
+ for (int i = 0; i < int(EntriesToRead); ++i)
+ {
+ Handler(ReadBuffer.data() + (i * m_RecordSize));
+ }
+
+ LogDataRemain -= BytesToRead;
+ ReadOffset += BytesToRead;
}
m_AppendOffset = LogBaseOffset + (m_RecordSize * LogEntryCount);