mem cache perf improvements (#592)

- Improvement: Refactor memory cache for faster trimming and correct trim reporting
- Improvement: Added trace scopes for memory cache trimming

Adding a link back to the cache item payload on the memory cache item allows us to iterate over only the items cached in memory instead of over the entire index. This also allows us to efficiently compact the memory cache array when trimming. It adds 4 bytes of overhead to each item cached in memory.
author: Dan Engelbrecht <[email protected]> 2023-12-11 06:36:48 -0500
committer: GitHub <[email protected]> 2023-12-11 12:36:48 +0100
commit: c6cce91a514ba747b19f4fe8acfd2443405c960d (patch)
tree: 9fc2916f7b730cd769492f7fdf45ef454381fb62 /src/zenserver/cache/cachedisklayer.cpp
parent: Update CHANGELOG.md (diff)
download: zen-c6cce91a514ba747b19f4fe8acfd2443405c960d.tar.xz
zen-c6cce91a514ba747b19f4fe8acfd2443405c960d.zip
1 files changed, 132 insertions, 104 deletions
diff --git a/src/zenserver/cache/cachedisklayer.cpp b/src/zenserver/cache/cachedisklayer.cpp
index 13f3c9e58..0987cd0f1 100644
--- a/src/zenserver/cache/cachedisklayer.cpp
+++ b/src/zenserver/cache/cachedisklayer.cpp
@@ -209,9 +209,6 @@ namespace {
 			zen::Sleep(100);
 		} while (true);
 	}
-
-	uint64_t EstimateMemCachePayloadMemory(uint64_t PayloadSize) { return 8u + 32u + RoundUp(PayloadSize, 8u); }
-
 }  // namespace
 
 namespace fs = std::filesystem;
@@ -1189,7 +1186,7 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal
 		return false;
 	}
 
-	size_t EntryIndex		  = It.value();
+	PayloadIndex EntryIndex	  = It.value();
 	m_AccessTimes[EntryIndex] = GcClock::TickCount();
 	DiskLocation Location	  = m_Payloads[EntryIndex].Location;
 
@@ -1206,7 +1203,7 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal
 
 	if (Payload->MemCached)
 	{
-		OutValue.Value = m_MemCachedPayloads[Payload->MemCached];
+		OutValue.Value = m_MemCachedPayloads[Payload->MemCached].Payload;
 		Payload		   = nullptr;
 		IndexLock.ReleaseNow();
 		m_MemoryHitCount++;
@@ -1240,7 +1237,7 @@ ZenCacheDiskLayer::CacheBucket::Get(const IoHash& HashKey, ZenCacheValue& OutVal
 						// Only update if it has not already been updated by other thread
 						if (!WritePayload.MemCached)
 						{
-							SetMemCachedData(UpdateIndexLock, WritePayload, OutValue.Value);
+							SetMemCachedData(UpdateIndexLock, UpdateIt->second, OutValue.Value);
 						}
 					}
 				}
@@ -1307,64 +1304,84 @@ ZenCacheDiskLayer::CacheBucket::Put(const IoHash& HashKey, const ZenCacheValue&
 	m_DiskWriteCount++;
 }
 
-void
+uint64_t
 ZenCacheDiskLayer::CacheBucket::MemCacheTrim(GcClock::TimePoint ExpireTime)
 {
+	ZEN_TRACE_CPU("Z$::Disk::Bucket::MemCacheTrim");
+
+	uint64_t	  Trimmed	  = 0;
 	GcClock::Tick ExpireTicks = ExpireTime.time_since_epoch().count();
 
 	RwLock::ExclusiveLockScope IndexLock(m_IndexLock);
-	if (m_MemCachedPayloads.empty())
+	uint32_t				   MemCachedCount = gsl::narrow<uint32_t>(m_MemCachedPayloads.size());
+	if (MemCachedCount == 0)
 	{
-		return;
+		return 0;
 	}
-	for (const auto& Kv : m_Index)
+
+	uint32_t WriteIndex = 0;
+	for (uint32_t ReadIndex = 0; ReadIndex < MemCachedCount; ++ReadIndex)
 	{
-		size_t		   Index   = Kv.second;
-		BucketPayload& Payload = m_Payloads[Index];
-		if (!Payload.MemCached)
+		MemCacheData& Data = m_MemCachedPayloads[ReadIndex];
+		if (!Data.Payload)
 		{
 			continue;
 		}
-		if (m_AccessTimes[Index] < ExpireTicks)
+		PayloadIndex Index = Data.OwnerIndex;
+		ZEN_ASSERT_SLOW(m_Payloads[Index].MemCached == MemCachedIndex(ReadIndex));
+		GcClock::Tick AccessTime = m_AccessTimes[Index];
+		if (AccessTime < ExpireTicks)
+		{
+			size_t PayloadSize = Data.Payload.GetSize();
+			RemoveMemCacheUsage(EstimateMemCachePayloadMemory(PayloadSize));
+			Data						= {};
+			m_Payloads[Index].MemCached = {};
+			Trimmed += PayloadSize;
+			continue;
+		}
+		if (ReadIndex > WriteIndex)
 		{
-			RemoveMemCachedData(IndexLock, Payload);
+			m_MemCachedPayloads[WriteIndex] = MemCacheData{.Payload = std::move(Data.Payload), .OwnerIndex = Index};
+			m_Payloads[Index].MemCached		= MemCachedIndex(WriteIndex);
 		}
+		WriteIndex++;
 	}
+	m_MemCachedPayloads.resize(WriteIndex);
 	m_MemCachedPayloads.shrink_to_fit();
-	m_FreeMemCachedPayloads.shrink_to_fit();
-	m_FreeMetaDatas.shrink_to_fit();
+	zen::Reset(m_FreeMemCachedPayloads);
+	return Trimmed;
 }
 
 void
-ZenCacheDiskLayer::CacheBucket::GetUsageByAccess(GcClock::TimePoint		TickStart,
-												 GcClock::Duration		SectionLength,
-												 std::vector<uint64_t>& InOutUsageSlots)
+ZenCacheDiskLayer::CacheBucket::GetUsageByAccess(GcClock::TimePoint Now, GcClock::Duration MaxAge, std::vector<uint64_t>& InOutUsageSlots)
 {
+	ZEN_TRACE_CPU("Z$::Disk::Bucket::GetUsageByAccess");
+
+	size_t					SlotCount = InOutUsageSlots.capacity();
 	RwLock::SharedLockScope _(m_IndexLock);
-	if (m_MemCachedPayloads.empty())
+	uint32_t				MemCachedCount = gsl::narrow<uint32_t>(m_MemCachedPayloads.size());
+	if (MemCachedCount == 0)
 	{
 		return;
 	}
-	for (const auto& It : m_Index)
+	for (uint32_t ReadIndex = 0; ReadIndex < MemCachedCount; ++ReadIndex)
 	{
-		size_t		   Index   = It.second;
-		BucketPayload& Payload = m_Payloads[Index];
-		if (!Payload.MemCached)
+		MemCacheData& Data = m_MemCachedPayloads[ReadIndex];
+		if (!Data.Payload)
 		{
 			continue;
 		}
+		PayloadIndex Index = Data.OwnerIndex;
+		ZEN_ASSERT_SLOW(m_Payloads[Index].MemCached == MemCachedIndex(ReadIndex));
 		GcClock::TimePoint ItemAccessTime = GcClock::TimePointFromTick(GcClock::Tick(m_AccessTimes[Index]));
-		GcClock::Duration  Age			  = TickStart.time_since_epoch() - ItemAccessTime.time_since_epoch();
-		uint64_t		   Slot			  = gsl::narrow<uint64_t>(Age.count() > 0 ? Age.count() / SectionLength.count() : 0);
-		if (Slot >= InOutUsageSlots.capacity())
+		GcClock::Duration  Age			  = Now > ItemAccessTime ? Now - ItemAccessTime : GcClock::Duration(0);
+		size_t			   Slot = Age < MaxAge ? gsl::narrow<size_t>((Age.count() * SlotCount) / MaxAge.count()) : (SlotCount - 1);
+		ZEN_ASSERT_SLOW(Slot < SlotCount);
+		if (Slot >= InOutUsageSlots.size())
 		{
-			Slot = InOutUsageSlots.capacity() - 1;
+			InOutUsageSlots.resize(Slot + 1, 0);
 		}
-		if (Slot > InOutUsageSlots.size())
-		{
-			InOutUsageSlots.resize(uint64_t(Slot + 1), 0);
-		}
-		InOutUsageSlots[Slot] += m_MemCachedPayloads[Payload.MemCached].GetSize();
+		InOutUsageSlots[Slot] += EstimateMemCachePayloadMemory(Data.Payload.GetSize());
 	}
 }
 
@@ -1823,7 +1840,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx)
 				std::vector<BucketPayload>	Payloads;
 				std::vector<AccessTime>		AccessTimes;
 				std::vector<BucketMetaData> MetaDatas;
-				std::vector<IoBuffer>		MemCachedPayloads;
+				std::vector<MemCacheData>	MemCachedPayloads;
 				std::vector<ReferenceIndex> FirstReferenceIndex;
 				IndexMap					Index;
 
@@ -2002,7 +2019,7 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx)
 					const BucketPayload& CachedPayload = Payloads[It->second];
 					if (CachedPayload.MemCached)
 					{
-						Buffer = m_MemCachedPayloads[CachedPayload.MemCached];
+						Buffer = m_MemCachedPayloads[CachedPayload.MemCached].Payload;
 						ZEN_ASSERT_SLOW(Buffer);
 					}
 					else
@@ -2124,7 +2141,7 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx)
 			std::vector<BucketPayload>	Payloads;
 			std::vector<AccessTime>		AccessTimes;
 			std::vector<BucketMetaData> MetaDatas;
-			std::vector<IoBuffer>		MemCachedPayloads;
+			std::vector<MemCacheData>	MemCachedPayloads;
 			std::vector<ReferenceIndex> FirstReferenceIndex;
 			IndexMap					Index;
 			{
@@ -2468,7 +2485,10 @@ ZenCacheDiskLayer::CollectGarbage(GcContext& GcCtx)
 	{
 		Bucket->CollectGarbage(GcCtx);
 	}
-	MemCacheTrim(Buckets, GcCtx.CacheExpireTime());
+	if (!m_IsMemCacheTrimming)
+	{
+		MemCacheTrim(Buckets, GcCtx.CacheExpireTime());
+	}
 }
 
 void
@@ -2671,16 +2691,17 @@ ZenCacheDiskLayer::CacheBucket::RemoveMetaData(RwLock::ExclusiveLockScope&, Buck
 }
 
 void
-ZenCacheDiskLayer::CacheBucket::SetMemCachedData(RwLock::ExclusiveLockScope&, BucketPayload& Payload, IoBuffer& MemCachedData)
+ZenCacheDiskLayer::CacheBucket::SetMemCachedData(RwLock::ExclusiveLockScope&, PayloadIndex PayloadIndex, IoBuffer& MemCachedData)
 {
-	uint64_t PayloadSize = MemCachedData.GetSize();
+	BucketPayload& Payload	   = m_Payloads[PayloadIndex];
+	uint64_t	   PayloadSize = MemCachedData.GetSize();
 	ZEN_ASSERT(PayloadSize != 0);
 	if (m_FreeMemCachedPayloads.empty())
 	{
 		if (m_MemCachedPayloads.size() != std::numeric_limits<uint32_t>::max())
 		{
 			Payload.MemCached = MemCachedIndex(gsl::narrow<uint32_t>(m_MemCachedPayloads.size()));
-			m_MemCachedPayloads.push_back(MemCachedData);
+			m_MemCachedPayloads.emplace_back(MemCacheData{.Payload = MemCachedData, .OwnerIndex = PayloadIndex});
 			AddMemCacheUsage(EstimateMemCachePayloadMemory(PayloadSize));
 			m_MemoryWriteCount++;
 		}
@@ -2689,7 +2710,7 @@ ZenCacheDiskLayer::CacheBucket::SetMemCachedData(RwLock::ExclusiveLockScope&, Bu
 	{
 		Payload.MemCached = m_FreeMemCachedPayloads.back();
 		m_FreeMemCachedPayloads.pop_back();
-		m_MemCachedPayloads[Payload.MemCached] = MemCachedData;
+		m_MemCachedPayloads[Payload.MemCached] = MemCacheData{.Payload = MemCachedData, .OwnerIndex = PayloadIndex};
 		AddMemCacheUsage(EstimateMemCachePayloadMemory(PayloadSize));
 		m_MemoryWriteCount++;
 	}
@@ -2700,9 +2721,9 @@ ZenCacheDiskLayer::CacheBucket::RemoveMemCachedData(RwLock::ExclusiveLockScope&,
 {
 	if (Payload.MemCached)
 	{
-		size_t PayloadSize = m_MemCachedPayloads[Payload.MemCached].GetSize();
+		size_t PayloadSize = m_MemCachedPayloads[Payload.MemCached].Payload.GetSize();
 		RemoveMemCacheUsage(EstimateMemCachePayloadMemory(PayloadSize));
-		m_MemCachedPayloads[Payload.MemCached] = IoBuffer{};
+		m_MemCachedPayloads[Payload.MemCached] = {};
 		m_FreeMemCachedPayloads.push_back(Payload.MemCached);
 		Payload.MemCached = {};
 		return PayloadSize;
@@ -3117,7 +3138,7 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats)
 		std::vector<BucketPayload>	Payloads;
 		std::vector<AccessTime>		AccessTimes;
 		std::vector<BucketMetaData> MetaDatas;
-		std::vector<IoBuffer>		MemCachedPayloads;
+		std::vector<MemCacheData>	MemCachedPayloads;
 		std::vector<ReferenceIndex> FirstReferenceIndex;
 		IndexMap					Index;
 		{
@@ -3708,7 +3729,7 @@ ZenCacheDiskLayer::CacheBucket::CompactState(RwLock::ExclusiveLockScope&,
 											 std::vector<BucketPayload>&  Payloads,
 											 std::vector<AccessTime>&	  AccessTimes,
 											 std::vector<BucketMetaData>& MetaDatas,
-											 std::vector<IoBuffer>&		  MemCachedPayloads,
+											 std::vector<MemCacheData>&	  MemCachedPayloads,
 											 std::vector<ReferenceIndex>& FirstReferenceIndex,
 											 IndexMap&					  Index,
 											 RwLock::ExclusiveLockScope&  IndexLock)
@@ -3738,7 +3759,8 @@ ZenCacheDiskLayer::CacheBucket::CompactState(RwLock::ExclusiveLockScope&,
 		}
 		if (Payload.MemCached)
 		{
-			MemCachedPayloads.push_back(std::move(m_MemCachedPayloads[Payload.MemCached]));
+			MemCachedPayloads.emplace_back(
+				MemCacheData{.Payload = std::move(m_MemCachedPayloads[Payload.MemCached].Payload), .OwnerIndex = EntryIndex});
 			Payload.MemCached = MemCachedIndex(gsl::narrow<uint32_t>(MemCachedPayloads.size() - 1));
 		}
 		if (m_Configuration.EnableReferenceCaching)
@@ -4216,17 +4238,8 @@ ZenCacheDiskLayer::MemCacheTrim()
 	ZEN_TRACE_CPU("Z$::Disk::MemCacheTrim");
 
 	ZEN_ASSERT(m_Configuration.MemCacheTargetFootprintBytes != 0);
-
-	const GcClock::TimePoint Now = GcClock::Now();
-
-	const GcClock::Tick		   NowTick			   = Now.time_since_epoch().count();
-	const std::chrono::seconds TrimInterval		   = std::chrono::seconds(m_Configuration.MemCacheTrimIntervalSeconds);
-	GcClock::Tick			   LastTrimTick		   = m_LastTickMemCacheTrim;
-	const GcClock::Tick		   NextAllowedTrimTick = LastTrimTick + GcClock::Duration(TrimInterval).count();
-	if (NowTick < NextAllowedTrimTick)
-	{
-		return;
-	}
+	ZEN_ASSERT(m_Configuration.MemCacheMaxAgeSeconds != 0);
+	ZEN_ASSERT(m_Configuration.MemCacheTrimIntervalSeconds != 0);
 
 	bool Expected = false;
 	if (!m_IsMemCacheTrimming.compare_exchange_strong(Expected, true))
@@ -4234,75 +4247,90 @@ ZenCacheDiskLayer::MemCacheTrim()
 		return;
 	}
 
-	// Bump time forward so we don't keep trying to do m_IsTrimming.compare_exchange_strong
-	const GcClock::Tick NextTrimTick = NowTick + GcClock::Duration(TrimInterval).count();
-	m_LastTickMemCacheTrim.store(NextTrimTick);
+	try
+	{
+		m_JobQueue.QueueJob("ZenCacheDiskLayer::MemCacheTrim", [this](JobContext&) {
+			ZEN_TRACE_CPU("Z$::ZenCacheDiskLayer::MemCacheTrim [Async]");
+
+			const std::chrono::seconds TrimInterval = std::chrono::seconds(m_Configuration.MemCacheTrimIntervalSeconds);
+			uint64_t				   TrimmedSize	= 0;
+			Stopwatch				   Timer;
+			const auto				   Guard = MakeGuard([&] {
+				ZEN_INFO("trimmed {} (remaining {}), from memory cache in {}",
+						 NiceBytes(TrimmedSize),
+						 NiceBytes(m_TotalMemCachedSize),
+						 NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+
+				const GcClock::Tick NowTick		 = GcClock::TickCount();
+				const GcClock::Tick NextTrimTick = NowTick + GcClock::Duration(TrimInterval).count();
+				m_NextAllowedTrimTick.store(NextTrimTick);
+				m_IsMemCacheTrimming.store(false);
+			});
 
-	m_JobQueue.QueueJob("ZenCacheDiskLayer::MemCacheTrim", [this, Now, TrimInterval](JobContext&) {
-		ZEN_TRACE_CPU("Z$::ZenCacheDiskLayer::MemCacheTrim [Async]");
+			const std::chrono::seconds MaxAge = std::chrono::seconds(m_Configuration.MemCacheMaxAgeSeconds);
 
-		uint64_t   StartSize = m_TotalMemCachedSize.load();
-		Stopwatch  Timer;
-		const auto Guard = MakeGuard([&] {
-			uint64_t EndSize = m_TotalMemCachedSize.load();
-			ZEN_INFO("trimmed {} (remaining {}), from memory cache in {}",
-					 NiceBytes(StartSize > EndSize ? StartSize - EndSize : 0),
-					 NiceBytes(m_TotalMemCachedSize),
-					 NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
-			m_IsMemCacheTrimming.store(false);
-		});
+			static const size_t	  UsageSlotCount = 2048;
+			std::vector<uint64_t> UsageSlots;
+			UsageSlots.reserve(UsageSlotCount);
 
-		const std::chrono::seconds MaxAge = std::chrono::seconds(m_Configuration.MemCacheMaxAgeSeconds);
-
-		std::vector<uint64_t> UsageSlots;
-		UsageSlots.reserve(std::chrono::seconds(MaxAge / TrimInterval).count());
+			std::vector<CacheBucket*> Buckets;
+			{
+				RwLock::SharedLockScope __(m_Lock);
+				Buckets.reserve(m_Buckets.size());
+				for (auto& Kv : m_Buckets)
+				{
+					Buckets.push_back(Kv.second.get());
+				}
+			}
 
-		std::vector<CacheBucket*> Buckets;
-		{
-			RwLock::SharedLockScope __(m_Lock);
-			Buckets.reserve(m_Buckets.size());
-			for (auto& Kv : m_Buckets)
+			const GcClock::TimePoint Now = GcClock::Now();
 			{
-				Buckets.push_back(Kv.second.get());
+				ZEN_TRACE_CPU("Z$::ZenCacheDiskLayer::MemCacheTrim GetUsageByAccess");
+				for (CacheBucket* Bucket : Buckets)
+				{
+					Bucket->GetUsageByAccess(Now, MaxAge, UsageSlots);
+				}
 			}
-		}
-		for (CacheBucket* Bucket : Buckets)
-		{
-			Bucket->GetUsageByAccess(Now, GcClock::Duration(TrimInterval), UsageSlots);
-		}
 
-		uint64_t TotalSize = 0;
-		for (size_t Index = 0; Index < UsageSlots.size(); ++Index)
-		{
-			TotalSize += UsageSlots[Index];
-			if (TotalSize >= m_Configuration.MemCacheTargetFootprintBytes)
+			uint64_t TotalSize = 0;
+			for (size_t Index = 0; Index < UsageSlots.size(); ++Index)
 			{
-				GcClock::TimePoint ExpireTime = Now - (TrimInterval * Index);
-				MemCacheTrim(Buckets, ExpireTime);
-				break;
+				TotalSize += UsageSlots[Index];
+				if (TotalSize >= m_Configuration.MemCacheTargetFootprintBytes)
+				{
+					GcClock::TimePoint ExpireTime = Now - ((GcClock::Duration(MaxAge) * Index) / UsageSlotCount);
+					TrimmedSize					  = MemCacheTrim(Buckets, ExpireTime);
+					break;
+				}
 			}
-		}
-	});
+		});
+	}
+	catch (std::exception& Ex)
+	{
+		ZEN_ERROR("Failed scheduling ZenCacheDiskLayer::MemCacheTrim. Reason: '{}'", Ex.what());
+		m_IsMemCacheTrimming.store(false);
+	}
 }
 
-void
+uint64_t
 ZenCacheDiskLayer::MemCacheTrim(std::vector<CacheBucket*>& Buckets, GcClock::TimePoint ExpireTime)
 {
 	if (m_Configuration.MemCacheTargetFootprintBytes == 0)
 	{
-		return;
+		return 0;
 	}
-	RwLock::SharedLockScope __(m_Lock);
+	uint64_t TrimmedSize = 0;
 	for (CacheBucket* Bucket : Buckets)
 	{
-		Bucket->MemCacheTrim(ExpireTime);
+		TrimmedSize += Bucket->MemCacheTrim(ExpireTime);
 	}
 	const GcClock::TimePoint   Now				   = GcClock::Now();
 	const GcClock::Tick		   NowTick			   = Now.time_since_epoch().count();
 	const std::chrono::seconds TrimInterval		   = std::chrono::seconds(m_Configuration.MemCacheTrimIntervalSeconds);
-	GcClock::Tick			   LastTrimTick		   = m_LastTickMemCacheTrim;
+	GcClock::Tick			   LastTrimTick		   = m_NextAllowedTrimTick;
 	const GcClock::Tick		   NextAllowedTrimTick = NowTick + GcClock::Duration(TrimInterval).count();
-	m_LastTickMemCacheTrim.compare_exchange_strong(LastTrimTick, NextAllowedTrimTick);
+	m_NextAllowedTrimTick.compare_exchange_strong(LastTrimTick, NextAllowedTrimTick);
+	return TrimmedSize;
 }
 
 #if ZEN_WITH_TESTS
author	Dan Engelbrecht <[email protected]>	2023-12-11 06:36:48 -0500
committer	GitHub <[email protected]>	2023-12-11 12:36:48 +0100
commit	c6cce91a514ba747b19f4fe8acfd2443405c960d (patch)
tree	9fc2916f7b730cd769492f7fdf45ef454381fb62 /src/zenserver/cache/cachedisklayer.cpp
parent	Update CHANGELOG.md (diff)
download	zen-c6cce91a514ba747b19f4fe8acfd2443405c960d.tar.xz zen-c6cce91a514ba747b19f4fe8acfd2443405c960d.zip