aboutsummaryrefslogtreecommitdiff
path: root/src/zenremotestore/chunking/chunkingcontroller.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2025-10-17 11:19:56 +0200
committer: GitHub Enterprise <[email protected]> 2025-10-17 11:19:56 +0200
commit: 5467e316e0fe071a9a9cf90b2c905da65d8566e7 (patch)
tree: 33698fa16e8515854bc62f65364d3a7bad6ed572 /src/zenremotestore/chunking/chunkingcontroller.cpp
parent: clean up http response formatters (#584) (diff)
download: zen-5467e316e0fe071a9a9cf90b2c905da65d8566e7.tar.xz
download: zen-5467e316e0fe071a9a9cf90b2c905da65d8566e7.zip
exclude .sym and .psym (#585)
* exclude .sym and .psym
* add more text file types to list of extensions to exclude from chunking
* use hash set for extensions when checking for chunking strategy
Diffstat (limited to 'src/zenremotestore/chunking/chunkingcontroller.cpp')
-rw-r--r-- src/zenremotestore/chunking/chunkingcontroller.cpp 56
1 files changed, 43 insertions, 13 deletions
diff --git a/src/zenremotestore/chunking/chunkingcontroller.cpp b/src/zenremotestore/chunking/chunkingcontroller.cpp
index 49332c2ce..cc20446ea 100644
--- a/src/zenremotestore/chunking/chunkingcontroller.cpp
+++ b/src/zenremotestore/chunking/chunkingcontroller.cpp
@@ -9,6 +9,7 @@
ZEN_THIRD_PARTY_INCLUDES_START
#include <tsl/robin_map.h>
+#include <tsl/robin_set.h>
ZEN_THIRD_PARTY_INCLUDES_END
namespace zen {
@@ -81,7 +82,14 @@ namespace {
class BasicChunkingController : public ChunkingController
{
public:
- BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {}
+ BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings)
+ {
+ m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
+ for (const std::string& Extension : Settings.ExcludeExtensions)
+ {
+ m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ }
+ }
BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
@@ -92,11 +100,15 @@ public:
std::atomic<bool>& AbortFlag) const override
{
ZEN_TRACE_CPU("BasicChunkingController::ProcessFile");
- const bool ExcludeFromChunking =
- std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
- m_Settings.ExcludeExtensions.end();
+ if (RawSize < m_Settings.ChunkFileSizeLimit)
+ {
+ return false;
+ }
+
+ const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
+ const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
- if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
+ if (ExcludeFromChunking)
{
return false;
}
@@ -151,12 +163,26 @@ private:
}
const BasicChunkingControllerSettings m_Settings;
+ tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
};
class ChunkingControllerWithFixedChunking : public ChunkingController
{
public:
- ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {}
+ ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings)
+ {
+ m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
+ for (const std::string& Extension : Settings.ExcludeExtensions)
+ {
+ m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ }
+
+ m_FixedChunkingExtensionHashes.reserve(Settings.FixedChunkingExtensions.size());
+ for (const std::string& Extension : Settings.FixedChunkingExtensions)
+ {
+ m_FixedChunkingExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+ }
+ }
ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
@@ -167,18 +193,20 @@ public:
std::atomic<bool>& AbortFlag) const override
{
ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile");
- const bool ExcludeFromChunking =
- std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
- m_Settings.ExcludeExtensions.end();
+ if (RawSize < m_Settings.ChunkFileSizeLimit)
+ {
+ return false;
+ }
+
+ const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
+ const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
- if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
+ if (ExcludeFromChunking)
{
return false;
}
- const bool FixedChunkingExtension =
- std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) !=
- m_Settings.FixedChunkingExtensions.end();
+ const bool FixedChunkingExtension = m_FixedChunkingExtensionHashes.contains(ExtensionHash);
if (FixedChunkingExtension)
{
@@ -318,6 +346,8 @@ private:
}
const ChunkingControllerWithFixedChunkingSettings m_Settings;
+ tsl::robin_set<uint32_t> m_FixedChunkingExtensionHashes;
+ tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
};
std::unique_ptr<ChunkingController>