diff options
| author | Dan Engelbrecht <[email protected]> | 2025-10-17 11:19:56 +0200 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-10-17 11:19:56 +0200 |
| commit | 5467e316e0fe071a9a9cf90b2c905da65d8566e7 (patch) | |
| tree | 33698fa16e8515854bc62f65364d3a7bad6ed572 /src/zenremotestore/chunking/chunkingcontroller.cpp | |
| parent | clean up http response formatters (#584) (diff) | |
| download | zen-5467e316e0fe071a9a9cf90b2c905da65d8566e7.tar.xz zen-5467e316e0fe071a9a9cf90b2c905da65d8566e7.zip | |
exclude .sym and .psym (#585)
* exclude .sym and .psym
* add more text file types to list of extensions to exclude from chunking
* use hash set for extensions when checking for chunking strategy
Diffstat (limited to 'src/zenremotestore/chunking/chunkingcontroller.cpp')
| -rw-r--r-- | src/zenremotestore/chunking/chunkingcontroller.cpp | 56 |
1 files changed, 43 insertions, 13 deletions
```diff
diff --git a/src/zenremotestore/chunking/chunkingcontroller.cpp b/src/zenremotestore/chunking/chunkingcontroller.cpp
index 49332c2ce..cc20446ea 100644
--- a/src/zenremotestore/chunking/chunkingcontroller.cpp
+++ b/src/zenremotestore/chunking/chunkingcontroller.cpp
@@ -9,6 +9,7 @@
 
 ZEN_THIRD_PARTY_INCLUDES_START
 #include <tsl/robin_map.h>
+#include <tsl/robin_set.h>
 ZEN_THIRD_PARTY_INCLUDES_END
 
 namespace zen {
@@ -81,7 +82,14 @@ namespace {
 class BasicChunkingController : public ChunkingController
 {
 public:
-    BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {}
+    BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings)
+    {
+        m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
+        for (const std::string& Extension : Settings.ExcludeExtensions)
+        {
+            m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+        }
+    }
 
     BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
 
@@ -92,11 +100,15 @@ public:
                              std::atomic<bool>& AbortFlag) const override
     {
         ZEN_TRACE_CPU("BasicChunkingController::ProcessFile");
-        const bool ExcludeFromChunking =
-            std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
-            m_Settings.ExcludeExtensions.end();
+        if (RawSize < m_Settings.ChunkFileSizeLimit)
+        {
+            return false;
+        }
+
+        const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
+        const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
 
-        if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
+        if (ExcludeFromChunking)
         {
             return false;
         }
@@ -151,12 +163,26 @@ private:
     }
 
     const BasicChunkingControllerSettings m_Settings;
+    tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
 };
 
 class ChunkingControllerWithFixedChunking : public ChunkingController
 {
 public:
-    ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {}
+    ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings)
+    {
+        m_ExcludeExtensionHashes.reserve(Settings.ExcludeExtensions.size());
+        for (const std::string& Extension : Settings.ExcludeExtensions)
+        {
+            m_ExcludeExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+        }
+
+        m_FixedChunkingExtensionHashes.reserve(Settings.FixedChunkingExtensions.size());
+        for (const std::string& Extension : Settings.FixedChunkingExtensions)
+        {
+            m_FixedChunkingExtensionHashes.insert(HashStringAsLowerDjb2(Extension));
+        }
+    }
 
     ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {}
 
@@ -167,18 +193,20 @@ public:
                              std::atomic<bool>& AbortFlag) const override
     {
         ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile");
-        const bool ExcludeFromChunking =
-            std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) !=
-            m_Settings.ExcludeExtensions.end();
+        if (RawSize < m_Settings.ChunkFileSizeLimit)
+        {
+            return false;
+        }
+
+        const uint32_t ExtensionHash = HashStringAsLowerDjb2(InputPath.extension().string());
+        const bool ExcludeFromChunking = m_ExcludeExtensionHashes.contains(ExtensionHash);
 
-        if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit))
+        if (ExcludeFromChunking)
         {
             return false;
         }
 
-        const bool FixedChunkingExtension =
-            std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) !=
-            m_Settings.FixedChunkingExtensions.end();
+        const bool FixedChunkingExtension = m_FixedChunkingExtensionHashes.contains(ExtensionHash);
 
         if (FixedChunkingExtension)
         {
@@ -318,6 +346,8 @@ private:
     }
 
     const ChunkingControllerWithFixedChunkingSettings m_Settings;
+    tsl::robin_set<uint32_t> m_FixedChunkingExtensionHashes;
+    tsl::robin_set<uint32_t> m_ExcludeExtensionHashes;
 };
 
 std::unique_ptr<ChunkingController>
```