aboutsummaryrefslogtreecommitdiff
path: root/src/zen/cmds/hash_cmd.cpp
diff options
context:
space:
mode:
author: Dan Engelbrecht <[email protected]> 2023-09-22 08:22:06 -0400
committer: GitHub <[email protected]> 2023-09-22 14:22:06 +0200
commit: c7d4dc6a4d13881028d566f5ce501335e47e48bf (patch)
tree: 493110da583a8e5d97fe05e14f23469ee6244d2b /src/zen/cmds/hash_cmd.cpp
parent: add trace command to enable/disable tracing at runtime (#416) (diff)
download: archived-zen-c7d4dc6a4d13881028d566f5ce501335e47e48bf.tar.xz
archived-zen-c7d4dc6a4d13881028d566f5ce501335e47e48bf.zip
Collect all zen admin-related commands into admin.h/.cpp (#418)
* move commands in scrub.h/cpp to admin_cmd.h/cpp
* move job command into admin_cmd.h/.cpp
* admin -> admin_cmd
* bench -> bench_cmd
* cache -> cache_cmd
* copy -> copy_cmd
* dedup -> dedup_cmd
* hash -> hash_cmd
* print -> print_cmd
* projectstore -> projectstore_cmd
* rpcreplay -> rpcreplay_cmd
* serve -> serve_cmd
* status -> status_cmd
* top -> top_cmd
* trace -> trace_cmd
* up -> up_cmd
* version -> version_cmd
Diffstat (limited to 'src/zen/cmds/hash_cmd.cpp')
-rw-r--r-- src/zen/cmds/hash_cmd.cpp 171
1 files changed, 171 insertions, 0 deletions
diff --git a/src/zen/cmds/hash_cmd.cpp b/src/zen/cmds/hash_cmd.cpp
new file mode 100644
index 000000000..d1f7a1975
--- /dev/null
+++ b/src/zen/cmds/hash_cmd.cpp
@@ -0,0 +1,171 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "hash_cmd.h"
+
+#include <zencore/blake3.h>
+#include <zencore/logging.h>
+#include <zencore/string.h>
+#include <zencore/timer.h>
+
+#include <cstdio>
+
+#if ZEN_PLATFORM_WINDOWS
+# include <ppl.h>
+#endif
+
+namespace zen {
+
+////////////////////////////////////////////////////////////////////////////////
+
+#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+
+namespace Concurrency {
+
+ // Serial stand-in for PPL's Concurrency::parallel_for_each, compiled only on
+ // Linux and Mac where <ppl.h> is not included. Invokes Lambda on every element
+ // of [Cursor, End), in order, on the calling thread.
+ //
+ // The range is walked with `!=` rather than `<` so that any forward iterator is
+ // accepted, not only random-access ones.
+ template<typename IterType, typename LambdaType>
+ void parallel_for_each(IterType Cursor, IterType End, const LambdaType& Lambda)
+ {
+     for (; Cursor != End; ++Cursor)
+     {
+         Lambda(*Cursor);
+     }
+ }
+
+ // Minimal single-threaded replacement for PPL's Concurrency::combinable.
+ // There is exactly one accumulator: local() hands back this object itself and
+ // combine_each() visits that single value once.
+ template<typename T>
+ struct combinable
+ {
+     // The one accumulated value.
+     T Value = 0;
+
+     // Returns this object, which doubles as the "local" accumulator.
+     combinable<T>& local() { return *this; }
+
+     // Adds Amount to the accumulated value.
+     void operator+=(T Amount) { Value += Amount; }
+
+     // Calls Visitor once with the accumulated value.
+     template<typename VisitorType>
+     void combine_each(const VisitorType& Visitor)
+     {
+         Visitor(Value);
+     }
+ };
+
+} // namespace Concurrency
+
+#endif // ZEN_PLATFORM_LINUX|MAC
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Registers the command line options of the hash command:
+//   -d / --dir     directory to scan (stored in m_ScanDirectory)
+//   -o / --output  output file       (stored in m_OutputFile)
+HashCommand::HashCommand()
+{
+    m_Options.add_options()("d,dir", "Directory to scan", cxxopts::value<std::string>(m_ScanDirectory));
+    m_Options.add_options()("o,output", "Output file", cxxopts::value<std::string>(m_OutputFile));
+}
+
+// Uses the compiler-generated destructor.
+HashCommand::~HashCommand() = default;
+
+// Hashes every regular file found (recursively) under the directory given with
+// -d/--dir using BLAKE3 and emits one "<path>,<hex hash>" line per file, either
+// to the file given with -o/--output or to stdout when no output file is set.
+//
+// GlobalOptions is unused. Returns 0 on completion and also when
+// ParseOptions() returns false. Throws zen::OptionParseException when no scan
+// directory was supplied.
+int
+HashCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv)
+{
+    ZEN_UNUSED(GlobalOptions);
+
+    if (!ParseOptions(argc, argv))
+    {
+        return 0;
+    }
+
+    if (m_ScanDirectory.empty())
+    {
+        throw zen::OptionParseException("Hash command requires a directory to scan");
+    }
+
+    // Gather list of files to process
+
+    ZEN_CONSOLE("Gathering files from {}", m_ScanDirectory);
+
+    struct FileEntry
+    {
+        std::filesystem::path FilePath;
+        zen::BLAKE3           FileHash;
+    };
+
+    std::vector<FileEntry> FileList;
+    uint64_t               FileBytes = 0;  // sum of the sizes reported by the directory scan
+
+    std::filesystem::path ScanDirectoryPath{m_ScanDirectory};
+
+    for (const std::filesystem::directory_entry& Entry : std::filesystem::recursive_directory_iterator(ScanDirectoryPath))
+    {
+        if (Entry.is_regular_file())
+        {
+            FileList.push_back({Entry.path()});
+            FileBytes += Entry.file_size();
+        }
+    }
+
+    ZEN_CONSOLE("Gathered {} files, total size {}", FileList.size(), zen::NiceBytes(FileBytes));
+
+    // Byte counter updated from inside hashFile; summed up after the run.
+    Concurrency::combinable<uint64_t> TotalBytes;
+
+    // Memory-maps one file, stores its BLAKE3 hash in the entry and accounts
+    // for the number of bytes hashed.
+    auto hashFile = [&](FileEntry& File) {
+        InternalFile InputFile;
+        InputFile.OpenRead(File.FilePath);
+        const uint8_t* DataPointer = (const uint8_t*)InputFile.MemoryMapFile();
+        const size_t   DataSize    = InputFile.GetFileSize();
+
+        File.FileHash = zen::BLAKE3::HashMemory(DataPointer, DataSize);
+
+        TotalBytes.local() += DataSize;
+    };
+
+    // Process them as quickly as possible
+
+    zen::Stopwatch Timer;
+
+#if 1
+    Concurrency::parallel_for_each(begin(FileList), end(FileList), [&](auto& file) { hashFile(file); });
+#else
+    // Serial fallback, kept as a debugging toggle. The entries are mutated by
+    // hashFile, so they must be iterated by non-const reference.
+    for (FileEntry& File : FileList)
+    {
+        hashFile(File);
+    }
+#endif
+
+    // Same width as the accumulator so large totals are not truncated on
+    // targets where size_t is narrower than 64 bits.
+    uint64_t TotalByteCount = 0;
+
+    TotalBytes.combine_each([&](uint64_t Total) { TotalByteCount += Total; });
+
+    const uint64_t ElapsedMs = Timer.GetElapsedTimeMs();
+    ZEN_CONSOLE("Scanned {} files in {}", FileList.size(), zen::NiceTimeSpanMs(ElapsedMs));
+    ZEN_CONSOLE("Total bytes {} ({})", zen::NiceBytes(TotalByteCount), zen::NiceByteRate(TotalByteCount, ElapsedMs));
+
+    // Without an output file the listing goes to stdout. The Windows console
+    // device name "CONOUT$" is deliberately not used: on Linux and Mac it is
+    // just an ordinary file name, so a file called "CONOUT$" would be created.
+    const bool WriteToStdout = m_OutputFile.empty();
+
+    InternalFile Output;
+
+    if (!WriteToStdout)
+    {
+        Output.OpenWrite(m_OutputFile, true);
+    }
+
+    zen::ExtendableStringBuilder<256> Line;
+
+    // Offset of the next line in the output file (unused for stdout).
+    uint64_t CurrentOffset = 0;
+
+    for (const FileEntry& File : FileList)
+    {
+        Line.Append(File.FilePath.generic_u8string().c_str());
+        Line.Append(',');
+        File.FileHash.ToHexString(Line);
+        Line.Append('\n');
+
+        if (WriteToStdout)
+        {
+            std::fwrite(Line.Data(), 1, Line.Size(), stdout);
+        }
+        else
+        {
+            Output.Write(Line.Data(), Line.Size(), CurrentOffset);
+            CurrentOffset += Line.Size();
+        }
+
+        Line.Reset();
+    }
+
+    if (WriteToStdout)
+    {
+        std::fflush(stdout);
+    }
+
+    // TODO: implement snapshot enumeration and display
+    return 0;
+}
+
+} // namespace zen