diff options
| author | zousar <[email protected]> | 2025-06-24 16:26:29 -0600 |
|---|---|---|
| committer | zousar <[email protected]> | 2025-06-24 16:26:29 -0600 |
| commit | bb298631ba35a323827dda0b8cd6158e276b5f61 (patch) | |
| tree | 7ba8db91c44ce83f2c518f80f80ab14910eefa6f /src | |
| parent | Change to PutResult structure (diff) | |
| parent | 5.6.14 (diff) | |
| download | zen-bb298631ba35a323827dda0b8cd6158e276b5f61.tar.xz zen-bb298631ba35a323827dda0b8cd6158e276b5f61.zip | |
Merge branch 'main' into zs/put-overwrite-policy
Diffstat (limited to 'src')
191 files changed, 28786 insertions, 3036 deletions
diff --git a/src/transports/transport-sdk/include/transportplugin.h b/src/transports/transport-sdk/include/transportplugin.h index 4347868e6..a78a758bc 100644 --- a/src/transports/transport-sdk/include/transportplugin.h +++ b/src/transports/transport-sdk/include/transportplugin.h @@ -17,10 +17,14 @@ namespace zen { +// Current API version, value will be incremented to represent breaking changes +static const uint32_t kTransportApiVersion = 1; + class TransportConnection; class TransportPlugin; class TransportServerConnection; class TransportServer; +class TransportLogger; /************************************************************************* @@ -60,6 +64,29 @@ public: virtual TransportServerConnection* CreateConnectionHandler(TransportConnection* Connection) = 0; }; +/** Logger interface + + There will be one instance of this provided by the system to the transport plugin + + The plugin can use this to log messages back to zen server + + */ +class TransportLogger +{ +public: + enum class LogLevel : uint32_t + { + Trace = 0, + Debug = 1, + Info = 2, + Warn = 3, + Err = 4, + Critical = 5, + }; + + virtual void LogMessage(LogLevel Level, const char* Message) = 0; +}; + /************************************************************************* The following interfaces are to be implemented by transport plugins. @@ -116,7 +143,18 @@ public: extern "C" { - DLL_TRANSPORT_API zen::TransportPlugin* CreateTransportPlugin(); + /** Provide information about plugin version + + Fills out API version (kTransportApiVersion) plugin was built against. + Fills out plugin own version ever increasing version number, + a copy of plugin with higher version will be used. + */ + DLL_TRANSPORT_API void GetTransportPluginVersion(uint32_t* OutApiVersion, uint32_t* OutPluginVersion); + + // Return nullptr if requested api version mismatches api version plugin was built against + DLL_TRANSPORT_API zen::TransportPlugin* CreateTransportPlugin(zen::TransportLogger* Logger); } -typedef zen::TransportPlugin* (*PfnCreateTransportPlugin)(); +typedef void (*PfnGetTransportPluginVersion)(uint32_t* OutApiVersion, uint32_t* OutPluginVersion); + +typedef zen::TransportPlugin* (*PfnCreateTransportPlugin)(zen::TransportLogger* Logger); diff --git a/src/transports/winsock/winsock.cpp b/src/transports/winsock/winsock.cpp index 1c3ee909a..f98984726 100644 --- a/src/transports/winsock/winsock.cpp +++ b/src/transports/winsock/winsock.cpp @@ -364,8 +364,22 @@ WinsockTransportPlugin::IsAvailable() ////////////////////////////////////////////////////////////////////////// +void +GetTransportPluginVersion(uint32_t* OutApiVersion, uint32_t* OutPluginVersion) +{ + if (OutApiVersion != nullptr) + { + *OutApiVersion = kTransportApiVersion; + } + + if (OutPluginVersion != nullptr) + { + *OutPluginVersion = 1; + } +} + TransportPlugin* -CreateTransportPlugin() +CreateTransportPlugin([[maybe_unused]] TransportLogger* Logger) { return new WinsockTransportPlugin; } diff --git a/src/zen/cmds/admin_cmd.cpp b/src/zen/cmds/admin_cmd.cpp index 995ed4136..fe2bbbdc7 100644 --- a/src/zen/cmds/admin_cmd.cpp +++ b/src/zen/cmds/admin_cmd.cpp @@ -57,10 +57,7 @@ ScrubCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else if (int StatusCode = (int)Response.StatusCode) { - ZEN_ERROR("scrub start failed: {}: {} ({})", - (int)Response.StatusCode, - ReasonStringForHttpResultCode((int)Response.StatusCode), - Response.ToText()); + ZEN_ERROR("scrub start failed: {}: {} ({})", StatusCode, ReasonStringForHttpResultCode(StatusCode), Response.ToText()); } else { @@ -645,10 +642,7 @@ FlushCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else if (int StatusCode = (int)Response.StatusCode) { - ZEN_ERROR("flush failed: {}: {} ({})", - (int)Response.StatusCode, - ReasonStringForHttpResultCode((int)Response.StatusCode), - Response.ToText()); + ZEN_ERROR("flush failed: {}: {} ({})", StatusCode, ReasonStringForHttpResultCode(StatusCode), Response.ToText()); } else { @@ -688,7 +682,7 @@ Copy(const std::filesystem::path& Source, const std::filesystem::path& Target) static bool TryCopy(const std::filesystem::path& Source, const std::filesystem::path& Target) { - if (!std::filesystem::is_regular_file(Source)) + if (!IsFile(Source)) { return false; } @@ -714,7 +708,7 @@ CopyStateCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw OptionParseException("data path must be given"); } - if (!std::filesystem::is_directory(m_DataPath)) + if (!IsDir(m_DataPath)) { throw OptionParseException("data path must exist"); } diff --git a/src/zen/cmds/builds_cmd.cpp b/src/zen/cmds/builds_cmd.cpp new file mode 100644 index 000000000..cd27daa9e --- /dev/null +++ b/src/zen/cmds/builds_cmd.cpp @@ -0,0 +1,11644 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "builds_cmd.h" + +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryfile.h> +#include <zencore/compactbinaryfmt.h> +#include <zencore/compactbinaryvalue.h> +#include <zencore/compress.h> +#include <zencore/config.h> +#include <zencore/except.h> +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/session.h> +#include <zencore/stream.h> +#include <zencore/string.h> +#include <zencore/trace.h> +#include <zencore/uid.h> +#include <zenhttp/formatters.h> +#include <zenhttp/httpclient.h> +#include <zenhttp/httpclientauth.h> +#include <zenhttp/httpcommon.h> +#include <zenutil/bufferedwritefilecache.h> +#include <zenutil/buildstoragecache.h> +#include <zenutil/chunkblock.h> +#include <zenutil/chunkedcontent.h> +#include <zenutil/chunkedfile.h> +#include <zenutil/chunkingcontroller.h> +#include <zenutil/filebuildstorage.h> +#include <zenutil/jupiter/jupiterbuildstorage.h> +#include <zenutil/jupiter/jupitersession.h> +#include <zenutil/parallelwork.h> +#include <zenutil/workerpools.h> +#include <zenutil/zenserverprocess.h> + +#include <signal.h> +#include <memory> +#include <regex> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +#include <tsl/robin_set.h> +#include <json11.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_PLATFORM_WINDOWS +# include <zencore/windows.h> +#else +# include <fcntl.h> +# include <sys/file.h> +# include <sys/stat.h> +# include <unistd.h> +#endif + +#define EXTRA_VERIFY 0 + +#define ZEN_CLOUD_STORAGE "Cloud Storage" + +namespace zen { + +using namespace std::literals; + +namespace { + namespace zenutil { +#if ZEN_PLATFORM_WINDOWS + class SecurityAttributes + { + public: + inline SECURITY_ATTRIBUTES* Attributes() { return &m_Attributes; } + + protected: + SECURITY_ATTRIBUTES m_Attributes{}; + SECURITY_DESCRIPTOR m_Sd{}; + }; + + // Security attributes which allows any user access + + class AnyUserSecurityAttributes : public SecurityAttributes + { + public: + AnyUserSecurityAttributes() + { + m_Attributes.nLength = sizeof m_Attributes; + m_Attributes.bInheritHandle = false; // Disable inheritance + + const BOOL Success = InitializeSecurityDescriptor(&m_Sd, SECURITY_DESCRIPTOR_REVISION); + + if (Success) + { + if (!SetSecurityDescriptorDacl(&m_Sd, TRUE, (PACL)NULL, FALSE)) + { + ThrowLastError("SetSecurityDescriptorDacl failed"); + } + + m_Attributes.lpSecurityDescriptor = &m_Sd; + } + } + }; +#endif // ZEN_PLATFORM_WINDOWS + + } // namespace zenutil + + static std::atomic<bool> AbortFlag = false; + static std::atomic<bool> PauseFlag = false; + + static void SignalCallbackHandler(int SigNum) + { + if (SigNum == SIGINT) + { + PauseFlag = false; + AbortFlag = true; + } +#if ZEN_PLATFORM_WINDOWS + if (SigNum == SIGBREAK) + { + PauseFlag = false; + AbortFlag = true; + } +#endif // ZEN_PLATFORM_WINDOWS + } + + struct ZenStateSharedData + { + static constexpr uint64_t kMagicV1 = 0x3176646d636e657a; // zencmdv1 + + uint64_t Magic = 0; // Implies the size and layout of this struct - changes to the data requires change to Magic constant + std::atomic<uint32_t> Pid; + uint8_t SessionId[12]; + uint8_t Padding1[4]; + std::atomic<uint8_t> Abort; + std::atomic<uint8_t> Pause; + uint8_t Padding2[2]; + }; + + struct MemMap + { + void* Handle = nullptr; + void* Data = nullptr; + size_t Size = 0; + std::string Name; + }; + + class ZenState + { + public: + ZenState(const ZenState&) = delete; + ZenState& operator=(const ZenState&) = delete; + + ZenState(); + explicit ZenState(uint32_t Pid); + ~ZenState(); + + const ZenStateSharedData& StateData() const + { + ZEN_ASSERT(m_Data); + return *m_Data; + } + ZenStateSharedData& StateData() + { + ZEN_ASSERT(m_Data); + return *m_Data; + } + + private: + static constexpr std::string_view MapBaseName = "UnrealEngineZenCmd_"sv; + static constexpr size_t MapSize = sizeof(ZenStateSharedData); + + bool m_Created = false; + std::unique_ptr<SharedMemory> m_MemMap; + ZenStateSharedData* m_Data = nullptr; + + std::thread m_StateMonitor; + Event m_ExitStateMonitorEvent; + }; + + ZenState::ZenState(uint32_t Pid) + { + const std::string ZenStateMapName = fmt::format("{}{}", MapBaseName, Pid); + + if (!IsProcessRunning(Pid)) + { + throw std::runtime_error(fmt::format("The process {} is not running", Pid)); + } + std::unique_ptr<SharedMemory> MemMap = OpenSharedMemory(ZenStateMapName, MapSize, false); + if (!MemMap) + { + throw std::runtime_error(fmt::format("The process {} is not a running zen process", Pid)); + } + ZenStateSharedData* data = (ZenStateSharedData*)MemMap->GetData(); + if (uint64_t MemMagic = data->Magic; MemMagic != ZenStateSharedData::kMagicV1) + { + throw std::runtime_error(fmt::format("The mem map for process {} has an unsupported magic {:x}, expected {:x}", + Pid, + MemMagic, + ZenStateSharedData::kMagicV1)); + } + if (uint32_t MemPid = data->Pid.load(); MemPid != Pid) + { + throw std::runtime_error( + fmt::format("The mem map for process {} has an missmatching pid of {}, expected {}", Pid, MemPid, Pid)); + } + m_MemMap = std::move(MemMap); + m_Data = data; + } + + ZenState::ZenState() + { + int Pid = GetCurrentProcessId(); + + const std::string ZenStateMapName = fmt::format("{}{}", MapBaseName, Pid); + + std::unique_ptr<SharedMemory> MemMap = CreateSharedMemory(ZenStateMapName, MapSize, false); + if (!MemMap) + { + throw std::runtime_error(fmt::format("The mem map for process {} could not be created", Pid)); + } + + ZenStateSharedData* data = (ZenStateSharedData*)MemMap->GetData(); + memset(data, 0, sizeof(ZenStateSharedData)); + data->Magic = ZenStateSharedData::kMagicV1; + data->Pid.store(gsl::narrow<uint32_t>(Pid)); + data->SessionId; + data->Abort.store(false); + data->Pause.store(false); + const Oid SessionId = GetSessionId(); + memcpy(data->SessionId, &SessionId, sizeof SessionId); + + m_MemMap = std::move(MemMap); + m_Data = data; + + m_StateMonitor = std::thread([this]() { + while (!m_ExitStateMonitorEvent.Wait(500)) + { + if (m_Data->Abort.load()) + { + AbortFlag.store(true); + } + PauseFlag.store(m_Data->Pause.load()); + } + }); + } + + ZenState::~ZenState() + { + try + { + if (m_StateMonitor.joinable()) + { + m_ExitStateMonitorEvent.Set(); + m_StateMonitor.join(); + } + m_MemMap.reset(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("ZenState::~ZenState threw exception: {}", Ex.what()); + } + } + + static const size_t DefaultMaxBlockSize = 64u * 1024u * 1024u; + static const size_t DefaultMaxChunkEmbedSize = 3u * 512u * 1024u; + + struct ChunksBlockParameters + { + size_t MaxBlockSize = DefaultMaxBlockSize; + size_t MaxChunkEmbedSize = DefaultMaxChunkEmbedSize; + }; + + const ChunksBlockParameters DefaultChunksBlockParams{ + .MaxBlockSize = 32u * 1024u * 1024u, + .MaxChunkEmbedSize = 2u * 1024u * 1024u // DefaultChunkedParams.MaxSize + }; + const uint64_t DefaultPreferredMultipartChunkSize = 32u * 1024u * 1024u; + + const double DefaultLatency = 0; // .0010; + const double DefaultDelayPerKBSec = 0; // 0.00005; + + const bool SingleThreaded = false; + bool BoostWorkerThreads = false; + bool UseSparseFiles = false; + + WorkerThreadPool& GetIOWorkerPool() + { + return SingleThreaded ? GetSyncWorkerPool() + : BoostWorkerThreads ? GetLargeWorkerPool(EWorkloadType::Burst) + : GetMediumWorkerPool(EWorkloadType::Burst); + } + WorkerThreadPool& GetNetworkPool() { return SingleThreaded ? GetSyncWorkerPool() : GetSmallWorkerPool(EWorkloadType::Burst); } + + static const std::vector<uint32_t> NonCompressableExtensions({HashStringDjb2(".mp4"sv), + HashStringDjb2(".zip"sv), + HashStringDjb2(".7z"sv), + HashStringDjb2(".bzip"sv), + HashStringDjb2(".rar"sv), + HashStringDjb2(".gzip"sv), + HashStringDjb2(".apk"sv), + HashStringDjb2(".nsp"sv), + HashStringDjb2(".xvc"sv), + HashStringDjb2(".pkg"sv), + HashStringDjb2(".dmg"sv), + HashStringDjb2(".ipa"sv)}); + + static const tsl::robin_set<uint32_t> NonCompressableExtensionSet(NonCompressableExtensions.begin(), NonCompressableExtensions.end()); + + static bool IsExtensionHashCompressable(const uint32_t PathHash) { return !NonCompressableExtensionSet.contains(PathHash); } + + static bool IsChunkCompressable(const ChunkedFolderContent& Content, const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) + { + ZEN_UNUSED(Content); + const uint32_t ChunkLocationCount = Lookup.ChunkSequenceLocationCounts[ChunkIndex]; + if (ChunkLocationCount == 0) + { + return false; + } + const size_t ChunkLocationOffset = Lookup.ChunkSequenceLocationOffset[ChunkIndex]; + const uint32_t SequenceIndex = Lookup.ChunkSequenceLocations[ChunkLocationOffset].SequenceIndex; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const uint32_t ExtensionHash = Lookup.PathExtensionHash[PathIndex]; + + const bool IsCompressable = IsExtensionHashCompressable(ExtensionHash); + return IsCompressable; + } + + const uint64_t MinimumSizeForCompressInBlock = 2u * 1024u; + + const std::string ZenFolderName = ".zen"; + std::filesystem::path ZenStateFilePath(const std::filesystem::path& ZenFolderPath) { return ZenFolderPath / "current_state.cbo"; } + // std::filesystem::path ZenStateFileJsonPath(const std::filesystem::path& ZenFolderPath) { return ZenFolderPath / "current_state.json"; + // } + std::filesystem::path ZenTempFolderPath(const std::filesystem::path& ZenFolderPath) { return ZenFolderPath / "tmp"; } + + std::filesystem::path ZenTempCacheFolderPath(const std::filesystem::path& ZenFolderPath) + { + return ZenTempFolderPath(ZenFolderPath) / "cache"; // Decompressed and verified data - chunks & sequences + } + std::filesystem::path ZenTempBlockFolderPath(const std::filesystem::path& ZenFolderPath) + { + return ZenTempFolderPath(ZenFolderPath) / "blocks"; // Temp storage for whole and partial blocks + } + std::filesystem::path UploadTempDirectory(const std::filesystem::path& Path) + { + const std::u8string LocalPathString = Path.generic_u8string(); + IoHash PathHash = IoHash::HashBuffer(LocalPathString.data(), LocalPathString.length()); + return std::filesystem::temp_directory_path() / fmt::format("zen_{}", PathHash); + } + + std::filesystem::path ZenTempDownloadFolderPath(const std::filesystem::path& ZenFolderPath) + { + return ZenTempFolderPath(ZenFolderPath) / "download"; // Temp storage for decompressed and validated chunks + } + + // std::filesystem::path ZenTempStorageFolderPath(const std::filesystem::path& ZenFolderPath) + // { + // return ZenTempFolderPath(ZenFolderPath) / "storage"; // Temp storage folder for BuildStorage implementations + // } + + const std::string ZenExcludeManifestName = ".zen_exclude_manifest.txt"; + + const std::string UnsyncFolderName = ".unsync"; + + const std::string UGSFolderName = ".ugs"; + const std::string LegacyZenTempFolderName = ".zen-tmp"; + + const std::vector<std::string_view> DefaultExcludeFolders({UnsyncFolderName, ZenFolderName, UGSFolderName, LegacyZenTempFolderName}); + const std::vector<std::string_view> DefaultExcludeExtensions({}); + + static bool IsVerbose = false; + static ProgressBar::Mode ProgressMode = ProgressBar::Mode::Pretty; + + uint32_t GetUpdateDelayMS(ProgressBar::Mode InMode) + { + switch (InMode) + { + case ProgressBar::Mode::Plain: + return 5000; + case ProgressBar::Mode::Pretty: + return 200; + case ProgressBar::Mode::Log: + return 2000; + default: + ZEN_ASSERT(false); + return 0; + } + } + +#define ZEN_CONSOLE_VERBOSE(fmtstr, ...) \ + if (IsVerbose) \ + { \ + ZEN_CONSOLE_LOG(zen::logging::level::Info, fmtstr, ##__VA_ARGS__); \ + } + + const std::string DefaultAccessTokenEnvVariableName( +#if ZEN_PLATFORM_WINDOWS + "UE-CloudDataCacheAccessToken"sv +#endif +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + "UE_CloudDataCacheAccessToken"sv +#endif + + ); + + bool IsFileWithRetry(const std::filesystem::path& Path) + { + std::error_code Ec; + bool Result = IsFile(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + Ec.clear(); + Result = IsFile(Path, Ec); + } + if (Ec) + { + throw std::system_error(std::error_code(Ec.value(), std::system_category()), + fmt::format("Check path '{}' is file failed with: {} ({})", Path, Ec.message(), Ec.value())); + } + return Result; + } + + bool SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly) + { + std::error_code Ec; + bool Result = SetFileReadOnly(Path, ReadOnly, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + if (!IsFileWithRetry(Path)) + { + return false; + } + Ec.clear(); + Result = SetFileReadOnly(Path, ReadOnly, Ec); + } + if (Ec) + { + throw std::system_error( + std::error_code(Ec.value(), std::system_category()), + fmt::format("Failed {} read only flag for '{}' failed with: {}", ReadOnly ? "setting" : "clearing", Path, Ec.message())); + } + return Result; + } + + void RenameFileWithRetry(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath) + { + std::error_code Ec; + RenameFile(SourcePath, TargetPath, Ec); + for (size_t Retries = 0; Ec && Retries < 10; Retries++) + { + ZEN_ASSERT_SLOW(IsFile(SourcePath)); + if (Retries > 5) + { + ZEN_CONSOLE("Unable to overwrite file {} ({}: {}), retrying...", TargetPath, Ec.value(), Ec.message()); + } + Sleep(50 + int(Retries * 150)); + Ec.clear(); + RenameFile(SourcePath, TargetPath, Ec); + } + if (Ec) + { + throw std::system_error(std::error_code(Ec.value(), std::system_category()), + fmt::format("Rename from '{}' to '{}' failed with: {}", SourcePath, TargetPath, Ec.message())); + } + } + + void TryRemoveFile(const std::filesystem::path& Path) + { + std::error_code Ec; + RemoveFile(Path, Ec); + if (Ec) + { + if (IsFile(Path, Ec)) + { + Ec.clear(); + RemoveFile(Path, Ec); + if (Ec) + { + ZEN_DEBUG("Failed removing file '{}', reason: {}", Path, Ec.message()); + } + } + } + } + + void RemoveFileWithRetry(const std::filesystem::path& Path) + { + std::error_code Ec; + RemoveFile(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 6; Retries++) + { + Sleep(100 + int(Retries * 50)); + if (!IsFileWithRetry(Path)) + { + return; + } + Ec.clear(); + RemoveFile(Path, Ec); + } + if (Ec) + { + throw std::system_error(std::error_code(Ec.value(), std::system_category()), + fmt::format("Removing file '{}' failed with: {}", Path, Ec.message())); + } + } + + void RemoveDirWithRetry(const std::filesystem::path& Path) + { + std::error_code Ec; + RemoveDir(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + if (!IsDir(Path)) + { + return; + } + Ec.clear(); + RemoveDir(Path, Ec); + } + if (Ec) + { + throw std::system_error(std::error_code(Ec.value(), std::system_category()), + fmt::format("Removing directory '{}' failed with: {}", Path, Ec.message())); + } + } + + void CopyFile(const std::filesystem::path& SourceFilePath, + const std::filesystem::path& TargetFilePath, + uint64_t RawSize, + std::atomic<uint64_t>& WriteCount, + std::atomic<uint64_t>& WriteByteCount) + { + BasicFile TargetFile(TargetFilePath, BasicFile::Mode::kTruncate); + if (UseSparseFiles) + { + PrepareFileForScatteredWrite(TargetFile.Handle(), RawSize); + } + uint64_t Offset = 0; + if (!ScanFile(SourceFilePath, 512u * 1024u, [&](const void* Data, size_t Size) { + TargetFile.Write(Data, Size, Offset); + Offset += Size; + WriteCount++; + WriteByteCount += Size; + })) + { + throw std::runtime_error(fmt::format("Failed to copy scavenged file '{}' to '{}'", SourceFilePath, TargetFilePath)); + } + } + + uint32_t SetNativeFileAttributes(const std::filesystem::path FilePath, SourcePlatform SourcePlatform, uint32_t Attributes) + { +#if ZEN_PLATFORM_WINDOWS + if (SourcePlatform == SourcePlatform::Windows) + { + SetFileAttributes(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentAttributes = GetFileAttributes(FilePath); + uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, IsFileModeReadOnly(Attributes)); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributes(FilePath, NewAttributes); + } + return NewAttributes; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + if (SourcePlatform != SourcePlatform::Windows) + { + SetFileMode(FilePath, Attributes); + return Attributes; + } + else + { + uint32_t CurrentMode = GetFileMode(FilePath); + uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, IsFileAttributeReadOnly(Attributes)); + if (CurrentMode != NewMode) + { + SetFileMode(FilePath, NewMode); + } + return NewMode; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + }; + + uint32_t GetNativeFileAttributes(const std::filesystem::path FilePath) + { +#if ZEN_PLATFORM_WINDOWS + return GetFileAttributes(FilePath); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + return GetFileMode(FilePath); +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + } + + template<typename T> + std::string FormatArray(std::span<const T> Items, std::string_view Prefix) + { + ExtendableStringBuilder<512> SB; + for (const T& Item : Items) + { + SB.Append(fmt::format("{}{}", Prefix, Item)); + } + return SB.ToString(); + } + + bool CleanDirectory(const std::filesystem::path& Path, std::span<const std::string_view> ExcludeDirectories) + { + ZEN_TRACE_CPU("CleanDirectory"); + Stopwatch Timer; + + ProgressBar Progress(ProgressMode, "Clean Folder"); + + std::atomic<bool> CleanWipe = true; + std::atomic<uint64_t> DiscoveredItemCount = 0; + std::atomic<uint64_t> DeletedItemCount = 0; + std::atomic<uint64_t> DeletedByteCount = 0; + ParallelWork Work(AbortFlag, PauseFlag); + + struct AsyncVisitor : public GetDirectoryContentVisitor + { + AsyncVisitor(const std::filesystem::path& InPath, + std::atomic<bool>& InCleanWipe, + std::atomic<uint64_t>& InDiscoveredItemCount, + std::atomic<uint64_t>& InDeletedItemCount, + std::atomic<uint64_t>& InDeletedByteCount, + std::span<const std::string_view> InExcludeDirectories) + : Path(InPath) + , CleanWipe(InCleanWipe) + , DiscoveredItemCount(InDiscoveredItemCount) + , DeletedItemCount(InDeletedItemCount) + , DeletedByteCount(InDeletedByteCount) + , ExcludeDirectories(InExcludeDirectories) + { + } + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override + { + ZEN_TRACE_CPU("CleanDirectory_AsyncVisitDirectory"); + if (!AbortFlag) + { + if (!Content.FileNames.empty()) + { + DiscoveredItemCount += Content.FileNames.size(); + + const std::string RelativeRootString = RelativeRoot.generic_string(); + bool RemoveContent = true; + for (const std::string_view ExcludeDirectory : ExcludeDirectories) + { + if (RelativeRootString.starts_with(ExcludeDirectory)) + { + if (RelativeRootString.length() > ExcludeDirectory.length()) + { + const char MaybePathDelimiter = RelativeRootString[ExcludeDirectory.length()]; + if (MaybePathDelimiter == '/' || MaybePathDelimiter == '\\' || + MaybePathDelimiter == std::filesystem::path::preferred_separator) + { + RemoveContent = false; + break; + } + } + else + { + RemoveContent = false; + break; + } + } + } + if (RemoveContent) + { + ZEN_TRACE_CPU("DeleteFiles"); + for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) + { + const std::filesystem::path& FileName = Content.FileNames[FileIndex]; + const std::filesystem::path FilePath = (Path / RelativeRoot / FileName).make_preferred(); + try + { + SetFileReadOnlyWithRetry(FilePath, false); + RemoveFileWithRetry(FilePath); + DeletedItemCount++; + DeletedByteCount += Content.FileSizes[FileIndex]; + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed removing file {}. Reason: {}", FilePath, Ex.what()); + CleanWipe = false; + } + } + } + } + } + } + const std::filesystem::path& Path; + std::atomic<bool>& CleanWipe; + std::atomic<uint64_t>& DiscoveredItemCount; + std::atomic<uint64_t>& DeletedItemCount; + std::atomic<uint64_t>& DeletedByteCount; + std::span<const std::string_view> ExcludeDirectories; + } Visitor(Path, CleanWipe, DiscoveredItemCount, DeletedItemCount, DeletedByteCount, ExcludeDirectories); + + GetDirectoryContent( + Path, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes, + Visitor, + GetIOWorkerPool(), + Work.PendingWork()); + + DirectoryContent LocalDirectoryContent; + GetDirectoryContent(Path, DirectoryContentFlags::IncludeDirs | DirectoryContentFlags::IncludeFiles, LocalDirectoryContent); + DiscoveredItemCount += LocalDirectoryContent.Directories.size(); + std::vector<std::filesystem::path> DirectoriesToDelete; + DirectoriesToDelete.reserve(LocalDirectoryContent.Directories.size()); + for (std::filesystem::path& LocalDirPath : LocalDirectoryContent.Directories) + { + bool Leave = false; + for (const std::string_view ExcludeDirectory : ExcludeDirectories) + { + if (LocalDirPath == (Path / ExcludeDirectory)) + { + Leave = true; + break; + } + } + if (!Leave) + { + DirectoriesToDelete.emplace_back(std::move(LocalDirPath)); + DiscoveredItemCount++; + } + } + + uint64_t LastUpdateTimeMs = Timer.GetElapsedTimeMs(); + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + LastUpdateTimeMs = Timer.GetElapsedTimeMs(); + + uint64_t Deleted = DeletedItemCount.load(); + uint64_t DeletedBytes = DeletedByteCount.load(); + uint64_t Discovered = DiscoveredItemCount.load(); + Progress.UpdateState({.Task = "Cleaning folder ", + .Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes)), + .TotalCount = Discovered, + .RemainingCount = Discovered - Deleted, + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + for (const std::filesystem::path& DirectoryToDelete : DirectoriesToDelete) + { + ZEN_TRACE_CPU("DeleteDirs"); + try + { + std::error_code Ec; + zen::CleanDirectory(DirectoryToDelete, true, Ec); + if (Ec) + { + Sleep(200); + zen::CleanDirectory(DirectoryToDelete, true); + Ec.clear(); + } + + RemoveDirWithRetry(DirectoryToDelete); + + DeletedItemCount++; + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed removing directory {}. Reason: {}", DirectoryToDelete, Ex.what()); + CleanWipe = false; + } + + uint64_t NowMs = Timer.GetElapsedTimeMs(); + if ((NowMs - LastUpdateTimeMs) >= GetUpdateDelayMS(ProgressMode)) + { + LastUpdateTimeMs = NowMs; + + uint64_t Deleted = DeletedItemCount.load(); + uint64_t DeletedBytes = DeletedByteCount.load(); + uint64_t Discovered = DiscoveredItemCount.load(); + Progress.UpdateState({.Task = "Cleaning folder ", + .Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes)), + .TotalCount = Discovered, + .RemainingCount = Discovered - Deleted, + .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)}, + false); + } + } + + Progress.Finish(); + if (AbortFlag) + { + return false; + } + + uint64_t ElapsedTimeMs = Timer.GetElapsedTimeMs(); + if (ElapsedTimeMs >= 200) + { + ZEN_CONSOLE("Wiped folder '{}' {} ({}) in {}", + Path, + DiscoveredItemCount.load(), + NiceBytes(DeletedByteCount.load()), + NiceTimeSpanMs(ElapsedTimeMs)); + } + return CleanWipe; + } + + bool ParseCloudUrl(std::string_view InUrl, + std::string& OutHost, + std::string& OutNamespace, + std::string& OutBucket, + std::string& OutBuildId) + { + std::string Url(RemoveQuotes(InUrl)); + const std::string_view ExtendedApiString = "api/v2/builds/"; + if (auto ApiString = ToLower(Url).find(ExtendedApiString); ApiString != std::string::npos) + { + Url.erase(ApiString, ExtendedApiString.length()); + } + + const std::string ArtifactURLRegExString = R"((http[s]?:\/\/.*?)\/(.*?)\/(.*?)\/(.*))"; + const std::regex ArtifactURLRegEx(ArtifactURLRegExString, std::regex::ECMAScript | std::regex::icase); + std::match_results<std::string_view::const_iterator> MatchResults; + std::string_view UrlToParse(Url); + if (regex_match(begin(UrlToParse), end(UrlToParse), MatchResults, ArtifactURLRegEx) && MatchResults.size() == 5) + { + auto GetMatch = [&MatchResults](uint32_t Index) -> std::string_view { + ZEN_ASSERT(Index < MatchResults.size()); + + const auto& Match = MatchResults[Index]; + + return std::string_view(&*Match.first, Match.second - Match.first); + }; + + const std::string_view Host = GetMatch(1); + const std::string_view Namespace = GetMatch(2); + const std::string_view Bucket = GetMatch(3); + const std::string_view BuildId = GetMatch(4); + + OutHost = Host; + OutNamespace = Namespace; + OutBucket = Bucket; + OutBuildId = BuildId; + return true; + } + else + { + return false; + } + } + + std::string ReadAccessTokenFromFile(const std::filesystem::path& Path) + { + if (!IsFile(Path)) + { + throw std::runtime_error(fmt::format("the file '{}' does not exist", Path)); + } + IoBuffer Body = IoBufferBuilder::MakeFromFile(Path); + std::string JsonText(reinterpret_cast<const char*>(Body.GetData()), Body.GetSize()); + std::string JsonError; + json11::Json TokenInfo = json11::Json::parse(JsonText, JsonError); + if (!JsonError.empty()) + { + throw std::runtime_error(fmt::format("failed parsing json file '{}'. Reason: '{}'", Path, JsonError)); + } + const std::string AuthToken = TokenInfo["Token"].string_value(); + if (AuthToken.empty()) + { + throw std::runtime_error(fmt::format("the json file '{}' does not contain a value for \"Token\"", Path)); + } + return AuthToken; + } + + IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, + const std::filesystem::path& TempFolderPath, + const IoHash& Hash, + const std::string& Suffix = {}) + { + std::filesystem::path TempFilePath = TempFolderPath / (Hash.ToHexString() + Suffix); + return WriteToTempFile(std::move(Buffer), TempFilePath); + } + + class FilteredRate + { + public: + FilteredRate() {} + + void Start() + { + if (StartTimeUS == (uint64_t)-1) + { + uint64_t Expected = (uint64_t)-1; + if (StartTimeUS.compare_exchange_weak(Expected, Timer.GetElapsedTimeUs())) + { + LastTimeUS = StartTimeUS.load(); + } + } + } + void Stop() + { + if (EndTimeUS == (uint64_t)-1) + { + uint64_t Expected = (uint64_t)-1; + EndTimeUS.compare_exchange_weak(Expected, Timer.GetElapsedTimeUs()); + } + } + + void Update(uint64_t Count) + { + if (LastTimeUS == (uint64_t)-1) + { + return; + } + uint64_t TimeUS = Timer.GetElapsedTimeUs(); + uint64_t TimeDeltaUS = TimeUS - LastTimeUS; + if (TimeDeltaUS >= 2000000) + { + uint64_t Delta = Count - LastCount; + uint64_t PerSecond = (Delta * 1000000) / TimeDeltaUS; + + LastPerSecond = PerSecond; + + LastCount = Count; + + FilteredPerSecond = (PerSecond + (LastPerSecond * 7)) / 8; + + LastTimeUS = TimeUS; + } + } + + uint64_t GetCurrent() const // If Stopped - return total count / total time + { + if (LastTimeUS == (uint64_t)-1) + { + return 0; + } + return FilteredPerSecond; + } + + uint64_t GetElapsedTimeUS() const + { + if (StartTimeUS == (uint64_t)-1) + { + return 0; + } + if (EndTimeUS == (uint64_t)-1) + { + return 0; + } + uint64_t TimeDeltaUS = EndTimeUS - StartTimeUS; + return TimeDeltaUS; + } + + bool IsActive() const { return (StartTimeUS != (uint64_t)-1) && (EndTimeUS == (uint64_t)-1); } + + private: + Stopwatch Timer; + std::atomic<uint64_t> StartTimeUS = (uint64_t)-1; + std::atomic<uint64_t> EndTimeUS = (uint64_t)-1; + std::atomic<uint64_t> LastTimeUS = (uint64_t)-1; + uint64_t LastCount = 0; + uint64_t LastPerSecond = 0; + uint64_t FilteredPerSecond = 0; + }; + + uint64_t GetBytesPerSecond(uint64_t ElapsedWallTimeUS, uint64_t Count) + { + if (ElapsedWallTimeUS == 0) + { + return 0; + } + return Count * 1000000 / ElapsedWallTimeUS; + } + + std::filesystem::path GetTempChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return CacheFolderPath / (RawHash.ToHexString() + ".tmp"); + } + + std::filesystem::path GetFinalChunkedSequenceFileName(const std::filesystem::path& CacheFolderPath, const IoHash& RawHash) + { + return CacheFolderPath / RawHash.ToHexString(); + } + + struct DiskStatistics + { + std::atomic<uint64_t> OpenReadCount = 0; + std::atomic<uint64_t> OpenWriteCount = 0; + std::atomic<uint64_t> ReadCount = 0; + std::atomic<uint64_t> ReadByteCount = 0; + std::atomic<uint64_t> WriteCount = 0; + std::atomic<uint64_t> WriteByteCount = 0; + std::atomic<uint64_t> CurrentOpenFileCount = 0; + }; + + struct FindBlocksStatistics + { + uint64_t FindBlockTimeMS = 0; + uint64_t PotentialChunkCount = 0; + uint64_t PotentialChunkByteCount = 0; + uint64_t FoundBlockCount = 0; + uint64_t FoundBlockChunkCount = 0; + uint64_t FoundBlockByteCount = 0; + uint64_t AcceptedBlockCount = 0; + uint64_t AcceptedChunkCount = 0; + uint64_t AcceptedByteCount = 0; + uint64_t AcceptedRawByteCount = 0; + uint64_t RejectedBlockCount = 0; + uint64_t RejectedChunkCount = 0; + uint64_t RejectedByteCount = 0; + uint64_t AcceptedReduntantChunkCount = 0; + uint64_t AcceptedReduntantByteCount = 0; + uint64_t NewBlocksCount = 0; + uint64_t NewBlocksChunkCount = 0; + uint64_t NewBlocksChunkByteCount = 0; + }; + + struct UploadStatistics + { + std::atomic<uint64_t> BlockCount = 0; + std::atomic<uint64_t> BlocksBytes = 0; + std::atomic<uint64_t> ChunkCount = 0; + std::atomic<uint64_t> ChunksBytes = 0; + std::atomic<uint64_t> ReadFromDiskBytes = 0; + std::atomic<uint64_t> MultipartAttachmentCount = 0; + uint64_t ElapsedWallTimeUS = 0; + + UploadStatistics& operator+=(const UploadStatistics& Rhs) + { + BlockCount += Rhs.BlockCount; + BlocksBytes += Rhs.BlocksBytes; + ChunkCount += Rhs.ChunkCount; + ChunksBytes += Rhs.ChunksBytes; + ReadFromDiskBytes += Rhs.ReadFromDiskBytes; + MultipartAttachmentCount += Rhs.MultipartAttachmentCount; + ElapsedWallTimeUS += Rhs.ElapsedWallTimeUS; + return *this; + } + }; + + struct LooseChunksStatistics + { + uint64_t ChunkCount = 0; + uint64_t ChunkByteCount = 0; + std::atomic<uint64_t> CompressedChunkCount = 0; + std::atomic<uint64_t> CompressedChunkRawBytes = 0; + std::atomic<uint64_t> CompressedChunkBytes = 0; + uint64_t CompressChunksElapsedWallTimeUS = 0; + + LooseChunksStatistics& operator+=(const LooseChunksStatistics& Rhs) + { + ChunkCount += Rhs.ChunkCount; + ChunkByteCount += Rhs.ChunkByteCount; + CompressedChunkCount += Rhs.CompressedChunkCount; + CompressedChunkRawBytes += Rhs.CompressedChunkRawBytes; + CompressedChunkBytes += Rhs.CompressedChunkBytes; + CompressChunksElapsedWallTimeUS += Rhs.CompressChunksElapsedWallTimeUS; + return *this; + } + }; + + struct GenerateBlocksStatistics + { + std::atomic<uint64_t> GeneratedBlockByteCount = 0; + std::atomic<uint64_t> GeneratedBlockCount = 0; + uint64_t GenerateBlocksElapsedWallTimeUS = 0; + + GenerateBlocksStatistics& operator+=(const GenerateBlocksStatistics& Rhs) + { + GeneratedBlockByteCount += Rhs.GeneratedBlockByteCount; + GeneratedBlockCount += Rhs.GeneratedBlockCount; + GenerateBlocksElapsedWallTimeUS += Rhs.GenerateBlocksElapsedWallTimeUS; + return *this; + } + }; + + struct CacheMappingStatistics + { + uint64_t CacheChunkCount = 0; + uint64_t CacheChunkByteCount = 0; + + uint64_t CacheBlockCount = 0; + uint64_t CacheBlocksByteCount = 0; + + uint64_t CacheSequenceHashesCount = 0; + uint64_t CacheSequenceHashesByteCount = 0; + + uint64_t CacheScanElapsedWallTimeUs = 0; + + uint32_t LocalPathsMatchingSequencesCount = 0; + uint64_t LocalPathsMatchingSequencesByteCount = 0; + + uint64_t LocalChunkMatchingRemoteCount = 0; + uint64_t LocalChunkMatchingRemoteByteCount = 0; + + uint64_t LocalScanElapsedWallTimeUs = 0; + + uint32_t ScavengedPathsMatchingSequencesCount = 0; + uint64_t ScavengedPathsMatchingSequencesByteCount = 0; + + uint64_t ScavengedChunkMatchingRemoteCount = 0; + uint64_t ScavengedChunkMatchingRemoteByteCount = 0; + + uint64_t ScavengeElapsedWallTimeUs = 0; + }; + + struct DownloadStatistics + { + std::atomic<uint64_t> RequestsCompleteCount = 0; + + std::atomic<uint64_t> DownloadedChunkCount = 0; + std::atomic<uint64_t> DownloadedChunkByteCount = 0; + std::atomic<uint64_t> MultipartAttachmentCount = 0; + + std::atomic<uint64_t> DownloadedBlockCount = 0; + std::atomic<uint64_t> DownloadedBlockByteCount = 0; + + std::atomic<uint64_t> DownloadedPartialBlockCount = 0; + std::atomic<uint64_t> DownloadedPartialBlockByteCount = 0; + }; + + struct WriteChunkStatistics + { + uint64_t DownloadTimeUs = 0; + uint64_t WriteTimeUs = 0; + uint64_t WriteChunksElapsedWallTimeUs = 0; + }; + + struct RebuildFolderStateStatistics + { + uint64_t CleanFolderElapsedWallTimeUs = 0; + std::atomic<uint32_t> FinalizeTreeFilesMovedCount = 0; + std::atomic<uint32_t> FinalizeTreeFilesCopiedCount = 0; + uint64_t FinalizeTreeElapsedWallTimeUs = 0; + }; + + struct VerifyFolderStatistics + { + std::atomic<uint64_t> FilesVerified = 0; + std::atomic<uint64_t> FilesFailed = 0; + std::atomic<uint64_t> ReadBytes = 0; + uint64_t VerifyElapsedWallTimeUs = 0; + }; + + struct StorageInstance + { + std::unique_ptr<HttpClient> BuildStorageHttp; + std::unique_ptr<BuildStorage> BuildStorage; + std::string StorageName; + std::unique_ptr<HttpClient> CacheHttp; + std::unique_ptr<BuildStorageCache> BuildCacheStorage; + std::string CacheName; + }; + + std::vector<uint32_t> CalculateAbsoluteChunkOrders(const std::span<const IoHash> LocalChunkHashes, + const std::span<const uint32_t> LocalChunkOrder, + const tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToLocalChunkIndex, + const std::span<const uint32_t>& LooseChunkIndexes, + const std::span<const ChunkBlockDescription>& BlockDescriptions) + { + ZEN_TRACE_CPU("CalculateAbsoluteChunkOrders"); + +#if EXTRA_VERIFY + std::vector<IoHash> TmpAbsoluteChunkHashes; + TmpAbsoluteChunkHashes.reserve(LocalChunkHashes.size()); +#endif // EXTRA_VERIFY + std::vector<uint32_t> LocalChunkIndexToAbsoluteChunkIndex; + LocalChunkIndexToAbsoluteChunkIndex.resize(LocalChunkHashes.size(), (uint32_t)-1); + std::uint32_t AbsoluteChunkCount = 0; + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + LocalChunkIndexToAbsoluteChunkIndex[ChunkIndex] = AbsoluteChunkCount; +#if EXTRA_VERIFY + TmpAbsoluteChunkHashes.push_back(LocalChunkHashes[ChunkIndex]); +#endif // EXTRA_VERIFY + AbsoluteChunkCount++; + } + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkRawHashes) + { + if (auto It = ChunkHashToLocalChunkIndex.find(ChunkHash); It != ChunkHashToLocalChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + ZEN_ASSERT_SLOW(LocalChunkHashes[LocalChunkIndex] == ChunkHash); + LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex] = AbsoluteChunkCount; + } +#if EXTRA_VERIFY + TmpAbsoluteChunkHashes.push_back(ChunkHash); +#endif // EXTRA_VERIFY + AbsoluteChunkCount++; + } + } + std::vector<uint32_t> AbsoluteChunkOrder; + AbsoluteChunkOrder.reserve(LocalChunkHashes.size()); + for (const uint32_t LocalChunkIndex : LocalChunkOrder) + { + const uint32_t AbsoluteChunkIndex = LocalChunkIndexToAbsoluteChunkIndex[LocalChunkIndex]; +#if EXTRA_VERIFY + ZEN_ASSERT(LocalChunkHashes[LocalChunkIndex] == TmpAbsoluteChunkHashes[AbsoluteChunkIndex]); +#endif // EXTRA_VERIFY + AbsoluteChunkOrder.push_back(AbsoluteChunkIndex); + } +#if EXTRA_VERIFY + { + uint32_t OrderIndex = 0; + while (OrderIndex < LocalChunkOrder.size()) + { + const uint32_t LocalChunkIndex = LocalChunkOrder[OrderIndex]; + const IoHash& LocalChunkHash = LocalChunkHashes[LocalChunkIndex]; + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrder[OrderIndex]; + const IoHash& AbsoluteChunkHash = TmpAbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + OrderIndex++; + } + } +#endif // EXTRA_VERIFY + return AbsoluteChunkOrder; + } + + void CalculateLocalChunkOrders(const std::span<const uint32_t>& AbsoluteChunkOrders, + const std::span<const IoHash> LooseChunkHashes, + const std::span<const uint64_t> LooseChunkRawSizes, + const std::span<const ChunkBlockDescription>& BlockDescriptions, + std::vector<IoHash>& OutLocalChunkHashes, + std::vector<uint64_t>& OutLocalChunkRawSizes, + std::vector<uint32_t>& OutLocalChunkOrders) + { + ZEN_TRACE_CPU("CalculateLocalChunkOrders"); + + std::vector<IoHash> AbsoluteChunkHashes; + std::vector<uint64_t> AbsoluteChunkRawSizes; + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), LooseChunkHashes.begin(), LooseChunkHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), LooseChunkRawSizes.begin(), LooseChunkRawSizes.end()); + for (const ChunkBlockDescription& Block : BlockDescriptions) + { + AbsoluteChunkHashes.insert(AbsoluteChunkHashes.end(), Block.ChunkRawHashes.begin(), Block.ChunkRawHashes.end()); + AbsoluteChunkRawSizes.insert(AbsoluteChunkRawSizes.end(), Block.ChunkRawLengths.begin(), Block.ChunkRawLengths.end()); + } + OutLocalChunkHashes.reserve(AbsoluteChunkHashes.size()); + OutLocalChunkRawSizes.reserve(AbsoluteChunkRawSizes.size()); + OutLocalChunkOrders.reserve(AbsoluteChunkOrders.size()); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(AbsoluteChunkHashes.size()); + + for (uint32_t AbsoluteChunkOrderIndex = 0; AbsoluteChunkOrderIndex < AbsoluteChunkOrders.size(); AbsoluteChunkOrderIndex++) + { + const uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + const uint64_t AbsoluteChunkRawSize = AbsoluteChunkRawSizes[AbsoluteChunkIndex]; + + if (auto It = ChunkHashToChunkIndex.find(AbsoluteChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t LocalChunkIndex = It->second; + OutLocalChunkOrders.push_back(LocalChunkIndex); + } + else + { + uint32_t LocalChunkIndex = gsl::narrow<uint32_t>(OutLocalChunkHashes.size()); + OutLocalChunkHashes.push_back(AbsoluteChunkHash); + OutLocalChunkRawSizes.push_back(AbsoluteChunkRawSize); + OutLocalChunkOrders.push_back(LocalChunkIndex); + ChunkHashToChunkIndex.insert_or_assign(AbsoluteChunkHash, LocalChunkIndex); + } +#if EXTRA_VERIFY + const uint32_t LocalChunkIndex = OutLocalChunkOrders[AbsoluteChunkOrderIndex]; + const IoHash& LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + const uint64_t& LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + ZEN_ASSERT(LocalChunkRawSize == AbsoluteChunkRawSize); +#endif // EXTRA_VERIFY + } +#if EXTRA_VERIFY + for (uint32_t OrderIndex = 0; OrderIndex < OutLocalChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = OutLocalChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = OutLocalChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = OutLocalChunkRawSizes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = AbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = AbsoluteChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } +#endif // EXTRA_VERIFY + } + + void WriteBuildContentToCompactBinary(CbObjectWriter& PartManifestWriter, + const SourcePlatform Platform, + std::span<const std::filesystem::path> Paths, + std::span<const IoHash> RawHashes, + std::span<const uint64_t> RawSizes, + std::span<const uint32_t> Attributes, + std::span<const IoHash> SequenceRawHashes, + std::span<const uint32_t> ChunkCounts, + std::span<const IoHash> LocalChunkHashes, + std::span<const uint64_t> LocalChunkRawSizes, + const std::vector<uint32_t>& AbsoluteChunkOrders, + const std::span<const uint32_t> LooseLocalChunkIndexes, + const std::span<IoHash> BlockHashes) + { + ZEN_ASSERT(Platform != SourcePlatform::_Count); + PartManifestWriter.AddString("platform"sv, ToString(Platform)); + + uint64_t TotalSize = 0; + for (const uint64_t Size : RawSizes) + { + TotalSize += Size; + } + PartManifestWriter.AddInteger("totalSize", TotalSize); + + PartManifestWriter.BeginObject("files"sv); + { + compactbinary_helpers::WriteArray(Paths, "paths"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(RawHashes, "rawhashes"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(RawSizes, "rawsizes"sv, PartManifestWriter); + if (Platform == SourcePlatform::Windows) + { + compactbinary_helpers::WriteArray(Attributes, "attributes"sv, PartManifestWriter); + } + if (Platform == SourcePlatform::Linux || Platform == SourcePlatform::MacOS) + { + compactbinary_helpers::WriteArray(Attributes, "mode"sv, PartManifestWriter); + } + } + PartManifestWriter.EndObject(); // files + + PartManifestWriter.BeginObject("chunkedContent"); + { + compactbinary_helpers::WriteArray(SequenceRawHashes, "sequenceRawHashes"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(ChunkCounts, "chunkcounts"sv, PartManifestWriter); + compactbinary_helpers::WriteArray(AbsoluteChunkOrders, "chunkorders"sv, PartManifestWriter); + } + PartManifestWriter.EndObject(); // chunkedContent + + size_t LooseChunkCount = LooseLocalChunkIndexes.size(); + if (LooseChunkCount > 0) + { + PartManifestWriter.BeginObject("chunkAttachments"); + { + PartManifestWriter.BeginArray("rawHashes"sv); + for (uint32_t ChunkIndex : LooseLocalChunkIndexes) + { + PartManifestWriter.AddBinaryAttachment(LocalChunkHashes[ChunkIndex]); + } + PartManifestWriter.EndArray(); // rawHashes + + PartManifestWriter.BeginArray("chunkRawSizes"sv); + for (uint32_t ChunkIndex : LooseLocalChunkIndexes) + { + PartManifestWriter.AddInteger(LocalChunkRawSizes[ChunkIndex]); + } + PartManifestWriter.EndArray(); // chunkSizes + } + PartManifestWriter.EndObject(); // + } + + if (BlockHashes.size() > 0) + { + PartManifestWriter.BeginObject("blockAttachments"); + { + compactbinary_helpers::WriteBinaryAttachmentArray(BlockHashes, "rawHashes"sv, PartManifestWriter); + } + PartManifestWriter.EndObject(); // blocks + } + } + + void ReadBuildContentFromCompactBinary(CbObjectView BuildPartManifest, + SourcePlatform& OutPlatform, + std::vector<std::filesystem::path>& OutPaths, + std::vector<IoHash>& OutRawHashes, + std::vector<uint64_t>& OutRawSizes, + std::vector<uint32_t>& OutAttributes, + std::vector<IoHash>& OutSequenceRawHashes, + std::vector<uint32_t>& OutChunkCounts, + std::vector<uint32_t>& OutAbsoluteChunkOrders, + std::vector<IoHash>& OutLooseChunkHashes, + std::vector<uint64_t>& OutLooseChunkRawSizes, + std::vector<IoHash>& OutBlockRawHashes) + { + OutPlatform = FromString(BuildPartManifest["platform"sv].AsString(), SourcePlatform::_Count); + + CbObjectView FilesObject = BuildPartManifest["files"sv].AsObjectView(); + + compactbinary_helpers::ReadArray("paths"sv, FilesObject, OutPaths); + compactbinary_helpers::ReadArray("rawhashes"sv, FilesObject, OutRawHashes); + compactbinary_helpers::ReadArray("rawsizes"sv, FilesObject, OutRawSizes); + + uint64_t PathCount = OutPaths.size(); + if (OutRawHashes.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of raw hashes entries does not match number of paths")); + } + if (OutRawSizes.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of raw sizes entries does not match number of paths")); + } + + std::vector<uint32_t> ModeArray; + compactbinary_helpers::ReadArray("mode"sv, FilesObject, ModeArray); + if (ModeArray.size() != PathCount && ModeArray.size() != 0) + { + throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths")); + } + + std::vector<uint32_t> AttributeArray; + compactbinary_helpers::ReadArray("attributes"sv, FilesObject, ModeArray); + if (AttributeArray.size() != PathCount && AttributeArray.size() != 0) + { + throw std::runtime_error(fmt::format("Number of attribute entries does not match number of paths")); + } + + if (ModeArray.size() > 0) + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = SourcePlatform::Linux; // Best guess - under dev format + } + OutAttributes = std::move(ModeArray); + } + else if (AttributeArray.size() > 0) + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = SourcePlatform::Windows; + } + OutAttributes = std::move(AttributeArray); + } + else + { + if (OutPlatform == SourcePlatform::_Count) + { + OutPlatform = GetSourceCurrentPlatform(); + } + } + + if (CbObjectView ChunkContentView = BuildPartManifest["chunkedContent"sv].AsObjectView(); ChunkContentView) + { + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkContentView, OutSequenceRawHashes); + compactbinary_helpers::ReadArray("chunkcounts"sv, ChunkContentView, OutChunkCounts); + if (OutChunkCounts.size() != OutSequenceRawHashes.size()) + { + throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + } + compactbinary_helpers::ReadArray("chunkorders"sv, ChunkContentView, OutAbsoluteChunkOrders); + } + else if (FilesObject["chunkcounts"sv]) + { + // Legacy zen style + + std::vector<uint32_t> LegacyChunkCounts; + compactbinary_helpers::ReadArray("chunkcounts"sv, FilesObject, LegacyChunkCounts); + if (LegacyChunkCounts.size() != PathCount) + { + throw std::runtime_error(fmt::format("Number of chunk count entries does not match number of paths")); + } + std::vector<uint32_t> LegacyAbsoluteChunkOrders; + compactbinary_helpers::ReadArray("chunkorders"sv, FilesObject, LegacyAbsoluteChunkOrders); + + CbArrayView ChunkOrdersArray = BuildPartManifest["chunkorders"sv].AsArrayView(); + const uint64_t ChunkOrdersCount = ChunkOrdersArray.Num(); + + tsl::robin_set<IoHash, IoHash::Hasher> FoundRawHashes; + FoundRawHashes.reserve(PathCount); + + OutChunkCounts.reserve(PathCount); + OutAbsoluteChunkOrders.reserve(ChunkOrdersCount); + + uint32_t OrderIndexOffset = 0; + for (uint32_t PathIndex = 0; PathIndex < OutPaths.size(); PathIndex++) + { + const IoHash& PathRawHash = OutRawHashes[PathIndex]; + uint32_t LegacyChunkCount = LegacyChunkCounts[PathIndex]; + + if (FoundRawHashes.insert(PathRawHash).second) + { + OutSequenceRawHashes.push_back(PathRawHash); + OutChunkCounts.push_back(LegacyChunkCount); + std::span<uint32_t> AbsoluteChunkOrder = + std::span<uint32_t>(LegacyAbsoluteChunkOrders).subspan(OrderIndexOffset, LegacyChunkCount); + OutAbsoluteChunkOrders.insert(OutAbsoluteChunkOrders.end(), AbsoluteChunkOrder.begin(), AbsoluteChunkOrder.end()); + } + OrderIndexOffset += LegacyChunkCounts[PathIndex]; + } + } + else + { + // Legacy C# style + + tsl::robin_set<IoHash, IoHash::Hasher> FoundRawHashes; + FoundRawHashes.reserve(PathCount); + uint32_t OrderIndexOffset = 0; + for (uint32_t PathIndex = 0; PathIndex < OutPaths.size(); PathIndex++) + { + if (OutRawSizes[PathIndex] > 0) + { + const IoHash& PathRawHash = OutRawHashes[PathIndex]; + if (FoundRawHashes.insert(PathRawHash).second) + { + OutSequenceRawHashes.push_back(PathRawHash); + OutChunkCounts.push_back(1); + OutAbsoluteChunkOrders.push_back(OrderIndexOffset); + OutLooseChunkHashes.push_back(PathRawHash); + OutLooseChunkRawSizes.push_back(OutRawSizes[PathIndex]); + OrderIndexOffset += 1; + } + } + } + } + + CbObjectView ChunkAttachmentsView = BuildPartManifest["chunkAttachments"sv].AsObjectView(); + { + compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, ChunkAttachmentsView, OutLooseChunkHashes); + compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkAttachmentsView, OutLooseChunkRawSizes); + if (OutLooseChunkHashes.size() != OutLooseChunkRawSizes.size()) + { + throw std::runtime_error( + fmt::format("Number of attachment chunk hashes does not match number of attachemnt chunk raw sizes")); + } + } + + CbObjectView BlocksView = BuildPartManifest["blockAttachments"sv].AsObjectView(); + { + compactbinary_helpers::ReadBinaryAttachmentArray("rawHashes"sv, BlocksView, OutBlockRawHashes); + } + } + + bool ReadStateObject(CbObjectView StateView, + Oid& OutBuildId, + std::vector<Oid>& BuildPartsIds, + std::vector<std::string>& BuildPartsNames, + std::vector<ChunkedFolderContent>& OutPartContents, + FolderContent& OutLocalFolderState) + { + try + { + CbObjectView BuildView = StateView["builds"sv].AsArrayView().CreateViewIterator().AsObjectView(); + OutBuildId = BuildView["buildId"sv].AsObjectId(); + for (CbFieldView PartView : BuildView["parts"sv].AsArrayView()) + { + CbObjectView PartObjectView = PartView.AsObjectView(); + BuildPartsIds.push_back(PartObjectView["partId"sv].AsObjectId()); + BuildPartsNames.push_back(std::string(PartObjectView["partName"sv].AsString())); + OutPartContents.push_back(LoadChunkedFolderContentToCompactBinary(PartObjectView["content"sv].AsObjectView())); + } + OutLocalFolderState = LoadFolderContentToCompactBinary(StateView["localFolderState"sv].AsObjectView()); + return true; + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Unable to read local state: ", Ex.what()); + return false; + } + } + + CbObject CreateStateObject(const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& AllBuildParts, + std::span<const ChunkedFolderContent> PartContents, + const FolderContent& LocalFolderState, + const std::filesystem::path& LocalPath) + { + CbObjectWriter CurrentStateWriter; + CurrentStateWriter.AddString("path", (const char*)LocalPath.u8string().c_str()); + CurrentStateWriter.BeginArray("builds"sv); + { + CurrentStateWriter.BeginObject(); + { + CurrentStateWriter.AddObjectId("buildId"sv, BuildId); + CurrentStateWriter.BeginArray("parts"sv); + for (size_t PartIndex = 0; PartIndex < AllBuildParts.size(); PartIndex++) + { + const Oid BuildPartId = AllBuildParts[PartIndex].first; + CurrentStateWriter.BeginObject(); + { + CurrentStateWriter.AddObjectId("partId"sv, BuildPartId); + CurrentStateWriter.AddString("partName"sv, AllBuildParts[PartIndex].second); + CurrentStateWriter.BeginObject("content"); + { + SaveChunkedFolderContentToCompactBinary(PartContents[PartIndex], CurrentStateWriter); + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndArray(); // parts + } + CurrentStateWriter.EndObject(); + } + CurrentStateWriter.EndArray(); // builds + + CurrentStateWriter.BeginObject("localFolderState"sv); + { + SaveFolderContentToCompactBinary(LocalFolderState, CurrentStateWriter); + } + CurrentStateWriter.EndObject(); // localFolderState + + return CurrentStateWriter.Save(); + } + + void AddDownloadedPath(const std::filesystem::path& SystemRootDir, + const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& BuildParts, + const std::filesystem::path& StateFilePath, + const std::filesystem::path& LocalPath) + { + ZEN_ASSERT(!SystemRootDir.empty()); + ZEN_ASSERT(!StateFilePath.empty()); + ZEN_ASSERT(!LocalPath.empty()); + const std::u8string LocalPathString = LocalPath.generic_u8string(); + IoHash PathHash = IoHash::HashBuffer(LocalPathString.data(), LocalPathString.length()); + std::filesystem::path WritePath = SystemRootDir / "builds" / "downloads" / (PathHash.ToHexString() + ".json"); + CreateDirectories(WritePath.parent_path()); + CbObjectWriter Writer; + Writer.AddString("path", (const char*)LocalPath.u8string().c_str()); + Writer.AddString("statePath", (const char*)StateFilePath.u8string().c_str()); + Writer.AddDateTime("date", DateTime::Now()); + Writer.BeginArray("builds"sv); + { + Writer.BeginObject(); + { + Writer.AddObjectId("buildId", BuildId); + Writer.BeginArray("parts"); + for (const auto& It : BuildParts) + { + Writer.BeginObject(); + { + Writer.AddObjectId("partId", It.first); + Writer.AddString("partName", It.second); + } + Writer.EndObject(); + } + Writer.EndArray(); // parts + } + Writer.EndObject(); + } + Writer.EndArray(); // builds + + CbObject Payload = Writer.Save(); + ExtendableStringBuilder<512> SB; + CompactBinaryToJson(Payload.GetView(), SB); + MemoryView JsonPayload(SB.Data(), SB.Size()); + TemporaryFile::SafeWriteFile(WritePath, JsonPayload); + } + + struct ScavengeSource + { + std::filesystem::path StateFilePath; + std::filesystem::path Path; + }; + + std::vector<ScavengeSource> GetDownloadedStatePaths(const std::filesystem::path& SystemRootDir) + { + std::vector<ScavengeSource> Result; + DirectoryContent Content; + GetDirectoryContent(SystemRootDir / "builds" / "downloads", DirectoryContentFlags::IncludeFiles, Content); + for (const std::filesystem::path& EntryPath : Content.Files) + { + bool DeleteEntry = false; + IoHash EntryPathHash; + if (IoHash::TryParse(EntryPath.stem().string(), EntryPathHash)) + { + // Read state and verify that it is valid + IoBuffer MetaDataJson = ReadFile(EntryPath).Flatten(); + std::string_view Json(reinterpret_cast<const char*>(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + CbObject DownloadInfo = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (JsonError.empty()) + { + std::filesystem::path StateFilePath = DownloadInfo["statePath"].AsU8String(); + if (IsFile(StateFilePath)) + { + std::filesystem::path Path = DownloadInfo["path"].AsU8String(); + if (IsDir(Path)) + { + Result.push_back({.StateFilePath = std::move(StateFilePath), .Path = std::move(Path)}); + } + else + { + DeleteEntry = true; + } + } + else + { + DeleteEntry = true; + } + } + else + { + ZEN_WARN("Invalid download state file at {}. '{}'", EntryPath, JsonError); + DeleteEntry = true; + } + } + + if (DeleteEntry) + { + std::error_code DummyEc; + std::filesystem::remove(EntryPath, DummyEc); + } + } + return Result; + } + + class BufferedOpenFile + { + public: + BufferedOpenFile(const std::filesystem::path Path, DiskStatistics& DiskStats) + : m_Source(Path, BasicFile::Mode::kRead) + , m_SourceSize(m_Source.FileSize()) + , m_DiskStats(DiskStats) + { + m_DiskStats.OpenReadCount++; + m_DiskStats.CurrentOpenFileCount++; + } + ~BufferedOpenFile() { m_DiskStats.CurrentOpenFileCount--; } + BufferedOpenFile() = delete; + BufferedOpenFile(const BufferedOpenFile&) = delete; + BufferedOpenFile(BufferedOpenFile&&) = delete; + BufferedOpenFile& operator=(BufferedOpenFile&&) = delete; + BufferedOpenFile& operator=(const BufferedOpenFile&) = delete; + + const uint64_t BlockSize = 256u * 1024u; + CompositeBuffer GetRange(uint64_t Offset, uint64_t Size) + { + ZEN_TRACE_CPU("BufferedOpenFile::GetRange"); + + ZEN_ASSERT((m_CacheBlockIndex == (uint64_t)-1) || m_Cache); + auto _ = MakeGuard([&]() { ZEN_ASSERT((m_CacheBlockIndex == (uint64_t)-1) || m_Cache); }); + + ZEN_ASSERT((Offset + Size) <= m_SourceSize); + const uint64_t BlockIndexStart = Offset / BlockSize; + const uint64_t BlockIndexEnd = (Offset + Size - 1) / BlockSize; + + std::vector<SharedBuffer> BufferRanges; + BufferRanges.reserve(BlockIndexEnd - BlockIndexStart + 1); + + uint64_t ReadOffset = Offset; + for (uint64_t BlockIndex = BlockIndexStart; BlockIndex <= BlockIndexEnd; BlockIndex++) + { + const uint64_t BlockStartOffset = BlockIndex * BlockSize; + if (m_CacheBlockIndex != BlockIndex) + { + uint64_t CacheSize = Min(BlockSize, m_SourceSize - BlockStartOffset); + ZEN_ASSERT(CacheSize > 0); + m_Cache = IoBuffer(CacheSize); + m_Source.Read(m_Cache.GetMutableView().GetData(), CacheSize, BlockStartOffset); + m_DiskStats.ReadCount++; + m_DiskStats.ReadByteCount += CacheSize; + m_CacheBlockIndex = BlockIndex; + } + + const uint64_t BytesRead = ReadOffset - Offset; + ZEN_ASSERT(BlockStartOffset <= ReadOffset); + const uint64_t OffsetIntoBlock = ReadOffset - BlockStartOffset; + ZEN_ASSERT(OffsetIntoBlock < m_Cache.GetSize()); + const uint64_t BlockBytes = Min(m_Cache.GetSize() - OffsetIntoBlock, Size - BytesRead); + BufferRanges.emplace_back(SharedBuffer(IoBuffer(m_Cache, OffsetIntoBlock, BlockBytes))); + ReadOffset += BlockBytes; + } + CompositeBuffer Result(std::move(BufferRanges)); + ZEN_ASSERT(Result.GetSize() == Size); + return Result; + } + + private: + BasicFile m_Source; + const uint64_t m_SourceSize; + DiskStatistics& m_DiskStats; + uint64_t m_CacheBlockIndex = (uint64_t)-1; + IoBuffer m_Cache; + }; + + class ReadFileCache + { + public: + // A buffered file reader that provides CompositeBuffer where the buffers are owned and the memory never overwritten + ReadFileCache(DiskStatistics& DiskStats, + const std::filesystem::path& Path, + const ChunkedFolderContent& LocalContent, + const ChunkedContentLookup& LocalLookup, + size_t MaxOpenFileCount) + : m_Path(Path) + , m_LocalContent(LocalContent) + , m_LocalLookup(LocalLookup) + , m_DiskStats(DiskStats) + { + m_OpenFiles.reserve(MaxOpenFileCount); + } + ~ReadFileCache() { m_OpenFiles.clear(); } + + CompositeBuffer GetRange(uint32_t SequenceIndex, uint64_t Offset, uint64_t Size) + { + ZEN_TRACE_CPU("ReadFileCache::GetRange"); + + auto CacheIt = std::find_if(m_OpenFiles.begin(), m_OpenFiles.end(), [SequenceIndex](const auto& Lhs) { + return Lhs.first == SequenceIndex; + }); + if (CacheIt != m_OpenFiles.end()) + { + if (CacheIt != m_OpenFiles.begin()) + { + auto CachedFile(std::move(CacheIt->second)); + m_OpenFiles.erase(CacheIt); + m_OpenFiles.insert(m_OpenFiles.begin(), std::make_pair(SequenceIndex, std::move(CachedFile))); + } + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + return Result; + } + const uint32_t LocalPathIndex = m_LocalLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const std::filesystem::path LocalFilePath = (m_Path / m_LocalContent.Paths[LocalPathIndex]).make_preferred(); + if (Size == m_LocalContent.RawSizes[LocalPathIndex]) + { + IoBuffer Result = IoBufferBuilder::MakeFromFile(LocalFilePath); + return CompositeBuffer(SharedBuffer(Result)); + } + if (m_OpenFiles.size() == m_OpenFiles.capacity()) + { + m_OpenFiles.pop_back(); + } + m_OpenFiles.insert(m_OpenFiles.begin(), + std::make_pair(SequenceIndex, std::make_unique<BufferedOpenFile>(LocalFilePath, m_DiskStats))); + CompositeBuffer Result = m_OpenFiles.front().second->GetRange(Offset, Size); + return Result; + } + + private: + const std::filesystem::path m_Path; + const ChunkedFolderContent& m_LocalContent; + const ChunkedContentLookup& m_LocalLookup; + std::vector<std::pair<uint32_t, std::unique_ptr<BufferedOpenFile>>> m_OpenFiles; + DiskStatistics& m_DiskStats; + }; + + CompositeBuffer ValidateBlob(IoBuffer&& Payload, const IoHash& BlobHash, uint64_t& OutCompressedSize, uint64_t& OutDecompressedSize) + { + ZEN_TRACE_CPU("ValidateBlob"); + + if (Payload.GetContentType() != ZenContentType::kCompressedBinary) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) has unexpected content type '{}'", + BlobHash, + Payload.GetSize(), + ToString(Payload.GetContentType()))); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) compressed header is invalid", BlobHash, Payload.GetSize())); + } + if (RawHash != BlobHash) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) compressed header has a mismatching raw hash {}", BlobHash, Payload.GetSize(), RawHash)); + } + + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream( + 0, + RawSize, + [&Hash](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize, Offset); + if (!AbortFlag) + { + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + return true; + } + return false; + }); + + if (AbortFlag) + { + return CompositeBuffer{}; + } + + if (!CouldDecompress) + { + throw std::runtime_error( + fmt::format("Blob {} ({} bytes) failed to decompress - header information mismatch", BlobHash, Payload.GetSize())); + } + IoHash ValidateRawHash = Hash.GetHash(); + if (ValidateRawHash != BlobHash) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) decompressed hash {} does not match header information", + BlobHash, + Payload.GetSize(), + ValidateRawHash)); + } + OodleCompressor Compressor; + OodleCompressionLevel CompressionLevel; + uint64_t BlockSize; + if (!Compressed.TryGetCompressParameters(Compressor, CompressionLevel, BlockSize)) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to get compression details", BlobHash, Payload.GetSize())); + } + OutCompressedSize = Payload.GetSize(); + OutDecompressedSize = RawSize; + if (CompressionLevel == OodleCompressionLevel::None) + { + // Only decompress to composite if we need it for block verification + CompositeBuffer DecompressedComposite = Compressed.DecompressToComposite(); + if (!DecompressedComposite) + { + throw std::runtime_error(fmt::format("Blob {} ({} bytes) failed to decompress to composite", BlobHash, Payload.GetSize())); + } + return DecompressedComposite; + } + return CompositeBuffer{}; + } + + CompositeBuffer ValidateBlob(BuildStorage& Storage, + const Oid& BuildId, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) + { + ZEN_TRACE_CPU("ValidateBlob"); + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlobHash); + if (!Payload) + { + throw std::runtime_error(fmt::format("Blob {} could not be found", BlobHash)); + } + return ValidateBlob(std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); + } + + ChunkBlockDescription ValidateChunkBlock(IoBuffer&& Payload, + const IoHash& BlobHash, + uint64_t& OutCompressedSize, + uint64_t& OutDecompressedSize) + { + CompositeBuffer BlockBuffer = ValidateBlob(std::move(Payload), BlobHash, OutCompressedSize, OutDecompressedSize); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Chunk block blob {} is not compressed using 'None' compression level", BlobHash)); + } + return GetChunkBlockDescription(BlockBuffer.Flatten(), BlobHash); + } + + CompositeBuffer FetchChunk(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const IoHash& ChunkHash, + ReadFileCache& OpenFileCache) + { + ZEN_TRACE_CPU("FetchChunk"); + auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(It != Lookup.ChunkHashToChunkIndex.end()); + uint32_t ChunkIndex = It->second; + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); + return Chunk; + }; + + CompressedBuffer GenerateBlock(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + const std::vector<uint32_t>& ChunksInBlock, + ChunkBlockDescription& OutBlockDescription, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("GenerateBlock"); + ReadFileCache OpenFileCache(DiskStats, Path, Content, Lookup, 4); + + std::vector<std::pair<IoHash, FetchChunkFunc>> BlockContent; + BlockContent.reserve(ChunksInBlock.size()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + BlockContent.emplace_back(std::make_pair( + Content.ChunkedContent.ChunkHashes[ChunkIndex], + [&Content, &Lookup, &OpenFileCache, ChunkIndex](const IoHash& ChunkHash) -> std::pair<uint64_t, CompressedBuffer> { + CompositeBuffer Chunk = FetchChunk(Content, Lookup, ChunkHash, OpenFileCache); + if (!Chunk) + { + ZEN_ASSERT(false); + } + uint64_t RawSize = Chunk.GetSize(); + const bool ShouldCompressChunk = Lookup.RawHashToSequenceIndex.contains(ChunkHash) && + (RawSize >= MinimumSizeForCompressInBlock) && + IsChunkCompressable(Content, Lookup, ChunkIndex); + const OodleCompressionLevel CompressionLevel = + ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; + return {RawSize, CompressedBuffer::Compress(Chunk, OodleCompressor::Mermaid, CompressionLevel)}; + })); + } + + return GenerateChunkBlock(std::move(BlockContent), OutBlockDescription); + }; + + CompressedBuffer RebuildBlock(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + CompositeBuffer&& HeaderBuffer, + const std::vector<uint32_t>& ChunksInBlock, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("RebuildBlock"); + ReadFileCache OpenFileCache(DiskStats, Path, Content, Lookup, 4); + + std::vector<SharedBuffer> ResultBuffers; + ResultBuffers.reserve(HeaderBuffer.GetSegments().size() + ChunksInBlock.size()); + ResultBuffers.insert(ResultBuffers.end(), HeaderBuffer.GetSegments().begin(), HeaderBuffer.GetSegments().end()); + for (uint32_t ChunkIndex : ChunksInBlock) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkLocations = GetChunkSequenceLocations(Lookup, ChunkIndex); + ZEN_ASSERT(!ChunkLocations.empty()); + const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + CompositeBuffer Chunk = OpenFileCache.GetRange(ChunkLocations[0].SequenceIndex, + ChunkLocations[0].Offset, + Content.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(Chunk) == ChunkHash); + + const uint64_t RawSize = Chunk.GetSize(); + const bool ShouldCompressChunk = Lookup.RawHashToSequenceIndex.contains(ChunkHash) && + (RawSize >= MinimumSizeForCompressInBlock) && IsChunkCompressable(Content, Lookup, ChunkIndex); + const OodleCompressionLevel CompressionLevel = + ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; + + CompositeBuffer CompressedChunk = + CompressedBuffer::Compress(std::move(Chunk), OodleCompressor::Mermaid, CompressionLevel).GetCompressed(); + ResultBuffers.insert(ResultBuffers.end(), CompressedChunk.GetSegments().begin(), CompressedChunk.GetSegments().end()); + } + return CompressedBuffer::FromCompressedNoValidate(CompositeBuffer(std::move(ResultBuffers))); + }; + + void DownloadLargeBlob(BuildStorage& Storage, + const std::filesystem::path& DownloadFolder, + const Oid& BuildId, + const IoHash& ChunkHash, + const std::uint64_t PreferredMultipartChunkSize, + ParallelWork& Work, + WorkerThreadPool& NetworkPool, + DownloadStatistics& DownloadStats, + std::function<void(IoBuffer&& Payload)>&& OnDownloadComplete) + { + ZEN_TRACE_CPU("DownloadLargeBlob"); + + struct WorkloadData + { + TemporaryFile TempFile; + }; + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + + std::error_code Ec; + Workload->TempFile.CreateTemporary(DownloadFolder, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + std::vector<std::function<void()>> WorkItems = Storage.GetLargeBuildBlob( + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + [Workload, &DownloadStats](uint64_t Offset, const IoBuffer& Chunk) { + DownloadStats.DownloadedChunkByteCount += Chunk.GetSize(); + + if (!AbortFlag.load()) + { + ZEN_TRACE_CPU("DownloadLargeBlob_Save"); + Workload->TempFile.Write(Chunk.GetView(), Offset); + } + }, + [Workload, &DownloadStats, OnDownloadComplete = std::move(OnDownloadComplete)]() { + DownloadStats.DownloadedChunkCount++; + if (!AbortFlag.load()) + { + uint64_t PayloadSize = Workload->TempFile.FileSize(); + void* FileHandle = Workload->TempFile.Detach(); + ZEN_ASSERT(FileHandle != nullptr); + IoBuffer Payload(IoBuffer::File, FileHandle, 0, PayloadSize, true); + Payload.SetDeleteOnClose(true); + OnDownloadComplete(std::move(Payload)); + } + }); + if (!WorkItems.empty()) + { + DownloadStats.MultipartAttachmentCount++; + } + for (auto& WorkItem : WorkItems) + { + Work.ScheduleWork(NetworkPool, [WorkItem = std::move(WorkItem)](std::atomic<bool>&) { + ZEN_TRACE_CPU("DownloadLargeBlob_Work"); + if (!AbortFlag) + { + WorkItem(); + } + }); + } + } + + struct ValidateStatistics + { + uint64_t BuildBlobSize = 0; + uint64_t BuildPartSize = 0; + uint64_t ChunkAttachmentCount = 0; + uint64_t BlockAttachmentCount = 0; + std::atomic<uint64_t> VerifiedAttachmentCount = 0; + std::atomic<uint64_t> VerifiedByteCount = 0; + uint64_t ElapsedWallTimeUS = 0; + }; + + void ValidateBuildPart(BuildStorage& Storage, + const Oid& BuildId, + Oid BuildPartId, + const std::string_view BuildPartName, + ValidateStatistics& ValidateStats, + DownloadStatistics& DownloadStats) + { + ZEN_TRACE_CPU("ValidateBuildPart"); + + ProgressBar::SetLogOperationName(ProgressMode, "Validate Part"); + + enum TaskSteps : uint32_t + { + FetchBuild, + FetchBuildPart, + ValidateBlobs, + Cleanup, + StepCount + }; + + auto EndProgress = + MakeGuard([&]() { ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::StepCount, TaskSteps::StepCount); }); + + Stopwatch Timer; + auto _ = MakeGuard([&]() { + ZEN_CONSOLE("Validated build part {}/{} ('{}') in {}", + BuildId, + BuildPartId, + BuildPartName, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::FetchBuild, TaskSteps::StepCount); + + CbObject Build = Storage.GetBuild(BuildId); + if (!BuildPartName.empty()) + { + BuildPartId = Build["parts"sv].AsObjectView()[BuildPartName].AsObjectId(); + if (BuildPartId == Oid::Zero) + { + throw std::runtime_error(fmt::format("Build {} does not have a part named '{}'", BuildId, BuildPartName)); + } + } + ValidateStats.BuildBlobSize = Build.GetSize(); + uint64_t PreferredMultipartChunkSize = DefaultPreferredMultipartChunkSize; + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + PreferredMultipartChunkSize = ChunkSize; + } + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::FetchBuildPart, TaskSteps::StepCount); + + CbObject BuildPart = Storage.GetBuildPart(BuildId, BuildPartId); + ValidateStats.BuildPartSize = BuildPart.GetSize(); + ZEN_CONSOLE("Validating build part {}/{} ({})", BuildId, BuildPartId, NiceBytes(BuildPart.GetSize())); + std::vector<IoHash> ChunkAttachments; + for (CbFieldView LooseFileView : BuildPart["chunkAttachments"sv].AsObjectView()["rawHashes"sv]) + { + ChunkAttachments.push_back(LooseFileView.AsBinaryAttachment()); + } + ValidateStats.ChunkAttachmentCount = ChunkAttachments.size(); + std::vector<IoHash> BlockAttachments; + for (CbFieldView BlocksView : BuildPart["blockAttachments"sv].AsObjectView()["rawHashes"sv]) + { + BlockAttachments.push_back(BlocksView.AsBinaryAttachment()); + } + ValidateStats.BlockAttachmentCount = BlockAttachments.size(); + + std::vector<ChunkBlockDescription> VerifyBlockDescriptions = + ParseChunkBlockDescriptionList(Storage.GetBlockMetadatas(BuildId, BlockAttachments)); + if (VerifyBlockDescriptions.size() != BlockAttachments.size()) + { + throw std::runtime_error(fmt::format("Uploaded blocks metadata could not all be found, {} blocks metadata is missing", + BlockAttachments.size() - VerifyBlockDescriptions.size())); + } + + WorkerThreadPool& NetworkPool = GetNetworkPool(); + WorkerThreadPool& VerifyPool = GetIOWorkerPool(); + ParallelWork Work(AbortFlag, PauseFlag); + + const std::filesystem::path TempFolder = ".zen-tmp"; + + CreateDirectories(TempFolder); + auto __ = MakeGuard([&TempFolder]() { + if (CleanDirectory(TempFolder, {})) + { + std::error_code DummyEc; + RemoveDir(TempFolder, DummyEc); + } + }); + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::ValidateBlobs, TaskSteps::StepCount); + + ProgressBar ProgressBar(ProgressMode, "Validate Blobs"); + + uint64_t AttachmentsToVerifyCount = ChunkAttachments.size() + BlockAttachments.size(); + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredVerifiedBytesPerSecond; + + std::atomic<uint64_t> MultipartAttachmentCount = 0; + + for (const IoHash& ChunkAttachment : ChunkAttachments) + { + Work.ScheduleWork(NetworkPool, + [&Storage, + &NetworkPool, + &VerifyPool, + &Work, + &DownloadStats, + &ValidateStats, + AttachmentsToVerifyCount, + &TempFolder, + BuildId = Oid(BuildId), + PreferredMultipartChunkSize, + &FilteredDownloadedBytesPerSecond, + &FilteredVerifiedBytesPerSecond, + ChunkAttachment](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_GetChunk"); + + FilteredDownloadedBytesPerSecond.Start(); + DownloadLargeBlob( + Storage, + TempFolder, + BuildId, + ChunkAttachment, + PreferredMultipartChunkSize, + Work, + NetworkPool, + DownloadStats, + [&Work, + &VerifyPool, + &DownloadStats, + &ValidateStats, + AttachmentsToVerifyCount, + &FilteredDownloadedBytesPerSecond, + &FilteredVerifiedBytesPerSecond, + ChunkHash = ChunkAttachment](IoBuffer&& Payload) { + Payload.SetContentType(ZenContentType::kCompressedBinary); + if (!AbortFlag) + { + Work.ScheduleWork( + VerifyPool, + [&DownloadStats, + &ValidateStats, + AttachmentsToVerifyCount, + &FilteredDownloadedBytesPerSecond, + &FilteredVerifiedBytesPerSecond, + Payload = std::move(Payload), + ChunkHash](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_Validate"); + + if (DownloadStats.DownloadedChunkCount + DownloadStats.DownloadedBlockCount == + AttachmentsToVerifyCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + + FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(std::move(Payload), ChunkHash, CompressedSize, DecompressedSize); + ValidateStats.VerifiedAttachmentCount++; + ValidateStats.VerifiedByteCount += DecompressedSize; + if (ValidateStats.VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredVerifiedBytesPerSecond.Stop(); + } + } + }); + } + }); + } + }); + } + + for (const IoHash& BlockAttachment : BlockAttachments) + { + Work.ScheduleWork( + NetworkPool, + [&Storage, + &BuildId, + &Work, + &VerifyPool, + &DownloadStats, + &ValidateStats, + AttachmentsToVerifyCount, + &FilteredDownloadedBytesPerSecond, + &FilteredVerifiedBytesPerSecond, + BlockAttachment](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_GetBlock"); + + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer Payload = Storage.GetBuildBlob(BuildId, BlockAttachment); + DownloadStats.DownloadedBlockCount++; + DownloadStats.DownloadedBlockByteCount += Payload.GetSize(); + if (DownloadStats.DownloadedChunkCount + DownloadStats.DownloadedBlockCount == AttachmentsToVerifyCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + if (!Payload) + { + throw std::runtime_error(fmt::format("Block attachment {} could not be found", BlockAttachment)); + } + if (!AbortFlag) + { + Work.ScheduleWork( + VerifyPool, + [&FilteredVerifiedBytesPerSecond, + AttachmentsToVerifyCount, + &ValidateStats, + Payload = std::move(Payload), + BlockAttachment](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("ValidateBuildPart_ValidateBlock"); + + FilteredVerifiedBytesPerSecond.Start(); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateChunkBlock(std::move(Payload), BlockAttachment, CompressedSize, DecompressedSize); + ValidateStats.VerifiedAttachmentCount++; + ValidateStats.VerifiedByteCount += DecompressedSize; + if (ValidateStats.VerifiedAttachmentCount.load() == AttachmentsToVerifyCount) + { + FilteredVerifiedBytesPerSecond.Stop(); + } + } + }); + } + } + }); + } + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + + const uint64_t DownloadedAttachmentCount = DownloadStats.DownloadedChunkCount + DownloadStats.DownloadedBlockCount; + const uint64_t DownloadedByteCount = DownloadStats.DownloadedChunkByteCount + DownloadStats.DownloadedBlockByteCount; + + FilteredDownloadedBytesPerSecond.Update(DownloadedByteCount); + FilteredVerifiedBytesPerSecond.Update(ValidateStats.VerifiedByteCount); + + std::string Details = fmt::format("Downloaded {}/{} ({}, {}bits/s). Verified {}/{} ({}, {}B/s)", + DownloadedAttachmentCount, + AttachmentsToVerifyCount, + NiceBytes(DownloadedByteCount), + NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8), + ValidateStats.VerifiedAttachmentCount.load(), + AttachmentsToVerifyCount, + NiceBytes(ValidateStats.VerifiedByteCount.load()), + NiceNum(FilteredVerifiedBytesPerSecond.GetCurrent())); + + ProgressBar.UpdateState( + {.Task = "Validating blobs ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2), + .RemainingCount = gsl::narrow<uint64_t>(AttachmentsToVerifyCount * 2 - + (DownloadedAttachmentCount + ValidateStats.VerifiedAttachmentCount.load())), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + ProgressBar.Finish(); + ValidateStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Cleanup, TaskSteps::StepCount); + } + + void ArrangeChunksIntoBlocks(const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint64_t MaxBlockSize, + std::vector<uint32_t>& ChunkIndexes, + std::vector<std::vector<uint32_t>>& OutBlocks) + { + ZEN_TRACE_CPU("ArrangeChunksIntoBlocks"); + std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&Content, &Lookup](uint32_t Lhs, uint32_t Rhs) { + const ChunkedContentLookup::ChunkSequenceLocation& LhsLocation = GetChunkSequenceLocations(Lookup, Lhs)[0]; + const ChunkedContentLookup::ChunkSequenceLocation& RhsLocation = GetChunkSequenceLocations(Lookup, Rhs)[0]; + if (LhsLocation.SequenceIndex < RhsLocation.SequenceIndex) + { + return true; + } + else if (LhsLocation.SequenceIndex > RhsLocation.SequenceIndex) + { + return false; + } + return LhsLocation.Offset < RhsLocation.Offset; + }); + + uint64_t MaxBlockSizeLowThreshold = MaxBlockSize - (MaxBlockSize / 16); + + uint64_t BlockSize = 0; + + uint32_t ChunkIndexStart = 0; + for (uint32_t ChunkIndexOffset = 0; ChunkIndexOffset < ChunkIndexes.size();) + { + const uint32_t ChunkIndex = ChunkIndexes[ChunkIndexOffset]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + + if ((BlockSize + ChunkSize) > MaxBlockSize) + { + // Within the span of MaxBlockSizeLowThreshold and MaxBlockSize, see if there is a break + // between source paths for chunks. Break the block at the last such break if any. + ZEN_ASSERT(ChunkIndexOffset > ChunkIndexStart); + + const uint32_t ChunkSequenceIndex = + Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[ChunkIndex]].SequenceIndex; + + uint64_t ScanBlockSize = BlockSize; + + uint32_t ScanChunkIndexOffset = ChunkIndexOffset - 1; + while (ScanChunkIndexOffset > (ChunkIndexStart + 2)) + { + const uint32_t TestChunkIndex = ChunkIndexes[ScanChunkIndexOffset]; + const uint64_t TestChunkSize = Content.ChunkedContent.ChunkRawSizes[TestChunkIndex]; + if ((ScanBlockSize - TestChunkSize) < MaxBlockSizeLowThreshold) + { + break; + } + + const uint32_t TestSequenceIndex = + Lookup.ChunkSequenceLocations[Lookup.ChunkSequenceLocationOffset[TestChunkIndex]].SequenceIndex; + if (ChunkSequenceIndex != TestSequenceIndex) + { + ChunkIndexOffset = ScanChunkIndexOffset + 1; + break; + } + + ScanBlockSize -= TestChunkSize; + ScanChunkIndexOffset--; + } + + std::vector<uint32_t> ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexOffset - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexOffset; AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + BlockSize = 0; + ChunkIndexStart = ChunkIndexOffset; + } + else + { + ChunkIndexOffset++; + BlockSize += ChunkSize; + } + } + if (ChunkIndexStart < ChunkIndexes.size()) + { + std::vector<uint32_t> ChunksInBlock; + ChunksInBlock.reserve(ChunkIndexes.size() - ChunkIndexStart); + for (uint32_t AddIndexOffset = ChunkIndexStart; AddIndexOffset < ChunkIndexes.size(); AddIndexOffset++) + { + const uint32_t AddChunkIndex = ChunkIndexes[AddIndexOffset]; + ChunksInBlock.push_back(AddChunkIndex); + } + OutBlocks.emplace_back(std::move(ChunksInBlock)); + } + } + + CompositeBuffer CompressChunk(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex, + const std::filesystem::path& TempFolderPath, + LooseChunksStatistics& LooseChunksStats) + { + ZEN_TRACE_CPU("CompressChunk"); + ZEN_ASSERT(!TempFolderPath.empty()); + const IoHash& ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + const uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + + const ChunkedContentLookup::ChunkSequenceLocation& Source = GetChunkSequenceLocations(Lookup, ChunkIndex)[0]; + const std::uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[Source.SequenceIndex]; + IoBuffer RawSource = IoBufferBuilder::MakeFromFile((Path / Content.Paths[PathIndex]).make_preferred(), Source.Offset, ChunkSize); + if (!RawSource) + { + throw std::runtime_error(fmt::format("Failed fetching chunk {}", ChunkHash)); + } + if (RawSource.GetSize() != ChunkSize) + { + throw std::runtime_error(fmt::format("Fetched chunk {} has invalid size", ChunkHash)); + } + + const bool ShouldCompressChunk = IsChunkCompressable(Content, Lookup, ChunkIndex); + const OodleCompressionLevel CompressionLevel = ShouldCompressChunk ? OodleCompressionLevel::VeryFast : OodleCompressionLevel::None; + + if (ShouldCompressChunk) + { + std::filesystem::path TempFilePath = TempFolderPath / ChunkHash.ToHexString(); + + BasicFile CompressedFile; + std::error_code Ec; + CompressedFile.Open(TempFilePath, BasicFile::Mode::kTruncateDelete, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed creating temporary file for compressing blob {}. Reason: {}", ChunkHash, Ec.message())); + } + + uint64_t StreamRawBytes = 0; + uint64_t StreamCompressedBytes = 0; + + bool CouldCompress = CompressedBuffer::CompressToStream( + CompositeBuffer(SharedBuffer(RawSource)), + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset); + LooseChunksStats.CompressedChunkRawBytes += SourceSize; + CompressedFile.Write(RangeBuffer, Offset); + LooseChunksStats.CompressedChunkBytes += RangeBuffer.GetSize(); + StreamRawBytes += SourceSize; + StreamCompressedBytes += RangeBuffer.GetSize(); + }, + OodleCompressor::Mermaid, + CompressionLevel); + if (CouldCompress) + { + uint64_t CompressedSize = CompressedFile.FileSize(); + void* FileHandle = CompressedFile.Detach(); + IoBuffer TempPayload = IoBuffer(IoBuffer::File, + FileHandle, + 0, + CompressedSize, + /*IsWholeFile*/ true); + ZEN_ASSERT(TempPayload); + TempPayload.SetDeleteOnClose(true); + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(TempPayload), RawHash, RawSize); + ZEN_ASSERT(Compressed); + ZEN_ASSERT(RawHash == ChunkHash); + ZEN_ASSERT(RawSize == ChunkSize); + + LooseChunksStats.CompressedChunkCount++; + + return Compressed.GetCompressed(); + } + else + { + LooseChunksStats.CompressedChunkRawBytes -= StreamRawBytes; + LooseChunksStats.CompressedChunkBytes -= StreamCompressedBytes; + } + CompressedFile.Close(); + RemoveFile(TempFilePath, Ec); + ZEN_UNUSED(Ec); + } + + CompressedBuffer CompressedBlob = + CompressedBuffer::Compress(SharedBuffer(std::move(RawSource)), OodleCompressor::Mermaid, CompressionLevel); + if (!CompressedBlob) + { + throw std::runtime_error(fmt::format("Failed to compress large blob {}", ChunkHash)); + } + ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawHash() == ChunkHash); + ZEN_ASSERT_SLOW(CompressedBlob.DecodeRawSize() == ChunkSize); + + LooseChunksStats.CompressedChunkRawBytes += ChunkSize; + LooseChunksStats.CompressedChunkBytes += CompressedBlob.GetCompressedSize(); + + // If we use none-compression, the compressed blob references the data and has 64 kb in memory so we don't need to write it to disk + if (ShouldCompressChunk) + { + IoBuffer TempPayload = WriteToTempFile(std::move(CompressedBlob).GetCompressed(), TempFolderPath, ChunkHash); + CompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(TempPayload)); + } + + LooseChunksStats.CompressedChunkCount++; + return std::move(CompressedBlob).GetCompressed(); + } + + struct GeneratedBlocks + { + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<uint64_t> BlockSizes; + std::vector<CompositeBuffer> BlockHeaders; + std::vector<CbObject> BlockMetaDatas; + std::vector<bool> MetaDataHasBeenUploaded; + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockHashToBlockIndex; + }; + + void GenerateBuildBlocks(const std::filesystem::path& Path, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + StorageInstance& Storage, + const Oid& BuildId, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& OutBlocks, + DiskStatistics& DiskStats, + UploadStatistics& UploadStats, + GenerateBlocksStatistics& GenerateBlocksStats) + { + ZEN_TRACE_CPU("GenerateBuildBlocks"); + const std::size_t NewBlockCount = NewBlockChunks.size(); + if (NewBlockCount > 0) + { + ProgressBar ProgressBar(ProgressMode, "Generate Blocks"); + + OutBlocks.BlockDescriptions.resize(NewBlockCount); + OutBlocks.BlockSizes.resize(NewBlockCount); + OutBlocks.BlockMetaDatas.resize(NewBlockCount); + OutBlocks.BlockHeaders.resize(NewBlockCount); + OutBlocks.MetaDataHasBeenUploaded.resize(NewBlockCount, false); + OutBlocks.BlockHashToBlockIndex.reserve(NewBlockCount); + + RwLock Lock; + + WorkerThreadPool& GenerateBlobsPool = GetIOWorkerPool(); + WorkerThreadPool& UploadBlocksPool = GetNetworkPool(); + + FilteredRate FilteredGeneratedBytesPerSecond; + FilteredRate FilteredUploadedBytesPerSecond; + + ParallelWork Work(AbortFlag, PauseFlag); + + std::atomic<uint64_t> QueuedPendingBlocksForUpload = 0; + + for (size_t BlockIndex = 0; BlockIndex < NewBlockCount; BlockIndex++) + { + if (Work.IsAborted()) + { + break; + } + const std::vector<uint32_t>& ChunksInBlock = NewBlockChunks[BlockIndex]; + Work.ScheduleWork( + GenerateBlobsPool, + [&Storage, + BuildId, + &Path, + &Content, + &Lookup, + &Work, + &UploadBlocksPool, + NewBlockCount, + ChunksInBlock, + &Lock, + &OutBlocks, + &DiskStats, + &GenerateBlocksStats, + &UploadStats, + &FilteredGeneratedBytesPerSecond, + &QueuedPendingBlocksForUpload, + &FilteredUploadedBytesPerSecond, + BlockIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Generate"); + + FilteredGeneratedBytesPerSecond.Start(); + // TODO: Convert ScheduleWork body to function + + Stopwatch GenerateTimer; + CompressedBuffer CompressedBlock = + GenerateBlock(Path, Content, Lookup, ChunksInBlock, OutBlocks.BlockDescriptions[BlockIndex], DiskStats); + ZEN_CONSOLE_VERBOSE("Generated block {} ({}) containing {} chunks in {}", + OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(CompressedBlock.GetCompressedSize()), + OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(), + NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs())); + + OutBlocks.BlockSizes[BlockIndex] = CompressedBlock.GetCompressedSize(); + { + CbObjectWriter Writer; + Writer.AddString("createdBy", "zen"); + OutBlocks.BlockMetaDatas[BlockIndex] = Writer.Save(); + } + GenerateBlocksStats.GeneratedBlockByteCount += OutBlocks.BlockSizes[BlockIndex]; + GenerateBlocksStats.GeneratedBlockCount++; + + Lock.WithExclusiveLock([&]() { + OutBlocks.BlockHashToBlockIndex.insert_or_assign(OutBlocks.BlockDescriptions[BlockIndex].BlockHash, + BlockIndex); + }); + + { + std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + { + FilteredGeneratedBytesPerSecond.Stop(); + } + + if (QueuedPendingBlocksForUpload.load() > 16) + { + std::span<const SharedBuffer> Segments = CompressedBlock.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + else + { + if (!AbortFlag) + { + QueuedPendingBlocksForUpload++; + + Work.ScheduleWork( + UploadBlocksPool, + [&Storage, + BuildId, + NewBlockCount, + &FilteredUploadedBytesPerSecond, + &QueuedPendingBlocksForUpload, + &GenerateBlocksStats, + &UploadStats, + &OutBlocks, + BlockIndex, + Payload = std::move(CompressedBlock)](std::atomic<bool>&) mutable { + auto _ = MakeGuard([&QueuedPendingBlocksForUpload] { QueuedPendingBlocksForUpload--; }); + if (!AbortFlag) + { + if (GenerateBlocksStats.GeneratedBlockCount == NewBlockCount) + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Save"); + + FilteredUploadedBytesPerSecond.Stop(); + std::span<const SharedBuffer> Segments = Payload.GetCompressed().GetSegments(); + ZEN_ASSERT(Segments.size() >= 2); + OutBlocks.BlockHeaders[BlockIndex] = CompositeBuffer(Segments[0], Segments[1]); + } + else + { + ZEN_TRACE_CPU("GenerateBuildBlocks_Upload"); + + FilteredUploadedBytesPerSecond.Start(); + // TODO: Convert ScheduleWork body to function + + const CbObject BlockMetaData = + BuildChunkBlockDescription(OutBlocks.BlockDescriptions[BlockIndex], + OutBlocks.BlockMetaDatas[BlockIndex]); + + const IoHash& BlockHash = OutBlocks.BlockDescriptions[BlockIndex].BlockHash; + const uint64_t CompressedBlockSize = Payload.GetCompressedSize(); + + if (Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBuildBlob(BuildId, + BlockHash, + ZenContentType::kCompressedBinary, + Payload.GetCompressed()); + } + + Storage.BuildStorage->PutBuildBlob(BuildId, + BlockHash, + ZenContentType::kCompressedBinary, + std::move(Payload).GetCompressed()); + UploadStats.BlocksBytes += CompressedBlockSize; + + ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", + BlockHash, + NiceBytes(CompressedBlockSize), + OutBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + + if (Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBlobMetadatas(BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); + } + + bool MetadataSucceeded = + Storage.BuildStorage->PutBlockMetadata(BuildId, BlockHash, BlockMetaData); + if (MetadataSucceeded) + { + ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", + BlockHash, + NiceBytes(BlockMetaData.GetSize())); + + OutBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + } + + UploadStats.BlockCount++; + if (UploadStats.BlockCount == NewBlockCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + } + }); + } + } + } + }); + } + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + + FilteredGeneratedBytesPerSecond.Update(GenerateBlocksStats.GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadStats.BlocksBytes.load()); + + std::string Details = fmt::format("Generated {}/{} ({}, {}B/s). Uploaded {}/{} ({}, {}bits/s)", + GenerateBlocksStats.GeneratedBlockCount.load(), + NewBlockCount, + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(FilteredGeneratedBytesPerSecond.GetCurrent()), + UploadStats.BlockCount.load(), + NewBlockCount, + NiceBytes(UploadStats.BlocksBytes.load()), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent() * 8)); + + ProgressBar.UpdateState( + {.Task = "Generating blocks", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(NewBlockCount), + .RemainingCount = gsl::narrow<uint64_t>(NewBlockCount - GenerateBlocksStats.GeneratedBlockCount.load()), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + ZEN_ASSERT(AbortFlag || QueuedPendingBlocksForUpload.load() == 0); + + ProgressBar.Finish(); + + GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS = FilteredGeneratedBytesPerSecond.GetElapsedTimeUS(); + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); + } + } + + void UploadPartBlobs(StorageInstance& Storage, + const Oid& BuildId, + const std::filesystem::path& Path, + const std::filesystem::path& TempDir, + const ChunkedFolderContent& Content, + const ChunkedContentLookup& Lookup, + std::span<IoHash> RawHashes, + const std::vector<std::vector<uint32_t>>& NewBlockChunks, + GeneratedBlocks& NewBlocks, + std::span<const uint32_t> LooseChunkIndexes, + const std::uint64_t LargeAttachmentSize, + DiskStatistics& DiskStats, + UploadStatistics& UploadStats, + LooseChunksStatistics& LooseChunksStats) + { + ZEN_TRACE_CPU("UploadPartBlobs"); + { + ProgressBar ProgressBar(ProgressMode, "Upload Blobs"); + + WorkerThreadPool& ReadChunkPool = GetIOWorkerPool(); + WorkerThreadPool& UploadChunkPool = GetNetworkPool(); + + FilteredRate FilteredGenerateBlockBytesPerSecond; + FilteredRate FilteredCompressedBytesPerSecond; + FilteredRate FilteredUploadedBytesPerSecond; + + ParallelWork Work(AbortFlag, PauseFlag); + + std::atomic<size_t> UploadedBlockSize = 0; + std::atomic<size_t> UploadedBlockCount = 0; + std::atomic<size_t> UploadedRawChunkSize = 0; + std::atomic<size_t> UploadedCompressedChunkSize = 0; + std::atomic<uint32_t> UploadedChunkCount = 0; + + tsl::robin_map<uint32_t, uint32_t> ChunkIndexToLooseChunkOrderIndex; + ChunkIndexToLooseChunkOrderIndex.reserve(LooseChunkIndexes.size()); + for (uint32_t OrderIndex = 0; OrderIndex < LooseChunkIndexes.size(); OrderIndex++) + { + ChunkIndexToLooseChunkOrderIndex.insert_or_assign(LooseChunkIndexes[OrderIndex], OrderIndex); + } + + std::vector<size_t> BlockIndexes; + std::vector<uint32_t> LooseChunkOrderIndexes; + + uint64_t TotalLooseChunksSize = 0; + uint64_t TotalBlocksSize = 0; + for (const IoHash& RawHash : RawHashes) + { + if (auto It = NewBlocks.BlockHashToBlockIndex.find(RawHash); It != NewBlocks.BlockHashToBlockIndex.end()) + { + BlockIndexes.push_back(It->second); + TotalBlocksSize += NewBlocks.BlockSizes[It->second]; + } + else if (auto ChunkIndexIt = Lookup.ChunkHashToChunkIndex.find(RawHash); ChunkIndexIt != Lookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = ChunkIndexIt->second; + if (auto LooseOrderIndexIt = ChunkIndexToLooseChunkOrderIndex.find(ChunkIndex); + LooseOrderIndexIt != ChunkIndexToLooseChunkOrderIndex.end()) + { + LooseChunkOrderIndexes.push_back(LooseOrderIndexIt->second); + TotalLooseChunksSize += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + } + else + { + throw std::runtime_error( + fmt::format("Can not upload requested build blob {} as it was not generated by this upload", RawHash)); + } + } + uint64_t TotalRawSize = TotalLooseChunksSize + TotalBlocksSize; + + const size_t UploadBlockCount = BlockIndexes.size(); + const uint32_t UploadChunkCount = gsl::narrow<uint32_t>(LooseChunkOrderIndexes.size()); + + auto AsyncUploadBlock = [&Storage, + &BuildId, + &Work, + &TempDir, + &NewBlocks, + UploadBlockCount, + &UploadedBlockCount, + UploadChunkCount, + &UploadedChunkCount, + &UploadedBlockSize, + &UploadStats, + &FilteredUploadedBytesPerSecond, + &UploadChunkPool](const size_t BlockIndex, + const IoHash BlockHash, + CompositeBuffer&& Payload, + std::atomic<uint64_t>& QueuedPendingInMemoryBlocksForUpload) { + bool IsInMemoryBlock = true; + if (QueuedPendingInMemoryBlocksForUpload.load() > 16) + { + ZEN_TRACE_CPU("AsyncUploadBlock_WriteTempBlock"); + Payload = CompositeBuffer(WriteToTempFile(std::move(Payload), TempDir, BlockHash)); + IsInMemoryBlock = false; + } + else + { + QueuedPendingInMemoryBlocksForUpload++; + } + + Work.ScheduleWork( + UploadChunkPool, + [&Storage, + &BuildId, + &QueuedPendingInMemoryBlocksForUpload, + &NewBlocks, + UploadBlockCount, + &UploadedBlockCount, + UploadChunkCount, + &UploadedChunkCount, + &UploadedBlockSize, + &UploadStats, + &FilteredUploadedBytesPerSecond, + IsInMemoryBlock, + BlockIndex, + BlockHash, + Payload = std::move(Payload)](std::atomic<bool>&) mutable { + auto _ = MakeGuard([IsInMemoryBlock, &QueuedPendingInMemoryBlocksForUpload] { + if (IsInMemoryBlock) + { + QueuedPendingInMemoryBlocksForUpload--; + } + }); + if (!AbortFlag) + { + ZEN_TRACE_CPU("AsyncUploadBlock"); + + const uint64_t PayloadSize = Payload.GetSize(); + + FilteredUploadedBytesPerSecond.Start(); + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + + if (Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBuildBlob(BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); + } + Storage.BuildStorage->PutBuildBlob(BuildId, BlockHash, ZenContentType::kCompressedBinary, Payload); + ZEN_CONSOLE_VERBOSE("Uploaded block {} ({}) containing {} chunks", + BlockHash, + NiceBytes(PayloadSize), + NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size()); + UploadedBlockSize += PayloadSize; + UploadStats.BlocksBytes += PayloadSize; + + if (Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBlobMetadatas(BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); + } + bool MetadataSucceeded = Storage.BuildStorage->PutBlockMetadata(BuildId, BlockHash, BlockMetaData); + if (MetadataSucceeded) + { + ZEN_CONSOLE_VERBOSE("Uploaded block {} metadata ({})", BlockHash, NiceBytes(BlockMetaData.GetSize())); + + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + } + + UploadStats.BlockCount++; + + UploadedBlockCount++; + if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + }); + }; + + auto AsyncUploadLooseChunk = [&Storage, + BuildId, + LargeAttachmentSize, + &Work, + &UploadChunkPool, + &FilteredUploadedBytesPerSecond, + &UploadedBlockCount, + &UploadedChunkCount, + UploadBlockCount, + UploadChunkCount, + &UploadedCompressedChunkSize, + &UploadedRawChunkSize, + &UploadStats](const IoHash& RawHash, uint64_t RawSize, CompositeBuffer&& Payload) { + Work.ScheduleWork( + UploadChunkPool, + [&Storage, + BuildId, + &Work, + LargeAttachmentSize, + &FilteredUploadedBytesPerSecond, + &UploadChunkPool, + &UploadedBlockCount, + &UploadedChunkCount, + UploadBlockCount, + UploadChunkCount, + &UploadedCompressedChunkSize, + &UploadedRawChunkSize, + &UploadStats, + RawHash, + RawSize, + Payload = CompositeBuffer(std::move(Payload))](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("AsyncUploadLooseChunk"); + + const uint64_t PayloadSize = Payload.GetSize(); + + if (Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBuildBlob(BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); + } + + if (PayloadSize >= LargeAttachmentSize) + { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart"); + UploadStats.MultipartAttachmentCount++; + std::vector<std::function<void()>> MultipartWork = Storage.BuildStorage->PutLargeBuildBlob( + BuildId, + RawHash, + ZenContentType::kCompressedBinary, + PayloadSize, + [Payload = std::move(Payload), &FilteredUploadedBytesPerSecond](uint64_t Offset, + uint64_t Size) mutable -> IoBuffer { + FilteredUploadedBytesPerSecond.Start(); + + IoBuffer PartPayload = Payload.Mid(Offset, Size).Flatten().AsIoBuffer(); + PartPayload.SetContentType(ZenContentType::kBinary); + return PartPayload; + }, + [RawSize, + &UploadStats, + &UploadedCompressedChunkSize, + &UploadChunkPool, + &UploadedBlockCount, + UploadBlockCount, + &UploadedChunkCount, + UploadChunkCount, + &FilteredUploadedBytesPerSecond, + &UploadedRawChunkSize](uint64_t SentBytes, bool IsComplete) { + UploadStats.ChunksBytes += SentBytes; + UploadedCompressedChunkSize += SentBytes; + if (IsComplete) + { + UploadStats.ChunkCount++; + UploadedChunkCount++; + if (UploadedBlockCount == UploadBlockCount && UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + UploadedRawChunkSize += RawSize; + } + }); + for (auto& WorkPart : MultipartWork) + { + Work.ScheduleWork(UploadChunkPool, [Work = std::move(WorkPart)](std::atomic<bool>&) { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Multipart_Work"); + if (!AbortFlag) + { + Work(); + } + }); + } + ZEN_CONSOLE_VERBOSE("Uploaded multipart chunk {} ({})", RawHash, NiceBytes(PayloadSize)); + } + else + { + ZEN_TRACE_CPU("AsyncUploadLooseChunk_Singlepart"); + Storage.BuildStorage->PutBuildBlob(BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); + ZEN_CONSOLE_VERBOSE("Uploaded chunk {} ({})", RawHash, NiceBytes(PayloadSize)); + UploadStats.ChunksBytes += Payload.GetSize(); + UploadStats.ChunkCount++; + UploadedCompressedChunkSize += Payload.GetSize(); + UploadedRawChunkSize += RawSize; + UploadedChunkCount++; + if (UploadedChunkCount == UploadChunkCount) + { + FilteredUploadedBytesPerSecond.Stop(); + } + } + } + }); + }; + + std::vector<size_t> GenerateBlockIndexes; + + std::atomic<uint64_t> GeneratedBlockCount = 0; + std::atomic<uint64_t> GeneratedBlockByteCount = 0; + + std::atomic<uint64_t> QueuedPendingInMemoryBlocksForUpload = 0; + + // Start generation of any non-prebuilt blocks and schedule upload + for (const size_t BlockIndex : BlockIndexes) + { + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + if (!AbortFlag) + { + Work.ScheduleWork( + ReadChunkPool, + [BlockHash = IoHash(BlockHash), + BlockIndex, + &FilteredGenerateBlockBytesPerSecond, + Path, + &Content, + &Lookup, + &NewBlocks, + &NewBlockChunks, + &GenerateBlockIndexes, + &GeneratedBlockCount, + &GeneratedBlockByteCount, + &DiskStats, + &AsyncUploadBlock, + &QueuedPendingInMemoryBlocksForUpload](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UploadPartBlobs_GenerateBlock"); + + FilteredGenerateBlockBytesPerSecond.Start(); + + Stopwatch GenerateTimer; + CompositeBuffer Payload; + if (NewBlocks.BlockHeaders[BlockIndex]) + { + Payload = RebuildBlock(Path, + Content, + Lookup, + std::move(NewBlocks.BlockHeaders[BlockIndex]), + NewBlockChunks[BlockIndex], + DiskStats) + .GetCompressed(); + } + else + { + ChunkBlockDescription BlockDescription; + CompressedBuffer CompressedBlock = + GenerateBlock(Path, Content, Lookup, NewBlockChunks[BlockIndex], BlockDescription, DiskStats); + if (!CompressedBlock) + { + throw std::runtime_error(fmt::format("Failed generating block {}", BlockHash)); + } + ZEN_ASSERT(BlockDescription.BlockHash == BlockHash); + Payload = std::move(CompressedBlock).GetCompressed(); + } + + GeneratedBlockByteCount += NewBlocks.BlockSizes[BlockIndex]; + GeneratedBlockCount++; + if (GeneratedBlockCount == GenerateBlockIndexes.size()) + { + FilteredGenerateBlockBytesPerSecond.Stop(); + } + ZEN_CONSOLE_VERBOSE("{} block {} ({}) containing {} chunks in {}", + NewBlocks.BlockHeaders[BlockIndex] ? "Regenerated" : "Generated", + NewBlocks.BlockDescriptions[BlockIndex].BlockHash, + NiceBytes(NewBlocks.BlockSizes[BlockIndex]), + NewBlocks.BlockDescriptions[BlockIndex].ChunkRawHashes.size(), + NiceTimeSpanMs(GenerateTimer.GetElapsedTimeMs())); + if (!AbortFlag) + { + AsyncUploadBlock(BlockIndex, BlockHash, std::move(Payload), QueuedPendingInMemoryBlocksForUpload); + } + } + }); + } + } + + // Start compression of any non-precompressed loose chunks and schedule upload + for (const uint32_t LooseChunkOrderIndex : LooseChunkOrderIndexes) + { + const uint32_t ChunkIndex = LooseChunkIndexes[LooseChunkOrderIndex]; + Work.ScheduleWork( + ReadChunkPool, + [&Path, + &Content, + &Lookup, + &TempDir, + &LooseChunksStats, + &LooseChunkOrderIndexes, + &FilteredCompressedBytesPerSecond, + &UploadStats, + &AsyncUploadLooseChunk, + ChunkIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UploadPartBlobs_CompressChunk"); + + FilteredCompressedBytesPerSecond.Start(); + Stopwatch CompressTimer; + CompositeBuffer Payload = CompressChunk(Path, Content, Lookup, ChunkIndex, TempDir, LooseChunksStats); + ZEN_CONSOLE_VERBOSE("Compressed chunk {} ({} -> {}) in {}", + Content.ChunkedContent.ChunkHashes[ChunkIndex], + NiceBytes(Content.ChunkedContent.ChunkRawSizes[ChunkIndex]), + NiceBytes(Payload.GetSize()), + NiceTimeSpanMs(CompressTimer.GetElapsedTimeMs())); + const uint64_t ChunkRawSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + UploadStats.ReadFromDiskBytes += ChunkRawSize; + if (LooseChunksStats.CompressedChunkCount == LooseChunkOrderIndexes.size()) + { + FilteredCompressedBytesPerSecond.Stop(); + } + if (!AbortFlag) + { + AsyncUploadLooseChunk(Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkRawSize, std::move(Payload)); + } + } + }); + } + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + FilteredCompressedBytesPerSecond.Update(LooseChunksStats.CompressedChunkRawBytes.load()); + FilteredGenerateBlockBytesPerSecond.Update(GeneratedBlockByteCount.load()); + FilteredUploadedBytesPerSecond.Update(UploadedCompressedChunkSize.load() + UploadedBlockSize.load()); + uint64_t UploadedRawSize = UploadedRawChunkSize.load() + UploadedBlockSize.load(); + uint64_t UploadedCompressedSize = UploadedCompressedChunkSize.load() + UploadedBlockSize.load(); + + std::string Details = fmt::format( + "Compressed {}/{} ({}/{} {}B/s) chunks. " + "Uploaded {}/{} ({}/{}) blobs " + "({} {}bits/s)", + LooseChunksStats.CompressedChunkCount.load(), + LooseChunkOrderIndexes.size(), + NiceBytes(LooseChunksStats.CompressedChunkRawBytes), + NiceBytes(TotalLooseChunksSize), + NiceNum(FilteredCompressedBytesPerSecond.GetCurrent()), + + UploadedBlockCount.load() + UploadedChunkCount.load(), + UploadBlockCount + UploadChunkCount, + NiceBytes(UploadedRawSize), + NiceBytes(TotalRawSize), + + NiceBytes(UploadedCompressedSize), + NiceNum(FilteredUploadedBytesPerSecond.GetCurrent())); + + ProgressBar.UpdateState({.Task = "Uploading blobs ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(TotalRawSize), + .RemainingCount = gsl::narrow<uint64_t>(TotalRawSize - UploadedRawSize), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + ZEN_ASSERT(AbortFlag || QueuedPendingInMemoryBlocksForUpload.load() == 0); + + ProgressBar.Finish(); + + UploadStats.ElapsedWallTimeUS = FilteredUploadedBytesPerSecond.GetElapsedTimeUS(); + LooseChunksStats.CompressChunksElapsedWallTimeUS = FilteredCompressedBytesPerSecond.GetElapsedTimeUS(); + } + } + + std::vector<size_t> FindReuseBlocks(const std::vector<ChunkBlockDescription>& KnownBlocks, + std::span<const IoHash> ChunkHashes, + std::span<const uint32_t> ChunkIndexes, + uint8_t MinPercentLimit, + std::vector<uint32_t>& OutUnusedChunkIndexes, + FindBlocksStatistics& FindBlocksStats) + { + ZEN_TRACE_CPU("FindReuseBlocks"); + + // Find all blocks with a usage level higher than MinPercentLimit + // Pick out the blocks with usage higher or equal to MinPercentLimit + // Sort them with highest size usage - most usage first + // Make a list of all chunks and mark them as not found + // For each block, recalculate the block has usage percent based on the chunks marked as not found + // If the block still reaches MinPercentLimit, keep it and remove the matching chunks from the not found list + // Repeat for following all remaining block that initially matched MinPercentLimit + + std::vector<size_t> FilteredReuseBlockIndexes; + + uint32_t ChunkCount = gsl::narrow<uint32_t>(ChunkHashes.size()); + std::vector<bool> ChunkFound(ChunkCount, false); + + if (ChunkCount > 0) + { + if (!KnownBlocks.empty()) + { + Stopwatch ReuseTimer; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + ChunkHashToChunkIndex.reserve(ChunkIndexes.size()); + for (uint32_t ChunkIndex : ChunkIndexes) + { + ChunkHashToChunkIndex.insert_or_assign(ChunkHashes[ChunkIndex], ChunkIndex); + } + + std::vector<size_t> BlockSizes(KnownBlocks.size(), 0); + std::vector<size_t> BlockUseSize(KnownBlocks.size(), 0); + + std::vector<size_t> ReuseBlockIndexes; + + for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) + { + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + if (KnownBlock.BlockHash != IoHash::Zero && + KnownBlock.ChunkRawHashes.size() == KnownBlock.ChunkCompressedLengths.size()) + { + size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size(); + if (BlockAttachmentCount == 0) + { + continue; + } + size_t ReuseSize = 0; + size_t BlockSize = 0; + size_t FoundAttachmentCount = 0; + size_t BlockChunkCount = KnownBlock.ChunkRawHashes.size(); + for (size_t BlockChunkIndex = 0; BlockChunkIndex < BlockChunkCount; BlockChunkIndex++) + { + const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; + const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + BlockSize += BlockChunkSize; + if (ChunkHashToChunkIndex.contains(BlockChunkHash)) + { + ReuseSize += BlockChunkSize; + FoundAttachmentCount++; + } + } + + size_t ReusePercent = (ReuseSize * 100) / BlockSize; + + if (ReusePercent >= MinPercentLimit) + { + ZEN_CONSOLE_VERBOSE("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundAttachmentCount, + ReusePercent); + ReuseBlockIndexes.push_back(KnownBlockIndex); + + BlockSizes[KnownBlockIndex] = BlockSize; + BlockUseSize[KnownBlockIndex] = ReuseSize; + } + else if (FoundAttachmentCount > 0) + { + // ZEN_CONSOLE_VERBOSE("Skipping block {}. {} attachments found, usage level: {}%", KnownBlock.BlockHash, + // FoundAttachmentCount, ReusePercent); + FindBlocksStats.RejectedBlockCount++; + FindBlocksStats.RejectedChunkCount += FoundAttachmentCount; + FindBlocksStats.RejectedByteCount += ReuseSize; + } + } + } + + if (!ReuseBlockIndexes.empty()) + { + std::sort(ReuseBlockIndexes.begin(), ReuseBlockIndexes.end(), [&](size_t Lhs, size_t Rhs) { + return BlockUseSize[Lhs] > BlockUseSize[Rhs]; + }); + + for (size_t KnownBlockIndex : ReuseBlockIndexes) + { + std::vector<uint32_t> FoundChunkIndexes; + size_t BlockSize = 0; + size_t AdjustedReuseSize = 0; + size_t AdjustedRawReuseSize = 0; + const ChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (size_t BlockChunkIndex = 0; BlockChunkIndex < KnownBlock.ChunkRawHashes.size(); BlockChunkIndex++) + { + const IoHash& BlockChunkHash = KnownBlock.ChunkRawHashes[BlockChunkIndex]; + const uint32_t BlockChunkSize = KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + BlockSize += BlockChunkSize; + if (auto It = ChunkHashToChunkIndex.find(BlockChunkHash); It != ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = It->second; + if (!ChunkFound[ChunkIndex]) + { + FoundChunkIndexes.push_back(ChunkIndex); + AdjustedReuseSize += KnownBlock.ChunkCompressedLengths[BlockChunkIndex]; + AdjustedRawReuseSize += KnownBlock.ChunkRawLengths[BlockChunkIndex]; + } + } + } + + size_t ReusePercent = (AdjustedReuseSize * 100) / BlockSize; + + if (ReusePercent >= MinPercentLimit) + { + ZEN_CONSOLE_VERBOSE("Reusing block {}. {} attachments found, usage level: {}%", + KnownBlock.BlockHash, + FoundChunkIndexes.size(), + ReusePercent); + FilteredReuseBlockIndexes.push_back(KnownBlockIndex); + + for (uint32_t ChunkIndex : FoundChunkIndexes) + { + ChunkFound[ChunkIndex] = true; + } + FindBlocksStats.AcceptedChunkCount += FoundChunkIndexes.size(); + FindBlocksStats.AcceptedByteCount += AdjustedReuseSize; + FindBlocksStats.AcceptedRawByteCount += AdjustedRawReuseSize; + FindBlocksStats.AcceptedReduntantChunkCount += KnownBlock.ChunkRawHashes.size() - FoundChunkIndexes.size(); + FindBlocksStats.AcceptedReduntantByteCount += BlockSize - AdjustedReuseSize; + } + else + { + // ZEN_CONSOLE_VERBOSE("Skipping block {}. filtered usage level: {}%", KnownBlock.BlockHash, ReusePercent); + FindBlocksStats.RejectedBlockCount++; + FindBlocksStats.RejectedChunkCount += FoundChunkIndexes.size(); + FindBlocksStats.RejectedByteCount += AdjustedReuseSize; + } + } + } + } + OutUnusedChunkIndexes.reserve(ChunkIndexes.size() - FindBlocksStats.AcceptedChunkCount); + for (uint32_t ChunkIndex : ChunkIndexes) + { + if (!ChunkFound[ChunkIndex]) + { + OutUnusedChunkIndexes.push_back(ChunkIndex); + } + } + } + return FilteredReuseBlockIndexes; + }; + + void UploadFolder(StorageInstance& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + const std::string_view BuildPartName, + const std::filesystem::path& Path, + const std::filesystem::path& TempDir, + const std::filesystem::path& ManifestPath, + const uint64_t FindBlockMaxCount, + const uint8_t BlockReuseMinPercentLimit, + bool AllowMultiparts, + const CbObject& MetaData, + bool CreateBuild, + bool IgnoreExistingBlocks, + bool PostUploadVerify) + { + ZEN_TRACE_CPU("UploadFolder"); + + ProgressBar::SetLogOperationName(ProgressMode, "Upload Folder"); + + enum TaskSteps : uint32_t + { + PrepareBuild, + CalculateDelta, + Upload, + Validate, + Cleanup, + StepCount + }; + + auto EndProgress = + MakeGuard([&]() { ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::StepCount, TaskSteps::StepCount); }); + + Stopwatch ProcessTimer; + + CreateDirectories(TempDir); + CleanDirectory(TempDir, {}); + auto _ = MakeGuard([&]() { + if (CleanDirectory(TempDir, {})) + { + std::error_code DummyEc; + RemoveDir(TempDir, DummyEc); + } + }); + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::PrepareBuild, TaskSteps::StepCount); + + std::uint64_t TotalRawSize = 0; + + CbObject ChunkerParameters; + + struct PrepareBuildResult + { + std::vector<ChunkBlockDescription> KnownBlocks; + uint64_t PreferredMultipartChunkSize = DefaultPreferredMultipartChunkSize; + uint64_t PayloadSize = 0; + uint64_t PrepareBuildTimeMs = 0; + uint64_t FindBlocksTimeMs = 0; + uint64_t ElapsedTimeMs = 0; + }; + + FindBlocksStatistics FindBlocksStats; + + std::future<PrepareBuildResult> PrepBuildResultFuture = GetNetworkPool().EnqueueTask(std::packaged_task<PrepareBuildResult()>{ + [&Storage, BuildId, FindBlockMaxCount, &MetaData, CreateBuild, AllowMultiparts, IgnoreExistingBlocks, &FindBlocksStats] { + ZEN_TRACE_CPU("PrepareBuild"); + + PrepareBuildResult Result; + Stopwatch Timer; + if (CreateBuild) + { + ZEN_TRACE_CPU("CreateBuild"); + + Stopwatch PutBuildTimer; + CbObject PutBuildResult = Storage.BuildStorage->PutBuild(BuildId, MetaData); + Result.PrepareBuildTimeMs = PutBuildTimer.GetElapsedTimeMs(); + Result.PreferredMultipartChunkSize = PutBuildResult["chunkSize"sv].AsUInt64(Result.PreferredMultipartChunkSize); + Result.PayloadSize = MetaData.GetSize(); + } + else + { + ZEN_TRACE_CPU("PutBuild"); + Stopwatch GetBuildTimer; + CbObject Build = Storage.BuildStorage->GetBuild(BuildId); + Result.PrepareBuildTimeMs = GetBuildTimer.GetElapsedTimeMs(); + Result.PayloadSize = Build.GetSize(); + if (auto ChunkSize = Build["chunkSize"sv].AsUInt64(); ChunkSize != 0) + { + Result.PreferredMultipartChunkSize = ChunkSize; + } + else if (AllowMultiparts) + { + ZEN_WARN("PreferredMultipartChunkSize is unknown. Defaulting to '{}'", + NiceBytes(Result.PreferredMultipartChunkSize)); + } + } + + if (!IgnoreExistingBlocks) + { + ZEN_TRACE_CPU("FindBlocks"); + Stopwatch KnownBlocksTimer; + CbObject BlockDescriptionList = Storage.BuildStorage->FindBlocks(BuildId, FindBlockMaxCount); + if (BlockDescriptionList) + { + Result.KnownBlocks = ParseChunkBlockDescriptionList(BlockDescriptionList); + } + FindBlocksStats.FindBlockTimeMS = KnownBlocksTimer.GetElapsedTimeMs(); + FindBlocksStats.FoundBlockCount = Result.KnownBlocks.size(); + Result.FindBlocksTimeMs = KnownBlocksTimer.GetElapsedTimeMs(); + } + Result.ElapsedTimeMs = Timer.GetElapsedTimeMs(); + return Result; + }}); + + ChunkedFolderContent LocalContent; + + GetFolderContentStatistics LocalFolderScanStats; + ChunkingStatistics ChunkingStats; + { + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions = + DefaultExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + auto ParseManifest = [](const std::filesystem::path& Path, + const std::filesystem::path& ManifestPath) -> std::vector<std::filesystem::path> { + std::vector<std::filesystem::path> AssetPaths; + std::filesystem::path AbsoluteManifestPath = + MakeSafeAbsolutePath(ManifestPath.is_absolute() ? ManifestPath : Path / ManifestPath); + IoBuffer ManifestContent = ReadFile(AbsoluteManifestPath).Flatten(); + std::string_view ManifestString((const char*)ManifestContent.GetView().GetData(), ManifestContent.GetSize()); + std::string_view::size_type Offset = 0; + while (Offset < ManifestContent.GetSize()) + { + size_t PathBreakOffset = ManifestString.find_first_of("\t\r\n", Offset); + if (PathBreakOffset == std::string_view::npos) + { + PathBreakOffset = ManifestContent.GetSize(); + } + std::string_view AssetPath = ManifestString.substr(Offset, PathBreakOffset - Offset); + if (!AssetPath.empty()) + { + AssetPaths.emplace_back(std::filesystem::path(AssetPath)); + } + Offset = PathBreakOffset; + size_t EolOffset = ManifestString.find_first_of("\r\n", Offset); + if (EolOffset == std::string_view::npos) + { + break; + } + Offset = EolOffset; + size_t LineBreakOffset = ManifestString.find_first_not_of("\t\r\n", Offset); + if (LineBreakOffset == std::string_view::npos) + { + break; + } + Offset = LineBreakOffset; + } + return AssetPaths; + }; + + Stopwatch ScanTimer; + FolderContent Content; + if (ManifestPath.empty()) + { + std::filesystem::path ExcludeManifestPath = Path / ZenExcludeManifestName; + tsl::robin_set<std::string> ExcludeAssetPaths; + if (IsFile(ExcludeManifestPath)) + { + std::vector<std::filesystem::path> AssetPaths = ParseManifest(Path, ExcludeManifestPath); + ExcludeAssetPaths.reserve(AssetPaths.size()); + for (const std::filesystem::path& AssetPath : AssetPaths) + { + ExcludeAssetPaths.insert(AssetPath.generic_string()); + } + } + Content = GetFolderContent( + LocalFolderScanStats, + Path, + std::move(IsAcceptedFolder), + [&IsAcceptedFile, + &ExcludeAssetPaths](const std::string_view& RelativePath, uint64_t Size, uint32_t Attributes) -> bool { + if (RelativePath == ZenExcludeManifestName) + { + return false; + } + if (!IsAcceptedFile(RelativePath, Size, Attributes)) + { + return false; + } + if (ExcludeAssetPaths.contains(std::filesystem::path(RelativePath).generic_string())) + { + return false; + } + return true; + }, + GetIOWorkerPool(), + GetUpdateDelayMS(ProgressMode), + [&](bool, std::ptrdiff_t) { + ZEN_CONSOLE_VERBOSE("Found {} files in '{}'...", LocalFolderScanStats.AcceptedFileCount.load(), Path); + }, + AbortFlag); + } + else + { + Stopwatch ManifestParseTimer; + std::vector<std::filesystem::path> AssetPaths = ParseManifest(Path, ManifestPath); + for (const std::filesystem::path& AssetPath : AssetPaths) + { + Content.Paths.push_back(AssetPath); + const std::filesystem::path AssetFilePath = (Path / AssetPath).make_preferred(); + Content.RawSizes.push_back(FileSizeFromPath(AssetFilePath)); +#if ZEN_PLATFORM_WINDOWS + Content.Attributes.push_back(GetFileAttributes(AssetFilePath)); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + Content.Attributes.push_back(GetFileMode(AssetFilePath)); +#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); + LocalFolderScanStats.AcceptedFileCount++; + } + if (ManifestPath.is_relative()) + { + Content.Paths.push_back(ManifestPath); + const std::filesystem::path ManifestFilePath = (Path / ManifestPath).make_preferred(); + Content.RawSizes.push_back(FileSizeFromPath(ManifestFilePath)); +#if ZEN_PLATFORM_WINDOWS + Content.Attributes.push_back(GetFileAttributes(ManifestFilePath)); +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + Content.Attributes.push_back(GetFileMode(ManifestFilePath)); +#endif // ZEN_PLATFORM_MAC || ZEN_PLATFORM_LINUX + + LocalFolderScanStats.AcceptedFileByteCount += Content.RawSizes.back(); + LocalFolderScanStats.AcceptedFileCount++; + } + LocalFolderScanStats.FoundFileByteCount.store(LocalFolderScanStats.AcceptedFileByteCount); + LocalFolderScanStats.FoundFileCount.store(LocalFolderScanStats.AcceptedFileCount); + LocalFolderScanStats.ElapsedWallTimeUS = ManifestParseTimer.GetElapsedTimeUs(); + } + + std::unique_ptr<ChunkingController> ChunkController = + CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{}); + { + CbObjectWriter ChunkParametersWriter; + ChunkParametersWriter.AddString("name"sv, ChunkController->GetName()); + ChunkParametersWriter.AddObject("parameters"sv, ChunkController->GetParameters()); + ChunkerParameters = ChunkParametersWriter.Save(); + } + + TotalRawSize = std::accumulate(Content.RawSizes.begin(), Content.RawSizes.end(), std::uint64_t(0)); + + { + ProgressBar ProgressBar(ProgressMode, "Scan Files"); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + LocalContent = ChunkFolderContent( + ChunkingStats, + GetIOWorkerPool(), + Path, + Content, + *ChunkController, + GetUpdateDelayMS(ProgressMode), + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + Content.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(TotalRawSize), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = TotalRawSize, + .RemainingCount = TotalRawSize - ChunkingStats.BytesHashed.load(), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + AbortFlag, + PauseFlag); + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + if (AbortFlag) + { + return; + } + } + + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", + LocalContent.Paths.size(), + NiceBytes(TotalRawSize), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + Path, + NiceTimeSpanMs(ScanTimer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + } + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + + GenerateBlocksStatistics GenerateBlocksStats; + LooseChunksStatistics LooseChunksStats; + + std::vector<size_t> ReuseBlockIndexes; + std::vector<uint32_t> NewBlockChunkIndexes; + + PrepareBuildResult PrepBuildResult = PrepBuildResultFuture.get(); + + ZEN_CONSOLE("Build prepare took {}. {} took {}, payload size {}{}", + NiceTimeSpanMs(PrepBuildResult.ElapsedTimeMs), + CreateBuild ? "PutBuild" : "GetBuild", + NiceTimeSpanMs(PrepBuildResult.PrepareBuildTimeMs), + NiceBytes(PrepBuildResult.PayloadSize), + IgnoreExistingBlocks ? "" + : fmt::format(". Found {} blocks in {}", + PrepBuildResult.KnownBlocks.size(), + NiceTimeSpanMs(PrepBuildResult.FindBlocksTimeMs))); + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::CalculateDelta, TaskSteps::StepCount); + + const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PrepBuildResult.PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + + Stopwatch BlockArrangeTimer; + + std::vector<std::uint32_t> LooseChunkIndexes; + { + bool EnableBlocks = true; + std::vector<std::uint32_t> BlockChunkIndexes; + for (uint32_t ChunkIndex = 0; ChunkIndex < LocalContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const uint64_t ChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (!EnableBlocks || ChunkRawSize == 0 || ChunkRawSize > DefaultChunksBlockParams.MaxChunkEmbedSize) + { + LooseChunkIndexes.push_back(ChunkIndex); + LooseChunksStats.ChunkByteCount += ChunkRawSize; + } + else + { + BlockChunkIndexes.push_back(ChunkIndex); + FindBlocksStats.PotentialChunkByteCount += ChunkRawSize; + } + } + FindBlocksStats.PotentialChunkCount = BlockChunkIndexes.size(); + LooseChunksStats.ChunkCount = LooseChunkIndexes.size(); + + if (IgnoreExistingBlocks) + { + ZEN_CONSOLE("Ignoring any existing blocks in store"); + NewBlockChunkIndexes = std::move(BlockChunkIndexes); + } + else + { + ReuseBlockIndexes = FindReuseBlocks(PrepBuildResult.KnownBlocks, + LocalContent.ChunkedContent.ChunkHashes, + BlockChunkIndexes, + BlockReuseMinPercentLimit, + NewBlockChunkIndexes, + FindBlocksStats); + FindBlocksStats.AcceptedBlockCount = ReuseBlockIndexes.size(); + + for (const ChunkBlockDescription& Description : PrepBuildResult.KnownBlocks) + { + for (uint32_t ChunkRawLength : Description.ChunkRawLengths) + { + FindBlocksStats.FoundBlockByteCount += ChunkRawLength; + } + FindBlocksStats.FoundBlockChunkCount += Description.ChunkRawHashes.size(); + } + } + } + + std::vector<std::vector<uint32_t>> NewBlockChunks; + ArrangeChunksIntoBlocks(LocalContent, LocalLookup, DefaultChunksBlockParams.MaxBlockSize, NewBlockChunkIndexes, NewBlockChunks); + + FindBlocksStats.NewBlocksCount = NewBlockChunks.size(); + for (uint32_t ChunkIndex : NewBlockChunkIndexes) + { + FindBlocksStats.NewBlocksChunkByteCount += LocalContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + FindBlocksStats.NewBlocksChunkCount = NewBlockChunkIndexes.size(); + + const double AcceptedByteCountPercent = + FindBlocksStats.PotentialChunkByteCount > 0 + ? (100.0 * FindBlocksStats.AcceptedRawByteCount / FindBlocksStats.PotentialChunkByteCount) + : 0.0; + + const double AcceptedReduntantByteCountPercent = + FindBlocksStats.AcceptedByteCount > 0 ? (100.0 * FindBlocksStats.AcceptedReduntantByteCount) / + (FindBlocksStats.AcceptedByteCount + FindBlocksStats.AcceptedReduntantByteCount) + : 0.0; + ZEN_CONSOLE( + "Found {} chunks in {} ({}) blocks eligible for reuse in {}\n" + " Reusing {} ({}) matching chunks in {} blocks ({:.1f}%)\n" + " Accepting {} ({}) redundant chunks ({:.1f}%)\n" + " Rejected {} ({}) chunks in {} blocks\n" + " Arranged {} ({}) chunks in {} new blocks\n" + " Keeping {} ({}) chunks as loose chunks\n" + " Discovery completed in {}", + FindBlocksStats.FoundBlockChunkCount, + FindBlocksStats.FoundBlockCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + + FindBlocksStats.AcceptedChunkCount, + NiceBytes(FindBlocksStats.AcceptedRawByteCount), + FindBlocksStats.AcceptedBlockCount, + AcceptedByteCountPercent, + + FindBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(FindBlocksStats.AcceptedReduntantByteCount), + AcceptedReduntantByteCountPercent, + + FindBlocksStats.RejectedChunkCount, + NiceBytes(FindBlocksStats.RejectedByteCount), + FindBlocksStats.RejectedBlockCount, + + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount), + FindBlocksStats.NewBlocksCount, + + LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + + NiceTimeSpanMs(BlockArrangeTimer.GetElapsedTimeMs())); + + DiskStatistics DiskStats; + UploadStatistics UploadStats; + GeneratedBlocks NewBlocks; + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Upload, TaskSteps::StepCount); + + if (!NewBlockChunks.empty()) + { + Stopwatch GenerateBuildBlocksTimer; + auto __ = MakeGuard([&]() { + uint64_t BlockGenerateTimeUs = GenerateBuildBlocksTimer.GetElapsedTimeUs(); + ZEN_CONSOLE("Generated {} ({}) and uploaded {} ({}) blocks in {}. Generate speed: {}B/sec. Transfer speed {}bits/sec.", + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount), + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + NiceTimeSpanMs(BlockGenerateTimeUs / 1000), + NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, + GenerateBlocksStats.GeneratedBlockByteCount)), + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.BlocksBytes * 8))); + }); + GenerateBuildBlocks(Path, + LocalContent, + LocalLookup, + Storage, + BuildId, + NewBlockChunks, + NewBlocks, + DiskStats, + UploadStats, + GenerateBlocksStats); + } + + CbObject PartManifest; + { + CbObjectWriter PartManifestWriter; + Stopwatch ManifestGenerationTimer; + auto __ = MakeGuard([&]() { + ZEN_CONSOLE("Generated build part manifest in {} ({})", + NiceTimeSpanMs(ManifestGenerationTimer.GetElapsedTimeMs()), + NiceBytes(PartManifestWriter.GetSaveSize())); + }); + PartManifestWriter.AddObject("chunker"sv, ChunkerParameters); + + std::vector<IoHash> AllChunkBlockHashes; + std::vector<ChunkBlockDescription> AllChunkBlockDescriptions; + AllChunkBlockHashes.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + AllChunkBlockDescriptions.reserve(ReuseBlockIndexes.size() + NewBlocks.BlockDescriptions.size()); + for (size_t ReuseBlockIndex : ReuseBlockIndexes) + { + AllChunkBlockDescriptions.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex]); + AllChunkBlockHashes.push_back(PrepBuildResult.KnownBlocks[ReuseBlockIndex].BlockHash); + } + AllChunkBlockDescriptions.insert(AllChunkBlockDescriptions.end(), + NewBlocks.BlockDescriptions.begin(), + NewBlocks.BlockDescriptions.end()); + for (const ChunkBlockDescription& BlockDescription : NewBlocks.BlockDescriptions) + { + AllChunkBlockHashes.push_back(BlockDescription.BlockHash); + } +#if EXTRA_VERIFY + tsl::robin_map<IoHash, size_t, IoHash::Hasher> ChunkHashToAbsoluteChunkIndex; + std::vector<IoHash> AbsoluteChunkHashes; + AbsoluteChunkHashes.reserve(LocalContent.ChunkedContent.ChunkHashes.size()); + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ChunkHashToAbsoluteChunkIndex.insert({LocalContent.ChunkedContent.ChunkHashes[ChunkIndex], AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + for (const ChunkBlockDescription& Block : AllChunkBlockDescriptions) + { + for (const IoHash& ChunkHash : Block.ChunkHashes) + { + ChunkHashToAbsoluteChunkIndex.insert({ChunkHash, AbsoluteChunkHashes.size()}); + AbsoluteChunkHashes.push_back(ChunkHash); + } + } + for (const IoHash& ChunkHash : LocalContent.ChunkedContent.ChunkHashes) + { + ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(ChunkHash)] == ChunkHash); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkHashes[LocalLookup.ChunkHashToChunkIndex.at(ChunkHash)] == ChunkHash); + } + for (const uint32_t ChunkIndex : LocalContent.ChunkedContent.ChunkOrders) + { + ZEN_ASSERT(AbsoluteChunkHashes[ChunkHashToAbsoluteChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex])] == + LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + ZEN_ASSERT(LocalLookup.ChunkHashToChunkIndex.at(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]) == ChunkIndex); + } +#endif // EXTRA_VERIFY + std::vector<uint32_t> AbsoluteChunkOrders = CalculateAbsoluteChunkOrders(LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkOrders, + LocalLookup.ChunkHashToChunkIndex, + LooseChunkIndexes, + AllChunkBlockDescriptions); + +#if EXTRA_VERIFY + for (uint32_t ChunkOrderIndex = 0; ChunkOrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); ChunkOrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[ChunkOrderIndex]; + uint32_t AbsoluteChunkIndex = AbsoluteChunkOrders[ChunkOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const IoHash& AbsoluteChunkHash = AbsoluteChunkHashes[AbsoluteChunkIndex]; + ZEN_ASSERT(LocalChunkHash == AbsoluteChunkHash); + } +#endif // EXTRA_VERIFY + + WriteBuildContentToCompactBinary(PartManifestWriter, + LocalContent.Platform, + LocalContent.Paths, + LocalContent.RawHashes, + LocalContent.RawSizes, + LocalContent.Attributes, + LocalContent.ChunkedContent.SequenceRawHashes, + LocalContent.ChunkedContent.ChunkCounts, + LocalContent.ChunkedContent.ChunkHashes, + LocalContent.ChunkedContent.ChunkRawSizes, + AbsoluteChunkOrders, + LooseChunkIndexes, + AllChunkBlockHashes); + +#if EXTRA_VERIFY + { + ChunkedFolderContent VerifyFolderContent; + + std::vector<uint32_t> OutAbsoluteChunkOrders; + std::vector<IoHash> OutLooseChunkHashes; + std::vector<uint64_t> OutLooseChunkRawSizes; + std::vector<IoHash> OutBlockRawHashes; + ReadBuildContentFromCompactBinary(PartManifestWriter.Save(), + VerifyFolderContent.Platform, + VerifyFolderContent.Paths, + VerifyFolderContent.RawHashes, + VerifyFolderContent.RawSizes, + VerifyFolderContent.Attributes, + VerifyFolderContent.ChunkedContent.SequenceRawHashes, + VerifyFolderContent.ChunkedContent.ChunkCounts, + OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + OutBlockRawHashes); + ZEN_ASSERT(OutBlockRawHashes == AllChunkBlockHashes); + + for (uint32_t OrderIndex = 0; OrderIndex < OutAbsoluteChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = OutAbsoluteChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = AbsoluteChunkHashes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + } + + CalculateLocalChunkOrders(OutAbsoluteChunkOrders, + OutLooseChunkHashes, + OutLooseChunkRawSizes, + AllChunkBlockDescriptions, + VerifyFolderContent.ChunkedContent.ChunkHashes, + VerifyFolderContent.ChunkedContent.ChunkRawSizes, + VerifyFolderContent.ChunkedContent.ChunkOrders); + + ZEN_ASSERT(LocalContent.Paths == VerifyFolderContent.Paths); + ZEN_ASSERT(LocalContent.RawHashes == VerifyFolderContent.RawHashes); + ZEN_ASSERT(LocalContent.RawSizes == VerifyFolderContent.RawSizes); + ZEN_ASSERT(LocalContent.Attributes == VerifyFolderContent.Attributes); + ZEN_ASSERT(LocalContent.ChunkedContent.SequenceRawHashes == VerifyFolderContent.ChunkedContent.SequenceRawHashes); + ZEN_ASSERT(LocalContent.ChunkedContent.ChunkCounts == VerifyFolderContent.ChunkedContent.ChunkCounts); + + for (uint32_t OrderIndex = 0; OrderIndex < LocalContent.ChunkedContent.ChunkOrders.size(); OrderIndex++) + { + uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + + uint32_t VerifyChunkIndex = VerifyFolderContent.ChunkedContent.ChunkOrders[OrderIndex]; + const IoHash VerifyChunkHash = VerifyFolderContent.ChunkedContent.ChunkHashes[VerifyChunkIndex]; + uint64_t VerifyChunkRawSize = VerifyFolderContent.ChunkedContent.ChunkRawSizes[VerifyChunkIndex]; + + ZEN_ASSERT(LocalChunkHash == VerifyChunkHash); + ZEN_ASSERT(LocalChunkRawSize == VerifyChunkRawSize); + } + } +#endif // EXTRA_VERIFY + PartManifest = PartManifestWriter.Save(); + } + + Stopwatch PutBuildPartResultTimer; + std::pair<IoHash, std::vector<IoHash>> PutBuildPartResult = + Storage.BuildStorage->PutBuildPart(BuildId, BuildPartId, BuildPartName, PartManifest); + ZEN_CONSOLE("PutBuildPart took {}, payload size {}. {} attachments are needed.", + NiceTimeSpanMs(PutBuildPartResultTimer.GetElapsedTimeMs()), + NiceBytes(PartManifest.GetSize()), + PutBuildPartResult.second.size()); + IoHash PartHash = PutBuildPartResult.first; + + auto UploadAttachments = [&Storage, + &BuildId, + &Path, + &TempDir, + &LocalContent, + &LocalLookup, + &NewBlockChunks, + &NewBlocks, + &LooseChunkIndexes, + &LargeAttachmentSize, + &DiskStats, + &UploadStats, + &LooseChunksStats](std::span<IoHash> RawHashes) { + if (!AbortFlag) + { + UploadStatistics TempUploadStats; + LooseChunksStatistics TempLooseChunksStats; + + Stopwatch TempUploadTimer; + auto __ = MakeGuard([&]() { + uint64_t TempChunkUploadTimeUs = TempUploadTimer.GetElapsedTimeUs(); + ZEN_CONSOLE( + "Uploaded {} ({}) blocks. " + "Compressed {} ({} {}B/s) and uploaded {} ({}) chunks. " + "Transferred {} ({}bits/s) in {}", + TempUploadStats.BlockCount.load(), + NiceBytes(TempUploadStats.BlocksBytes), + + TempLooseChunksStats.CompressedChunkCount.load(), + NiceBytes(TempLooseChunksStats.CompressedChunkBytes.load()), + NiceNum( + GetBytesPerSecond(TempLooseChunksStats.CompressChunksElapsedWallTimeUS, TempLooseChunksStats.ChunkByteCount)), + TempUploadStats.ChunkCount.load(), + NiceBytes(TempUploadStats.ChunksBytes), + + NiceBytes(TempUploadStats.BlocksBytes + TempUploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(TempUploadStats.ElapsedWallTimeUS, TempUploadStats.ChunksBytes * 8)), + NiceTimeSpanMs(TempChunkUploadTimeUs / 1000)); + }); + UploadPartBlobs(Storage, + BuildId, + Path, + TempDir, + LocalContent, + LocalLookup, + RawHashes, + NewBlockChunks, + NewBlocks, + LooseChunkIndexes, + LargeAttachmentSize, + DiskStats, + TempUploadStats, + TempLooseChunksStats); + UploadStats += TempUploadStats; + LooseChunksStats += TempLooseChunksStats; + } + }; + if (IgnoreExistingBlocks) + { + ZEN_CONSOLE_VERBOSE("PutBuildPart uploading all attachments, needs are: {}", + FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + + std::vector<IoHash> ForceUploadChunkHashes; + ForceUploadChunkHashes.reserve(LooseChunkIndexes.size()); + + for (uint32_t ChunkIndex : LooseChunkIndexes) + { + ForceUploadChunkHashes.push_back(LocalContent.ChunkedContent.ChunkHashes[ChunkIndex]); + } + + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockHeaders.size(); BlockIndex++) + { + if (NewBlocks.BlockHeaders[BlockIndex]) + { + // Block was not uploaded during generation + ForceUploadChunkHashes.push_back(NewBlocks.BlockDescriptions[BlockIndex].BlockHash); + } + } + UploadAttachments(ForceUploadChunkHashes); + } + else if (!PutBuildPartResult.second.empty()) + { + ZEN_CONSOLE_VERBOSE("PutBuildPart needs attachments: {}", FormatArray<IoHash>(PutBuildPartResult.second, "\n "sv)); + UploadAttachments(PutBuildPartResult.second); + } + + uint32_t FinalizeBuildPartRetryCount = 5; + while (!AbortFlag && (FinalizeBuildPartRetryCount--) > 0) + { + Stopwatch FinalizeBuildPartTimer; + std::vector<IoHash> Needs = Storage.BuildStorage->FinalizeBuildPart(BuildId, BuildPartId, PartHash); + ZEN_CONSOLE("FinalizeBuildPart took {}. {} attachments are missing.", + NiceTimeSpanMs(FinalizeBuildPartTimer.GetElapsedTimeMs()), + Needs.size()); + if (Needs.empty()) + { + break; + } + ZEN_CONSOLE_VERBOSE("FinalizeBuildPart needs attachments: {}", FormatArray<IoHash>(Needs, "\n "sv)); + UploadAttachments(Needs); + } + + if (CreateBuild && !AbortFlag) + { + Stopwatch FinalizeBuildTimer; + Storage.BuildStorage->FinalizeBuild(BuildId); + ZEN_CONSOLE("FinalizeBuild took {}", NiceTimeSpanMs(FinalizeBuildTimer.GetElapsedTimeMs())); + } + + if (!NewBlocks.BlockDescriptions.empty() && !AbortFlag) + { + uint64_t UploadBlockMetadataCount = 0; + Stopwatch UploadBlockMetadataTimer; + + uint32_t FailedMetadataUploadCount = 1; + int32_t MetadataUploadRetryCount = 3; + while ((MetadataUploadRetryCount-- > 0) && (FailedMetadataUploadCount > 0)) + { + FailedMetadataUploadCount = 0; + for (size_t BlockIndex = 0; BlockIndex < NewBlocks.BlockDescriptions.size(); BlockIndex++) + { + if (AbortFlag) + { + break; + } + const IoHash& BlockHash = NewBlocks.BlockDescriptions[BlockIndex].BlockHash; + if (!NewBlocks.MetaDataHasBeenUploaded[BlockIndex]) + { + const CbObject BlockMetaData = + BuildChunkBlockDescription(NewBlocks.BlockDescriptions[BlockIndex], NewBlocks.BlockMetaDatas[BlockIndex]); + if (Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBlobMetadatas(BuildId, + std::vector<IoHash>({BlockHash}), + std::vector<CbObject>({BlockMetaData})); + } + bool MetadataSucceeded = Storage.BuildStorage->PutBlockMetadata(BuildId, BlockHash, BlockMetaData); + if (MetadataSucceeded) + { + UploadStats.BlocksBytes += BlockMetaData.GetSize(); + NewBlocks.MetaDataHasBeenUploaded[BlockIndex] = true; + UploadBlockMetadataCount++; + } + else + { + FailedMetadataUploadCount++; + } + } + } + } + if (UploadBlockMetadataCount > 0) + { + uint64_t ElapsedUS = UploadBlockMetadataTimer.GetElapsedTimeUs(); + UploadStats.ElapsedWallTimeUS += ElapsedUS; + ZEN_CONSOLE("Uploaded metadata for {} blocks in {}", UploadBlockMetadataCount, NiceTimeSpanMs(ElapsedUS / 1000)); + } + } + + ValidateStatistics ValidateStats; + DownloadStatistics ValidateDownloadStats; + if (PostUploadVerify && !AbortFlag) + { + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Validate, TaskSteps::StepCount); + ValidateBuildPart(*Storage.BuildStorage, BuildId, BuildPartId, BuildPartName, ValidateStats, ValidateDownloadStats); + } + + ZEN_CONSOLE_VERBOSE( + "Folder scanning stats:" + "\n FoundFileCount: {}" + "\n FoundFileByteCount: {}" + "\n AcceptedFileCount: {}" + "\n AcceptedFileByteCount: {}" + "\n ElapsedWallTimeUS: {}", + LocalFolderScanStats.FoundFileCount.load(), + NiceBytes(LocalFolderScanStats.FoundFileByteCount.load()), + LocalFolderScanStats.AcceptedFileCount.load(), + NiceBytes(LocalFolderScanStats.AcceptedFileByteCount.load()), + NiceLatencyNs(LocalFolderScanStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Chunking stats:" + "\n FilesProcessed: {}" + "\n FilesChunked: {}" + "\n BytesHashed: {}" + "\n UniqueChunksFound: {}" + "\n UniqueSequencesFound: {}" + "\n UniqueBytesFound: {}" + "\n ElapsedWallTimeUS: {}", + ChunkingStats.FilesProcessed.load(), + ChunkingStats.FilesChunked.load(), + NiceBytes(ChunkingStats.BytesHashed.load()), + ChunkingStats.UniqueChunksFound.load(), + ChunkingStats.UniqueSequencesFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load()), + NiceLatencyNs(ChunkingStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Find block stats:" + "\n FindBlockTimeMS: {}" + "\n PotentialChunkCount: {}" + "\n PotentialChunkByteCount: {}" + "\n FoundBlockCount: {}" + "\n FoundBlockChunkCount: {}" + "\n FoundBlockByteCount: {}" + "\n AcceptedBlockCount: {}" + "\n AcceptedChunkCount: {}" + "\n AcceptedByteCount: {}" + "\n AcceptedRawByteCount: {}" + "\n RejectedBlockCount: {}" + "\n RejectedChunkCount: {}" + "\n RejectedByteCount: {}" + "\n AcceptedReduntantChunkCount: {}" + "\n AcceptedReduntantByteCount: {}" + "\n NewBlocksCount: {}" + "\n NewBlocksChunkCount: {}" + "\n NewBlocksChunkByteCount: {}", + NiceTimeSpanMs(FindBlocksStats.FindBlockTimeMS), + FindBlocksStats.PotentialChunkCount, + NiceBytes(FindBlocksStats.PotentialChunkByteCount), + FindBlocksStats.FoundBlockCount, + FindBlocksStats.FoundBlockChunkCount, + NiceBytes(FindBlocksStats.FoundBlockByteCount), + FindBlocksStats.AcceptedBlockCount, + FindBlocksStats.AcceptedChunkCount, + NiceBytes(FindBlocksStats.AcceptedByteCount), + NiceBytes(FindBlocksStats.AcceptedRawByteCount), + FindBlocksStats.RejectedBlockCount, + FindBlocksStats.RejectedChunkCount, + NiceBytes(FindBlocksStats.RejectedByteCount), + FindBlocksStats.AcceptedReduntantChunkCount, + NiceBytes(FindBlocksStats.AcceptedReduntantByteCount), + FindBlocksStats.NewBlocksCount, + FindBlocksStats.NewBlocksChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n GeneratedBlockByteCount: {}" + "\n GeneratedBlockCount: {}" + "\n GenerateBlocksElapsedWallTimeUS: {}", + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceLatencyNs(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Generate blocks stats:" + "\n ChunkCount: {}" + "\n ChunkByteCount: {}" + "\n CompressedChunkCount: {}" + "\n CompressChunksElapsedWallTimeUS: {}", + LooseChunksStats.ChunkCount, + NiceBytes(LooseChunksStats.ChunkByteCount), + LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(LooseChunksStats.CompressedChunkBytes.load()), + NiceLatencyNs(LooseChunksStats.CompressChunksElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Disk stats:" + "\n OpenReadCount: {}" + "\n OpenWriteCount: {}" + "\n ReadCount: {}" + "\n ReadByteCount: {}" + "\n WriteCount: {}" + "\n WriteByteCount: {}" + "\n CurrentOpenFileCount: {}", + DiskStats.OpenReadCount.load(), + DiskStats.OpenWriteCount.load(), + DiskStats.ReadCount.load(), + NiceBytes(DiskStats.ReadByteCount.load()), + DiskStats.WriteCount.load(), + NiceBytes(DiskStats.WriteByteCount.load()), + DiskStats.CurrentOpenFileCount.load()); + + ZEN_CONSOLE_VERBOSE( + "Upload stats:" + "\n BlockCount: {}" + "\n BlocksBytes: {}" + "\n ChunkCount: {}" + "\n ChunksBytes: {}" + "\n ReadFromDiskBytes: {}" + "\n MultipartAttachmentCount: {}" + "\n ElapsedWallTimeUS: {}", + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + UploadStats.ChunkCount.load(), + NiceBytes(UploadStats.ChunksBytes.load()), + NiceBytes(UploadStats.ReadFromDiskBytes.load()), + UploadStats.MultipartAttachmentCount.load(), + NiceLatencyNs(UploadStats.ElapsedWallTimeUS * 1000)); + + if (PostUploadVerify) + { + ZEN_CONSOLE_VERBOSE( + "Validate stats:" + "\n BuildBlobSize: {}" + "\n BuildPartSize: {}" + "\n ChunkAttachmentCount: {}" + "\n BlockAttachmentCount: {}" + "\n VerifiedAttachmentCount: {}" + "\n VerifiedByteCount: {}" + "\n ElapsedWallTimeUS: {}", + NiceBytes(ValidateStats.BuildBlobSize), + NiceBytes(ValidateStats.BuildPartSize), + ValidateStats.ChunkAttachmentCount, + ValidateStats.BlockAttachmentCount, + ValidateStats.VerifiedAttachmentCount.load(), + NiceBytes(ValidateStats.VerifiedByteCount.load()), + NiceLatencyNs(ValidateStats.ElapsedWallTimeUS * 1000)); + + ZEN_CONSOLE_VERBOSE( + "Validate download stats:" + "\n RequestsCompleteCount: {}" + "\n DownloadedChunkCount: {}" + "\n DownloadedChunkByteCount: {}" + "\n MultipartAttachmentCount: {}" + "\n DownloadedBlockCount: {}" + "\n DownloadedBlockByteCount: {}" + "\n DownloadedPartialBlockCount: {}" + "\n DownloadedPartialBlockByteCount: {}", + ValidateDownloadStats.RequestsCompleteCount.load(), + ValidateDownloadStats.DownloadedChunkCount.load(), + NiceBytes(ValidateDownloadStats.DownloadedChunkByteCount.load()), + ValidateDownloadStats.MultipartAttachmentCount.load(), + ValidateDownloadStats.DownloadedBlockCount.load(), + NiceBytes(ValidateDownloadStats.DownloadedBlockByteCount.load()), + ValidateDownloadStats.DownloadedPartialBlockCount.load(), + NiceBytes(ValidateDownloadStats.DownloadedPartialBlockByteCount.load())); + } + + const double DeltaByteCountPercent = + ChunkingStats.BytesHashed > 0 + ? (100.0 * (FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes)) / (ChunkingStats.BytesHashed) + : 0.0; + + const std::string MultipartAttachmentStats = + (LargeAttachmentSize != (uint64_t)-1) ? fmt::format(" ({} as multipart)", UploadStats.MultipartAttachmentCount.load()) : ""; + + std::string ValidateInfo; + if (PostUploadVerify) + { + const uint64_t DownloadedCount = ValidateDownloadStats.DownloadedChunkCount + ValidateDownloadStats.DownloadedBlockCount; + const uint64_t DownloadedByteCount = + ValidateDownloadStats.DownloadedChunkByteCount + ValidateDownloadStats.DownloadedBlockByteCount; + ValidateInfo = fmt::format("\n Verified: {:>8} ({}), {}B/sec, {}", + DownloadedCount, + NiceBytes(DownloadedByteCount), + NiceNum(GetBytesPerSecond(ValidateStats.ElapsedWallTimeUS, DownloadedByteCount)), + NiceTimeSpanMs(ValidateStats.ElapsedWallTimeUS / 1000)); + } + + ZEN_CONSOLE( + "Uploaded part {} ('{}') to build {}, {}\n" + " Scanned files: {:>8} ({}), {}B/sec, {}\n" + " New data: {:>8} ({}) {:.1f}%\n" + " New blocks: {:>8} ({} -> {}), {}B/sec, {}\n" + " New chunks: {:>8} ({} -> {}), {}B/sec, {}\n" + " Uploaded: {:>8} ({}), {}bits/sec, {}\n" + " Blocks: {:>8} ({})\n" + " Chunks: {:>8} ({}){}" + "{}", + BuildPartId, + BuildPartName, + BuildId, + NiceTimeSpanMs(ProcessTimer.GetElapsedTimeMs()), + + LocalFolderScanStats.FoundFileCount.load(), + NiceBytes(LocalFolderScanStats.FoundFileByteCount.load()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed)), + NiceTimeSpanMs(ChunkingStats.ElapsedWallTimeUS / 1000), + + FindBlocksStats.NewBlocksChunkCount + LooseChunksStats.CompressedChunkCount, + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount + LooseChunksStats.CompressedChunkBytes), + DeltaByteCountPercent, + + GenerateBlocksStats.GeneratedBlockCount.load(), + NiceBytes(FindBlocksStats.NewBlocksChunkByteCount), + NiceBytes(GenerateBlocksStats.GeneratedBlockByteCount.load()), + NiceNum(GetBytesPerSecond(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS, GenerateBlocksStats.GeneratedBlockByteCount)), + NiceTimeSpanMs(GenerateBlocksStats.GenerateBlocksElapsedWallTimeUS / 1000), + + LooseChunksStats.CompressedChunkCount.load(), + NiceBytes(LooseChunksStats.ChunkByteCount), + NiceBytes(LooseChunksStats.CompressedChunkBytes.load()), + NiceNum(GetBytesPerSecond(LooseChunksStats.CompressChunksElapsedWallTimeUS, LooseChunksStats.ChunkByteCount)), + NiceTimeSpanMs(LooseChunksStats.CompressChunksElapsedWallTimeUS / 1000), + + UploadStats.BlockCount.load() + UploadStats.ChunkCount.load(), + NiceBytes(UploadStats.BlocksBytes + UploadStats.ChunksBytes), + NiceNum(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, (UploadStats.ChunksBytes + UploadStats.BlocksBytes) * 8)), + NiceTimeSpanMs(UploadStats.ElapsedWallTimeUS / 1000), + + UploadStats.BlockCount.load(), + NiceBytes(UploadStats.BlocksBytes.load()), + + UploadStats.ChunkCount.load(), + NiceBytes(UploadStats.ChunksBytes.load()), + MultipartAttachmentStats, + + ValidateInfo); + + Storage.BuildStorage->PutBuildPartStats( + BuildId, + BuildPartId, + {{"totalSize", double(LocalFolderScanStats.FoundFileByteCount.load())}, + {"reusedRatio", AcceptedByteCountPercent / 100.0}, + {"reusedBlockCount", double(FindBlocksStats.AcceptedBlockCount)}, + {"reusedBlockByteCount", double(FindBlocksStats.AcceptedRawByteCount)}, + {"newBlockCount", double(FindBlocksStats.NewBlocksCount)}, + {"newBlockByteCount", double(FindBlocksStats.NewBlocksChunkByteCount)}, + {"uploadedCount", double(UploadStats.BlockCount.load() + UploadStats.ChunkCount.load())}, + {"uploadedByteCount", double(UploadStats.BlocksBytes.load() + UploadStats.ChunksBytes.load())}, + {"uploadedBytesPerSec", + double(GetBytesPerSecond(UploadStats.ElapsedWallTimeUS, UploadStats.ChunksBytes + UploadStats.BlocksBytes))}, + {"elapsedTimeSec", double(ProcessTimer.GetElapsedTimeMs() / 1000.0)}}); + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Cleanup, TaskSteps::StepCount); + } + + void VerifyFolder(const ChunkedFolderContent& Content, + const std::filesystem::path& Path, + bool VerifyFileHash, + VerifyFolderStatistics& VerifyFolderStats) + { + ZEN_TRACE_CPU("VerifyFolder"); + + Stopwatch Timer; + + ProgressBar ProgressBar(ProgressMode, "Verify Files"); + + WorkerThreadPool& VerifyPool = GetIOWorkerPool(); + + ParallelWork Work(AbortFlag, PauseFlag); + + const uint32_t PathCount = gsl::narrow<uint32_t>(Content.Paths.size()); + + RwLock ErrorLock; + std::vector<std::string> Errors; + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + const ChunkedContentLookup Lookup = BuildChunkedContentLookup(Content); + + for (uint32_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (Work.IsAborted()) + { + break; + } + + Work.ScheduleWork( + VerifyPool, + [&Path, &Content, &Lookup, &ErrorLock, &Errors, &VerifyFolderStats, VerifyFileHash, &IsAcceptedFolder, PathIndex]( + std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("VerifyFile_work"); + + // TODO: Convert ScheduleWork body to function + + const std::filesystem::path TargetPath = (Path / Content.Paths[PathIndex]).make_preferred(); + if (IsAcceptedFolder(TargetPath.parent_path().generic_string())) + { + const uint64_t ExpectedSize = Content.RawSizes[PathIndex]; + if (!IsFile(TargetPath)) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("File {} with expected size {} does not exist", TargetPath, ExpectedSize)); + }); + VerifyFolderStats.FilesFailed++; + } + else + { + std::error_code Ec; + uint64_t SizeOnDisk = gsl::narrow<uint64_t>(FileSizeFromPath(TargetPath, Ec)); + if (Ec) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back( + fmt::format("Failed to get size of file {}: {} ({})", TargetPath, Ec.message(), Ec.value())); + }); + VerifyFolderStats.FilesFailed++; + } + else if (SizeOnDisk < ExpectedSize) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Size of file {} is smaller than expected. Expected: {}, Found: {}", + TargetPath, + ExpectedSize, + SizeOnDisk)); + }); + VerifyFolderStats.FilesFailed++; + } + else if (SizeOnDisk > ExpectedSize) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Size of file {} is bigger than expected. Expected: {}, Found: {}", + TargetPath, + ExpectedSize, + SizeOnDisk)); + }); + VerifyFolderStats.FilesFailed++; + } + else if (SizeOnDisk > 0 && VerifyFileHash) + { + const IoHash& ExpectedRawHash = Content.RawHashes[PathIndex]; + IoBuffer Buffer = IoBufferBuilder::MakeFromFile(TargetPath); + IoHash RawHash = IoHash::HashBuffer(Buffer); + if (RawHash != ExpectedRawHash) + { + uint64_t FileOffset = 0; + const uint32_t SequenceIndex = Lookup.RawHashToSequenceIndex.at(ExpectedRawHash); + const uint32_t OrderOffset = Lookup.SequenceIndexChunkOrderOffset[SequenceIndex]; + for (uint32_t OrderIndex = OrderOffset; + OrderIndex < OrderOffset + Content.ChunkedContent.ChunkCounts[SequenceIndex]; + OrderIndex++) + { + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + uint64_t ChunkSize = Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + IoHash ChunkHash = Content.ChunkedContent.ChunkHashes[ChunkIndex]; + IoBuffer FileChunk = IoBuffer(Buffer, FileOffset, ChunkSize); + if (IoHash::HashBuffer(FileChunk) != ChunkHash) + { + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format( + "WARNING: Hash of file {} does not match expected hash. Expected: {}, Found: {}. " + "Mismatch at chunk {}", + TargetPath, + ExpectedRawHash, + RawHash, + OrderIndex - OrderOffset)); + }); + break; + } + FileOffset += ChunkSize; + } + VerifyFolderStats.FilesFailed++; + } + VerifyFolderStats.ReadBytes += SizeOnDisk; + } + } + } + VerifyFolderStats.FilesVerified++; + } + }, + [&, PathIndex](std::exception_ptr Ex, std::atomic<bool>&) { + std::string Description; + try + { + std::rethrow_exception(Ex); + } + catch (const std::exception& Ex) + { + Description = Ex.what(); + } + ErrorLock.WithExclusiveLock([&]() { + Errors.push_back(fmt::format("Failed verifying file '{}'. Reason: {}", + (Path / Content.Paths[PathIndex]).make_preferred(), + Description)); + }); + VerifyFolderStats.FilesFailed++; + }); + } + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + std::string Details = fmt::format("Verified {}/{} ({}). Failed files: {}", + VerifyFolderStats.FilesVerified.load(), + PathCount, + NiceBytes(VerifyFolderStats.ReadBytes.load()), + VerifyFolderStats.FilesFailed.load()); + ProgressBar.UpdateState({.Task = "Verifying files ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(PathCount), + .RemainingCount = gsl::narrow<uint64_t>(PathCount - VerifyFolderStats.FilesVerified.load()), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + VerifyFolderStats.VerifyElapsedWallTimeUs = Timer.GetElapsedTimeUs(); + + ProgressBar.Finish(); + if (AbortFlag) + { + return; + } + + for (const std::string& Error : Errors) + { + ZEN_CONSOLE("{}", Error); + } + if (!Errors.empty()) + { + throw std::runtime_error(fmt::format("Verify failed with {} errors", Errors.size())); + } + } + + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> GetRemainingChunkTargets( + std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(Lookup, ChunkIndex); + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; + if (!ChunkSources.empty()) + { + ChunkTargetPtrs.reserve(ChunkSources.size()); + for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) + { + if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) + { + ChunkTargetPtrs.push_back(&Source); + } + } + } + return ChunkTargetPtrs; + }; + + uint64_t GetChunkWriteCount(std::span<const std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const ChunkedContentLookup& Lookup, + uint32_t ChunkIndex) + { + uint64_t WriteCount = 0; + std::span<const ChunkedContentLookup::ChunkSequenceLocation> ChunkSources = GetChunkSequenceLocations(Lookup, ChunkIndex); + for (const ChunkedContentLookup::ChunkSequenceLocation& Source : ChunkSources) + { + if (SequenceIndexChunksLeftToWriteCounters[Source.SequenceIndex].load() > 0) + { + WriteCount++; + } + } + return WriteCount; + }; + + void FinalizeChunkSequence(const std::filesystem::path& TargetFolder, const IoHash& SequenceRawHash) + { + ZEN_TRACE_CPU("FinalizeChunkSequence"); + ZEN_ASSERT_SLOW(!IsFile(GetFinalChunkedSequenceFileName(TargetFolder, SequenceRawHash))); + std::error_code Ec; + RenameFile(GetTempChunkedSequenceFileName(TargetFolder, SequenceRawHash), + GetFinalChunkedSequenceFileName(TargetFolder, SequenceRawHash), + Ec); + if (Ec) + { + throw std::system_error(Ec); + } + } + + void FinalizeChunkSequences(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + std::span<const uint32_t> RemoteSequenceIndexes) + { + ZEN_TRACE_CPU("FinalizeChunkSequences"); + for (uint32_t SequenceIndex : RemoteSequenceIndexes) + { + FinalizeChunkSequence(TargetFolder, RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + } + } + + void VerifySequence(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + uint32_t RemoteSequenceIndex) + { + ZEN_TRACE_CPU("VerifySequence"); + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + { + ZEN_TRACE_CPU("HashSequence"); + const std::uint32_t RemotePathIndex = Lookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t ExpectedSize = RemoteContent.RawSizes[RemotePathIndex]; + IoBuffer VerifyBuffer = IoBufferBuilder::MakeFromFile(GetTempChunkedSequenceFileName(TargetFolder, SequenceRawHash)); + const uint64_t VerifySize = VerifyBuffer.GetSize(); + if (VerifySize != ExpectedSize) + { + throw std::runtime_error(fmt::format("Written chunk sequence {} size {} does not match expected size {}", + SequenceRawHash, + VerifySize, + ExpectedSize)); + } + + const IoHash VerifyChunkHash = IoHash::HashBuffer(std::move(VerifyBuffer)); + if (VerifyChunkHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Written chunk sequence {} hash does not match expected hash {}", VerifyChunkHash, SequenceRawHash)); + } + } + } + + void VerifyAndCompleteChunkSequencesAsync(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + std::span<const uint32_t> RemoteSequenceIndexes, + ParallelWork& Work, + WorkerThreadPool& VerifyPool) + { + if (RemoteSequenceIndexes.empty()) + { + return; + } + ZEN_TRACE_CPU("VerifyAndCompleteChunkSequence"); + for (uint32_t RemoteSequenceIndexOffset = 1; RemoteSequenceIndexOffset < RemoteSequenceIndexes.size(); RemoteSequenceIndexOffset++) + { + const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[RemoteSequenceIndexOffset]; + Work.ScheduleWork(VerifyPool, [&RemoteContent, &Lookup, TargetFolder, RemoteSequenceIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("VerifyAndCompleteChunkSequenceAsync"); + VerifySequence(TargetFolder, RemoteContent, Lookup, RemoteSequenceIndex); + if (!AbortFlag) + { + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + FinalizeChunkSequence(TargetFolder, SequenceRawHash); + } + } + }); + } + const uint32_t RemoteSequenceIndex = RemoteSequenceIndexes[0]; + + VerifySequence(TargetFolder, RemoteContent, Lookup, RemoteSequenceIndex); + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + FinalizeChunkSequence(TargetFolder, SequenceRawHash); + } + + bool CompleteSequenceChunk(uint32_t RemoteSequenceIndex, std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters) + { + uint32_t PreviousValue = SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].fetch_sub(1); + ZEN_ASSERT(PreviousValue >= 1); + ZEN_ASSERT(PreviousValue != (uint32_t)-1); + return PreviousValue == 1; + } + + std::vector<uint32_t> CompleteChunkTargets(const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters) + { + ZEN_TRACE_CPU("CompleteChunkTargets"); + + std::vector<uint32_t> CompletedSequenceIndexes; + for (const ChunkedContentLookup::ChunkSequenceLocation* Location : ChunkTargetPtrs) + { + const uint32_t RemoteSequenceIndex = Location->SequenceIndex; + if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) + { + CompletedSequenceIndexes.push_back(RemoteSequenceIndex); + } + } + return CompletedSequenceIndexes; + } + + void WriteSequenceChunk(const std::filesystem::path& TargetFolderPath, + const ChunkedFolderContent& RemoteContent, + BufferedWriteFileCache::Local& LocalWriter, + const CompositeBuffer& Chunk, + const uint32_t SequenceIndex, + const uint64_t FileOffset, + const uint32_t PathIndex, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("WriteSequenceChunk"); + + const uint64_t SequenceSize = RemoteContent.RawSizes[PathIndex]; + + auto OpenFile = [&](BasicFile& File) { + const std::filesystem::path FileName = + GetTempChunkedSequenceFileName(TargetFolderPath, RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + File.Open(FileName, BasicFile::Mode::kWrite); + if (UseSparseFiles) + { + PrepareFileForScatteredWrite(File.Handle(), SequenceSize); + } + }; + + const uint64_t ChunkSize = Chunk.GetSize(); + ZEN_ASSERT(FileOffset + ChunkSize <= SequenceSize); + if (ChunkSize == SequenceSize) + { + BasicFile SingleChunkFile; + OpenFile(SingleChunkFile); + + DiskStats.CurrentOpenFileCount++; + auto _ = MakeGuard([&DiskStats]() { DiskStats.CurrentOpenFileCount--; }); + SingleChunkFile.Write(Chunk, FileOffset); + + DiskStats.WriteCount++; + DiskStats.WriteByteCount += ChunkSize; + } + else + { + const uint64_t MaxWriterBufferSize = 256u * 1025u; + + BufferedWriteFileCache::Local::Writer* Writer = LocalWriter.GetWriter(SequenceIndex); + if (Writer) + { + if ((!Writer->Writer) && (ChunkSize < MaxWriterBufferSize)) + { + Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize)); + } + Writer->Write(Chunk, FileOffset); + + DiskStats.WriteCount++; + DiskStats.WriteByteCount += ChunkSize; + } + else + { + Writer = LocalWriter.PutWriter(SequenceIndex, std::make_unique<BufferedWriteFileCache::Local::Writer>()); + + Writer->File = std::make_unique<BasicFile>(); + OpenFile(*Writer->File); + if (ChunkSize < MaxWriterBufferSize) + { + Writer->Writer = std::make_unique<BasicFileWriter>(*Writer->File, Min(SequenceSize, MaxWriterBufferSize)); + } + Writer->Write(Chunk, FileOffset); + + DiskStats.WriteCount++; + DiskStats.WriteByteCount += ChunkSize; + } + } + } + + struct BlockWriteOps + { + std::vector<CompositeBuffer> ChunkBuffers; + struct WriteOpData + { + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + size_t ChunkBufferIndex = (size_t)-1; + }; + std::vector<WriteOpData> WriteOps; + }; + + void WriteBlockChunkOps(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + const BlockWriteOps& Ops, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work, + WorkerThreadPool& VerifyPool, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("WriteBlockChunkOps"); + + { + BufferedWriteFileCache::Local LocalWriter(WriteCache); + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + if (AbortFlag) + { + break; + } + const CompositeBuffer& Chunk = Ops.ChunkBuffers[WriteOp.ChunkBufferIndex]; + const uint32_t SequenceIndex = WriteOp.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[SequenceIndex].load() > 0); + const uint64_t FileOffset = WriteOp.Target->Offset; + const uint32_t PathIndex = Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; + + WriteSequenceChunk(CacheFolderPath, RemoteContent, LocalWriter, Chunk, SequenceIndex, FileOffset, PathIndex, DiskStats); + } + } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (BufferedWriteFileCache::Local) + std::vector<uint32_t> CompletedChunkSequences; + for (const BlockWriteOps::WriteOpData& WriteOp : Ops.WriteOps) + { + const uint32_t RemoteSequenceIndex = WriteOp.Target->SequenceIndex; + if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) + { + CompletedChunkSequences.push_back(RemoteSequenceIndex); + } + } + WriteCache.Close(CompletedChunkSequences); + VerifyAndCompleteChunkSequencesAsync(CacheFolderPath, RemoteContent, Lookup, CompletedChunkSequences, Work, VerifyPool); + } + } + + IoBuffer MakeBufferMemoryBased(const CompositeBuffer& PartialBlockBuffer) + { + ZEN_TRACE_CPU("MakeBufferMemoryBased"); + IoBuffer BlockMemoryBuffer; + std::span<const SharedBuffer> Segments = PartialBlockBuffer.GetSegments(); + if (Segments.size() == 1) + { + IoBufferFileReference FileRef = {}; + if (PartialBlockBuffer.GetSegments().front().AsIoBuffer().GetFileReference(FileRef)) + { + BlockMemoryBuffer = UniqueBuffer::Alloc(FileRef.FileChunkSize).MoveToShared().AsIoBuffer(); + BasicFile Reader; + Reader.Attach(FileRef.FileHandle); + auto _ = MakeGuard([&Reader]() { Reader.Detach(); }); + MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView(); + Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset); + return BlockMemoryBuffer; + } + else + { + return PartialBlockBuffer.GetSegments().front().AsIoBuffer(); + } + } + else + { + // Not a homogenous memory buffer, read all to memory + + BlockMemoryBuffer = UniqueBuffer::Alloc(PartialBlockBuffer.GetSize()).MoveToShared().AsIoBuffer(); + MutableMemoryView ReadMem = BlockMemoryBuffer.GetMutableView(); + for (const SharedBuffer& Segment : Segments) + { + IoBufferFileReference FileRef = {}; + if (Segment.AsIoBuffer().GetFileReference(FileRef)) + { + BasicFile Reader; + Reader.Attach(FileRef.FileHandle); + Reader.Read(ReadMem.GetData(), FileRef.FileChunkSize, FileRef.FileChunkOffset); + Reader.Detach(); + ReadMem = ReadMem.Mid(FileRef.FileChunkSize); + } + else + { + ReadMem = ReadMem.CopyFrom(Segment.AsIoBuffer().GetView()); + } + } + return BlockMemoryBuffer; + } + } + + bool GetBlockWriteOps(const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& Lookup, + std::span<const IoHash> ChunkRawHashes, + std::span<const uint32_t> ChunkCompressedLengths, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + const MemoryView BlockView, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + BlockWriteOps& OutOps) + { + ZEN_TRACE_CPU("GetBlockWriteOps"); + uint32_t OffsetInBlock = 0; + for (uint32_t ChunkBlockIndex = FirstIncludedBlockChunkIndex; ChunkBlockIndex <= LastIncludedBlockChunkIndex; ChunkBlockIndex++) + { + const uint32_t ChunkCompressedSize = ChunkCompressedLengths[ChunkBlockIndex]; + const IoHash& ChunkHash = ChunkRawHashes[ChunkBlockIndex]; + if (auto It = Lookup.ChunkHashToChunkIndex.find(ChunkHash); It != Lookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = It->second; + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, Lookup, ChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + bool NeedsWrite = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[ChunkIndex].compare_exchange_strong(NeedsWrite, false)) + { + MemoryView ChunkMemoryView = BlockView.Mid(OffsetInBlock, ChunkCompressedSize); + IoHash VerifyChunkHash; + uint64_t VerifyChunkSize; + CompressedBuffer CompressedChunk = + CompressedBuffer::FromCompressed(SharedBuffer::MakeView(ChunkMemoryView), VerifyChunkHash, VerifyChunkSize); + ZEN_ASSERT(CompressedChunk); + ZEN_ASSERT(VerifyChunkHash == ChunkHash); + ZEN_ASSERT(VerifyChunkSize == RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + + OodleCompressor ChunkCompressor; + OodleCompressionLevel ChunkCompressionLevel; + uint64_t ChunkBlockSize; + + bool GetCompressParametersSuccess = + CompressedChunk.TryGetCompressParameters(ChunkCompressor, ChunkCompressionLevel, ChunkBlockSize); + ZEN_ASSERT(GetCompressParametersSuccess); + + IoBuffer Decompressed; + if (ChunkCompressionLevel == OodleCompressionLevel::None) + { + MemoryView ChunkDecompressedMemoryView = ChunkMemoryView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()); + Decompressed = + IoBuffer(IoBuffer::Wrap, ChunkDecompressedMemoryView.GetData(), ChunkDecompressedMemoryView.GetSize()); + } + else + { + Decompressed = CompressedChunk.Decompress().AsIoBuffer(); + } + ZEN_ASSERT(Decompressed.GetSize() == RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]); + ZEN_ASSERT_SLOW(ChunkHash == IoHash::HashBuffer(Decompressed)); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : ChunkTargetPtrs) + { + OutOps.WriteOps.push_back( + BlockWriteOps::WriteOpData{.Target = Target, .ChunkBufferIndex = OutOps.ChunkBuffers.size()}); + } + OutOps.ChunkBuffers.emplace_back(std::move(Decompressed)); + } + } + } + + OffsetInBlock += ChunkCompressedSize; + } + { + ZEN_TRACE_CPU("GetBlockWriteOps_sort"); + std::sort(OutOps.WriteOps.begin(), + OutOps.WriteOps.end(), + [](const BlockWriteOps::WriteOpData& Lhs, const BlockWriteOps::WriteOpData& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + return Lhs.Target->Offset < Rhs.Target->Offset; + }); + } + return true; + } + + bool WriteBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + ParallelWork& Work, + WorkerThreadPool& VerifyPool, + CompositeBuffer&& BlockBuffer, + const ChunkedContentLookup& Lookup, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + BufferedWriteFileCache& WriteCache, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("WriteBlockToDisk"); + + IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(BlockBuffer); + const MemoryView BlockView = BlockMemoryBuffer.GetView(); + + BlockWriteOps Ops; + if ((BlockDescription.HeaderSize == 0) || BlockDescription.ChunkCompressedLengths.empty()) + { + ZEN_TRACE_CPU("WriteBlockToDisk_Legacy"); + + uint64_t HeaderSize; + const std::vector<uint32_t> ChunkCompressedLengths = + ReadChunkBlockHeader(BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder()), HeaderSize); + + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + ChunkCompressedLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + HeaderSize), + 0, + gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + WriteCache, + Work, + VerifyPool, + DiskStats); + return true; + } + return false; + } + + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + BlockView.Mid(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize), + 0, + gsl::narrow<uint32_t>(BlockDescription.ChunkRawHashes.size() - 1), + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + WriteCache, + Work, + VerifyPool, + DiskStats); + return true; + } + return false; + } + + bool WritePartialBlockToDisk(const std::filesystem::path& CacheFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkBlockDescription& BlockDescription, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + ParallelWork& Work, + WorkerThreadPool& VerifyPool, + CompositeBuffer&& PartialBlockBuffer, + uint32_t FirstIncludedBlockChunkIndex, + uint32_t LastIncludedBlockChunkIndex, + const ChunkedContentLookup& Lookup, + std::span<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags, + BufferedWriteFileCache& WriteCache, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("WritePartialBlockToDisk"); + + IoBuffer BlockMemoryBuffer = MakeBufferMemoryBased(PartialBlockBuffer); + const MemoryView BlockView = BlockMemoryBuffer.GetView(); + + BlockWriteOps Ops; + if (GetBlockWriteOps(RemoteContent, + Lookup, + BlockDescription.ChunkRawHashes, + BlockDescription.ChunkCompressedLengths, + SequenceIndexChunksLeftToWriteCounters, + RemoteChunkIndexNeedsCopyFromSourceFlags, + BlockView, + FirstIncludedBlockChunkIndex, + LastIncludedBlockChunkIndex, + Ops)) + { + WriteBlockChunkOps(CacheFolderPath, + RemoteContent, + Lookup, + SequenceIndexChunksLeftToWriteCounters, + Ops, + WriteCache, + Work, + VerifyPool, + DiskStats); + return true; + } + else + { + return false; + } + } + + bool IsSingleFileChunk(const ChunkedFolderContent& RemoteContent, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> Locations) + { + if (Locations.size() == 1) + { + const uint32_t FirstSequenceIndex = Locations[0]->SequenceIndex; + if (RemoteContent.ChunkedContent.ChunkCounts[FirstSequenceIndex] == 1) + { + ZEN_ASSERT_SLOW(Locations[0]->Offset == 0); + return true; + } + } + return false; + } + + void StreamDecompress(const std::filesystem::path& CacheFolderPath, + const IoHash& SequenceRawHash, + CompositeBuffer&& CompressedPart, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("StreamDecompress"); + const std::filesystem::path TempChunkSequenceFileName = GetTempChunkedSequenceFileName(CacheFolderPath, SequenceRawHash); + TemporaryFile DecompressedTemp; + std::error_code Ec; + DecompressedTemp.CreateTemporary(TempChunkSequenceFileName.parent_path(), Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed creating temporary file for decompressing large blob {}. Reason: {}", SequenceRawHash, Ec.message())); + } + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompressedPart, RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", SequenceRawHash)); + } + if (RawHash != SequenceRawHash) + { + throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, SequenceRawHash)); + } + PrepareFileForScatteredWrite(DecompressedTemp.Handle(), RawSize); + + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream( + 0, + (uint64_t)-1, + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset); + ZEN_TRACE_CPU("StreamDecompress_Write"); + DiskStats.ReadByteCount += SourceSize; + if (!AbortFlag) + { + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + DecompressedTemp.Write(Segment, Offset); + Offset += Segment.GetSize(); + DiskStats.WriteByteCount += Segment.GetSize(); + DiskStats.WriteCount++; + } + return true; + } + return false; + }); + + if (AbortFlag) + { + return; + } + + if (!CouldDecompress) + { + throw std::runtime_error(fmt::format("Failed to decompress large blob {}", SequenceRawHash)); + } + const IoHash VerifyHash = Hash.GetHash(); + if (VerifyHash != SequenceRawHash) + { + throw std::runtime_error( + fmt::format("Decompressed blob payload hash {} does not match expected hash {}", VerifyHash, SequenceRawHash)); + } + DecompressedTemp.MoveTemporaryIntoPlace(TempChunkSequenceFileName, Ec); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed moving temporary file for decompressing large blob {}. Reason: {}", SequenceRawHash, Ec.message())); + } + // WriteChunkStats.ChunkCountWritten++; + } + + bool WriteCompressedChunk(const std::filesystem::path& TargetFolder, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + const IoHash& ChunkHash, + const std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + IoBuffer&& CompressedPart, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("WriteCompressedChunk"); + auto ChunkHashToChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(ChunkHashToChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + if (IsSingleFileChunk(RemoteContent, ChunkTargetPtrs)) + { + const std::uint32_t SequenceIndex = ChunkTargetPtrs.front()->SequenceIndex; + const IoHash& SequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]; + StreamDecompress(TargetFolder, SequenceRawHash, CompositeBuffer(std::move(CompressedPart)), DiskStats); + return false; + } + else + { + IoHash RawHash; + uint64_t RawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(CompositeBuffer(std::move(CompressedPart)), RawHash, RawSize); + if (!Compressed) + { + throw std::runtime_error(fmt::format("Failed to parse header of compressed large blob {}", ChunkHash)); + } + if (RawHash != ChunkHash) + { + throw std::runtime_error(fmt::format("RawHash in header {} in large blob {} does match.", RawHash, ChunkHash)); + } + + BufferedWriteFileCache::Local LocalWriter(WriteCache); + + IoHashStream Hash; + bool CouldDecompress = Compressed.DecompressToStream( + 0, + (uint64_t)-1, + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset); + ZEN_TRACE_CPU("StreamDecompress_Write"); + DiskStats.ReadByteCount += SourceSize; + if (!AbortFlag) + { + for (const ChunkedContentLookup::ChunkSequenceLocation* TargetPtr : ChunkTargetPtrs) + { + const auto& Target = *TargetPtr; + const uint64_t FileOffset = Target.Offset + Offset; + const uint32_t SequenceIndex = Target.SequenceIndex; + const uint32_t PathIndex = RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + + WriteSequenceChunk(TargetFolder, + RemoteContent, + LocalWriter, + RangeBuffer, + SequenceIndex, + FileOffset, + PathIndex, + DiskStats); + } + + return true; + } + return false; + }); + + if (AbortFlag) + { + return false; + } + + if (!CouldDecompress) + { + throw std::runtime_error(fmt::format("Failed to decompress large chunk {}", ChunkHash)); + } + + return true; + } + } + + void AsyncWriteDownloadedChunk(const std::filesystem::path& ZenFolderPath, + const ChunkedFolderContent& RemoteContent, + const ChunkedContentLookup& RemoteLookup, + uint32_t RemoteChunkIndex, + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*>&& ChunkTargetPtrs, + BufferedWriteFileCache& WriteCache, + ParallelWork& Work, + WorkerThreadPool& WritePool, + IoBuffer&& Payload, + std::span<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters, + std::atomic<uint64_t>& WritePartsComplete, + const uint64_t TotalPartWriteCount, + FilteredRate& FilteredWrittenBytesPerSecond, + DiskStatistics& DiskStats) + { + ZEN_TRACE_CPU("AsyncWriteDownloadedChunk"); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + const uint64_t Size = Payload.GetSize(); + + std::filesystem::path CompressedChunkPath; + + // Check if the dowloaded chunk is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (Payload.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && (FileRef.FileChunkSize == Size)) + { + ZEN_TRACE_CPU("MoveTempChunk"); + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + Payload.SetDeleteOnClose(false); + Payload = {}; + CompressedChunkPath = ZenTempDownloadFolderPath(ZenFolderPath) / ChunkHash.ToHexString(); + RenameFile(TempBlobPath, CompressedChunkPath, Ec); + if (Ec) + { + CompressedChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + Payload = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, Size, true); + Payload.SetDeleteOnClose(true); + } + } + } + } + + if (CompressedChunkPath.empty() && (Size > 512u * 1024u)) + { + ZEN_TRACE_CPU("WriteTempChunk"); + // Could not be moved and rather large, lets store it on disk + CompressedChunkPath = ZenTempDownloadFolderPath(ZenFolderPath) / ChunkHash.ToHexString(); + TemporaryFile::SafeWriteFile(CompressedChunkPath, Payload); + Payload = {}; + } + + Work.ScheduleWork( + WritePool, + [&ZenFolderPath, + &RemoteContent, + &RemoteLookup, + SequenceIndexChunksLeftToWriteCounters, + &Work, + &WritePool, + CompressedChunkPath, + RemoteChunkIndex, + TotalPartWriteCount, + &WriteCache, + &DiskStats, + &WritePartsComplete, + &FilteredWrittenBytesPerSecond, + ChunkTargetPtrs = std::move(ChunkTargetPtrs), + CompressedPart = std::move(Payload)](std::atomic<bool>&) mutable { + ZEN_TRACE_CPU("UpdateFolder_WriteChunk"); + + if (!AbortFlag) + { + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + if (CompressedChunkPath.empty()) + { + ZEN_ASSERT(CompressedPart); + } + else + { + ZEN_ASSERT(!CompressedPart); + CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded compressed chunk {} from {}", ChunkHash, CompressedChunkPath)); + } + } + + std::filesystem::path TargetFolder = ZenTempCacheFolderPath(ZenFolderPath); + + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + WriteCache, + std::move(CompressedPart), + DiskStats); + if (!AbortFlag) + { + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + if (!CompressedChunkPath.empty()) + { + TryRemoveFile(CompressedChunkPath); + } + + std::vector<uint32_t> CompletedSequences = + CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters); + WriteCache.Close(CompletedSequences); + if (NeedHashVerify) + { + VerifyAndCompleteChunkSequencesAsync(TargetFolder, + RemoteContent, + RemoteLookup, + CompletedSequences, + Work, + WritePool); + } + else + { + FinalizeChunkSequences(TargetFolder, RemoteContent, CompletedSequences); + } + } + } + }); + }; + + bool ReadStateFile(const std::filesystem::path& StateFilePath, + FolderContent& OutLocalFolderState, + ChunkedFolderContent& OutLocalContent) + { + ZEN_TRACE_CPU("ReadStateFile"); + bool HasLocalState = false; + try + { + CbObject CurrentStateObject = LoadCompactBinaryObject(StateFilePath).Object; + if (CurrentStateObject) + { + Oid CurrentBuildId; + std::vector<Oid> SavedBuildPartIds; + std::vector<std::string> SavedBuildPartsNames; + std::vector<ChunkedFolderContent> SavedPartContents; + if (ReadStateObject(CurrentStateObject, + CurrentBuildId, + SavedBuildPartIds, + SavedBuildPartsNames, + SavedPartContents, + OutLocalFolderState)) + { + if (!SavedPartContents.empty()) + { + if (SavedPartContents.size() == 1) + { + OutLocalContent = std::move(SavedPartContents[0]); + } + else + { + OutLocalContent = + MergeChunkedFolderContents(SavedPartContents[0], + std::span<const ChunkedFolderContent>(SavedPartContents).subspan(1)); + } + HasLocalState = true; + } + } + } + } + catch (const std::exception& Ex) + { + ZEN_CONSOLE("Failed reading state file {}, falling back to scannning. Reason: {}", StateFilePath, Ex.what()); + } + return HasLocalState; + } + + FolderContent GetValidFolderContent(GetFolderContentStatistics& LocalFolderScanStats, + const std::filesystem::path& Path, + std::span<const std::filesystem::path> PathsToCheck, + std::function<void(uint64_t PathCount, uint64_t CompletedPathCount)>&& ProgressCallback) + { + ZEN_TRACE_CPU("GetValidFolderContent"); + FolderContent Result; + const uint32_t PathCount = gsl::narrow<uint32_t>(PathsToCheck.size()); + + Result.Paths.resize(PathCount); + Result.RawSizes.resize(PathCount); + Result.Attributes.resize(PathCount); + Result.ModificationTicks.resize(PathCount); + + { + Stopwatch Timer; + auto _ = MakeGuard([&LocalFolderScanStats, &Timer]() { LocalFolderScanStats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + ParallelWork Work(AbortFlag, PauseFlag); + std::atomic<uint64_t> CompletedPathCount = 0; + uint32_t PathIndex = 0; + + while (PathIndex < PathCount) + { + uint32_t PathRangeCount = Min(128u, PathCount - PathIndex); + Work.ScheduleWork(GetIOWorkerPool(), + [PathIndex, PathRangeCount, &PathsToCheck, &Path, &Result, &CompletedPathCount, &LocalFolderScanStats]( + std::atomic<bool>&) { + for (uint32_t PathRangeIndex = PathIndex; PathRangeIndex < PathIndex + PathRangeCount; + PathRangeIndex++) + { + const std::filesystem::path& FilePath = PathsToCheck[PathRangeIndex]; + std::filesystem::path LocalFilePath = (Path / FilePath).make_preferred(); + if (TryGetFileProperties(LocalFilePath, + Result.RawSizes[PathRangeIndex], + Result.ModificationTicks[PathRangeIndex], + Result.Attributes[PathRangeIndex])) + { + Result.Paths[PathRangeIndex] = std::move(FilePath); + LocalFolderScanStats.FoundFileCount++; + LocalFolderScanStats.FoundFileByteCount += Result.RawSizes[PathRangeIndex]; + LocalFolderScanStats.AcceptedFileCount++; + LocalFolderScanStats.AcceptedFileByteCount += Result.RawSizes[PathRangeIndex]; + } + CompletedPathCount++; + } + }); + PathIndex += PathRangeCount; + } + Work.Wait(200, [&](bool, bool, ptrdiff_t) { + if (ProgressCallback) + { + ProgressCallback(PathCount, CompletedPathCount.load()); + } + }); + } + + uint32_t WritePathIndex = 0; + for (uint32_t ReadPathIndex = 0; ReadPathIndex < PathCount; ReadPathIndex++) + { + if (!Result.Paths[ReadPathIndex].empty()) + { + if (WritePathIndex < ReadPathIndex) + { + Result.Paths[WritePathIndex] = std::move(Result.Paths[ReadPathIndex]); + Result.RawSizes[WritePathIndex] = Result.RawSizes[ReadPathIndex]; + Result.Attributes[WritePathIndex] = Result.Attributes[ReadPathIndex]; + Result.ModificationTicks[WritePathIndex] = Result.ModificationTicks[ReadPathIndex]; + } + WritePathIndex++; + } + } + + Result.Paths.resize(WritePathIndex); + Result.RawSizes.resize(WritePathIndex); + Result.Attributes.resize(WritePathIndex); + Result.ModificationTicks.resize(WritePathIndex); + return Result; + } + + void UpdateFolder(const std::filesystem::path& SystemRootDir, + StorageInstance& Storage, + const Oid& BuildId, + const std::filesystem::path& Path, + const std::filesystem::path& ZenFolderPath, + const std::uint64_t LargeAttachmentSize, + const std::uint64_t PreferredMultipartChunkSize, + const ChunkedFolderContent& LocalContent, + const ChunkedFolderContent& RemoteContent, + const std::vector<ChunkBlockDescription>& BlockDescriptions, + const std::vector<IoHash>& LooseChunkHashes, + bool AllowPartialBlockRequests, + bool WipeTargetFolder, + bool PrimeCacheOnly, + bool EnableScavenging, + FolderContent& OutLocalFolderState, + DiskStatistics& DiskStats, + CacheMappingStatistics& CacheMappingStats, + DownloadStatistics& DownloadStats, + WriteChunkStatistics& WriteChunkStats, + RebuildFolderStateStatistics& RebuildFolderStateStats) + { + ZEN_TRACE_CPU("UpdateFolder"); + + ZEN_ASSERT((!PrimeCacheOnly) || (PrimeCacheOnly && (!AllowPartialBlockRequests))); + + Stopwatch IndexTimer; + + const ChunkedContentLookup LocalLookup = BuildChunkedContentLookup(LocalContent); + + const ChunkedContentLookup RemoteLookup = BuildChunkedContentLookup(RemoteContent); + + ZEN_CONSOLE("Indexed local and remote content in {}", NiceTimeSpanMs(IndexTimer.GetElapsedTimeMs())); + + const std::filesystem::path CacheFolderPath = ZenTempCacheFolderPath(ZenFolderPath); + + Stopwatch CacheMappingTimer; + + std::vector<std::atomic<uint32_t>> SequenceIndexChunksLeftToWriteCounters(RemoteContent.ChunkedContent.SequenceRawHashes.size()); + std::vector<bool> RemoteChunkIndexNeedsCopyFromLocalFileFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + std::vector<std::atomic<bool>> RemoteChunkIndexNeedsCopyFromSourceFlags(RemoteContent.ChunkedContent.ChunkHashes.size()); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedChunkHashesFound; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedSequenceHashesFound; + if (!PrimeCacheOnly) + { + ZEN_TRACE_CPU("UpdateFolder_CheckChunkCache"); + + Stopwatch CacheTimer; + + DirectoryContent CacheDirContent; + GetDirectoryContent(CacheFolderPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + CacheDirContent); + for (size_t Index = 0; Index < CacheDirContent.Files.size(); Index++) + { + IoHash FileHash; + if (IoHash::TryParse(CacheDirContent.Files[Index].filename().string(), FileHash)) + { + if (auto ChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(FileHash); + ChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t ChunkIndex = ChunkIt->second; + const uint64_t ChunkSize = RemoteContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + if (ChunkSize == CacheDirContent.FileSizes[Index]) + { + CachedChunkHashesFound.insert({FileHash, ChunkIndex}); + CacheMappingStats.CacheChunkCount++; + CacheMappingStats.CacheChunkByteCount += ChunkSize; + continue; + } + } + else if (auto SequenceIt = RemoteLookup.RawHashToSequenceIndex.find(FileHash); + SequenceIt != RemoteLookup.RawHashToSequenceIndex.end()) + { + const uint32_t SequenceIndex = SequenceIt->second; + const uint32_t PathIndex = RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const uint64_t SequenceSize = RemoteContent.RawSizes[PathIndex]; + if (SequenceSize == CacheDirContent.FileSizes[Index]) + { + CachedSequenceHashesFound.insert({FileHash, SequenceIndex}); + CacheMappingStats.CacheSequenceHashesCount++; + CacheMappingStats.CacheSequenceHashesByteCount += SequenceSize; + + const std::filesystem::path CacheFilePath = + GetFinalChunkedSequenceFileName(CacheFolderPath, + RemoteContent.ChunkedContent.SequenceRawHashes[SequenceIndex]); + ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); + + continue; + } + } + } + TryRemoveFile(CacheDirContent.Files[Index]); + } + CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs(); + } + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CachedBlocksFound; + if (!PrimeCacheOnly) + { + ZEN_TRACE_CPU("UpdateFolder_CheckBlockCache"); + + Stopwatch CacheTimer; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> AllBlockSizes; + AllBlockSizes.reserve(BlockDescriptions.size()); + for (uint32_t BlockIndex = 0; BlockIndex < BlockDescriptions.size(); BlockIndex++) + { + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + AllBlockSizes.insert({BlockDescription.BlockHash, BlockIndex}); + } + + DirectoryContent BlockDirContent; + GetDirectoryContent(ZenTempBlockFolderPath(ZenFolderPath), + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + BlockDirContent); + CachedBlocksFound.reserve(BlockDirContent.Files.size()); + for (size_t Index = 0; Index < BlockDirContent.Files.size(); Index++) + { + IoHash FileHash; + if (IoHash::TryParse(BlockDirContent.Files[Index].filename().string(), FileHash)) + { + if (auto BlockIt = AllBlockSizes.find(FileHash); BlockIt != AllBlockSizes.end()) + { + const uint32_t BlockIndex = BlockIt->second; + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + uint64_t BlockSize = CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize; + for (uint64_t ChunkSize : BlockDescription.ChunkCompressedLengths) + { + BlockSize += ChunkSize; + } + + if (BlockSize == BlockDirContent.FileSizes[Index]) + { + CachedBlocksFound.insert({FileHash, BlockIndex}); + CacheMappingStats.CacheBlockCount++; + CacheMappingStats.CacheBlocksByteCount += BlockSize; + continue; + } + } + } + TryRemoveFile(BlockDirContent.Files[Index]); + } + + CacheMappingStats.CacheScanElapsedWallTimeUs += CacheTimer.GetElapsedTimeUs(); + } + + std::vector<uint32_t> LocalPathIndexesMatchingSequenceIndexes; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceIndexesLeftToFindToRemoteIndex; + + if (!PrimeCacheOnly) + { + // Pick up all whole files we can use from current local state + ZEN_TRACE_CPU("UpdateFolder_GetLocalSequences"); + + Stopwatch LocalTimer; + + for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size(); + RemoteSequenceIndex++) + { + const IoHash& RemoteSequenceRawHash = RemoteContent.ChunkedContent.SequenceRawHashes[RemoteSequenceIndex]; + const uint32_t RemotePathIndex = GetFirstPathIndexForSeqeuenceIndex(RemoteLookup, RemoteSequenceIndex); + const uint64_t RemoteRawSize = RemoteContent.RawSizes[RemotePathIndex]; + if (auto CacheSequenceIt = CachedSequenceHashesFound.find(RemoteSequenceRawHash); + CacheSequenceIt != CachedSequenceHashesFound.end()) + { + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); + ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); + ZEN_CONSOLE_VERBOSE("Found sequence {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize)); + } + else if (auto CacheChunkIt = CachedChunkHashesFound.find(RemoteSequenceRawHash); + CacheChunkIt != CachedChunkHashesFound.end()) + { + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); + ZEN_ASSERT_SLOW(IsFile(CacheFilePath)); + ZEN_CONSOLE_VERBOSE("Found chunk {} at {} ({})", RemoteSequenceRawHash, CacheFilePath, NiceBytes(RemoteRawSize)); + } + else if (auto It = LocalLookup.RawHashToSequenceIndex.find(RemoteSequenceRawHash); + It != LocalLookup.RawHashToSequenceIndex.end()) + { + const uint32_t LocalSequenceIndex = It->second; + const uint32_t LocalPathIndex = GetFirstPathIndexForSeqeuenceIndex(LocalLookup, LocalSequenceIndex); + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + ZEN_ASSERT_SLOW(IsFile(LocalFilePath)); + LocalPathIndexesMatchingSequenceIndexes.push_back(LocalPathIndex); + CacheMappingStats.LocalPathsMatchingSequencesCount++; + CacheMappingStats.LocalPathsMatchingSequencesByteCount += RemoteRawSize; + ZEN_CONSOLE_VERBOSE("Found sequence {} at {} ({})", RemoteSequenceRawHash, LocalFilePath, NiceBytes(RemoteRawSize)); + } + else + { + // We must write the sequence + const uint32_t ChunkCount = RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount; + SequenceIndexesLeftToFindToRemoteIndex.insert({RemoteSequenceRawHash, RemoteSequenceIndex}); + } + } + + CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs(); + } + else + { + for (uint32_t RemoteSequenceIndex = 0; RemoteSequenceIndex < RemoteContent.ChunkedContent.SequenceRawHashes.size(); + RemoteSequenceIndex++) + { + const uint32_t ChunkCount = RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]; + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = ChunkCount; + } + } + + std::vector<ChunkedFolderContent> ScavengedContents; + std::vector<ChunkedContentLookup> ScavengedLookups; + std::vector<std::filesystem::path> ScavengedPaths; + + struct ScavengeCopyOperation + { + uint32_t ScavengedContentIndex = (uint32_t)-1; + uint32_t ScavengedPathIndex = (uint32_t)-1; + uint32_t RemoteSequenceIndex = (uint32_t)-1; + uint64_t RawSize = (uint32_t)-1; + }; + + std::vector<ScavengeCopyOperation> ScavengeCopyOperations; + uint64_t ScavengedPathsCount = 0; + + if (!PrimeCacheOnly && EnableScavenging) + { + ZEN_TRACE_CPU("UpdateFolder_GetScavengedSequences"); + + Stopwatch ScavengeTimer; + + if (!SequenceIndexesLeftToFindToRemoteIndex.empty()) + { + std::vector<ScavengeSource> ScavengeSources = GetDownloadedStatePaths(SystemRootDir); + auto EraseIt = std::remove_if(ScavengeSources.begin(), ScavengeSources.end(), [&Path](const ScavengeSource& Source) { + return Source.Path == Path; + }); + ScavengeSources.erase(EraseIt, ScavengeSources.end()); + + const size_t ScavengePathCount = ScavengeSources.size(); + + ScavengedContents.resize(ScavengePathCount); + ScavengedLookups.resize(ScavengePathCount); + ScavengedPaths.resize(ScavengePathCount); + + ProgressBar ScavengeProgressBar(ProgressMode, "Scavenging"); + ParallelWork Work(AbortFlag, PauseFlag); + + std::atomic<uint64_t> PathsFound(0); + std::atomic<uint64_t> ChunksFound(0); + std::atomic<uint64_t> PathsScavenged(0); + + for (size_t ScavengeIndex = 0; ScavengeIndex < ScavengePathCount; ScavengeIndex++) + { + Work.ScheduleWork( + GetIOWorkerPool(), + [&RemoteLookup, + &ScavengeSources, + &ScavengedContents, + &ScavengedPaths, + &ScavengedLookups, + &PathsFound, + &ChunksFound, + &PathsScavenged, + ScavengeIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + const ScavengeSource& Source = ScavengeSources[ScavengeIndex]; + + ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengeIndex]; + std::filesystem::path& ScavengePath = ScavengedPaths[ScavengeIndex]; + + FolderContent LocalFolderState; + if (ReadStateFile(Source.StateFilePath, LocalFolderState, ScavengedLocalContent)) + { + if (IsDir(Source.Path)) + { + ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengeIndex]; + ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent); + + std::vector<uint32_t> PathIndexesToScavange; + uint32_t ScavengedStatePathCount = gsl::narrow<uint32_t>(ScavengedLocalContent.Paths.size()); + PathIndexesToScavange.reserve(ScavengedStatePathCount); + for (uint32_t ScavengedStatePathIndex = 0; ScavengedStatePathIndex < ScavengedStatePathCount; + ScavengedStatePathIndex++) + { + const IoHash& SequenceHash = ScavengedLocalContent.RawHashes[ScavengedStatePathIndex]; + if (auto ScavengeSequenceIt = ScavengedLookup.RawHashToSequenceIndex.find(SequenceHash); + ScavengeSequenceIt != ScavengedLookup.RawHashToSequenceIndex.end()) + { + const uint32_t ScavengeSequenceIndex = ScavengeSequenceIt->second; + if (RemoteLookup.RawHashToSequenceIndex.contains(SequenceHash)) + { + PathIndexesToScavange.push_back(ScavengedStatePathIndex); + } + else + { + const uint32_t ScavengeChunkCount = + ScavengedLocalContent.ChunkedContent.ChunkCounts[ScavengeSequenceIndex]; + for (uint32_t ScavengeChunkIndexOffset = 0; + ScavengeChunkIndexOffset < ScavengeChunkCount; + ScavengeChunkIndexOffset++) + { + const size_t ScavengeChunkOrderIndex = + ScavengedLookup.ChunkSequenceLocationOffset[ScavengeSequenceIndex] + + ScavengeChunkIndexOffset; + const uint32_t ScavengeChunkIndex = + ScavengedLocalContent.ChunkedContent.ChunkOrders[ScavengeChunkOrderIndex]; + const IoHash& ScavengeChunkHash = + ScavengedLocalContent.ChunkedContent.ChunkHashes[ScavengeChunkIndex]; + if (RemoteLookup.ChunkHashToChunkIndex.contains(ScavengeChunkHash)) + { + PathIndexesToScavange.push_back(ScavengedStatePathIndex); + break; + } + } + } + } + } + + if (!PathIndexesToScavange.empty()) + { + std::vector<std::filesystem::path> PathsToScavenge; + PathsToScavenge.reserve(PathIndexesToScavange.size()); + for (uint32_t ScavengedStatePathIndex : PathIndexesToScavange) + { + PathsToScavenge.push_back(ScavengedLocalContent.Paths[ScavengedStatePathIndex]); + } + + GetFolderContentStatistics ScavengedFolderScanStats; + + FolderContent ValidFolderContent = + GetValidFolderContent(ScavengedFolderScanStats, Source.Path, PathsToScavenge, {}); + + if (!LocalFolderState.AreKnownFilesEqual(ValidFolderContent)) + { + std::vector<std::filesystem::path> DeletedPaths; + FolderContent UpdatedContent = + GetUpdatedContent(LocalFolderState, ValidFolderContent, DeletedPaths); + + // If the files are modified since the state was saved we ignore the files since we don't + // want to incur the cost of scanning/hashing scavenged files + DeletedPaths.insert(DeletedPaths.end(), + UpdatedContent.Paths.begin(), + UpdatedContent.Paths.end()); + if (!DeletedPaths.empty()) + { + ScavengedLocalContent = + DeletePathsFromChunkedContent(ScavengedLocalContent, ScavengedLookup, DeletedPaths); + ScavengedLookup = BuildChunkedContentLookup(ScavengedLocalContent); + } + } + + if (!ScavengedLocalContent.Paths.empty()) + { + ScavengePath = Source.Path; + PathsFound += ScavengedLocalContent.Paths.size(); + ChunksFound += ScavengedLocalContent.ChunkedContent.ChunkHashes.size(); + } + } + + if (ScavengePath.empty()) + { + ScavengedLocalContent = {}; + ScavengedLookups[ScavengeIndex] = {}; + ScavengedPaths[ScavengeIndex].clear(); + } + } + } + PathsScavenged++; + } + }); + } + { + ZEN_TRACE_CPU("ScavengeScan_Wait"); + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + std::string Details = fmt::format("{}/{} scanned. {} paths and {} chunks found for scavanging", + PathsScavenged.load(), + ScavengePathCount, + PathsFound.load(), + ChunksFound.load()); + ScavengeProgressBar.UpdateState({.Task = "Scavenging ", + .Details = Details, + .TotalCount = ScavengePathCount, + .RemainingCount = ScavengePathCount - PathsScavenged.load(), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + ScavengeProgressBar.Finish(); + if (AbortFlag) + { + return; + } + + for (uint32_t ScavengedContentIndex = 0; + ScavengedContentIndex < ScavengedContents.size() && (!SequenceIndexesLeftToFindToRemoteIndex.empty()); + ScavengedContentIndex++) + { + const std::filesystem::path& ScavengePath = ScavengedPaths[ScavengedContentIndex]; + if (!ScavengePath.empty()) + { + const ChunkedFolderContent& ScavengedLocalContent = ScavengedContents[ScavengedContentIndex]; + const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex]; + + for (uint32_t ScavengedSequenceIndex = 0; + ScavengedSequenceIndex < ScavengedLocalContent.ChunkedContent.SequenceRawHashes.size(); + ScavengedSequenceIndex++) + { + const IoHash& SequenceRawHash = ScavengedLocalContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex]; + if (auto It = SequenceIndexesLeftToFindToRemoteIndex.find(SequenceRawHash); + It != SequenceIndexesLeftToFindToRemoteIndex.end()) + { + const uint32_t RemoteSequenceIndex = It->second; + const uint64_t RawSize = + RemoteContent.RawSizes[RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]]; + ZEN_ASSERT(RawSize > 0); + + const uint32_t ScavengedPathIndex = ScavengedLookup.SequenceIndexFirstPathIndex[ScavengedSequenceIndex]; + ZEN_ASSERT_SLOW(IsFile((ScavengePath / ScavengedLocalContent.Paths[ScavengedPathIndex]).make_preferred())); + + ScavengeCopyOperations.push_back({.ScavengedContentIndex = ScavengedContentIndex, + .ScavengedPathIndex = ScavengedPathIndex, + .RemoteSequenceIndex = RemoteSequenceIndex, + .RawSize = RawSize}); + + SequenceIndexesLeftToFindToRemoteIndex.erase(SequenceRawHash); + SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex] = 0; + + CacheMappingStats.ScavengedPathsMatchingSequencesCount++; + CacheMappingStats.ScavengedPathsMatchingSequencesByteCount += RawSize; + } + } + ScavengedPathsCount++; + } + } + } + CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); + } + + uint32_t RemainingChunkCount = 0; + for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) + { + uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + if (ChunkWriteCount > 0) + { + RemainingChunkCount++; + } + } + + // Pick up all chunks in current local state + // TODO: Rename to LocalStateCopyData + struct CacheCopyData + { + uint32_t ScavengeSourceIndex = (uint32_t)-1; + uint32_t SourceSequenceIndex = (uint32_t)-1; + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> TargetChunkLocationPtrs; + struct ChunkTarget + { + uint32_t TargetChunkLocationCount = (uint32_t)-1; + uint32_t RemoteChunkIndex = (uint32_t)-1; + uint64_t CacheFileOffset = (uint64_t)-1; + }; + std::vector<ChunkTarget> ChunkTargets; + }; + + tsl::robin_map<IoHash, size_t, IoHash::Hasher> RawHashToCacheCopyDataIndex; + std::vector<CacheCopyData> CacheCopyDatas; + + if (!PrimeCacheOnly) + { + ZEN_TRACE_CPU("UpdateFolder_GetLocalChunks"); + + Stopwatch LocalTimer; + + for (uint32_t LocalSequenceIndex = 0; + LocalSequenceIndex < LocalContent.ChunkedContent.SequenceRawHashes.size() && (RemainingChunkCount > 0); + LocalSequenceIndex++) + { + const IoHash& LocalSequenceRawHash = LocalContent.ChunkedContent.SequenceRawHashes[LocalSequenceIndex]; + const uint32_t LocalOrderOffset = LocalLookup.SequenceIndexChunkOrderOffset[LocalSequenceIndex]; + + { + uint64_t SourceOffset = 0; + const uint32_t LocalChunkCount = LocalContent.ChunkedContent.ChunkCounts[LocalSequenceIndex]; + for (uint32_t LocalOrderIndex = 0; LocalOrderIndex < LocalChunkCount; LocalOrderIndex++) + { + const uint32_t LocalChunkIndex = LocalContent.ChunkedContent.ChunkOrders[LocalOrderOffset + LocalOrderIndex]; + const IoHash& LocalChunkHash = LocalContent.ChunkedContent.ChunkHashes[LocalChunkIndex]; + const uint64_t LocalChunkRawSize = LocalContent.ChunkedContent.ChunkRawSizes[LocalChunkIndex]; + + if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(LocalChunkHash); + RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = RemoteChunkIt->second; + if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + CacheCopyData::ChunkTarget Target = { + .TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()), + .RemoteChunkIndex = RemoteChunkIndex, + .CacheFileOffset = SourceOffset}; + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(LocalSequenceRawHash); + CopySourceIt != RawHashToCacheCopyDataIndex.end()) + { + CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; + if (Data.TargetChunkLocationPtrs.size() > 1024) + { + RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.ScavengeSourceIndex = (uint32_t)-1, + .SourceSequenceIndex = LocalSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + else + { + Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), + ChunkTargetPtrs.begin(), + ChunkTargetPtrs.end()); + Data.ChunkTargets.push_back(Target); + } + } + else + { + RawHashToCacheCopyDataIndex.insert_or_assign(LocalSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.ScavengeSourceIndex = (uint32_t)-1, + .SourceSequenceIndex = LocalSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + CacheMappingStats.LocalChunkMatchingRemoteCount++; + CacheMappingStats.LocalChunkMatchingRemoteByteCount += LocalChunkRawSize; + RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; + RemainingChunkCount--; + } + } + } + SourceOffset += LocalChunkRawSize; + } + } + } + CacheMappingStats.LocalScanElapsedWallTimeUs += LocalTimer.GetElapsedTimeUs(); + } + + if (!PrimeCacheOnly) + { + ZEN_TRACE_CPU("UpdateFolder_GetScavengeChunks"); + + Stopwatch ScavengeTimer; + + for (uint32_t ScavengedContentIndex = 0; ScavengedContentIndex < ScavengedContents.size() && (RemainingChunkCount > 0); + ScavengedContentIndex++) + { + const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengedContentIndex]; + // const std::filesystem::path& ScavengedPath = ScavengedPaths[ScavengedContentIndex]; + const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[ScavengedContentIndex]; + + for (uint32_t ScavengedSequenceIndex = 0; + ScavengedSequenceIndex < ScavengedContent.ChunkedContent.SequenceRawHashes.size() && (RemainingChunkCount > 0); + ScavengedSequenceIndex++) + { + const IoHash& ScavengedSequenceRawHash = ScavengedContent.ChunkedContent.SequenceRawHashes[ScavengedSequenceIndex]; + const uint32_t ScavengedOrderOffset = ScavengedLookup.SequenceIndexChunkOrderOffset[ScavengedSequenceIndex]; + + { + uint64_t SourceOffset = 0; + const uint32_t ScavengedChunkCount = ScavengedContent.ChunkedContent.ChunkCounts[ScavengedSequenceIndex]; + for (uint32_t ScavengedOrderIndex = 0; ScavengedOrderIndex < ScavengedChunkCount; ScavengedOrderIndex++) + { + const uint32_t ScavengedChunkIndex = + ScavengedContent.ChunkedContent.ChunkOrders[ScavengedOrderOffset + ScavengedOrderIndex]; + const IoHash& ScavengedChunkHash = ScavengedContent.ChunkedContent.ChunkHashes[ScavengedChunkIndex]; + const uint64_t ScavengedChunkRawSize = ScavengedContent.ChunkedContent.ChunkRawSizes[ScavengedChunkIndex]; + + if (auto RemoteChunkIt = RemoteLookup.ChunkHashToChunkIndex.find(ScavengedChunkHash); + RemoteChunkIt != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = RemoteChunkIt->second; + if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + + if (!ChunkTargetPtrs.empty()) + { + CacheCopyData::ChunkTarget Target = { + .TargetChunkLocationCount = gsl::narrow<uint32_t>(ChunkTargetPtrs.size()), + .RemoteChunkIndex = RemoteChunkIndex, + .CacheFileOffset = SourceOffset}; + if (auto CopySourceIt = RawHashToCacheCopyDataIndex.find(ScavengedSequenceRawHash); + CopySourceIt != RawHashToCacheCopyDataIndex.end()) + { + CacheCopyData& Data = CacheCopyDatas[CopySourceIt->second]; + if (Data.TargetChunkLocationPtrs.size() > 1024) + { + RawHashToCacheCopyDataIndex.insert_or_assign(ScavengedSequenceRawHash, + CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.ScavengeSourceIndex = ScavengedContentIndex, + .SourceSequenceIndex = ScavengedSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + else + { + Data.TargetChunkLocationPtrs.insert(Data.TargetChunkLocationPtrs.end(), + ChunkTargetPtrs.begin(), + ChunkTargetPtrs.end()); + Data.ChunkTargets.push_back(Target); + } + } + else + { + RawHashToCacheCopyDataIndex.insert_or_assign(ScavengedSequenceRawHash, CacheCopyDatas.size()); + CacheCopyDatas.push_back( + CacheCopyData{.ScavengeSourceIndex = ScavengedContentIndex, + .SourceSequenceIndex = ScavengedSequenceIndex, + .TargetChunkLocationPtrs = ChunkTargetPtrs, + .ChunkTargets = std::vector<CacheCopyData::ChunkTarget>{Target}}); + } + CacheMappingStats.ScavengedChunkMatchingRemoteCount++; + CacheMappingStats.ScavengedChunkMatchingRemoteByteCount += ScavengedChunkRawSize; + RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex] = true; + RemainingChunkCount--; + } + } + } + SourceOffset += ScavengedChunkRawSize; + } + } + } + } + CacheMappingStats.ScavengeElapsedWallTimeUs += ScavengeTimer.GetElapsedTimeUs(); + } + + if (!CachedSequenceHashesFound.empty() || !CachedChunkHashesFound.empty() || !CachedBlocksFound.empty()) + { + ZEN_CONSOLE("Download cache: Found {} ({}) chunk sequences, {} ({}) chunks, {} ({}) blocks in {}", + CachedSequenceHashesFound.size(), + NiceBytes(CacheMappingStats.CacheSequenceHashesByteCount), + CachedChunkHashesFound.size(), + NiceBytes(CacheMappingStats.CacheChunkByteCount), + CachedBlocksFound.size(), + NiceBytes(CacheMappingStats.CacheBlocksByteCount), + NiceTimeSpanMs(CacheMappingStats.CacheScanElapsedWallTimeUs / 1000)); + } + + if (!LocalPathIndexesMatchingSequenceIndexes.empty() || CacheMappingStats.LocalChunkMatchingRemoteCount > 0) + { + ZEN_CONSOLE("Local state : Found {} ({}) chunk sequences, {} ({}) chunks in {}", + LocalPathIndexesMatchingSequenceIndexes.size(), + NiceBytes(CacheMappingStats.LocalPathsMatchingSequencesByteCount), + CacheMappingStats.LocalChunkMatchingRemoteCount, + NiceBytes(CacheMappingStats.LocalChunkMatchingRemoteByteCount), + NiceTimeSpanMs(CacheMappingStats.LocalScanElapsedWallTimeUs / 1000)); + } + if (CacheMappingStats.ScavengedPathsMatchingSequencesCount > 0 || CacheMappingStats.ScavengedChunkMatchingRemoteCount > 0) + { + ZEN_CONSOLE("Scavenge of {} paths, found {} ({}) chunk sequences, {} ({}) chunks in {}", + ScavengedPathsCount, + CacheMappingStats.ScavengedPathsMatchingSequencesCount, + NiceBytes(CacheMappingStats.ScavengedPathsMatchingSequencesByteCount), + CacheMappingStats.ScavengedChunkMatchingRemoteCount, + NiceBytes(CacheMappingStats.ScavengedChunkMatchingRemoteByteCount), + NiceTimeSpanMs(CacheMappingStats.ScavengeElapsedWallTimeUs / 1000)); + } + + uint64_t BytesToWrite = 0; + + for (uint32_t RemoteChunkIndex = 0; RemoteChunkIndex < RemoteContent.ChunkedContent.ChunkHashes.size(); RemoteChunkIndex++) + { + uint64_t ChunkWriteCount = GetChunkWriteCount(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + if (ChunkWriteCount > 0) + { + BytesToWrite += RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] * ChunkWriteCount; + if (!RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex] = true; + } + } + } + + for (const ScavengeCopyOperation& ScavengeCopyOp : ScavengeCopyOperations) + { + BytesToWrite += ScavengeCopyOp.RawSize; + } + + uint64_t TotalRequestCount = 0; + uint64_t TotalPartWriteCount = 0; + std::atomic<uint64_t> WritePartsComplete = 0; + + { + ZEN_TRACE_CPU("WriteChunks"); + + Stopwatch WriteTimer; + + FilteredRate FilteredDownloadedBytesPerSecond; + FilteredRate FilteredWrittenBytesPerSecond; + + WorkerThreadPool& NetworkPool = GetNetworkPool(); + WorkerThreadPool& WritePool = GetIOWorkerPool(); + + ProgressBar WriteProgressBar(ProgressMode, PrimeCacheOnly ? "Downloading" : "Writing"); + ParallelWork Work(AbortFlag, PauseFlag); + + struct LooseChunkHashWorkData + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs; + uint32_t RemoteChunkIndex = (uint32_t)-1; + }; + + std::vector<LooseChunkHashWorkData> LooseChunkHashWorks; + TotalPartWriteCount += CacheCopyDatas.size(); + TotalPartWriteCount += ScavengeCopyOperations.size(); + + for (const IoHash ChunkHash : LooseChunkHashes) + { + auto RemoteChunkIndexIt = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); + ZEN_ASSERT(RemoteChunkIndexIt != RemoteLookup.ChunkHashToChunkIndex.end()); + const uint32_t RemoteChunkIndex = RemoteChunkIndexIt->second; + if (RemoteChunkIndexNeedsCopyFromLocalFileFlags[RemoteChunkIndex]) + { + ZEN_CONSOLE_VERBOSE("Skipping chunk {} due to cache reuse", ChunkHash); + continue; + } + bool NeedsCopy = true; + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex].compare_exchange_strong(NeedsCopy, false)) + { + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + GetRemainingChunkTargets(SequenceIndexChunksLeftToWriteCounters, RemoteLookup, RemoteChunkIndex); + + if (ChunkTargetPtrs.empty()) + { + ZEN_CONSOLE_VERBOSE("Skipping chunk {} due to cache reuse", ChunkHash); + } + else + { + TotalRequestCount++; + TotalPartWriteCount++; + LooseChunkHashWorks.push_back( + LooseChunkHashWorkData{.ChunkTargetPtrs = ChunkTargetPtrs, .RemoteChunkIndex = RemoteChunkIndex}); + } + } + } + + uint32_t BlockCount = gsl::narrow<uint32_t>(BlockDescriptions.size()); + + std::vector<bool> ChunkIsPickedUpByBlock(RemoteContent.ChunkedContent.ChunkHashes.size(), false); + auto GetNeededChunkBlockIndexes = [&RemoteContent, + &RemoteLookup, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &ChunkIsPickedUpByBlock](const ChunkBlockDescription& BlockDescription) { + ZEN_TRACE_CPU("UpdateFolder_GetNeededChunkBlockIndexes"); + std::vector<uint32_t> NeededBlockChunkIndexes; + for (uint32_t ChunkBlockIndex = 0; ChunkBlockIndex < BlockDescription.ChunkRawHashes.size(); ChunkBlockIndex++) + { + const IoHash& ChunkHash = BlockDescription.ChunkRawHashes[ChunkBlockIndex]; + if (auto It = RemoteLookup.ChunkHashToChunkIndex.find(ChunkHash); It != RemoteLookup.ChunkHashToChunkIndex.end()) + { + const uint32_t RemoteChunkIndex = It->second; + if (!ChunkIsPickedUpByBlock[RemoteChunkIndex]) + { + if (RemoteChunkIndexNeedsCopyFromSourceFlags[RemoteChunkIndex]) + { + ChunkIsPickedUpByBlock[RemoteChunkIndex] = true; + NeededBlockChunkIndexes.push_back(ChunkBlockIndex); + } + } + } + } + return NeededBlockChunkIndexes; + }; + + std::vector<uint32_t> CachedChunkBlockIndexes; + + struct BlockRangeDescriptor + { + uint32_t BlockIndex = (uint32_t)-1; + uint64_t RangeStart = 0; + uint64_t RangeLength = 0; + uint32_t ChunkBlockIndexStart = 0; + uint32_t ChunkBlockIndexCount = 0; + }; + std::vector<BlockRangeDescriptor> BlockRangeWorks; + + std::vector<uint32_t> FullBlockWorks; + + for (uint32_t BlockIndex = 0; BlockIndex < BlockCount; BlockIndex++) + { + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + const std::vector<uint32_t> BlockChunkIndexNeeded = GetNeededChunkBlockIndexes(BlockDescription); + if (!BlockChunkIndexNeeded.empty()) + { + if (PrimeCacheOnly) + { + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + else + { + bool UsingCachedBlock = false; + if (auto It = CachedBlocksFound.find(BlockDescription.BlockHash); It != CachedBlocksFound.end()) + { + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_CacheGet"); + + TotalPartWriteCount++; + + std::filesystem::path BlockPath = + ZenTempBlockFolderPath(ZenFolderPath) / BlockDescription.BlockHash.ToHexString(); + if (IsFile(BlockPath)) + { + CachedChunkBlockIndexes.push_back(BlockIndex); + UsingCachedBlock = true; + } + } + + if (!UsingCachedBlock) + { + bool WantsToDoPartialBlockDownload = BlockChunkIndexNeeded.size() < BlockDescription.ChunkRawHashes.size(); + bool CanDoPartialBlockDownload = + (BlockDescription.HeaderSize > 0) && + (BlockDescription.ChunkCompressedLengths.size() == BlockDescription.ChunkRawHashes.size()); + if (AllowPartialBlockRequests && WantsToDoPartialBlockDownload && CanDoPartialBlockDownload) + { + std::vector<BlockRangeDescriptor> BlockRanges; + + ZEN_TRACE_CPU("UpdateFolder_HandleBlocks_PartialAnalysis"); + + uint32_t NeedBlockChunkIndexOffset = 0; + uint32_t ChunkBlockIndex = 0; + uint32_t CurrentOffset = + gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + BlockDescription.HeaderSize); + + const uint64_t TotalBlockSize = std::accumulate(BlockDescription.ChunkCompressedLengths.begin(), + BlockDescription.ChunkCompressedLengths.end(), + std::uint64_t(CurrentOffset)); + + BlockRangeDescriptor NextRange{.BlockIndex = BlockIndex}; + while (NeedBlockChunkIndexOffset < BlockChunkIndexNeeded.size() && + ChunkBlockIndex < BlockDescription.ChunkRawHashes.size()) + { + const uint32_t ChunkCompressedLength = BlockDescription.ChunkCompressedLengths[ChunkBlockIndex]; + if (ChunkBlockIndex < BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + NextRange = {.BlockIndex = BlockIndex}; + } + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + } + else if (ChunkBlockIndex == BlockChunkIndexNeeded[NeedBlockChunkIndexOffset]) + { + if (NextRange.RangeLength == 0) + { + NextRange.RangeStart = CurrentOffset; + NextRange.ChunkBlockIndexStart = ChunkBlockIndex; + } + NextRange.RangeLength += ChunkCompressedLength; + NextRange.ChunkBlockIndexCount++; + ChunkBlockIndex++; + CurrentOffset += ChunkCompressedLength; + NeedBlockChunkIndexOffset++; + } + else + { + ZEN_ASSERT(false); + } + } + if (NextRange.RangeLength > 0) + { + BlockRanges.push_back(NextRange); + } + + ZEN_ASSERT(!BlockRanges.empty()); + + std::vector<BlockRangeDescriptor> CollapsedBlockRanges; + auto It = BlockRanges.begin(); + CollapsedBlockRanges.push_back(*It++); + while (It != BlockRanges.end()) + { + BlockRangeDescriptor& LastRange = CollapsedBlockRanges.back(); + uint64_t Slack = It->RangeStart - (LastRange.RangeStart + LastRange.RangeLength); + uint64_t BothRangeSize = It->RangeLength + LastRange.RangeLength; + if (Slack <= Max(BothRangeSize / 8, 64u * 1024u)) // Made up heuristic - we'll see how it pans out + { + LastRange.ChunkBlockIndexCount = + (It->ChunkBlockIndexStart + It->ChunkBlockIndexCount) - LastRange.ChunkBlockIndexStart; + LastRange.RangeLength = (It->RangeStart + It->RangeLength) - LastRange.RangeStart; + } + else + { + CollapsedBlockRanges.push_back(*It); + } + ++It; + } + + const std::uint64_t WantedSize = std::accumulate( + CollapsedBlockRanges.begin(), + CollapsedBlockRanges.end(), + uint64_t(0), + [](uint64_t Current, const BlockRangeDescriptor& Range) { return Current + Range.RangeLength; }); + ZEN_ASSERT(WantedSize <= TotalBlockSize); + if (WantedSize > ((TotalBlockSize * 95) / 100)) + { + ZEN_CONSOLE_VERBOSE("Using more than 95% ({}) of block {} ({}), requesting full block", + NiceBytes(WantedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize)); + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + else if ((WantedSize > ((TotalBlockSize * 9) / 10)) && CollapsedBlockRanges.size() > 1) + { + ZEN_CONSOLE_VERBOSE( + "Using more than 90% ({}) of block {} ({}) using {} requests, requesting full block", + NiceBytes(WantedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + CollapsedBlockRanges.size()); + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + else if ((WantedSize > ((TotalBlockSize * 8) / 10)) && (CollapsedBlockRanges.size() > 16)) + { + ZEN_CONSOLE_VERBOSE( + "Using more than 80% ({}) of block {} ({}) using {} requests, requesting full block", + NiceBytes(WantedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + CollapsedBlockRanges.size()); + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + else if ((WantedSize > ((TotalBlockSize * 7) / 10)) && (CollapsedBlockRanges.size() > 48)) + { + ZEN_CONSOLE_VERBOSE( + "Using more than 70% ({}) of block {} ({}) using {} requests, requesting full block", + NiceBytes(WantedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + CollapsedBlockRanges.size()); + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + else if ((WantedSize > ((TotalBlockSize * 6) / 10)) && (CollapsedBlockRanges.size() > 64)) + { + ZEN_CONSOLE_VERBOSE( + "Using more than 60% ({}) of block {} ({}) using {} requests, requesting full block", + NiceBytes(WantedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + CollapsedBlockRanges.size()); + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + else + { + if (WantedSize > ((TotalBlockSize * 5) / 10)) + { + ZEN_CONSOLE_VERBOSE("Using {}% ({}) of block {} ({}) using {} requests, requesting partial block", + (WantedSize * 100) / TotalBlockSize, + NiceBytes(WantedSize), + BlockDescription.BlockHash, + NiceBytes(TotalBlockSize), + CollapsedBlockRanges.size()); + } + TotalRequestCount += CollapsedBlockRanges.size(); + TotalPartWriteCount += CollapsedBlockRanges.size(); + + BlockRangeWorks.insert(BlockRangeWorks.end(), CollapsedBlockRanges.begin(), CollapsedBlockRanges.end()); + } + } + else + { + TotalRequestCount++; + TotalPartWriteCount++; + + FullBlockWorks.push_back(BlockIndex); + } + } + } + } + else + { + ZEN_CONSOLE_VERBOSE("Skipping block {} due to cache reuse", BlockDescriptions[BlockIndex].BlockHash); + } + } + + struct BlobsExistsResult + { + tsl::robin_set<IoHash> ExistingBlobs; + uint64_t ElapsedTimeMs = 0; + }; + + BlobsExistsResult ExistsResult; + + if (Storage.BuildCacheStorage) + { + ZEN_TRACE_CPU("BlobCacheExistCheck"); + Stopwatch Timer; + + tsl::robin_set<IoHash> BlobHashesSet; + + BlobHashesSet.reserve(LooseChunkHashWorks.size() + FullBlockWorks.size()); + for (LooseChunkHashWorkData& LooseChunkHashWork : LooseChunkHashWorks) + { + BlobHashesSet.insert(RemoteContent.ChunkedContent.ChunkHashes[LooseChunkHashWork.RemoteChunkIndex]); + } + for (const BlockRangeDescriptor& BlockRange : BlockRangeWorks) + { + const uint32_t BlockIndex = BlockRange.BlockIndex; + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + BlobHashesSet.insert(BlockDescription.BlockHash); + } + for (uint32_t BlockIndex : FullBlockWorks) + { + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + BlobHashesSet.insert(BlockDescription.BlockHash); + } + + if (!BlobHashesSet.empty()) + { + const std::vector<IoHash> BlobHashes(BlobHashesSet.begin(), BlobHashesSet.end()); + const std::vector<BuildStorageCache::BlobExistsResult> CacheExistsResult = + Storage.BuildCacheStorage->BlobsExists(BuildId, BlobHashes); + + if (CacheExistsResult.size() == BlobHashes.size()) + { + ExistsResult.ExistingBlobs.reserve(CacheExistsResult.size()); + for (size_t BlobIndex = 0; BlobIndex < BlobHashes.size(); BlobIndex++) + { + if (CacheExistsResult[BlobIndex].HasBody) + { + ExistsResult.ExistingBlobs.insert(BlobHashes[BlobIndex]); + } + } + } + ExistsResult.ElapsedTimeMs = Timer.GetElapsedTimeMs(); + if (!ExistsResult.ExistingBlobs.empty()) + { + ZEN_CONSOLE("Remote cache : Found {} out of {} needed blobs in {}", + ExistsResult.ExistingBlobs.size(), + BlobHashes.size(), + NiceTimeSpanMs(ExistsResult.ElapsedTimeMs)); + } + } + } + + BufferedWriteFileCache WriteCache; + + for (uint32_t ScavengeOpIndex = 0; ScavengeOpIndex < ScavengeCopyOperations.size(); ScavengeOpIndex++) + { + if (AbortFlag) + { + break; + } + if (!PrimeCacheOnly) + { + Work.ScheduleWork( + WritePool, + [&RemoteContent, + &CacheFolderPath, + &ScavengedPaths, + &ScavengeCopyOperations, + &ScavengedContents, + &FilteredWrittenBytesPerSecond, + ScavengeOpIndex, + &WritePartsComplete, + TotalPartWriteCount, + &DiskStats](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WriteScavenged"); + + FilteredWrittenBytesPerSecond.Start(); + + const ScavengeCopyOperation& ScavengeOp = ScavengeCopyOperations[ScavengeOpIndex]; + const ChunkedFolderContent& ScavengedContent = ScavengedContents[ScavengeOp.ScavengedContentIndex]; + const std::filesystem::path ScavengedPath = ScavengedContent.Paths[ScavengeOp.ScavengedPathIndex]; + + const std::filesystem::path ScavengedFilePath = + (ScavengedPaths[ScavengeOp.ScavengedContentIndex] / ScavengedPath).make_preferred(); + ZEN_ASSERT_SLOW(FileSizeFromPath(ScavengedFilePath) == ScavengeOp.RawSize); + + const IoHash& RemoteSequenceRawHash = + RemoteContent.ChunkedContent.SequenceRawHashes[ScavengeOp.RemoteSequenceIndex]; + const std::filesystem::path TempFilePath = + GetTempChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); + + const uint64_t RawSize = ScavengedContent.RawSizes[ScavengeOp.ScavengedContentIndex]; + CopyFile(ScavengedFilePath, TempFilePath, RawSize, DiskStats.WriteCount, DiskStats.WriteByteCount); + + const std::filesystem::path CacheFilePath = + GetFinalChunkedSequenceFileName(CacheFolderPath, RemoteSequenceRawHash); + RenameFile(TempFilePath, CacheFilePath); + + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }); + } + } + + for (uint32_t LooseChunkHashWorkIndex = 0; LooseChunkHashWorkIndex < LooseChunkHashWorks.size(); LooseChunkHashWorkIndex++) + { + if (AbortFlag) + { + break; + } + + LooseChunkHashWorkData& LooseChunkHashWork = LooseChunkHashWorks[LooseChunkHashWorkIndex]; + + std::vector<const ChunkedContentLookup::ChunkSequenceLocation*> ChunkTargetPtrs = + std::move(LooseChunkHashWork.ChunkTargetPtrs); + const uint32_t RemoteChunkIndex = LooseChunkHashWork.RemoteChunkIndex; + + if (PrimeCacheOnly && ExistsResult.ExistingBlobs.contains(RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex])) + { + DownloadStats.RequestsCompleteCount++; + continue; + } + + Work.ScheduleWork( + WritePool, + [&Storage, + &Path, + &ZenFolderPath, + &RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &SequenceIndexChunksLeftToWriteCounters, + &Work, + &WritePool, + &NetworkPool, + PrimeCacheOnly, + &ExistsResult, + &DiskStats, + &DownloadStats, + &WriteChunkStats, + &WritePartsComplete, + RemoteChunkIndex, + ChunkTargetPtrs, + BuildId = Oid(BuildId), + LargeAttachmentSize, + PreferredMultipartChunkSize, + TotalRequestCount, + TotalPartWriteCount, + &WriteCache, + &FilteredDownloadedBytesPerSecond, + &FilteredWrittenBytesPerSecond](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_ReadPreDownloaded"); + std::filesystem::path ExistingCompressedChunkPath; + if (!PrimeCacheOnly) + { + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + std::filesystem::path CompressedChunkPath = + ZenTempDownloadFolderPath(ZenFolderPath) / ChunkHash.ToHexString(); + if (IsFile(CompressedChunkPath)) + { + IoBuffer ExistingCompressedPart = IoBufferBuilder::MakeFromFile(ExistingCompressedChunkPath); + if (ExistingCompressedPart) + { + IoHash RawHash; + uint64_t RawSize; + if (CompressedBuffer::ValidateCompressedHeader(ExistingCompressedPart, RawHash, RawSize)) + { + DownloadStats.RequestsCompleteCount++; + if (DownloadStats.RequestsCompleteCount == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + ExistingCompressedChunkPath = std::move(CompressedChunkPath); + } + else + { + std::error_code DummyEc; + RemoveFile(CompressedChunkPath, DummyEc); + } + } + } + } + if (!AbortFlag) + + { + if (!ExistingCompressedChunkPath.empty()) + { + Work.ScheduleWork( + WritePool, + [&Path, + &ZenFolderPath, + &RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePool, + &DiskStats, + &WriteChunkStats, + &WritePartsComplete, + TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + RemoteChunkIndex, + ChunkTargetPtrs, + CompressedChunkPath = std::move(ExistingCompressedChunkPath)](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WritePreDownloaded"); + + FilteredWrittenBytesPerSecond.Start(); + + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + + IoBuffer CompressedPart = IoBufferBuilder::MakeFromFile(CompressedChunkPath); + if (!CompressedPart) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded compressed chunk {} from {}", + ChunkHash, + CompressedChunkPath)); + } + + std::filesystem::path TargetFolder = ZenTempCacheFolderPath(ZenFolderPath); + bool NeedHashVerify = WriteCompressedChunk(TargetFolder, + RemoteContent, + RemoteLookup, + ChunkHash, + ChunkTargetPtrs, + WriteCache, + std::move(CompressedPart), + DiskStats); + WritePartsComplete++; + + if (!AbortFlag) + { + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + + TryRemoveFile(CompressedChunkPath); + + std::vector<uint32_t> CompletedSequences = + CompleteChunkTargets(ChunkTargetPtrs, SequenceIndexChunksLeftToWriteCounters); + WriteCache.Close(CompletedSequences); + if (NeedHashVerify) + { + VerifyAndCompleteChunkSequencesAsync(TargetFolder, + RemoteContent, + RemoteLookup, + CompletedSequences, + Work, + WritePool); + } + else + { + FinalizeChunkSequences(TargetFolder, RemoteContent, CompletedSequences); + } + } + } + }); + } + else + { + Work.ScheduleWork( + NetworkPool, + [&Path, + &ZenFolderPath, + &Storage, + BuildId = Oid(BuildId), + PrimeCacheOnly, + &RemoteContent, + &RemoteLookup, + &ExistsResult, + &SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePool, + &NetworkPool, + &DiskStats, + &WriteChunkStats, + &WritePartsComplete, + TotalPartWriteCount, + TotalRequestCount, + &FilteredDownloadedBytesPerSecond, + &FilteredWrittenBytesPerSecond, + LargeAttachmentSize, + PreferredMultipartChunkSize, + RemoteChunkIndex, + ChunkTargetPtrs, + &DownloadStats](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + const IoHash& ChunkHash = RemoteContent.ChunkedContent.ChunkHashes[RemoteChunkIndex]; + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BuildBlob; + const bool ExistsInCache = + Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(ChunkHash); + if (ExistsInCache) + { + BuildBlob = Storage.BuildCacheStorage->GetBuildBlob(BuildId, ChunkHash); + } + if (BuildBlob) + { + uint64_t BlobSize = BuildBlob.GetSize(); + DownloadStats.DownloadedChunkCount++; + DownloadStats.DownloadedChunkByteCount += BlobSize; + DownloadStats.RequestsCompleteCount++; + if (DownloadStats.RequestsCompleteCount == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + AsyncWriteDownloadedChunk(ZenFolderPath, + RemoteContent, + RemoteLookup, + RemoteChunkIndex, + std::move(ChunkTargetPtrs), + WriteCache, + Work, + WritePool, + std::move(BuildBlob), + SequenceIndexChunksLeftToWriteCounters, + WritePartsComplete, + TotalPartWriteCount, + FilteredWrittenBytesPerSecond, + DiskStats); + } + else + { + if (RemoteContent.ChunkedContent.ChunkRawSizes[RemoteChunkIndex] >= LargeAttachmentSize) + { + ZEN_TRACE_CPU("UpdateFolder_GetLargeChunk"); + DownloadLargeBlob(*Storage.BuildStorage, + ZenTempDownloadFolderPath(ZenFolderPath), + BuildId, + ChunkHash, + PreferredMultipartChunkSize, + Work, + NetworkPool, + DownloadStats, + [&Storage, + &ZenFolderPath, + &RemoteContent, + &RemoteLookup, + BuildId, + PrimeCacheOnly, + &SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePool, + ChunkHash, + TotalPartWriteCount, + TotalRequestCount, + &WritePartsComplete, + &FilteredWrittenBytesPerSecond, + &FilteredDownloadedBytesPerSecond, + &DownloadStats, + &DiskStats, + RemoteChunkIndex, + ChunkTargetPtrs](IoBuffer&& Payload) mutable { + DownloadStats.RequestsCompleteCount++; + if (DownloadStats.RequestsCompleteCount == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + if (Payload && Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBuildBlob( + BuildId, + ChunkHash, + ZenContentType::kCompressedBinary, + CompositeBuffer(SharedBuffer(Payload))); + } + if (!PrimeCacheOnly) + { + if (!AbortFlag) + { + AsyncWriteDownloadedChunk( + ZenFolderPath, + RemoteContent, + RemoteLookup, + RemoteChunkIndex, + std::move(ChunkTargetPtrs), + WriteCache, + Work, + WritePool, + std::move(Payload), + SequenceIndexChunksLeftToWriteCounters, + WritePartsComplete, + TotalPartWriteCount, + FilteredWrittenBytesPerSecond, + DiskStats); + } + } + }); + } + else + { + ZEN_TRACE_CPU("UpdateFolder_GetChunk"); + BuildBlob = Storage.BuildStorage->GetBuildBlob(BuildId, ChunkHash); + if (BuildBlob && Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBuildBlob( + BuildId, + ChunkHash, + BuildBlob.GetContentType(), + CompositeBuffer(SharedBuffer(BuildBlob))); + } + if (!BuildBlob) + { + throw std::runtime_error(fmt::format("Chunk {} is missing", ChunkHash)); + } + if (!PrimeCacheOnly) + { + if (!AbortFlag) + { + uint64_t BlobSize = BuildBlob.GetSize(); + DownloadStats.DownloadedChunkCount++; + DownloadStats.DownloadedChunkByteCount += BlobSize; + DownloadStats.RequestsCompleteCount++; + if (DownloadStats.RequestsCompleteCount == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + AsyncWriteDownloadedChunk(ZenFolderPath, + RemoteContent, + RemoteLookup, + RemoteChunkIndex, + std::move(ChunkTargetPtrs), + WriteCache, + Work, + WritePool, + std::move(BuildBlob), + SequenceIndexChunksLeftToWriteCounters, + WritePartsComplete, + TotalPartWriteCount, + FilteredWrittenBytesPerSecond, + DiskStats); + } + } + } + } + } + }); + } + } + } + }); + } + + for (size_t CopyDataIndex = 0; CopyDataIndex < CacheCopyDatas.size(); CopyDataIndex++) + { + ZEN_ASSERT(!PrimeCacheOnly); + if (AbortFlag) + { + break; + } + + Work.ScheduleWork( + WritePool, + [&Path, + &LocalContent, + &RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &LocalLookup, + &SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePool, + &FilteredWrittenBytesPerSecond, + &CacheCopyDatas, + &ScavengedContents, + &ScavengedLookups, + &ScavengedPaths, + &WritePartsComplete, + TotalPartWriteCount, + &DiskStats, + CopyDataIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_CopyLocal"); + + FilteredWrittenBytesPerSecond.Start(); + const CacheCopyData& CopyData = CacheCopyDatas[CopyDataIndex]; + + std::filesystem::path SourceFilePath; + + if (CopyData.ScavengeSourceIndex == (uint32_t)-1) + { + const uint32_t LocalPathIndex = LocalLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; + SourceFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + } + else + { + const ChunkedFolderContent& ScavengedContent = ScavengedContents[CopyData.ScavengeSourceIndex]; + const ChunkedContentLookup& ScavengedLookup = ScavengedLookups[CopyData.ScavengeSourceIndex]; + const std::filesystem::path ScavengedPath = ScavengedPaths[CopyData.ScavengeSourceIndex]; + const uint32_t ScavengedPathIndex = + ScavengedLookup.SequenceIndexFirstPathIndex[CopyData.SourceSequenceIndex]; + SourceFilePath = (ScavengedPath / ScavengedContent.Paths[ScavengedPathIndex]).make_preferred(); + } + ZEN_ASSERT_SLOW(IsFile(SourceFilePath)); + ZEN_ASSERT(!CopyData.TargetChunkLocationPtrs.empty()); + + uint64_t CacheLocalFileBytesRead = 0; + + size_t TargetStart = 0; + const std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> AllTargets( + CopyData.TargetChunkLocationPtrs); + + struct WriteOp + { + const ChunkedContentLookup::ChunkSequenceLocation* Target = nullptr; + uint64_t CacheFileOffset = (uint64_t)-1; + uint32_t ChunkIndex = (uint32_t)-1; + }; + + std::vector<WriteOp> WriteOps; + + if (!AbortFlag) + { + ZEN_TRACE_CPU("Sort"); + WriteOps.reserve(AllTargets.size()); + for (const CacheCopyData::ChunkTarget& ChunkTarget : CopyData.ChunkTargets) + { + std::span<const ChunkedContentLookup::ChunkSequenceLocation* const> TargetRange = + AllTargets.subspan(TargetStart, ChunkTarget.TargetChunkLocationCount); + for (const ChunkedContentLookup::ChunkSequenceLocation* Target : TargetRange) + { + WriteOps.push_back(WriteOp{.Target = Target, + .CacheFileOffset = ChunkTarget.CacheFileOffset, + .ChunkIndex = ChunkTarget.RemoteChunkIndex}); + } + TargetStart += ChunkTarget.TargetChunkLocationCount; + } + + std::sort(WriteOps.begin(), WriteOps.end(), [](const WriteOp& Lhs, const WriteOp& Rhs) { + if (Lhs.Target->SequenceIndex < Rhs.Target->SequenceIndex) + { + return true; + } + else if (Lhs.Target->SequenceIndex > Rhs.Target->SequenceIndex) + { + return false; + } + if (Lhs.Target->Offset < Rhs.Target->Offset) + { + return true; + } + return false; + }); + } + + if (!AbortFlag) + { + ZEN_TRACE_CPU("Write"); + + tsl::robin_set<uint32_t> ChunkIndexesWritten; + + BufferedOpenFile SourceFile(SourceFilePath, DiskStats); + BufferedWriteFileCache::Local LocalWriter(WriteCache); + + for (size_t WriteOpIndex = 0; WriteOpIndex < WriteOps.size();) + { + if (AbortFlag) + { + break; + } + const WriteOp& Op = WriteOps[WriteOpIndex]; + + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() <= + RemoteContent.ChunkedContent.ChunkCounts[RemoteSequenceIndex]); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounters[RemoteSequenceIndex].load() > 0); + const uint32_t RemotePathIndex = RemoteLookup.SequenceIndexFirstPathIndex[RemoteSequenceIndex]; + const uint64_t ChunkSize = RemoteContent.ChunkedContent.ChunkRawSizes[Op.ChunkIndex]; + + uint64_t ReadLength = ChunkSize; + size_t WriteCount = 1; + uint64_t OpSourceEnd = Op.CacheFileOffset + ChunkSize; + uint64_t OpTargetEnd = Op.Target->Offset + ChunkSize; + while ((WriteOpIndex + WriteCount) < WriteOps.size()) + { + const WriteOp& NextOp = WriteOps[WriteOpIndex + WriteCount]; + if (NextOp.Target->SequenceIndex != Op.Target->SequenceIndex) + { + break; + } + if (NextOp.Target->Offset != OpTargetEnd) + { + break; + } + if (NextOp.CacheFileOffset != OpSourceEnd) + { + break; + } + const uint64_t NextChunkLength = RemoteContent.ChunkedContent.ChunkRawSizes[NextOp.ChunkIndex]; + if (ReadLength + NextChunkLength > 512u * 1024u) + { + break; + } + ReadLength += NextChunkLength; + OpSourceEnd += NextChunkLength; + OpTargetEnd += NextChunkLength; + WriteCount++; + } + + CompositeBuffer ChunkSource = SourceFile.GetRange(Op.CacheFileOffset, ReadLength); + + const uint64_t FileOffset = Op.Target->Offset; + + WriteSequenceChunk(CacheFolderPath, + RemoteContent, + LocalWriter, + ChunkSource, + RemoteSequenceIndex, + FileOffset, + RemotePathIndex, + DiskStats); + + CacheLocalFileBytesRead += ReadLength; // TODO: This should be the sum of unique chunk sizes? + + WriteOpIndex += WriteCount; + } + } + if (!AbortFlag) + { + // Write tracking, updating this must be done without any files open (BufferedWriteFileCache::Local) + std::vector<uint32_t> CompletedChunkSequences; + for (const WriteOp& Op : WriteOps) + { + const uint32_t RemoteSequenceIndex = Op.Target->SequenceIndex; + if (CompleteSequenceChunk(RemoteSequenceIndex, SequenceIndexChunksLeftToWriteCounters)) + { + CompletedChunkSequences.push_back(RemoteSequenceIndex); + } + } + WriteCache.Close(CompletedChunkSequences); + VerifyAndCompleteChunkSequencesAsync(CacheFolderPath, + RemoteContent, + RemoteLookup, + CompletedChunkSequences, + Work, + WritePool); + ZEN_CONSOLE_VERBOSE("Copied {} from {}", NiceBytes(CacheLocalFileBytesRead), SourceFilePath); + } + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }); + } + + for (uint32_t BlockIndex : CachedChunkBlockIndexes) + { + ZEN_ASSERT(!PrimeCacheOnly); + if (AbortFlag) + { + break; + } + + Work.ScheduleWork( + WritePool, + [&ZenFolderPath, + &CacheFolderPath, + &RemoteContent, + &RemoteLookup, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &SequenceIndexChunksLeftToWriteCounters, + &WriteCache, + &Work, + &WritePool, + &BlockDescriptions, + &FilteredWrittenBytesPerSecond, + &DiskStats, + &WritePartsComplete, + TotalPartWriteCount, + BlockIndex](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WriteCachedBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + FilteredWrittenBytesPerSecond.Start(); + + std::filesystem::path BlockChunkPath = + ZenTempBlockFolderPath(ZenFolderPath) / BlockDescription.BlockHash.ToHexString(); + IoBuffer BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) + { + throw std::runtime_error( + fmt::format("Can not read block {} at {}", BlockDescription.BlockHash, BlockChunkPath)); + } + + if (!AbortFlag) + { + if (!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + Work, + WritePool, + CompositeBuffer(std::move(BlockBuffer)), + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + WriteCache, + DiskStats)) + { + std::error_code DummyEc; + RemoveFile(BlockChunkPath, DummyEc); + throw std::runtime_error(fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + + TryRemoveFile(BlockChunkPath); + + WritePartsComplete++; + + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + } + }); + } + + for (size_t BlockRangeIndex = 0; BlockRangeIndex < BlockRangeWorks.size(); BlockRangeIndex++) + { + ZEN_ASSERT(!PrimeCacheOnly); + if (AbortFlag) + { + break; + } + const BlockRangeDescriptor BlockRange = BlockRangeWorks[BlockRangeIndex]; + ZEN_ASSERT(BlockRange.BlockIndex != (uint32_t)-1); + const uint32_t BlockIndex = BlockRange.BlockIndex; + Work.ScheduleWork( + NetworkPool, + [&Storage, + &ZenFolderPath, + BuildId, + &RemoteLookup, + &BlockDescriptions, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &CacheFolderPath, + &RemoteContent, + &SequenceIndexChunksLeftToWriteCounters, + &ExistsResult, + &WriteCache, + &FilteredDownloadedBytesPerSecond, + TotalRequestCount, + &WritePartsComplete, + TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + &DiskStats, + &DownloadStats, + &Work, + &WritePool, + BlockIndex, + BlockRange](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_GetPartialBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredDownloadedBytesPerSecond.Start(); + IoBuffer BlockBuffer; + if (Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash)) + { + BlockBuffer = Storage.BuildCacheStorage->GetBuildBlob(BuildId, + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength); + } + if (!BlockBuffer) + { + BlockBuffer = Storage.BuildStorage->GetBuildBlob(BuildId, + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength); + } + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing when fetching range {} -> {}", + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeStart + BlockRange.RangeLength)); + } + if (!AbortFlag) + { + uint64_t BlockSize = BlockBuffer.GetSize(); + DownloadStats.DownloadedBlockCount++; + DownloadStats.DownloadedBlockByteCount += BlockSize; + DownloadStats.RequestsCompleteCount++; + if (DownloadStats.RequestsCompleteCount == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + + std::filesystem::path BlockChunkPath; + + // Check if the dowloaded block is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlockSize)) + { + ZEN_TRACE_CPU("UpdateFolder_MoveTempBlock"); + + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + BlockBuffer.SetDeleteOnClose(false); + BlockBuffer = {}; + BlockChunkPath = ZenTempBlockFolderPath(ZenFolderPath) / fmt::format("{}_{:x}_{:x}", + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength); + RenameFile(TempBlobPath, BlockChunkPath, Ec); + if (Ec) + { + BlockChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); + BlockBuffer.SetDeleteOnClose(true); + } + } + } + } + + if (BlockChunkPath.empty() && (BlockSize > 512u * 1024u)) + { + ZEN_TRACE_CPU("UpdateFolder_WriteTempBlock"); + // Could not be moved and rather large, lets store it on disk + BlockChunkPath = ZenTempBlockFolderPath(ZenFolderPath) / fmt::format("{}_{:x}_{:x}", + BlockDescription.BlockHash, + BlockRange.RangeStart, + BlockRange.RangeLength); + TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); + BlockBuffer = {}; + } + + if (!AbortFlag) + { + Work.ScheduleWork( + WritePool, + [&CacheFolderPath, + &RemoteContent, + &RemoteLookup, + &BlockDescriptions, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &SequenceIndexChunksLeftToWriteCounters, + &WritePartsComplete, + &WriteCache, + &Work, + TotalPartWriteCount, + &WritePool, + &DiskStats, + &FilteredWrittenBytesPerSecond, + BlockIndex, + BlockRange, + BlockChunkPath, + BlockPartialBuffer = std::move(BlockBuffer)](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WritePartialBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockPartialBuffer); + } + else + { + ZEN_ASSERT(!BlockPartialBuffer); + BlockPartialBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockPartialBuffer) + { + throw std::runtime_error(fmt::format("Could not open downloaded block {} from {}", + BlockDescription.BlockHash, + BlockChunkPath)); + } + } + + FilteredWrittenBytesPerSecond.Start(); + + if (!WritePartialBlockToDisk( + CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + Work, + WritePool, + CompositeBuffer(std::move(BlockPartialBuffer)), + BlockRange.ChunkBlockIndexStart, + BlockRange.ChunkBlockIndexStart + BlockRange.ChunkBlockIndexCount - 1, + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + WriteCache, + DiskStats)) + { + std::error_code DummyEc; + RemoveFile(BlockChunkPath, DummyEc); + throw std::runtime_error( + fmt::format("Partial block {} is malformed", BlockDescription.BlockHash)); + } + + if (!BlockChunkPath.empty()) + { + TryRemoveFile(BlockChunkPath); + } + + WritePartsComplete++; + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }); + } + } + } + }); + } + + for (uint32_t BlockIndex : FullBlockWorks) + { + if (AbortFlag) + { + break; + } + + if (PrimeCacheOnly && ExistsResult.ExistingBlobs.contains(BlockDescriptions[BlockIndex].BlockHash)) + { + DownloadStats.RequestsCompleteCount++; + continue; + } + + Work.ScheduleWork( + NetworkPool, + [&Storage, + &ZenFolderPath, + BuildId, + PrimeCacheOnly, + &BlockDescriptions, + &WritePartsComplete, + TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + &ExistsResult, + &Work, + &WritePool, + &RemoteContent, + &RemoteLookup, + &WriteCache, + &CacheFolderPath, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &SequenceIndexChunksLeftToWriteCounters, + &FilteredDownloadedBytesPerSecond, + &WriteChunkStats, + &DiskStats, + &DownloadStats, + TotalRequestCount, + BlockIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_GetFullBlock"); + + const ChunkBlockDescription& BlockDescription = BlockDescriptions[BlockIndex]; + + FilteredDownloadedBytesPerSecond.Start(); + + IoBuffer BlockBuffer; + const bool ExistsInCache = + Storage.BuildCacheStorage && ExistsResult.ExistingBlobs.contains(BlockDescription.BlockHash); + if (ExistsInCache) + { + BlockBuffer = Storage.BuildCacheStorage->GetBuildBlob(BuildId, BlockDescription.BlockHash); + } + if (!BlockBuffer) + { + BlockBuffer = Storage.BuildStorage->GetBuildBlob(BuildId, BlockDescription.BlockHash); + if (BlockBuffer && Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->PutBuildBlob(BuildId, + BlockDescription.BlockHash, + BlockBuffer.GetContentType(), + CompositeBuffer(SharedBuffer(BlockBuffer))); + } + } + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is missing", BlockDescription.BlockHash)); + } + if (!AbortFlag) + { + uint64_t BlockSize = BlockBuffer.GetSize(); + DownloadStats.DownloadedBlockCount++; + DownloadStats.DownloadedBlockByteCount += BlockSize; + DownloadStats.RequestsCompleteCount++; + if (DownloadStats.RequestsCompleteCount == TotalRequestCount) + { + FilteredDownloadedBytesPerSecond.Stop(); + } + + if (!PrimeCacheOnly) + { + std::filesystem::path BlockChunkPath; + + // Check if the dowloaded block is file based and we can move it directly without rewriting it + { + IoBufferFileReference FileRef; + if (BlockBuffer.GetFileReference(FileRef) && (FileRef.FileChunkOffset == 0) && + (FileRef.FileChunkSize == BlockSize)) + { + ZEN_TRACE_CPU("UpdateFolder_MoveTempBlock"); + std::error_code Ec; + std::filesystem::path TempBlobPath = PathFromHandle(FileRef.FileHandle, Ec); + if (!Ec) + { + BlockBuffer.SetDeleteOnClose(false); + BlockBuffer = {}; + BlockChunkPath = + ZenTempBlockFolderPath(ZenFolderPath) / BlockDescription.BlockHash.ToHexString(); + RenameFile(TempBlobPath, BlockChunkPath, Ec); + if (Ec) + { + BlockChunkPath = std::filesystem::path{}; + + // Re-open the temp file again + BasicFile OpenTemp(TempBlobPath, BasicFile::Mode::kDelete); + BlockBuffer = IoBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BlockSize, true); + BlockBuffer.SetDeleteOnClose(true); + } + } + } + } + + if (BlockChunkPath.empty() && (BlockSize > 512u * 1024u)) + { + ZEN_TRACE_CPU("UpdateFolder_WriteTempBlock"); + // Could not be moved and rather large, lets store it on disk + BlockChunkPath = ZenTempBlockFolderPath(ZenFolderPath) / BlockDescription.BlockHash.ToHexString(); + TemporaryFile::SafeWriteFile(BlockChunkPath, BlockBuffer); + BlockBuffer = {}; + } + + if (!AbortFlag) + { + Work.ScheduleWork(WritePool, + [&Work, + &WritePool, + &RemoteContent, + &RemoteLookup, + CacheFolderPath, + &RemoteChunkIndexNeedsCopyFromSourceFlags, + &SequenceIndexChunksLeftToWriteCounters, + BlockIndex, + &BlockDescriptions, + &WriteCache, + &WriteChunkStats, + &DiskStats, + &WritePartsComplete, + TotalPartWriteCount, + &FilteredWrittenBytesPerSecond, + BlockChunkPath, + BlockBuffer = std::move(BlockBuffer)](std::atomic<bool>&) mutable { + if (!AbortFlag) + { + ZEN_TRACE_CPU("UpdateFolder_WriteFullBlock"); + + const ChunkBlockDescription& BlockDescription = + BlockDescriptions[BlockIndex]; + + if (BlockChunkPath.empty()) + { + ZEN_ASSERT(BlockBuffer); + } + else + { + ZEN_ASSERT(!BlockBuffer); + BlockBuffer = IoBufferBuilder::MakeFromFile(BlockChunkPath); + if (!BlockBuffer) + { + throw std::runtime_error( + fmt::format("Could not open dowloaded block {} from {}", + BlockDescription.BlockHash, + BlockChunkPath)); + } + } + + FilteredWrittenBytesPerSecond.Start(); + if (!WriteBlockToDisk(CacheFolderPath, + RemoteContent, + BlockDescription, + SequenceIndexChunksLeftToWriteCounters, + Work, + WritePool, + CompositeBuffer(std::move(BlockBuffer)), + RemoteLookup, + RemoteChunkIndexNeedsCopyFromSourceFlags, + WriteCache, + DiskStats)) + { + std::error_code DummyEc; + RemoveFile(BlockChunkPath, DummyEc); + throw std::runtime_error( + fmt::format("Block {} is malformed", BlockDescription.BlockHash)); + } + + if (!BlockChunkPath.empty()) + { + TryRemoveFile(BlockChunkPath); + } + + WritePartsComplete++; + + if (WritePartsComplete == TotalPartWriteCount) + { + FilteredWrittenBytesPerSecond.Stop(); + } + } + }); + } + } + } + } + }); + } + + { + ZEN_TRACE_CPU("WriteChunks_Wait"); + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + uint64_t DownloadedBytes = DownloadStats.DownloadedChunkByteCount.load() + + DownloadStats.DownloadedBlockByteCount.load() + + +DownloadStats.DownloadedPartialBlockByteCount.load(); + FilteredWrittenBytesPerSecond.Update(DiskStats.WriteByteCount.load()); + FilteredDownloadedBytesPerSecond.Update(DownloadedBytes); + std::string DownloadRateString = + (DownloadStats.RequestsCompleteCount == TotalRequestCount) + ? "" + : fmt::format(" {}bits/s", NiceNum(FilteredDownloadedBytesPerSecond.GetCurrent() * 8)); + std::string WriteDetails = PrimeCacheOnly ? "" + : fmt::format(" {}/{} ({}B/s) written.", + NiceBytes(DiskStats.WriteByteCount.load()), + NiceBytes(BytesToWrite), + NiceNum(FilteredWrittenBytesPerSecond.GetCurrent())); + std::string Details = fmt::format("{}/{} ({}{}) downloaded.{}", + DownloadStats.RequestsCompleteCount.load(), + TotalRequestCount, + NiceBytes(DownloadedBytes), + DownloadRateString, + WriteDetails); + WriteProgressBar.UpdateState( + {.Task = PrimeCacheOnly ? "Downloading " : "Writing chunks ", + .Details = Details, + .TotalCount = PrimeCacheOnly ? TotalRequestCount : BytesToWrite, + .RemainingCount = PrimeCacheOnly ? (TotalRequestCount - DownloadStats.RequestsCompleteCount.load()) + : (BytesToWrite - DiskStats.WriteByteCount.load()), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + FilteredWrittenBytesPerSecond.Stop(); + FilteredDownloadedBytesPerSecond.Stop(); + + WriteProgressBar.Finish(); + if (AbortFlag) + { + return; + } + + if (!PrimeCacheOnly) + { + uint32_t RawSequencesMissingWriteCount = 0; + for (uint32_t SequenceIndex = 0; SequenceIndex < SequenceIndexChunksLeftToWriteCounters.size(); SequenceIndex++) + { + const auto& SequenceIndexChunksLeftToWriteCounter = SequenceIndexChunksLeftToWriteCounters[SequenceIndex]; + if (SequenceIndexChunksLeftToWriteCounter.load() != 0) + { + RawSequencesMissingWriteCount++; + const uint32_t PathIndex = RemoteLookup.SequenceIndexFirstPathIndex[SequenceIndex]; + const std::filesystem::path& IncompletePath = RemoteContent.Paths[PathIndex]; + ZEN_ASSERT(!IncompletePath.empty()); + const uint32_t ExpectedSequenceCount = RemoteContent.ChunkedContent.ChunkCounts[SequenceIndex]; + ZEN_CONSOLE("{}: Max count {}, Current count {}", + IncompletePath, + ExpectedSequenceCount, + SequenceIndexChunksLeftToWriteCounter.load()); + ZEN_ASSERT(SequenceIndexChunksLeftToWriteCounter.load() <= ExpectedSequenceCount); + } + } + ZEN_ASSERT(RawSequencesMissingWriteCount == 0); + } + + const uint64_t DownloadedBytes = DownloadStats.DownloadedChunkByteCount.load() + DownloadStats.DownloadedBlockByteCount.load() + + +DownloadStats.DownloadedPartialBlockByteCount.load(); + ZEN_CONSOLE("Downloaded {} ({}bits/s) in {}. Wrote {} ({}B/s) in {}. Completed in {}", + NiceBytes(DownloadedBytes), + NiceNum(GetBytesPerSecond(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(), DownloadedBytes * 8)), + NiceTimeSpanMs(FilteredDownloadedBytesPerSecond.GetElapsedTimeUS() / 1000), + NiceBytes(DiskStats.WriteByteCount.load()), + NiceNum(GetBytesPerSecond(FilteredWrittenBytesPerSecond.GetElapsedTimeUS(), DiskStats.WriteByteCount.load())), + NiceTimeSpanMs(FilteredWrittenBytesPerSecond.GetElapsedTimeUS() / 1000), + NiceTimeSpanMs(WriteTimer.GetElapsedTimeMs())); + + WriteChunkStats.WriteChunksElapsedWallTimeUs = WriteTimer.GetElapsedTimeUs(); + WriteChunkStats.DownloadTimeUs = FilteredDownloadedBytesPerSecond.GetElapsedTimeUS(); + WriteChunkStats.WriteTimeUs = FilteredWrittenBytesPerSecond.GetElapsedTimeUS(); + } + + if (PrimeCacheOnly) + { + return; + } + + tsl::robin_map<uint32_t, uint32_t> RemotePathIndexToLocalPathIndex; + RemotePathIndexToLocalPathIndex.reserve(RemoteContent.Paths.size()); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> SequenceHashToLocalPathIndex; + std::vector<uint32_t> RemoveLocalPathIndexes; + + if (AbortFlag) + { + return; + } + + { + ZEN_TRACE_CPU("UpdateFolder_PrepareTarget"); + + tsl::robin_set<IoHash, IoHash::Hasher> CachedRemoteSequences; + tsl::robin_map<std::string, uint32_t> RemotePathToRemoteIndex; + RemotePathToRemoteIndex.reserve(RemoteContent.Paths.size()); + for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + { + RemotePathToRemoteIndex.insert({RemoteContent.Paths[RemotePathIndex].generic_string(), RemotePathIndex}); + } + + std::vector<uint32_t> FilesToCache; + + uint64_t MatchCount = 0; + uint64_t PathMismatchCount = 0; + uint64_t HashMismatchCount = 0; + std::atomic<uint64_t> CachedCount = 0; + std::atomic<uint64_t> CachedByteCount = 0; + uint64_t SkippedCount = 0; + uint64_t DeleteCount = 0; + for (uint32_t LocalPathIndex = 0; LocalPathIndex < LocalContent.Paths.size(); LocalPathIndex++) + { + if (AbortFlag) + { + break; + } + const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; + const std::filesystem::path& LocalPath = LocalContent.Paths[LocalPathIndex]; + + ZEN_ASSERT_SLOW(IsFile((Path / LocalContent.Paths[LocalPathIndex]).make_preferred())); + + if (!WipeTargetFolder) + { + if (auto RemotePathIt = RemotePathToRemoteIndex.find(LocalPath.generic_string()); + RemotePathIt != RemotePathToRemoteIndex.end()) + { + const uint32_t RemotePathIndex = RemotePathIt->second; + if (RemoteContent.RawHashes[RemotePathIndex] == RawHash) + { + // It is already in it's desired place + RemotePathIndexToLocalPathIndex[RemotePathIndex] = LocalPathIndex; + SequenceHashToLocalPathIndex.insert({RawHash, LocalPathIndex}); + MatchCount++; + continue; + } + else + { + HashMismatchCount++; + } + } + else + { + PathMismatchCount++; + } + } + if (RemoteLookup.RawHashToSequenceIndex.contains(RawHash)) + { + if (!CachedRemoteSequences.contains(RawHash)) + { + ZEN_TRACE_CPU("MoveToCache"); + // We need it + FilesToCache.push_back(LocalPathIndex); + CachedRemoteSequences.insert(RawHash); + } + else + { + // We already have it + SkippedCount++; + } + } + else if (!WipeTargetFolder) + { + // We don't need it + RemoveLocalPathIndexes.push_back(LocalPathIndex); + DeleteCount++; + } + } + + if (AbortFlag) + { + return; + } + + { + ZEN_TRACE_CPU("UpdateFolder_CopyToCache"); + + Stopwatch Timer; + + WorkerThreadPool& WritePool = GetIOWorkerPool(); + + ProgressBar CacheLocalProgressBar(ProgressMode, "Cache Local Data"); + ParallelWork Work(AbortFlag, PauseFlag); + + for (uint32_t LocalPathIndex : FilesToCache) + { + if (AbortFlag) + { + break; + } + Work.ScheduleWork( + WritePool, + [&Path, &LocalContent, &CacheFolderPath, &CachedCount, &CachedByteCount, LocalPathIndex](std::atomic<bool>&) { + ZEN_TRACE_CPU("UpdateFolder_AsyncCopyToCache"); + if (!AbortFlag) + { + const IoHash& RawHash = LocalContent.RawHashes[LocalPathIndex]; + const std::filesystem::path& LocalPath = LocalContent.Paths[LocalPathIndex]; + const std::filesystem::path CacheFilePath = GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); + ZEN_ASSERT_SLOW(!IsFileWithRetry(CacheFilePath)); + const std::filesystem::path LocalFilePath = (Path / LocalPath).make_preferred(); + RenameFileWithRetry(LocalFilePath, CacheFilePath); + CachedCount++; + CachedByteCount += LocalContent.RawSizes[LocalPathIndex]; + } + }); + } + + { + ZEN_TRACE_CPU("CacheLocal_Wait"); + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + const uint64_t WorkTotal = FilesToCache.size(); + const uint64_t WorkComplete = CachedCount.load(); + std::string Details = fmt::format("{}/{} ({}) files", WorkComplete, WorkTotal, NiceBytes(CachedByteCount)); + CacheLocalProgressBar.UpdateState({.Task = "Caching local ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(WorkTotal), + .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + CacheLocalProgressBar.Finish(); + if (AbortFlag) + { + return; + } + + ZEN_DEBUG( + "Local state prep: Match: {}, PathMismatch: {}, HashMismatch: {}, Cached: {} ({}), Skipped: {}, " + "Delete: {}", + MatchCount, + PathMismatchCount, + HashMismatchCount, + CachedCount.load(), + NiceBytes(CachedByteCount.load()), + SkippedCount, + DeleteCount); + } + } + + if (WipeTargetFolder) + { + ZEN_TRACE_CPU("UpdateFolder_WipeTarget"); + Stopwatch Timer; + + // Clean target folder + if (!CleanDirectory(Path, DefaultExcludeFolders)) + { + ZEN_WARN("Some files in {} could not be removed", Path); + } + RebuildFolderStateStats.CleanFolderElapsedWallTimeUs = Timer.GetElapsedTimeUs(); + } + + if (AbortFlag) + { + return; + } + + { + ZEN_TRACE_CPU("UpdateFolder_FinalizeTree"); + Stopwatch Timer; + + WorkerThreadPool& WritePool = GetIOWorkerPool(); + + ProgressBar RebuildProgressBar(ProgressMode, "Rebuild State"); + ParallelWork Work(AbortFlag, PauseFlag); + + OutLocalFolderState.Paths.resize(RemoteContent.Paths.size()); + OutLocalFolderState.RawSizes.resize(RemoteContent.Paths.size()); + OutLocalFolderState.Attributes.resize(RemoteContent.Paths.size()); + OutLocalFolderState.ModificationTicks.resize(RemoteContent.Paths.size()); + + std::atomic<uint64_t> DeletedCount = 0; + + for (uint32_t LocalPathIndex : RemoveLocalPathIndexes) + { + if (AbortFlag) + { + break; + } + Work.ScheduleWork(WritePool, [&Path, &LocalContent, &DeletedCount, LocalPathIndex](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("FinalizeTree_Remove"); + const std::filesystem::path LocalFilePath = (Path / LocalContent.Paths[LocalPathIndex]).make_preferred(); + SetFileReadOnlyWithRetry(LocalFilePath, false); + RemoveFileWithRetry(LocalFilePath); + DeletedCount++; + } + }); + } + + std::atomic<uint64_t> TargetsComplete = 0; + + struct FinalizeTarget + { + IoHash RawHash; + uint32_t RemotePathIndex; + }; + + std::vector<FinalizeTarget> Targets; + Targets.reserve(RemoteContent.Paths.size()); + for (uint32_t RemotePathIndex = 0; RemotePathIndex < RemoteContent.Paths.size(); RemotePathIndex++) + { + Targets.push_back(FinalizeTarget{.RawHash = RemoteContent.RawHashes[RemotePathIndex], .RemotePathIndex = RemotePathIndex}); + } + std::sort(Targets.begin(), Targets.end(), [](const FinalizeTarget& Lhs, const FinalizeTarget& Rhs) { + if (Lhs.RawHash < Rhs.RawHash) + { + return true; + } + else if (Lhs.RawHash > Rhs.RawHash) + { + return false; + } + return Lhs.RemotePathIndex < Rhs.RemotePathIndex; + }); + + size_t TargetOffset = 0; + while (TargetOffset < Targets.size()) + { + if (AbortFlag) + { + break; + } + + size_t TargetCount = 1; + while (Targets[TargetOffset + TargetCount].RawHash == Targets[TargetOffset].RawHash) + { + TargetCount++; + } + + Work.ScheduleWork( + WritePool, + [&Path, + &LocalContent, + &SequenceHashToLocalPathIndex, + &RemoteContent, + &RemoteLookup, + &CacheFolderPath, + &Targets, + &RemotePathIndexToLocalPathIndex, + &RebuildFolderStateStats, + &OutLocalFolderState, + BaseTargetOffset = TargetOffset, + TargetCount, + &TargetsComplete](std::atomic<bool>&) { + if (!AbortFlag) + { + ZEN_TRACE_CPU("FinalizeTree_Work"); + + size_t TargetOffset = BaseTargetOffset; + const IoHash& RawHash = Targets[TargetOffset].RawHash; + + if (RawHash == IoHash::Zero) + { + ZEN_TRACE_CPU("ZeroSize"); + while (TargetOffset < (BaseTargetOffset + TargetCount)) + { + const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex; + ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash); + const std::filesystem::path& TargetPath = RemoteContent.Paths[RemotePathIndex]; + std::filesystem::path TargetFilePath = (Path / TargetPath).make_preferred(); + if (!RemotePathIndexToLocalPathIndex[RemotePathIndex]) + { + if (IsFileWithRetry(TargetFilePath)) + { + SetFileReadOnlyWithRetry(TargetFilePath, false); + } + else + { + CreateDirectories(TargetFilePath.parent_path()); + } + BasicFile OutputFile; + OutputFile.Open(TargetFilePath, BasicFile::Mode::kTruncate); + } + OutLocalFolderState.Paths[RemotePathIndex] = TargetPath; + OutLocalFolderState.RawSizes[RemotePathIndex] = RemoteContent.RawSizes[RemotePathIndex]; + + OutLocalFolderState.Attributes[RemotePathIndex] = + RemoteContent.Attributes.empty() + ? GetNativeFileAttributes(TargetFilePath) + : SetNativeFileAttributes(TargetFilePath, + RemoteContent.Platform, + RemoteContent.Attributes[RemotePathIndex]); + OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath); + + TargetOffset++; + TargetsComplete++; + } + } + else + { + ZEN_TRACE_CPU("Files"); + ZEN_ASSERT(RemoteLookup.RawHashToSequenceIndex.contains(RawHash)); + const uint32_t FirstRemotePathIndex = Targets[TargetOffset].RemotePathIndex; + const std::filesystem::path& FirstTargetPath = RemoteContent.Paths[FirstRemotePathIndex]; + std::filesystem::path FirstTargetFilePath = (Path / FirstTargetPath).make_preferred(); + + if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(FirstRemotePathIndex); + InPlaceIt != RemotePathIndexToLocalPathIndex.end()) + { + ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); + } + else + { + if (IsFileWithRetry(FirstTargetFilePath)) + { + SetFileReadOnlyWithRetry(FirstTargetFilePath, false); + } + else + { + CreateDirectories(FirstTargetFilePath.parent_path()); + } + + if (auto InplaceIt = SequenceHashToLocalPathIndex.find(RawHash); + InplaceIt != SequenceHashToLocalPathIndex.end()) + { + ZEN_TRACE_CPU("Copy"); + const uint32_t LocalPathIndex = InplaceIt->second; + const std::filesystem::path& SourcePath = LocalContent.Paths[LocalPathIndex]; + std::filesystem::path SourceFilePath = (Path / SourcePath).make_preferred(); + ZEN_ASSERT_SLOW(IsFileWithRetry(SourceFilePath)); + + ZEN_DEBUG("Copying from '{}' -> '{}'", SourceFilePath, FirstTargetFilePath); + const uint64_t RawSize = LocalContent.RawSizes[LocalPathIndex]; + std::atomic<uint64_t> WriteCount; + std::atomic<uint64_t> WriteByteCount; + CopyFile(SourceFilePath, FirstTargetFilePath, RawSize, WriteCount, WriteByteCount); + RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++; + } + else + { + ZEN_TRACE_CPU("Rename"); + const std::filesystem::path CacheFilePath = + GetFinalChunkedSequenceFileName(CacheFolderPath, RawHash); + ZEN_ASSERT_SLOW(IsFileWithRetry(CacheFilePath)); + + RenameFileWithRetry(CacheFilePath, FirstTargetFilePath); + + RebuildFolderStateStats.FinalizeTreeFilesMovedCount++; + } + } + + OutLocalFolderState.Paths[FirstRemotePathIndex] = FirstTargetPath; + OutLocalFolderState.RawSizes[FirstRemotePathIndex] = RemoteContent.RawSizes[FirstRemotePathIndex]; + + OutLocalFolderState.Attributes[FirstRemotePathIndex] = + RemoteContent.Attributes.empty() + ? GetNativeFileAttributes(FirstTargetFilePath) + : SetNativeFileAttributes(FirstTargetFilePath, + RemoteContent.Platform, + RemoteContent.Attributes[FirstRemotePathIndex]); + OutLocalFolderState.ModificationTicks[FirstRemotePathIndex] = + GetModificationTickFromPath(FirstTargetFilePath); + + TargetOffset++; + TargetsComplete++; + + while (TargetOffset < (BaseTargetOffset + TargetCount)) + { + const uint32_t RemotePathIndex = Targets[TargetOffset].RemotePathIndex; + ZEN_ASSERT(Targets[TargetOffset].RawHash == RawHash); + const std::filesystem::path& TargetPath = RemoteContent.Paths[RemotePathIndex]; + std::filesystem::path TargetFilePath = (Path / TargetPath).make_preferred(); + + if (auto InPlaceIt = RemotePathIndexToLocalPathIndex.find(RemotePathIndex); + InPlaceIt != RemotePathIndexToLocalPathIndex.end()) + { + ZEN_ASSERT_SLOW(IsFileWithRetry(TargetFilePath)); + } + else + { + ZEN_TRACE_CPU("Copy"); + if (IsFileWithRetry(TargetFilePath)) + { + SetFileReadOnlyWithRetry(TargetFilePath, false); + } + else + { + CreateDirectories(TargetFilePath.parent_path()); + } + + ZEN_ASSERT_SLOW(IsFileWithRetry(FirstTargetFilePath)); + ZEN_DEBUG("Copying from '{}' -> '{}'", FirstTargetFilePath, TargetFilePath); + const uint64_t RawSize = RemoteContent.RawSizes[RemotePathIndex]; + std::atomic<uint64_t> WriteCount; + std::atomic<uint64_t> WriteByteCount; + CopyFile(FirstTargetFilePath, TargetFilePath, RawSize, WriteCount, WriteByteCount); + RebuildFolderStateStats.FinalizeTreeFilesCopiedCount++; + } + + OutLocalFolderState.Paths[RemotePathIndex] = TargetPath; + OutLocalFolderState.RawSizes[RemotePathIndex] = RemoteContent.RawSizes[RemotePathIndex]; + + OutLocalFolderState.Attributes[RemotePathIndex] = + RemoteContent.Attributes.empty() + ? GetNativeFileAttributes(TargetFilePath) + : SetNativeFileAttributes(TargetFilePath, + RemoteContent.Platform, + RemoteContent.Attributes[RemotePathIndex]); + OutLocalFolderState.ModificationTicks[RemotePathIndex] = GetModificationTickFromPath(TargetFilePath); + + TargetOffset++; + TargetsComplete++; + } + } + } + }); + + TargetOffset += TargetCount; + } + + { + ZEN_TRACE_CPU("FinalizeTree_Wait"); + + Work.Wait(GetUpdateDelayMS(ProgressMode), [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + const uint64_t WorkTotal = Targets.size() + RemoveLocalPathIndexes.size(); + const uint64_t WorkComplete = TargetsComplete.load() + DeletedCount.load(); + std::string Details = fmt::format("{}/{} files", WorkComplete, WorkTotal); + RebuildProgressBar.UpdateState({.Task = "Rebuilding state ", + .Details = Details, + .TotalCount = gsl::narrow<uint64_t>(WorkTotal), + .RemainingCount = gsl::narrow<uint64_t>(WorkTotal - WorkComplete), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + } + + RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs = Timer.GetElapsedTimeUs(); + RebuildProgressBar.Finish(); + } + } + + std::string GetCbObjectAsNiceString(CbObjectView Object, std::string_view Prefix, std::string_view Suffix) + { + ExtendableStringBuilder<512> SB; + std::vector<std::pair<std::string, std::string>> NameStringValuePairs; + for (CbFieldView Field : Object) + { + std::string_view Name = Field.GetName(); + switch (CbValue Accessor = Field.GetValue(); Accessor.GetType()) + { + case CbFieldType::String: + NameStringValuePairs.push_back({std::string(Name), std::string(Accessor.AsString())}); + break; + case CbFieldType::IntegerPositive: + NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerPositive())}); + break; + case CbFieldType::IntegerNegative: + NameStringValuePairs.push_back({std::string(Name), fmt::format("{}", Accessor.AsIntegerNegative())}); + break; + case CbFieldType::Float32: + { + const float Value = Accessor.AsFloat32(); + if (std::isfinite(Value)) + { + NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.9g}", Value)}); + } + else + { + NameStringValuePairs.push_back({std::string(Name), "null"}); + } + } + break; + case CbFieldType::Float64: + { + const double Value = Accessor.AsFloat64(); + if (std::isfinite(Value)) + { + NameStringValuePairs.push_back({std::string(Name), fmt::format("{:.17g}", Value)}); + } + else + { + NameStringValuePairs.push_back({std::string(Name), "null"}); + } + } + break; + case CbFieldType::BoolFalse: + NameStringValuePairs.push_back({std::string(Name), "false"}); + break; + case CbFieldType::BoolTrue: + NameStringValuePairs.push_back({std::string(Name), "true"}); + break; + case CbFieldType::Hash: + { + NameStringValuePairs.push_back({std::string(Name), Accessor.AsHash().ToHexString()}); + } + break; + case CbFieldType::Uuid: + { + StringBuilder<Oid::StringLength + 1> Builder; + Accessor.AsUuid().ToString(Builder); + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + } + break; + case CbFieldType::DateTime: + { + ExtendableStringBuilder<64> Builder; + Builder << DateTime(Accessor.AsDateTimeTicks()).ToIso8601(); + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + } + break; + case CbFieldType::TimeSpan: + { + ExtendableStringBuilder<64> Builder; + const TimeSpan Span(Accessor.AsTimeSpanTicks()); + if (Span.GetDays() == 0) + { + Builder << Span.ToString("%h:%m:%s.%n"); + } + else + { + Builder << Span.ToString("%d.%h:%m:%s.%n"); + } + NameStringValuePairs.push_back({std::string(Name), Builder.ToString()}); + break; + } + case CbFieldType::ObjectId: + NameStringValuePairs.push_back({std::string(Name), Accessor.AsObjectId().ToString()}); + break; + } + } + std::string::size_type LongestKey = 0; + for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) + { + LongestKey = Max(KeyValue.first.length(), LongestKey); + } + for (const std::pair<std::string, std::string>& KeyValue : NameStringValuePairs) + { + SB.Append(fmt::format("{}{:<{}}: {}{}", Prefix, KeyValue.first, LongestKey, KeyValue.second, Suffix)); + } + return SB.ToString(); + } + + std::vector<std::pair<Oid, std::string>> ResolveBuildPartNames(BuildStorage& Storage, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + std::uint64_t& OutPreferredMultipartChunkSize) + { + std::vector<std::pair<Oid, std::string>> Result; + { + Stopwatch GetBuildTimer; + CbObject BuildObject = Storage.GetBuild(BuildId); + ZEN_CONSOLE("GetBuild took {}. Name: '{}', Payload size: {}", + NiceTimeSpanMs(GetBuildTimer.GetElapsedTimeMs()), + BuildObject["name"sv].AsString(), + NiceBytes(BuildObject.GetSize())); + + ZEN_CONSOLE("{}", GetCbObjectAsNiceString(BuildObject, " "sv, "\n"sv)); + + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + if (!PartsObject) + { + throw std::runtime_error("Build object does not have a 'parts' object"); + } + + OutPreferredMultipartChunkSize = BuildObject["chunkSize"sv].AsUInt64(OutPreferredMultipartChunkSize); + + std::vector<std::pair<Oid, std::string>> AvailableParts; + + for (CbFieldView PartView : PartsObject) + { + const std::string BuildPartName = std::string(PartView.GetName()); + const Oid BuildPartId = PartView.AsObjectId(); + if (BuildPartId == Oid::Zero) + { + ExtendableStringBuilder<128> SB; + for (CbFieldView ScanPartView : PartsObject) + { + SB.Append(fmt::format("\n {}: {}", ScanPartView.GetName(), ScanPartView.AsObjectId())); + } + throw std::runtime_error( + fmt::format("Build object parts does not have a '{}' object id{}", BuildPartName, SB.ToView())); + } + AvailableParts.push_back({BuildPartId, BuildPartName}); + } + + if (BuildPartIds.empty() && BuildPartNames.empty()) + { + Result = AvailableParts; + } + else + { + for (const std::string& BuildPartName : BuildPartNames) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartName](const auto& Part) { return Part.second == BuildPartName; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part named '{}'", BuildId, BuildPartName)); + } + } + for (const Oid& BuildPartId : BuildPartIds) + { + if (auto It = std::find_if(AvailableParts.begin(), + AvailableParts.end(), + [&BuildPartId](const auto& Part) { return Part.first == BuildPartId; }); + It != AvailableParts.end()) + { + Result.push_back(*It); + } + else + { + throw std::runtime_error(fmt::format("Build {} object does not have a part with id '{}'", BuildId, BuildPartId)); + } + } + } + + if (Result.empty()) + { + throw std::runtime_error(fmt::format("Build object does not have any parts", BuildId)); + } + } + return Result; + } + + ChunkedFolderContent GetRemoteContent(StorageInstance& Storage, + const Oid& BuildId, + const std::vector<std::pair<Oid, std::string>>& BuildParts, + std::unique_ptr<ChunkingController>& OutChunkController, + std::vector<ChunkedFolderContent>& OutPartContents, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes) + { + ZEN_TRACE_CPU("GetRemoteContent"); + + Stopwatch GetBuildPartTimer; + const Oid BuildPartId = BuildParts[0].first; + const std::string_view BuildPartName = BuildParts[0].second; + CbObject BuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, BuildPartId); + ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", + BuildPartId, + BuildPartName, + NiceTimeSpanMs(GetBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(BuildPartManifest.GetSize())); + ZEN_CONSOLE("{}", GetCbObjectAsNiceString(BuildPartManifest, " "sv, "\n"sv)); + + { + CbObjectView Chunker = BuildPartManifest["chunker"sv].AsObjectView(); + std::string_view ChunkerName = Chunker["name"sv].AsString(); + CbObjectView Parameters = Chunker["parameters"sv].AsObjectView(); + OutChunkController = CreateChunkingController(ChunkerName, Parameters); + } + + auto ParseBuildPartManifest = [](StorageInstance& Storage, + const Oid& BuildId, + const Oid& BuildPartId, + CbObject BuildPartManifest, + ChunkedFolderContent& OutRemoteContent, + std::vector<ChunkBlockDescription>& OutBlockDescriptions, + std::vector<IoHash>& OutLooseChunkHashes) { + std::vector<uint32_t> AbsoluteChunkOrders; + std::vector<uint64_t> LooseChunkRawSizes; + std::vector<IoHash> BlockRawHashes; + + ReadBuildContentFromCompactBinary(BuildPartManifest, + OutRemoteContent.Platform, + OutRemoteContent.Paths, + OutRemoteContent.RawHashes, + OutRemoteContent.RawSizes, + OutRemoteContent.Attributes, + OutRemoteContent.ChunkedContent.SequenceRawHashes, + OutRemoteContent.ChunkedContent.ChunkCounts, + AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + BlockRawHashes); + + // TODO: GetBlockDescriptions for all BlockRawHashes in one go - check for local block descriptions when we cache them + + { + Stopwatch GetBlockMetadataTimer; + + std::vector<ChunkBlockDescription> UnorderedList; + tsl::robin_map<IoHash, size_t, IoHash::Hasher> BlockDescriptionLookup; + if (Storage.BuildCacheStorage) + { + std::vector<CbObject> CacheBlockMetadatas = Storage.BuildCacheStorage->GetBlobMetadatas(BuildId, BlockRawHashes); + UnorderedList.reserve(CacheBlockMetadatas.size()); + for (size_t CacheBlockMetadataIndex = 0; CacheBlockMetadataIndex < CacheBlockMetadatas.size(); + CacheBlockMetadataIndex++) + { + const CbObject& CacheBlockMetadata = CacheBlockMetadatas[CacheBlockMetadataIndex]; + ChunkBlockDescription Description = ParseChunkBlockDescription(CacheBlockMetadata); + if (Description.BlockHash == IoHash::Zero) + { + ZEN_WARN("Unexpected/invalid block metadata received from remote cache, skipping block"); + } + else + { + UnorderedList.emplace_back(std::move(Description)); + } + } + for (size_t DescriptionIndex = 0; DescriptionIndex < UnorderedList.size(); DescriptionIndex++) + { + const ChunkBlockDescription& Description = UnorderedList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, DescriptionIndex); + } + } + + if (UnorderedList.size() < BlockRawHashes.size()) + { + std::vector<IoHash> RemainingBlockHashes; + RemainingBlockHashes.reserve(BlockRawHashes.size() - UnorderedList.size()); + for (const IoHash& BlockRawHash : BlockRawHashes) + { + if (!BlockDescriptionLookup.contains(BlockRawHash)) + { + RemainingBlockHashes.push_back(BlockRawHash); + } + } + CbObject BlockMetadatas = Storage.BuildStorage->GetBlockMetadatas(BuildId, RemainingBlockHashes); + std::vector<ChunkBlockDescription> RemainingList; + { + CbArrayView BlocksArray = BlockMetadatas["blocks"sv].AsArrayView(); + std::vector<IoHash> FoundBlockHashes; + std::vector<CbObject> FoundBlockMetadatas; + for (CbFieldView Block : BlocksArray) + { + ChunkBlockDescription Description = ParseChunkBlockDescription(Block.AsObjectView()); + + if (Description.BlockHash == IoHash::Zero) + { + ZEN_WARN("Unexpected/invalid block metadata received from remote store, skipping block"); + } + else + { + if (Storage.BuildCacheStorage) + { + UniqueBuffer MetaBuffer = UniqueBuffer::Alloc(Block.GetSize()); + Block.CopyTo(MetaBuffer.GetMutableView()); + CbObject BlockMetadata(MetaBuffer.MoveToShared()); + + FoundBlockHashes.push_back(Description.BlockHash); + FoundBlockMetadatas.push_back(BlockMetadata); + } + RemainingList.emplace_back(std::move(Description)); + } + } + if (Storage.BuildCacheStorage && !FoundBlockHashes.empty()) + { + Storage.BuildCacheStorage->PutBlobMetadatas(BuildId, FoundBlockHashes, FoundBlockMetadatas); + } + } + + for (size_t DescriptionIndex = 0; DescriptionIndex < RemainingList.size(); DescriptionIndex++) + { + const ChunkBlockDescription& Description = RemainingList[DescriptionIndex]; + BlockDescriptionLookup.insert_or_assign(Description.BlockHash, UnorderedList.size() + DescriptionIndex); + } + UnorderedList.insert(UnorderedList.end(), RemainingList.begin(), RemainingList.end()); + } + + OutBlockDescriptions.reserve(BlockDescriptionLookup.size()); + for (const IoHash& BlockHash : BlockRawHashes) + { + if (auto It = BlockDescriptionLookup.find(BlockHash); It != BlockDescriptionLookup.end()) + { + OutBlockDescriptions.push_back(std::move(UnorderedList[It->second])); + } + } + + ZEN_CONSOLE("GetBlockMetadata for {} took {}. Found {} blocks", + BuildPartId, + NiceTimeSpanMs(GetBlockMetadataTimer.GetElapsedTimeMs()), + OutBlockDescriptions.size()); + } + + if (OutBlockDescriptions.size() != BlockRawHashes.size()) + { + bool AttemptFallback = false; + std::string ErrorDescription = + fmt::format("All required blocks could not be found, {} blocks does not have metadata in this context.", + BlockRawHashes.size() - OutBlockDescriptions.size()); + if (AttemptFallback) + { + ZEN_CONSOLE("{} Attemping fallback options.", ErrorDescription); + std::vector<ChunkBlockDescription> AugmentedBlockDescriptions; + AugmentedBlockDescriptions.reserve(BlockRawHashes.size()); + std::vector<ChunkBlockDescription> FoundBlocks = + ParseChunkBlockDescriptionList(Storage.BuildStorage->FindBlocks(BuildId, (uint64_t)-1)); + + for (const IoHash& BlockHash : BlockRawHashes) + { + if (auto It = std::find_if( + OutBlockDescriptions.begin(), + OutBlockDescriptions.end(), + [BlockHash](const ChunkBlockDescription& Description) { return Description.BlockHash == BlockHash; }); + It != OutBlockDescriptions.end()) + { + AugmentedBlockDescriptions.emplace_back(std::move(*It)); + } + else if (auto ListBlocksIt = std::find_if( + FoundBlocks.begin(), + FoundBlocks.end(), + [BlockHash](const ChunkBlockDescription& Description) { return Description.BlockHash == BlockHash; }); + ListBlocksIt != FoundBlocks.end()) + { + ZEN_CONSOLE("Found block {} via context find successfully", BlockHash); + AugmentedBlockDescriptions.emplace_back(std::move(*ListBlocksIt)); + } + else + { + IoBuffer BlockBuffer = Storage.BuildStorage->GetBuildBlob(BuildId, BlockHash); + if (!BlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} could not be found", BlockHash)); + } + IoHash BlockRawHash; + uint64_t BlockRawSize; + CompressedBuffer CompressedBlockBuffer = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(BlockBuffer)), BlockRawHash, BlockRawSize); + if (!CompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} is not a compressed buffer", BlockHash)); + } + + if (BlockRawHash != BlockHash) + { + throw std::runtime_error( + fmt::format("Block {} header has a mismatching raw hash {}", BlockHash, BlockRawHash)); + } + + CompositeBuffer DecompressedBlockBuffer = CompressedBlockBuffer.DecompressToComposite(); + if (!DecompressedBlockBuffer) + { + throw std::runtime_error(fmt::format("Block {} failed to decompress", BlockHash)); + } + + ChunkBlockDescription MissingChunkDescription = + GetChunkBlockDescription(DecompressedBlockBuffer.Flatten(), BlockHash); + AugmentedBlockDescriptions.emplace_back(std::move(MissingChunkDescription)); + } + } + OutBlockDescriptions.swap(AugmentedBlockDescriptions); + } + else + { + throw std::runtime_error(ErrorDescription); + } + } + + CalculateLocalChunkOrders(AbsoluteChunkOrders, + OutLooseChunkHashes, + LooseChunkRawSizes, + OutBlockDescriptions, + OutRemoteContent.ChunkedContent.ChunkHashes, + OutRemoteContent.ChunkedContent.ChunkRawSizes, + OutRemoteContent.ChunkedContent.ChunkOrders); + }; + + OutPartContents.resize(1); + ParseBuildPartManifest(Storage, + BuildId, + BuildPartId, + BuildPartManifest, + OutPartContents[0], + OutBlockDescriptions, + OutLooseChunkHashes); + ChunkedFolderContent RemoteContent; + if (BuildParts.size() > 1) + { + std::vector<ChunkBlockDescription> OverlayBlockDescriptions; + std::vector<IoHash> OverlayLooseChunkHashes; + for (size_t PartIndex = 1; PartIndex < BuildParts.size(); PartIndex++) + { + const Oid& OverlayBuildPartId = BuildParts[PartIndex].first; + const std::string& OverlayBuildPartName = BuildParts[PartIndex].second; + Stopwatch GetOverlayBuildPartTimer; + CbObject OverlayBuildPartManifest = Storage.BuildStorage->GetBuildPart(BuildId, OverlayBuildPartId); + ZEN_CONSOLE("GetBuildPart {} ('{}') took {}. Payload size: {}", + OverlayBuildPartId, + OverlayBuildPartName, + NiceTimeSpanMs(GetOverlayBuildPartTimer.GetElapsedTimeMs()), + NiceBytes(OverlayBuildPartManifest.GetSize())); + + ChunkedFolderContent OverlayPartContent; + std::vector<ChunkBlockDescription> OverlayPartBlockDescriptions; + std::vector<IoHash> OverlayPartLooseChunkHashes; + + ParseBuildPartManifest(Storage, + BuildId, + OverlayBuildPartId, + OverlayBuildPartManifest, + OverlayPartContent, + OverlayPartBlockDescriptions, + OverlayPartLooseChunkHashes); + OutPartContents.push_back(OverlayPartContent); + OverlayBlockDescriptions.insert(OverlayBlockDescriptions.end(), + OverlayPartBlockDescriptions.begin(), + OverlayPartBlockDescriptions.end()); + OverlayLooseChunkHashes.insert(OverlayLooseChunkHashes.end(), + OverlayPartLooseChunkHashes.begin(), + OverlayPartLooseChunkHashes.end()); + } + + RemoteContent = + MergeChunkedFolderContents(OutPartContents[0], std::span<const ChunkedFolderContent>(OutPartContents).subspan(1)); + { + tsl::robin_set<IoHash> AllBlockHashes; + for (const ChunkBlockDescription& Description : OutBlockDescriptions) + { + AllBlockHashes.insert(Description.BlockHash); + } + for (const ChunkBlockDescription& Description : OverlayBlockDescriptions) + { + if (!AllBlockHashes.contains(Description.BlockHash)) + { + AllBlockHashes.insert(Description.BlockHash); + OutBlockDescriptions.push_back(Description); + } + } + } + { + tsl::robin_set<IoHash> AllLooseChunkHashes(OutLooseChunkHashes.begin(), OutLooseChunkHashes.end()); + for (const IoHash& OverlayLooseChunkHash : OverlayLooseChunkHashes) + { + if (!AllLooseChunkHashes.contains(OverlayLooseChunkHash)) + { + AllLooseChunkHashes.insert(OverlayLooseChunkHash); + OutLooseChunkHashes.push_back(OverlayLooseChunkHash); + } + } + } + } + else + { + RemoteContent = OutPartContents[0]; + } + return RemoteContent; + } + + ChunkedFolderContent GetLocalContent(GetFolderContentStatistics& LocalFolderScanStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + const std::filesystem::path& StateFilePath, + ChunkingController& ChunkController, + std::span<const std::filesystem::path> ReferencePaths, + FolderContent& OutLocalFolderContent) + { + FolderContent LocalFolderState; + ChunkedFolderContent LocalContent; + + Stopwatch ReadStateTimer; + const bool HasLocalState = IsFile(StateFilePath) && ReadStateFile(StateFilePath, LocalFolderState, LocalContent); + if (HasLocalState) + { + ZEN_CONSOLE("Read local state file {} in {}", StateFilePath, NiceTimeSpanMs(ReadStateTimer.GetElapsedTimeMs())); + } + { + const uint32_t LocalPathCount = gsl::narrow<uint32_t>(ReferencePaths.size()); + const uint32_t RemotePathCount = gsl::narrow<uint32_t>(LocalFolderState.Paths.size()); + + std::vector<std::filesystem::path> PathsToCheck; + PathsToCheck.reserve(LocalPathCount + RemotePathCount); + + tsl::robin_set<std::string> FileSet; + FileSet.reserve(LocalPathCount + RemotePathCount); + + for (const std::filesystem::path& LocalPath : LocalFolderState.Paths) + { + FileSet.insert(LocalPath.generic_string()); + PathsToCheck.push_back(LocalPath); + } + + for (const std::filesystem::path& RemotePath : ReferencePaths) + { + if (FileSet.insert(RemotePath.generic_string()).second) + { + PathsToCheck.push_back(RemotePath); + } + } + + ProgressBar ProgressBar(ProgressMode, "Check Files"); + OutLocalFolderContent = GetValidFolderContent( + LocalFolderScanStats, + Path, + PathsToCheck, + [&ProgressBar, &LocalFolderScanStats](uint64_t PathCount, uint64_t CompletedPathCount) { + std::string Details = + fmt::format("{}/{} checked, {} found", CompletedPathCount, PathCount, LocalFolderScanStats.FoundFileCount.load()); + ProgressBar.UpdateState({.Task = "Checking files ", + .Details = Details, + .TotalCount = PathCount, + .RemainingCount = PathCount - CompletedPathCount, + .Status = ProgressBar::State::CalculateStatus(AbortFlag, PauseFlag)}, + false); + }); + ProgressBar.Finish(); + if (AbortFlag) + { + return {}; + } + } + + bool ScanContent = true; + std::vector<uint32_t> PathIndexesOufOfDate; + if (HasLocalState) + { + if (!LocalFolderState.AreKnownFilesEqual(OutLocalFolderContent)) + { + const size_t LocaStatePathCount = LocalFolderState.Paths.size(); + std::vector<std::filesystem::path> DeletedPaths; + FolderContent UpdatedContent = GetUpdatedContent(LocalFolderState, OutLocalFolderContent, DeletedPaths); + if (!DeletedPaths.empty()) + { + LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); + } + + ZEN_CONSOLE("Updating state, {} local files deleted and {} local files updated out of {}", + DeletedPaths.size(), + UpdatedContent.Paths.size(), + LocaStatePathCount); + if (UpdatedContent.Paths.size() > 0) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : UpdatedContent.RawSizes) + { + ByteCountToScan += RawSize; + } + ProgressBar ProgressBar(ProgressMode, "Scan Files"); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + ChunkedFolderContent UpdatedLocalContent = ChunkFolderContent( + ChunkingStats, + GetIOWorkerPool(), + Path, + UpdatedContent, + ChunkController, + GetUpdateDelayMS(ProgressMode), + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + UpdatedContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(ByteCountToScan), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = ByteCountToScan, + .RemainingCount = ByteCountToScan - ChunkingStats.BytesHashed.load(), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + AbortFlag, + PauseFlag); + + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + if (AbortFlag) + { + return {}; + } + LocalContent = MergeChunkedFolderContents(LocalContent, {{UpdatedLocalContent}}); + } + } + else + { + // Remove files from LocalContent no longer in LocalFolderState + tsl::robin_set<std::string> LocalFolderPaths; + LocalFolderPaths.reserve(LocalFolderState.Paths.size()); + for (const std::filesystem::path& LocalFolderPath : LocalFolderState.Paths) + { + LocalFolderPaths.insert(LocalFolderPath.generic_string()); + } + std::vector<std::filesystem::path> DeletedPaths; + for (const std::filesystem::path& LocalContentPath : LocalContent.Paths) + { + if (!LocalFolderPaths.contains(LocalContentPath.generic_string())) + { + DeletedPaths.push_back(LocalContentPath); + } + } + if (!DeletedPaths.empty()) + { + LocalContent = DeletePathsFromChunkedContent(LocalContent, DeletedPaths); + } + } + ScanContent = false; + } + + if (ScanContent) + { + uint64_t ByteCountToScan = 0; + for (const uint64_t RawSize : OutLocalFolderContent.RawSizes) + { + ByteCountToScan += RawSize; + } + ProgressBar ProgressBar(ProgressMode, "Scan Files"); + FilteredRate FilteredBytesHashed; + FilteredBytesHashed.Start(); + LocalContent = ChunkFolderContent( + ChunkingStats, + GetIOWorkerPool(), + Path, + OutLocalFolderContent, + ChunkController, + GetUpdateDelayMS(ProgressMode), + [&](bool IsAborted, bool IsPaused, std::ptrdiff_t) { + FilteredBytesHashed.Update(ChunkingStats.BytesHashed.load()); + std::string Details = fmt::format("{}/{} ({}/{}, {}B/s) scanned, {} ({}) chunks found", + ChunkingStats.FilesProcessed.load(), + OutLocalFolderContent.Paths.size(), + NiceBytes(ChunkingStats.BytesHashed.load()), + NiceBytes(ByteCountToScan), + NiceNum(FilteredBytesHashed.GetCurrent()), + ChunkingStats.UniqueChunksFound.load(), + NiceBytes(ChunkingStats.UniqueBytesFound.load())); + ProgressBar.UpdateState({.Task = "Scanning files ", + .Details = Details, + .TotalCount = ByteCountToScan, + .RemainingCount = (ByteCountToScan - ChunkingStats.BytesHashed.load()), + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }, + AbortFlag, + PauseFlag); + + FilteredBytesHashed.Stop(); + ProgressBar.Finish(); + if (AbortFlag) + { + return {}; + } + } + return LocalContent; + } + + ChunkedFolderContent ScanAndChunkFolder( + GetFolderContentStatistics& GetFolderContentStats, + ChunkingStatistics& ChunkingStats, + const std::filesystem::path& Path, + std::function<bool(const std::string_view& RelativePath)>&& IsAcceptedFolder, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& IsAcceptedFile, + ChunkingController& ChunkController) + { + Stopwatch Timer; + + ZEN_TRACE_CPU("ScanAndChunkFolder"); + + FolderContent Content = GetFolderContent( + GetFolderContentStats, + Path, + std::move(IsAcceptedFolder), + std::move(IsAcceptedFile), + GetIOWorkerPool(), + GetUpdateDelayMS(ProgressMode), + [](bool, std::ptrdiff_t) {}, + AbortFlag); + if (AbortFlag) + { + return {}; + } + + FolderContent _; + ChunkedFolderContent Result = GetLocalContent(GetFolderContentStats, + ChunkingStats, + Path, + ZenStateFilePath(Path / ZenFolderName), + ChunkController, + Content.Paths, + _); + + const uint64_t TotalRawSize = std::accumulate(Result.RawSizes.begin(), Result.RawSizes.end(), std::uint64_t(0)); + const uint64_t ChunkedRawSize = + std::accumulate(Result.ChunkedContent.ChunkRawSizes.begin(), Result.ChunkedContent.ChunkRawSizes.end(), std::uint64_t(0)); + + ZEN_CONSOLE("Found {} ({}) files divided into {} ({}) unique chunks in '{}' in {}. Average hash rate {}B/sec", + Result.Paths.size(), + NiceBytes(TotalRawSize), + Result.ChunkedContent.ChunkHashes.size(), + NiceBytes(ChunkedRawSize), + Path, + NiceTimeSpanMs(Timer.GetElapsedTimeMs()), + NiceNum(GetBytesPerSecond(ChunkingStats.ElapsedWallTimeUS, ChunkingStats.BytesHashed))); + return Result; + }; + + void DownloadFolder(StorageInstance& Storage, + const Oid& BuildId, + const std::vector<Oid>& BuildPartIds, + std::span<const std::string> BuildPartNames, + const std::filesystem::path& Path, + const std::filesystem::path& ZenFolderPath, + const std::filesystem::path SystemRootDir, + bool AllowMultiparts, + bool AllowPartialBlockRequests, + bool WipeTargetFolder, + bool PostDownloadVerify, + bool PrimeCacheOnly, + bool EnableScavenging) + { + ZEN_TRACE_CPU("DownloadFolder"); + + ProgressBar::SetLogOperationName(ProgressMode, "Download Folder"); + + enum TaskSteps : uint32_t + { + CheckState, + CompareState, + Download, + Verify, + Cleanup, + StepCount + }; + + auto EndProgress = + MakeGuard([&]() { ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::StepCount, TaskSteps::StepCount); }); + + ZEN_ASSERT((!PrimeCacheOnly) || (PrimeCacheOnly && (!AllowPartialBlockRequests))); + + Stopwatch DownloadTimer; + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::CheckState, TaskSteps::StepCount); + + const std::filesystem::path ZenTempFolder = ZenTempFolderPath(ZenFolderPath); + CreateDirectories(ZenTempFolder); + + CreateDirectories(ZenTempBlockFolderPath(ZenFolderPath)); + CreateDirectories(ZenTempCacheFolderPath(ZenFolderPath)); + CreateDirectories(ZenTempDownloadFolderPath(ZenFolderPath)); + + std::uint64_t PreferredMultipartChunkSize = 32u * 1024u * 1024u; + + std::vector<std::pair<Oid, std::string>> AllBuildParts = + ResolveBuildPartNames(*Storage.BuildStorage, BuildId, BuildPartIds, BuildPartNames, PreferredMultipartChunkSize); + + std::vector<ChunkedFolderContent> PartContents; + + std::unique_ptr<ChunkingController> ChunkController; + + std::vector<ChunkBlockDescription> BlockDescriptions; + std::vector<IoHash> LooseChunkHashes; + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::CompareState, TaskSteps::StepCount); + + ChunkedFolderContent RemoteContent = + GetRemoteContent(Storage, BuildId, AllBuildParts, ChunkController, PartContents, BlockDescriptions, LooseChunkHashes); + + const std::uint64_t LargeAttachmentSize = AllowMultiparts ? PreferredMultipartChunkSize * 4u : (std::uint64_t)-1; + GetFolderContentStatistics LocalFolderScanStats; + ChunkingStatistics ChunkingStats; + ChunkedFolderContent LocalContent; + FolderContent LocalFolderContent; + if (!PrimeCacheOnly) + { + if (IsDir(Path)) + { + if (!WipeTargetFolder) + { + if (!ChunkController) + { + ZEN_CONSOLE("Warning: Unspecified chunking algorith, using default"); + ChunkController = CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{}); + } + + LocalContent = GetLocalContent(LocalFolderScanStats, + ChunkingStats, + Path, + ZenStateFilePath(ZenFolderPath), + *ChunkController, + RemoteContent.Paths, + LocalFolderContent); + } + } + else + { + CreateDirectories(Path); + } + } + if (AbortFlag) + { + return; + } + + auto CompareContent = [](const ChunkedFolderContent& Lhs, const ChunkedFolderContent& Rhs) { + tsl::robin_map<std::string, size_t> RhsPathToIndex; + const size_t RhsPathCount = Rhs.Paths.size(); + RhsPathToIndex.reserve(RhsPathCount); + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + const size_t LhsPathCount = Lhs.Paths.size(); + for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++) + { + if (auto It = RhsPathToIndex.find(Lhs.Paths[LhsPathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const size_t RhsPathIndex = It->second; + if ((Lhs.RawHashes[LhsPathIndex] != Rhs.RawHashes[RhsPathIndex]) || + (!FolderContent::AreFileAttributesEqual(Lhs.Attributes[LhsPathIndex], Rhs.Attributes[RhsPathIndex]))) + { + return false; + } + } + else + { + return false; + } + } + tsl::robin_set<std::string> LhsPathExists; + LhsPathExists.reserve(LhsPathCount); + for (size_t LhsPathIndex = 0; LhsPathIndex < LhsPathCount; LhsPathIndex++) + { + LhsPathExists.insert({Lhs.Paths[LhsPathIndex].generic_string()}); + } + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + if (!LhsPathExists.contains(Rhs.Paths[RhsPathIndex].generic_string())) + { + return false; + } + } + + return true; + }; + + if (CompareContent(RemoteContent, LocalContent)) + { + ZEN_CONSOLE("Local state is identical to build to download. All done. Completed in {}.", + NiceTimeSpanMs(DownloadTimer.GetElapsedTimeMs())); + + Stopwatch WriteStateTimer; + CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderContent, Path); + CreateDirectories(ZenStateFilePath(ZenFolderPath).parent_path()); + TemporaryFile::SafeWriteFile(ZenStateFilePath(ZenFolderPath), StateObject.GetView()); + ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs())); + + AddDownloadedPath(SystemRootDir, BuildId, AllBuildParts, ZenStateFilePath(ZenFolderPath), Path); + } + else + { + ExtendableStringBuilder<128> BuildPartString; + for (const std::pair<Oid, std::string>& BuildPart : AllBuildParts) + { + BuildPartString.Append(fmt::format(" {} ({})", BuildPart.second, BuildPart.first)); + } + + uint64_t RawSize = std::accumulate(RemoteContent.RawSizes.begin(), RemoteContent.RawSizes.end(), std::uint64_t(0)); + + ZEN_CONSOLE("Downloading build {}, parts:{} to '{}' ({})", BuildId, BuildPartString.ToView(), Path, NiceBytes(RawSize)); + FolderContent LocalFolderState; + + DiskStatistics DiskStats; + CacheMappingStatistics CacheMappingStats; + DownloadStatistics DownloadStats; + WriteChunkStatistics WriteChunkStats; + RebuildFolderStateStatistics RebuildFolderStateStats; + VerifyFolderStatistics VerifyFolderStats; + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Download, TaskSteps::StepCount); + + UpdateFolder(SystemRootDir, + Storage, + BuildId, + Path, + ZenFolderPath, + LargeAttachmentSize, + PreferredMultipartChunkSize, + LocalContent, + RemoteContent, + BlockDescriptions, + LooseChunkHashes, + AllowPartialBlockRequests, + WipeTargetFolder, + PrimeCacheOnly, + EnableScavenging, + LocalFolderState, + DiskStats, + CacheMappingStats, + DownloadStats, + WriteChunkStats, + RebuildFolderStateStats); + + if (!AbortFlag) + { + if (!PrimeCacheOnly) + { + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Verify, TaskSteps::StepCount); + + VerifyFolder(RemoteContent, Path, PostDownloadVerify, VerifyFolderStats); + + Stopwatch WriteStateTimer; + CbObject StateObject = CreateStateObject(BuildId, AllBuildParts, PartContents, LocalFolderState, Path); + + CreateDirectories(ZenStateFilePath(ZenFolderPath).parent_path()); + TemporaryFile::SafeWriteFile(ZenStateFilePath(ZenFolderPath), StateObject.GetView()); + ZEN_CONSOLE("Wrote local state in {}", NiceTimeSpanMs(WriteStateTimer.GetElapsedTimeMs())); + + AddDownloadedPath(SystemRootDir, BuildId, AllBuildParts, ZenStateFilePath(ZenFolderPath), Path); + +#if 0 + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(StateObject, SB); + WriteFile(ZenStateFileJsonPath(ZenFolderPath), IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); +#endif // 0 + } + const uint64_t DownloadCount = DownloadStats.DownloadedChunkCount.load() + DownloadStats.DownloadedBlockCount.load() + + DownloadStats.DownloadedPartialBlockCount.load(); + const uint64_t DownloadByteCount = DownloadStats.DownloadedChunkByteCount.load() + + DownloadStats.DownloadedBlockByteCount.load() + + DownloadStats.DownloadedPartialBlockByteCount.load(); + const uint64_t DownloadTimeMs = DownloadTimer.GetElapsedTimeMs(); + + ZEN_CONSOLE( + "Downloaded build {}, parts:{} in {}\n" + " Download: {} ({}) {}bits/s\n" + " Write: {} ({}) {}B/s\n" + " Clean: {}\n" + " Finalize: {}\n" + " Verify: {}", + BuildId, + BuildPartString.ToView(), + NiceTimeSpanMs(DownloadTimeMs), + + DownloadCount, + NiceBytes(DownloadByteCount), + NiceNum(GetBytesPerSecond(WriteChunkStats.DownloadTimeUs, DownloadByteCount * 8)), + + DiskStats.WriteCount.load(), + NiceBytes(DiskStats.WriteByteCount.load()), + NiceNum(GetBytesPerSecond(WriteChunkStats.WriteTimeUs, DiskStats.WriteByteCount.load())), + + NiceTimeSpanMs(RebuildFolderStateStats.CleanFolderElapsedWallTimeUs / 1000), + + NiceTimeSpanMs(RebuildFolderStateStats.FinalizeTreeElapsedWallTimeUs / 1000), + + NiceTimeSpanMs(VerifyFolderStats.VerifyElapsedWallTimeUs / 1000)); + } + } + if (PrimeCacheOnly) + { + if (Storage.BuildCacheStorage) + { + Storage.BuildCacheStorage->Flush(5000, [](intptr_t Remaining) { + if (Remaining == 0) + { + ZEN_CONSOLE("Build cache upload complete"); + } + else + { + ZEN_CONSOLE("Waiting for build cache to complete uploading. {} blobs remaining", Remaining); + } + return !AbortFlag; + }); + } + } + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Cleanup, TaskSteps::StepCount); + + if (CleanDirectory(ZenTempFolder, {})) + { + RemoveDirWithRetry(ZenTempFolder); + } + } + + void DiffFolders(const std::filesystem::path& BasePath, const std::filesystem::path& ComparePath, bool OnlyChunked) + { + ZEN_TRACE_CPU("DiffFolders"); + + ProgressBar::SetLogOperationName(ProgressMode, "Diff Folders"); + + enum TaskSteps : uint32_t + { + CheckBase, + CheckCompare, + Diff, + Cleanup, + StepCount + }; + + auto EndProgress = + MakeGuard([&]() { ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::StepCount, TaskSteps::StepCount); }); + + ChunkedFolderContent BaseFolderContent; + ChunkedFolderContent CompareFolderContent; + + { + std::unique_ptr<ChunkingController> ChunkController = + CreateChunkingControllerWithFixedChunking(ChunkingControllerWithFixedChunkingSettings{}); + std::vector<std::string_view> ExcludeExtensions = DefaultExcludeExtensions; + if (OnlyChunked) + { + ExcludeExtensions.insert(ExcludeExtensions.end(), + DefaultChunkingExcludeExtensions.begin(), + DefaultChunkingExcludeExtensions.end()); + } + + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders](const std::string_view& RelativePath) -> bool { + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + auto IsAcceptedFile = [ExcludeExtensions](const std::string_view& RelativePath, uint64_t, uint32_t) -> bool { + for (const std::string_view& ExcludeExtension : ExcludeExtensions) + { + if (RelativePath.ends_with(ExcludeExtension)) + { + return false; + } + } + return true; + }; + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::CheckBase, TaskSteps::StepCount); + + GetFolderContentStatistics BaseGetFolderContentStats; + ChunkingStatistics BaseChunkingStats; + BaseFolderContent = ScanAndChunkFolder(BaseGetFolderContentStats, + BaseChunkingStats, + BasePath, + IsAcceptedFolder, + IsAcceptedFile, + *ChunkController); + if (AbortFlag) + { + return; + } + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::CheckCompare, TaskSteps::StepCount); + + GetFolderContentStatistics CompareGetFolderContentStats; + ChunkingStatistics CompareChunkingStats; + CompareFolderContent = ScanAndChunkFolder(CompareGetFolderContentStats, + CompareChunkingStats, + ComparePath, + IsAcceptedFolder, + IsAcceptedFile, + *ChunkController); + + if (AbortFlag) + { + return; + } + } + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Diff, TaskSteps::StepCount); + + std::vector<IoHash> AddedHashes; + std::vector<IoHash> RemovedHashes; + uint64_t RemovedSize = 0; + uint64_t AddedSize = 0; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> BaseRawHashLookup; + for (size_t PathIndex = 0; PathIndex < BaseFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + BaseRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> CompareRawHashLookup; + for (size_t PathIndex = 0; PathIndex < CompareFolderContent.RawHashes.size(); PathIndex++) + { + const IoHash& RawHash = CompareFolderContent.RawHashes[PathIndex]; + if (!BaseRawHashLookup.contains(RawHash)) + { + AddedHashes.push_back(RawHash); + AddedSize += CompareFolderContent.RawSizes[PathIndex]; + } + CompareRawHashLookup.insert_or_assign(RawHash, PathIndex); + } + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + const IoHash& RawHash = BaseFolderContent.RawHashes[PathIndex]; + if (!CompareRawHashLookup.contains(RawHash)) + { + RemovedHashes.push_back(RawHash); + RemovedSize += BaseFolderContent.RawSizes[PathIndex]; + } + } + + uint64_t BaseTotalRawSize = 0; + for (uint32_t PathIndex = 0; PathIndex < BaseFolderContent.Paths.size(); PathIndex++) + { + BaseTotalRawSize += BaseFolderContent.RawSizes[PathIndex]; + } + + double KeptPercent = BaseTotalRawSize > 0 ? (100.0 * (BaseTotalRawSize - RemovedSize)) / BaseTotalRawSize : 0; + + ZEN_CONSOLE("File diff : {} ({}) removed, {} ({}) added, {} ({} {:.1f}%) kept", + RemovedHashes.size(), + NiceBytes(RemovedSize), + AddedHashes.size(), + NiceBytes(AddedSize), + BaseFolderContent.Paths.size() - RemovedHashes.size(), + NiceBytes(BaseTotalRawSize - RemovedSize), + KeptPercent); + + uint64_t CompareTotalRawSize = 0; + + uint64_t FoundChunkCount = 0; + uint64_t FoundChunkSize = 0; + uint64_t NewChunkCount = 0; + uint64_t NewChunkSize = 0; + const ChunkedContentLookup BaseFolderLookup = BuildChunkedContentLookup(BaseFolderContent); + for (uint32_t ChunkIndex = 0; ChunkIndex < CompareFolderContent.ChunkedContent.ChunkHashes.size(); ChunkIndex++) + { + const IoHash& ChunkHash = CompareFolderContent.ChunkedContent.ChunkHashes[ChunkIndex]; + if (BaseFolderLookup.ChunkHashToChunkIndex.contains(ChunkHash)) + { + FoundChunkCount++; + FoundChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + else + { + NewChunkCount++; + NewChunkSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + CompareTotalRawSize += CompareFolderContent.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + + double FoundPercent = CompareTotalRawSize > 0 ? (100.0 * FoundChunkSize) / CompareTotalRawSize : 0; + double NewPercent = CompareTotalRawSize > 0 ? (100.0 * NewChunkSize) / CompareTotalRawSize : 0; + + ZEN_CONSOLE("Chunk diff: {} ({} {:.1f}%) out of {} ({}) chunks in {} ({}) base chunks. Added {} ({} {:.1f}%) chunks.", + FoundChunkCount, + NiceBytes(FoundChunkSize), + FoundPercent, + CompareFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(CompareTotalRawSize), + BaseFolderContent.ChunkedContent.ChunkHashes.size(), + NiceBytes(BaseTotalRawSize), + NewChunkCount, + NiceBytes(NewChunkSize), + NewPercent); + + ProgressBar::SetLogOperationProgress(ProgressMode, TaskSteps::Cleanup, TaskSteps::StepCount); + } + +} // namespace + +////////////////////////////////////////////////////////////////////////////////////////////////////// + +BuildsCommand::BuildsCommand() +{ + m_Options.add_options()("h,help", "Print help"); + + auto AddSystemOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", "", "system-dir", "Specify system root", cxxopts::value(m_SystemRootDir), "<systemdir>"); + Ops.add_option("", + "", + "use-sparse-files", + "Enable use of sparse files when writing large files. Defaults to true.", + cxxopts::value(m_UseSparseFiles), + "<usesparsefiles>"); + }; + + auto AddAuthOptions = [this](cxxopts::Options& Ops) { + // Direct access token (may expire) + Ops.add_option("auth-token", + "", + "access-token", + "Cloud/Builds Storage access token", + cxxopts::value(m_AccessToken), + "<accesstoken>"); + Ops.add_option("auth-token", + "", + "access-token-env", + "Name of environment variable that holds the cloud/builds Storage access token", + cxxopts::value(m_AccessTokenEnv)->default_value(DefaultAccessTokenEnvVariableName), + "<envvariable>"); + Ops.add_option("auth-token", + "", + "access-token-path", + "Path to json file that holds the cloud/builds Storage access token", + cxxopts::value(m_AccessTokenPath), + "<filepath>"); + + // Auth manager token encryption + Ops.add_option("security", + "", + "encryption-aes-key", + "256 bit AES encryption key", + cxxopts::value<std::string>(m_EncryptionKey), + ""); + Ops.add_option("security", + "", + "encryption-aes-iv", + "128 bit AES encryption initialization vector", + cxxopts::value<std::string>(m_EncryptionIV), + ""); + + // OpenId acccess token + Ops.add_option("openid", + "", + "openid-provider-name", + "Open ID provider name", + cxxopts::value<std::string>(m_OpenIdProviderName), + "Default"); + Ops.add_option("openid", "", "openid-provider-url", "Open ID provider url", cxxopts::value<std::string>(m_OpenIdProviderUrl), ""); + Ops.add_option("openid", "", "openid-client-id", "Open ID client id", cxxopts::value<std::string>(m_OpenIdClientId), ""); + Ops.add_option("openid", + "", + "openid-refresh-token", + "Open ID refresh token", + cxxopts::value<std::string>(m_OpenIdRefreshToken), + ""); + + // OAuth acccess token + Ops.add_option("oauth", "", "oauth-url", "OAuth provier url", cxxopts::value<std::string>(m_OAuthUrl)->default_value(""), ""); + Ops.add_option("oauth", + "", + "oauth-clientid", + "OAuth client id", + cxxopts::value<std::string>(m_OAuthClientId)->default_value(""), + ""); + Ops.add_option("oauth", + "", + "oauth-clientsecret", + "OAuth client secret", + cxxopts::value<std::string>(m_OAuthClientSecret)->default_value(""), + ""); + Ops.add_option("auth", + "", + "oidctoken-exe-path", + "Path to OidcToken executable", + cxxopts::value<std::string>(m_OidcTokenAuthExecutablePath)->default_value(""), + ""); + }; + + auto AddCloudOptions = [this, &AddAuthOptions](cxxopts::Options& Ops) { + AddAuthOptions(Ops); + Ops.add_option("cloud build", "", "override-host", "Cloud Builds URL", cxxopts::value(m_OverrideHost), "<override-host>"); + Ops.add_option("cloud build", + "", + "url", + "Cloud Builds host url (legacy - use --override-host)", + cxxopts::value(m_OverrideHost), + "<url>"); + Ops.add_option("cloud build", "", "cloud-url", "Cloud Artifact URL", cxxopts::value(m_Url), "<cloud-url>"); + Ops.add_option("cloud build", "", "host", "Cloud Builds host", cxxopts::value(m_Host), "<host>"); + Ops.add_option("cloud build", + "", + "assume-http2", + "Assume that the builds endpoint is a HTTP/2 endpoint skipping HTTP/1.1 upgrade handshake", + cxxopts::value(m_AssumeHttp2), + "<assumehttp2>"); + + Ops.add_option("cloud build", "", "namespace", "Builds Storage namespace", cxxopts::value(m_Namespace), "<namespace>"); + Ops.add_option("cloud build", "", "bucket", "Builds Storage bucket", cxxopts::value(m_Bucket), "<bucket>"); + Ops.add_option("cloud build", + "", + "allow-redirect", + "Allow redirect of requests", + cxxopts::value(m_AllowRedirect), + "<allow-redirect>"); + }; + + auto AddFileOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("filestorage", "", "storage-path", "Builds Storage Path", cxxopts::value(m_StoragePath), "<storagepath>"); + Ops.add_option("filestorage", + "", + "json-metadata", + "Write build, part and block metadata as .json files in addition to .cb files", + cxxopts::value(m_WriteMetadataAsJson), + "<jsonmetadata>"); + }; + + auto AddCacheOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("cache", "", "zen-cache-host", "Host ip and port for zen builds cache", cxxopts::value(m_ZenCacheHost), "<zenhost>"); + }; + + auto AddOutputOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("output", + "", + "plain-progress", + "Show progress using plain output", + cxxopts::value(m_PlainProgress), + "<plainprogress>"); + Ops.add_option("output", + "", + "log-progress", + "Write @progress style progress to output", + cxxopts::value(m_LogProgress), + "<logprogress>"); + Ops.add_option("output", "", "verbose", "Enable verbose console output", cxxopts::value(m_Verbose), "<verbose>"); + }; + + auto AddWorkerOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", + "", + "boost-workers", + "Increase the number of worker threads - may cause computer to be less responsive", + cxxopts::value(m_BoostWorkerThreads), + "<boostworkers>"); + }; + + auto AddZenFolderOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", + "", + "zen-folder-path", + fmt::format("Path to zen state and temp folders. Defaults to [--local-path/]{}", ZenFolderName), + cxxopts::value(m_ZenFolderPath), + "<boostworkers>"); + }; + m_Options.add_option("", + "v", + "verb", + "Verb for build - list-namespaces, list, upload, download, diff, fetch-blob, validate-part", + cxxopts::value(m_Verb), + "<verb>"); + m_Options.parse_positional({"verb"}); + m_Options.positional_help("verb"); + + // list-namespaces + AddSystemOptions(m_ListNamespacesOptions); + AddCloudOptions(m_ListNamespacesOptions); + AddFileOptions(m_ListNamespacesOptions); + AddOutputOptions(m_ListNamespacesOptions); + AddZenFolderOptions(m_ListNamespacesOptions); + m_ListNamespacesOptions.add_options()("h,help", "Print help"); + m_ListNamespacesOptions.add_option("", + "", + "recursive", + "Enable fetch of buckets within namespaces also", + cxxopts::value(m_ListNamespacesRecursive), + "<recursive>"); + m_ListNamespacesOptions.add_option("", + "", + "result-path", + "Path to json or compactbinary to write query result to", + cxxopts::value(m_ListResultPath), + "<result-path>"); + m_ListNamespacesOptions.parse_positional({"result-path"}); + m_ListNamespacesOptions.positional_help("result-path"); + + // list + AddSystemOptions(m_ListOptions); + AddCloudOptions(m_ListOptions); + AddFileOptions(m_ListOptions); + AddOutputOptions(m_ListOptions); + AddZenFolderOptions(m_ListOptions); + m_ListOptions.add_options()("h,help", "Print help"); + m_ListOptions.add_option("", + "", + "query-path", + "Path to json or compactbinary file containing list query", + cxxopts::value(m_ListQueryPath), + "<query-path>"); + m_ListOptions.add_option("", + "", + "result-path", + "Path to json or compactbinary to write query result to", + cxxopts::value(m_ListResultPath), + "<result-path>"); + m_ListOptions.parse_positional({"query-path", "result-path"}); + m_ListOptions.positional_help("query-path result-path"); + + // upload + AddSystemOptions(m_UploadOptions); + AddCloudOptions(m_UploadOptions); + AddFileOptions(m_UploadOptions); + AddOutputOptions(m_UploadOptions); + AddCacheOptions(m_UploadOptions); + AddWorkerOptions(m_UploadOptions); + AddZenFolderOptions(m_UploadOptions); + m_UploadOptions.add_options()("h,help", "Print help"); + m_UploadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), "<local-path>"); + m_UploadOptions.add_option("", + "", + "create-build", + "Set to true to create the containing build, if unset a builds-id must be given and the build already exist", + cxxopts::value(m_CreateBuild), + "<id>"); + m_UploadOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_UploadOptions.add_option("", + "", + "build-part-id", + "Build part Id, if not given it will be auto generated", + cxxopts::value(m_BuildPartId), + "<id>"); + m_UploadOptions.add_option("", + "", + "build-part-name", + "Name of the build part, if not given it will be be named after the directory name at end of local-path", + cxxopts::value(m_BuildPartName), + "<name>"); + m_UploadOptions.add_option("", + "", + "metadata-path", + "Path to json file that holds the metadata for the build. Requires the create-build option to be set", + cxxopts::value(m_BuildMetadataPath), + "<metadata-path>"); + m_UploadOptions.add_option( + "", + "", + "metadata", + "Key-value pairs separated by ';' with build meta data. (key1=value1;key2=value2). Requires the create-build option to be set", + cxxopts::value(m_BuildMetadata), + "<metadata>"); + m_UploadOptions.add_option("", "", "clean", "Ignore existing blocks", cxxopts::value(m_Clean), "<clean>"); + m_UploadOptions.add_option("", + "", + "block-min-reuse", + "Percent of an existing block that must be relevant for it to be resused. Defaults to 85.", + cxxopts::value(m_BlockReuseMinPercentLimit), + "<minreuse>"); + m_UploadOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + "<allowmultipart>"); + m_UploadOptions.add_option("", + "", + "manifest-path", + "Path to a text file with one line of <local path>[TAB]<modification date> per file to include.", + cxxopts::value(m_ManifestPath), + "<manifestpath>"); + m_UploadOptions + .add_option("", "", "verify", "Enable post upload verify of all uploaded data", cxxopts::value(m_PostUploadVerify), "<verify>"); + m_UploadOptions.add_option("", + "", + "find-max-block-count", + "The maximum number of blocks we search for in the build context", + cxxopts::value(m_FindBlockMaxCount), + "<maxblockcount>"); + + m_UploadOptions.parse_positional({"local-path", "build-id"}); + m_UploadOptions.positional_help("local-path build-id"); + + // download + AddSystemOptions(m_DownloadOptions); + AddCloudOptions(m_DownloadOptions); + AddFileOptions(m_DownloadOptions); + AddOutputOptions(m_DownloadOptions); + AddCacheOptions(m_DownloadOptions); + AddZenFolderOptions(m_DownloadOptions); + AddWorkerOptions(m_DownloadOptions); + m_DownloadOptions.add_option("cache", + "", + "cache-prime-only", + "Only download blobs missing in cache and upload to cache", + cxxopts::value(m_PrimeCacheOnly), + "<cacheprimeonly>"); + + m_DownloadOptions.add_options()("h,help", "Print help"); + m_DownloadOptions.add_option("", "l", "local-path", "Root file system folder for build", cxxopts::value(m_Path), "<local-path>"); + m_DownloadOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_DownloadOptions.add_option( + "", + "", + "build-part-id", + "Build part Ids list separated by ',', if no build-part-ids or build-part-names are given all parts will be downloaded", + cxxopts::value(m_BuildPartIds), + "<id>"); + m_DownloadOptions.add_option("", + "", + "build-part-name", + "Name of the build parts list separated by ',', if no build-part-ids or build-part-names are given " + "all parts will be downloaded", + cxxopts::value(m_BuildPartNames), + "<name>"); + m_DownloadOptions + .add_option("", "", "clean", "Delete all data in target folder before downloading", cxxopts::value(m_Clean), "<clean>"); + m_DownloadOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + "<allowmultipart>"); + m_DownloadOptions.add_option("", + "", + "allow-partial-block-requests", + "Allow request for partial chunk blocks. Defaults to true.", + cxxopts::value(m_AllowPartialBlockRequests), + "<allowpartialblockrequests>"); + + m_DownloadOptions + .add_option("", "", "verify", "Enable post download verify of all tracked files", cxxopts::value(m_PostDownloadVerify), "<verify>"); + m_DownloadOptions.add_option("", + "", + "enable-scavenge", + "Enable scavenging of data from previouse download locations", + cxxopts::value(m_EnableScavenging), + "<scavenge>"); + m_DownloadOptions.parse_positional({"local-path", "build-id", "build-part-name"}); + m_DownloadOptions.positional_help("local-path build-id build-part-name"); + + AddOutputOptions(m_DiffOptions); + AddWorkerOptions(m_DiffOptions); + m_DiffOptions.add_options()("h,help", "Print help"); + m_DiffOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); + m_DiffOptions.add_option("", "c", "compare-path", "Root file system folder used as diff", cxxopts::value(m_DiffPath), "<diff-path>"); + m_DiffOptions.add_option("", + "", + "only-chunked", + "Skip files from diff summation that are not processed with chunking", + cxxopts::value(m_OnlyChunked), + "<only-chunked>"); + m_DiffOptions.parse_positional({"local-path", "compare-path"}); + m_DiffOptions.positional_help("local-path compare-path"); + + AddSystemOptions(m_TestOptions); + AddCloudOptions(m_TestOptions); + AddFileOptions(m_TestOptions); + AddOutputOptions(m_TestOptions); + AddCacheOptions(m_TestOptions); + AddWorkerOptions(m_TestOptions); + m_TestOptions.add_options()("h,help", "Print help"); + m_TestOptions.add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); + m_TestOptions.add_option("", + "", + "allow-multipart", + "Allow large attachments to be transfered using multipart protocol. Defaults to true.", + cxxopts::value(m_AllowMultiparts), + "<allowmultipart>"); + m_TestOptions.add_option("", + "", + "allow-partial-block-requests", + "Allow request for partial chunk blocks. Defaults to true.", + cxxopts::value(m_AllowPartialBlockRequests), + "<allowpartialblockrequests>"); + m_TestOptions.add_option("", + "", + "enable-scavenge", + "Enable scavenging of data from previouse download locations", + cxxopts::value(m_EnableScavenging), + "<scavenge>"); + m_TestOptions.parse_positional({"local-path"}); + m_TestOptions.positional_help("local-path"); + + AddSystemOptions(m_FetchBlobOptions); + AddCloudOptions(m_FetchBlobOptions); + AddFileOptions(m_FetchBlobOptions); + AddOutputOptions(m_FetchBlobOptions); + AddCacheOptions(m_FetchBlobOptions); + AddZenFolderOptions(m_FetchBlobOptions); + m_FetchBlobOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_FetchBlobOptions + .add_option("", "", "blob-hash", "IoHash in hex form identifying the blob to download", cxxopts::value(m_BlobHash), "<blob-hash>"); + m_FetchBlobOptions.parse_positional({"build-id", "blob-hash"}); + m_FetchBlobOptions.positional_help("build-id blob-hash"); + + auto AddZenProcessOptions = [this](cxxopts::Options& Ops) { + Ops.add_option("", "", "process-id", "Process id of running process", cxxopts::value(m_ZenProcessId), "<pid>"); + }; + AddZenProcessOptions(m_PauseOptions); + m_PauseOptions.parse_positional({"process-id"}); + m_PauseOptions.positional_help("process-id"); + + AddZenProcessOptions(m_ResumeOptions); + m_ResumeOptions.parse_positional({"process-id"}); + m_ResumeOptions.positional_help("process-id"); + + AddZenProcessOptions(m_AbortOptions); + m_AbortOptions.parse_positional({"process-id"}); + m_AbortOptions.positional_help("process-id"); + + AddSystemOptions(m_ValidateBuildPartOptions); + AddCloudOptions(m_ValidateBuildPartOptions); + AddFileOptions(m_ValidateBuildPartOptions); + AddOutputOptions(m_ValidateBuildPartOptions); + AddWorkerOptions(m_ValidateBuildPartOptions); + AddZenFolderOptions(m_ValidateBuildPartOptions); + m_ValidateBuildPartOptions.add_option("", "", "build-id", "Build Id", cxxopts::value(m_BuildId), "<id>"); + m_ValidateBuildPartOptions.add_option("", + "", + "build-part-id", + "Build part Id, if not given it will be auto generated", + cxxopts::value(m_BuildPartId), + "<id>"); + m_ValidateBuildPartOptions.add_option( + "", + "", + "build-part-name", + "Name of the build part, if not given it will be be named after the directory name at end of local-path", + cxxopts::value(m_BuildPartName), + "<name>"); + m_ValidateBuildPartOptions.parse_positional({"build-id", "build-part-id"}); + m_ValidateBuildPartOptions.positional_help("build-id build-part-id"); + + AddSystemOptions(m_MultiTestDownloadOptions); + AddCloudOptions(m_MultiTestDownloadOptions); + AddFileOptions(m_MultiTestDownloadOptions); + AddOutputOptions(m_MultiTestDownloadOptions); + AddCacheOptions(m_MultiTestDownloadOptions); + AddWorkerOptions(m_MultiTestDownloadOptions); + m_MultiTestDownloadOptions + .add_option("", "l", "local-path", "Root file system folder used as base", cxxopts::value(m_Path), "<local-path>"); + m_MultiTestDownloadOptions.add_option("", "", "build-ids", "Build Ids list separated by ','", cxxopts::value(m_BuildIds), "<ids>"); + m_MultiTestDownloadOptions.add_option("", + "", + "enable-scavenge", + "Enable scavenging of data from previouse download locations", + cxxopts::value(m_EnableScavenging), + "<scavenge>"); + m_MultiTestDownloadOptions.parse_positional({"local-path"}); + m_MultiTestDownloadOptions.positional_help("local-path"); +} + +BuildsCommand::~BuildsCommand() = default; + +int +BuildsCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) +{ + ZEN_UNUSED(GlobalOptions); + + signal(SIGINT, SignalCallbackHandler); +#if ZEN_PLATFORM_WINDOWS + signal(SIGBREAK, SignalCallbackHandler); +#endif // ZEN_PLATFORM_WINDOWS + + using namespace std::literals; + + std::vector<char*> SubCommandArguments; + cxxopts::Options* SubOption = nullptr; + int ParentCommandArgCount = GetSubCommand(m_Options, argc, argv, m_SubCommands, SubOption, SubCommandArguments); + if (!ParseOptions(ParentCommandArgCount, argv)) + { + return 0; + } + + if (SubOption == nullptr) + { + throw zen::OptionParseException("command verb is missing"); + } + + if (!ParseOptions(*SubOption, gsl::narrow<int>(SubCommandArguments.size()), SubCommandArguments.data())) + { + return 0; + } + + std::filesystem::path SystemRootDir; + auto ParseSystemOptions = [&]() { + if (m_SystemRootDir.empty()) + { + m_SystemRootDir = PickDefaultSystemRootDirectory(); + } + MakeSafeAbsolutePathÍnPlace(m_SystemRootDir); + }; + ParseSystemOptions(); + + auto ParseStorageOptions = [&](bool RequireNamespace, bool RequireBucket) { + m_Host = RemoveQuotes(m_Host); + m_OverrideHost = RemoveQuotes(m_OverrideHost); + m_Url = RemoveQuotes(m_Url); + m_Namespace = RemoveQuotes(m_Namespace); + m_Bucket = RemoveQuotes(m_Bucket); + if (!m_Url.empty()) + { + if (!m_Host.empty()) + { + throw zen::OptionParseException(fmt::format("host is not compatible with the url option\n{}", SubOption->help())); + } + if (!m_Bucket.empty()) + { + throw zen::OptionParseException(fmt::format("bucket is not compatible with the url option\n{}", SubOption->help())); + } + if (!m_BuildId.empty()) + { + throw zen::OptionParseException(fmt::format("buildid is not compatible with the url option\n{}", SubOption->help())); + } + if (!ParseCloudUrl(m_Url, m_Host, m_Namespace, m_Bucket, m_BuildId)) + { + throw zen::OptionParseException(fmt::format("url does not match the Cloud Artifact URL format\n{}", SubOption->help())); + } + } + + if (!m_OverrideHost.empty() || !m_Host.empty()) + { + if (!m_StoragePath.empty()) + { + throw zen::OptionParseException( + fmt::format("host/url/override-host is not compatible with the storage-path option\n{}", SubOption->help())); + } + if (RequireNamespace && m_Namespace.empty()) + { + throw zen::OptionParseException(fmt::format("namespace option is required for this storage option\n{}", SubOption->help())); + } + if (RequireBucket && m_Bucket.empty()) + { + throw zen::OptionParseException(fmt::format("bucket option is required for this storage option\n{}", SubOption->help())); + } + } + else if (m_StoragePath.empty()) + { + throw zen::OptionParseException(fmt::format("At least one storage option is required\n{}", SubOption->help())); + } + MakeSafeAbsolutePathÍnPlace(m_StoragePath); + }; + + std::unique_ptr<AuthMgr> Auth; + HttpClientSettings ClientSettings{.LogCategory = "httpbuildsclient", + .AssumeHttp2 = m_AssumeHttp2, + .AllowResume = true, + .RetryCount = 2}; + + auto CreateAuthMgr = [&]() { + ZEN_ASSERT(!m_SystemRootDir.empty()); + if (!Auth) + { + if (m_EncryptionKey.empty()) + { + m_EncryptionKey = "abcdefghijklmnopqrstuvxyz0123456"; + ZEN_CONSOLE("Warning: Using default encryption key"); + } + + if (m_EncryptionIV.empty()) + { + m_EncryptionIV = "0123456789abcdef"; + ZEN_CONSOLE("Warning: Using default encryption initialization vector"); + } + + AuthConfig AuthMgrConfig = {.RootDirectory = m_SystemRootDir / "auth", + .EncryptionKey = AesKey256Bit::FromString(m_EncryptionKey), + .EncryptionIV = AesIV128Bit::FromString(m_EncryptionIV)}; + if (!AuthMgrConfig.EncryptionKey.IsValid()) + { + throw zen::OptionParseException("Invalid AES encryption key"); + } + if (!AuthMgrConfig.EncryptionIV.IsValid()) + { + throw zen::OptionParseException("Invalid AES initialization vector"); + } + Auth = AuthMgr::Create(AuthMgrConfig); + } + }; + + auto ParseAuthOptions = [&]() { + m_OpenIdProviderUrl = RemoveQuotes(m_OpenIdProviderUrl); + m_OpenIdClientId = RemoveQuotes(m_OpenIdClientId); + + m_AccessToken = RemoveQuotes(m_AccessToken); + m_EncryptionKey = RemoveQuotes(m_EncryptionKey); + m_EncryptionIV = RemoveQuotes(m_EncryptionIV); + + m_OAuthUrl = RemoveQuotes(m_OAuthUrl); + m_OAuthClientId = RemoveQuotes(m_OAuthClientId); + m_OAuthClientSecret = RemoveQuotes(m_OAuthClientSecret); + + m_OidcTokenAuthExecutablePath = RemoveQuotes(m_OidcTokenAuthExecutablePath); + + if (!m_OpenIdProviderUrl.empty() && !m_OpenIdClientId.empty()) + { + CreateAuthMgr(); + std::string ProviderName = m_OpenIdProviderName.empty() ? "Default" : m_OpenIdProviderName; + Auth->AddOpenIdProvider({.Name = ProviderName, .Url = m_OpenIdProviderUrl, .ClientId = m_OpenIdClientId}); + if (!m_OpenIdRefreshToken.empty()) + { + Auth->AddOpenIdToken({.ProviderName = ProviderName, .RefreshToken = m_OpenIdRefreshToken}); + } + } + + auto GetEnvAccessToken = [](const std::string& AccessTokenEnv) -> std::string { + if (!AccessTokenEnv.empty()) + { + return GetEnvVariable(AccessTokenEnv); + } + return {}; + }; + + auto FindOidcTokenExePath = [](const std::string& OidcTokenAuthExecutablePath) -> std::filesystem::path { + if (OidcTokenAuthExecutablePath.empty()) + { + const std::string OidcExecutableName = "OidcToken" ZEN_EXE_SUFFIX_LITERAL; + std::filesystem::path OidcTokenPath = (GetRunningExecutablePath().parent_path() / OidcExecutableName).make_preferred(); + if (IsFile(OidcTokenPath)) + { + return OidcTokenPath; + } + } + else + { + std::filesystem::path OidcTokenPath = std::filesystem::absolute(StringToPath(OidcTokenAuthExecutablePath)).make_preferred(); + if (IsFile(OidcTokenPath)) + { + return OidcTokenPath; + } + } + return {}; + }; + + if (!m_AccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(m_AccessToken); + } + else if (!m_AccessTokenPath.empty()) + { + MakeSafeAbsolutePathÍnPlace(m_AccessTokenPath); + std::string ResolvedAccessToken = ReadAccessTokenFromFile(m_AccessTokenPath); + if (!ResolvedAccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(ResolvedAccessToken); + } + } + else if (!m_OAuthUrl.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromOAuthClientCredentials( + {.Url = m_OAuthUrl, .ClientId = m_OAuthClientId, .ClientSecret = m_OAuthClientSecret}); + } + else if (!m_OpenIdProviderName.empty()) + { + CreateAuthMgr(); + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromOpenIdProvider(*Auth, m_OpenIdProviderName); + } + else if (std::string ResolvedAccessToken = GetEnvAccessToken(m_AccessTokenEnv); !ResolvedAccessToken.empty()) + { + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromStaticToken(ResolvedAccessToken); + } + else if (std::filesystem::path OidcTokenExePath = FindOidcTokenExePath(m_OidcTokenAuthExecutablePath); !OidcTokenExePath.empty()) + { + const std::string& CloudHost = m_OverrideHost.empty() ? m_Host : m_OverrideHost; + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromOidcTokenExecutable(OidcTokenExePath, CloudHost); + } + + if (!ClientSettings.AccessTokenProvider) + { + CreateAuthMgr(); + ClientSettings.AccessTokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(*Auth); + } + }; + + auto ParseOutputOptions = [&]() { + IsVerbose = m_Verbose; + if (m_LogProgress) + { + ProgressMode = ProgressBar::Mode::Log; + } + else if (m_Verbose || m_PlainProgress) + { + ProgressMode = ProgressBar::Mode::Plain; + } + else + { + ProgressMode = ProgressBar::Mode::Pretty; + } + }; + ParseOutputOptions(); + + auto CreateBuildStorage = [&](BuildStorage::Statistics& StorageStats, + BuildStorageCache::Statistics& StorageCacheStats, + const std::filesystem::path& TempPath, + bool RequireNamespace, + bool RequireBucket) -> StorageInstance { + ParseStorageOptions(RequireNamespace, RequireBucket); + + m_ZenCacheHost = RemoveQuotes(m_ZenCacheHost); + + StorageInstance Result; + + std::string BuildStorageName = ZEN_CLOUD_STORAGE; + std::string BuildCacheName; + bool CacheAssumeHttp2 = false; + std::string StorageDescription; + std::string CacheDescription; + + if (!m_Host.empty() || !m_OverrideHost.empty()) + { + ParseAuthOptions(); + } + + std::string CloudHost; + + auto TestCacheEndpoint = [](std::string_view BaseUrl, const bool AssumeHttp2) -> std::pair<bool, std::string> { + HttpClientSettings TestClientSettings{.LogCategory = "httpcacheclient", + .ConnectTimeout = std::chrono::milliseconds{1000}, + .Timeout = std::chrono::milliseconds{2000}, + .AssumeHttp2 = AssumeHttp2, + .AllowResume = true, + .RetryCount = 0}; + HttpClient TestHttpClient(BaseUrl, TestClientSettings); + HttpClient::Response TestResponse = TestHttpClient.Get("/status/builds"); + if (TestResponse.IsSuccess()) + { + return {true, ""}; + } + return {false, TestResponse.ErrorMessage("")}; + }; + + if (!m_Host.empty()) + { + if (m_OverrideHost.empty() || m_ZenCacheHost.empty()) + { + HttpClient DiscoveryHttpClient(m_Host, ClientSettings); + HttpClient::Response ServerInfoResponse = + DiscoveryHttpClient.Get("/api/v1/status/servers", HttpClient::Accept(HttpContentType::kJSON)); + if (!ServerInfoResponse.IsSuccess()) + { + throw std::runtime_error(fmt::format("Failed to get list of servers from discovery url '{}'. Reason: '{}'", + m_Host, + ServerInfoResponse.ErrorMessage(""))); + } + + std::string_view JsonResponse = ServerInfoResponse.AsText(); + CbObject ResponseObjectView = LoadCompactBinaryFromJson(JsonResponse).AsObject(); + + auto TestHostEndpoint = [](std::string_view BaseUrl, const bool AssumeHttp2) -> std::pair<bool, std::string> { + HttpClientSettings TestClientSettings{.LogCategory = "httpbuildsclient", + .ConnectTimeout = std::chrono::milliseconds{1000}, + .Timeout = std::chrono::milliseconds{2000}, + .AssumeHttp2 = AssumeHttp2, + .AllowResume = true, + .RetryCount = 0}; + + HttpClient TestHttpClient(BaseUrl, TestClientSettings); + HttpClient::Response TestResponse = TestHttpClient.Get("/health/live"); + if (TestResponse.IsSuccess()) + { + return {true, ""}; + } + return {false, TestResponse.ErrorMessage("")}; + }; + + if (m_OverrideHost.empty()) + { + CbArrayView ServerEndpointsArray = ResponseObjectView["serverEndpoints"sv].AsArrayView(); + std::uint64_t ServerCount = ServerEndpointsArray.Num(); + if (ServerCount == 0) + { + throw std::runtime_error(fmt::format("Failed to find any builds hosts at {}", m_Host)); + } + for (CbFieldView ServerEndpointView : ServerEndpointsArray) + { + CbObjectView ServerEndpointObject = ServerEndpointView.AsObjectView(); + + std::string_view BaseUrl = ServerEndpointObject["baseUrl"sv].AsString(); + if (!BaseUrl.empty()) + { + const bool AssumeHttp2 = ServerEndpointObject["assumeHttp2"sv].AsBool(false); + std::string_view Name = ServerEndpointObject["name"sv].AsString(); + if (auto TestResult = TestHostEndpoint(BaseUrl, AssumeHttp2); TestResult.first) + { + CloudHost = BaseUrl; + m_AssumeHttp2 = AssumeHttp2; + BuildStorageName = Name; + break; + } + else + { + ZEN_DEBUG("Unable to reach host {}. Reason: {}", BaseUrl, TestResult.second); + } + } + } + if (CloudHost.empty()) + { + throw std::runtime_error( + fmt::format("Failed to find any usable builds hosts out of {} using {}", ServerCount, m_Host)); + } + } + else if (auto TestResult = TestHostEndpoint(m_OverrideHost, m_AssumeHttp2); TestResult.first) + { + CloudHost = m_OverrideHost; + } + else + { + throw std::runtime_error(fmt::format("Host {} could not be reached. Reason: {}", m_OverrideHost, TestResult.second)); + } + + if (m_ZenCacheHost.empty()) + { + CbArrayView CacheEndpointsArray = ResponseObjectView["cacheEndpoints"sv].AsArrayView(); + std::uint64_t CacheCount = CacheEndpointsArray.Num(); + for (CbFieldView CacheEndpointView : CacheEndpointsArray) + { + CbObjectView CacheEndpointObject = CacheEndpointView.AsObjectView(); + + std::string_view BaseUrl = CacheEndpointObject["baseUrl"sv].AsString(); + if (!BaseUrl.empty()) + { + const bool AssumeHttp2 = CacheEndpointObject["assumeHttp2"sv].AsBool(false); + std::string_view Name = CacheEndpointObject["name"sv].AsString(); + + if (auto TestResult = TestCacheEndpoint(BaseUrl, AssumeHttp2); TestResult.first) + { + m_ZenCacheHost = BaseUrl; + CacheAssumeHttp2 = AssumeHttp2; + BuildCacheName = Name; + break; + } + } + } + if (m_ZenCacheHost.empty()) + { + ZenServerState State; + if (State.InitializeReadOnly()) + { + State.Snapshot([&](const ZenServerState::ZenServerEntry& Entry) { + if (m_ZenCacheHost.empty()) + { + std::string ZenServerLocalHostUrl = + fmt::format("http://127.0.0.1:{}", Entry.EffectiveListenPort.load()); + if (auto TestResult = TestCacheEndpoint(ZenServerLocalHostUrl, false); TestResult.first) + { + m_ZenCacheHost = ZenServerLocalHostUrl; + CacheAssumeHttp2 = false; + BuildCacheName = "localhost"; + } + } + }); + } + if (m_ZenCacheHost.empty()) + { + ZEN_CONSOLE("Warning: Failed to find any usable cache hosts out of {} using {}", CacheCount, m_Host); + } + } + } + else if (auto TestResult = TestCacheEndpoint(m_ZenCacheHost, false); TestResult.first) + { + std::string::size_type HostnameStart = 0; + std::string::size_type HostnameLength = std::string::npos; + if (auto StartPos = m_ZenCacheHost.find("//"); StartPos != std::string::npos) + { + HostnameStart = StartPos + 2; + } + if (auto EndPos = m_ZenCacheHost.find("/", HostnameStart); EndPos != std::string::npos) + { + HostnameLength = EndPos - HostnameStart; + } + BuildCacheName = m_ZenCacheHost.substr(HostnameStart, HostnameLength); + } + else + { + ZEN_WARN("Unable to reach cache host {}. Reason: {}", m_ZenCacheHost, TestResult.second); + } + } + } + else + { + CloudHost = m_OverrideHost; + } + + if (!CloudHost.empty()) + { + Result.BuildStorageHttp = std::make_unique<HttpClient>(CloudHost, ClientSettings); + StorageDescription = fmt::format("Cloud {}{}. SessionId: '{}'. Namespace '{}', Bucket '{}'", + BuildStorageName.empty() ? "" : fmt::format("{}, ", BuildStorageName), + CloudHost, + Result.BuildStorageHttp->GetSessionId(), + m_Namespace, + m_Bucket); + Result.BuildStorage = CreateJupiterBuildStorage(Log(), + *Result.BuildStorageHttp, + StorageStats, + m_Namespace, + m_Bucket, + m_AllowRedirect, + TempPath / "storage"); + Result.StorageName = BuildStorageName; + } + else if (!m_StoragePath.empty()) + { + StorageDescription = fmt::format("folder {}", m_StoragePath); + Result.BuildStorage = CreateFileBuildStorage(m_StoragePath, StorageStats, false, DefaultLatency, DefaultDelayPerKBSec); + Result.StorageName = fmt::format("Disk {}", m_StoragePath.stem()); + } + else + { + throw zen::OptionParseException(fmt::format("Storage option is missing\n{}", SubOption->help())); + } + if (!m_ZenCacheHost.empty()) + { + Result.CacheHttp = std::make_unique<HttpClient>(m_ZenCacheHost, + HttpClientSettings{.LogCategory = "httpcacheclient", + .ConnectTimeout = std::chrono::milliseconds{3000}, + .Timeout = std::chrono::milliseconds{30000}, + .AssumeHttp2 = CacheAssumeHttp2, + .AllowResume = true, + .RetryCount = 0}); + Result.BuildCacheStorage = CreateZenBuildStorageCache(*Result.CacheHttp, + StorageCacheStats, + m_Namespace, + m_Bucket, + TempPath / "zencache", + m_PrimeCacheOnly); + CacheDescription = fmt::format("Zen {}{}. SessionId: '{}'", + BuildCacheName.empty() ? "" : fmt::format("{}, ", BuildCacheName), + m_ZenCacheHost, + Result.CacheHttp->GetSessionId()); + + if (!m_Namespace.empty()) + { + CacheDescription += fmt::format(". Namespace '{}'", m_Namespace); + } + if (!m_Bucket.empty()) + { + CacheDescription += fmt::format(" Bucket '{}'", m_Bucket); + } + Result.CacheName = BuildCacheName.empty() ? m_ZenCacheHost : BuildCacheName; + } + ZEN_CONSOLE("Remote: {}", StorageDescription); + if (!Result.CacheName.empty()) + { + ZEN_CONSOLE("Cache : {}", CacheDescription); + } + return Result; + }; + + auto ParsePath = [&]() { + if (m_Path.empty()) + { + throw zen::OptionParseException(fmt::format("local-path is required\n{}", SubOption->help())); + } + MakeSafeAbsolutePathÍnPlace(m_Path); + }; + + auto ParseDiffPath = [&]() { + if (m_DiffPath.empty()) + { + throw zen::OptionParseException(fmt::format("compare-path is required\n{}", SubOption->help())); + } + MakeSafeAbsolutePathÍnPlace(m_DiffPath); + }; + + auto ParseBlobHash = [&]() -> IoHash { + m_BlobHash = RemoveQuotes(m_BlobHash); + if (m_BlobHash.empty()) + { + throw zen::OptionParseException(fmt::format("Blob hash string is missing\n{}", SubOption->help())); + } + + IoHash BlobHash; + if (!IoHash::TryParse(m_BlobHash, BlobHash)) + { + throw zen::OptionParseException(fmt::format("Blob hash string is invalid\n{}", SubOption->help())); + } + + return BlobHash; + }; + + auto ParseBuildId = [&]() -> Oid { + m_BuildId = RemoveQuotes(m_BuildId); + if (m_BuildId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", SubOption->help())); + } + else if (Oid BuildId = Oid::FromHexString(m_BuildId); BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build id\n{}", SubOption->help())); + } + else + { + return BuildId; + } + }; + + auto ParseBuildPartId = [&]() -> Oid { + m_BuildPartId = RemoveQuotes(m_BuildPartId); + if (m_BuildPartId.length() != Oid::StringLength) + { + throw zen::OptionParseException(fmt::format("Invalid build part id\n{}", SubOption->help())); + } + else if (Oid BuildPartId = Oid::FromHexString(m_BuildPartId); BuildPartId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("Invalid build part id\n{}", SubOption->help())); + } + else + { + return BuildPartId; + } + }; + + auto ParseBuildPartIds = [&]() -> std::vector<Oid> { + std::vector<Oid> BuildPartIds; + for (const std::string& BuildPartId : m_BuildPartIds) + { + BuildPartIds.push_back(Oid::TryFromHexString(RemoveQuotes(BuildPartId))); + if (BuildPartIds.back() == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("build-part-id '{}' is invalid\n{}", BuildPartId, SubOption->help())); + } + } + return BuildPartIds; + }; + + auto ParseBuildPartNames = [&]() -> std::vector<std::string> { + std::vector<std::string> BuildPartNames; + for (const std::string& BuildPartName : m_BuildPartNames) + { + BuildPartNames.push_back(std::string(RemoveQuotes(BuildPartName))); + if (BuildPartNames.back().empty()) + { + throw zen::OptionParseException(fmt::format("build-part-names '{}' is invalid\n{}", BuildPartName, SubOption->help())); + } + } + return BuildPartNames; + }; + + auto ParseBuildMetadata = [&]() -> CbObject { + if (m_CreateBuild) + { + if (m_BuildMetadataPath.empty() && m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("Options for builds target are missing\n{}", SubOption->help())); + } + if (!m_BuildMetadataPath.empty() && !m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("Conflicting options for builds target\n{}", SubOption->help())); + } + + if (!m_BuildMetadataPath.empty()) + { + MakeSafeAbsolutePathÍnPlace(m_BuildMetadataPath); + IoBuffer MetaDataJson = ReadFile(m_BuildMetadataPath).Flatten(); + std::string_view Json(reinterpret_cast<const char*>(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + CbObject MetaData = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (!JsonError.empty()) + { + throw std::runtime_error( + fmt::format("build metadata file '{}' is malformed. Reason: '{}'", m_BuildMetadataPath, JsonError)); + } + return MetaData; + } + m_BuildMetadata = RemoveQuotes(m_BuildMetadata); + if (!m_BuildMetadata.empty()) + { + CbObjectWriter MetaDataWriter(1024); + ForEachStrTok(m_BuildMetadata, ';', [&](std::string_view Pair) { + size_t SplitPos = Pair.find('='); + if (SplitPos == std::string::npos || SplitPos == 0) + { + throw std::runtime_error(fmt::format("build metadata key-value pair '{}' is malformed", Pair)); + } + MetaDataWriter.AddString(Pair.substr(0, SplitPos), Pair.substr(SplitPos + 1)); + return true; + }); + return MetaDataWriter.Save(); + } + } + else + { + if (!m_BuildMetadataPath.empty()) + { + throw zen::OptionParseException( + fmt::format("metadata-path option is only valid if creating a build\n{}", SubOption->help())); + } + if (!m_BuildMetadata.empty()) + { + throw zen::OptionParseException(fmt::format("metadata option is only valid if creating a build\n{}", SubOption->help())); + } + } + return {}; + }; + + BoostWorkerThreads = m_BoostWorkerThreads; + UseSparseFiles = m_UseSparseFiles; + + try + { + if (SubOption == &m_ListNamespacesOptions) + { + if (!m_ListResultPath.empty()) + { + ZEN_CONSOLE("Running {}: {} (pid {})", + GetRunningExecutablePath(), + ZEN_CFG_VERSION_BUILD_STRING_FULL, + GetCurrentProcessId()); + } + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + const std::filesystem::path ZenFolderPath = m_ZenFolderPath.empty() + ? MakeSafeAbsolutePath(std::filesystem::current_path()) / ZenFolderName + : MakeSafeAbsolutePath(m_ZenFolderPath); + CreateDirectories(ZenFolderPath); + auto _ = MakeGuard([ZenFolderPath]() { + if (CleanDirectory(ZenFolderPath, {})) + { + std::error_code DummyEc; + RemoveDir(ZenFolderPath, DummyEc); + } + }); + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(ZenFolderPath), + /*RequriesNamespace*/ false, + /*RequireBucket*/ false); + + CbObject Response = Storage.BuildStorage->ListNamespaces(m_ListNamespacesRecursive); + ZEN_ASSERT(ValidateCompactBinary(Response.GetView(), CbValidateMode::All) == CbValidateError::None); + if (m_ListResultPath.empty()) + { + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(Response.GetView(), SB); + ZEN_CONSOLE("{}", SB.ToView()); + } + else + { + std::filesystem::path ListResultPath = MakeSafeAbsolutePath(m_ListResultPath); + if (ToLower(ListResultPath.extension().string()) == ".cbo") + { + MemoryView ResponseView = Response.GetView(); + WriteFile(ListResultPath, IoBuffer(IoBuffer::Wrap, ResponseView.GetData(), ResponseView.GetSize())); + } + else + { + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(Response.GetView(), SB); + WriteFile(ListResultPath, IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); + } + } + + return 0; + } + + if (SubOption == &m_ListOptions) + { + MakeSafeAbsolutePathÍnPlace(m_ListQueryPath); + MakeSafeAbsolutePathÍnPlace(m_ListResultPath); + + if (!m_ListResultPath.empty()) + { + ZEN_CONSOLE("Running {}: {} (pid {})", + GetRunningExecutablePath(), + ZEN_CFG_VERSION_BUILD_STRING_FULL, + GetCurrentProcessId()); + } + CbObject QueryObject; + if (m_ListQueryPath.empty()) + { + CbObjectWriter QueryWriter; + QueryWriter.BeginObject("query"); + QueryWriter.EndObject(); // query + QueryObject = QueryWriter.Save(); + } + else + { + if (ToLower(m_ListQueryPath.extension().string()) == ".cbo") + { + QueryObject = LoadCompactBinaryObject(IoBufferBuilder::MakeFromFile(m_ListQueryPath)); + } + else + { + IoBuffer MetaDataJson = ReadFile(m_ListQueryPath).Flatten(); + std::string_view Json(reinterpret_cast<const char*>(MetaDataJson.GetData()), MetaDataJson.GetSize()); + std::string JsonError; + QueryObject = LoadCompactBinaryFromJson(Json, JsonError).AsObject(); + if (!JsonError.empty()) + { + throw std::runtime_error( + fmt::format("build metadata file '{}' is malformed. Reason: '{}'", m_ListQueryPath, JsonError)); + } + } + } + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + if (m_ZenFolderPath.empty()) + { + m_ZenFolderPath = std::filesystem::current_path() / ZenFolderName; + } + MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + + CreateDirectories(m_ZenFolderPath); + auto _ = MakeGuard([this]() { + if (CleanDirectory(m_ZenFolderPath, {})) + { + std::error_code DummyEc; + RemoveDir(m_ZenFolderPath, DummyEc); + } + }); + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(m_ZenFolderPath), + /*RequriesNamespace*/ true, + /*RequireBucket*/ false); + + CbObject Response = Storage.BuildStorage->ListBuilds(QueryObject); + ZEN_ASSERT(ValidateCompactBinary(Response.GetView(), CbValidateMode::All) == CbValidateError::None); + if (m_ListResultPath.empty()) + { + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(Response.GetView(), SB); + ZEN_CONSOLE("{}", SB.ToView()); + } + else + { + if (ToLower(m_ListResultPath.extension().string()) == ".cbo") + { + MemoryView ResponseView = Response.GetView(); + WriteFile(m_ListResultPath, IoBuffer(IoBuffer::Wrap, ResponseView.GetData(), ResponseView.GetSize())); + } + else + { + ExtendableStringBuilder<1024> SB; + CompactBinaryToJson(Response.GetView(), SB); + WriteFile(m_ListResultPath, IoBuffer(IoBuffer::Wrap, SB.Data(), SB.Size())); + } + } + + return 0; + } + + if (SubOption == &m_UploadOptions) + { + ZEN_CONSOLE("Running {}: {} (pid {})", GetRunningExecutablePath(), ZEN_CFG_VERSION_BUILD_STRING_FULL, GetCurrentProcessId()); + + ZenState InstanceState; + + ParsePath(); + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + if (m_ZenFolderPath.empty()) + { + m_ZenFolderPath = std::filesystem::current_path() / ZenFolderName; + } + MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + + CreateDirectories(m_ZenFolderPath); + auto _ = MakeGuard([this]() { + if (CleanDirectory(m_ZenFolderPath, {})) + { + std::error_code DummyEc; + RemoveDir(m_ZenFolderPath, DummyEc); + } + }); + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(m_ZenFolderPath), + /*RequriesNamespace*/ true, + /*RequireBucket*/ true); + + if (m_BuildPartName.empty()) + { + m_BuildPartName = m_Path.filename().string(); + } + + const Oid BuildId = m_BuildId.empty() ? Oid::NewOid() : ParseBuildId(); + if (m_BuildId.empty()) + { + m_BuildId = BuildId.ToString(); + } + const Oid BuildPartId = m_BuildPartId.empty() ? Oid::NewOid() : ParseBuildPartId(); + if (m_BuildPartId.empty()) + { + m_BuildPartId = BuildPartId.ToString(); + } + + CbObject MetaData = ParseBuildMetadata(); + + const std::filesystem::path TempDir = ZenTempFolderPath(m_ZenFolderPath); + + m_ManifestPath = RemoveQuotes(m_ManifestPath); + + UploadFolder(Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + TempDir, + m_ManifestPath, + m_FindBlockMaxCount, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + m_CreateBuild, + m_Clean, + m_PostUploadVerify); + + if (false) + { + ZEN_CONSOLE( + "{}:\n" + "Read: {}\n" + "Write: {}\n" + "Requests: {}\n" + "Avg Request Time: {}\n" + "Avg I/O Time: {}", + Storage.StorageName, + NiceBytes(StorageStats.TotalBytesRead.load()), + NiceBytes(StorageStats.TotalBytesWritten.load()), + StorageStats.TotalRequestCount.load(), + StorageStats.TotalExecutionTimeUs.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalExecutionTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0, + StorageStats.TotalRequestCount.load() > 0 + ? NiceTimeSpanMs(StorageStats.TotalRequestTimeUs.load() / 1000 / StorageStats.TotalRequestCount.load()) + : 0); + } + return AbortFlag ? 11 : 0; + } + + if (SubOption == &m_DownloadOptions) + { + ZEN_CONSOLE("Running {}: {} (pid {})", GetRunningExecutablePath(), ZEN_CFG_VERSION_BUILD_STRING_FULL, GetCurrentProcessId()); + + ZenState InstanceState; + + ParsePath(); + + if (m_ZenFolderPath.empty()) + { + m_ZenFolderPath = m_Path / ZenFolderName; + } + MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(m_ZenFolderPath), + /*RequriesNamespace*/ true, + /*RequireBucket*/ true); + + const Oid BuildId = ParseBuildId(); + + if (m_PostDownloadVerify && m_PrimeCacheOnly) + { + throw zen::OptionParseException( + fmt::format("'cache-prime-only' option is not compatible with 'verify' option\n{}", SubOption->help())); + } + + if (m_Clean && m_PrimeCacheOnly) + { + ZEN_WARN("ignoring 'clean' option when 'cache-prime-only' is enabled"); + } + + if (m_AllowPartialBlockRequests && m_PrimeCacheOnly) + { + ZEN_WARN("ignoring 'allow-partial-block-requests' option when 'cache-prime-only' is enabled"); + } + + std::vector<Oid> BuildPartIds = ParseBuildPartIds(); + std::vector<std::string> BuildPartNames = ParseBuildPartNames(); + + DownloadFolder(Storage, + BuildId, + BuildPartIds, + BuildPartNames, + m_Path, + m_ZenFolderPath, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests && !m_PrimeCacheOnly, + m_Clean, + m_PostDownloadVerify, + m_PrimeCacheOnly, + m_EnableScavenging); + + return AbortFlag ? 11 : 0; + } + if (SubOption == &m_DiffOptions) + { + ParsePath(); + ParseDiffPath(); + + DiffFolders(m_Path, m_DiffPath, m_OnlyChunked); + return AbortFlag ? 11 : 0; + } + + if (SubOption == &m_FetchBlobOptions) + { + ZEN_CONSOLE("Running {}: {} (pid {})", GetRunningExecutablePath(), ZEN_CFG_VERSION_BUILD_STRING_FULL, GetCurrentProcessId()); + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + if (m_ZenFolderPath.empty()) + { + m_ZenFolderPath = std::filesystem::current_path() / ZenFolderName; + } + MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + + CreateDirectories(m_ZenFolderPath); + auto _ = MakeGuard([this]() { + if (CleanDirectory(m_ZenFolderPath, {})) + { + std::error_code DummyEc; + RemoveDir(m_ZenFolderPath, DummyEc); + } + }); + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(m_ZenFolderPath), + /*RequriesNamespace*/ true, + /*RequireBucket*/ true); + + IoHash BlobHash = ParseBlobHash(); + + const Oid BuildId = Oid::FromHexString(m_BuildId); + + uint64_t CompressedSize; + uint64_t DecompressedSize; + ValidateBlob(*Storage.BuildStorage, BuildId, BlobHash, CompressedSize, DecompressedSize); + if (AbortFlag) + { + return 11; + } + ZEN_CONSOLE("Blob '{}' has a compressed size {} and a decompressed size of {} bytes", + BlobHash, + CompressedSize, + DecompressedSize); + return 0; + } + + if (SubOption == &m_ValidateBuildPartOptions) + { + ZEN_CONSOLE("Running {}: {} (pid {})", GetRunningExecutablePath(), ZEN_CFG_VERSION_BUILD_STRING_FULL, GetCurrentProcessId()); + + ZenState InstanceState; + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + if (m_ZenFolderPath.empty()) + { + m_ZenFolderPath = std::filesystem::current_path() / ZenFolderName; + } + MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + + CreateDirectories(m_ZenFolderPath); + auto _ = MakeGuard([this]() { + if (CleanDirectory(m_ZenFolderPath, {})) + { + std::error_code DummyEc; + RemoveDir(m_ZenFolderPath, DummyEc); + } + }); + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(m_ZenFolderPath), + /*RequriesNamespace*/ true, + /*RequireBucket*/ true); + + Oid BuildId = ParseBuildId(); + + if (!m_BuildPartName.empty() && !m_BuildPartId.empty()) + { + throw zen::OptionParseException(fmt::format("build-part-id conflicts with build-part-name\n{}", SubOption->help())); + } + + const Oid BuildPartId = m_BuildPartName.empty() ? Oid::Zero : ParseBuildPartId(); + + ValidateStatistics ValidateStats; + DownloadStatistics DownloadStats; + ValidateBuildPart(*Storage.BuildStorage, BuildId, BuildPartId, m_BuildPartName, ValidateStats, DownloadStats); + + return AbortFlag ? 13 : 0; + } + + if (SubOption == &m_MultiTestDownloadOptions) + { + m_SystemRootDir = (GetRunningExecutablePath().parent_path() / ".tmpzensystem").make_preferred(); + CreateDirectories(m_SystemRootDir); + CleanDirectory(m_SystemRootDir, {}); + auto _ = MakeGuard([&]() { DeleteDirectories(m_SystemRootDir); }); + + ParsePath(); + + if (m_ZenFolderPath.empty()) + { + m_ZenFolderPath = m_Path / ZenFolderName; + } + MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(m_ZenFolderPath), + /*RequriesNamespace*/ true, + /*RequireBucket*/ true); + + Stopwatch Timer; + for (const std::string& BuildIdString : m_BuildIds) + { + Oid BuildId = Oid::FromHexString(RemoveQuotes(BuildIdString)); + if (BuildId == Oid::Zero) + { + throw zen::OptionParseException(fmt::format("invalid build id {}\n{}", BuildIdString, SubOption->help())); + } + DownloadFolder(Storage, + BuildId, + {}, + {}, + m_Path, + m_ZenFolderPath, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + BuildIdString == m_BuildIds.front(), + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Download cancelled"); + return 11; + } + ZEN_CONSOLE("\n"); + } + ZEN_CONSOLE("Completed in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + return 0; + } + + auto ParseZenProcessId = [&]() { + if (m_ZenProcessId == -1) + { + const std::filesystem::path RunningExecutablePath = GetRunningExecutablePath(); + ProcessHandle RunningProcess; + std::error_code Ec = FindProcess(RunningExecutablePath, RunningProcess, /*IncludeSelf*/ false); + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed finding process running '{}', reason: '{}'", RunningExecutablePath, Ec.message())); + } + if (!RunningProcess.IsValid()) + { + throw zen::OptionParseException( + fmt::format("Unable to find a running instance of the zen executable '{}'", RunningExecutablePath)); + } + m_ZenProcessId = RunningProcess.Pid(); + } + }; + + if (SubOption == &m_PauseOptions) + { + ParseZenProcessId(); + ZenState RunningState(m_ZenProcessId); + RunningState.StateData().Pause.store(true); + return 0; + } + + if (SubOption == &m_ResumeOptions) + { + ParseZenProcessId(); + ZenState RunningState(m_ZenProcessId); + RunningState.StateData().Pause.store(false); + return 0; + } + + if (SubOption == &m_AbortOptions) + { + ParseZenProcessId(); + ZenState RunningState(m_ZenProcessId); + RunningState.StateData().Abort.store(true); + return 0; + } + + if (SubOption == &m_TestOptions) + { + m_SystemRootDir = (GetRunningExecutablePath().parent_path() / ".tmpzensystem").make_preferred(); + CreateDirectories(m_SystemRootDir); + CleanDirectory(m_SystemRootDir, {}); + auto _ = MakeGuard([&]() { DeleteDirectories(m_SystemRootDir); }); + + ParsePath(); + + if (m_OverrideHost.empty() && m_StoragePath.empty()) + { + m_StoragePath = (GetRunningExecutablePath().parent_path() / ".tmpstore").make_preferred(); + CreateDirectories(m_StoragePath); + CleanDirectory(m_StoragePath, {}); + m_StoragePath = m_StoragePath.generic_string(); + } + + auto __ = MakeGuard([&]() { + if (m_OverrideHost.empty() && m_StoragePath.empty()) + { + DeleteDirectories(m_StoragePath); + } + }); + + BuildStorage::Statistics StorageStats; + BuildStorageCache::Statistics StorageCacheStats; + + const std::filesystem::path DownloadPath = m_Path.parent_path() / (m_BuildPartName + "_test"); + const std::filesystem::path DownloadPath2 = m_Path.parent_path() / (m_BuildPartName + "_test2"); + + auto ___ = MakeGuard([DownloadPath, DownloadPath2]() { + CleanDirectory(DownloadPath, true); + DeleteDirectories(DownloadPath); + CleanDirectory(DownloadPath2, true); + DeleteDirectories(DownloadPath2); + }); + + if (m_ZenFolderPath.empty()) + { + m_ZenFolderPath = m_Path / ZenFolderName; + } + MakeSafeAbsolutePathÍnPlace(m_ZenFolderPath); + + StorageInstance Storage = CreateBuildStorage(StorageStats, + StorageCacheStats, + ZenTempFolderPath(m_ZenFolderPath), + /*RequriesNamespace*/ true, + /*RequireBucket*/ true); + + m_BuildId = Oid::NewOid().ToString(); + m_BuildPartName = m_Path.filename().string(); + m_BuildPartId = Oid::NewOid().ToString(); + m_CreateBuild = true; + + const Oid BuildId = Oid::FromHexString(m_BuildId); + const Oid BuildPartId = Oid::FromHexString(m_BuildPartId); + + auto MakeMetaData = [](const Oid& BuildId) -> CbObject { + CbObjectWriter BuildMetaDataWriter; + { + const uint32_t CL = BuildId.OidBits[2]; + BuildMetaDataWriter.AddString("name", fmt::format("++Test+Main-CL-{}", CL)); + BuildMetaDataWriter.AddString("branch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("baselineBranch", "ZenTestBuild"); + BuildMetaDataWriter.AddString("platform", "Windows"); + BuildMetaDataWriter.AddString("project", "Test"); + BuildMetaDataWriter.AddInteger("changelist", CL); + BuildMetaDataWriter.AddString("buildType", "test-folder"); + } + return BuildMetaDataWriter.Save(); + }; + CbObject MetaData = MakeMetaData(Oid::TryFromHexString(m_BuildId)); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData, SB); + ZEN_CONSOLE("Upload Build {}, Part {} ({}) from '{}'\n{}", m_BuildId, BuildPartId, m_BuildPartName, m_Path, SB.ToView()); + } + + const std::filesystem::path UploadTempDir = UploadTempDirectory(m_Path); + // std::filesystem::path UploadTempDir = m_ZenFolderPath / "upload_tmp"; + + UploadFolder(Storage, + BuildId, + BuildPartId, + m_BuildPartName, + m_Path, + UploadTempDir, + {}, + m_FindBlockMaxCount, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData, + true, + false, + true); + if (AbortFlag) + { + ZEN_CONSOLE("Upload failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}'", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + DownloadFolder(Storage, + BuildId, + {BuildPartId}, + {}, + DownloadPath, + DownloadPath / ZenFolderName, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + true, + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Download failed."); + return 11; + } + + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (identical target)", + BuildId, + BuildPartId, + m_BuildPartName, + DownloadPath); + DownloadFolder(Storage, + BuildId, + {BuildPartId}, + {}, + DownloadPath, + DownloadPath / ZenFolderName, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed. (identical target)"); + return 11; + } + + auto ScrambleDir = [](const std::filesystem::path& Path) { + ZEN_CONSOLE("\nScrambling '{}'", Path); + Stopwatch Timer; + DirectoryContent DownloadContent; + GetDirectoryContent( + Path, + DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::IncludeFileSizes, + DownloadContent); + auto IsAcceptedFolder = [ExcludeFolders = DefaultExcludeFolders, Path](const std::filesystem::path& AbsolutePath) -> bool { + std::string RelativePath = std::filesystem::relative(AbsolutePath, Path).generic_string(); + for (const std::string_view& ExcludeFolder : ExcludeFolders) + { + if (RelativePath.starts_with(ExcludeFolder)) + { + if (RelativePath.length() == ExcludeFolder.length()) + { + return false; + } + else if (RelativePath[ExcludeFolder.length()] == '/') + { + return false; + } + } + } + return true; + }; + + ParallelWork Work(AbortFlag, PauseFlag); + + uint32_t Randomizer = 0; + auto FileSizeIt = DownloadContent.FileSizes.begin(); + for (const std::filesystem::path& FilePath : DownloadContent.Files) + { + if (IsAcceptedFolder(FilePath)) + { + uint32_t Case = (Randomizer++) % 7; + switch (Case) + { + case 0: + { + uint64_t SourceSize = *FileSizeIt; + if (SourceSize > 256) + { + Work.ScheduleWork( + GetIOWorkerPool(), + [SourceSize, FilePath = std::filesystem::path(FilePath)](std::atomic<bool>&) { + if (!AbortFlag) + { + bool IsReadOnly = SetFileReadOnlyWithRetry(FilePath, false); + { + BasicFile Source(FilePath, BasicFile::Mode::kWrite); + uint64_t RangeSize = Min(SourceSize / 3, 512u * 1024u); + IoBuffer TempBuffer1(RangeSize); + IoBuffer TempBuffer2(RangeSize); + IoBuffer TempBuffer3(RangeSize); + Source.Read(TempBuffer1.GetMutableView().GetData(), RangeSize, 0); + Source.Read(TempBuffer2.GetMutableView().GetData(), RangeSize, SourceSize / 2); + Source.Read(TempBuffer3.GetMutableView().GetData(), + RangeSize, + SourceSize - RangeSize); + Source.Write(TempBuffer1, SourceSize / 2); + Source.Write(TempBuffer2, SourceSize - RangeSize); + Source.Write(TempBuffer3, SourceSize - 0); + } + if (IsReadOnly) + { + SetFileReadOnly(FilePath, true); + } + } + }); + } + } + break; + case 1: + { + (void)SetFileReadOnlyWithRetry(FilePath, false); + RemoveFileWithRetry(FilePath); + } + break; + default: + break; + } + } + FileSizeIt++; + } + Work.Wait(5000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + ZEN_CONSOLE("Scrambling files, {} remaining", PendingWork); + }); + ZEN_ASSERT(!AbortFlag.load()); + ZEN_CONSOLE("Scrambled files in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }; + + ScrambleDir(DownloadPath); + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled target)", + BuildId, + BuildPartId, + m_BuildPartName, + DownloadPath); + DownloadFolder(Storage, + BuildId, + {BuildPartId}, + {}, + DownloadPath, + DownloadPath / ZenFolderName, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed. (scrambled target)"); + return 11; + } + + ScrambleDir(DownloadPath); + + Oid BuildId2 = Oid::NewOid(); + Oid BuildPartId2 = Oid::NewOid(); + + CbObject MetaData2 = MakeMetaData(BuildId2); + { + ExtendableStringBuilder<256> SB; + CompactBinaryToJson(MetaData, SB); + ZEN_CONSOLE("\nUpload scrambled Build {}, Part {} ({})\n{}\n", BuildId2, BuildPartId2, m_BuildPartName, SB.ToView()); + } + + UploadFolder(Storage, + BuildId2, + BuildPartId2, + m_BuildPartName, + DownloadPath, + UploadTempDir, + {}, + m_FindBlockMaxCount, + m_BlockReuseMinPercentLimit, + m_AllowMultiparts, + MetaData2, + true, + false, + true); + if (AbortFlag) + { + ZEN_CONSOLE("Upload of scrambled failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath); + DownloadFolder(Storage, + BuildId, + {BuildPartId}, + {}, + DownloadPath, + DownloadPath / ZenFolderName, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + DownloadFolder(Storage, + BuildId2, + {BuildPartId2}, + {}, + DownloadPath, + DownloadPath / ZenFolderName, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nRe-download Build {}, Part {} ({}) to '{}' (scrambled)", BuildId2, BuildPartId2, m_BuildPartName, DownloadPath); + DownloadFolder(Storage, + BuildId2, + {BuildPartId2}, + {}, + DownloadPath, + DownloadPath / ZenFolderName, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + ZEN_CONSOLE("\nDownload Build {}, Part {} ({}) to '{}' (original)", BuildId, BuildPartId, m_BuildPartName, DownloadPath2); + DownloadFolder(Storage, + BuildId, + {BuildPartId}, + {}, + DownloadPath2, + DownloadPath2 / ZenFolderName, + m_SystemRootDir, + m_AllowMultiparts, + m_AllowPartialBlockRequests, + false, + true, + false, + m_EnableScavenging); + if (AbortFlag) + { + ZEN_CONSOLE("Re-download failed."); + return 11; + } + + return 0; + } + } + catch (const std::system_error& SysErr) + { + if (IsOOD(SysErr)) + { + ZEN_CONSOLE("Operation failed due to out of disk space: {}", SysErr.what()); + return 3; + } + else if (IsOOM(SysErr)) + { + ZEN_CONSOLE("Operation failed due to out of memory: {}", SysErr.what()); + return 3; + } + else + { + ZEN_ERROR("{}", SysErr.what()); + return 3; + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("{}", Ex.what()); + return 3; + } + ZEN_ASSERT(false); +} + +} // namespace zen diff --git a/src/zen/cmds/builds_cmd.h b/src/zen/cmds/builds_cmd.h new file mode 100644 index 000000000..b3466c0d3 --- /dev/null +++ b/src/zen/cmds/builds_cmd.h @@ -0,0 +1,144 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "../zen.h" + +#include <zenhttp/auth/authmgr.h> +#include <zenhttp/httpclientauth.h> +#include <filesystem> + +namespace zen { + +class BuildsCommand : public CacheStoreCommand +{ +public: + static constexpr char Name[] = "builds"; + static constexpr char Description[] = "Manage builds - list, upload, download, diff"; + + BuildsCommand(); + ~BuildsCommand(); + + virtual int Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) override; + virtual cxxopts::Options& Options() override { return m_Options; } + +private: + cxxopts::Options m_Options{Name, Description}; + + std::filesystem::path m_SystemRootDir; + + bool m_PlainProgress = false; + bool m_LogProgress = false; + bool m_Verbose = false; + bool m_BoostWorkerThreads = false; + bool m_UseSparseFiles = true; + + std::filesystem::path m_ZenFolderPath; + + // cloud builds + std::string m_OverrideHost; + std::string m_Host; + std::string m_Url; + bool m_AssumeHttp2 = false; + bool m_AllowRedirect = false; + std::string m_Namespace; + std::string m_Bucket; + + // file storage + std::filesystem::path m_StoragePath; + bool m_WriteMetadataAsJson = false; + + // cache + std::string m_ZenCacheHost; + bool m_PrimeCacheOnly = false; + + std::string m_BuildId; + bool m_CreateBuild = false; + std::filesystem::path m_BuildMetadataPath; + std::string m_BuildMetadata; + std::string m_BuildPartName; // Defaults to name of leaf folder in m_Path + std::string m_BuildPartId; // Defaults to a generated id when creating part, looked up when downloading using m_BuildPartName + bool m_Clean = false; + uint8_t m_BlockReuseMinPercentLimit = 85; + bool m_AllowMultiparts = true; + bool m_AllowPartialBlockRequests = true; + std::string m_ManifestPath; // Not a std::filesystem::path since it can be relative to m_Path + + // Direct access token (may expire) + std::string m_AccessToken; + std::string m_AccessTokenEnv; + std::filesystem::path m_AccessTokenPath; + + // Auth manager token encryption + std::string m_EncryptionKey; // 256 bit AES encryption key + std::string m_EncryptionIV; // 128 bit AES initialization vector + + // OpenId acccess token + std::string m_OpenIdProviderName; + std::string m_OpenIdProviderUrl; + std::string m_OpenIdClientId; + std::string m_OpenIdRefreshToken; + + // OAuth acccess token + std::string m_OAuthUrl; + std::string m_OAuthClientId; + std::string m_OAuthClientSecret; + + std::string m_OidcTokenAuthExecutablePath; + + std::string m_Verb; // list, upload, download + + cxxopts::Options m_ListNamespacesOptions{"list-namespaces", "List available build namespaces"}; + bool m_ListNamespacesRecursive = false; + + cxxopts::Options m_ListOptions{"list", "List available builds"}; + std::filesystem::path m_ListQueryPath; + std::filesystem::path m_ListResultPath; + + std::filesystem::path m_Path; + + cxxopts::Options m_UploadOptions{"upload", "Upload a folder"}; + uint64_t m_FindBlockMaxCount = 10000; + bool m_PostUploadVerify = false; + + cxxopts::Options m_DownloadOptions{"download", "Download a folder"}; + std::vector<std::string> m_BuildPartNames; + std::vector<std::string> m_BuildPartIds; + bool m_PostDownloadVerify = false; + bool m_EnableScavenging = true; + + cxxopts::Options m_DiffOptions{"diff", "Compare two local folders"}; + std::filesystem::path m_DiffPath; + bool m_OnlyChunked = false; + + cxxopts::Options m_FetchBlobOptions{"fetch-blob", "Fetch a blob from remote store"}; + std::string m_BlobHash; + + cxxopts::Options m_PauseOptions{"pause", "Pause an ongoing zen builds process"}; + cxxopts::Options m_ResumeOptions{"resume", "Resume a paused zen builds process"}; + cxxopts::Options m_AbortOptions{"abort", "Abort an ongoing zen builds process"}; + + int m_ZenProcessId = -1; + + cxxopts::Options m_ValidateBuildPartOptions{"validate-part", "Fetch a build part and validate all referenced attachments"}; + + cxxopts::Options m_TestOptions{"test", "Test upload and download with verify"}; + + cxxopts::Options m_MultiTestDownloadOptions{"multi-test-download", "Test multiple sequenced downloads with verify"}; + std::vector<std::string> m_BuildIds; + + cxxopts::Options* m_SubCommands[12] = {&m_ListNamespacesOptions, + &m_ListOptions, + &m_UploadOptions, + &m_DownloadOptions, + &m_PauseOptions, + &m_ResumeOptions, + &m_AbortOptions, + &m_DiffOptions, + &m_FetchBlobOptions, + &m_ValidateBuildPartOptions, + &m_TestOptions, + &m_MultiTestDownloadOptions}; +}; + +} // namespace zen diff --git a/src/zen/cmds/cache_cmd.cpp b/src/zen/cmds/cache_cmd.cpp index 6ec6a80db..4412eaf34 100644 --- a/src/zen/cmds/cache_cmd.cpp +++ b/src/zen/cmds/cache_cmd.cpp @@ -625,22 +625,20 @@ CacheGetCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) HttpClient Http(m_HostName); - std::filesystem::path TargetPath; if (!m_OutputPath.empty()) { - TargetPath = std::filesystem::path(m_OutputPath); - if (std::filesystem::is_directory(TargetPath)) + if (IsDir(m_OutputPath)) { - TargetPath = TargetPath / (m_AttachmentHash.empty() ? m_ValueKey : m_AttachmentHash); + m_OutputPath = m_OutputPath / (m_AttachmentHash.empty() ? m_ValueKey : m_AttachmentHash); } else { - CreateDirectories(TargetPath.parent_path()); + CreateDirectories(m_OutputPath.parent_path()); } } - if (TargetPath.empty()) + if (m_OutputPath.empty()) { - TargetPath = (m_AttachmentHash.empty() ? m_ValueKey : m_AttachmentHash); + m_OutputPath = (m_AttachmentHash.empty() ? m_ValueKey : m_AttachmentHash); } std::string Url = fmt::format("/z$/{}/{}/{}", m_Namespace, m_Bucket, m_ValueKey); @@ -670,17 +668,17 @@ CacheGetCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else { - WriteFile(TargetPath, IoBuffer(IoBuffer::Wrap, StringData.data(), StringData.length())); - ZEN_CONSOLE("Wrote {} to '{}' ({})", NiceBytes(StringData.length()), TargetPath, ToString(ChunkData.GetContentType())); + WriteFile(m_OutputPath, IoBuffer(IoBuffer::Wrap, StringData.data(), StringData.length())); + ZEN_CONSOLE("Wrote {} to '{}' ({})", NiceBytes(StringData.length()), m_OutputPath, ToString(ChunkData.GetContentType())); } } else { - if (!MoveToFile(TargetPath, ChunkData)) + if (!MoveToFile(m_OutputPath, ChunkData)) { - WriteFile(TargetPath, ChunkData); + WriteFile(m_OutputPath, ChunkData); } - ZEN_CONSOLE("Wrote {} to '{}' ({})", NiceBytes(ChunkData.GetSize()), TargetPath, ToString(ChunkData.GetContentType())); + ZEN_CONSOLE("Wrote {} to '{}' ({})", NiceBytes(ChunkData.GetSize()), m_OutputPath, ToString(ChunkData.GetContentType())); } } else diff --git a/src/zen/cmds/cache_cmd.h b/src/zen/cmds/cache_cmd.h index 73702cada..b8a319359 100644 --- a/src/zen/cmds/cache_cmd.h +++ b/src/zen/cmds/cache_cmd.h @@ -108,15 +108,15 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{Name, Description}; - std::string m_HostName; - std::string m_Namespace; - std::string m_Bucket; - std::string m_ValueKey; - std::string m_AttachmentHash; - std::string m_OutputPath; - bool m_AsText = false; - bool m_Decompress = true; + cxxopts::Options m_Options{Name, Description}; + std::string m_HostName; + std::string m_Namespace; + std::string m_Bucket; + std::string m_ValueKey; + std::string m_AttachmentHash; + std::filesystem::path m_OutputPath; + bool m_AsText = false; + bool m_Decompress = true; }; } // namespace zen diff --git a/src/zen/cmds/copy_cmd.cpp b/src/zen/cmds/copy_cmd.cpp index d42d3c107..e86b6964c 100644 --- a/src/zen/cmds/copy_cmd.cpp +++ b/src/zen/cmds/copy_cmd.cpp @@ -42,11 +42,8 @@ CopyCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (m_CopyTarget.empty()) throw std::runtime_error("No target specified"); - std::filesystem::path FromPath; - std::filesystem::path ToPath; - - FromPath = m_CopySource; - ToPath = m_CopyTarget; + std::filesystem::path FromPath = m_CopySource; + std::filesystem::path ToPath = m_CopyTarget; std::error_code Ec; std::filesystem::path FromCanonical = std::filesystem::canonical(FromPath, Ec); @@ -64,8 +61,8 @@ CopyCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } } - const bool IsFileCopy = std::filesystem::is_regular_file(m_CopySource); - const bool IsDirCopy = std::filesystem::is_directory(m_CopySource); + const bool IsFileCopy = IsFile(m_CopySource); + const bool IsDirCopy = IsDir(m_CopySource); if (!IsFileCopy && !IsDirCopy) { @@ -79,20 +76,14 @@ CopyCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (IsDirCopy) { - if (std::filesystem::exists(ToPath)) + if (IsFile(ToPath)) { - const bool IsTargetDir = std::filesystem::is_directory(ToPath); - if (!IsTargetDir) - { - if (std::filesystem::is_regular_file(ToPath)) - { - throw std::runtime_error("Attempted copy of directory into file"); - } - } + throw std::runtime_error("Attempted copy of directory into file"); } - else + + if (!IsDir(ToPath)) { - std::filesystem::create_directories(ToPath); + CreateDirectories(ToPath); } std::filesystem::path ToCanonical = std::filesystem::canonical(ToPath, Ec); @@ -120,7 +111,11 @@ CopyCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) { } - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, + const path_view& File, + uint64_t FileSize, + uint32_t, + uint64_t) override { ZEN_UNUSED(FileSize); std::error_code Ec; diff --git a/src/zen/cmds/copy_cmd.h b/src/zen/cmds/copy_cmd.h index 876aff3f5..e9735c159 100644 --- a/src/zen/cmds/copy_cmd.h +++ b/src/zen/cmds/copy_cmd.h @@ -19,11 +19,11 @@ public: virtual ZenCmdCategory& CommandCategory() const override { return g_UtilitiesCategory; } private: - cxxopts::Options m_Options{"copy", "Copy files efficiently"}; - std::string m_CopySource; - std::string m_CopyTarget; - bool m_NoClone = false; - bool m_MustClone = false; + cxxopts::Options m_Options{"copy", "Copy files efficiently"}; + std::filesystem::path m_CopySource; + std::filesystem::path m_CopyTarget; + bool m_NoClone = false; + bool m_MustClone = false; }; } // namespace zen diff --git a/src/zen/cmds/dedup_cmd.h b/src/zen/cmds/dedup_cmd.h index c4f0068e4..2721be2b9 100644 --- a/src/zen/cmds/dedup_cmd.h +++ b/src/zen/cmds/dedup_cmd.h @@ -21,8 +21,8 @@ public: private: cxxopts::Options m_Options{"dedup", "Deduplicate files"}; std::vector<std::string> m_Positional; - std::string m_DedupSource; - std::string m_DedupTarget; + std::filesystem::path m_DedupSource; + std::filesystem::path m_DedupTarget; size_t m_SizeThreshold = 1024 * 1024; }; diff --git a/src/zen/cmds/projectstore_cmd.cpp b/src/zen/cmds/projectstore_cmd.cpp index 6bc499f03..58af0577e 100644 --- a/src/zen/cmds/projectstore_cmd.cpp +++ b/src/zen/cmds/projectstore_cmd.cpp @@ -41,7 +41,7 @@ namespace { std::string ReadJupiterAccessTokenFromFile(const std::filesystem::path& Path) { - if (!std::filesystem::is_regular_file(Path)) + if (!IsFile(Path)) { throw std::runtime_error(fmt::format("the file '{}' does not exist", Path)); } @@ -61,6 +61,75 @@ namespace { return AuthToken; } + std::filesystem::path FindOidcTokenExePath(std::string_view OidcTokenAuthExecutablePath) + { + if (OidcTokenAuthExecutablePath.empty()) + { + const std::string OidcExecutableName = "OidcToken" ZEN_EXE_SUFFIX_LITERAL; + std::filesystem::path OidcTokenPath = (GetRunningExecutablePath().parent_path() / OidcExecutableName).make_preferred(); + if (IsFile(OidcTokenPath)) + { + return OidcTokenPath; + } + OidcTokenPath = (std::filesystem::current_path() / OidcExecutableName).make_preferred(); + if (IsFile(OidcTokenPath)) + { + return OidcTokenPath; + } + } + else + { + std::filesystem::path OidcTokenPath = std::filesystem::absolute(StringToPath(OidcTokenAuthExecutablePath)).make_preferred(); + if (IsFile(OidcTokenPath)) + { + return OidcTokenPath; + } + } + return {}; + }; + + void WriteAuthOptions(CbObjectWriter& Writer, + std::string_view JupiterOpenIdProvider, + std::string_view JupiterAccessToken, + std::string_view JupiterAccessTokenEnv, + std::string_view JupiterAccessTokenPath, + std::string_view OidcTokenAuthExecutablePath) + { + if (!JupiterOpenIdProvider.empty()) + { + Writer.AddString("openid-provider"sv, JupiterOpenIdProvider); + } + if (!JupiterAccessToken.empty()) + { + Writer.AddString("access-token"sv, JupiterAccessToken); + } + if (!JupiterAccessTokenPath.empty()) + { + std::string ResolvedCloudAccessToken = ReadJupiterAccessTokenFromFile(JupiterAccessTokenPath); + if (!ResolvedCloudAccessToken.empty()) + { + Writer.AddString("access-token"sv, ResolvedCloudAccessToken); + } + } + if (!JupiterAccessTokenEnv.empty()) + { + std::string ResolvedCloudAccessTokenEnv = GetEnvVariable(JupiterAccessTokenEnv); + + if (!ResolvedCloudAccessTokenEnv.empty()) + { + Writer.AddString("access-token"sv, ResolvedCloudAccessTokenEnv); + } + else + { + Writer.AddString("access-token-env"sv, JupiterAccessTokenEnv); + } + } + if (std::filesystem::path OidcTokenExePath = FindOidcTokenExePath(OidcTokenAuthExecutablePath); !OidcTokenExePath.empty()) + { + Writer.AddString("oidc-exe-path"sv, OidcTokenExePath.generic_string()); + } + } + IoBuffer MakeCbObjectPayload(std::function<void(CbObjectWriter& Writer)> WriteCB) { CbObjectWriter Writer; @@ -99,7 +168,7 @@ namespace { throw std::runtime_error(fmt::format("invalid job id returned, received '{}'", JobIdText)); } - ProgressBar ProgressBar(PlainProgress); + ProgressBar ProgressBar(PlainProgress ? ProgressBar::Mode::Plain : ProgressBar::Mode::Pretty, ""sv); auto OuputMessages = [&](CbObjectView StatusObject) { CbArrayView Messages = StatusObject["Messages"sv].AsArrayView(); @@ -863,6 +932,12 @@ ExportOplogCommand::ExportOplogCommand() "<filepath>"); m_Options.add_option("", "", + "oidctoken-exe-path", + "Path to OidcToken executable", + cxxopts::value<std::string>(m_OidcTokenAuthExecutablePath)->default_value(""), + ""); + m_Options.add_option("", + "", "assume-http2", "Assume that the cloud/builds endpoint is a HTTP/2 endpoint skipping HTTP/1.1 upgrade handshake", cxxopts::value(m_JupiterAssumeHttp2), @@ -1160,35 +1235,12 @@ ExportOplogCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg { Writer.AddString("basekey"sv, m_BaseCloudKey); } - if (!m_JupiterOpenIdProvider.empty()) - { - Writer.AddString("openid-provider"sv, m_JupiterOpenIdProvider); - } - if (!m_JupiterAccessToken.empty()) - { - Writer.AddString("access-token"sv, m_JupiterAccessToken); - } - if (!m_JupiterAccessTokenPath.empty()) - { - std::string ResolvedCloudAccessToken = ReadJupiterAccessTokenFromFile(m_JupiterAccessTokenPath); - if (!ResolvedCloudAccessToken.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessToken); - } - } - if (!m_JupiterAccessTokenEnv.empty()) - { - std::string ResolvedCloudAccessTokenEnv = GetEnvVariable(m_JupiterAccessTokenEnv); - - if (!ResolvedCloudAccessTokenEnv.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessTokenEnv); - } - else - { - Writer.AddString("access-token-env"sv, m_JupiterAccessTokenEnv); - } - } + WriteAuthOptions(Writer, + m_JupiterOpenIdProvider, + m_JupiterAccessToken, + m_JupiterAccessTokenEnv, + m_JupiterAccessTokenPath, + m_OidcTokenAuthExecutablePath); if (m_JupiterAssumeHttp2) { Writer.AddBool("assumehttp2"sv, true); @@ -1219,35 +1271,12 @@ ExportOplogCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg Writer.AddString("namespace"sv, m_JupiterNamespace); Writer.AddString("bucket"sv, m_JupiterBucket); Writer.AddString("buildsid"sv, m_BuildsId); - if (!m_JupiterOpenIdProvider.empty()) - { - Writer.AddString("openid-provider"sv, m_JupiterOpenIdProvider); - } - if (!m_JupiterAccessToken.empty()) - { - Writer.AddString("access-token"sv, m_JupiterAccessToken); - } - if (!m_JupiterAccessTokenPath.empty()) - { - std::string ResolvedCloudAccessToken = ReadJupiterAccessTokenFromFile(m_JupiterAccessTokenPath); - if (!ResolvedCloudAccessToken.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessToken); - } - } - if (!m_JupiterAccessTokenEnv.empty()) - { - std::string ResolvedCloudAccessTokenEnv = GetEnvVariable(m_JupiterAccessTokenEnv); - - if (!ResolvedCloudAccessTokenEnv.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessTokenEnv); - } - else - { - Writer.AddString("access-token-env"sv, m_JupiterAccessTokenEnv); - } - } + WriteAuthOptions(Writer, + m_JupiterOpenIdProvider, + m_JupiterAccessToken, + m_JupiterAccessTokenEnv, + m_JupiterAccessTokenPath, + m_OidcTokenAuthExecutablePath); if (m_JupiterAssumeHttp2) { Writer.AddBool("assumehttp2"sv, true); @@ -1383,6 +1412,12 @@ ImportOplogCommand::ImportOplogCommand() "<filepath>"); m_Options.add_option("", "", + "oidctoken-exe-path", + "Path to OidcToken executable", + cxxopts::value<std::string>(m_OidcTokenAuthExecutablePath)->default_value(""), + ""); + m_Options.add_option("", + "", "assume-http2", "Assume that the cloud/builds endpoint is a HTTP/2 endpoint skipping HTTP/1.1 upgrade handshake", cxxopts::value(m_JupiterAssumeHttp2), @@ -1449,12 +1484,6 @@ ImportOplogCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg return 1; } - m_OplogName = ResolveOplog(Http, m_ProjectName, m_OplogName); - if (m_OplogName.empty()) - { - return 1; - } - size_t TargetCount = 0; TargetCount += m_CloudUrl.empty() ? 0 : 1; TargetCount += m_BuildsUrl.empty() ? 0 : 1; @@ -1586,35 +1615,12 @@ ImportOplogCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg Writer.AddString("namespace"sv, m_JupiterNamespace); Writer.AddString("bucket"sv, m_JupiterBucket); Writer.AddString("key"sv, m_CloudKey); - if (!m_JupiterOpenIdProvider.empty()) - { - Writer.AddString("openid-provider"sv, m_JupiterOpenIdProvider); - } - if (!m_JupiterAccessToken.empty()) - { - Writer.AddString("access-token"sv, m_JupiterAccessToken); - } - if (!m_JupiterAccessTokenPath.empty()) - { - std::string ResolvedCloudAccessToken = ReadJupiterAccessTokenFromFile(m_JupiterAccessTokenPath); - if (!ResolvedCloudAccessToken.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessToken); - } - } - if (!m_JupiterAccessTokenEnv.empty()) - { - std::string ResolvedCloudAccessTokenEnv = GetEnvVariable(m_JupiterAccessTokenEnv); - - if (!ResolvedCloudAccessTokenEnv.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessTokenEnv); - } - else - { - Writer.AddString("access-token-env"sv, m_JupiterAccessTokenEnv); - } - } + WriteAuthOptions(Writer, + m_JupiterOpenIdProvider, + m_JupiterAccessToken, + m_JupiterAccessTokenEnv, + m_JupiterAccessTokenPath, + m_OidcTokenAuthExecutablePath); if (m_JupiterAssumeHttp2) { Writer.AddBool("assumehttp2"sv, true); @@ -1631,35 +1637,12 @@ ImportOplogCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg Writer.AddString("namespace"sv, m_JupiterNamespace); Writer.AddString("bucket"sv, m_JupiterBucket); Writer.AddString("buildsid"sv, m_BuildsId); - if (!m_JupiterOpenIdProvider.empty()) - { - Writer.AddString("openid-provider"sv, m_JupiterOpenIdProvider); - } - if (!m_JupiterAccessToken.empty()) - { - Writer.AddString("access-token"sv, m_JupiterAccessToken); - } - if (!m_JupiterAccessTokenPath.empty()) - { - std::string ResolvedCloudAccessToken = ReadJupiterAccessTokenFromFile(m_JupiterAccessTokenPath); - if (!ResolvedCloudAccessToken.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessToken); - } - } - if (!m_JupiterAccessTokenEnv.empty()) - { - std::string ResolvedCloudAccessTokenEnv = GetEnvVariable(m_JupiterAccessTokenEnv); - - if (!ResolvedCloudAccessTokenEnv.empty()) - { - Writer.AddString("access-token"sv, ResolvedCloudAccessTokenEnv); - } - else - { - Writer.AddString("access-token-env"sv, m_JupiterAccessTokenEnv); - } - } + WriteAuthOptions(Writer, + m_JupiterOpenIdProvider, + m_JupiterAccessToken, + m_JupiterAccessTokenEnv, + m_JupiterAccessTokenPath, + m_OidcTokenAuthExecutablePath); if (m_JupiterAssumeHttp2) { Writer.AddBool("assumehttp2"sv, true); @@ -2028,6 +2011,7 @@ OplogMirrorCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg std::filesystem::path TmpPath = RootPath / ".tmp"; CreateDirectories(TmpPath); + auto _ = MakeGuard([&TmpPath]() { DeleteDirectories(TmpPath); }); std::atomic_int64_t FileCount = 0; int OplogEntryCount = 0; @@ -2040,6 +2024,8 @@ OplogMirrorCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg std::unordered_set<std::u8string> FileNames; std::atomic<uint64_t> WrittenByteCount = 0; + std::atomic<bool> AbortFlag(false); + Stopwatch WriteStopWatch; auto EmitFilesForDataArray = [&](CbArrayView DataArray) { @@ -2068,38 +2054,50 @@ OplogMirrorCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg EmitCount++; WorkRemaining.AddCount(1); WorkerPool.ScheduleWork( - [this, &RootPath, FileName, &FileCount, ChunkId, &Http, TmpPath, &WorkRemaining, &WrittenByteCount]() { + [this, &RootPath, &AbortFlag, FileName, &FileCount, ChunkId, &Http, TmpPath, &WorkRemaining, &WrittenByteCount]() { auto _ = MakeGuard([&WorkRemaining]() { WorkRemaining.CountDown(); }); - if (HttpClient::Response ChunkResponse = - Http.Download(fmt::format("/prj/{}/oplog/{}/{}"sv, m_ProjectName, m_OplogName, ChunkId), TmpPath)) + if (!AbortFlag) { - auto TryDecompress = [](const IoBuffer& Buffer) -> IoBuffer { - IoHash RawHash; - uint64_t RawSize; - if (CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer), RawHash, RawSize)) - { - return Compressed.Decompress().AsIoBuffer(); - }; - return std::move(Buffer); - }; - - IoBuffer ChunkData = - m_Decompress ? TryDecompress(ChunkResponse.ResponsePayload) : ChunkResponse.ResponsePayload; - std::filesystem::path TargetPath = RootPath / FileName; - if (!MoveToFile(TargetPath, ChunkData)) + try { - WriteFile(TargetPath, ChunkData); + if (HttpClient::Response ChunkResponse = + Http.Download(fmt::format("/prj/{}/oplog/{}/{}"sv, m_ProjectName, m_OplogName, ChunkId), TmpPath)) + { + auto TryDecompress = [](const IoBuffer& Buffer) -> IoBuffer { + IoHash RawHash; + uint64_t RawSize; + if (CompressedBuffer Compressed = + CompressedBuffer::FromCompressed(SharedBuffer(Buffer), RawHash, RawSize)) + { + return Compressed.Decompress().AsIoBuffer(); + }; + return std::move(Buffer); + }; + + IoBuffer ChunkData = + m_Decompress ? TryDecompress(ChunkResponse.ResponsePayload) : ChunkResponse.ResponsePayload; + + if (!MoveToFile(TargetPath, ChunkData)) + { + WriteFile(TargetPath, ChunkData); + } + WrittenByteCount.fetch_add(ChunkData.GetSize()); + ++FileCount; + } + else + { + throw std::runtime_error(fmt::format("Unable to fetch '{}' (chunk {}). Reason: '{}'", + FileName, + ChunkId, + ChunkResponse.ErrorMessage(""sv))); + } + } + catch (const std::exception& Ex) + { + AbortFlag.store(true); + ZEN_CONSOLE("Failed writing file to '{}'. Reason: '{}'", TargetPath, Ex.what()); } - WrittenByteCount.fetch_add(ChunkData.GetSize()); - ++FileCount; - } - else - { - ZEN_CONSOLE("Unable to fetch '{}' (chunk {}). Reason: '{}'", - FileName, - ChunkId, - ChunkResponse.ErrorMessage(""sv)); } }); } @@ -2114,33 +2112,36 @@ OplogMirrorCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg { std::unique_ptr<ProgressBar> EmitProgressBar; { - ProgressBar ParseProgressBar(false); + ProgressBar ParseProgressBar(ProgressBar::Mode::Pretty, ""); CbArrayView Entries = ResponseObject["entries"sv].AsArrayView(); uint64_t Remaining = Entries.Num(); for (auto EntryIter : Entries) { - CbObjectView Entry = EntryIter.AsObjectView(); - ParseProgressBar.UpdateState( - {.Task = "Parsing oplog", .Details = "", .TotalCount = Entries.Num(), .RemainingCount = Remaining}, - false); - Remaining--; - if (!m_KeyFilter.empty()) + if (!AbortFlag) { - if (Entry["key"].AsString().find(m_KeyFilter) == std::string_view::npos) + CbObjectView Entry = EntryIter.AsObjectView(); + ParseProgressBar.UpdateState( + {.Task = "Parsing oplog", .Details = "", .TotalCount = Entries.Num(), .RemainingCount = Remaining}, + false); + Remaining--; + if (!m_KeyFilter.empty()) { - continue; + if (Entry["key"].AsString().find(m_KeyFilter) == std::string_view::npos) + { + continue; + } + } + if (!EmitProgressBar) + { + EmitProgressBar = std::make_unique<ProgressBar>(ProgressBar::Mode::Pretty, ""sv); + WriteStopWatch.Reset(); } - } - if (!EmitProgressBar) - { - EmitProgressBar = std::make_unique<ProgressBar>(false); - WriteStopWatch.Reset(); - } - EmitFilesForDataArray(Entry["packagedata"sv].AsArrayView()); - EmitFilesForDataArray(Entry["bulkdata"sv].AsArrayView()); + EmitFilesForDataArray(Entry["packagedata"sv].AsArrayView()); + EmitFilesForDataArray(Entry["bulkdata"sv].AsArrayView()); - ++OplogEntryCount; + ++OplogEntryCount; + } } ParseProgressBar.Finish(); } @@ -2172,6 +2173,12 @@ OplogMirrorCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg { WorkRemaining.Wait(); } + + if (AbortFlag) + { + // Error has already been reported by async code + return 1; + } } else { @@ -2185,8 +2192,6 @@ OplogMirrorCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** arg return 1; } - std::filesystem::remove_all(TmpPath); - ZEN_CONSOLE("mirrored {} files from {} oplog entries successfully", FileCount.load(), OplogEntryCount); return 0; diff --git a/src/zen/cmds/projectstore_cmd.h b/src/zen/cmds/projectstore_cmd.h index e66e98414..0d24d8529 100644 --- a/src/zen/cmds/projectstore_cmd.h +++ b/src/zen/cmds/projectstore_cmd.h @@ -109,6 +109,7 @@ private: std::string m_JupiterAccessToken; std::string m_JupiterAccessTokenEnv; std::string m_JupiterAccessTokenPath; + std::string m_OidcTokenAuthExecutablePath; bool m_JupiterAssumeHttp2 = false; bool m_JupiterDisableTempBlocks = false; @@ -165,6 +166,7 @@ private: std::string m_JupiterAccessToken; std::string m_JupiterAccessTokenEnv; std::string m_JupiterAccessTokenPath; + std::string m_OidcTokenAuthExecutablePath; bool m_JupiterAssumeHttp2 = false; std::string m_CloudUrl; diff --git a/src/zen/cmds/rpcreplay_cmd.cpp b/src/zen/cmds/rpcreplay_cmd.cpp index 5b88a1f73..4fc38d92a 100644 --- a/src/zen/cmds/rpcreplay_cmd.cpp +++ b/src/zen/cmds/rpcreplay_cmd.cpp @@ -196,7 +196,7 @@ RpcReplayCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException("Rpc replay command requires a path"); } - if (!std::filesystem::exists(m_RecordingPath) || !std::filesystem::is_directory(m_RecordingPath)) + if (!IsDir(m_RecordingPath)) { throw std::runtime_error(fmt::format("could not find recording at '{}'", m_RecordingPath)); } diff --git a/src/zen/cmds/run_cmd.cpp b/src/zen/cmds/run_cmd.cpp index a99ba9704..309b8996a 100644 --- a/src/zen/cmds/run_cmd.cpp +++ b/src/zen/cmds/run_cmd.cpp @@ -100,7 +100,7 @@ RunCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else { - CleanDirectory(BaseDirectory); + CleanDirectory(BaseDirectory, /*ForceRemoveReadOnlyFiles*/ false); } } diff --git a/src/zen/cmds/serve_cmd.cpp b/src/zen/cmds/serve_cmd.cpp index 8e36e74ce..64039e4c9 100644 --- a/src/zen/cmds/serve_cmd.cpp +++ b/src/zen/cmds/serve_cmd.cpp @@ -67,7 +67,7 @@ ServeCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException("command requires a root path"); } - if (!std::filesystem::exists(m_RootPath) || !std::filesystem::is_directory(m_RootPath)) + if (!IsDir(m_RootPath)) { throw zen::OptionParseException(fmt::format("path must exist and must be a directory: '{}'", m_RootPath)); } @@ -120,7 +120,7 @@ ServeCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) struct FsVisitor : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::filesystem::path ServerPath = std::filesystem::relative(Parent / File, RootPath); std::string ServerPathString = reinterpret_cast<const char*>(ServerPath.generic_u8string().c_str()); diff --git a/src/zen/cmds/status_cmd.cpp b/src/zen/cmds/status_cmd.cpp index 16754e747..2b507e43d 100644 --- a/src/zen/cmds/status_cmd.cpp +++ b/src/zen/cmds/status_cmd.cpp @@ -32,7 +32,7 @@ StatusCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) uint16_t EffectivePort = 0; if (!m_DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + if (!IsFile(m_DataDir / ".lock")) { ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); return 1; diff --git a/src/zen/cmds/up_cmd.cpp b/src/zen/cmds/up_cmd.cpp index ac2f42a86..d0763701e 100644 --- a/src/zen/cmds/up_cmd.cpp +++ b/src/zen/cmds/up_cmd.cpp @@ -155,7 +155,7 @@ AttachCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (!m_DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + if (!IsFile(m_DataDir / ".lock")) { ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); return 1; @@ -216,13 +216,13 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (m_ProgramBaseDir.empty()) { - std::filesystem::path ExePath = zen::GetRunningExecutablePath(); + std::filesystem::path ExePath = GetRunningExecutablePath(); m_ProgramBaseDir = ExePath.parent_path(); } if (!m_DataDir.empty()) { - if (!std::filesystem::is_regular_file(m_DataDir / ".lock")) + if (!IsFile(m_DataDir / ".lock")) { ZEN_CONSOLE("lock file does not exist in directory '{}'", m_DataDir); return 1; @@ -311,7 +311,7 @@ DownCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) // Try to find the running executable by path name std::filesystem::path ServerExePath = m_ProgramBaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL; ProcessHandle RunningProcess; - if (std::error_code Ec = FindProcess(ServerExePath, RunningProcess); !Ec) + if (std::error_code Ec = FindProcess(ServerExePath, RunningProcess); !Ec, /*IncludeSelf*/ false) { ZEN_WARN("attempting hard terminate of zen process with pid ({})", RunningProcess.Pid()); try diff --git a/src/zen/cmds/version_cmd.cpp b/src/zen/cmds/version_cmd.cpp index 41042533d..7dfa125e4 100644 --- a/src/zen/cmds/version_cmd.cpp +++ b/src/zen/cmds/version_cmd.cpp @@ -2,10 +2,12 @@ #include "version_cmd.h" +#include <zencore/basicfile.h> #include <zencore/config.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zenhttp/httpclient.h> #include <zenhttp/httpcommon.h> #include <zenutil/zenserverprocess.h> @@ -17,11 +19,14 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { +using namespace std::literals; + VersionCommand::VersionCommand() { m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName), "[hosturl]"); m_Options.add_option("", "d", "detailed", "Detailed Version", cxxopts::value(m_DetailedVersion), "[detailedversion]"); + m_Options.add_option("", "o", "output-path", "Path for output", cxxopts::value(m_OutputPath), "[outputpath]"); m_Options.parse_positional({"hosturl"}); } @@ -51,28 +56,40 @@ VersionCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) } else { - const std::string UrlBase = fmt::format("{}/health", m_HostName); - cpr::Session Session; - std::string VersionRequest = fmt::format("{}/version{}", UrlBase, m_DetailedVersion ? "?detailed=true" : ""); - Session.SetUrl(VersionRequest); - cpr::Response Response = Session.Get(); - if (!zen::IsHttpSuccessCode(Response.status_code)) + if (!m_OutputPath.empty()) { - if (Response.status_code) - { - ZEN_ERROR("{} failed: {}: {} ({})", VersionRequest, Response.status_code, Response.reason, Response.text); - } - else - { - ZEN_ERROR("{} failed: {}", VersionRequest, Response.error.message); - } + ZEN_CONSOLE("Querying host {}", m_HostName); + } + HttpClient Client(m_HostName, HttpClientSettings{.Timeout = std::chrono::milliseconds(5000)}); + + HttpClient::KeyValueMap Parameters; + if (m_DetailedVersion) + { + Parameters.Entries.insert_or_assign("detailed", "true"); + } + const std::string_view VersionRequest("/health/version"sv); + HttpClient::Response Response = Client.Get(VersionRequest, {}, Parameters); + if (!Response.IsSuccess()) + { + ZEN_ERROR("{} failed: {}", VersionRequest, Response.ErrorMessage(""sv)); return 1; } - Version = Response.text; + Version = Response.AsText(); } - ZEN_CONSOLE("{}", Version); + if (m_OutputPath.empty()) + { + ZEN_CONSOLE("{}", Version); + } + else + { + ZEN_CONSOLE("Writing version '{}' to '{}'", Version, m_OutputPath); + + BasicFile OutputFile(m_OutputPath, BasicFile::Mode::kTruncate); + OutputFile.Write(Version.data(), Version.length(), 0); + OutputFile.Close(); + } return 0; } diff --git a/src/zen/cmds/version_cmd.h b/src/zen/cmds/version_cmd.h index f8d16fb96..7a910e463 100644 --- a/src/zen/cmds/version_cmd.h +++ b/src/zen/cmds/version_cmd.h @@ -9,6 +9,9 @@ namespace zen { class VersionCommand : public ZenCmdBase { public: + static constexpr char Name[] = "version"; + static constexpr char Description[] = "Get zen service version"; + VersionCommand(); ~VersionCommand(); @@ -16,9 +19,10 @@ public: virtual cxxopts::Options& Options() override { return m_Options; } private: - cxxopts::Options m_Options{"version", "Get zen service version"}; - std::string m_HostName; - bool m_DetailedVersion = false; + cxxopts::Options m_Options{Name, Description}; + std::string m_HostName; + bool m_DetailedVersion = false; + std::filesystem::path m_OutputPath; }; } // namespace zen diff --git a/src/zen/cmds/wipe_cmd.cpp b/src/zen/cmds/wipe_cmd.cpp new file mode 100644 index 000000000..fcc18df2b --- /dev/null +++ b/src/zen/cmds/wipe_cmd.cpp @@ -0,0 +1,606 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "wipe_cmd.h" + +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/string.h> +#include <zencore/timer.h> +#include <zencore/trace.h> +#include <zenutil/parallelwork.h> +#include <zenutil/workerpools.h> + +#include <signal.h> + +#include <iostream> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_PLATFORM_WINDOWS +# include <zencore/windows.h> +#else +# include <fcntl.h> +# include <sys/file.h> +# include <sys/stat.h> +# include <unistd.h> +#endif + +namespace zen { + +namespace { + static std::atomic<bool> AbortFlag = false; + static std::atomic<bool> PauseFlag = false; + static bool IsVerbose = false; + static bool Quiet = false; + static ProgressBar::Mode ProgressMode = ProgressBar::Mode::Pretty; + const bool SingleThreaded = false; + bool BoostWorkerThreads = true; + + uint32_t GetUpdateDelayMS(ProgressBar::Mode InMode) + { + switch (InMode) + { + case ProgressBar::Mode::Plain: + return 5000; + case ProgressBar::Mode::Pretty: + return 200; + case ProgressBar::Mode::Log: + return 2000; + default: + ZEN_ASSERT(false); + return 0; + } + } + + WorkerThreadPool& GetIOWorkerPool() + { + return SingleThreaded ? GetSyncWorkerPool() + : BoostWorkerThreads ? GetLargeWorkerPool(EWorkloadType::Burst) + : GetMediumWorkerPool(EWorkloadType::Burst); + } + +#define ZEN_CONSOLE_VERBOSE(fmtstr, ...) \ + if (IsVerbose) \ + { \ + ZEN_CONSOLE_LOG(zen::logging::level::Info, fmtstr, ##__VA_ARGS__); \ + } + + static void SignalCallbackHandler(int SigNum) + { + if (SigNum == SIGINT) + { + PauseFlag = false; + AbortFlag = true; + } +#if ZEN_PLATFORM_WINDOWS + if (SigNum == SIGBREAK) + { + PauseFlag = false; + AbortFlag = true; + } +#endif // ZEN_PLATFORM_WINDOWS + } + + bool IsReadOnly(uint32_t Attributes) + { +#if ZEN_PLATFORM_WINDOWS + return IsFileAttributeReadOnly(Attributes); +#else + return IsFileModeReadOnly(Attributes); +#endif + } + + bool IsFileWithRetry(const std::filesystem::path& Path) + { + std::error_code Ec; + bool Result = IsFile(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + Ec.clear(); + Result = IsFile(Path, Ec); + } + if (Ec) + { + zen::ThrowSystemError(Ec.value(), Ec.message()); + } + return Result; + } + + bool SetFileReadOnlyWithRetry(const std::filesystem::path& Path, bool ReadOnly) + { + std::error_code Ec; + bool Result = SetFileReadOnly(Path, ReadOnly, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + if (!IsFileWithRetry(Path)) + { + return false; + } + Ec.clear(); + Result = SetFileReadOnly(Path, ReadOnly, Ec); + } + if (Ec) + { + zen::ThrowSystemError(Ec.value(), Ec.message()); + } + return Result; + } + + void RemoveFileWithRetry(const std::filesystem::path& Path) + { + std::error_code Ec; + RemoveFile(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + if (!IsFileWithRetry(Path)) + { + return; + } + Ec.clear(); + RemoveFile(Path, Ec); + } + if (Ec) + { + zen::ThrowSystemError(Ec.value(), Ec.message()); + } + } + + void RemoveDirWithRetry(const std::filesystem::path& Path) + { + std::error_code Ec; + RemoveDir(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + if (!IsDir(Path)) + { + return; + } + Ec.clear(); + RemoveDir(Path, Ec); + } + if (Ec) + { + zen::ThrowSystemError(Ec.value(), Ec.message()); + } + } + + bool CleanDirectory(const std::filesystem::path& Path, + std::span<const std::string_view> ExcludeDirectories, + bool RemoveReadonly, + bool Dryrun) + { + ZEN_TRACE_CPU("CleanDirectory"); + Stopwatch Timer; + + ProgressBar Progress(ProgressMode, "Clean Folder"); + + std::atomic<bool> CleanWipe = true; + std::atomic<uint64_t> DiscoveredItemCount = 0; + std::atomic<uint64_t> DeletedItemCount = 0; + std::atomic<uint64_t> DeletedByteCount = 0; + std::atomic<uint64_t> FailedDeleteCount = 0; + + std::vector<std::filesystem::path> SubdirectoriesToDelete; + tsl::robin_map<IoHash, size_t, IoHash::Hasher> SubdirectoriesToDeleteLookup; + tsl::robin_set<IoHash, IoHash::Hasher> SubdirectoriesToKeep; + RwLock SubdirectoriesLock; + + auto AddFoundDirectory = [&](std::filesystem::path Directory, bool Keep) -> bool { + bool Added = false; + if (Keep) + { + bool IsLeaf = true; + while (Directory != Path) + { + const std::string DirectoryString = Directory.generic_string(); + IoHash DirectoryNameHash = IoHash::HashBuffer(DirectoryString.data(), DirectoryString.length()); + RwLock::ExclusiveLockScope _(SubdirectoriesLock); + if (auto It = SubdirectoriesToKeep.find(DirectoryNameHash); It == SubdirectoriesToKeep.end()) + { + SubdirectoriesToKeep.insert(DirectoryNameHash); + if (IsLeaf) + { + Added = true; + } + } + else + { + break; + } + Directory = Directory.parent_path(); + IsLeaf = false; + } + } + else + { + bool IsLeaf = true; + while (Directory != Path) + { + const std::string DirectoryString = Directory.generic_string(); + IoHash DirectoryNameHash = IoHash::HashBuffer(DirectoryString.data(), DirectoryString.length()); + RwLock::ExclusiveLockScope _(SubdirectoriesLock); + if (SubdirectoriesToKeep.contains(DirectoryNameHash)) + { + break; + } + if (auto It = SubdirectoriesToDeleteLookup.find(DirectoryNameHash); It == SubdirectoriesToDeleteLookup.end()) + { + SubdirectoriesToDeleteLookup.insert({DirectoryNameHash, SubdirectoriesToDelete.size()}); + SubdirectoriesToDelete.push_back(Directory); + if (IsLeaf) + { + Added = true; + } + } + else + { + break; + } + Directory = Directory.parent_path(); + IsLeaf = false; + } + } + return Added; + }; + + ParallelWork Work(AbortFlag, PauseFlag); + + struct AsyncVisitor : public GetDirectoryContentVisitor + { + AsyncVisitor(const std::filesystem::path& InPath, + std::atomic<bool>& InCleanWipe, + std::atomic<uint64_t>& InDiscoveredItemCount, + std::atomic<uint64_t>& InDeletedItemCount, + std::atomic<uint64_t>& InDeletedByteCount, + std::atomic<uint64_t>& InFailedDeleteCount, + std::span<const std::string_view> InExcludeDirectories, + bool InRemoveReadonly, + bool InDryrun, + const std::function<bool(std::filesystem::path, bool)>& InAddFoundDirectoryFunc) + : Path(InPath) + , CleanWipe(InCleanWipe) + , DiscoveredItemCount(InDiscoveredItemCount) + , DeletedItemCount(InDeletedItemCount) + , DeletedByteCount(InDeletedByteCount) + , FailedDeleteCount(InFailedDeleteCount) + , ExcludeDirectories(InExcludeDirectories) + , RemoveReadonly(InRemoveReadonly) + , Dryrun(InDryrun) + , AddFoundDirectoryFunc(InAddFoundDirectoryFunc) + { + } + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override + { + ZEN_TRACE_CPU("CleanDirectory_AsyncVisitDirectory"); + if (!AbortFlag) + { + if (!RelativeRoot.empty()) + { + DiscoveredItemCount++; + } + if (Content.FileNames.empty()) + { + const std::filesystem::path ParentPath = Path / RelativeRoot; + bool KeepDirectory = RelativeRoot.empty(); + + bool Added = AddFoundDirectoryFunc(ParentPath, KeepDirectory); + if (Added) + { + ZEN_CONSOLE_VERBOSE("{} directory {}", KeepDirectory ? "Keeping" : "Removing", ParentPath); + } + } + else + { + DiscoveredItemCount += Content.FileNames.size(); + + const std::string RelativeRootString = RelativeRoot.generic_string(); + bool RemoveContent = true; + for (const std::string_view ExcludeDirectory : ExcludeDirectories) + { + if (RelativeRootString.starts_with(ExcludeDirectory)) + { + if (RelativeRootString.length() > ExcludeDirectory.length()) + { + const char MaybePathDelimiter = RelativeRootString[ExcludeDirectory.length()]; + if (MaybePathDelimiter == '/' || MaybePathDelimiter == '\\' || + MaybePathDelimiter == std::filesystem::path::preferred_separator) + { + RemoveContent = false; + break; + } + } + else + { + RemoveContent = false; + break; + } + } + } + + const std::filesystem::path ParentPath = Path / RelativeRoot; + bool KeepDirectory = RelativeRoot.empty(); + + if (RemoveContent) + { + ZEN_TRACE_CPU("DeleteFiles"); + uint64_t RemovedCount = 0; + for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) + { + const std::filesystem::path& FileName = Content.FileNames[FileIndex]; + const std::filesystem::path FilePath = (ParentPath / FileName).make_preferred(); + try + { + const uint32_t Attributes = Content.FileAttributes[FileIndex]; + const bool IsReadonly = IsReadOnly(Attributes); + bool RemoveFile = false; + if (IsReadonly) + { + if (RemoveReadonly) + { + if (!Dryrun) + { + SetFileReadOnlyWithRetry(FilePath, false); + } + RemoveFile = true; + } + } + else + { + RemoveFile = true; + } + + if (RemoveFile) + { + if (!Dryrun) + { + RemoveFileWithRetry(FilePath); + } + DeletedItemCount++; + DeletedByteCount += Content.FileSizes[FileIndex]; + RemovedCount++; + ZEN_CONSOLE_VERBOSE("Removed file {}", FilePath); + } + else + { + ZEN_CONSOLE_VERBOSE("Skipped readonly file {}", FilePath); + KeepDirectory = true; + } + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed removing file {}. Reason: {}", FilePath, Ex.what()); + FailedDeleteCount++; + CleanWipe = false; + KeepDirectory = true; + } + } + ZEN_CONSOLE_VERBOSE("Removed {} files in {}", RemovedCount, ParentPath); + } + else + { + ZEN_CONSOLE_VERBOSE("Skipped removal of {} files in {}", Content.FileNames.size(), ParentPath); + } + bool Added = AddFoundDirectoryFunc(ParentPath, KeepDirectory); + if (Added) + { + ZEN_CONSOLE_VERBOSE("{} directory {}", KeepDirectory ? "Keeping" : "Removing", ParentPath); + } + } + } + } + const std::filesystem::path& Path; + std::atomic<bool>& CleanWipe; + std::atomic<uint64_t>& DiscoveredItemCount; + std::atomic<uint64_t>& DeletedItemCount; + std::atomic<uint64_t>& DeletedByteCount; + std::atomic<uint64_t>& FailedDeleteCount; + std::span<const std::string_view> ExcludeDirectories; + const bool RemoveReadonly; + const bool Dryrun; + std::function<bool(std::filesystem::path, bool)> AddFoundDirectoryFunc; + } Visitor(Path, + CleanWipe, + DiscoveredItemCount, + DeletedItemCount, + DeletedByteCount, + FailedDeleteCount, + ExcludeDirectories, + RemoveReadonly, + Dryrun, + AddFoundDirectory); + + uint64_t LastUpdateTimeMs = Timer.GetElapsedTimeMs(); + + GetDirectoryContent(Path, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | + DirectoryContentFlags::IncludeFileSizes | DirectoryContentFlags::IncludeAttributes, + Visitor, + GetIOWorkerPool(), + Work.PendingWork()); + + Work.Wait(ProgressMode == ProgressBar::Mode::Pretty ? 200 : 5000, [&](bool IsAborted, bool IsPaused, ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + if (Quiet) + { + return; + } + LastUpdateTimeMs = Timer.GetElapsedTimeMs(); + + uint64_t Deleted = DeletedItemCount.load(); + uint64_t DeletedBytes = DeletedByteCount.load(); + uint64_t Discovered = DiscoveredItemCount.load(); + Progress.UpdateState({.Task = "Removing files ", + .Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes)), + .TotalCount = Discovered, + .RemainingCount = Discovered - Deleted, + .Status = ProgressBar::State::CalculateStatus(IsAborted, IsPaused)}, + false); + }); + + std::vector<std::filesystem::path> DirectoriesToDelete; + DirectoriesToDelete.reserve(SubdirectoriesToDelete.size()); + for (auto It : SubdirectoriesToDeleteLookup) + { + const IoHash& DirHash = It.first; + if (auto KeepIt = SubdirectoriesToKeep.find(DirHash); KeepIt == SubdirectoriesToKeep.end()) + { + DirectoriesToDelete.emplace_back(std::move(SubdirectoriesToDelete[It.second])); + } + } + + std::sort(DirectoriesToDelete.begin(), + DirectoriesToDelete.end(), + [](const std::filesystem::path& Lhs, const std::filesystem::path& Rhs) { + return Lhs.string().length() > Rhs.string().length(); + }); + + for (size_t SubDirectoryIndex = 0; SubDirectoryIndex < DirectoriesToDelete.size(); SubDirectoryIndex++) + { + ZEN_TRACE_CPU("DeleteDirs"); + const std::filesystem::path& DirectoryToDelete = DirectoriesToDelete[SubDirectoryIndex]; + try + { + if (!Dryrun) + { + RemoveDirWithRetry(DirectoryToDelete); + } + ZEN_CONSOLE_VERBOSE("Removed directory {}", DirectoryToDelete); + DeletedItemCount++; + } + catch (const std::exception& Ex) + { + if (!Quiet) + { + ZEN_WARN("Failed removing directory {}. Reason: {}", DirectoryToDelete, Ex.what()); + } + CleanWipe = false; + FailedDeleteCount++; + } + + uint64_t NowMs = Timer.GetElapsedTimeMs(); + if ((NowMs - LastUpdateTimeMs) >= GetUpdateDelayMS(ProgressMode)) + { + LastUpdateTimeMs = NowMs; + + uint64_t Deleted = DeletedItemCount.load(); + uint64_t DeletedBytes = DeletedByteCount.load(); + uint64_t Discovered = DiscoveredItemCount.load(); + Progress.UpdateState({.Task = "Removing folders", + .Details = fmt::format("Found {}, Deleted {} ({})", Discovered, Deleted, NiceBytes(DeletedBytes)), + .TotalCount = DirectoriesToDelete.size(), + .RemainingCount = DirectoriesToDelete.size() - SubDirectoryIndex}, + false); + } + } + + Progress.Finish(); + + uint64_t ElapsedTimeMs = Timer.GetElapsedTimeMs(); + if (!Quiet) + { + ZEN_CONSOLE("Wiped folder '{}' {} ({}) ({} failed) in {}", + Path, + DeletedItemCount.load(), + NiceBytes(DeletedByteCount.load()), + FailedDeleteCount.load(), + NiceTimeSpanMs(ElapsedTimeMs)); + } + if (FailedDeleteCount.load() > 0) + { + throw std::runtime_error(fmt::format("Failed to delete {} files/directories in '{}'", FailedDeleteCount.load(), Path)); + } + return CleanWipe; + } +} // namespace + +WipeCommand::WipeCommand() +{ + m_Options.add_options()("h,help", "Print help"); + m_Options.add_option("", "d", "directory", "Directory to wipe", cxxopts::value(m_Directory), "<directory>"); + m_Options.add_option("", "r", "keep-readonly", "Leave read-only files", cxxopts::value(m_KeepReadOnlyFiles), "<keepreadonly>"); + m_Options.add_option("", "q", "quiet", "Reduce output to console", cxxopts::value(m_Quiet), "<quiet>"); + m_Options.add_option("", "y", "yes", "Don't query for confirmation", cxxopts::value(m_Yes), "<yes>"); + m_Options.add_option("", "", "dryrun", "Do a dry run without deleting anything", cxxopts::value(m_Dryrun), "<dryrun>"); + m_Options.add_option("output", "", "plain-progress", "Show progress using plain output", cxxopts::value(m_PlainProgress), "<progress>"); + m_Options.add_option("output", "", "verbose", "Enable verbose console output", cxxopts::value(m_Verbose), "<verbose>"); + m_Options.add_option("", + "", + "boost-workers", + "Increase the number of worker threads - may cause computer to be less responsive", + cxxopts::value(m_BoostWorkerThreads), + "<boostworkers>"); + + m_Options.parse_positional({"directory"}); +} + +WipeCommand::~WipeCommand() = default; + +int +WipeCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) +{ + ZEN_UNUSED(GlobalOptions); + + signal(SIGINT, SignalCallbackHandler); +#if ZEN_PLATFORM_WINDOWS + signal(SIGBREAK, SignalCallbackHandler); +#endif // ZEN_PLATFORM_WINDOWS + + if (!ZenCmdBase::ParseOptions(argc, argv)) + { + return 0; + } + + Quiet = m_Quiet; + IsVerbose = m_Verbose; + ProgressMode = (IsVerbose || m_PlainProgress) ? ProgressBar::Mode::Plain : ProgressBar::Mode::Pretty; + BoostWorkerThreads = m_BoostWorkerThreads; + + MakeSafeAbsolutePathÍnPlace(m_Directory); + + if (!IsDir(m_Directory)) + { + return 0; + } + + while (!m_Yes) + { + const std::string Prompt = fmt::format("Do you want to wipe directory '{}'? (yes/no) ", m_Directory); + printf("%s", Prompt.c_str()); + std::string Reponse; + std::getline(std::cin, Reponse); + Reponse = ToLower(Reponse); + if (Reponse == "y" || Reponse == "yes") + { + m_Yes = true; + } + else if (Reponse == "n" || Reponse == "no") + { + return 0; + } + } + + try + { + CleanDirectory(m_Directory, {}, !m_KeepReadOnlyFiles, m_Dryrun); + } + catch (std::exception& Ex) + { + if (!m_Quiet) + { + ZEN_ERROR("{}", Ex.what()); + } + return 3; + } + + return 0; +} + +} // namespace zen diff --git a/src/zen/cmds/wipe_cmd.h b/src/zen/cmds/wipe_cmd.h new file mode 100644 index 000000000..0e910bb81 --- /dev/null +++ b/src/zen/cmds/wipe_cmd.h @@ -0,0 +1,36 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include "../zen.h" + +namespace zen { + +/** Wipe directories + */ +class WipeCommand : public ZenCmdBase +{ +public: + static constexpr char Name[] = "wipe"; + static constexpr char Description[] = "Wipe the contents of a directory"; + + WipeCommand(); + ~WipeCommand(); + + virtual cxxopts::Options& Options() override { return m_Options; } + virtual int Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) override; + virtual ZenCmdCategory& CommandCategory() const override { return g_UtilitiesCategory; } + +private: + cxxopts::Options m_Options{Name, Description}; + std::filesystem::path m_Directory; + bool m_KeepReadOnlyFiles = true; + bool m_Quiet = false; + bool m_Yes = false; + bool m_PlainProgress = false; + bool m_Verbose = false; + bool m_Dryrun = false; + bool m_BoostWorkerThreads = false; +}; + +} // namespace zen diff --git a/src/zen/cmds/workspaces_cmd.cpp b/src/zen/cmds/workspaces_cmd.cpp index 05d3c573f..773734f12 100644 --- a/src/zen/cmds/workspaces_cmd.cpp +++ b/src/zen/cmds/workspaces_cmd.cpp @@ -25,18 +25,22 @@ namespace { if (!Path.empty()) { std::u8string PathString = Path.u8string(); - if (PathString.ends_with(std::filesystem::path::preferred_separator)) + if (PathString.ends_with(std::filesystem::path::preferred_separator) || PathString.starts_with('/')) { PathString.pop_back(); Path = std::filesystem::path(PathString); } - // Special case if user gives a path with quotes and includes a backslash at the end: - // ="path\" cxxopts strips the leading quote only but not the trailing. - // As we expect paths here and we don't want trailing slashes we strip away the quote - // manually if the string does not start with a quote UE-231677 - else if (PathString[0] != '\"' && PathString[PathString.length() - 1] == '\"') + } + } + + static void RemoveLeadingPathSeparator(std::filesystem::path& Path) + { + if (!Path.empty()) + { + std::u8string PathString = Path.u8string(); + if (PathString.starts_with(std::filesystem::path::preferred_separator) || PathString.starts_with('/')) { - PathString.pop_back(); + PathString.erase(PathString.begin()); Path = std::filesystem::path(PathString); } } @@ -83,7 +87,7 @@ WorkspaceCommand::WorkspaceCommand() { m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); - m_Options.add_options()("system-dir", "Specify system root", cxxopts::value<std::filesystem::path>(m_SystemRootDir)); + m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); m_Options.add_option("", "v", "verb", "Verb for workspace - create, remove, info", cxxopts::value(m_Verb), "<verb>"); m_Options.parse_positional({"verb"}); m_Options.positional_help("verb"); @@ -158,12 +162,12 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) throw zen::OptionParseException(fmt::format("path is required\n{}", m_CreateOptions.help())); } - RemoveTrailingPathSeparator(m_Path); + std::filesystem::path Path = StringToPath(m_Path); if (m_Id.empty()) { - m_Id = Workspaces::PathToId(m_Path).ToString(); - ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_Id, m_Path); + m_Id = Workspaces::PathToId(Path).ToString(); + ZEN_CONSOLE("Using generated workspace id {} from path '{}'", m_Id, Path); } if (Oid::TryFromHexString(m_Id) == Oid::Zero) @@ -174,7 +178,7 @@ WorkspaceCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** argv) if (Workspaces::AddWorkspace( Log(), StatePath, - {.Id = Oid::FromHexString(m_Id), .RootPath = m_Path, .AllowShareCreationFromHttp = m_AllowShareCreationFromHttp})) + {.Id = Oid::FromHexString(m_Id), .RootPath = Path, .AllowShareCreationFromHttp = m_AllowShareCreationFromHttp})) { if (!m_HostName.empty()) { @@ -274,7 +278,7 @@ WorkspaceShareCommand::WorkspaceShareCommand() { m_Options.add_options()("h,help", "Print help"); m_Options.add_option("", "u", "hosturl", "Host URL", cxxopts::value(m_HostName)->default_value(""), "<hosturl>"); - m_Options.add_options()("system-dir", "Specify system root", cxxopts::value<std::filesystem::path>(m_SystemRootDir)); + m_Options.add_options()("system-dir", "Specify system root", cxxopts::value(m_SystemRootDir)); m_Options.add_option("", "v", "verb", "Verb for workspace - create, remove, info", cxxopts::value(m_Verb), "<verb>"); m_Options.parse_positional({"verb"}); m_Options.positional_help("verb"); @@ -394,6 +398,10 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException("unable to resolve system root directory"); } } + else + { + MakeSafeAbsolutePathÍnPlace(m_SystemRootDir); + } std::filesystem::path StatePath = m_SystemRootDir / "workspaces"; @@ -450,6 +458,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** } } + RemoveLeadingPathSeparator(m_SharePath); RemoveTrailingPathSeparator(m_SharePath); if (m_ShareId.empty()) @@ -517,7 +526,8 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + + std::filesystem::path WorkspaceRoot = WorkspaceConfig.RootPath; if (m_ShareId.empty()) { @@ -529,8 +539,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_ShareId)); } - Workspaces::WorkspaceShareConfiguration Share = - Workspaces::FindWorkspaceShare(Log(), m_WorkspaceRoot, Oid::FromHexString(m_ShareId)); + Workspaces::WorkspaceShareConfiguration Share = Workspaces::FindWorkspaceShare(Log(), WorkspaceRoot, Oid::FromHexString(m_ShareId)); if (Share.Id == Oid::Zero) { ZEN_CONSOLE("Workspace share {} does not exist", m_ShareId); @@ -542,6 +551,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** if (SubOption == &m_RemoveOptions) { + std::filesystem::path WorkspaceRoot; if (!m_Alias.empty()) { Workspaces::WorkspaceConfiguration WorkspaceConfig; @@ -552,9 +562,9 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace share with alias {} does not exist", m_Alias); return 0; } - m_ShareId = ShareConfig.Id.ToString(); - m_WorkspaceId = WorkspaceConfig.Id.ToString(); - m_WorkspaceRoot = WorkspaceConfig.RootPath; + m_ShareId = ShareConfig.Id.ToString(); + m_WorkspaceId = WorkspaceConfig.Id.ToString(); + WorkspaceRoot = WorkspaceConfig.RootPath; } if (m_WorkspaceId.empty()) @@ -573,7 +583,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** ZEN_CONSOLE("Workspace {} does not exist", m_WorkspaceId); return 0; } - m_WorkspaceRoot = WorkspaceConfig.RootPath; + WorkspaceRoot = WorkspaceConfig.RootPath; if (m_ShareId.empty()) { @@ -585,7 +595,7 @@ WorkspaceShareCommand::Run(const ZenCliOptions& GlobalOptions, int argc, char** throw zen::OptionParseException(fmt::format("workspace id '{}' is invalid", m_ShareId)); } - if (Workspaces::RemoveWorkspaceShare(Log(), m_WorkspaceRoot, Oid::FromHexString(m_ShareId))) + if (Workspaces::RemoveWorkspaceShare(Log(), WorkspaceRoot, Oid::FromHexString(m_ShareId))) { if (!m_HostName.empty()) { diff --git a/src/zen/cmds/workspaces_cmd.h b/src/zen/cmds/workspaces_cmd.h index de0edd061..d85d8f7d8 100644 --- a/src/zen/cmds/workspaces_cmd.h +++ b/src/zen/cmds/workspaces_cmd.h @@ -29,9 +29,9 @@ private: std::string m_Id; - cxxopts::Options m_CreateOptions{"create", "Create a workspace"}; - std::filesystem::path m_Path; - bool m_AllowShareCreationFromHttp = false; + cxxopts::Options m_CreateOptions{"create", "Create a workspace"}; + std::string m_Path; + bool m_AllowShareCreationFromHttp = false; cxxopts::Options m_InfoOptions{"info", "Info about a workspace"}; diff --git a/src/zen/xmake.lua b/src/zen/xmake.lua index 78b2a3c2b..bf595a21d 100644 --- a/src/zen/xmake.lua +++ b/src/zen/xmake.lua @@ -18,13 +18,15 @@ target("zen") add_ldflags("/subsystem:console,5.02") add_ldflags("/LTCG") add_links("crypt32", "wldap32", "Ws2_32") + + add_links("dbghelp", "winhttp", "version") -- for Sentry end if is_plat("macosx") then add_ldflags("-framework CoreFoundation") + add_ldflags("-framework Foundation") add_ldflags("-framework Security") add_ldflags("-framework SystemConfiguration") - add_syslinks("bsm") end add_packages("vcpkg::cpr", "vcpkg::cxxopts", "vcpkg::mimalloc", "vcpkg::fmt") diff --git a/src/zen/zen.cpp b/src/zen/zen.cpp index fd58b024a..598ef9314 100644 --- a/src/zen/zen.cpp +++ b/src/zen/zen.cpp @@ -7,6 +7,7 @@ #include "cmds/admin_cmd.h" #include "cmds/bench_cmd.h" +#include "cmds/builds_cmd.h" #include "cmds/cache_cmd.h" #include "cmds/copy_cmd.h" #include "cmds/dedup_cmd.h" @@ -22,16 +23,30 @@ #include "cmds/up_cmd.h" #include "cmds/version_cmd.h" #include "cmds/vfs_cmd.h" +#include "cmds/wipe_cmd.h" #include "cmds/workspaces_cmd.h" +#include <zencore/callstack.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> +#include <zencore/process.h> #include <zencore/scopeguard.h> +#include <zencore/sentryintegration.h> #include <zencore/string.h> +#include <zencore/trace.h> +#include <zencore/windows.h> #include <zenhttp/httpcommon.h> +#include <zenutil/environmentoptions.h> +#include <zenutil/logging.h> #include <zenutil/zenserverprocess.h> +#include <zencore/memory/fmalloc.h> +#include <zencore/memory/llm.h> +#include <zencore/memory/memory.h> +#include <zencore/memory/memorytrace.h> +#include <zencore/memory/newdelete.h> + #if ZEN_WITH_TESTS # define ZEN_TEST_WITH_RUNNER 1 # include <zencore/testing.h> @@ -46,6 +61,11 @@ ZEN_THIRD_PARTY_INCLUDES_END #include <zencore/memory/newdelete.h> +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC +# include <sys/ioctl.h> +# include <unistd.h> +#endif + ////////////////////////////////////////////////////////////////////////// namespace zen { @@ -251,15 +271,133 @@ ZenCmdBase::ResolveTargetHostSpec(const std::string& InHostSpec) return ResolveTargetHostSpec(InHostSpec, /* out */ Dummy); } -ProgressBar::ProgressBar(bool PlainProgress) : m_PlainProgress(PlainProgress), m_LastUpdateMS(m_SW.GetElapsedTimeMs() - 10000) +#if ZEN_PLATFORM_WINDOWS +static HANDLE +GetConsoleHandle() +{ + static HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE); + return hStdOut; +} +#endif + +static bool +CheckStdoutTty() +{ +#if ZEN_PLATFORM_WINDOWS + HANDLE hStdOut = GetConsoleHandle(); + DWORD dwMode = 0; + static bool IsConsole = ::GetConsoleMode(hStdOut, &dwMode); + return IsConsole; +#else + return isatty(fileno(stdout)); +#endif +} + +static bool +IsStdoutTty() +{ + static bool StdoutIsTty = CheckStdoutTty(); + return StdoutIsTty; +} + +static void +OutputToConsoleRaw(const char* String, size_t Length) +{ +#if ZEN_PLATFORM_WINDOWS + HANDLE hStdOut = GetConsoleHandle(); +#endif + +#if ZEN_PLATFORM_WINDOWS + if (IsStdoutTty()) + { + WriteConsoleA(hStdOut, String, (DWORD)Length, 0, 0); + } + else + { + ::WriteFile(hStdOut, (LPCVOID)String, (DWORD)Length, 0, 0); + } +#else + fwrite(String, 1, Length, stdout); +#endif +} + +static void +OutputToConsoleRaw(const std::string& String) +{ + OutputToConsoleRaw(String.c_str(), String.length()); +} + +static void +OutputToConsoleRaw(const StringBuilderBase& SB) +{ + OutputToConsoleRaw(SB.c_str(), SB.Size()); +} + +static uint32_t +GetConsoleColumns() +{ +#if ZEN_PLATFORM_WINDOWS + HANDLE hStdOut = GetConsoleHandle(); + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(hStdOut, &csbi) == TRUE) + { + return (uint32_t)(csbi.srWindow.Right - csbi.srWindow.Left + 1); + } +#else + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) + { + return (uint32_t)w.ws_col; + } +#endif + return 1024; +} + +void +ProgressBar::SetLogOperationName(Mode InMode, std::string_view Name) +{ + if (InMode == Mode::Log) + { + std::string String = fmt::format("@progress {}\n", Name); + OutputToConsoleRaw(String); + } +} + +void +ProgressBar::SetLogOperationProgress(Mode InMode, uint32_t StepIndex, uint32_t StepCount) { + if (InMode == Mode::Log) + { + const size_t PercentDone = StepCount > 0u ? gsl::narrow<uint8_t>((100 * StepIndex) / StepCount) : 0u; + + std::string String = fmt::format("@progress {}%\n", PercentDone); + OutputToConsoleRaw(String); + } +} + +ProgressBar::ProgressBar(Mode InMode, std::string_view InSubTask) +: m_Mode((!IsStdoutTty() && InMode == Mode::Pretty) ? Mode::Plain : InMode) +, m_LastUpdateMS((uint64_t)-1) +, m_PausedMS(0) +, m_SubTask(InSubTask) +{ + if (!m_SubTask.empty() && InMode == Mode::Log) + { + std::string String = fmt::format("@progress push {}\n", m_SubTask); + OutputToConsoleRaw(String); + } } ProgressBar::~ProgressBar() { try { - Finish(); + ForceLinebreak(); + if (!m_SubTask.empty() && m_Mode == Mode::Log) + { + const std::string String("@progress pop\n"); + OutputToConsoleRaw(String); + } } catch (const std::exception& Ex) { @@ -270,48 +408,160 @@ ProgressBar::~ProgressBar() void ProgressBar::UpdateState(const State& NewState, bool DoLinebreak) { + ZEN_ASSERT(NewState.TotalCount >= NewState.RemainingCount); if (DoLinebreak == false && m_State == NewState) { return; } uint64_t ElapsedTimeMS = m_SW.GetElapsedTimeMs(); - if (!DoLinebreak && (NewState.Task == m_State.Task) && ((m_LastUpdateMS + 200) > ElapsedTimeMS)) + if (m_LastUpdateMS != (uint64_t)-1) { - return; + if (!DoLinebreak && (NewState.Status == m_State.Status) && (NewState.Task == m_State.Task) && + ((m_LastUpdateMS + 200) > ElapsedTimeMS)) + { + return; + } + if (m_State.Status == State::EStatus::Paused) + { + uint64_t ElapsedSinceLast = ElapsedTimeMS - m_LastUpdateMS; + m_PausedMS += ElapsedSinceLast; + } } m_LastUpdateMS = ElapsedTimeMS; - size_t PercentDone = + std::string Task = NewState.Task; + switch (NewState.Status) + { + case State::EStatus::Aborted: + Task = "Aborting"; + break; + case State::EStatus::Paused: + Task = "Paused"; + break; + default: + break; + } + if (NewState.Task.length() > Task.length()) + { + Task += std::string(NewState.Task.length() - Task.length(), ' '); + } + + const size_t PercentDone = NewState.TotalCount > 0u ? gsl::narrow<uint8_t>((100 * (NewState.TotalCount - NewState.RemainingCount)) / NewState.TotalCount) : 0u; - if (m_PlainProgress) + if (m_Mode == Mode::Plain) { - ZEN_CONSOLE("{} {}% ({})", NewState.Task, PercentDone, NiceTimeSpanMs(ElapsedTimeMS)); + const std::string Details = (!NewState.Details.empty()) ? fmt::format(": {}", NewState.Details) : ""; + const std::string Output = fmt::format("{} {}% ({}){}\n", Task, PercentDone, NiceTimeSpanMs(ElapsedTimeMS), Details); + OutputToConsoleRaw(Output); } - else + else if (m_Mode == Mode::Pretty) { size_t ProgressBarSize = 20; - size_t ProgressBarCount = (ProgressBarSize * PercentDone) / 100; - uint64_t Completed = NewState.TotalCount - NewState.RemainingCount; - uint64_t ETAMS = (Completed > 0) ? (ElapsedTimeMS * NewState.RemainingCount) / Completed : 0; - std::string ETA = (ETAMS > 0) ? fmt::format(" ETA {}", NiceTimeSpanMs(ETAMS)) : ""; - - std::string Output = fmt::format("\r{} {:#3}%: |{}{}|: {}{}{}", - NewState.Task, - PercentDone, - std::string(ProgressBarCount, '#'), - std::string(ProgressBarSize - ProgressBarCount, ' '), - NiceTimeSpanMs(ElapsedTimeMS), - ETA, - NewState.Details.empty() ? "" : fmt::format(". {}", NewState.Details)); + size_t ProgressBarCount = (ProgressBarSize * PercentDone) / 100; + uint64_t Completed = NewState.TotalCount - NewState.RemainingCount; + uint64_t ETAElapsedMS = ElapsedTimeMS -= m_PausedMS; + uint64_t ETAMS = + (NewState.Status == State::EStatus::Running) && (PercentDone > 5) ? (ETAElapsedMS * NewState.RemainingCount) / Completed : 0; + + uint32_t ConsoleColumns = GetConsoleColumns(); + + const std::string PercentString = fmt::format("{:#3}%", PercentDone); + + const std::string ProgressBarString = + fmt::format(": |{}{}|", std::string(ProgressBarCount, '#'), std::string(ProgressBarSize - ProgressBarCount, ' ')); + + const std::string ElapsedString = fmt::format(": {}", NiceTimeSpanMs(ElapsedTimeMS)); + + const std::string ETAString = (ETAMS > 0) ? fmt::format(" ETA {}", NiceTimeSpanMs(ETAMS)) : ""; + + const std::string DetailsString = (!NewState.Details.empty()) ? fmt::format(". {}", NewState.Details) : ""; + + ExtendableStringBuilder<256> OutputBuilder; + + OutputBuilder << "\r" << Task << PercentString; + if (OutputBuilder.Size() + 1 < ConsoleColumns) + { + size_t RemainingSpace = ConsoleColumns - (OutputBuilder.Size() + 1); + bool ElapsedFits = RemainingSpace >= ElapsedString.length(); + RemainingSpace -= ElapsedString.length(); + bool ETAFits = ElapsedFits && RemainingSpace >= ETAString.length(); + RemainingSpace -= ETAString.length(); + bool DetailsFits = ETAFits && RemainingSpace >= DetailsString.length(); + RemainingSpace -= DetailsString.length(); + bool ProgressBarFits = DetailsFits && RemainingSpace >= ProgressBarString.length(); + RemainingSpace -= ProgressBarString.length(); + + if (ProgressBarFits) + { + OutputBuilder << ProgressBarString; + } + if (ElapsedFits) + { + OutputBuilder << ElapsedString; + } + if (ETAFits) + { + OutputBuilder << ETAString; + } + if (DetailsFits) + { + OutputBuilder << DetailsString; + } + } + + std::string_view Output = OutputBuilder.ToView(); std::string::size_type EraseLength = m_LastOutputLength > Output.length() ? (m_LastOutputLength - Output.length()) : 0; - printf("%s%s%s", Output.c_str(), std::string(EraseLength, ' ').c_str(), DoLinebreak ? "\n" : ""); + + ExtendableStringBuilder<256> LineToPrint; + + if (Output.length() + EraseLength >= ConsoleColumns) + { + if (m_LastOutputLength > 0) + { + LineToPrint << "\n"; + } + LineToPrint << Output.substr(1); + DoLinebreak = true; + } + else + { + LineToPrint << Output << std::string(EraseLength, ' '); + } + + if (DoLinebreak) + { + LineToPrint << "\n"; + } + + OutputToConsoleRaw(LineToPrint); + m_LastOutputLength = DoLinebreak ? 0 : Output.length(); m_State = NewState; } + else if (m_Mode == Mode::Log) + { + if (m_State.Task != NewState.Task || + m_State.Details != NewState.Details) // TODO: Should we output just because details change? Will this spam the log collector? + { + const std::string Details = (!NewState.Details.empty()) ? fmt::format(": {}", NewState.Details) : ""; + const std::string Message = fmt::format("@progress {} ({}){}\n", NewState.Task, NiceTimeSpanMs(ElapsedTimeMS), Details); + OutputToConsoleRaw(Message); + } + + const size_t OldPercentDone = + m_State.TotalCount > 0u ? gsl::narrow<uint8_t>((100 * (m_State.TotalCount - m_State.RemainingCount)) / m_State.TotalCount) : 0u; + + if (OldPercentDone != PercentDone) + { + const std::string Progress = fmt::format("@progress {}%\n", PercentDone); + OutputToConsoleRaw(Progress); + } + m_State = NewState; + } } void @@ -327,7 +577,7 @@ ProgressBar::ForceLinebreak() void ProgressBar::Finish() { - if (m_LastOutputLength > 0 && m_State.RemainingCount > 0) + if (m_LastOutputLength > 0 || m_State.RemainingCount > 0) { State NewState = m_State; NewState.RemainingCount = 0; @@ -360,30 +610,35 @@ ProgressBar::HasActiveTask() const int main(int argc, char** argv) { - using namespace zen; - using namespace std::literals; - - zen::logging::InitializeLogging(); - - // Set output mode to handle virtual terminal sequences - zen::logging::EnableVTMode(); - std::set_terminate([]() { ZEN_CRITICAL("Program exited abnormally via std::terminate()"); }); - - LoggerRef DefaultLogger = zen::logging::Default(); - auto& Sinks = DefaultLogger.SpdLogger->sinks(); - - Sinks.clear(); - auto ConsoleSink = std::make_shared<spdlog::sinks::ansicolor_stdout_sink_mt>(); - Sinks.push_back(ConsoleSink); - - zen::MaximizeOpenFileCount(); + zen::SetCurrentThreadName("main"); + + std::vector<std::string> Args; +#if ZEN_PLATFORM_WINDOWS + LPWSTR RawCommandLine = GetCommandLine(); + std::string CommandLine = zen::WideToUtf8(RawCommandLine); + Args = zen::ParseCommandLine(CommandLine); +#else + Args.reserve(argc); + for (int I = 0; I < argc; I++) + { + std::string Arg(argv[I]); + if ((!Arg.empty()) && (Arg != " ")) + { + Args.emplace_back(std::move(Arg)); + } + } +#endif + std::vector<char*> RawArgs = zen::StripCommandlineQuotes(Args); - ////////////////////////////////////////////////////////////////////////// + argc = gsl::narrow<int>(RawArgs.size()); + argv = RawArgs.data(); - auto _ = zen::MakeGuard([] { spdlog::shutdown(); }); + using namespace zen; + using namespace std::literals; AttachCommand AttachCmd; BenchCommand BenchCmd; + BuildsCommand BuildsCmd; CacheDetailsCommand CacheDetailsCmd; CacheGetCommand CacheGetCmd; CacheGenerateCommand CacheGenerateCmd; @@ -427,6 +682,7 @@ main(int argc, char** argv) UpCommand UpCmd; VersionCommand VersionCmd; VfsCommand VfsCmd; + WipeCommand WipeCmd; WorkspaceCommand WorkspaceCmd; WorkspaceShareCommand WorkspaceShareCmd; @@ -439,6 +695,7 @@ main(int argc, char** argv) // clang-format off {"attach", &AttachCmd, "Add a sponsor process to a running zen service"}, {"bench", &BenchCmd, "Utility command for benchmarking"}, + {BuildsCommand::Name, &BuildsCmd, BuildsCommand::Description}, {"cache-details", &CacheDetailsCmd, "Details on cache"}, {"cache-info", &CacheInfoCmd, "Info on cache, namespace or bucket"}, {CacheGetCommand::Name, &CacheGetCmd, CacheGetCommand::Description}, @@ -479,9 +736,10 @@ main(int argc, char** argv) {"top", &TopCmd, "Monitor zen server activity"}, {"trace", &TraceCmd, "Control zen realtime tracing"}, {"up", &UpCmd, "Bring zen server up"}, - {"version", &VersionCmd, "Get zen server version"}, + {VersionCommand::Name, &VersionCmd, VersionCommand::Description}, {"vfs", &VfsCmd, "Manage virtual file system"}, {"flush", &FlushCmd, "Flush storage"}, + {WipeCommand::Name, &WipeCmd, WipeCommand::Description}, {WorkspaceCommand::Name, &WorkspaceCmd, WorkspaceCommand::Description}, {WorkspaceShareCommand::Name, &WorkspaceShareCmd, WorkspaceShareCommand::Description}, // clang-format on @@ -579,15 +837,60 @@ main(int argc, char** argv) GlobalOptions.PassthroughArgs = PassthroughArgs; GlobalOptions.PassthroughArgV = PassthroughArgV; + std::string MemoryOptions; + std::string SubCommand = "<None>"; cxxopts::Options Options("zen", "Zen management tool"); Options.add_options()("d, debug", "Enable debugging", cxxopts::value<bool>(GlobalOptions.IsDebug)); Options.add_options()("v, verbose", "Enable verbose logging", cxxopts::value<bool>(GlobalOptions.IsVerbose)); + Options.add_options()("malloc", "Configure memory allocator subsystem", cxxopts::value(MemoryOptions)->default_value("mimalloc")); Options.add_options()("help", "Show command line help"); Options.add_options()("c, command", "Sub command", cxxopts::value<std::string>(SubCommand)); +#if ZEN_WITH_TRACE + std::string TraceChannels; + std::string TraceHost; + std::string TraceFile; + + Options.add_option("ue-trace", + "", + "trace", + "Specify which trace channels should be enabled", + cxxopts::value<std::string>(TraceChannels)->default_value(""), + ""); + + Options.add_option("ue-trace", + "", + "tracehost", + "Hostname to send the trace to", + cxxopts::value<std::string>(TraceHost)->default_value(""), + ""); + + Options + .add_option("ue-trace", "", "tracefile", "Path to write a trace to", cxxopts::value<std::string>(TraceFile)->default_value(""), ""); +#endif // ZEN_WITH_TRACE + +#if ZEN_USE_SENTRY + + SentryIntegration::Config SentryConfig; + + bool NoSentry = false; + + Options + .add_option("sentry", "", "no-sentry", "Disable Sentry crash handler", cxxopts::value<bool>(NoSentry)->default_value("false"), ""); + Options.add_option("sentry", + "", + "sentry-allow-personal-info", + "Allow personally identifiable information in sentry crash reports", + cxxopts::value<bool>(SentryConfig.AllowPII)->default_value("false"), + ""); + Options.add_option("sentry", "", "sentry-dsn", "Sentry DSN to send events to", cxxopts::value<std::string>(SentryConfig.Dsn), ""); + Options.add_option("sentry", "", "sentry-environment", "Sentry environment", cxxopts::value<std::string>(SentryConfig.Environment), ""); + Options.add_options()("sentry-debug", "Enable debug mode for Sentry", cxxopts::value<bool>(SentryConfig.Debug)->default_value("false")); +#endif + Options.parse_positional({"command"}); const bool IsNullInvoke = (argc == 1); // If no arguments are passed we want to print usage information @@ -629,14 +932,96 @@ main(int argc, char** argv) exit(0); } - if (GlobalOptions.IsDebug) +#if ZEN_USE_SENTRY + { - logging::SetLogLevel(logging::level::Debug); + EnvironmentOptions EnvOptions; + + EnvOptions.AddOption("UE_ZEN_SENTRY_DSN"sv, SentryConfig.Dsn, "sentry-dsn"sv); + EnvOptions.AddOption("UE_ZEN_SENTRY_ALLOWPERSONALINFO"sv, SentryConfig.AllowPII, "sentry-allow-personal-info"sv); + EnvOptions.AddOption("UE_ZEN_SENTRY_ENVIRONMENT"sv, SentryConfig.Environment, "sentry-environment"sv); + + bool EnvEnableSentry = !NoSentry; + EnvOptions.AddOption("UE_ZEN_SENTRY_ENABLED"sv, EnvEnableSentry, "no-sentry"sv); + + EnvOptions.AddOption("UE_ZEN_SENTRY_DEBUG"sv, SentryConfig.Debug, "sentry-debug"sv); + + EnvOptions.Parse(ParseResult); + + if (EnvEnableSentry != !NoSentry) + { + NoSentry = !EnvEnableSentry; + } } - if (GlobalOptions.IsVerbose) + + SentryIntegration Sentry; + + if (NoSentry == false) { - logging::SetLogLevel(logging::level::Trace); + std::string SentryDatabasePath = (std::filesystem::temp_directory_path() / ".zen-sentry-native").string(); + + ExtendableStringBuilder<512> SB; + for (int i = 0; i < argc; ++i) + { + if (i) + { + SB.Append(' '); + } + + SB.Append(argv[i]); + } + + SentryConfig.DatabasePath = SentryDatabasePath; + + Sentry.Initialize(SentryConfig, SB.ToString()); + + SentryIntegration::ClearCaches(); + } +#endif + + zen::LoggingOptions LogOptions; + LogOptions.IsDebug = GlobalOptions.IsDebug; + LogOptions.IsVerbose = GlobalOptions.IsVerbose; + LogOptions.AllowAsync = false; + zen::InitializeLogging(LogOptions); + + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); + + zen::MaximizeOpenFileCount(); + + ////////////////////////////////////////////////////////////////////////// + + auto _ = zen::MakeGuard([] { zen::ShutdownLogging(); }); + +#if ZEN_WITH_TRACE + if (TraceHost.size()) + { + TraceStart("zen", TraceHost.c_str(), TraceType::Network); + } + else if (TraceFile.size()) + { + TraceStart("zen", TraceFile.c_str(), TraceType::File); + } + else + { + TraceInit("zen"); } +#endif // ZEN_WITH_TRACE + +#if ZEN_WITH_MEMTRACK + FMalloc* TraceMalloc = MemoryTrace_Create(GMalloc); + if (TraceMalloc != GMalloc) + { + GMalloc = TraceMalloc; + MemoryTrace_Initialize(); + } +#endif for (const CommandInfo& CmdInfo : Commands) { diff --git a/src/zen/zen.h b/src/zen/zen.h index 9c9586050..995bd13b6 100644 --- a/src/zen/zen.h +++ b/src/zen/zen.h @@ -5,10 +5,7 @@ #include <zencore/except.h> #include <zencore/timer.h> #include <zencore/zencore.h> - -ZEN_THIRD_PARTY_INCLUDES_START -#include <cxxopts.hpp> -ZEN_THIRD_PARTY_INCLUDES_END +#include <zenutil/commandlineoptions.h> namespace cpr { class Response; @@ -82,9 +79,39 @@ public: std::string Details; uint64_t TotalCount = 0; uint64_t RemainingCount = 0; + enum class EStatus + { + Running, + Aborted, + Paused + }; + EStatus Status = EStatus::Running; + + static EStatus CalculateStatus(bool IsAborted, bool IsPaused) + { + if (IsAborted) + { + return EStatus::Aborted; + } + if (IsPaused) + { + return EStatus::Paused; + } + return EStatus::Running; + } }; - explicit ProgressBar(bool PlainProgress); + enum class Mode + { + Plain, + Pretty, + Log + }; + + static void SetLogOperationName(Mode InMode, std::string_view Name); + static void SetLogOperationProgress(Mode InMode, uint32_t StepIndex, uint32_t StepCount); + + explicit ProgressBar(Mode InMode, std::string_view InSubTask); ~ProgressBar(); void UpdateState(const State& NewState, bool DoLinebreak); @@ -94,11 +121,13 @@ public: bool HasActiveTask() const; private: - const bool m_PlainProgress; - Stopwatch m_SW; - uint64_t m_LastUpdateMS; - State m_State; - size_t m_LastOutputLength = 0; + const Mode m_Mode; + Stopwatch m_SW; + uint64_t m_LastUpdateMS; + uint64_t m_PausedMS; + State m_State; + const std::string m_SubTask; + size_t m_LastOutputLength = 0; }; } // namespace zen diff --git a/src/zencore-test/zencore-test.cpp b/src/zencore-test/zencore-test.cpp index 40cb51156..928c4e5c2 100644 --- a/src/zencore-test/zencore-test.cpp +++ b/src/zencore-test/zencore-test.cpp @@ -5,6 +5,7 @@ #include <zencore/filesystem.h> #include <zencore/logging.h> +#include <zencore/trace.h> #include <zencore/zencore.h> #include <zencore/memory/newdelete.h> @@ -20,6 +21,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zencore_forcelinktests(); + zen::TraceInit("zencore-test"); zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zencore/basicfile.cpp b/src/zencore/basicfile.cpp index c2a21ae90..6989da67e 100644 --- a/src/zencore/basicfile.cpp +++ b/src/zencore/basicfile.cpp @@ -28,6 +28,20 @@ BasicFile::~BasicFile() { Close(); } +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode) +{ + Open(FileName, Mode); +} + +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec) +{ + Open(FileName, Mode, Ec); +} + +BasicFile::BasicFile(const std::filesystem::path& FileName, Mode Mode, std::function<bool(std::error_code& Ec)>&& RetryCallback) +{ + Open(FileName, Mode, std::move(RetryCallback)); +} void BasicFile::Open(const std::filesystem::path& FileName, Mode Mode) @@ -167,58 +181,18 @@ BasicFile::ReadRange(uint64_t FileOffset, uint64_t ByteCount) void BasicFile::Read(void* Data, uint64_t BytesToRead, uint64_t FileOffset) { - const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024; - - while (BytesToRead) + const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024; + std::error_code Ec; + ReadFile(m_FileHandle, Data, BytesToRead, FileOffset, MaxChunkSize, Ec); + if (Ec) { - const uint64_t NumberOfBytesToRead = Min(BytesToRead, MaxChunkSize); - int32_t Error = 0; - size_t BytesRead = 0; - -#if ZEN_PLATFORM_WINDOWS - OVERLAPPED Ovl{}; - - Ovl.Offset = DWORD(FileOffset & 0xffff'ffffu); - Ovl.OffsetHigh = DWORD(FileOffset >> 32); - - DWORD dwNumberOfBytesRead = 0; - BOOL Success = ::ReadFile(m_FileHandle, Data, DWORD(NumberOfBytesToRead), &dwNumberOfBytesRead, &Ovl); - if (Success) - { - BytesRead = size_t(dwNumberOfBytesRead); - } - else - { - Error = zen::GetLastError(); - } -#else - static_assert(sizeof(off_t) >= sizeof(uint64_t), "sizeof(off_t) does not support large files"); - int Fd = int(uintptr_t(m_FileHandle)); - ssize_t ReadResult = pread(Fd, Data, NumberOfBytesToRead, FileOffset); - if (ReadResult != -1) - { - BytesRead = size_t(ReadResult); - } - else - { - Error = zen::GetLastError(); - } -#endif - - if (Error || (BytesRead != NumberOfBytesToRead)) - { - std::error_code DummyEc; - throw std::system_error(std::error_code(Error, std::system_category()), - fmt::format("ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})", - FileOffset, - NumberOfBytesToRead, - PathFromHandle(m_FileHandle, DummyEc), - FileSizeFromHandle(m_FileHandle))); - } - - BytesToRead -= NumberOfBytesToRead; - FileOffset += NumberOfBytesToRead; - Data = reinterpret_cast<uint8_t*>(Data) + NumberOfBytesToRead; + std::error_code DummyEc; + throw std::system_error(Ec, + fmt::format("BasicFile::Read: ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})", + FileOffset, + BytesToRead, + PathFromHandle(m_FileHandle, DummyEc), + FileSizeFromHandle(m_FileHandle))); } } @@ -267,7 +241,21 @@ BasicFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<voi } uint64_t -BasicFile::Write(CompositeBuffer Data, uint64_t FileOffset, std::error_code& Ec) +BasicFile::Write(const CompositeBuffer& Data, uint64_t FileOffset) +{ + std::error_code Ec; + uint64_t WrittenBytes = Write(Data, FileOffset, Ec); + + if (Ec) + { + std::error_code Dummy; + throw std::system_error(Ec, fmt::format("Failed to write to file '{}'", zen::PathFromHandle(m_FileHandle, Dummy))); + } + return WrittenBytes; +} + +uint64_t +BasicFile::Write(const CompositeBuffer& Data, uint64_t FileOffset, std::error_code& Ec) { uint64_t WrittenBytes = 0; for (const SharedBuffer& Buffer : Data.GetSegments()) @@ -295,41 +283,9 @@ BasicFile::Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec) void BasicFile::Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec) { - Ec.clear(); - - const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024; - - while (Size) - { - const uint64_t NumberOfBytesToWrite = Min(Size, MaxChunkSize); - -#if ZEN_PLATFORM_WINDOWS - OVERLAPPED Ovl{}; - - Ovl.Offset = DWORD(FileOffset & 0xffff'ffffu); - Ovl.OffsetHigh = DWORD(FileOffset >> 32); - - DWORD dwNumberOfBytesWritten = 0; - - BOOL Success = ::WriteFile(m_FileHandle, Data, DWORD(NumberOfBytesToWrite), &dwNumberOfBytesWritten, &Ovl); -#else - static_assert(sizeof(off_t) >= sizeof(uint64_t), "sizeof(off_t) does not support large files"); - int Fd = int(uintptr_t(m_FileHandle)); - int BytesWritten = pwrite(Fd, Data, NumberOfBytesToWrite, FileOffset); - bool Success = (BytesWritten > 0); -#endif - - if (!Success) - { - Ec = MakeErrorCodeFromLastError(); + const uint64_t MaxChunkSize = 2u * 1024 * 1024; - return; - } - - Size -= NumberOfBytesToWrite; - FileOffset += NumberOfBytesToWrite; - Data = reinterpret_cast<const uint8_t*>(Data) + NumberOfBytesToWrite; - } + WriteFile(m_FileHandle, Data, Size, FileOffset, MaxChunkSize, Ec); } void @@ -375,59 +331,20 @@ BasicFile::Flush() uint64_t BasicFile::FileSize() const { -#if ZEN_PLATFORM_WINDOWS - ULARGE_INTEGER liFileSize; - liFileSize.LowPart = ::GetFileSize(m_FileHandle, &liFileSize.HighPart); - if (liFileSize.LowPart == INVALID_FILE_SIZE) - { - int Error = zen::GetLastError(); - if (Error) - { - std::error_code Dummy; - ThrowSystemError(Error, fmt::format("Failed to get file size from file '{}'", PathFromHandle(m_FileHandle, Dummy))); - } - } - return uint64_t(liFileSize.QuadPart); -#else - int Fd = int(uintptr_t(m_FileHandle)); - static_assert(sizeof(decltype(stat::st_size)) == sizeof(uint64_t), "fstat() doesn't support large files"); - struct stat Stat; - if (fstat(Fd, &Stat) == -1) + std::error_code Ec; + uint64_t FileSize = FileSizeFromHandle(m_FileHandle, Ec); + if (Ec) { std::error_code Dummy; - ThrowSystemError(GetLastError(), fmt::format("Failed to get file size from file '{}'", PathFromHandle(m_FileHandle, Dummy))); + ThrowSystemError(Ec.value(), fmt::format("Failed to get file size from file '{}'", PathFromHandle(m_FileHandle, Dummy))); } - return uint64_t(Stat.st_size); -#endif + return FileSize; } uint64_t BasicFile::FileSize(std::error_code& Ec) const { -#if ZEN_PLATFORM_WINDOWS - ULARGE_INTEGER liFileSize; - liFileSize.LowPart = ::GetFileSize(m_FileHandle, &liFileSize.HighPart); - if (liFileSize.LowPart == INVALID_FILE_SIZE) - { - int Error = zen::GetLastError(); - if (Error) - { - Ec = MakeErrorCode(Error); - return 0; - } - } - return uint64_t(liFileSize.QuadPart); -#else - int Fd = int(uintptr_t(m_FileHandle)); - static_assert(sizeof(decltype(stat::st_size)) == sizeof(uint64_t), "fstat() doesn't support large files"); - struct stat Stat; - if (fstat(Fd, &Stat) == -1) - { - Ec = MakeErrorCodeFromLastError(); - return 0; - } - return uint64_t(Stat.st_size); -#endif + return FileSizeFromHandle(m_FileHandle, Ec); } void @@ -560,7 +477,7 @@ TemporaryFile::MoveTemporaryIntoPlace(std::filesystem::path FinalFileName, std:: // deleting the temporary file BasicFile::Close(); - std::filesystem::rename(m_TempPath, FinalFileName, Ec); + RenameFile(m_TempPath, FinalFileName, Ec); if (Ec) { @@ -575,7 +492,6 @@ TemporaryFile::MoveTemporaryIntoPlace(std::filesystem::path FinalFileName, std:: void TemporaryFile::SafeWriteFile(const std::filesystem::path& Path, MemoryView Data) { - TemporaryFile TempFile; std::error_code Ec; SafeWriteFile(Path, Data, Ec); if (Ec) @@ -763,10 +679,45 @@ BasicFileWriter::~BasicFileWriter() } void +BasicFileWriter::AddPadding(uint64_t Padding) +{ + while (Padding) + { + const uint64_t BufferOffset = m_BufferEnd - m_BufferStart; + const uint64_t RemainingBufferCapacity = m_BufferSize - BufferOffset; + const uint64_t BlockPadBytes = Min(RemainingBufferCapacity, Padding); + + memset(m_Buffer + BufferOffset, 0, BlockPadBytes); + m_BufferEnd += BlockPadBytes; + Padding -= BlockPadBytes; + + if ((BufferOffset + BlockPadBytes) == m_BufferSize) + { + Flush(); + } + } +} + +uint64_t +BasicFileWriter::AlignTo(uint64_t Alignment) +{ + uint64_t AlignedPos = RoundUp(m_BufferEnd, Alignment); + uint64_t Padding = AlignedPos - m_BufferEnd; + AddPadding(Padding); + return AlignedPos; +} + +void BasicFileWriter::Write(const void* Data, uint64_t Size, uint64_t FileOffset) { if (m_Buffer == nullptr || (Size >= m_BufferSize)) { + if (FileOffset == m_BufferEnd) + { + Flush(); + m_BufferStart = m_BufferEnd = FileOffset + Size; + } + m_Base.Write(Data, Size, FileOffset); return; } @@ -804,6 +755,17 @@ BasicFileWriter::Write(const void* Data, uint64_t Size, uint64_t FileOffset) } void +BasicFileWriter::Write(const CompositeBuffer& Data, uint64_t FileOffset) +{ + for (const SharedBuffer& Segment : Data.GetSegments()) + { + const uint64_t SegmentSize = Segment.GetSize(); + Write(Segment.GetData(), SegmentSize, FileOffset); + FileOffset += SegmentSize; + } +} + +void BasicFileWriter::Flush() { const uint64_t BufferedBytes = m_BufferEnd - m_BufferStart; @@ -817,6 +779,78 @@ BasicFileWriter::Flush() m_Base.Write(m_Buffer, BufferedBytes, WriteOffset); } +IoBuffer +WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path) +{ + TemporaryFile Temp; + std::error_code Ec; + Temp.CreateTemporary(Path.parent_path(), Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to create temp file for blob at '{}'", Path)); + } + + uint64_t BufferSize = Buffer.GetSize(); + { + uint64_t Offset = 0; + static const uint64_t BufferingSize = 256u * 1024u; + BasicFileWriter BufferedOutput(Temp, Min(BufferingSize, BufferSize)); + for (const SharedBuffer& Segment : Buffer.GetSegments()) + { + size_t SegmentSize = Segment.GetSize(); + + IoBufferFileReference FileRef; + if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + BufferingSize, + [&BufferedOutput, &Offset](const void* Data, size_t Size) { + BufferedOutput.Write(Data, Size, Offset); + Offset += Size; + }); + } + else + { + BufferedOutput.Write(Segment.GetData(), SegmentSize, Offset); + Offset += SegmentSize; + } + } + } + + Temp.MoveTemporaryIntoPlace(Path, Ec); + if (Ec) + { + Ec.clear(); + BasicFile OpenTemp(Path, BasicFile::Mode::kDelete, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to move temp file to '{}'", Path)); + } + if (OpenTemp.FileSize() != BufferSize) + { + throw std::runtime_error(fmt::format("Failed to move temp file to '{}' - mismatching file size already exists", Path)); + } + IoBuffer TmpBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BufferSize, true); + + IoHash ExistingHash = IoHash::HashBuffer(TmpBuffer); + const IoHash ExpectedHash = IoHash::HashBuffer(Buffer); + if (ExistingHash != ExpectedHash) + { + throw std::runtime_error(fmt::format("Failed to move temp file to '{}' - mismatching file hash already exists", Path)); + } + Buffer = CompositeBuffer{}; + TmpBuffer.SetDeleteOnClose(true); + return TmpBuffer; + } + Buffer = CompositeBuffer{}; + BasicFile OpenTemp(Path, BasicFile::Mode::kDelete); + IoBuffer TmpBuffer(IoBuffer::File, OpenTemp.Detach(), 0, BufferSize, true); + TmpBuffer.SetDeleteOnClose(true); + return TmpBuffer; +} + ////////////////////////////////////////////////////////////////////////// /* @@ -866,9 +900,9 @@ TEST_CASE("TemporaryFile") TmpFile.CreateTemporary(std::filesystem::current_path(), Ec); CHECK(!Ec); Path = TmpFile.GetPath(); - CHECK(std::filesystem::exists(Path)); + CHECK(IsFile(Path)); } - CHECK(std::filesystem::exists(Path) == false); + CHECK(IsFile(Path) == false); } SUBCASE("MoveIntoPlace") @@ -879,11 +913,11 @@ TEST_CASE("TemporaryFile") CHECK(!Ec); std::filesystem::path TempPath = TmpFile.GetPath(); std::filesystem::path FinalPath = std::filesystem::current_path() / "final"; - CHECK(std::filesystem::exists(TempPath)); + CHECK(IsFile(TempPath)); TmpFile.MoveTemporaryIntoPlace(FinalPath, Ec); CHECK(!Ec); - CHECK(std::filesystem::exists(TempPath) == false); - CHECK(std::filesystem::exists(FinalPath)); + CHECK(IsFile(TempPath) == false); + CHECK(IsFile(FinalPath)); } } diff --git a/src/zencore/blake3.cpp b/src/zencore/blake3.cpp index 4a77aa49a..054f0d3a0 100644 --- a/src/zencore/blake3.cpp +++ b/src/zencore/blake3.cpp @@ -151,6 +151,28 @@ BLAKE3Stream::Append(const void* data, size_t byteCount) return *this; } +BLAKE3Stream& +BLAKE3Stream::Append(const IoBuffer& Buffer) +{ + blake3_hasher* b3h = reinterpret_cast<blake3_hasher*>(m_HashState); + + size_t BufferSize = Buffer.GetSize(); + static const uint64_t BufferingSize = 256u * 1024u; + IoBufferFileReference FileRef; + if (BufferSize >= (BufferingSize + BufferingSize / 2) && Buffer.GetFileReference(FileRef)) + { + ScanFile(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, [&b3h](const void* Data, size_t Size) { + blake3_hasher_update(b3h, Data, Size); + }); + } + else + { + blake3_hasher_update(b3h, Buffer.GetData(), BufferSize); + } + + return *this; +} + BLAKE3 BLAKE3Stream::GetHash() { diff --git a/src/zencore/compactbinary.cpp b/src/zencore/compactbinary.cpp index adccaba70..b43cc18f1 100644 --- a/src/zencore/compactbinary.cpp +++ b/src/zencore/compactbinary.cpp @@ -15,6 +15,8 @@ #include <zencore/testing.h> #include <zencore/uid.h> +#include <EASTL/fixed_vector.h> + #include <fmt/format.h> #include <string_view> @@ -1376,9 +1378,9 @@ TryMeasureCompactBinary(MemoryView View, CbFieldType& OutType, uint64_t& OutSize CbField LoadCompactBinary(BinaryReader& Ar, BufferAllocator Allocator) { - std::vector<uint8_t> HeaderBytes; - CbFieldType FieldType; - uint64_t FieldSize = 1; + eastl::fixed_vector<uint8_t, 32> HeaderBytes; + CbFieldType FieldType; + uint64_t FieldSize = 1; for (const int64_t StartPos = Ar.CurrentOffset(); FieldSize > 0;) { @@ -1393,7 +1395,7 @@ LoadCompactBinary(BinaryReader& Ar, BufferAllocator Allocator) HeaderBytes.resize(ReadOffset + ReadSize); Ar.Read(HeaderBytes.data() + ReadOffset, ReadSize); - if (TryMeasureCompactBinary(MakeMemoryView(HeaderBytes), FieldType, FieldSize)) + if (TryMeasureCompactBinary(MakeMemoryView(HeaderBytes.data(), HeaderBytes.size()), FieldType, FieldSize)) { if (FieldSize <= uint64_t(Ar.Size() - StartPos)) { diff --git a/src/zencore/compactbinarybuilder.cpp b/src/zencore/compactbinarybuilder.cpp index a60de023d..63c0b9c5c 100644 --- a/src/zencore/compactbinarybuilder.cpp +++ b/src/zencore/compactbinarybuilder.cpp @@ -15,23 +15,21 @@ namespace zen { -template<typename T> uint64_t -AddUninitialized(std::vector<T>& Vector, uint64_t Count) +AddUninitialized(CbWriter::CbWriterData_t& Vector, uint64_t Count) { const uint64_t Offset = Vector.size(); Vector.resize(Offset + Count); return Offset; } -template<typename T> uint64_t -Append(std::vector<T>& Vector, const T* Data, uint64_t Count) +Append(CbWriter::CbWriterData_t& Vector, const uint8_t* Data, uint64_t Count) { const uint64_t Offset = Vector.size(); Vector.resize(Offset + Count); - memcpy(Vector.data() + Offset, Data, sizeof(T) * Count); + memcpy(Vector.data() + Offset, Data, sizeof(uint8_t) * Count); return Offset; } @@ -76,7 +74,7 @@ IsUniformType(const CbFieldType Type) /** Append the payload from the compact binary value to the array and return its type. */ static inline CbFieldType -AppendCompactBinary(const CbFieldView& Value, std::vector<uint8_t>& OutData) +AppendCompactBinary(const CbFieldView& Value, CbWriter::CbWriterData_t& OutData) { struct FCopy : public CbFieldView { @@ -93,7 +91,6 @@ AppendCompactBinary(const CbFieldView& Value, std::vector<uint8_t>& OutData) CbWriter::CbWriter() { - States.reserve(4); States.emplace_back(); } diff --git a/src/zencore/compactbinaryjson.cpp b/src/zencore/compactbinaryjson.cpp index d8c8a8584..68ed09549 100644 --- a/src/zencore/compactbinaryjson.cpp +++ b/src/zencore/compactbinaryjson.cpp @@ -293,6 +293,7 @@ private: const uint32_t EncodedSize = Base64::GetEncodedDataSize(uint32_t(Value.GetSize())); const size_t EncodedIndex = Builder.AddUninitialized(size_t(EncodedSize)); Base64::Encode(static_cast<const uint8_t*>(Value.GetData()), uint32_t(Value.GetSize()), Builder.Data() + EncodedIndex); + Builder << '"'; } private: diff --git a/src/zencore/compactbinarypackage.cpp b/src/zencore/compactbinarypackage.cpp index 7de161845..ffe64f2e9 100644 --- a/src/zencore/compactbinarypackage.cpp +++ b/src/zencore/compactbinarypackage.cpp @@ -3,10 +3,13 @@ #include "zencore/compactbinarypackage.h" #include <zencore/compactbinarybuilder.h> #include <zencore/compactbinaryvalidation.h> +#include <zencore/eastlutil.h> #include <zencore/endian.h> #include <zencore/stream.h> #include <zencore/testing.h> +#include <EASTL/span.h> + namespace zen { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -341,6 +344,12 @@ CbPackage::SetObject(CbObject InObject, const IoHash* InObjectHash, AttachmentRe } void +CbPackage::ReserveAttachments(size_t Count) +{ + Attachments.reserve(Count); +} + +void CbPackage::AddAttachment(const CbAttachment& Attachment, AttachmentResolver* Resolver) { if (!Attachment.IsNull()) @@ -374,17 +383,18 @@ CbPackage::AddAttachments(std::span<const CbAttachment> InAttachments) { ZEN_ASSERT(!Attachment.IsNull()); } + // Assume we have no duplicates! Attachments.insert(Attachments.end(), InAttachments.begin(), InAttachments.end()); std::sort(Attachments.begin(), Attachments.end()); - ZEN_ASSERT_SLOW(std::unique(Attachments.begin(), Attachments.end()) == Attachments.end()); + ZEN_ASSERT_SLOW(eastl::unique(Attachments.begin(), Attachments.end()) == Attachments.end()); } int32_t CbPackage::RemoveAttachment(const IoHash& Hash) { return gsl::narrow_cast<int32_t>( - std::erase_if(Attachments, [&Hash](const CbAttachment& Attachment) -> bool { return Attachment.GetHash() == Hash; })); + erase_if(Attachments, [&Hash](const CbAttachment& Attachment) -> bool { return Attachment.GetHash() == Hash; })); } bool diff --git a/src/zencore/compactbinaryvalidation.cpp b/src/zencore/compactbinaryvalidation.cpp index 6f53bba69..833649b88 100644 --- a/src/zencore/compactbinaryvalidation.cpp +++ b/src/zencore/compactbinaryvalidation.cpp @@ -135,6 +135,37 @@ ValidateCbFloat64(MemoryView& View, CbValidateMode Mode, CbValidateError& Error) } /** + * Validate and read a fixed-size value from the view. + * + * Modifies the view to start at the end of the value, and adds error flags if applicable. + */ +static MemoryView +ValidateCbFixedValue(MemoryView& View, CbValidateMode Mode, CbValidateError& Error, uint64_t Size) +{ + ZEN_UNUSED(Mode); + + const MemoryView Value = View.Left(Size); + View += Size; + if (Value.GetSize() < Size) + { + AddError(Error, CbValidateError::OutOfBounds); + } + return Value; +}; + +/** + * Validate and read a value from the view where the view begins with the value size. + * + * Modifies the view to start at the end of the value, and adds error flags if applicable. + */ +static MemoryView +ValidateCbDynamicValue(MemoryView& View, CbValidateMode Mode, CbValidateError& Error) +{ + const uint64_t ValueSize = ValidateCbUInt(View, Mode, Error); + return ValidateCbFixedValue(View, Mode, Error, ValueSize); +} + +/** * Validate and read a string from the view. * * Modifies the view to start at the end of the string, and adds error flags if applicable. @@ -378,8 +409,20 @@ ValidateCbField(MemoryView& View, CbValidateMode Mode, CbValidateError& Error, c ValidateFixedPayload(12); break; case CbFieldType::CustomById: + { + MemoryView Value = ValidateCbDynamicValue(View, Mode, Error); + ValidateCbUInt(Value, Mode, Error); + } + break; case CbFieldType::CustomByName: - ZEN_NOT_IMPLEMENTED(); // TODO: FIX! + { + MemoryView Value = ValidateCbDynamicValue(View, Mode, Error); + const std::string_view TypeName = ValidateCbString(Value, Mode, Error); + if (TypeName.empty() && !EnumHasAnyFlags(Error, CbValidateError::OutOfBounds)) + { + AddError(Error, CbValidateError::InvalidType); + } + } break; } diff --git a/src/zencore/compositebuffer.cpp b/src/zencore/compositebuffer.cpp index 49870a304..252ac9045 100644 --- a/src/zencore/compositebuffer.cpp +++ b/src/zencore/compositebuffer.cpp @@ -275,36 +275,18 @@ CompositeBuffer::IterateRange(uint64_t Offset, Visitor(View, Segment); break; } - if (Offset < SegmentSize) + else if (Offset <= SegmentSize) { - if (Offset == 0 && Size >= SegmentSize) + const MemoryView View = Segment.GetView().Mid(Offset, Size); + Offset = 0; + if (Size == 0 || !View.IsEmpty()) { - const MemoryView View = Segment.GetView(); - if (!View.IsEmpty()) - { - Visitor(View, Segment); - } - Size -= View.GetSize(); - if (Size == 0) - { - break; - } + Visitor(View, Segment); } - else + Size -= View.GetSize(); + if (Size == 0) { - // If we only want a section of the segment, do a subrange so we don't have to materialize the entire iobuffer - IoBuffer SubRange(Segment.AsIoBuffer(), Offset, Min(Size, SegmentSize - Offset)); - const MemoryView View = SubRange.GetView(); - if (!View.IsEmpty()) - { - Visitor(View, Segment); - } - Size -= View.GetSize(); - if (Size == 0) - { - break; - } - Offset = 0; + break; } } else diff --git a/src/zencore/compress.cpp b/src/zencore/compress.cpp index 29c1d9256..d9f381811 100644 --- a/src/zencore/compress.cpp +++ b/src/zencore/compress.cpp @@ -2,10 +2,12 @@ #include <zencore/compress.h> +#include <zencore/basicfile.h> #include <zencore/blake3.h> #include <zencore/compositebuffer.h> #include <zencore/crc32.h> #include <zencore/endian.h> +#include <zencore/filesystem.h> #include <zencore/intmath.h> #include <zencore/iohash.h> #include <zencore/stream.h> @@ -157,6 +159,10 @@ class BaseEncoder { public: [[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const = 0; + [[nodiscard]] virtual bool CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize = DefaultBlockSize) const = 0; }; class BaseDecoder @@ -184,6 +190,14 @@ public: const MemoryView HeaderView, uint64_t RawOffset, uint64_t RawSize) const = 0; + + virtual bool DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) + const = 0; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -191,11 +205,58 @@ public: class NoneEncoder final : public BaseEncoder { public: - [[nodiscard]] CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t /* BlockSize */) const final + [[nodiscard]] virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t /* BlockSize */) const final { UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), BLAKE3::HashBuffer(RawData)); return CompositeBuffer(HeaderData.MoveToShared(), RawData.MakeOwned()); } + + [[nodiscard]] virtual bool CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t /* BlockSize */) const final + { + const uint64_t HeaderSize = CompressedBuffer::GetHeaderSizeForNoneEncoder(); + + uint64_t RawOffset = 0; + BLAKE3Stream HashStream; + + for (const SharedBuffer& Segment : RawData.GetSegments()) + { + IoBufferFileReference FileRef = {nullptr, 0, 0}; + IoBuffer SegmentBuffer = Segment.AsIoBuffer(); + if (SegmentBuffer.GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + + ScanFile(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + 512u * 1024u, + [&](const void* Data, size_t Size) { + HashStream.Append(Data, Size); + CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size)); + Callback(RawOffset, Size, HeaderSize + RawOffset, Tmp); + RawOffset += Size; + }); + } + else + { + const uint64_t Size = SegmentBuffer.GetSize(); + HashStream.Append(SegmentBuffer); + Callback(RawOffset, Size, HeaderSize + RawOffset, CompositeBuffer(Segment)); + RawOffset += Size; + } + } + + ZEN_ASSERT(RawOffset == RawData.GetSize()); + + UniqueBuffer HeaderData = CompressedBuffer::CreateHeaderForNoneEncoder(RawData.GetSize(), HashStream.GetHash()); + ZEN_ASSERT(HeaderData.GetSize() == HeaderSize); + Callback(0, 0, 0, CompositeBuffer(HeaderData.MoveToShared())); + + return true; + } }; class NoneDecoder final : public BaseDecoder @@ -262,6 +323,45 @@ public: } [[nodiscard]] uint64_t GetHeaderSize(const BufferHeader&) const final { return sizeof(BufferHeader); } + + virtual bool DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) + const final + { + if (Header.Method == CompressionMethod::None && Header.TotalCompressedSize == CompressedData.GetSize() && + Header.TotalCompressedSize == Header.TotalRawSize + sizeof(BufferHeader) && RawOffset < Header.TotalRawSize && + (RawOffset + RawSize) <= Header.TotalRawSize) + { + bool Result = true; + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + uint64_t CallbackOffset = 0; + ScanFile(FileRef.FileHandle, sizeof(BufferHeader) + RawOffset, RawSize, 512u * 1024u, [&](const void* Data, size_t Size) { + if (Result) + { + CompositeBuffer Tmp(SharedBuffer::MakeView(Data, Size)); + Result = Callback(sizeof(BufferHeader) + RawOffset + CallbackOffset, Size, CallbackOffset, Tmp); + } + CallbackOffset += Size; + }); + return Result; + } + else + { + return Callback(sizeof(BufferHeader) + RawOffset, + RawSize, + 0, + CompressedData.Mid(sizeof(BufferHeader) + RawOffset, RawSize)); + } + } + return false; + } }; ////////////////////////////////////////////////////////////////////////// @@ -269,7 +369,11 @@ public: class BlockEncoder : public BaseEncoder { public: - CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize = DefaultBlockSize) const final; + virtual CompositeBuffer Compress(const CompositeBuffer& RawData, uint64_t BlockSize) const final; + virtual bool CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize) const final; protected: virtual CompressionMethod GetMethod() const = 0; @@ -314,37 +418,77 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) CompressedBlockSizes.reserve(BlockCount); uint64_t CompressedSize = 0; { - UniqueBuffer RawBlockCopy; MutableMemoryView CompressedBlocksView = CompressedData.GetMutableView() + sizeof(BufferHeader) + MetaSize; - CompositeBuffer::Iterator It = RawData.GetIterator(0); - - for (uint64_t RawOffset = 0; RawOffset < RawSize;) + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((RawData.GetSegments().size() == 1) && RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) { - const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); - const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); - RawHash.Append(RawBlock); - - MutableMemoryView CompressedBlock = CompressedBlocksView; - if (!CompressBlock(CompressedBlock, RawBlock)) + ZEN_ASSERT(FileRef.FileHandle != nullptr); + UniqueBuffer RawBlockCopy = UniqueBuffer::Alloc(BlockSize); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + for (uint64_t RawOffset = 0; RawOffset < RawSize;) { - return CompositeBuffer(); - } + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + Source.Read(RawBlockCopy.GetData(), RawBlockSize, FileRef.FileChunkOffset + RawOffset); + const MemoryView RawBlock = RawBlockCopy.GetView().Left(RawBlockSize); + RawHash.Append(RawBlock); + MutableMemoryView CompressedBlock = CompressedBlocksView; + if (!CompressBlock(CompressedBlock, RawBlock)) + { + Source.Detach(); + return CompositeBuffer(); + } - uint64_t CompressedBlockSize = CompressedBlock.GetSize(); - if (RawBlockSize <= CompressedBlockSize) - { - CompressedBlockSize = RawBlockSize; - CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + CompressedBlockSize = RawBlockSize; + CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + } + else + { + CompressedBlocksView += CompressedBlockSize; + } + + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; } - else + Source.Detach(); + } + else + { + UniqueBuffer RawBlockCopy; + CompositeBuffer::Iterator It = RawData.GetIterator(0); + + for (uint64_t RawOffset = 0; RawOffset < RawSize;) { - CompressedBlocksView += CompressedBlockSize; - } + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); + RawHash.Append(RawBlock); + + MutableMemoryView CompressedBlock = CompressedBlocksView; + if (!CompressBlock(CompressedBlock, RawBlock)) + { + return CompositeBuffer(); + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + CompressedBlockSize = RawBlockSize; + CompressedBlocksView = CompressedBlocksView.CopyFrom(RawBlock); + } + else + { + CompressedBlocksView += CompressedBlockSize; + } - CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); - CompressedSize += CompressedBlockSize; - RawOffset += RawBlockSize; + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } } } @@ -377,6 +521,143 @@ BlockEncoder::Compress(const CompositeBuffer& RawData, const uint64_t BlockSize) return CompositeBuffer(SharedBuffer::MakeView(CompositeView, CompressedData.MoveToShared())); } +bool +BlockEncoder::CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + uint64_t BlockSize = DefaultBlockSize) const +{ + ZEN_ASSERT(IsPow2(BlockSize) && (BlockSize <= (1u << 31))); + + const uint64_t RawSize = RawData.GetSize(); + BLAKE3Stream RawHash; + + const uint64_t BlockCount = RoundUp(RawSize, BlockSize) / BlockSize; + ZEN_ASSERT(BlockCount <= ~uint32_t(0)); + + const uint64_t MetaSize = BlockCount * sizeof(uint32_t); + const uint64_t FullHeaderSize = sizeof(BufferHeader) + MetaSize; + + std::vector<uint32_t> CompressedBlockSizes; + CompressedBlockSizes.reserve(BlockCount); + uint64_t CompressedSize = 0; + { + UniqueBuffer CompressedBlockBuffer = UniqueBuffer::Alloc(GetCompressedBlocksBound(1, BlockSize, Min(RawSize, BlockSize))); + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((RawData.GetSegments().size() == 1) && RawData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + UniqueBuffer RawBlockCopy = UniqueBuffer::Alloc(BlockSize); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + for (uint64_t RawOffset = 0; RawOffset < RawSize;) + { + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + Source.Read(RawBlockCopy.GetData(), RawBlockSize, FileRef.FileChunkOffset + RawOffset); + const MemoryView RawBlock = RawBlockCopy.GetView().Left(RawBlockSize); + RawHash.Append(RawBlock); + MutableMemoryView CompressedBlock = CompressedBlockBuffer.GetMutableView(); + if (!CompressBlock(CompressedBlock, RawBlock)) + { + Source.Detach(); + return false; + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + Callback(FileRef.FileChunkOffset + RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlockCopy.GetView().GetData(), RawBlockSize))); + CompressedBlockSize = RawBlockSize; + } + else + { + Callback(FileRef.FileChunkOffset + RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); + } + + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } + Source.Detach(); + } + else + { + UniqueBuffer RawBlockCopy; + CompositeBuffer::Iterator It = RawData.GetIterator(0); + + for (uint64_t RawOffset = 0; RawOffset < RawSize;) + { + const uint64_t RawBlockSize = zen::Min(RawSize - RawOffset, BlockSize); + const MemoryView RawBlock = RawData.ViewOrCopyRange(It, RawBlockSize, RawBlockCopy); + RawHash.Append(RawBlock); + + MutableMemoryView CompressedBlock = CompressedBlockBuffer.GetMutableView(); + if (!CompressBlock(CompressedBlock, RawBlock)) + { + return false; + } + + uint64_t CompressedBlockSize = CompressedBlock.GetSize(); + if (RawBlockSize <= CompressedBlockSize) + { + Callback(RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawBlock.GetData(), RawBlockSize))); + CompressedBlockSize = RawBlockSize; + } + else + { + Callback(RawOffset, + RawBlockSize, + FullHeaderSize + CompressedSize, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, CompressedBlock.GetData(), CompressedBlockSize))); + } + + CompressedBlockSizes.push_back(static_cast<uint32_t>(CompressedBlockSize)); + CompressedSize += CompressedBlockSize; + RawOffset += RawBlockSize; + } + } + } + + // Return failure if the compressed data is larger than the raw data. + if (RawSize <= MetaSize + CompressedSize) + { + return false; + } + + // Write the header and calculate the CRC-32. + for (uint32_t& Size : CompressedBlockSizes) + { + Size = ByteSwap(Size); + } + UniqueBuffer HeaderBuffer = UniqueBuffer::Alloc(sizeof(BufferHeader) + MetaSize); + + BufferHeader Header; + Header.Method = GetMethod(); + Header.Compressor = GetCompressor(); + Header.CompressionLevel = GetCompressionLevel(); + Header.BlockSizeExponent = static_cast<uint8_t>(zen::FloorLog2_64(BlockSize)); + Header.BlockCount = static_cast<uint32_t>(BlockCount); + Header.TotalRawSize = RawSize; + Header.TotalCompressedSize = sizeof(BufferHeader) + MetaSize + CompressedSize; + Header.RawHash = RawHash.GetHash(); + + HeaderBuffer.GetMutableView().Mid(sizeof(BufferHeader), MetaSize).CopyFrom(MakeMemoryView(CompressedBlockSizes)); + Header.Write(HeaderBuffer.GetMutableView()); + + Callback(0, 0, 0, CompositeBuffer(IoBuffer(IoBuffer::Wrap, HeaderBuffer.GetData(), HeaderBuffer.GetSize()))); + return true; +} + class BlockDecoder : public BaseDecoder { public: @@ -406,6 +687,14 @@ public: MutableMemoryView RawView, uint64_t RawOffset) const final; + virtual bool DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) + const final; + protected: virtual bool DecompressBlock(MutableMemoryView RawData, MemoryView CompressedData) const = 0; }; @@ -528,6 +817,168 @@ BlockDecoder::DecompressToComposite(const BufferHeader& Header, const CompositeB } bool +BlockDecoder::DecompressToStream( + const BufferHeader& Header, + const CompositeBuffer& CompressedData, + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const +{ + if (Header.TotalCompressedSize != CompressedData.GetSize()) + { + return false; + } + + const uint64_t BlockSize = uint64_t(1) << Header.BlockSizeExponent; + + UniqueBuffer BlockSizeBuffer; + MemoryView BlockSizeView = CompressedData.ViewOrCopyRange(sizeof(BufferHeader), Header.BlockCount * sizeof(uint32_t), BlockSizeBuffer); + std::span<uint32_t const> CompressedBlockSizes(reinterpret_cast<const uint32_t*>(BlockSizeView.GetData()), Header.BlockCount); + + UniqueBuffer CompressedBlockCopy; + + const size_t FirstBlockIndex = uint64_t(RawOffset / BlockSize); + const size_t LastBlockIndex = uint64_t((RawOffset + RawSize - 1) / BlockSize); + const uint64_t LastBlockSize = BlockSize - ((Header.BlockCount * BlockSize) - Header.TotalRawSize); + uint64_t OffsetInFirstBlock = RawOffset % BlockSize; + uint64_t CompressedOffset = sizeof(BufferHeader) + uint64_t(Header.BlockCount) * sizeof(uint32_t); + uint64_t RemainingRawSize = RawSize; + + for (size_t BlockIndex = 0; BlockIndex < FirstBlockIndex; BlockIndex++) + { + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + CompressedOffset += CompressedBlockSize; + } + + UniqueBuffer RawDataBuffer; + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) + { + ZEN_ASSERT(FileRef.FileHandle != nullptr); + BasicFile Source; + Source.Attach(FileRef.FileHandle); + + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawSize, UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + if (CompressedBlockCopy.GetSize() < CompressedBlockSize) + { + CompressedBlockCopy = UniqueBuffer::Alloc(CompressedBlockSize); + } + Source.Read(CompressedBlockCopy.GetData(), CompressedBlockSize, FileRef.FileChunkOffset + CompressedOffset); + + MemoryView CompressedBlock = CompressedBlockCopy.GetView().Left(CompressedBlockSize); + + if (IsCompressed) + { + if (RawDataBuffer.IsNull()) + { + RawDataBuffer = UniqueBuffer::Alloc(zen::Min(RawSize, UncompressedBlockSize)); + } + else + { + ZEN_ASSERT(RawDataBuffer.GetSize() >= UncompressedBlockSize); + } + MutableMemoryView UncompressedBlock = RawDataBuffer.GetMutableView().Left(UncompressedBlockSize); + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + Source.Detach(); + return false; + } + if (!Callback(FileRef.FileChunkOffset + CompressedOffset, + CompressedBlockSize, + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) + { + Source.Detach(); + return false; + } + } + else + { + if (!Callback( + FileRef.FileChunkOffset + CompressedOffset, + BytesToUncompress, + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) + { + Source.Detach(); + return false; + } + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + } + Source.Detach(); + } + else + { + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawSize, UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + + if (IsCompressed) + { + if (RawDataBuffer.IsNull()) + { + RawDataBuffer = UniqueBuffer::Alloc(zen::Min(RawSize, UncompressedBlockSize)); + } + else + { + ZEN_ASSERT(RawDataBuffer.GetSize() >= UncompressedBlockSize); + } + MutableMemoryView UncompressedBlock = RawDataBuffer.GetMutableView().Left(UncompressedBlockSize); + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + return false; + } + if (!Callback(CompressedOffset, + UncompressedBlockSize, + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer(IoBuffer(IoBuffer::Wrap, RawDataBuffer.GetData(), BytesToUncompress)))) + { + return false; + } + } + else + { + if (!Callback( + CompressedOffset, + BytesToUncompress, + BlockIndex * BlockSize + OffsetInFirstBlock, + CompositeBuffer( + IoBuffer(IoBuffer::Wrap, CompressedBlockCopy.GetView().Mid(OffsetInFirstBlock).GetData(), BytesToUncompress)))) + { + return false; + } + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + } + } + return true; +} + +bool BlockDecoder::TryDecompressTo(const BufferHeader& Header, const CompositeBuffer& CompressedData, MutableMemoryView RawView, @@ -560,51 +1011,118 @@ BlockDecoder::TryDecompressTo(const BufferHeader& Header, CompressedOffset += CompressedBlockSize; } - for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if ((CompressedData.GetSegments().size() == 1) && CompressedData.GetSegments()[0].AsIoBuffer().GetFileReference(FileRef)) { - const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; - const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); - const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; + ZEN_ASSERT(FileRef.FileHandle != nullptr); + BasicFile Source; + Source.Attach(FileRef.FileHandle); - const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) - : zen::Min(RemainingRawSize, BlockSize); + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) + { + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; - MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 + ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); - if (IsCompressed) - { - MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + if (CompressedBlockCopy.GetSize() < CompressedBlockSize) + { + CompressedBlockCopy = UniqueBuffer::Alloc(CompressedBlockSize); + } + Source.Read(CompressedBlockCopy.GetData(), CompressedBlockSize, FileRef.FileChunkOffset + CompressedOffset); - const bool IsAligned = BytesToUncompress == UncompressedBlockSize; - if (!IsAligned) + MemoryView CompressedBlock = CompressedBlockCopy.GetView().Left(CompressedBlockSize); + + if (IsCompressed) { - // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. - if (UncompressedBlockCopy.IsNull()) + MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + + const bool IsAligned = BytesToUncompress == UncompressedBlockSize; + if (!IsAligned) { - UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. + if (UncompressedBlockCopy.IsNull()) + { + UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + } + UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); } - UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); - } - if (!DecompressBlock(UncompressedBlock, CompressedBlock)) - { - return false; - } + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + Source.Detach(); + return false; + } - if (!IsAligned) + if (!IsAligned) + { + RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + } + else { - RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + RawView += BytesToUncompress; } - else + Source.Detach(); + } + else + { + for (size_t BlockIndex = FirstBlockIndex; BlockIndex <= LastBlockIndex; BlockIndex++) { - RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); - } + const uint64_t UncompressedBlockSize = BlockIndex == Header.BlockCount - 1 ? LastBlockSize : BlockSize; + const uint32_t CompressedBlockSize = ByteSwap(CompressedBlockSizes[BlockIndex]); + const bool IsCompressed = CompressedBlockSize < UncompressedBlockSize; - OffsetInFirstBlock = 0; - RemainingRawSize -= BytesToUncompress; - CompressedOffset += CompressedBlockSize; - RawView += BytesToUncompress; + const uint64_t BytesToUncompress = OffsetInFirstBlock > 0 + ? zen::Min(RawView.GetSize(), UncompressedBlockSize - OffsetInFirstBlock) + : zen::Min(RemainingRawSize, BlockSize); + + MemoryView CompressedBlock = CompressedData.ViewOrCopyRange(CompressedOffset, CompressedBlockSize, CompressedBlockCopy); + + if (IsCompressed) + { + MutableMemoryView UncompressedBlock = RawView.Left(BytesToUncompress); + + const bool IsAligned = BytesToUncompress == UncompressedBlockSize; + if (!IsAligned) + { + // Decompress to a temporary buffer when the first or the last block reads are not aligned with the block boundaries. + if (UncompressedBlockCopy.IsNull()) + { + UncompressedBlockCopy = UniqueBuffer::Alloc(BlockSize); + } + UncompressedBlock = UncompressedBlockCopy.GetMutableView().Mid(0, UncompressedBlockSize); + } + + if (!DecompressBlock(UncompressedBlock, CompressedBlock)) + { + return false; + } + + if (!IsAligned) + { + RawView.CopyFrom(UncompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + } + else + { + RawView.CopyFrom(CompressedBlock.Mid(OffsetInFirstBlock, BytesToUncompress)); + } + + OffsetInFirstBlock = 0; + RemainingRawSize -= BytesToUncompress; + CompressedOffset += CompressedBlockSize; + RawView += BytesToUncompress; + } } return RemainingRawSize == 0; @@ -1342,6 +1860,31 @@ CompressedBuffer::Compress(const SharedBuffer& RawData, return Compress(CompositeBuffer(RawData), Compressor, CompressionLevel, BlockSize); } +bool +CompressedBuffer::CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + OodleCompressor Compressor, + OodleCompressionLevel CompressionLevel, + uint64_t BlockSize) +{ + using namespace detail; + + if (BlockSize == 0) + { + BlockSize = DefaultBlockSize; + } + + if (CompressionLevel == OodleCompressionLevel::None) + { + return NoneEncoder().CompressToStream(RawData, std::move(Callback), BlockSize); + } + else + { + return OodleEncoder(Compressor, CompressionLevel).CompressToStream(RawData, std::move(Callback), BlockSize); + } +} + CompressedBuffer CompressedBuffer::FromCompressed(const CompositeBuffer& InCompressedData, IoHash& OutRawHash, uint64_t& OutRawSize) { @@ -1536,6 +2079,28 @@ CompressedBuffer::DecompressToComposite() const } bool +CompressedBuffer::DecompressToStream( + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const +{ + using namespace detail; + if (CompressedData) + { + const BufferHeader Header = BufferHeader::Read(CompressedData); + if (Header.Magic == BufferHeader::ExpectedMagic) + { + if (const BaseDecoder* const Decoder = GetDecoder(Header.Method)) + { + const uint64_t TotalRawSize = RawSize < ~uint64_t(0) ? RawSize : Header.TotalRawSize - RawOffset; + return Decoder->DecompressToStream(Header, CompressedData, RawOffset, TotalRawSize, std::move(Callback)); + } + } + } + return false; +} + +bool CompressedBuffer::TryGetCompressParameters(OodleCompressor& OutCompressor, OodleCompressionLevel& OutCompressionLevel, uint64_t& OutBlockSize) const diff --git a/src/zencore/except.cpp b/src/zencore/except.cpp index d5eabea9d..610b0ced5 100644 --- a/src/zencore/except.cpp +++ b/src/zencore/except.cpp @@ -47,7 +47,7 @@ ThrowSystemException([[maybe_unused]] HRESULT hRes, [[maybe_unused]] std::string { if (HRESULT_FACILITY(hRes) == FACILITY_WIN32) { - throw std::system_error(std::error_code(hRes & 0xffff, std::system_category()), std::string(Message)); + throw std::system_error(std::error_code(HRESULT_CODE(hRes), std::system_category()), std::string(Message)); } else { diff --git a/src/zencore/filesystem.cpp b/src/zencore/filesystem.cpp index b8c35212f..46337ffc8 100644 --- a/src/zencore/filesystem.cpp +++ b/src/zencore/filesystem.cpp @@ -33,6 +33,7 @@ ZEN_THIRD_PARTY_INCLUDES_END # include <dirent.h> # include <fcntl.h> # include <sys/resource.h> +# include <sys/mman.h> # include <sys/stat.h> # include <pwd.h> # include <unistd.h> @@ -43,6 +44,7 @@ ZEN_THIRD_PARTY_INCLUDES_END # include <fcntl.h> # include <libproc.h> # include <sys/resource.h> +# include <sys/mman.h> # include <sys/stat.h> # include <sys/syslimits.h> # include <pwd.h> @@ -86,16 +88,9 @@ DeleteReparsePoint(const wchar_t* Path, DWORD dwReparseTag) } bool -CreateDirectories(const wchar_t* Dir) +CreateDirectories(const wchar_t* Path) { - // This may be suboptimal, in that it appears to try and create directories - // from the root on up instead of from some directory which is known to - // be present - // - // We should implement a smarter version at some point since this can be - // pretty expensive in aggregate - - return std::filesystem::create_directories(Dir); + return CreateDirectories(std::filesystem::path(Path)); } // Erase all files and directories in a given directory, leaving an empty directory @@ -212,75 +207,377 @@ DeleteDirectoriesInternal(const wchar_t* DirPath) bool CleanDirectory(const wchar_t* DirPath, bool KeepDotFiles) { - if (std::filesystem::exists(DirPath)) + if (IsDir(DirPath)) { return WipeDirectory(DirPath, KeepDotFiles); } return CreateDirectories(DirPath); } + +#endif // ZEN_PLATFORM_WINDOWS + +#if ZEN_PLATFORM_WINDOWS +const uint32_t FileAttributesSystemReadOnlyFlag = FILE_ATTRIBUTE_READONLY; +#else +const uint32_t FileAttributesSystemReadOnlyFlag = 0x00000001; #endif // ZEN_PLATFORM_WINDOWS +const uint32_t FileModeWriteEnableFlags = 0222; + bool -CreateDirectories(const std::filesystem::path& Dir) +IsFileAttributeReadOnly(uint32_t FileAttributes) +{ +#if ZEN_PLATFORM_WINDOWS + return (FileAttributes & FileAttributesSystemReadOnlyFlag) != 0; +#else + return (FileAttributes & 0x00000001) != 0; +#endif // ZEN_PLATFORM_WINDOWS +} + +bool +IsFileModeReadOnly(uint32_t FileMode) +{ + return (FileMode & FileModeWriteEnableFlags) == 0; +} + +uint32_t +MakeFileAttributeReadOnly(uint32_t FileAttributes, bool ReadOnly) +{ + return ReadOnly ? (FileAttributes | FileAttributesSystemReadOnlyFlag) : (FileAttributes & ~FileAttributesSystemReadOnlyFlag); +} + +uint32_t +MakeFileModeReadOnly(uint32_t FileMode, bool ReadOnly) +{ + return ReadOnly ? (FileMode & ~FileModeWriteEnableFlags) : (FileMode | FileModeWriteEnableFlags); +} + +#if ZEN_PLATFORM_WINDOWS + +static DWORD +WinGetFileAttributes(const std::filesystem::path& Path, std::error_code& Ec) { - if (Dir.string().ends_with(":")) + DWORD Attributes = ::GetFileAttributes(Path.native().c_str()); + if (Attributes == INVALID_FILE_ATTRIBUTES) { + DWORD LastError = GetLastError(); + switch (LastError) + { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + case ERROR_BAD_NETPATH: + case ERROR_INVALID_DRIVE: + break; + case ERROR_ACCESS_DENIED: + { + WIN32_FIND_DATA FindData; + HANDLE FindHandle = ::FindFirstFile(Path.native().c_str(), &FindData); + if (FindHandle == INVALID_HANDLE_VALUE) + { + DWORD LastFindError = GetLastError(); + if (LastFindError != ERROR_FILE_NOT_FOUND) + { + Ec = MakeErrorCode(LastError); + } + } + else + { + FindClose(FindHandle); + Attributes = FindData.dwFileAttributes; + } + } + break; + default: + Ec = MakeErrorCode(LastError); + break; + } + } + return Attributes; +} + +#endif // ZEN_PLATFORM_WINDOWS + +bool +RemoveDirNative(const std::filesystem::path& Path, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + BOOL Success = ::RemoveDirectory(Path.native().c_str()); + if (!Success) + { + DWORD LastError = GetLastError(); + switch (LastError) + { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + break; + default: + Ec = MakeErrorCode(LastError); + break; + } return false; } - while (!std::filesystem::is_directory(Dir)) + return true; +#else + return std::filesystem::remove(Path, Ec); +#endif // ZEN_PLATFORM_WINDOWS +} + +bool +RemoveFileNative(const std::filesystem::path& Path, bool ForceRemoveReadOnlyFiles, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + const std::filesystem::path::value_type* NativePath = Path.native().c_str(); + BOOL Success = ::DeleteFile(NativePath); + if (!Success) { - if (Dir.has_parent_path()) + if (ForceRemoveReadOnlyFiles) { - CreateDirectories(Dir.parent_path()); + DWORD FileAttributes = WinGetFileAttributes(NativePath, Ec); + if (Ec) + { + return false; + } + + if ((FileAttributes != INVALID_FILE_ATTRIBUTES) && IsFileAttributeReadOnly(FileAttributes) != 0) + { + ::SetFileAttributes(NativePath, MakeFileAttributeReadOnly(FileAttributes, false)); + Success = ::DeleteFile(NativePath); + } } - std::error_code ErrorCode; - std::filesystem::create_directory(Dir, ErrorCode); - if (ErrorCode) + if (!Success) { - throw std::system_error(ErrorCode, fmt::format("Failed to create directories for '{}'", Dir.string())); + DWORD LastError = GetLastError(); + switch (LastError) + { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + break; + default: + Ec = MakeErrorCode(LastError); + break; + } + return false; + } + } + return true; +#else + if (!ForceRemoveReadOnlyFiles) + { + struct stat Stat; + int err = stat(Path.native().c_str(), &Stat); + if (err != 0) + { + int32_t err = errno; + if (err == ENOENT) + { + Ec.clear(); + return false; + } + } + const uint32_t Mode = (uint32_t)Stat.st_mode; + if (IsFileModeReadOnly(Mode)) + { + Ec = MakeErrorCode(EACCES); + return false; + } + } + return std::filesystem::remove(Path, Ec); +#endif // ZEN_PLATFORM_WINDOWS +} + +static void +WipeDirectoryContentInternal(const std::filesystem::path& Path, bool ForceRemoveReadOnlyFiles, std::error_code& Ec) +{ + DirectoryContent LocalDirectoryContent; + GetDirectoryContent(Path, DirectoryContentFlags::IncludeDirs | DirectoryContentFlags::IncludeFiles, LocalDirectoryContent); + for (const std::filesystem::path& LocalFilePath : LocalDirectoryContent.Files) + { + RemoveFileNative(LocalFilePath, ForceRemoveReadOnlyFiles, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + Ec.clear(); + if (IsFile(LocalFilePath)) + { + RemoveFileNative(LocalFilePath, ForceRemoveReadOnlyFiles, Ec); + } + } + if (Ec) + { + return; + } + } + + for (std::filesystem::path& LocalDirPath : LocalDirectoryContent.Directories) + { + WipeDirectoryContentInternal(LocalDirPath, ForceRemoveReadOnlyFiles, Ec); + if (Ec) + { + return; + } + + RemoveDirNative(LocalDirPath, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + Ec.clear(); + if (IsDir(LocalDirPath)) + { + RemoveDirNative(LocalDirPath, Ec); + } + } + if (Ec) + { + return; } - return true; } - return false; } bool -DeleteDirectories(const std::filesystem::path& Dir) +CreateDirectory(const std::filesystem::path& Path, std::error_code& Ec) { - std::error_code ErrorCode; - return std::filesystem::remove_all(Dir, ErrorCode); +#if ZEN_PLATFORM_WINDOWS + BOOL Success = ::CreateDirectory(Path.native().c_str(), nullptr); + if (!Success) + { + DWORD LastError = GetLastError(); + switch (LastError) + { + case ERROR_FILE_EXISTS: + case ERROR_ALREADY_EXISTS: + break; + default: + Ec = MakeErrorCode(LastError); + break; + } + return false; + } + return Success; +#else + return std::filesystem::create_directory(Path, Ec); +#endif // ZEN_PLATFORM_WINDOWS } bool -CleanDirectory(const std::filesystem::path& Dir) +CreateDirectories(const std::filesystem::path& Path) { - if (std::filesystem::exists(Dir)) + std::error_code Ec; + bool Success = CreateDirectories(Path, Ec); + if (Ec) { - bool Success = true; + throw std::system_error(Ec, fmt::format("Failed to create directories for '{}'", Path.string())); + } + return Success; +} - for (const auto& Item : std::filesystem::directory_iterator(Dir)) - { - std::error_code ErrorCode; - const uintmax_t RemovedCount = std::filesystem::remove_all(Item, ErrorCode); +bool +CreateDirectories(const std::filesystem::path& Path, std::error_code& Ec) +{ + if (Path.string().ends_with(":")) + { + return false; + } + bool Exists = IsDir(Path, Ec); + if (Ec) + { + return false; + } + if (Exists) + { + return false; + } - Success = Success && !ErrorCode && RemovedCount; + if (Path.has_parent_path()) + { + bool Result = CreateDirectories(Path.parent_path(), Ec); + if (Ec) + { + return Result; } + } + return CreateDirectory(Path, Ec); +} - return Success; +bool +CleanDirectory(const std::filesystem::path& Path, bool ForceRemoveReadOnlyFiles) +{ + std::error_code Ec; + bool Result = CleanDirectory(Path, ForceRemoveReadOnlyFiles, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to clean directory for '{}'", Path.string())); } + return Result; +} - return CreateDirectories(Dir); +bool +CleanDirectory(const std::filesystem::path& Path, bool ForceRemoveReadOnlyFiles, std::error_code& Ec) +{ + bool Exists = IsDir(Path, Ec); + if (Ec) + { + return Exists; + } + if (Exists) + { + WipeDirectoryContentInternal(Path, ForceRemoveReadOnlyFiles, Ec); + return false; + } + return CreateDirectory(Path, Ec); +} + +bool +DeleteDirectories(const std::filesystem::path& Path) +{ + std::error_code Ec; + bool Result = DeleteDirectories(Path, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to delete directories for '{}'", Path.string())); + } + return Result; } bool -CleanDirectoryExceptDotFiles(const std::filesystem::path& Dir) +DeleteDirectories(const std::filesystem::path& Path, std::error_code& Ec) +{ + bool Exists = IsDir(Path, Ec); + if (Ec) + { + return Exists; + } + + if (Exists) + { + WipeDirectoryContentInternal(Path, false, Ec); + if (Ec) + { + return false; + } + bool Result = RemoveDirNative(Path, Ec); + for (size_t Retries = 0; Ec && Retries < 3; Retries++) + { + Sleep(100 + int(Retries * 50)); + Ec.clear(); + if (IsDir(Path)) + { + Result = RemoveDirNative(Path, Ec); + } + } + return Result; + } + return false; +} + +bool +CleanDirectoryExceptDotFiles(const std::filesystem::path& Path) { #if ZEN_PLATFORM_WINDOWS const bool KeepDotFiles = true; - return CleanDirectory(Dir.c_str(), KeepDotFiles); + return CleanDirectory(Path.c_str(), KeepDotFiles); #else - ZEN_UNUSED(Dir); + ZEN_UNUSED(Path); ZEN_NOT_IMPLEMENTED(); #endif @@ -531,7 +828,10 @@ CloneFile(std::filesystem::path FromPath, std::filesystem::path ToPath) } void -CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options, std::error_code& OutErrorCode) +CopyFile(const std::filesystem::path& FromPath, + const std::filesystem::path& ToPath, + const CopyFileOptions& Options, + std::error_code& OutErrorCode) { OutErrorCode.clear(); @@ -544,7 +844,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop } bool -CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options) +CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToPath, const CopyFileOptions& Options) { bool Success = false; @@ -587,7 +887,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop ScopedFd $From = {FromFd}; // To file - int ToFd = open(ToPath.c_str(), O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0666); + int ToFd = open(ToPath.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0666); if (ToFd < 0) { ThrowLastError(fmt::format("failed to create file {}", ToPath)); @@ -595,9 +895,14 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop fchmod(ToFd, 0666); ScopedFd $To = {ToFd}; + struct stat Stat; + fstat(FromFd, &Stat); + + size_t FileSizeBytes = Stat.st_size; + // Copy impl - static const size_t BufferSize = 64 << 10; - void* Buffer = malloc(BufferSize); + const size_t BufferSize = Min(FileSizeBytes, 64u << 10); + void* Buffer = malloc(BufferSize); while (true) { int BytesRead = read(FromFd, Buffer, BufferSize); @@ -607,7 +912,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop break; } - if (write(ToFd, Buffer, BytesRead) != BufferSize) + if (write(ToFd, Buffer, BytesRead) != BytesRead) { Success = false; break; @@ -618,7 +923,7 @@ CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop if (!Success) { - ThrowLastError("file copy failed"sv); + ThrowLastError(fmt::format("file copy from {} to {} failed", FromPath, ToPath)); } return true; @@ -629,7 +934,7 @@ CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop { // Validate arguments - if (FromPath.empty() || !std::filesystem::is_directory(FromPath)) + if (FromPath.empty() || !IsDir(FromPath)) throw std::runtime_error("invalid CopyTree source directory specified"); if (ToPath.empty()) @@ -638,16 +943,13 @@ CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop if (Options.MustClone && !SupportsBlockRefCounting(FromPath)) throw std::runtime_error(fmt::format("cloning not possible from '{}'", FromPath)); - if (std::filesystem::exists(ToPath)) + if (IsFile(ToPath)) { - if (!std::filesystem::is_directory(ToPath)) - { - throw std::runtime_error(fmt::format("specified CopyTree target '{}' is not a directory", ToPath)); - } + throw std::runtime_error(fmt::format("specified CopyTree target '{}' is not a directory", ToPath)); } - else + if (!IsDir(ToPath)) { - std::filesystem::create_directories(ToPath); + CreateDirectories(ToPath); } if (Options.MustClone && !SupportsBlockRefCounting(ToPath)) @@ -683,7 +985,7 @@ CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop { } - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::error_code Ec; const std::filesystem::path Relative = std::filesystem::relative(Parent, BasePath, Ec); @@ -752,6 +1054,100 @@ CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const Cop } void +WriteFile(void* NativeHandle, const void* Data, uint64_t Size, uint64_t FileOffset, uint64_t ChunkSize, std::error_code& Ec) +{ + ZEN_ASSERT(NativeHandle != nullptr); + + Ec.clear(); + + while (Size) + { + const uint64_t NumberOfBytesToWrite = Min(Size, ChunkSize); + +#if ZEN_PLATFORM_WINDOWS + OVERLAPPED Ovl{}; + + Ovl.Offset = DWORD(FileOffset & 0xffff'ffffu); + Ovl.OffsetHigh = DWORD(FileOffset >> 32); + + DWORD dwNumberOfBytesWritten = 0; + + BOOL Success = ::WriteFile(NativeHandle, Data, DWORD(NumberOfBytesToWrite), &dwNumberOfBytesWritten, &Ovl); +#else + static_assert(sizeof(off_t) >= sizeof(uint64_t), "sizeof(off_t) does not support large files"); + int Fd = int(uintptr_t(NativeHandle)); + int BytesWritten = pwrite(Fd, Data, NumberOfBytesToWrite, FileOffset); + bool Success = (BytesWritten > 0); +#endif + + if (!Success) + { + Ec = MakeErrorCodeFromLastError(); + return; + } + + Size -= NumberOfBytesToWrite; + FileOffset += NumberOfBytesToWrite; + Data = reinterpret_cast<const uint8_t*>(Data) + NumberOfBytesToWrite; + } +} + +void +ReadFile(void* NativeHandle, void* Data, uint64_t Size, uint64_t FileOffset, uint64_t ChunkSize, std::error_code& Ec) +{ + while (Size) + { + const uint64_t NumberOfBytesToRead = Min(Size, ChunkSize); + size_t BytesRead = 0; + +#if ZEN_PLATFORM_WINDOWS + OVERLAPPED Ovl{}; + + Ovl.Offset = DWORD(FileOffset & 0xffff'ffffu); + Ovl.OffsetHigh = DWORD(FileOffset >> 32); + + DWORD dwNumberOfBytesRead = 0; + BOOL Success = ::ReadFile(NativeHandle, Data, DWORD(NumberOfBytesToRead), &dwNumberOfBytesRead, &Ovl); + if (Success) + { + BytesRead = size_t(dwNumberOfBytesRead); + } + else if ((BytesRead != NumberOfBytesToRead)) + { + Ec = MakeErrorCode(ERROR_HANDLE_EOF); + return; + } + else + { + Ec = MakeErrorCodeFromLastError(); + return; + } +#else + static_assert(sizeof(off_t) >= sizeof(uint64_t), "sizeof(off_t) does not support large files"); + int Fd = int(uintptr_t(NativeHandle)); + ssize_t ReadResult = pread(Fd, Data, NumberOfBytesToRead, FileOffset); + if (ReadResult != -1) + { + BytesRead = size_t(ReadResult); + } + else if ((BytesRead != NumberOfBytesToRead)) + { + Ec = MakeErrorCode(EIO); + return; + } + else + { + Ec = MakeErrorCodeFromLastError(); + return; + } +#endif + Size -= NumberOfBytesToRead; + FileOffset += NumberOfBytesToRead; + Data = reinterpret_cast<uint8_t*>(Data) + NumberOfBytesToRead; + } +} + +void WriteFile(std::filesystem::path Path, const IoBuffer* const* Data, size_t BufferCount) { #if ZEN_PLATFORM_WINDOWS @@ -803,7 +1199,7 @@ WriteFile(std::filesystem::path Path, const IoBuffer* const* Data, size_t Buffer { Outfile.Close(); std::error_code DummyEc; - std::filesystem::remove(Path, DummyEc); + RemoveFile(Path, DummyEc); ThrowSystemException(hRes, fmt::format("File write failed for '{}'", Path).c_str()); } #else @@ -811,7 +1207,7 @@ WriteFile(std::filesystem::path Path, const IoBuffer* const* Data, size_t Buffer { close(Fd); std::error_code DummyEc; - std::filesystem::remove(Path, DummyEc); + RemoveFile(Path, DummyEc); ThrowLastError(fmt::format("File write failed for '{}'", Path)); } #endif // ZEN_PLATFORM_WINDOWS @@ -1164,7 +1560,7 @@ void FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, TreeVisitor& Visitor) { #if ZEN_PLATFORM_WINDOWS - uint64_t FileInfoBuffer[8 * 1024]; + std::vector<uint64_t> FileInfoBuffer(8 * 1024); FILE_INFO_BY_HANDLE_CLASS FibClass = FileIdBothDirectoryRestartInfo; bool Continue = true; @@ -1175,7 +1571,7 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr if (FAILED(hRes)) { - if (hRes == ERROR_FILE_NOT_FOUND || hRes == ERROR_PATH_NOT_FOUND) + if (HRESULT_CODE(hRes) == ERROR_FILE_NOT_FOUND || HRESULT_CODE(hRes) == ERROR_PATH_NOT_FOUND) { // Directory no longer exist, treat it as empty return; @@ -1185,8 +1581,9 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr while (Continue) { - BOOL Success = GetFileInformationByHandleEx(RootDirHandle, FibClass, FileInfoBuffer, sizeof FileInfoBuffer); - FibClass = FileIdBothDirectoryInfo; // Set up for next iteration + BOOL Success = + GetFileInformationByHandleEx(RootDirHandle, FibClass, FileInfoBuffer.data(), (DWORD)(FileInfoBuffer.size() * sizeof(uint64_t))); + FibClass = FileIdBothDirectoryInfo; // Set up for next iteration uint64_t EntryOffset = 0; @@ -1205,7 +1602,7 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr for (;;) { const FILE_ID_BOTH_DIR_INFO* DirInfo = - reinterpret_cast<const FILE_ID_BOTH_DIR_INFO*>(reinterpret_cast<const uint8_t*>(FileInfoBuffer) + EntryOffset); + reinterpret_cast<const FILE_ID_BOTH_DIR_INFO*>(reinterpret_cast<const uint8_t*>(FileInfoBuffer.data()) + EntryOffset); std::wstring_view FileName(DirInfo->FileName, DirInfo->FileNameLength / sizeof(wchar_t)); @@ -1236,7 +1633,11 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr } else { - Visitor.VisitFile(RootDir, FileName, DirInfo->EndOfFile.QuadPart, gsl::narrow<uint32_t>(DirInfo->FileAttributes)); + Visitor.VisitFile(RootDir, + FileName, + DirInfo->EndOfFile.QuadPart, + gsl::narrow<uint32_t>(DirInfo->FileAttributes), + (uint64_t)DirInfo->LastWriteTime.QuadPart); } const uint64_t NextOffset = DirInfo->NextEntryOffset; @@ -1285,7 +1686,7 @@ FileSystemTraversal::TraverseFileSystem(const std::filesystem::path& RootDir, Tr } else if (S_ISREG(Stat.st_mode)) { - Visitor.VisitFile(RootDir, FileName, Stat.st_size, gsl::narrow<uint32_t>(Stat.st_mode)); + Visitor.VisitFile(RootDir, FileName, Stat.st_size, gsl::narrow<uint32_t>(Stat.st_mode), gsl::narrow<uint64_t>(Stat.st_mtime)); } else { @@ -1326,6 +1727,156 @@ CanonicalPath(std::filesystem::path InPath, std::error_code& Ec) #endif } +bool +IsFile(const std::filesystem::path& Path) +{ + std::error_code Ec; + bool Result = IsFile(Path, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to test if path '{}' is a file", Path.string())); + } + return Result; +} + +bool +IsFile(const std::filesystem::path& Path, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + DWORD Attributes = WinGetFileAttributes(Path, Ec); + if (Ec) + { + return false; + } + if (Attributes == INVALID_FILE_ATTRIBUTES) + { + return false; + } + return (Attributes & FILE_ATTRIBUTE_DIRECTORY) == 0; +#else + struct stat Stat; + int err = stat(Path.native().c_str(), &Stat); + if (err != 0) + { + int32_t err = errno; + if (err == ENOENT) + { + Ec.clear(); + return false; + } + } + if (S_ISREG(Stat.st_mode)) + { + return true; + } + return false; +#endif // ZEN_PLATFORM_WINDOWS +} + +bool +IsDir(const std::filesystem::path& Path) +{ + std::error_code Ec; + bool Result = IsDir(Path, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to test if path '{}' is a directory", Path.string())); + } + return Result; +} + +bool +IsDir(const std::filesystem::path& Path, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + DWORD Attributes = WinGetFileAttributes(Path, Ec); + if (Ec) + { + return false; + } + if (Attributes == INVALID_FILE_ATTRIBUTES) + { + return false; + } + return (Attributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY; +#else + struct stat Stat; + int err = stat(Path.native().c_str(), &Stat); + if (err != 0) + { + int32_t err = errno; + if (err == ENOENT) + { + Ec.clear(); + return false; + } + } + if (S_ISDIR(Stat.st_mode)) + { + return true; + } + return false; +#endif // ZEN_PLATFORM_WINDOWS +} + +bool +RemoveFile(const std::filesystem::path& Path) +{ + std::error_code Ec; + bool Success = RemoveFile(Path, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to remove file '{}'", Path.string())); + } + return Success; +} + +bool +RemoveFile(const std::filesystem::path& Path, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + return RemoveFileNative(Path, false, Ec); +#else + bool IsDirectory = std::filesystem::is_directory(Path, Ec); + if (IsDirectory) + { + Ec = MakeErrorCode(EPERM); + return false; + } + Ec.clear(); + return RemoveFileNative(Path, false, Ec); +#endif // ZEN_PLATFORM_WINDOWS +} + +bool +RemoveDir(const std::filesystem::path& Path) +{ + std::error_code Ec; + bool Success = RemoveDir(Path, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to remove directory '{}'", Path.string())); + } + return Success; +} + +bool +RemoveDir(const std::filesystem::path& Path, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + return RemoveDirNative(Path, Ec); +#else + bool IsFile = std::filesystem::is_regular_file(Path, Ec); + if (IsFile) + { + Ec = MakeErrorCode(EPERM); + return false; + } + Ec.clear(); + return RemoveDirNative(Path, Ec); +#endif // ZEN_PLATFORM_WINDOWS +} + std::filesystem::path PathFromHandle(void* NativeHandle, std::error_code& Ec) { @@ -1423,23 +1974,84 @@ PathFromHandle(void* NativeHandle, std::error_code& Ec) } uint64_t -FileSizeFromHandle(void* NativeHandle) +FileSizeFromPath(const std::filesystem::path& Path) { - uint64_t FileSize = ~0ull; + std::error_code Ec; + uint64_t Size = FileSizeFromPath(Path, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to get file size for path '{}'", Path.string())); + } + return Size; +} + +uint64_t +FileSizeFromPath(const std::filesystem::path& Path, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + void* Handle = ::CreateFile(Path.native().c_str(), + GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + nullptr, + OPEN_EXISTING, + 0, + nullptr); + if (Handle == INVALID_HANDLE_VALUE) + { + DWORD LastError = GetLastError(); + Ec = MakeErrorCode(LastError); + return 0; + } + auto _ = MakeGuard([Handle]() { CloseHandle(Handle); }); + LARGE_INTEGER FileSize; + BOOL Success = GetFileSizeEx(Handle, &FileSize); + if (!Success) + { + Ec = MakeErrorCodeFromLastError(); + return 0; + } + return FileSize.QuadPart; +#else + return std::filesystem::file_size(Path, Ec); +#endif // ZEN_PLATFORM_WINDOWS +} +uint64_t +FileSizeFromHandle(void* NativeHandle, std::error_code& Ec) +{ #if ZEN_PLATFORM_WINDOWS BY_HANDLE_FILE_INFORMATION Bhfh = {}; if (GetFileInformationByHandle(NativeHandle, &Bhfh)) { - FileSize = uint64_t(Bhfh.nFileSizeHigh) << 32 | Bhfh.nFileSizeLow; + return uint64_t(Bhfh.nFileSizeHigh) << 32 | Bhfh.nFileSizeLow; + } + else + { + Ec = MakeErrorCodeFromLastError(); + return 0; } #else - int Fd = int(intptr_t(NativeHandle)); + int Fd = int(intptr_t(NativeHandle)); + static_assert(sizeof(decltype(stat::st_size)) == sizeof(uint64_t), "fstat() doesn't support large files"); struct stat Stat; - fstat(Fd, &Stat); - FileSize = size_t(Stat.st_size); + if (fstat(Fd, &Stat) == -1) + { + Ec = MakeErrorCodeFromLastError(); + return 0; + } + return uint64_t(Stat.st_size); #endif +} +uint64_t +FileSizeFromHandle(void* NativeHandle) +{ + std::error_code Ec; + uint64_t FileSize = FileSizeFromHandle(NativeHandle, Ec); + if (Ec) + { + return ~0ull; + } return FileSize; } @@ -1465,6 +2077,138 @@ GetModificationTickFromHandle(void* NativeHandle, std::error_code& Ec) return 0; } +uint64_t +GetModificationTickFromPath(const std::filesystem::path& Filename) +{ + // PathFromHandle + void* Handle; +#if ZEN_PLATFORM_WINDOWS + Handle = CreateFileW(Filename.c_str(), + GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + nullptr, + OPEN_EXISTING, + 0, + nullptr); + if (Handle == INVALID_HANDLE_VALUE) + { + ThrowLastError(fmt::format("Failed to open file {} to check modification tick.", Filename)); + } + auto _ = MakeGuard([Handle]() { CloseHandle(Handle); }); + std::error_code Ec; + uint64_t ModificatonTick = GetModificationTickFromHandle(Handle, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to get modification tick for path '{}'", Filename.string())); + } + return ModificatonTick; +#else + struct stat Stat; + int err = stat(Filename.native().c_str(), &Stat); + if (err) + { + ThrowLastError(fmt::format("Failed to get mode of file {}", Filename)); + } + return gsl::narrow<uint64_t>(Stat.st_mtime); +#endif +} + +bool +TryGetFileProperties(const std::filesystem::path& Path, + uint64_t& OutSize, + uint64_t& OutModificationTick, + uint32_t& OutNativeModeOrAttributes) +{ +#if ZEN_PLATFORM_WINDOWS + const std::filesystem::path::value_type* NativePath = Path.native().c_str(); + { + void* Handle = CreateFileW(NativePath, + GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + nullptr, + OPEN_EXISTING, + 0, + nullptr); + if (Handle == INVALID_HANDLE_VALUE) + { + return false; + } + auto _ = MakeGuard([Handle]() { CloseHandle(Handle); }); + + BY_HANDLE_FILE_INFORMATION Bhfh = {}; + if (!GetFileInformationByHandle(Handle, &Bhfh)) + { + return false; + } + OutSize = uint64_t(Bhfh.nFileSizeHigh) << 32 | Bhfh.nFileSizeLow; + OutModificationTick = ((uint64_t(Bhfh.ftLastWriteTime.dwHighDateTime) << 32) | Bhfh.ftLastWriteTime.dwLowDateTime); + OutNativeModeOrAttributes = Bhfh.dwFileAttributes; + return true; + } +#else + struct stat Stat; + int err = stat(Path.native().c_str(), &Stat); + if (err) + { + return false; + } + OutModificationTick = gsl::narrow<uint64_t>(Stat.st_mtime); + OutSize = size_t(Stat.st_size); + OutNativeModeOrAttributes = (uint32_t)Stat.st_mode; + return true; +#endif +} + +void +RenameFile(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath) +{ + std::error_code Ec; + RenameFile(SourcePath, TargetPath, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to rename path from '{}' to '{}'", SourcePath.string(), TargetPath.string())); + } +} + +void +RenameFile(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + BOOL Success = ::MoveFileEx(SourcePath.native().c_str(), TargetPath.native().c_str(), MOVEFILE_REPLACE_EXISTING); + if (!Success) + { + Ec = MakeErrorCodeFromLastError(); + } +#else + return std::filesystem::rename(SourcePath, TargetPath, Ec); +#endif // ZEN_PLATFORM_WINDOWS +} + +void +RenameDirectory(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath) +{ + std::error_code Ec; + RenameDirectory(SourcePath, TargetPath, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to rename directory from '{}' to '{}'", SourcePath.string(), TargetPath.string())); + } +} + +void +RenameDirectory(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + BOOL Success = ::MoveFile(SourcePath.native().c_str(), TargetPath.native().c_str()); + if (!Success) + { + Ec = MakeErrorCodeFromLastError(); + } +#else + return std::filesystem::rename(SourcePath, TargetPath, Ec); +#endif // ZEN_PLATFORM_WINDOWS +} + std::filesystem::path GetRunningExecutablePath() { @@ -1528,6 +2272,43 @@ MaximizeOpenFileCount() #endif } +bool +PrepareFileForScatteredWrite(void* FileHandle, uint64_t FinalSize) +{ + bool Result = true; +#if ZEN_PLATFORM_WINDOWS + + BY_HANDLE_FILE_INFORMATION Information; + if (GetFileInformationByHandle(FileHandle, &Information)) + { + if ((Information.dwFileAttributes & FILE_ATTRIBUTE_SPARSE_FILE) == 0) + { + DWORD _ = 0; + BOOL Ok = DeviceIoControl(FileHandle, FSCTL_SET_SPARSE, nullptr, 0, nullptr, 0, &_, nullptr); + if (!Ok) + { + std::error_code DummyEc; + ZEN_DEBUG("Unable to set sparse mode for file '{}'", PathFromHandle(FileHandle, DummyEc)); + Result = false; + } + } + } + + FILE_ALLOCATION_INFO AllocationInfo = {}; + AllocationInfo.AllocationSize.QuadPart = LONGLONG(FinalSize); + if (!SetFileInformationByHandle(FileHandle, FileAllocationInfo, &AllocationInfo, DWORD(sizeof(AllocationInfo)))) + { + std::error_code DummyEc; + ZEN_DEBUG("Unable to set file allocation size to {} for file '{}'", FinalSize, PathFromHandle(FileHandle, DummyEc)); + Result = false; + } + +#else // ZEN_PLATFORM_WINDOWS + ZEN_UNUSED(FileHandle, FinalSize); +#endif // ZEN_PLATFORM_WINDOWS + return Result; +} + void GetDirectoryContent(const std::filesystem::path& RootDir, DirectoryContentFlags Flags, DirectoryContent& OutContent) { @@ -1544,7 +2325,8 @@ GetDirectoryContent(const std::filesystem::path& RootDir, DirectoryContentFlags virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) override + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) override { if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeFiles)) { @@ -1557,6 +2339,10 @@ GetDirectoryContent(const std::filesystem::path& RootDir, DirectoryContentFlags { Content.FileAttributes.push_back(NativeModeOrAttributes); } + if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeModificationTick)) + { + Content.FileModificationTicks.push_back(NativeModificationTick); + } } } @@ -1612,7 +2398,8 @@ GetDirectoryContent(const std::filesystem::path& RootDir, virtual void VisitFile(const std::filesystem::path&, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) override + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) override { if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeFiles)) { @@ -1625,6 +2412,10 @@ GetDirectoryContent(const std::filesystem::path& RootDir, { Content.FileAttributes.push_back(NativeModeOrAttributes); } + if (EnumHasAnyFlags(Flags, DirectoryContentFlags::IncludeModificationTick)) + { + Content.FileModificationTicks.push_back(NativeModificationTick); + } } } @@ -1654,12 +2445,17 @@ GetDirectoryContent(const std::filesystem::path& RootDir, RelativeRoot = RelativeRoot / DirectoryName]() { ZEN_ASSERT(Visitor); auto _ = MakeGuard([&]() { PendingWorkCount->CountDown(); }); + try { MultithreadedVisitor SubVisitor(*WorkerPool, *PendingWorkCount, RelativeRoot, Flags, Visitor); FileSystemTraversal Traversal; Traversal.TraverseFileSystem(Path, SubVisitor); Visitor->AsyncVisitDirectory(SubVisitor.RelativeRoot, std::move(SubVisitor.Content)); } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed scheduling work to scan subfolder '{}'. Reason: '{}'", Path / RelativeRoot, Ex.what()); + } }); } catch (const std::exception Ex) @@ -1741,7 +2537,7 @@ RotateFiles(const std::filesystem::path& Filename, std::size_t MaxFiles) }; auto IsEmpty = [](const std::filesystem::path& Path, std::error_code& Ec) -> bool { - bool Exists = std::filesystem::exists(Path, Ec); + bool Exists = IsFile(Path, Ec); if (Ec) { return false; @@ -1750,7 +2546,7 @@ RotateFiles(const std::filesystem::path& Filename, std::size_t MaxFiles) { return true; } - uintmax_t Size = std::filesystem::file_size(Path, Ec); + uintmax_t Size = FileSizeFromPath(Path, Ec); if (Ec) { return false; @@ -1769,17 +2565,17 @@ RotateFiles(const std::filesystem::path& Filename, std::size_t MaxFiles) for (auto i = MaxFiles; i > 0; i--) { std::filesystem::path src = GetFileName(i - 1); - if (!std::filesystem::exists(src)) + if (!IsFile(src)) { continue; } std::error_code DummyEc; std::filesystem::path target = GetFileName(i); - if (std::filesystem::exists(target, DummyEc)) + if (IsFile(target, DummyEc)) { - std::filesystem::remove(target, DummyEc); + RemoveFile(target, DummyEc); } - std::filesystem::rename(src, target, DummyEc); + RenameFile(src, target, DummyEc); } } @@ -1816,16 +2612,16 @@ RotateDirectories(const std::filesystem::path& DirectoryName, std::size_t MaxDir { const std::filesystem::path SourcePath = GetPathForIndex(i - 1); - if (std::filesystem::exists(SourcePath)) + if (IsDir(SourcePath)) { std::filesystem::path TargetPath = GetPathForIndex(i); std::error_code DummyEc; - if (std::filesystem::exists(TargetPath, DummyEc)) + if (IsDir(TargetPath, DummyEc)) { - std::filesystem::remove_all(TargetPath, DummyEc); + DeleteDirectories(TargetPath, DummyEc); } - std::filesystem::rename(SourcePath, TargetPath, DummyEc); + RenameDirectory(SourcePath, TargetPath, DummyEc); } } @@ -1881,6 +2677,367 @@ PickDefaultSystemRootDirectory() #endif // ZEN_PLATFORM_WINDOWS } +#if ZEN_PLATFORM_WINDOWS + +uint32_t +GetFileAttributes(const std::filesystem::path& Filename, std::error_code& Ec) +{ + return WinGetFileAttributes(Filename, Ec); +} + +uint32_t +GetFileAttributes(const std::filesystem::path& Filename) +{ + std::error_code Ec; + uint32_t Result = zen::GetFileAttributes(Filename, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("failed to get attributes of file '{}'", Filename.string())); + } + return Result; +} + +void +SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes, std::error_code& Ec) +{ + if (::SetFileAttributes(Filename.native().c_str(), Attributes) == 0) + { + Ec = MakeErrorCodeFromLastError(); + } +} + +void +SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes) +{ + std::error_code Ec; + zen::SetFileAttributes(Filename, Attributes, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("failed to set attributes of file {}", Filename.string())); + } +} + +#endif // ZEN_PLATFORM_WINDOWS + +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +uint32_t +GetFileMode(const std::filesystem::path& Filename) +{ + std::error_code Ec; + uint32_t Result = GetFileMode(Filename, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to get mode of file {}", Filename)); + } + return Result; +} + +uint32_t +GetFileMode(const std::filesystem::path& Filename, std::error_code& Ec) +{ + struct stat Stat; + int err = stat(Filename.native().c_str(), &Stat); + if (err) + { + Ec = MakeErrorCodeFromLastError(); + return 0; + } + return (uint32_t)Stat.st_mode; +} + +void +SetFileMode(const std::filesystem::path& Filename, uint32_t Mode) +{ + std::error_code Ec; + SetFileMode(Filename, Mode, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("Failed to set mode of file {}", Filename)); + } +} + +void +SetFileMode(const std::filesystem::path& Filename, uint32_t Mode, std::error_code& Ec) +{ + int err = chmod(Filename.native().c_str(), (mode_t)Mode); + if (err) + { + Ec = MakeErrorCodeFromLastError(); + } +} + +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +bool +SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly, std::error_code& Ec) +{ +#if ZEN_PLATFORM_WINDOWS + uint32_t CurrentAttributes = GetFileAttributes(Filename, Ec); + if (Ec) + { + return false; + } + if (CurrentAttributes == INVALID_FILE_ATTRIBUTES) + { + Ec = MakeErrorCode(ERROR_FILE_NOT_FOUND); + return false; + } + uint32_t NewAttributes = MakeFileAttributeReadOnly(CurrentAttributes, ReadOnly); + if (CurrentAttributes != NewAttributes) + { + SetFileAttributes(Filename, NewAttributes, Ec); + if (Ec) + { + return false; + } + return true; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + uint32_t CurrentMode = GetFileMode(Filename, Ec); + if (Ec) + { + return false; + } + uint32_t NewMode = MakeFileModeReadOnly(CurrentMode, ReadOnly); + if (CurrentMode != NewMode) + { + SetFileMode(Filename, NewMode, Ec); + if (Ec) + { + return false; + } + return true; + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + return false; +} + +bool +SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly) +{ + std::error_code Ec; + bool Result = SetFileReadOnly(Filename, ReadOnly, Ec); + if (Ec) + { + throw std::system_error(Ec, fmt::format("failed to set read only mode of file '{}'", Filename.string())); + } + return Result; +} + +class SharedMemoryImpl : public SharedMemory +{ +public: + struct Data + { + void* Handle = nullptr; + void* DataPtr = nullptr; + size_t Size = 0; + std::string Name; + }; + + static Data Open(std::string_view Name, size_t Size, bool SystemGlobal) + { +#if ZEN_PLATFORM_WINDOWS + std::wstring InstanceMapName = Utf8ToWide(fmt::format("{}\\{}", SystemGlobal ? "Global" : "Local", Name)); + + HANDLE hMap = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, InstanceMapName.c_str()); + if (hMap == NULL) + { + return {}; + } + void* pBuf = MapViewOfFile(hMap, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, // offset high + 0, // offset low + DWORD(Size)); // size + + if (pBuf == NULL) + { + CloseHandle(hMap); + } + return Data{.Handle = hMap, .DataPtr = pBuf, .Size = Size, .Name = std::string(Name)}; +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + ZEN_UNUSED(SystemGlobal); + std::string InstanceMapName = fmt::format("/{}", Name); + + int Fd = shm_open(InstanceMapName.c_str(), O_RDWR, 0666); + if (Fd < 0) + { + return {}; + } + void* hMap = (void*)intptr_t(Fd); + + struct stat Stat; + fstat(Fd, &Stat); + + if (size_t(Stat.st_size) < Size) + { + close(Fd); + return {}; + } + + void* pBuf = mmap(nullptr, Size, PROT_READ | PROT_WRITE, MAP_SHARED, Fd, 0); + if (pBuf == MAP_FAILED) + { + close(Fd); + return {}; + } + return Data{.Handle = hMap, .DataPtr = pBuf, .Size = Size, .Name = std::string(Name)}; +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + } + + static Data Create(std::string_view Name, size_t Size, bool SystemGlobal) + { +#if ZEN_PLATFORM_WINDOWS + std::wstring InstanceMapName = Utf8ToWide(fmt::format("{}\\{}", SystemGlobal ? "Global" : "Local", Name)); + + SECURITY_ATTRIBUTES m_Attributes{}; + SECURITY_DESCRIPTOR m_Sd{}; + + m_Attributes.nLength = sizeof m_Attributes; + m_Attributes.bInheritHandle = false; // Disable inheritance + + const BOOL Success = InitializeSecurityDescriptor(&m_Sd, SECURITY_DESCRIPTOR_REVISION); + + if (Success) + { + if (!SetSecurityDescriptorDacl(&m_Sd, TRUE, (PACL)NULL, FALSE)) + { + ThrowLastError("SetSecurityDescriptorDacl failed"); + } + + m_Attributes.lpSecurityDescriptor = &m_Sd; + } + + HANDLE hMap = CreateFileMapping(INVALID_HANDLE_VALUE, // use paging file + &m_Attributes, // allow anyone to access + PAGE_READWRITE, // read/write access + 0, // maximum object size (high-order DWORD) + DWORD(Size), // maximum object size (low-order DWORD) + InstanceMapName.c_str()); + if (hMap == NULL) + { + return {}; + } + void* pBuf = MapViewOfFile(hMap, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, // offset high + 0, // offset low + DWORD(Size)); // size + + if (pBuf == NULL) + { + CloseHandle(hMap); + return {}; + } + return Data{.Handle = hMap, .DataPtr = pBuf, .Size = Size, .Name = std::string(Name)}; +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + ZEN_UNUSED(SystemGlobal); + std::string InstanceMapName = fmt::format("/{}", Name); + + int Fd = shm_open(InstanceMapName.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0666); + if (Fd < 0) + { + return {}; + } + fchmod(Fd, 0666); + void* hMap = (void*)intptr_t(Fd); + + int Result = ftruncate(Fd, Size); + ZEN_UNUSED(Result); + + void* pBuf = mmap(nullptr, Size, PROT_READ | PROT_WRITE, MAP_SHARED, Fd, 0); + if (pBuf == MAP_FAILED) + { + close(Fd); + return {}; + } + return Data{.Handle = hMap, .DataPtr = pBuf, .Size = Size, .Name = std::string(Name)}; +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + } + + static void Close(Data&& MemMap, bool Delete) + { +#if ZEN_PLATFORM_WINDOWS + ZEN_UNUSED(Delete); + if (MemMap.DataPtr != nullptr) + { + UnmapViewOfFile(MemMap.DataPtr); + MemMap.DataPtr = nullptr; + } + if (MemMap.Handle != nullptr) + { + CloseHandle(MemMap.Handle); + MemMap.Handle = nullptr; + } +#endif // ZEN_PLATFORM_WINDOWS +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + if (MemMap.DataPtr != nullptr) + { + munmap(MemMap.DataPtr, MemMap.Size); + MemMap.DataPtr = nullptr; + } + + if (MemMap.Handle != nullptr) + { + int Fd = int(intptr_t(MemMap.Handle)); + close(Fd); + MemMap.Handle = nullptr; + } + if (Delete) + { + std::string InstanceMapName = fmt::format("/{}", MemMap.Name); + shm_unlink(InstanceMapName.c_str()); + } +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + } + + SharedMemoryImpl(Data&& MemMap, bool IsOwned) : m_MemMap(std::move(MemMap)), m_IsOwned(IsOwned) {} + virtual ~SharedMemoryImpl() + { + try + { + Close(std::move(m_MemMap), /*Delete*/ m_IsOwned); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("SharedMemoryImpl::~SharedMemoryImpl threw exception: {}", Ex.what()); + } + } + + virtual void* GetData() override { return m_MemMap.DataPtr; } + +private: + Data m_MemMap; + const bool m_IsOwned = false; +}; + +std::unique_ptr<SharedMemory> +OpenSharedMemory(std::string_view Name, size_t Size, bool SystemGlobal) +{ + SharedMemoryImpl::Data MemMap = SharedMemoryImpl::Open(Name, Size, SystemGlobal); + if (MemMap.DataPtr) + { + return std::make_unique<SharedMemoryImpl>(std::move(MemMap), /*IsOwned*/ false); + } + return {}; +} + +std::unique_ptr<SharedMemory> +CreateSharedMemory(std::string_view Name, size_t Size, bool SystemGlobal) +{ + SharedMemoryImpl::Data MemMap = SharedMemoryImpl::Create(Name, Size, SystemGlobal); + if (MemMap.DataPtr) + { + return std::make_unique<SharedMemoryImpl>(std::move(MemMap), /*IsOwned*/ true); + } + return {}; +} + ////////////////////////////////////////////////////////////////////////// // // Testing related code follows... @@ -1901,7 +3058,7 @@ TEST_CASE("filesystem") path BinPath = GetRunningExecutablePath(); const bool ExpectedExe = PathToUtf8(BinPath.stem().native()).ends_with("-test"sv) || BinPath.stem() == "zenserver"; CHECK(ExpectedExe); - CHECK(is_regular_file(BinPath)); + CHECK(IsFile(BinPath)); // PathFromHandle void* Handle; @@ -1928,7 +3085,7 @@ TEST_CASE("filesystem") // Traversal struct : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t, uint32_t, uint64_t) override { bFoundExpected |= std::filesystem::equivalent(Parent / File, Expected); } @@ -1954,6 +3111,80 @@ TEST_CASE("filesystem") CHECK_EQ(BinScan.size(), BinRead.Data[0].GetSize()); } +TEST_CASE("Filesystem.Basics") +{ + std::filesystem::path TestBaseDir = GetRunningExecutablePath().parent_path() / ".test"; + CleanDirectory(TestBaseDir, true); + DeleteDirectories(TestBaseDir); + CHECK(!IsDir(TestBaseDir)); + CHECK(CleanDirectory(TestBaseDir, false)); + CHECK(IsDir(TestBaseDir)); + CHECK(!CleanDirectory(TestBaseDir, false)); + CHECK(!IsDir(TestBaseDir / "no_such_thing")); + CHECK(!IsDir("hgjda/cev_/q12")); + CHECK(!IsFile(TestBaseDir)); + CHECK(!IsFile(TestBaseDir / "no_such_thing")); + CHECK(!IsFile("hgjda/cev_/q12")); + CHECK_THROWS(FileSizeFromPath(TestBaseDir) == 0); + CHECK_THROWS(FileSizeFromPath(TestBaseDir / "no_such_file")); + CHECK(!CreateDirectories(TestBaseDir)); + CHECK(CreateDirectories(TestBaseDir / "nested" / "a" / "bit" / "deep")); + CHECK(!CreateDirectories(TestBaseDir / "nested" / "a" / "bit" / "deep")); + CHECK(IsDir(TestBaseDir / "nested" / "a" / "bit" / "deep")); + CHECK(IsDir(TestBaseDir / "nested" / "a" / "bit")); + CHECK(!IsDir(TestBaseDir / "nested" / "a" / "bit" / "deep" / "no")); + CHECK_THROWS(WriteFile(TestBaseDir / "nested" / "a", IoBuffer(20))); + CHECK_NOTHROW(WriteFile(TestBaseDir / "nested" / "a" / "yo", IoBuffer(20))); + CHECK(IsFile(TestBaseDir / "nested" / "a" / "yo")); + CHECK(FileSizeFromPath(TestBaseDir / "nested" / "a" / "yo") == 20); + CHECK(!IsFile(TestBaseDir / "nested" / "a")); + CHECK(DeleteDirectories(TestBaseDir / "nested" / "a" / "bit")); + CHECK(IsFile(TestBaseDir / "nested" / "a" / "yo")); + CHECK(!IsDir(TestBaseDir / "nested" / "a" / "bit")); + CHECK(!DeleteDirectories(TestBaseDir / "nested" / "a" / "bit")); + CHECK(IsDir(TestBaseDir / "nested" / "a")); + CHECK(DeleteDirectories(TestBaseDir / "nested")); + CHECK(!IsFile(TestBaseDir / "nested" / "a" / "yo")); + CHECK(CreateDirectories(TestBaseDir / "nested" / "deeper")); + CHECK_NOTHROW(WriteFile(TestBaseDir / "nested" / "deeper" / "yo", IoBuffer(20))); + CHECK_NOTHROW(RenameDirectory(TestBaseDir / "nested" / "deeper", TestBaseDir / "new_place")); + CHECK(IsFile(TestBaseDir / "new_place" / "yo")); + CHECK(FileSizeFromPath(TestBaseDir / "new_place" / "yo") == 20); + CHECK(IsDir(TestBaseDir / "new_place")); + CHECK(!IsFile(TestBaseDir / "new_place")); + CHECK_THROWS(RenameDirectory(TestBaseDir / "nested" / "deeper", TestBaseDir / "new_place")); + CHECK(!RemoveDir(TestBaseDir / "nested" / "deeper")); + CHECK(RemoveFile(TestBaseDir / "new_place" / "yo")); + CHECK(!IsFile(TestBaseDir / "new_place" / "yo")); + CHECK_THROWS(FileSizeFromPath(TestBaseDir / "new_place" / "yo")); + CHECK(!RemoveFile(TestBaseDir / "new_place" / "yo")); + CHECK_THROWS(RemoveFile(TestBaseDir / "nested")); + CHECK_THROWS(RemoveDir(TestBaseDir)); + CHECK_NOTHROW(WriteFile(TestBaseDir / "yo", IoBuffer(20))); + CHECK_NOTHROW(RenameFile(TestBaseDir / "yo", TestBaseDir / "new_place" / "yo")); + CHECK(!IsFile(TestBaseDir / "yo")); + CHECK(IsFile(TestBaseDir / "new_place" / "yo")); + CHECK(FileSizeFromPath(TestBaseDir / "new_place" / "yo") == 20); + CHECK_THROWS(RemoveDir(TestBaseDir / "new_place" / "yo")); + CHECK(DeleteDirectories(TestBaseDir)); + CHECK(!IsFile(TestBaseDir / "new_place" / "yo")); + CHECK(!IsDir(TestBaseDir)); + CHECK(!IsDir(TestBaseDir / "nested")); + CHECK(CreateDirectories(TestBaseDir / "nested")); + CHECK_NOTHROW(WriteFile(TestBaseDir / "nested" / "readonly", IoBuffer(20))); + CHECK(SetFileReadOnly(TestBaseDir / "nested" / "readonly", true)); + CHECK_THROWS(RemoveFile(TestBaseDir / "nested" / "readonly")); + CHECK_THROWS(CleanDirectory(TestBaseDir, false)); + CHECK(SetFileReadOnly(TestBaseDir / "nested" / "readonly", false)); + CHECK(RemoveFile(TestBaseDir / "nested" / "readonly")); + CHECK(!CleanDirectory(TestBaseDir, false)); + CHECK_NOTHROW(WriteFile(TestBaseDir / "nested" / "readonly", IoBuffer(20))); + CHECK(SetFileReadOnly(TestBaseDir / "nested" / "readonly", true)); + CHECK(!CleanDirectory(TestBaseDir / "nested", true)); + CHECK(!CleanDirectory(TestBaseDir, false)); + CHECK(RemoveDir(TestBaseDir)); +} + TEST_CASE("WriteFile") { std::filesystem::path TempFile = GetRunningExecutablePath().parent_path(); @@ -1988,7 +3219,7 @@ TEST_CASE("WriteFile") CHECK_EQ(memcmp(MagicTest.Data, MagicsReadback.Data[0].Data(), MagicTest.Size), 0); } - std::filesystem::remove(TempFile); + RemoveFile(TempFile); } TEST_CASE("DiskSpaceInfo") @@ -2045,7 +3276,7 @@ TEST_CASE("PathBuilder") TEST_CASE("RotateDirectories") { std::filesystem::path TestBaseDir = GetRunningExecutablePath().parent_path() / ".test"; - CleanDirectory(TestBaseDir); + CleanDirectory(TestBaseDir, false); std::filesystem::path RotateDir = TestBaseDir / "rotate_dir" / "dir_to_rotate"; IoBuffer DummyFileData = IoBufferBuilder::MakeCloneFromMemory("blubb", 5); @@ -2059,16 +3290,16 @@ TEST_CASE("RotateDirectories") const int RotateMax = 10; NewDir(); - CHECK(std::filesystem::exists(RotateDir)); + CHECK(IsDir(RotateDir)); RotateDirectories(RotateDir, RotateMax); - CHECK(!std::filesystem::exists(RotateDir)); - CHECK(std::filesystem::exists(DirWithSuffix(1))); + CHECK(!IsDir(RotateDir)); + CHECK(IsDir(DirWithSuffix(1))); NewDir(); - CHECK(std::filesystem::exists(RotateDir)); + CHECK(IsDir(RotateDir)); RotateDirectories(RotateDir, RotateMax); - CHECK(!std::filesystem::exists(RotateDir)); - CHECK(std::filesystem::exists(DirWithSuffix(1))); - CHECK(std::filesystem::exists(DirWithSuffix(2))); + CHECK(!IsDir(RotateDir)); + CHECK(IsDir(DirWithSuffix(1))); + CHECK(IsDir(DirWithSuffix(2))); for (int i = 0; i < RotateMax; ++i) { @@ -2078,19 +3309,35 @@ TEST_CASE("RotateDirectories") CHECK_EQ(IsError, false); } - CHECK(!std::filesystem::exists(RotateDir)); + CHECK(!IsDir(RotateDir)); for (int i = 0; i < RotateMax; ++i) { - CHECK(std::filesystem::exists(DirWithSuffix(i + 1))); + CHECK(IsDir(DirWithSuffix(i + 1))); } for (int i = RotateMax; i < RotateMax + 5; ++i) { - CHECK(!std::filesystem::exists(DirWithSuffix(RotateMax + i + 1))); + CHECK(!IsDir(DirWithSuffix(RotateMax + i + 1))); } } +TEST_CASE("SharedMemory") +{ + CHECK(!OpenSharedMemory("SharedMemoryTest0", 482, false)); + CHECK(!OpenSharedMemory("SharedMemoryTest0", 482, true)); + + { + auto Mem0 = CreateSharedMemory("SharedMemoryTest0", 482, false); + CHECK(Mem0); + strcpy((char*)Mem0->GetData(), "this is the string we are looking for"); + auto Mem1 = OpenSharedMemory("SharedMemoryTest0", 482, false); + CHECK_EQ(std::string((char*)Mem0->GetData()), std::string((char*)Mem1->GetData())); + } + + CHECK(!OpenSharedMemory("SharedMemoryTest0", 482, false)); +} + #endif } // namespace zen diff --git a/src/zencore/include/zencore/basicfile.h b/src/zencore/include/zencore/basicfile.h index 03c5605df..465499d2b 100644 --- a/src/zencore/include/zencore/basicfile.h +++ b/src/zencore/include/zencore/basicfile.h @@ -46,6 +46,10 @@ public: kPreventWrite = 0x2000'0000, // Do not open with write sharing mode (prevent other processes from writing to file while open) }; + BasicFile(const std::filesystem::path& FileName, Mode Mode); + BasicFile(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec); + BasicFile(const std::filesystem::path& FileName, Mode Mode, std::function<bool(std::error_code& Ec)>&& RetryCallback); + void Open(const std::filesystem::path& FileName, Mode Mode); void Open(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec); void Open(const std::filesystem::path& FileName, Mode Mode, std::function<bool(std::error_code& Ec)>&& RetryCallback); @@ -56,7 +60,8 @@ public: void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun); void Write(MemoryView Data, uint64_t FileOffset); void Write(MemoryView Data, uint64_t FileOffset, std::error_code& Ec); - uint64_t Write(CompositeBuffer Data, uint64_t FileOffset, std::error_code& Ec); + uint64_t Write(const CompositeBuffer& Data, uint64_t FileOffset); + uint64_t Write(const CompositeBuffer& Data, uint64_t FileOffset, std::error_code& Ec); void Write(const void* Data, uint64_t Size, uint64_t FileOffset); void Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec); void Flush(); @@ -169,8 +174,11 @@ public: BasicFileWriter(BasicFile& Base, uint64_t BufferSize); ~BasicFileWriter(); - void Write(const void* Data, uint64_t Size, uint64_t FileOffset); - void Flush(); + void Write(const void* Data, uint64_t Size, uint64_t FileOffset); + void Write(const CompositeBuffer& Data, uint64_t FileOffset); + void AddPadding(uint64_t Padding); + uint64_t AlignTo(uint64_t Alignment); + void Flush(); private: BasicFile& m_Base; @@ -180,6 +188,8 @@ private: uint64_t m_BufferEnd; }; +IoBuffer WriteToTempFile(CompositeBuffer&& Buffer, const std::filesystem::path& Path); + ZENCORE_API void basicfile_forcelink(); } // namespace zen diff --git a/src/zencore/include/zencore/blake3.h b/src/zencore/include/zencore/blake3.h index 28bb348c0..f01e45266 100644 --- a/src/zencore/include/zencore/blake3.h +++ b/src/zencore/include/zencore/blake3.h @@ -53,6 +53,7 @@ struct BLAKE3Stream void Reset(); // Begin streaming hash compute (not needed on freshly constructed instance) BLAKE3Stream& Append(const void* data, size_t byteCount); // Append another chunk BLAKE3Stream& Append(MemoryView DataView) { return Append(DataView.GetData(), DataView.GetSize()); } // Append another chunk + BLAKE3Stream& Append(const IoBuffer& Buffer); // Append another chunk BLAKE3 GetHash(); // Obtain final hash. If you wish to reuse the instance call reset() private: diff --git a/src/zencore/include/zencore/compactbinarybuilder.h b/src/zencore/include/zencore/compactbinarybuilder.h index 1c625cacc..f11717453 100644 --- a/src/zencore/include/zencore/compactbinarybuilder.h +++ b/src/zencore/include/zencore/compactbinarybuilder.h @@ -18,6 +18,8 @@ #include <type_traits> #include <vector> +#include <EASTL/fixed_vector.h> + #include <gsl/gsl-lite.hpp> namespace zen { @@ -367,6 +369,8 @@ public: /** Private flags that are public to work with ENUM_CLASS_FLAGS. */ enum class StateFlags : uint8_t; + typedef eastl::fixed_vector<uint8_t, 2048> CbWriterData_t; + protected: /** Reserve the specified size up front until the format is optimized. */ ZENCORE_API explicit CbWriter(int64_t InitialSize); @@ -409,8 +413,8 @@ private: // provided externally, such as on the stack. That format will store the offsets that require // object or array sizes to be inserted and field types to be removed, and will perform those // operations only when saving to a buffer. - std::vector<uint8_t> Data; - std::vector<WriterState> States; + eastl::fixed_vector<WriterState, 4> States; + CbWriterData_t Data; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/zencore/include/zencore/compactbinaryfmt.h b/src/zencore/include/zencore/compactbinaryfmt.h new file mode 100644 index 000000000..b03683db4 --- /dev/null +++ b/src/zencore/include/zencore/compactbinaryfmt.h @@ -0,0 +1,24 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/compactbinary.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <fmt/format.h> +ZEN_THIRD_PARTY_INCLUDES_END + +#include <string_view> + +template<typename T> +requires DerivedFrom<T, zen::CbObjectView> +struct fmt::formatter<T> : fmt::formatter<std::string_view> +{ + template<typename FormatContext> + auto format(const zen::CbObject& a, FormatContext& ctx) const + { + zen::ExtendableStringBuilder<1024> ObjStr; + zen::CompactBinaryToJson(a, ObjStr); + return fmt::formatter<std::string_view>::format(ObjStr.ToView(), ctx); + } +}; diff --git a/src/zencore/include/zencore/compactbinarypackage.h b/src/zencore/include/zencore/compactbinarypackage.h index 12fcc41b7..9ec12cb0f 100644 --- a/src/zencore/include/zencore/compactbinarypackage.h +++ b/src/zencore/include/zencore/compactbinarypackage.h @@ -12,6 +12,8 @@ #include <span> #include <variant> +#include <EASTL/fixed_vector.h> + #ifdef GetObject # error "windows.h pollution" # undef GetObject @@ -265,7 +267,10 @@ public: } /** Returns the attachments in this package. */ - inline std::span<const CbAttachment> GetAttachments() const { return Attachments; } + inline std::span<const CbAttachment> GetAttachments() const + { + return std::span<const CbAttachment>(begin(Attachments), end(Attachments)); + } /** * Find an attachment by its hash. @@ -286,6 +291,8 @@ public: void AddAttachments(std::span<const CbAttachment> Attachments); + void ReserveAttachments(size_t Count); + /** * Remove an attachment by hash. * @@ -324,9 +331,9 @@ private: void GatherAttachments(const CbObject& Object, AttachmentResolver Resolver); /** Attachments ordered by their hash. */ - std::vector<CbAttachment> Attachments; - CbObject Object; - IoHash ObjectHash; + eastl::fixed_vector<CbAttachment, 32> Attachments; + CbObject Object; + IoHash ObjectHash; }; namespace legacy { diff --git a/src/zencore/include/zencore/compositebuffer.h b/src/zencore/include/zencore/compositebuffer.h index b435c5e74..1e1611de9 100644 --- a/src/zencore/include/zencore/compositebuffer.h +++ b/src/zencore/include/zencore/compositebuffer.h @@ -2,6 +2,7 @@ #pragma once +#include <zencore/eastlutil.h> #include <zencore/sharedbuffer.h> #include <zencore/zencore.h> @@ -9,6 +10,8 @@ #include <span> #include <vector> +#include <EASTL/fixed_vector.h> + namespace zen { /** @@ -35,7 +38,7 @@ public: { m_Segments.reserve((GetBufferCount(std::forward<BufferTypes>(Buffers)) + ...)); (AppendBuffers(std::forward<BufferTypes>(Buffers)), ...); - std::erase_if(m_Segments, [](const SharedBuffer& It) { return It.IsNull(); }); + erase_if(m_Segments, [](const SharedBuffer& It) { return It.IsNull(); }); } } @@ -46,7 +49,10 @@ public: [[nodiscard]] ZENCORE_API uint64_t GetSize() const; /** Returns the segments that the buffer is composed from. */ - [[nodiscard]] inline std::span<const SharedBuffer> GetSegments() const { return std::span<const SharedBuffer>{m_Segments}; } + [[nodiscard]] inline std::span<const SharedBuffer> GetSegments() const + { + return std::span<const SharedBuffer>{begin(m_Segments), end(m_Segments)}; + } /** Returns true if the composite buffer is not null. */ [[nodiscard]] inline explicit operator bool() const { return !IsNull(); } @@ -120,6 +126,8 @@ public: static const CompositeBuffer Null; private: + typedef eastl::fixed_vector<SharedBuffer, 4> SharedBufferVector_t; + static inline size_t GetBufferCount(const CompositeBuffer& Buffer) { return Buffer.m_Segments.size(); } inline void AppendBuffers(const CompositeBuffer& Buffer) { @@ -134,12 +142,25 @@ private: inline void AppendBuffers(SharedBuffer&& Buffer) { m_Segments.push_back(std::move(Buffer)); } inline void AppendBuffers(IoBuffer&& Buffer) { m_Segments.push_back(SharedBuffer(std::move(Buffer))); } + static inline size_t GetBufferCount(std::span<IoBuffer>&& Container) { return Container.size(); } + inline void AppendBuffers(std::span<IoBuffer>&& Container) + { + m_Segments.reserve(m_Segments.size() + Container.size()); + for (IoBuffer& Buffer : Container) + { + m_Segments.emplace_back(SharedBuffer(std::move(Buffer))); + } + } + static inline size_t GetBufferCount(std::vector<SharedBuffer>&& Container) { return Container.size(); } static inline size_t GetBufferCount(std::vector<IoBuffer>&& Container) { return Container.size(); } inline void AppendBuffers(std::vector<SharedBuffer>&& Container) { m_Segments.reserve(m_Segments.size() + Container.size()); - m_Segments.insert(m_Segments.end(), std::make_move_iterator(Container.begin()), std::make_move_iterator(Container.end())); + for (SharedBuffer& Buffer : Container) + { + m_Segments.emplace_back(std::move(Buffer)); + } } inline void AppendBuffers(std::vector<IoBuffer>&& Container) { @@ -150,8 +171,17 @@ private: } } + inline void AppendBuffers(SharedBufferVector_t&& Container) + { + m_Segments.reserve(m_Segments.size() + Container.size()); + for (SharedBuffer& Buffer : Container) + { + m_Segments.emplace_back(std::move(Buffer)); + } + } + private: - std::vector<SharedBuffer> m_Segments; + SharedBufferVector_t m_Segments; }; void compositebuffer_forcelink(); // internal diff --git a/src/zencore/include/zencore/compress.h b/src/zencore/include/zencore/compress.h index 5e761ceef..09fa6249d 100644 --- a/src/zencore/include/zencore/compress.h +++ b/src/zencore/include/zencore/compress.h @@ -74,6 +74,12 @@ public: OodleCompressor Compressor = OodleCompressor::Mermaid, OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, uint64_t BlockSize = 0); + [[nodiscard]] ZENCORE_API static bool CompressToStream( + const CompositeBuffer& RawData, + std::function<void(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback, + OodleCompressor Compressor = OodleCompressor::Mermaid, + OodleCompressionLevel CompressionLevel = OodleCompressionLevel::VeryFast, + uint64_t BlockSize = 0); /** * Construct from a compressed buffer previously created by Compress(). @@ -196,6 +202,17 @@ public: */ [[nodiscard]] ZENCORE_API CompositeBuffer DecompressToComposite() const; + /** + * Decompress into and call callback for ranges of decompressed data. + * The buffer in the callback will be overwritten when the callback returns. + * + * @return True if the buffer is valid and can be decompressed. + */ + [[nodiscard]] ZENCORE_API bool DecompressToStream( + uint64_t RawOffset, + uint64_t RawSize, + std::function<bool(uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& Range)>&& Callback) const; + /** A null compressed buffer. */ static const CompressedBuffer Null; diff --git a/src/zencore/include/zencore/eastlutil.h b/src/zencore/include/zencore/eastlutil.h new file mode 100644 index 000000000..642321dae --- /dev/null +++ b/src/zencore/include/zencore/eastlutil.h @@ -0,0 +1,20 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <algorithm> + +namespace zen { + +size_t +erase_if(auto& _Cont, auto Predicate) +{ + auto _First = _Cont.begin(); + const auto _Last = _Cont.end(); + const auto _Old_size = _Cont.size(); + _First = std::remove_if(_First, _Last, Predicate); + _Cont.erase(_First, _Last); + return _Old_size - _Cont.size(); +} + +} // namespace zen diff --git a/src/zencore/include/zencore/filesystem.h b/src/zencore/include/zencore/filesystem.h index ca8682cd7..36d4d1b68 100644 --- a/src/zencore/include/zencore/filesystem.h +++ b/src/zencore/include/zencore/filesystem.h @@ -20,21 +20,35 @@ class WorkerThreadPool; /** Delete directory (after deleting any contents) */ -ZENCORE_API bool DeleteDirectories(const std::filesystem::path& dir); +ZENCORE_API bool DeleteDirectories(const std::filesystem::path& Path); + +/** Delete directory (after deleting any contents) + */ +ZENCORE_API bool DeleteDirectories(const std::filesystem::path& Path, std::error_code& Ec); + +/** Ensure directory exists. + + Will also create any required parent direCleanDirectoryctories + */ +ZENCORE_API bool CreateDirectories(const std::filesystem::path& Path); /** Ensure directory exists. Will also create any required parent directories */ -ZENCORE_API bool CreateDirectories(const std::filesystem::path& dir); +ZENCORE_API bool CreateDirectories(const std::filesystem::path& Path, std::error_code& Ec); /** Ensure directory exists and delete contents (if any) before returning */ -ZENCORE_API bool CleanDirectory(const std::filesystem::path& dir); +ZENCORE_API bool CleanDirectory(const std::filesystem::path& Path, bool ForceRemoveReadOnlyFiles); /** Ensure directory exists and delete contents (if any) before returning */ -ZENCORE_API bool CleanDirectoryExceptDotFiles(const std::filesystem::path& dir); +ZENCORE_API bool CleanDirectory(const std::filesystem::path& Path, bool ForceRemoveReadOnlyFiles, std::error_code& Ec); + +/** Ensure directory exists and delete contents (if any) before returning + */ +ZENCORE_API bool CleanDirectoryExceptDotFiles(const std::filesystem::path& Path); /** Map native file handle to a path */ @@ -44,20 +58,91 @@ ZENCORE_API std::filesystem::path PathFromHandle(void* NativeHandle, std::error_ */ ZENCORE_API std::filesystem::path CanonicalPath(std::filesystem::path InPath, std::error_code& Ec); +/** Query file size + */ +ZENCORE_API bool IsFile(const std::filesystem::path& Path); + +/** Query file size + */ +ZENCORE_API bool IsFile(const std::filesystem::path& Path, std::error_code& Ec); + +/** Query file size + */ +ZENCORE_API bool IsDir(const std::filesystem::path& Path); + +/** Query file size + */ +ZENCORE_API bool IsDir(const std::filesystem::path& Path, std::error_code& Ec); + +/** Query file size + */ +ZENCORE_API bool RemoveFile(const std::filesystem::path& Path); + +/** Query file size + */ +ZENCORE_API bool RemoveFile(const std::filesystem::path& Path, std::error_code& Ec); + +/** Query file size + */ +ZENCORE_API bool RemoveDir(const std::filesystem::path& Path); + +/** Query file size + */ +ZENCORE_API bool RemoveDir(const std::filesystem::path& Path, std::error_code& Ec); + +/** Query file size + */ +ZENCORE_API uint64_t FileSizeFromPath(const std::filesystem::path& Path); + +/** Query file size + */ +ZENCORE_API uint64_t FileSizeFromPath(const std::filesystem::path& Path, std::error_code& Ec); + /** Query file size from native file handle */ ZENCORE_API uint64_t FileSizeFromHandle(void* NativeHandle); +/** Query file size from native file handle + */ +ZENCORE_API uint64_t FileSizeFromHandle(void* NativeHandle, std::error_code& Ec); + /** Get a native time tick of last modification time */ ZENCORE_API uint64_t GetModificationTickFromHandle(void* NativeHandle, std::error_code& Ec); +/** Get a native time tick of last modification time + */ +ZENCORE_API uint64_t GetModificationTickFromPath(const std::filesystem::path& Filename); + +ZENCORE_API bool TryGetFileProperties(const std::filesystem::path& Path, + uint64_t& OutSize, + uint64_t& OutModificationTick, + uint32_t& OutNativeModeOrAttributes); + +/** Move a file, if the files are not on the same drive the function will fail + */ +ZENCORE_API void RenameFile(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath); + +/** Move a file, if the files are not on the same drive the function will fail + */ +ZENCORE_API void RenameFile(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath, std::error_code& Ec); + +/** Move a directory, if the files are not on the same drive the function will fail + */ +ZENCORE_API void RenameDirectory(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath); + +/** Move a directory, if the files are not on the same drive the function will fail + */ +ZENCORE_API void RenameDirectory(const std::filesystem::path& SourcePath, const std::filesystem::path& TargetPath, std::error_code& Ec); + ZENCORE_API std::filesystem::path GetRunningExecutablePath(); /** Set the max open file handle count to max allowed for the current process on Linux and MacOS */ ZENCORE_API void MaximizeOpenFileCount(); +ZENCORE_API bool PrepareFileForScatteredWrite(void* FileHandle, uint64_t FinalSize); + struct FileContents { std::vector<IoBuffer> Data; @@ -86,6 +171,13 @@ ZENCORE_API void ScanFile(void* NativeHandle, uint64_t Size, uint64_t ChunkSize, std::function<void(const void* Data, size_t Size)>&& ProcessFunc); +ZENCORE_API void WriteFile(void* NativeHandle, + const void* Data, + uint64_t Size, + uint64_t FileOffset, + uint64_t ChunkSize, + std::error_code& Ec); +ZENCORE_API void ReadFile(void* NativeHandle, void* Data, uint64_t Size, uint64_t FileOffset, uint64_t ChunkSize, std::error_code& Ec); struct CopyFileOptions { @@ -93,11 +185,11 @@ struct CopyFileOptions bool MustClone = false; }; -ZENCORE_API bool CopyFile(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options); -ZENCORE_API void CopyFile(std::filesystem::path FromPath, - std::filesystem::path ToPath, - const CopyFileOptions& Options, - std::error_code& OutError); +ZENCORE_API bool CopyFile(const std::filesystem::path& FromPath, const std::filesystem::path& ToPath, const CopyFileOptions& Options); +ZENCORE_API void CopyFile(const std::filesystem::path& FromPath, + const std::filesystem::path& ToPath, + const CopyFileOptions& Options, + std::error_code& OutError); ZENCORE_API void CopyTree(std::filesystem::path FromPath, std::filesystem::path ToPath, const CopyFileOptions& Options); ZENCORE_API bool SupportsBlockRefCounting(std::filesystem::path Path); @@ -203,7 +295,8 @@ public: virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, - uint32_t NativeModeOrAttributes) = 0; + uint32_t NativeModeOrAttributes, + uint64_t NativeModificationTick) = 0; // This should return true if we should recurse into the directory virtual bool VisitDirectory(const std::filesystem::path& Parent, @@ -216,13 +309,14 @@ public: enum class DirectoryContentFlags : uint8_t { - None = 0, - IncludeDirs = 1u << 0, - IncludeFiles = 1u << 1, - Recursive = 1u << 2, - IncludeFileSizes = 1u << 3, - IncludeAttributes = 1u << 4, - IncludeAllEntries = IncludeDirs | IncludeFiles | Recursive + None = 0, + IncludeDirs = 1u << 0, + IncludeFiles = 1u << 1, + Recursive = 1u << 2, + IncludeFileSizes = 1u << 3, + IncludeAttributes = 1u << 4, + IncludeModificationTick = 1u << 5, + IncludeAllEntries = IncludeDirs | IncludeFiles | Recursive }; ENUM_CLASS_FLAGS(DirectoryContentFlags) @@ -232,6 +326,7 @@ struct DirectoryContent std::vector<std::filesystem::path> Files; std::vector<uint64_t> FileSizes; std::vector<uint32_t> FileAttributes; + std::vector<uint64_t> FileModificationTicks; std::vector<std::filesystem::path> Directories; std::vector<uint32_t> DirectoryAttributes; }; @@ -246,6 +341,7 @@ public: std::vector<std::filesystem::path> FileNames; std::vector<uint64_t> FileSizes; std::vector<uint32_t> FileAttributes; + std::vector<uint64_t> FileModificationTicks; std::vector<std::filesystem::path> DirectoryNames; std::vector<uint32_t> DirectoryAttributes; }; @@ -267,6 +363,38 @@ std::error_code RotateDirectories(const std::filesystem::path& DirectoryName, st std::filesystem::path PickDefaultSystemRootDirectory(); +#if ZEN_PLATFORM_WINDOWS +uint32_t GetFileAttributes(const std::filesystem::path& Filename); +uint32_t GetFileAttributes(const std::filesystem::path& Filename, std::error_code& Ec); +void SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes); +void SetFileAttributes(const std::filesystem::path& Filename, uint32_t Attributes, std::error_code& Ec); +#endif // ZEN_PLATFORM_WINDOWS + +#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC +uint32_t GetFileMode(const std::filesystem::path& Filename); +uint32_t GetFileMode(const std::filesystem::path& Filename, std::error_code& Ec); +void SetFileMode(const std::filesystem::path& Filename, uint32_t Mode); +void SetFileMode(const std::filesystem::path& Filename, uint32_t Mode, std::error_code& Ec); +#endif // ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC + +bool IsFileAttributeReadOnly(uint32_t FileAttributes); +bool IsFileModeReadOnly(uint32_t FileMode); +uint32_t MakeFileAttributeReadOnly(uint32_t FileAttributes, bool ReadOnly); +uint32_t MakeFileModeReadOnly(uint32_t FileMode, bool ReadOnly); + +bool SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly, std::error_code& Ec); +bool SetFileReadOnly(const std::filesystem::path& Filename, bool ReadOnly); + +class SharedMemory +{ +public: + virtual ~SharedMemory() {} + virtual void* GetData() = 0; +}; + +std::unique_ptr<SharedMemory> OpenSharedMemory(std::string_view Name, size_t Size, bool SystemGlobal); +std::unique_ptr<SharedMemory> CreateSharedMemory(std::string_view Name, size_t Size, bool SystemGlobal); + ////////////////////////////////////////////////////////////////////////// void filesystem_forcelink(); // internal diff --git a/src/zencore/include/zencore/fmtutils.h b/src/zencore/include/zencore/fmtutils.h index 8482157fb..404e570fd 100644 --- a/src/zencore/include/zencore/fmtutils.h +++ b/src/zencore/include/zencore/fmtutils.h @@ -12,6 +12,7 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <fmt/format.h> ZEN_THIRD_PARTY_INCLUDES_END +#include <chrono> #include <string_view> // Custom formatting for some zencore types @@ -20,7 +21,8 @@ template<typename T> requires DerivedFrom<T, zen::StringBuilderBase> struct fmt::formatter<T> : fmt::formatter<std::string_view> { - auto format(const zen::StringBuilderBase& a, format_context& ctx) const + template<typename FormatContext> + auto format(const zen::StringBuilderBase& a, FormatContext& ctx) const { return fmt::formatter<std::string_view>::format(a.ToView(), ctx); } @@ -30,7 +32,8 @@ template<typename T> requires DerivedFrom<T, zen::NiceBase> struct fmt::formatter<T> : fmt::formatter<std::string_view> { - auto format(const zen::NiceBase& a, format_context& ctx) const + template<typename FormatContext> + auto format(const zen::NiceBase& a, FormatContext& ctx) const { return fmt::formatter<std::string_view>::format(std::string_view(a), ctx); } @@ -97,8 +100,22 @@ template<typename T> requires DerivedFrom<T, zen::PathBuilderBase> struct fmt::formatter<T> : fmt::formatter<std::string_view> { - auto format(const zen::PathBuilderBase& a, format_context& ctx) const + template<typename FormatContext> + auto format(const zen::PathBuilderBase& a, FormatContext& ctx) const { return fmt::formatter<std::string_view>::format(a.ToView(), ctx); } }; + +template<> +struct fmt::formatter<std::chrono::system_clock::time_point> : formatter<string_view> +{ + template<typename FormatContext> + auto format(const std::chrono::system_clock::time_point& TimePoint, FormatContext& ctx) const + { + std::time_t Time = std::chrono::system_clock::to_time_t(TimePoint); + char TimeString[std::size("yyyy-mm-ddThh:mm:ss")]; + std::strftime(std::data(TimeString), std::size(TimeString), "%FT%T", std::localtime(&Time)); + return fmt::format_to(ctx.out(), "{}", TimeString); + } +}; diff --git a/src/zencore/include/zencore/iohash.h b/src/zencore/include/zencore/iohash.h index 8871a5895..a619b0053 100644 --- a/src/zencore/include/zencore/iohash.h +++ b/src/zencore/include/zencore/iohash.h @@ -47,8 +47,8 @@ struct IoHash static IoHash HashBuffer(const void* data, size_t byteCount); static IoHash HashBuffer(MemoryView Data) { return HashBuffer(Data.GetData(), Data.GetSize()); } - static IoHash HashBuffer(const CompositeBuffer& Buffer); - static IoHash HashBuffer(const IoBuffer& Buffer); + static IoHash HashBuffer(const CompositeBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes = nullptr); + static IoHash HashBuffer(const IoBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes = nullptr); static IoHash FromHexString(const char* string); static IoHash FromHexString(const std::string_view string); static bool TryParse(std::string_view Str, IoHash& Hash); @@ -102,6 +102,12 @@ struct IoHashStream return *this; } + IoHashStream& Append(const IoBuffer& Buffer) + { + m_Blake3Stream.Append(Buffer); + return *this; + } + /// Append another chunk IoHashStream& Append(MemoryView Data) { diff --git a/src/zencore/include/zencore/memory/newdelete.h b/src/zencore/include/zencore/memory/newdelete.h index d22c8604f..059f1d5ea 100644 --- a/src/zencore/include/zencore/memory/newdelete.h +++ b/src/zencore/include/zencore/memory/newdelete.h @@ -153,3 +153,29 @@ operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexce return zen_new_aligned_nothrow(n, static_cast<size_t>(al)); } #endif + +// EASTL operator new + +void* +operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line) +{ + ZEN_UNUSED(pName, flags, debugFlags, file, line); + return zen_new(size); +} + +void* +operator new[](size_t size, + size_t alignment, + size_t alignmentOffset, + const char* pName, + int flags, + unsigned debugFlags, + const char* file, + int line) +{ + ZEN_UNUSED(alignmentOffset, pName, flags, debugFlags, file, line); + + ZEN_ASSERT_SLOW(alignmentOffset == 0); // currently not supported + + return zen_new_aligned(size, alignment); +} diff --git a/src/zencore/include/zencore/process.h b/src/zencore/include/zencore/process.h index d1394cd9a..d3e1de703 100644 --- a/src/zencore/include/zencore/process.h +++ b/src/zencore/include/zencore/process.h @@ -98,7 +98,7 @@ ZENCORE_API int GetCurrentProcessId(); int GetProcessId(CreateProcResult ProcId); std::filesystem::path GetProcessExecutablePath(int Pid, std::error_code& OutEc); -std::error_code FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHandle); +std::error_code FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHandle, bool IncludeSelf = true); void process_forcelink(); // internal diff --git a/src/zenserver/sentryintegration.h b/src/zencore/include/zencore/sentryintegration.h index 40e22af4e..faf1238b7 100644 --- a/src/zenserver/sentryintegration.h +++ b/src/zencore/include/zencore/sentryintegration.h @@ -31,8 +31,18 @@ public: SentryIntegration(); ~SentryIntegration(); - void Initialize(std::string SentryDatabasePath, std::string SentryAttachmentsPath, bool AllowPII, const std::string& CommandLine); - void LogStartupInformation(); + struct Config + { + std::string DatabasePath; + std::string AttachmentsPath; + std::string Dsn; + std::string Environment; + bool AllowPII = false; + bool Debug = false; + }; + + void Initialize(const Config& Conf, const std::string& CommandLine); + void LogStartupInformation(); static void ClearCaches(); private: diff --git a/src/zencore/include/zencore/string.h b/src/zencore/include/zencore/string.h index e2ef1c1a0..68129b691 100644 --- a/src/zencore/include/zencore/string.h +++ b/src/zencore/include/zencore/string.h @@ -522,6 +522,9 @@ public: ////////////////////////////////////////////////////////////////////////// +bool IsValidUtf8(const std::string_view& str); +std::string_view::const_iterator FindFirstInvalidUtf8Byte(const std::string_view& str); + void Utf8ToWide(const char8_t* str, WideStringBuilderBase& out); void Utf8ToWide(const std::u8string_view& wstr, WideStringBuilderBase& out); void Utf8ToWide(const std::string_view& wstr, WideStringBuilderBase& out); diff --git a/src/zencore/include/zencore/thread.h b/src/zencore/include/zencore/thread.h index 8fb781571..d9fb5c023 100644 --- a/src/zencore/include/zencore/thread.h +++ b/src/zencore/include/zencore/thread.h @@ -183,6 +183,7 @@ public: void CountDown() { std::ptrdiff_t Old = Counter.fetch_sub(1); + ZEN_ASSERT(Old > 0); if (Old == 1) { Complete.Set(); @@ -197,8 +198,7 @@ public: void AddCount(std::ptrdiff_t Count) { std::atomic_ptrdiff_t Old = Counter.fetch_add(Count); - ZEN_UNUSED(Old); - ZEN_ASSERT_SLOW(Old > 0); + ZEN_ASSERT(Old > 0); } bool Wait(int TimeoutMs = -1) diff --git a/src/zencore/include/zencore/timer.h b/src/zencore/include/zencore/timer.h index e4ddc3505..767dc4314 100644 --- a/src/zencore/include/zencore/timer.h +++ b/src/zencore/include/zencore/timer.h @@ -21,6 +21,10 @@ ZENCORE_API uint64_t GetHifreqTimerFrequency(); ZENCORE_API double GetHifreqTimerToSeconds(); ZENCORE_API uint64_t GetHifreqTimerFrequencySafe(); // May be used during static init +// Query time since process was spawned (returns time in ms) + +uint64_t GetTimeSinceProcessStart(); + class Stopwatch { public: diff --git a/src/zencore/iobuffer.cpp b/src/zencore/iobuffer.cpp index 3b5c89c3e..8e9a37a27 100644 --- a/src/zencore/iobuffer.cpp +++ b/src/zencore/iobuffer.cpp @@ -297,49 +297,21 @@ IoBufferExtendedCore::Materialize() const AllocateBuffer(m_DataBytes, sizeof(void*)); NewFlags |= kIsOwnedByThis; - int32_t Error = 0; - size_t BytesRead = 0; -#if ZEN_PLATFORM_WINDOWS - OVERLAPPED Ovl{}; - - Ovl.Offset = DWORD(m_FileOffset & 0xffff'ffffu); - Ovl.OffsetHigh = DWORD(m_FileOffset >> 32); - - DWORD dwNumberOfBytesRead = 0; - BOOL Success = ::ReadFile(m_FileHandle, (void*)m_DataPtr, DWORD(m_DataBytes), &dwNumberOfBytesRead, &Ovl) == TRUE; - if (Success) - { - BytesRead = size_t(dwNumberOfBytesRead); - } - else - { - Error = zen::GetLastError(); - } -#else - static_assert(sizeof(off_t) >= sizeof(uint64_t), "sizeof(off_t) does not support large files"); - int Fd = int(uintptr_t(m_FileHandle)); - ssize_t ReadResult = pread(Fd, (void*)m_DataPtr, m_DataBytes, m_FileOffset); - if (ReadResult != -1) - { - BytesRead = size_t(ReadResult); - } - else - { - Error = zen::GetLastError(); - } -#endif // ZEN_PLATFORM_WINDOWS - if (Error || (BytesRead != m_DataBytes)) + std::error_code Ec; + ReadFile(m_FileHandle, (void*)m_DataPtr, m_DataBytes, m_FileOffset, DisableMMapSizeLimit, Ec); + if (Ec) { std::error_code DummyEc; - ZEN_WARN("IoBufferExtendedCore::Materialize: ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {}", + ZEN_WARN("IoBufferExtendedCore::Materialize: ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x}), {} ({})", m_FileOffset, m_DataBytes, zen::PathFromHandle(m_FileHandle, DummyEc), zen::FileSizeFromHandle(m_FileHandle), - GetSystemErrorAsString(Error)); + Ec.message(), + Ec.value()); throw std::system_error( - std::error_code(Error, std::system_category()), + Ec, fmt::format("IoBufferExtendedCore::Materialize: ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})", m_FileOffset, m_DataBytes, diff --git a/src/zencore/iohash.cpp b/src/zencore/iohash.cpp index 7200e6e3f..3b2af0db4 100644 --- a/src/zencore/iohash.cpp +++ b/src/zencore/iohash.cpp @@ -30,7 +30,7 @@ IoHash::HashBuffer(const void* data, size_t byteCount) } IoHash -IoHash::HashBuffer(const CompositeBuffer& Buffer) +IoHash::HashBuffer(const CompositeBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes) { IoHashStream Hasher; @@ -46,11 +46,21 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + [&Hasher, ProcessedBytes](const void* Data, size_t Size) { + Hasher.Append(Data, Size); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(Size); + } + }); } else { Hasher.Append(Segment.GetData(), SegmentSize); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(SegmentSize); + } } } @@ -58,7 +68,7 @@ IoHash::HashBuffer(const CompositeBuffer& Buffer) } IoHash -IoHash::HashBuffer(const IoBuffer& Buffer) +IoHash::HashBuffer(const IoBuffer& Buffer, std::atomic<uint64_t>* ProcessedBytes) { IoHashStream Hasher; @@ -71,11 +81,21 @@ IoHash::HashBuffer(const IoBuffer& Buffer) FileRef.FileChunkOffset, FileRef.FileChunkSize, BufferingSize, - [&Hasher](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + [&Hasher, ProcessedBytes](const void* Data, size_t Size) { + Hasher.Append(Data, Size); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(Size); + } + }); } else { Hasher.Append(Buffer.GetData(), BufferSize); + if (ProcessedBytes != nullptr) + { + ProcessedBytes->fetch_add(BufferSize); + } } return Hasher.GetHash(); diff --git a/src/zencore/process.cpp b/src/zencore/process.cpp index c51e8f69d..fcbe657cb 100644 --- a/src/zencore/process.cpp +++ b/src/zencore/process.cpp @@ -60,7 +60,7 @@ GetPidStatus(int Pid, std::error_code& OutEc) { std::filesystem::path EntryPath = std::filesystem::path("/proc") / fmt::format("{}", Pid); std::filesystem::path StatPath = EntryPath / "stat"; - if (std::filesystem::is_regular_file(StatPath)) + if (IsFile(StatPath)) { FILE* StatFile = fopen(StatPath.c_str(), "r"); if (StatFile) @@ -929,7 +929,7 @@ GetProcessExecutablePath(int Pid, std::error_code& OutEc) } std::error_code -FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHandle) +FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHandle, bool IncludeSelf) { #if ZEN_PLATFORM_WINDOWS HANDLE ProcessSnapshotHandle = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); @@ -939,13 +939,15 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand } auto _ = MakeGuard([&]() { CloseHandle(ProcessSnapshotHandle); }); + const DWORD ThisProcessId = ::GetCurrentProcessId(); + PROCESSENTRY32 Entry; Entry.dwSize = sizeof(PROCESSENTRY32); if (Process32First(ProcessSnapshotHandle, (LPPROCESSENTRY32)&Entry)) { do { - if (ExecutableImage.filename() == Entry.szExeFile) + if ((IncludeSelf || (Entry.th32ProcessID != ThisProcessId)) && (ExecutableImage.filename() == Entry.szExeFile)) { std::error_code Ec; std::filesystem::path EntryPath = GetProcessExecutablePath(Entry.th32ProcessID, Ec); @@ -970,6 +972,7 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand } } } while (::Process32Next(ProcessSnapshotHandle, (LPPROCESSENTRY32)&Entry)); + return {}; } return MakeErrorCodeFromLastError(); #endif // ZEN_PLATFORM_WINDOWS @@ -980,6 +983,8 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand struct kinfo_proc* Processes = nullptr; uint32_t ProcCount = 0; + const pid_t ThisProcessId = getpid(); + if (sysctl(Mib, 4, NULL, &BufferSize, NULL, 0) != -1 && BufferSize > 0) { struct kinfo_proc* Processes = (struct kinfo_proc*)malloc(BufferSize); @@ -990,36 +995,46 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand char Buffer[PROC_PIDPATHINFO_MAXSIZE]; for (uint32_t ProcIndex = 0; ProcIndex < ProcCount; ProcIndex++) { - pid_t Pid = Processes[ProcIndex].kp_proc.p_pid; - std::error_code Ec; - std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec); - if (!Ec) + pid_t Pid = Processes[ProcIndex].kp_proc.p_pid; + if (IncludeSelf || (Pid != ThisProcessId)) { - if (EntryPath == ExecutableImage) + std::error_code Ec; + std::filesystem::path EntryPath = GetProcessExecutablePath(Pid, Ec); + if (!Ec) { - if (Processes[ProcIndex].kp_proc.p_stat != SZOMB) + if (EntryPath == ExecutableImage) { - OutHandle.Initialize(Pid, Ec); - return Ec; + if (Processes[ProcIndex].kp_proc.p_stat != SZOMB) + { + OutHandle.Initialize(Pid, Ec); + return Ec; + } } } + Ec.clear(); } } + return {}; } } return MakeErrorCodeFromLastError(); #endif // ZEN_PLATFORM_MAC #if ZEN_PLATFORM_LINUX + const pid_t ThisProcessId = getpid(); + std::vector<uint32_t> RunningPids; DirectoryContent ProcList; GetDirectoryContent("/proc", DirectoryContentFlags::IncludeDirs, ProcList); for (const std::filesystem::path& EntryPath : ProcList.Directories) { std::string EntryName = EntryPath.stem(); - std::optional<uint32_t> Pid = ParseInt<uint32_t>(EntryName); - if (Pid.has_value()) + std::optional<uint32_t> PidMaybe = ParseInt<uint32_t>(EntryName); + if (PidMaybe.has_value()) { - RunningPids.push_back(Pid.value()); + if (pid_t Pid = PidMaybe.value(); IncludeSelf || (Pid != ThisProcessId)) + { + RunningPids.push_back(Pid); + } } } @@ -1042,6 +1057,7 @@ FindProcess(const std::filesystem::path& ExecutableImage, ProcessHandle& OutHand } } } + Ec.clear(); } return {}; #endif // ZEN_PLATFORM_LINUX @@ -1065,10 +1081,24 @@ TEST_CASE("Process") TEST_CASE("FindProcess") { - ProcessHandle Process; - std::error_code Ec = FindProcess(GetRunningExecutablePath(), Process); - CHECK(!Ec); - CHECK(Process.IsValid()); + { + ProcessHandle Process; + std::error_code Ec = FindProcess(GetRunningExecutablePath(), Process, /*IncludeSelf*/ true); + CHECK(!Ec); + CHECK(Process.IsValid()); + } + { + ProcessHandle Process; + std::error_code Ec = FindProcess(GetRunningExecutablePath(), Process, /*IncludeSelf*/ false); + CHECK(!Ec); + CHECK(!Process.IsValid()); + } + { + ProcessHandle Process; + std::error_code Ec = FindProcess("this/does\\not/exist\\123914921929412312312312asdad\\12134.no", Process, /*IncludeSelf*/ false); + CHECK(!Ec); + CHECK(!Process.IsValid()); + } } TEST_CASE("BuildArgV") diff --git a/src/zenserver/sentryintegration.cpp b/src/zencore/sentryintegration.cpp index 7996f25bb..118c4158a 100644 --- a/src/zenserver/sentryintegration.cpp +++ b/src/zencore/sentryintegration.cpp @@ -1,6 +1,6 @@ // Copyright Epic Games, Inc. All Rights Reserved. -#include "sentryintegration.h" +#include <zencore/sentryintegration.h> #include <zencore/config.h> #include <zencore/logging.h> @@ -31,6 +31,10 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace sentry { +namespace { + static const std::string DefaultDsn("https://[email protected]/5919284"); +} + struct SentryAssertImpl : zen::AssertImpl { virtual void ZEN_FORCENOINLINE ZEN_DEBUG_SECTION @@ -192,29 +196,37 @@ SentryIntegration::~SentryIntegration() } void -SentryIntegration::Initialize(std::string SentryDatabasePath, - std::string SentryAttachmentsPath, - bool AllowPII, - const std::string& CommandLine) +SentryIntegration::Initialize(const Config& Conf, const std::string& CommandLine) { - m_AllowPII = AllowPII; + m_AllowPII = Conf.AllowPII; + std::string SentryDatabasePath = Conf.DatabasePath; if (SentryDatabasePath.starts_with("\\\\?\\")) { SentryDatabasePath = SentryDatabasePath.substr(4); } sentry_options_t* SentryOptions = sentry_options_new(); - sentry_options_set_dsn(SentryOptions, "https://[email protected]/5919284"); + + sentry_options_set_dsn(SentryOptions, Conf.Dsn.empty() ? sentry::DefaultDsn.c_str() : Conf.Dsn.c_str()); sentry_options_set_database_path(SentryOptions, SentryDatabasePath.c_str()); sentry_options_set_logger(SentryOptions, SentryLogFunction, this); - if (SentryAttachmentsPath.starts_with("\\\\?\\")) + sentry_options_set_environment(SentryOptions, Conf.Environment.empty() ? "production" : Conf.Environment.c_str()); + + std::string SentryAttachmentsPath = Conf.AttachmentsPath; + if (!SentryAttachmentsPath.empty()) { - SentryAttachmentsPath = SentryAttachmentsPath.substr(4); + if (SentryAttachmentsPath.starts_with("\\\\?\\")) + { + SentryAttachmentsPath = SentryAttachmentsPath.substr(4); + } + sentry_options_add_attachment(SentryOptions, SentryAttachmentsPath.c_str()); } - sentry_options_add_attachment(SentryOptions, SentryAttachmentsPath.c_str()); sentry_options_set_release(SentryOptions, ZEN_CFG_VERSION); - // sentry_options_set_debug(SentryOptions, 1); + if (Conf.Debug) + { + sentry_options_set_debug(SentryOptions, 1); + } m_SentryErrorCode = sentry_init(SentryOptions); diff --git a/src/zencore/string.cpp b/src/zencore/string.cpp index 242d41abe..a0d8c927f 100644 --- a/src/zencore/string.cpp +++ b/src/zencore/string.cpp @@ -99,6 +99,20 @@ FilepathFindExtension(const std::string_view& Path, const char* ExtensionToMatch ////////////////////////////////////////////////////////////////////////// +bool +IsValidUtf8(const std::string_view& str) +{ + return utf8::is_valid(begin(str), end(str)); +} + +std::string_view::const_iterator +FindFirstInvalidUtf8Byte(const std::string_view& str) +{ + return utf8::find_invalid(begin(str), end(str)); +} + +////////////////////////////////////////////////////////////////////////// + void Utf8ToWide(const char8_t* Str8, WideStringBuilderBase& OutString) { diff --git a/src/zencore/testutils.cpp b/src/zencore/testutils.cpp index 641d5508a..9f50de032 100644 --- a/src/zencore/testutils.cpp +++ b/src/zencore/testutils.cpp @@ -4,6 +4,7 @@ #if ZEN_WITH_TESTS +# include <zencore/filesystem.h> # include <zencore/session.h> # include "zencore/string.h" @@ -19,8 +20,8 @@ CreateTemporaryDirectory() std::error_code Ec; std::filesystem::path DirPath = std::filesystem::temp_directory_path() / GetSessionIdString() / IntNum(++Sequence).c_str(); - std::filesystem::remove_all(DirPath, Ec); - std::filesystem::create_directories(DirPath); + DeleteDirectories(DirPath, Ec); + CreateDirectories(DirPath); return DirPath; } @@ -32,14 +33,14 @@ ScopedTemporaryDirectory::ScopedTemporaryDirectory() : m_RootPath(CreateTemporar ScopedTemporaryDirectory::ScopedTemporaryDirectory(std::filesystem::path Directory) : m_RootPath(Directory) { std::error_code Ec; - std::filesystem::remove_all(Directory, Ec); - std::filesystem::create_directories(Directory); + DeleteDirectories(Directory, Ec); + CreateDirectories(Directory); } ScopedTemporaryDirectory::~ScopedTemporaryDirectory() { std::error_code Ec; - std::filesystem::remove_all(m_RootPath, Ec); + DeleteDirectories(m_RootPath, Ec); } IoBuffer diff --git a/src/zencore/timer.cpp b/src/zencore/timer.cpp index 1655e912d..95536cb26 100644 --- a/src/zencore/timer.cpp +++ b/src/zencore/timer.cpp @@ -12,9 +12,20 @@ # include <unistd.h> #endif +#define GTSPS_IMPLEMENTATION +#include "GetTimeSinceProcessStart.h" + namespace zen { uint64_t +GetTimeSinceProcessStart() +{ + double TimeInSeconds = ::GetTimeSinceProcessStart(); + + return uint64_t(TimeInSeconds * 1000); +} + +uint64_t GetHifreqTimerValue() { uint64_t Timestamp; diff --git a/src/zencore/workthreadpool.cpp b/src/zencore/workthreadpool.cpp index d15fb2e83..445fe939e 100644 --- a/src/zencore/workthreadpool.cpp +++ b/src/zencore/workthreadpool.cpp @@ -274,7 +274,7 @@ WorkerThreadPool::ScheduleWork(Ref<IWork> Work) void WorkerThreadPool::ScheduleWork(std::function<void()>&& Work) { - ScheduleWork(Ref<IWork>(new detail::LambdaWork(Work))); + ScheduleWork(Ref<IWork>(new detail::LambdaWork(std::move(Work)))); } [[nodiscard]] size_t diff --git a/src/zencore/xmake.lua b/src/zencore/xmake.lua index 2efa3fdb8..b3a33e052 100644 --- a/src/zencore/xmake.lua +++ b/src/zencore/xmake.lua @@ -29,6 +29,7 @@ target('zencore') end add_includedirs("include", {public=true}) + add_includedirs("$(projectdir)/thirdparty/GetTimeSinceProcessStart") add_includedirs("$(projectdir)/thirdparty/utfcpp/source") add_includedirs("$(projectdir)/thirdparty/Oodle/include") add_includedirs("$(projectdir)/thirdparty/trace", {public=true}) @@ -55,6 +56,7 @@ target('zencore') add_packages( "vcpkg::doctest", + "vcpkg::eastl", "vcpkg::fmt", "vcpkg::gsl-lite", "vcpkg::lz4", @@ -62,6 +64,27 @@ target('zencore') {public=true} ) + if has_config("zensentry") then + add_packages("vcpkg::sentry-native") + + if is_plat("windows") then + add_links("dbghelp", "winhttp", "version") -- for Sentry + end + + if is_plat("linux") then + -- As sentry_native uses symbols from breakpad_client, the latter must + -- be specified after the former with GCC-like toolchains. xmake however + -- is unaware of this and simply globs files from vcpkg's output. The + -- line below forces breakpad_client to be to the right of sentry_native + add_syslinks("breakpad_client") + end + + if is_plat("macosx") then + add_syslinks("bsm") + end + + end + if is_plat("linux") then add_syslinks("rt") end diff --git a/src/zenhttp-test/zenhttp-test.cpp b/src/zenhttp-test/zenhttp-test.cpp index 49db1ba54..a1327320e 100644 --- a/src/zenhttp-test/zenhttp-test.cpp +++ b/src/zenhttp-test/zenhttp-test.cpp @@ -3,6 +3,7 @@ #include <zencore/filesystem.h> #include <zencore/logging.h> #include <zencore/memory/newdelete.h> +#include <zencore/trace.h> #include <zenhttp/zenhttp.h> #if ZEN_WITH_TESTS @@ -16,6 +17,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zenhttp_forcelinktests(); + zen::TraceInit("zenhttp-test"); zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenhttp/auth/authmgr.cpp b/src/zenhttp/auth/authmgr.cpp index 1a9892d5c..8f7befc80 100644 --- a/src/zenhttp/auth/authmgr.cpp +++ b/src/zenhttp/auth/authmgr.cpp @@ -379,7 +379,7 @@ private: AuthState.EndArray(); } - std::filesystem::create_directories(m_Config.RootDirectory); + CreateDirectories(m_Config.RootDirectory); std::optional<std::string> Reason; diff --git a/src/zenhttp/httpclient.cpp b/src/zenhttp/httpclient.cpp index 8052a8fd5..a2d323b5e 100644 --- a/src/zenhttp/httpclient.cpp +++ b/src/zenhttp/httpclient.cpp @@ -12,13 +12,19 @@ #include <zencore/filesystem.h> #include <zencore/iobuffer.h> #include <zencore/logging.h> +#include <zencore/memory/memory.h> #include <zencore/session.h> #include <zencore/sharedbuffer.h> #include <zencore/stream.h> #include <zencore/string.h> -#include <zencore/testing.h> #include <zencore/trace.h> +#if ZEN_WITH_TESTS +# include <zencore/basicfile.h> +# include <zencore/testing.h> +# include <zencore/testutils.h> +#endif // ZEN_WITH_TESTS + ZEN_THIRD_PARTY_INCLUDES_START #include <cpr/cpr.h> ZEN_THIRD_PARTY_INCLUDES_END @@ -42,9 +48,13 @@ namespace detail { class TempPayloadFile { public: + TempPayloadFile(const TempPayloadFile&) = delete; + TempPayloadFile& operator=(const TempPayloadFile&) = delete; + TempPayloadFile() : m_FileHandle(nullptr), m_WriteOffset(0) {} ~TempPayloadFile() { + ZEN_TRACE_CPU("TempPayloadFile::Close"); try { if (m_FileHandle) @@ -85,8 +95,9 @@ namespace detail { } } - std::error_code Open(const std::filesystem::path& TempFolderPath) + std::error_code Open(const std::filesystem::path& TempFolderPath, uint64_t FinalSize) { + ZEN_TRACE_CPU("TempPayloadFile::Open"); ZEN_ASSERT(m_FileHandle == nullptr); std::uint64_t TmpIndex = ((std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) & 0xffffffffu) << 32) | @@ -126,11 +137,14 @@ namespace detail { #endif // ZEN_PLATFORM_WINDOWS m_FileHandle = FileHandle; + PrepareFileForScatteredWrite(m_FileHandle, FinalSize); + return {}; } std::error_code Write(std::string_view DataString) { + ZEN_TRACE_CPU("TempPayloadFile::Write"); const uint8_t* DataPtr = (const uint8_t*)DataString.data(); size_t DataSize = DataString.size(); if (DataSize >= CacheBufferSize) @@ -165,6 +179,7 @@ namespace detail { IoBuffer DetachToIoBuffer() { + ZEN_TRACE_CPU("TempPayloadFile::DetachToIoBuffer"); if (std::error_code Ec = Flush(); Ec) { ThrowSystemError(Ec.value(), Ec.message()); @@ -180,6 +195,7 @@ namespace detail { IoBuffer BorrowIoBuffer() { + ZEN_TRACE_CPU("TempPayloadFile::BorrowIoBuffer"); if (std::error_code Ec = Flush(); Ec) { ThrowSystemError(Ec.value(), Ec.message()); @@ -193,6 +209,7 @@ namespace detail { uint64_t GetSize() const { return m_WriteOffset; } void ResetWritePos(uint64_t WriteOffset) { + ZEN_TRACE_CPU("TempPayloadFile::ResetWritePos"); Flush(); m_WriteOffset = WriteOffset; } @@ -200,6 +217,7 @@ namespace detail { private: std::error_code Flush() { + ZEN_TRACE_CPU("TempPayloadFile::Flush"); if (m_CacheBufferOffset == 0) { return {}; @@ -211,6 +229,7 @@ namespace detail { std::error_code AppendData(const void* Data, uint64_t Size) { + ZEN_TRACE_CPU("TempPayloadFile::AppendData"); ZEN_ASSERT(m_FileHandle != nullptr); const uint64_t MaxChunkSize = 2u * 1024 * 1024 * 1024; @@ -261,6 +280,167 @@ namespace detail { std::uint64_t m_CacheBufferOffset = 0; }; + class BufferedReadFileStream + { + public: + BufferedReadFileStream(const BufferedReadFileStream&) = delete; + BufferedReadFileStream& operator=(const BufferedReadFileStream&) = delete; + + BufferedReadFileStream(void* FileHandle, uint64_t FileOffset, uint64_t FileSize, uint64_t BufferSize) + : m_FileHandle(FileHandle) + , m_FileSize(FileSize) + , m_FileEnd(FileOffset + FileSize) + , m_BufferSize(Min(BufferSize, FileSize)) + , m_FileOffset(FileOffset) + { + } + + ~BufferedReadFileStream() { Memory::Free(m_Buffer); } + void Read(void* Data, uint64_t Size) + { + ZEN_ASSERT(Data != nullptr); + if (Size > m_BufferSize) + { + Read(Data, Size, m_FileOffset); + m_FileOffset += Size; + return; + } + uint8_t* WritePtr = ((uint8_t*)Data); + uint64_t Begin = m_FileOffset; + uint64_t End = m_FileOffset + Size; + ZEN_ASSERT(m_FileOffset >= m_BufferStart); + if (m_FileOffset < m_BufferEnd) + { + ZEN_ASSERT(m_Buffer != nullptr); + uint64_t Count = Min(m_BufferEnd, End) - m_FileOffset; + memcpy(WritePtr + Begin - m_FileOffset, m_Buffer + Begin - m_BufferStart, Count); + Begin += Count; + if (Begin == End) + { + m_FileOffset = End; + return; + } + } + if (End == m_FileEnd) + { + Read(WritePtr + Begin - m_FileOffset, End - Begin, Begin); + } + else + { + if (!m_Buffer) + { + m_BufferSize = Min(m_FileEnd - m_FileOffset, m_BufferSize); + m_Buffer = (uint8_t*)Memory::Alloc(gsl::narrow<size_t>(m_BufferSize)); + } + m_BufferStart = Begin; + m_BufferEnd = Min(Begin + m_BufferSize, m_FileEnd); + Read(m_Buffer, m_BufferEnd - m_BufferStart, m_BufferStart); + uint64_t Count = Min(m_BufferEnd, End) - m_BufferStart; + memcpy(WritePtr + Begin - m_FileOffset, m_Buffer, Count); + ZEN_ASSERT(Begin + Count == End); + } + m_FileOffset = End; + } + + private: + void Read(void* Data, uint64_t BytesToRead, uint64_t FileOffset) + { + const uint64_t MaxChunkSize = 1u * 1024 * 1024; + std::error_code Ec; + ReadFile(m_FileHandle, Data, BytesToRead, FileOffset, MaxChunkSize, Ec); + + if (Ec) + { + std::error_code DummyEc; + throw std::system_error( + Ec, + fmt::format( + "HttpClient::BufferedReadFileStream ReadFile/pread failed (offset {:#x}, size {:#x}) file: '{}' (size {:#x})", + FileOffset, + BytesToRead, + PathFromHandle(m_FileHandle, DummyEc).generic_string(), + m_FileSize)); + } + } + + void* m_FileHandle = nullptr; + const uint64_t m_FileSize = 0; + const uint64_t m_FileEnd = 0; + uint64_t m_BufferSize = 0; + uint8_t* m_Buffer = nullptr; + uint64_t m_BufferStart = 0; + uint64_t m_BufferEnd = 0; + uint64_t m_FileOffset = 0; + }; + + class CompositeBufferReadStream + { + public: + CompositeBufferReadStream(const CompositeBuffer& Data, uint64_t BufferSize) + : m_Data(Data) + , m_BufferSize(BufferSize) + , m_SegmentIndex(0) + , m_BytesLeftInSegment(0) + { + } + uint64_t Read(void* Data, uint64_t Size) + { + uint64_t Result = 0; + uint8_t* WritePtr = (uint8_t*)Data; + while ((Size > 0) && (m_SegmentIndex < m_Data.GetSegments().size())) + { + if (m_BytesLeftInSegment == 0) + { + const SharedBuffer& Segment = m_Data.GetSegments()[m_SegmentIndex]; + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if (Segment.AsIoBuffer().GetFileReference(FileRef)) + { + m_SegmentDiskBuffer = std::make_unique<BufferedReadFileStream>(FileRef.FileHandle, + FileRef.FileChunkOffset, + FileRef.FileChunkSize, + m_BufferSize); + } + else + { + m_SegmentMemoryBuffer = Segment.GetView(); + } + m_BytesLeftInSegment = Segment.GetSize(); + } + uint64_t BytesToRead = Min(m_BytesLeftInSegment, Size); + if (m_SegmentDiskBuffer) + { + m_SegmentDiskBuffer->Read(WritePtr, BytesToRead); + } + else + { + ZEN_ASSERT_SLOW(m_SegmentMemoryBuffer.GetSize() >= BytesToRead); + memcpy(WritePtr, m_SegmentMemoryBuffer.GetData(), BytesToRead); + m_SegmentMemoryBuffer.MidInline(BytesToRead); + } + WritePtr += BytesToRead; + Size -= BytesToRead; + Result += BytesToRead; + + m_BytesLeftInSegment -= BytesToRead; + if (m_BytesLeftInSegment == 0) + { + m_SegmentDiskBuffer.reset(); + m_SegmentMemoryBuffer.Reset(); + m_SegmentIndex++; + } + } + return Result; + } + + private: + const CompositeBuffer& m_Data; + const uint64_t m_BufferSize; + size_t m_SegmentIndex; + std::unique_ptr<BufferedReadFileStream> m_SegmentDiskBuffer; + MemoryView m_SegmentMemoryBuffer; + uint64_t m_BytesLeftInSegment; + }; + } // namespace detail ////////////////////////////////////////////////////////////////////////// @@ -282,7 +462,7 @@ AsCprBody(const IoBuffer& Obj) ////////////////////////////////////////////////////////////////////////// static HttpClient::Response -ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResponseCode, IoBuffer&& Payload) +ResponseWithPayload(std::string_view SessionId, cpr::Response& HttpResponse, const HttpResponseCode WorkResponseCode, IoBuffer&& Payload) { // This ends up doing a memcpy, would be good to get rid of it by streaming results // into buffer directly @@ -297,7 +477,7 @@ ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResp if (!IsHttpSuccessCode(WorkResponseCode) && WorkResponseCode != HttpResponseCode::NotFound) { - ZEN_WARN("HttpClient request failed: {}", HttpResponse); + ZEN_WARN("HttpClient request failed (session: {}): {}", SessionId, HttpResponse); } return HttpClient::Response{.StatusCode = WorkResponseCode, @@ -309,12 +489,16 @@ ResponseWithPayload(cpr::Response& HttpResponse, const HttpResponseCode WorkResp } static HttpClient::Response -CommonResponse(cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) +CommonResponse(std::string_view SessionId, cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) { const HttpResponseCode WorkResponseCode = HttpResponseCode(HttpResponse.status_code); if (HttpResponse.error) { - ZEN_WARN("HttpClient client error: {}", HttpResponse); + if (HttpResponse.error.code != cpr::ErrorCode::OPERATION_TIMEDOUT && + HttpResponse.error.code != cpr::ErrorCode::CONNECTION_FAILURE && HttpResponse.error.code != cpr::ErrorCode::REQUEST_CANCELLED) + { + ZEN_WARN("HttpClient client failure (session: {}): {}", SessionId, HttpResponse); + } // Client side failure code return HttpClient::Response{ @@ -339,6 +523,7 @@ CommonResponse(cpr::Response&& HttpResponse, IoBuffer&& Payload = {}) else { return ResponseWithPayload( + SessionId, HttpResponse, WorkResponseCode, Payload ? std::move(Payload) : IoBufferBuilder::MakeCloneFromMemory(HttpResponse.text.data(), HttpResponse.text.size())); @@ -352,9 +537,10 @@ ShouldRetry(const cpr::Response& Response) { case cpr::ErrorCode::OK: break; - case cpr::ErrorCode::OPERATION_TIMEDOUT: + case cpr::ErrorCode::INTERNAL_ERROR: case cpr::ErrorCode::NETWORK_RECEIVE_ERROR: case cpr::ErrorCode::NETWORK_SEND_FAILURE: + case cpr::ErrorCode::OPERATION_TIMEDOUT: return true; default: return false; @@ -364,6 +550,7 @@ ShouldRetry(const cpr::Response& Response) case HttpResponseCode::RequestTimeout: case HttpResponseCode::TooManyRequests: case HttpResponseCode::InternalServerError: + case HttpResponseCode::BadGateway: case HttpResponseCode::ServiceUnavailable: case HttpResponseCode::GatewayTimeout: return true; @@ -375,6 +562,7 @@ ShouldRetry(const cpr::Response& Response) static bool ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile>& PayloadFile) { + ZEN_TRACE_CPU("ValidatePayload"); IoBuffer ResponseBuffer = (Response.text.empty() && PayloadFile) ? PayloadFile->BorrowIoBuffer() : IoBuffer(IoBuffer::Wrap, Response.text.data(), Response.text.size()); @@ -396,6 +584,11 @@ ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile } } + if (Response.status_code == (long)HttpResponseCode::PartialContent) + { + return true; + } + if (auto JupiterHash = Response.header.find("X-Jupiter-IoHash"); JupiterHash != Response.header.end()) { IoHash ExpectedPayloadHash; @@ -448,22 +641,40 @@ ValidatePayload(cpr::Response& Response, std::unique_ptr<detail::TempPayloadFile } static cpr::Response -DoWithRetry(std::function<cpr::Response()>&& Func, uint8_t RetryCount) +DoWithRetry( + std::string_view SessionId, + std::function<cpr::Response()>&& Func, + uint8_t RetryCount, + std::function<bool(cpr::Response& Result)>&& Validate = [](cpr::Response&) { return true; }) { uint8_t Attempt = 0; cpr::Response Result = Func(); - while (Attempt < RetryCount && ShouldRetry(Result)) + while (Attempt < RetryCount) { + if (!ShouldRetry(Result)) + { + if (Result.error || !IsHttpSuccessCode(Result.status_code)) + { + break; + } + if (Validate(Result)) + { + break; + } + } Sleep(100 * (Attempt + 1)); Attempt++; - ZEN_INFO("{} Attempt {}/{}", CommonResponse(std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); + ZEN_INFO("{} Attempt {}/{}", CommonResponse(SessionId, std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); Result = Func(); } return Result; } static cpr::Response -DoWithRetry(std::function<cpr::Response()>&& Func, std::unique_ptr<detail::TempPayloadFile>& PayloadFile, uint8_t RetryCount) +DoWithRetry(std::string_view SessionId, + std::function<cpr::Response()>&& Func, + std::unique_ptr<detail::TempPayloadFile>& PayloadFile, + uint8_t RetryCount) { uint8_t Attempt = 0; cpr::Response Result = Func(); @@ -482,7 +693,7 @@ DoWithRetry(std::function<cpr::Response()>&& Func, std::unique_ptr<detail::TempP } Sleep(100 * (Attempt + 1)); Attempt++; - ZEN_INFO("{} Attempt {}/{}", CommonResponse(std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); + ZEN_INFO("{} Attempt {}/{}", CommonResponse(SessionId, std::move(Result)).ErrorMessage("Retry"), Attempt, RetryCount + 1); Result = Func(); } return Result; @@ -511,12 +722,14 @@ struct HttpClient::Impl : public RefCounted inline cpr::Session* operator->() const { return CprSession; } inline cpr::Response Get() { + ZEN_TRACE_CPU("HttpClient::Impl::Get"); cpr::Response Result = CprSession->Get(); ZEN_TRACE("GET {}", Result); return Result; } inline cpr::Response Download(cpr::WriteCallback&& Write, std::optional<cpr::HeaderCallback>&& Header = {}) { + ZEN_TRACE_CPU("HttpClient::Impl::Download"); if (Header) { CprSession->SetHeaderCallback(std::move(Header.value())); @@ -529,12 +742,14 @@ struct HttpClient::Impl : public RefCounted } inline cpr::Response Head() { + ZEN_TRACE_CPU("HttpClient::Impl::Head"); cpr::Response Result = CprSession->Head(); ZEN_TRACE("HEAD {}", Result); return Result; } inline cpr::Response Put(std::optional<cpr::ReadCallback>&& Read = {}) { + ZEN_TRACE_CPU("HttpClient::Impl::Put"); if (Read) { CprSession->SetReadCallback(std::move(Read.value())); @@ -546,6 +761,7 @@ struct HttpClient::Impl : public RefCounted } inline cpr::Response Post(std::optional<cpr::ReadCallback>&& Read = {}) { + ZEN_TRACE_CPU("HttpClient::Impl::Post"); if (Read) { CprSession->SetReadCallback(std::move(Read.value())); @@ -557,6 +773,7 @@ struct HttpClient::Impl : public RefCounted } inline cpr::Response Delete() { + ZEN_TRACE_CPU("HttpClient::Impl::Delete"); cpr::Response Result = CprSession->Delete(); ZEN_TRACE("DELETE {}", Result); return Result; @@ -596,6 +813,7 @@ HttpClient::Impl::Impl(LoggerRef Log) : m_Log(Log) HttpClient::Impl::~Impl() { + ZEN_TRACE_CPU("HttpClient::Impl::~Impl"); m_SessionLock.WithExclusiveLock([&] { for (auto CprSession : m_Sessions) { @@ -614,6 +832,7 @@ HttpClient::Impl::AllocSession(const std::string_view BaseUrl, const std::string_view SessionId, std::optional<HttpClientAccessToken> AccessToken) { + ZEN_TRACE_CPU("HttpClient::Impl::AllocSession"); cpr::Session* CprSession = nullptr; m_SessionLock.WithExclusiveLock([&] { if (!m_Sessions.empty()) @@ -670,6 +889,7 @@ HttpClient::Impl::AllocSession(const std::string_view BaseUrl, void HttpClient::Impl::ReleaseSession(cpr::Session* CprSession) { + ZEN_TRACE_CPU("HttpClient::Impl::ReleaseSession"); CprSession->SetUrl({}); CprSession->SetHeader({}); CprSession->SetBody({}); @@ -694,6 +914,7 @@ HttpClient::~HttpClient() bool HttpClient::Authenticate() { + ZEN_TRACE_CPU("HttpClient::Authenticate"); std::optional<HttpClientAccessToken> Token = GetAccessToken(); if (!Token) { @@ -705,6 +926,7 @@ HttpClient::Authenticate() const std::optional<HttpClientAccessToken> HttpClient::GetAccessToken() { + ZEN_TRACE_CPU("HttpClient::GetAccessToken"); if (!m_ConnectionSettings.AccessTokenProvider.has_value()) { return {}; @@ -829,15 +1051,18 @@ HttpClient::Put(std::string_view Url, const IoBuffer& Payload, const KeyValueMap { ZEN_TRACE_CPU("HttpClient::Put"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -845,31 +1070,40 @@ HttpClient::Put(std::string_view Url, const KeyValueMap& Parameters) { ZEN_TRACE_CPU("HttpClient::Put"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, - Url, - m_ConnectionSettings, - {{"Content-Length", "0"}}, - Parameters, - m_SessionId, - GetAccessToken()); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse(m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, + Url, + m_ConnectionSettings, + {{"Content-Length", "0"}}, + Parameters, + m_SessionId, + GetAccessToken()); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response HttpClient::Get(std::string_view Url, const KeyValueMap& AdditionalHeader, const KeyValueMap& Parameters) { ZEN_TRACE_CPU("HttpClient::Get"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Get(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); + return Sess.Get(); + }, + m_ConnectionSettings.RetryCount, + [](cpr::Response& Result) { + std::unique_ptr<detail::TempPayloadFile> NoTempFile; + return ValidatePayload(Result, NoTempFile); + })); } HttpClient::Response @@ -877,13 +1111,16 @@ HttpClient::Head(std::string_view Url, const KeyValueMap& AdditionalHeader) { ZEN_TRACE_CPU("HttpClient::Head"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Head(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + return Sess.Head(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -891,13 +1128,16 @@ HttpClient::Delete(std::string_view Url, const KeyValueMap& AdditionalHeader) { ZEN_TRACE_CPU("HttpClient::Delete"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - return Sess.Delete(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + return Sess.Delete(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -905,13 +1145,16 @@ HttpClient::Post(std::string_view Url, const KeyValueMap& AdditionalHeader, cons { ZEN_TRACE_CPU("HttpClient::PostNoPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, Parameters, m_SessionId, GetAccessToken()); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -925,16 +1168,32 @@ HttpClient::Post(std::string_view Url, const IoBuffer& Payload, ZenContentType C { ZEN_TRACE_CPU("HttpClient::PostWithPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if (Payload.GetFileReference(FileRef)) + { + uint64_t Offset = 0; + detail::BufferedReadFileStream Buffer(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, 512u * 1024u); + auto ReadCallback = [&Payload, &Offset, &Buffer](char* buffer, size_t& size, intptr_t) { + size = Min<size_t>(size, Payload.GetSize() - Offset); + Buffer.Read(buffer, size); + Offset += size; + return true; + }; + return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); + } + Sess->SetBody(AsCprBody(Payload)); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -942,16 +1201,19 @@ HttpClient::Post(std::string_view Url, CbObject Payload, const KeyValueMap& Addi { ZEN_TRACE_CPU("HttpClient::PostObjectPayload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - - Sess->SetBody(AsCprBody(Payload)); - Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)}); - return Sess.Post(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + + Sess->SetBody(AsCprBody(Payload)); + Sess->UpdateHeader({HeaderContentType(ZenContentType::kCbObject)}); + return Sess.Post(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -965,24 +1227,23 @@ HttpClient::Post(std::string_view Url, const CompositeBuffer& Payload, ZenConten { ZEN_TRACE_CPU("HttpClient::Post"); - return CommonResponse(DoWithRetry( - [&]() { - uint64_t SizeLeft = Payload.GetSize(); - CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); - auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { - size = Min<size_t>(size, SizeLeft); - MutableMemoryView Data(buffer, size); - Payload.CopyTo(Data, BufferIt); - SizeLeft -= size; - return true; - }; - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + + detail::CompositeBufferReadStream Reader(Payload, 512u * 1024u); + auto ReadCallback = [&Reader](char* buffer, size_t& size, intptr_t) { + size = Reader.Read(buffer, size); + return true; + }; + return Sess.Post(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -990,29 +1251,32 @@ HttpClient::Upload(std::string_view Url, const IoBuffer& Payload, const KeyValue { ZEN_TRACE_CPU("HttpClient::Upload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); - - uint64_t Offset = 0; - if (Payload.IsWholeFile()) - { - auto ReadCallback = [&Payload, &Offset](char* buffer, size_t& size, intptr_t) { - size = Min<size_t>(size, Payload.GetSize() - Offset); - IoBuffer PayloadRange = IoBuffer(Payload, Offset, size); - MutableMemoryView Data(buffer, size); - Data.CopyFrom(PayloadRange.GetView()); - Offset += size; - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - } - Sess->SetBody(AsCprBody(Payload)); - return Sess.Put(); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(Payload.GetContentType())}); + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + if (Payload.GetFileReference(FileRef)) + { + uint64_t Offset = 0; + detail::BufferedReadFileStream Buffer(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize, 512u * 1024u); + auto ReadCallback = [&Payload, &Offset, &Buffer](char* buffer, size_t& size, intptr_t) { + size = Min<size_t>(size, Payload.GetSize() - Offset); + Buffer.Read(buffer, size); + Offset += size; + return true; + }; + return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); + } + Sess->SetBody(AsCprBody(Payload)); + return Sess.Put(); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -1020,24 +1284,23 @@ HttpClient::Upload(std::string_view Url, const CompositeBuffer& Payload, ZenCont { ZEN_TRACE_CPU("HttpClient::Upload"); - return CommonResponse(DoWithRetry( - [&]() { - Impl::Session Sess = - m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); - Sess->UpdateHeader({HeaderContentType(ContentType)}); - - uint64_t SizeLeft = Payload.GetSize(); - CompositeBuffer::Iterator BufferIt = Payload.GetIterator(0); - auto ReadCallback = [&Payload, &BufferIt, &SizeLeft](char* buffer, size_t& size, intptr_t) { - size = Min<size_t>(size, SizeLeft); - MutableMemoryView Data(buffer, size); - Payload.CopyTo(Data, BufferIt); - SizeLeft -= size; - return true; - }; - return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); - }, - m_ConnectionSettings.RetryCount)); + return CommonResponse( + m_SessionId, + DoWithRetry( + m_SessionId, + [&]() { + Impl::Session Sess = + m_Impl->AllocSession(m_BaseUri, Url, m_ConnectionSettings, AdditionalHeader, {}, m_SessionId, GetAccessToken()); + Sess->UpdateHeader({HeaderContentType(ContentType)}); + + detail::CompositeBufferReadStream Reader(Payload, 512u * 1024u); + auto ReadCallback = [&Reader](char* buffer, size_t& size, intptr_t) { + size = Reader.Read(buffer, size); + return true; + }; + return Sess.Put(cpr::ReadCallback(gsl::narrow<cpr::cpr_off_t>(Payload.GetSize()), ReadCallback)); + }, + m_ConnectionSettings.RetryCount)); } HttpClient::Response @@ -1048,6 +1311,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold std::string PayloadString; std::unique_ptr<detail::TempPayloadFile> PayloadFile; cpr::Response Response = DoWithRetry( + m_SessionId, [&]() { auto GetHeader = [&](std::string header) -> std::pair<std::string, std::string> { size_t DelimiterPos = header.find(':'); @@ -1087,6 +1351,30 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold return true; }; + uint64_t RequestedContentLength = (uint64_t)-1; + if (auto RangeIt = AdditionalHeader.Entries.find("Range"); RangeIt != AdditionalHeader.Entries.end()) + { + if (RangeIt->second.starts_with("bytes")) + { + size_t RangeStartPos = RangeIt->second.find('=', 5); + if (RangeStartPos != std::string::npos) + { + RangeStartPos++; + size_t RangeSplitPos = RangeIt->second.find('-', RangeStartPos); + if (RangeSplitPos != std::string::npos) + { + std::optional<size_t> RequestedRangeStart = + ParseInt<size_t>(RangeIt->second.substr(RangeStartPos, RangeSplitPos - RangeStartPos)); + std::optional<size_t> RequestedRangeEnd = ParseInt<size_t>(RangeIt->second.substr(RangeStartPos + 1)); + if (RequestedRangeStart.has_value() && RequestedRangeEnd.has_value()) + { + RequestedContentLength = RequestedRangeEnd.value() - 1; + } + } + } + } + } + cpr::Response Response; { std::vector<std::pair<std::string, std::string>> ReceivedHeaders; @@ -1094,13 +1382,13 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold std::pair<std::string, std::string> Header = GetHeader(header); if (Header.first == "Content-Length"sv) { - std::optional<size_t> ContentSize = ParseInt<size_t>(Header.second); - if (ContentSize.has_value()) + std::optional<size_t> ContentLength = ParseInt<size_t>(Header.second); + if (ContentLength.has_value()) { - if (ContentSize.value() > 1024 * 1024) + if (ContentLength.value() > 1024 * 1024) { PayloadFile = std::make_unique<detail::TempPayloadFile>(); - std::error_code Ec = PayloadFile->Open(TempFolderPath); + std::error_code Ec = PayloadFile->Open(TempFolderPath, ContentLength.value()); if (Ec) { ZEN_WARN("Failed to create temp file in '{}' for HttpClient::Download. Reason: {}", @@ -1111,7 +1399,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold } else { - PayloadString.reserve(ContentSize.value()); + PayloadString.reserve(ContentLength.value()); } } } @@ -1157,85 +1445,90 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold auto It = Response.header.find("Content-Length"); if (It != Response.header.end()) { - std::optional<int64_t> ContentLength = ParseInt<int64_t>(It->second); - if (ContentLength) - { - std::vector<std::pair<std::string, std::string>> ReceivedHeaders; + std::vector<std::pair<std::string, std::string>> ReceivedHeaders; - auto HeaderCallback = [&](std::string header, intptr_t) { - std::pair<std::string, std::string> Header = GetHeader(header); - if (!Header.first.empty()) - { - ReceivedHeaders.emplace_back(std::move(Header)); - } + auto HeaderCallback = [&](std::string header, intptr_t) { + std::pair<std::string, std::string> Header = GetHeader(header); + if (!Header.first.empty()) + { + ReceivedHeaders.emplace_back(std::move(Header)); + } - if (Header.first == "Content-Range"sv) + if (Header.first == "Content-Range"sv) + { + if (Header.second.starts_with("bytes "sv)) { - if (Header.second.starts_with("bytes "sv)) + size_t RangeStartEnd = Header.second.find('-', 6); + if (RangeStartEnd != std::string::npos) { - size_t RangeStartEnd = Header.second.find('-', 6); - if (RangeStartEnd != std::string::npos) + const auto Start = ParseInt<uint64_t>(Header.second.substr(6, RangeStartEnd - 6)); + if (Start) { - const auto Start = ParseInt<uint64_t>(Header.second.substr(6, RangeStartEnd - 6)); - if (Start) + uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); + if (Start.value() == DownloadedSize) { - uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - if (Start.value() == DownloadedSize) - { - return 1; - } - else if (Start.value() > DownloadedSize) - { - return 0; - } - if (PayloadFile) - { - PayloadFile->ResetWritePos(Start.value()); - } - else - { - PayloadString = PayloadString.substr(0, Start.value()); - } return 1; } + else if (Start.value() > DownloadedSize) + { + return 0; + } + if (PayloadFile) + { + PayloadFile->ResetWritePos(Start.value()); + } + else + { + PayloadString = PayloadString.substr(0, Start.value()); + } + return 1; } } - return 0; } - return 1; - }; + return 0; + } + return 1; + }; - KeyValueMap HeadersWithRange(AdditionalHeader); - do - { - uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); + KeyValueMap HeadersWithRange(AdditionalHeader); + do + { + uint64_t DownloadedSize = PayloadFile ? PayloadFile->GetSize() : PayloadString.length(); - std::string Range = fmt::format("bytes={}-{}", DownloadedSize, ContentLength.value()); - if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end()) + uint64_t ContentLength = RequestedContentLength; + if (ContentLength == uint64_t(-1)) + { + if (auto ParsedContentLength = ParseInt<int64_t>(It->second); ParsedContentLength.has_value()) { - if (RangeIt->second == Range) - { - // If we didn't make any progress, abort - break; - } + ContentLength = ParsedContentLength.value(); } - HeadersWithRange.Entries.insert_or_assign("Range", Range); - - Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, - Url, - m_ConnectionSettings, - HeadersWithRange, - {}, - m_SessionId, - GetAccessToken()); - Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); - for (const std::pair<std::string, std::string>& H : ReceivedHeaders) + } + + std::string Range = fmt::format("bytes={}-{}", DownloadedSize, DownloadedSize + ContentLength - 1); + if (auto RangeIt = HeadersWithRange.Entries.find("Range"); RangeIt != HeadersWithRange.Entries.end()) + { + if (RangeIt->second == Range) { - Response.header.insert_or_assign(H.first, H.second); + // If we didn't make any progress, abort + break; } - ReceivedHeaders.clear(); - } while (ShouldResume(Response)); - } + } + HeadersWithRange.Entries.insert_or_assign("Range", Range); + + Impl::Session Sess = m_Impl->AllocSession(m_BaseUri, + Url, + m_ConnectionSettings, + HeadersWithRange, + {}, + m_SessionId, + GetAccessToken()); + Response = Sess.Download(cpr::WriteCallback{DownloadCallback}, cpr::HeaderCallback{HeaderCallback}); + for (const std::pair<std::string, std::string>& H : ReceivedHeaders) + { + Response.header.insert_or_assign(H.first, H.second); + } + ReceivedHeaders.clear(); + } while (ShouldResume(Response)); } } } @@ -1249,7 +1542,7 @@ HttpClient::Download(std::string_view Url, const std::filesystem::path& TempFold PayloadFile, m_ConnectionSettings.RetryCount); - return CommonResponse(std::move(Response), PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{}); + return CommonResponse(m_SessionId, std::move(Response), PayloadFile ? PayloadFile->DetachToIoBuffer() : IoBuffer{}); } ////////////////////////////////////////////////////////////////////////// @@ -1361,6 +1654,133 @@ HttpClient::Response::ThrowError(std::string_view ErrorPrefix) #if ZEN_WITH_TESTS +namespace testutil { + IoHash HashComposite(const CompositeBuffer& Payload) + { + IoHashStream Hasher; + const uint64_t PayloadSize = Payload.GetSize(); + std::vector<uint8_t> Buffer(64u * 1024u); + detail::CompositeBufferReadStream Stream(Payload, 137u * 1024u); + for (uint64_t Offset = 0; Offset < PayloadSize;) + { + uint64_t Count = Min(64u * 1024u, PayloadSize - Offset); + Stream.Read(Buffer.data(), Count); + Hasher.Append(Buffer.data(), Count); + Offset += Count; + } + return Hasher.GetHash(); + }; + + IoHash HashFileStream(void* FileHandle, uint64_t FileOffset, uint64_t FileSize) + { + IoHashStream Hasher; + std::vector<uint8_t> Buffer(64u * 1024u); + detail::BufferedReadFileStream Stream(FileHandle, FileOffset, FileSize, 137u * 1024u); + for (uint64_t Offset = 0; Offset < FileSize;) + { + uint64_t Count = Min(64u * 1024u, FileSize - Offset); + Stream.Read(Buffer.data(), Count); + Hasher.Append(Buffer.data(), Count); + Offset += Count; + } + return Hasher.GetHash(); + } + +} // namespace testutil + +TEST_CASE("responseformat") +{ + using namespace std::literals; + + SUBCASE("identity") + { + BodyLogFormatter _{"abcd"}; + CHECK_EQ(_.GetText(), "abcd"sv); + } + + SUBCASE("very long") + { + std::string_view LongView = + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; + + BodyLogFormatter _{LongView}; + + CHECK(_.GetText().size() < LongView.size()); + CHECK(_.GetText().starts_with("[truncated"sv)); + } + + SUBCASE("invalid text") + { + std::string_view BadText = "totobaba\xff\xfe"; + + BodyLogFormatter _{BadText}; + + CHECK_EQ(_.GetText(), "totobaba"); + } +} + +TEST_CASE("BufferedReadFileStream") +{ + ScopedTemporaryDirectory TmpDir; + + IoBuffer DiskBuffer = WriteToTempFile(CompositeBuffer(CreateRandomBlob(496 * 5 * 1024)), TmpDir.Path() / "diskbuffer1"); + + IoBufferFileReference FileRef = {nullptr, 0, 0}; + CHECK(DiskBuffer.GetFileReference(FileRef)); + CHECK_EQ(IoHash::HashBuffer(DiskBuffer), testutil::HashFileStream(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize)); + + IoBuffer Partial(DiskBuffer, 37 * 1024, 512 * 1024); + CHECK(Partial.GetFileReference(FileRef)); + CHECK_EQ(IoHash::HashBuffer(Partial), testutil::HashFileStream(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize)); + + IoBuffer SmallDiskBuffer = WriteToTempFile(CompositeBuffer(CreateRandomBlob(63 * 1024)), TmpDir.Path() / "diskbuffer2"); + CHECK(SmallDiskBuffer.GetFileReference(FileRef)); + CHECK_EQ(IoHash::HashBuffer(SmallDiskBuffer), + testutil::HashFileStream(FileRef.FileHandle, FileRef.FileChunkOffset, FileRef.FileChunkSize)); +} + +TEST_CASE("CompositeBufferReadStream") +{ + ScopedTemporaryDirectory TmpDir; + + IoBuffer MemoryBuffer1 = CreateRandomBlob(64); + CHECK_EQ(IoHash::HashBuffer(MemoryBuffer1), testutil::HashComposite(CompositeBuffer(SharedBuffer(MemoryBuffer1)))); + + IoBuffer MemoryBuffer2 = CreateRandomBlob(561 * 1024); + CHECK_EQ(IoHash::HashBuffer(MemoryBuffer2), testutil::HashComposite(CompositeBuffer(SharedBuffer(MemoryBuffer2)))); + + IoBuffer DiskBuffer1 = WriteToTempFile(CompositeBuffer(CreateRandomBlob(267 * 3 * 1024)), TmpDir.Path() / "diskbuffer1"); + CHECK_EQ(IoHash::HashBuffer(DiskBuffer1), testutil::HashComposite(CompositeBuffer(SharedBuffer(DiskBuffer1)))); + + IoBuffer DiskBuffer2 = WriteToTempFile(CompositeBuffer(CreateRandomBlob(3 * 1024)), TmpDir.Path() / "diskbuffer2"); + CHECK_EQ(IoHash::HashBuffer(DiskBuffer2), testutil::HashComposite(CompositeBuffer(SharedBuffer(DiskBuffer2)))); + + IoBuffer DiskBuffer3 = WriteToTempFile(CompositeBuffer(CreateRandomBlob(496 * 5 * 1024)), TmpDir.Path() / "diskbuffer3"); + CHECK_EQ(IoHash::HashBuffer(DiskBuffer3), testutil::HashComposite(CompositeBuffer(SharedBuffer(DiskBuffer3)))); + + CompositeBuffer Data(SharedBuffer(std::move(MemoryBuffer1)), + SharedBuffer(std::move(DiskBuffer1)), + SharedBuffer(std::move(DiskBuffer2)), + SharedBuffer(std::move(MemoryBuffer2)), + SharedBuffer(std::move(DiskBuffer3))); + CHECK_EQ(IoHash::HashBuffer(Data), testutil::HashComposite(Data)); +} + TEST_CASE("httpclient") { using namespace std::literals; diff --git a/src/zenhttp/httpclientauth.cpp b/src/zenhttp/httpclientauth.cpp index 04ac2ad3f..39efe1d0c 100644 --- a/src/zenhttp/httpclientauth.cpp +++ b/src/zenhttp/httpclientauth.cpp @@ -2,14 +2,26 @@ #include <zenhttp/httpclientauth.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/process.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/uid.h> #include <zenhttp/auth/authmgr.h> +#include <ctime> + ZEN_THIRD_PARTY_INCLUDES_START #include <cpr/cpr.h> #include <fmt/format.h> #include <json11.hpp> ZEN_THIRD_PARTY_INCLUDES_END +#if ZEN_PLATFORM_WINDOWS +# define timegm _mkgmtime +#endif // ZEN_PLATFORM_WINDOWS + namespace zen { namespace httpclientauth { using namespace std::literals; @@ -41,6 +53,7 @@ namespace zen { namespace httpclientauth { if (Response.error || Response.status_code != 200) { + ZEN_WARN("Failed fetching OAuth access token {}. Reason: '{}'", OAuthParams.Url, Response.reason); return HttpClientAccessToken{}; } @@ -49,6 +62,7 @@ namespace zen { namespace httpclientauth { if (JsonError.empty() == false) { + ZEN_WARN("Unable to parse OAuth json response from {}. Reason: '{}'", OAuthParams.Url, JsonError); return HttpClientAccessToken{}; } @@ -73,4 +87,101 @@ namespace zen { namespace httpclientauth { return CreateFromOpenIdProvider(AuthManager, "Default"sv); } + static HttpClientAccessToken GetOidcTokenFromExe(const std::filesystem::path& OidcExecutablePath, + std::string_view CloudHost, + bool Unattended) + { + Stopwatch Timer; + + CreateProcOptions ProcOptions; + + const std::filesystem::path AuthTokenPath(std::filesystem::temp_directory_path() / fmt::format(".zen-auth-{}", Oid::NewOid())); + auto _ = MakeGuard([AuthTokenPath]() { RemoveFile(AuthTokenPath); }); + + const std::string ProcArgs = fmt::format("{} --AuthConfigUrl {} --OutFile {} --Unattended={}", + OidcExecutablePath, + CloudHost, + AuthTokenPath, + Unattended ? "true"sv : "false"sv); + ZEN_DEBUG("Running: {}", ProcArgs); + ProcessHandle Proc; + Proc.Initialize(CreateProc(OidcExecutablePath, ProcArgs, ProcOptions)); + if (!Proc.IsValid()) + { + throw std::runtime_error(fmt::format("failed to launch '{}'", OidcExecutablePath)); + } + + int ExitCode = Proc.WaitExitCode(); + + auto EndTime = std::chrono::system_clock::now(); + + if (ExitCode == 0) + { + IoBuffer Body = IoBufferBuilder::MakeFromFile(AuthTokenPath); + std::string JsonText(reinterpret_cast<const char*>(Body.GetData()), Body.GetSize()); + + std::string JsonError; + json11::Json Json = json11::Json::parse(JsonText, JsonError); + + if (JsonError.empty() == false) + { + ZEN_WARN("Unable to parse Oidcs json response from {}. Reason: '{}'", AuthTokenPath, JsonError); + return HttpClientAccessToken{}; + } + std::string Token = Json["Token"].string_value(); + std::string ExpiresAtUTCString = Json["ExpiresAtUtc"].string_value(); + ZEN_ASSERT(!ExpiresAtUTCString.empty()); + + int Year = 0; + int Month = 0; + int Day = 0; + int Hour = 0; + int Minute = 0; + int Second = 0; + int Millisecond = 0; + sscanf(ExpiresAtUTCString.c_str(), "%d-%d-%dT%d:%d:%d.%dZ", &Year, &Month, &Day, &Hour, &Minute, &Second, &Millisecond); + + std::tm Time = { + Second, + Minute, + Hour, + Day, + Month - 1, + Year - 1900, + }; + + time_t UTCTime = timegm(&Time); + HttpClientAccessToken::TimePoint ExpireTime = std::chrono::system_clock::from_time_t(UTCTime); + ExpireTime += std::chrono::microseconds(Millisecond); + + return HttpClientAccessToken{.Value = fmt::format("Bearer {}"sv, Token), .ExpireTime = ExpireTime}; + } + else + { + ZEN_WARN("Failed running {} to get auth token, error code {}", OidcExecutablePath, ExitCode); + } + return HttpClientAccessToken{}; + } + + std::optional<std::function<HttpClientAccessToken()>> CreateFromOidcTokenExecutable(const std::filesystem::path& OidcExecutablePath, + std::string_view CloudHost) + { + HttpClientAccessToken InitialToken = GetOidcTokenFromExe(OidcExecutablePath, CloudHost, false); + if (InitialToken.IsValid()) + { + return [OidcExecutablePath = std::filesystem::path(OidcExecutablePath), + CloudHost = std::string(CloudHost), + InitialToken]() mutable { + if (InitialToken.IsValid()) + { + HttpClientAccessToken Result = InitialToken; + InitialToken = {}; + return Result; + } + return GetOidcTokenFromExe(OidcExecutablePath, CloudHost, true); + }; + } + return {}; + } + }} // namespace zen::httpclientauth diff --git a/src/zenhttp/httpserver.cpp b/src/zenhttp/httpserver.cpp index 1fbe22628..764f2a2a7 100644 --- a/src/zenhttp/httpserver.cpp +++ b/src/zenhttp/httpserver.cpp @@ -31,6 +31,8 @@ #include <span> #include <string_view> +#include <EASTL/fixed_vector.h> + namespace zen { using namespace std::literals; @@ -529,7 +531,7 @@ HttpServerRequest::WriteResponse(HttpResponseCode ResponseCode, HttpContentType { std::span<const SharedBuffer> Segments = Payload.GetSegments(); - std::vector<IoBuffer> Buffers; + eastl::fixed_vector<IoBuffer, 64> Buffers; Buffers.reserve(Segments.size()); for (auto& Segment : Segments) @@ -537,7 +539,7 @@ HttpServerRequest::WriteResponse(HttpResponseCode ResponseCode, HttpContentType Buffers.push_back(Segment.AsIoBuffer()); } - WriteResponse(ResponseCode, ContentType, Buffers); + WriteResponse(ResponseCode, ContentType, std::span<IoBuffer>(begin(Buffers), end(Buffers))); } std::string @@ -785,120 +787,131 @@ HttpRpcHandler::AddRpc(std::string_view RpcId, std::function<void(CbObject& RpcA ////////////////////////////////////////////////////////////////////////// -enum class HttpServerClass -{ - kHttpAsio, - kHttpSys, - kHttpPlugin, - kHttpMulti, - kHttpNull -}; - Ref<HttpServer> -CreateHttpServerClass(HttpServerClass Class, const HttpServerConfig& Config) +CreateHttpServerClass(const std::string_view ServerClass, const HttpServerConfig& Config) { - switch (Class) + if (ServerClass == "asio"sv) { - default: - case HttpServerClass::kHttpAsio: - ZEN_INFO("using asio HTTP server implementation"); - return CreateHttpAsioServer(Config.ForceLoopback, Config.ThreadCount); - - case HttpServerClass::kHttpMulti: - { - ZEN_INFO("using multi HTTP server implementation"); - Ref<HttpMultiServer> Server{new HttpMultiServer()}; - - // This is hardcoded for now, but should be configurable in the future - Server->AddServer(CreateHttpServerClass(HttpServerClass::kHttpSys, Config)); - Server->AddServer(CreateHttpServerClass(HttpServerClass::kHttpPlugin, Config)); + ZEN_INFO("using asio HTTP server implementation") + return CreateHttpAsioServer(Config.ForceLoopback, Config.ThreadCount); + } +#if ZEN_WITH_HTTPSYS + else if (ServerClass == "httpsys"sv) + { + ZEN_INFO("using http.sys server implementation") + return Ref<HttpServer>(CreateHttpSysServer({.ThreadCount = Config.ThreadCount, + .AsyncWorkThreadCount = Config.HttpSys.AsyncWorkThreadCount, + .IsAsyncResponseEnabled = Config.HttpSys.IsAsyncResponseEnabled, + .IsRequestLoggingEnabled = Config.HttpSys.IsRequestLoggingEnabled, + .IsDedicatedServer = Config.IsDedicatedServer, + .ForceLoopback = Config.ForceLoopback})); + } +#endif + else if (ServerClass == "null"sv) + { + ZEN_INFO("using null HTTP server implementation") + return Ref<HttpServer>(new HttpNullServer); + } + else + { + ZEN_WARN("unknown HTTP server implementation '{}', falling back to default", ServerClass) - return Server; - } +#if ZEN_WITH_HTTPSYS + return CreateHttpServerClass("httpsys"sv, Config); +#else + return CreateHttpServerClass("asio"sv, Config); +#endif + } +} #if ZEN_WITH_PLUGINS - case HttpServerClass::kHttpPlugin: - { - ZEN_INFO("using plugin HTTP server implementation"); - Ref<HttpPluginServer> Server{CreateHttpPluginServer()}; +Ref<HttpServer> +CreateHttpServerPlugin(const HttpServerPluginConfig& PluginConfig) +{ + const std::string& PluginName = PluginConfig.PluginName; - // This is hardcoded for now, but should be configurable in the future + ZEN_INFO("using '{}' plugin HTTP server implementation", PluginName) + if (PluginName.starts_with("builtin:"sv)) + { # if 0 - Ref<TransportPlugin> WinsockPlugin{CreateSocketTransportPlugin()}; - WinsockPlugin->Configure("port", "8558"); - Server->AddPlugin(WinsockPlugin); -# endif + Ref<TransportPlugin> Plugin = {}; + if (PluginName == "builtin:winsock"sv) + { + Plugin = CreateSocketTransportPlugin(); + } + else if (PluginName == "builtin:asio"sv) + { + Plugin = CreateAsioTransportPlugin(); + } + else + { + ZEN_WARN("Unknown builtin plugin '{}'", PluginName) + return {}; + } -# if 0 - Ref<TransportPlugin> AsioPlugin{CreateAsioTransportPlugin()}; - AsioPlugin->Configure("port", "8558"); - Server->AddPlugin(AsioPlugin); -# endif + ZEN_ASSERT(!Plugin.IsNull()); -# if 1 - Ref<DllTransportPlugin> DllPlugin{CreateDllTransportPlugin()}; - DllPlugin->LoadDll("winsock"); - DllPlugin->ConfigureDll("winsock", "port", "8558"); - Server->AddPlugin(DllPlugin); -# endif + for (const std::pair<std::string, std::string>& Option : PluginConfig.PluginOptions) + { + Plugin->Configure(Option.first.c_str(), Option.second.c_str()); + } - return Server; - } -#endif + Ref<HttpPluginServer> Server{CreateHttpPluginServer()}; + Server->AddPlugin(Plugin); + return Server; +# else + ZEN_WARN("Builtin plugin '{}' is not supported", PluginName) + return {}; +# endif + } -#if ZEN_WITH_HTTPSYS - case HttpServerClass::kHttpSys: - ZEN_INFO("using http.sys server implementation"); - return Ref<HttpServer>(CreateHttpSysServer({.ThreadCount = Config.ThreadCount, - .AsyncWorkThreadCount = Config.HttpSys.AsyncWorkThreadCount, - .IsAsyncResponseEnabled = Config.HttpSys.IsAsyncResponseEnabled, - .IsRequestLoggingEnabled = Config.HttpSys.IsRequestLoggingEnabled, - .IsDedicatedServer = Config.IsDedicatedServer, - .ForceLoopback = Config.ForceLoopback})); -#endif + Ref<DllTransportPlugin> DllPlugin{CreateDllTransportPlugin()}; + if (!DllPlugin->LoadDll(PluginName)) + { + return {}; + } - case HttpServerClass::kHttpNull: - ZEN_INFO("using null HTTP server implementation"); - return Ref<HttpServer>(new HttpNullServer); + for (const std::pair<std::string, std::string>& Option : PluginConfig.PluginOptions) + { + DllPlugin->ConfigureDll(PluginName, Option.first.c_str(), Option.second.c_str()); } + + Ref<HttpPluginServer> Server{CreateHttpPluginServer()}; + Server->AddPlugin(DllPlugin); + return Server; } +#endif Ref<HttpServer> CreateHttpServer(const HttpServerConfig& Config) { using namespace std::literals; - HttpServerClass Class = HttpServerClass::kHttpNull; - -#if ZEN_WITH_HTTPSYS - Class = HttpServerClass::kHttpSys; -#else - Class = HttpServerClass::kHttpAsio; -#endif - - if (Config.ServerClass == "asio"sv) - { - Class = HttpServerClass::kHttpAsio; - } - else if (Config.ServerClass == "httpsys"sv) - { - Class = HttpServerClass::kHttpSys; - } - else if (Config.ServerClass == "plugin"sv) - { - Class = HttpServerClass::kHttpPlugin; - } - else if (Config.ServerClass == "null"sv) +#if ZEN_WITH_PLUGINS + if (Config.PluginConfigs.empty()) { - Class = HttpServerClass::kHttpNull; + return CreateHttpServerClass(Config.ServerClass, Config); } - else if (Config.ServerClass == "multi"sv) + else { - Class = HttpServerClass::kHttpMulti; - } + Ref<HttpMultiServer> Server{new HttpMultiServer()}; + Server->AddServer(CreateHttpServerClass(Config.ServerClass, Config)); - return CreateHttpServerClass(Class, Config); + for (const HttpServerPluginConfig& PluginConfig : Config.PluginConfigs) + { + Ref<HttpServer> PluginServer = CreateHttpServerPlugin(PluginConfig); + if (!PluginServer.IsNull()) + { + Server->AddServer(PluginServer); + } + } + + return Server; + } +#else + return CreateHttpServerClass(Config.ServerClass, Config); +#endif } ////////////////////////////////////////////////////////////////////////// diff --git a/src/zenhttp/include/zenhttp/formatters.h b/src/zenhttp/include/zenhttp/formatters.h index 538136238..05a23d675 100644 --- a/src/zenhttp/include/zenhttp/formatters.h +++ b/src/zenhttp/include/zenhttp/formatters.h @@ -7,12 +7,57 @@ #include <zencore/iobuffer.h> #include <zencore/string.h> #include <zenhttp/httpclient.h> +#include <zenhttp/httpcommon.h> ZEN_THIRD_PARTY_INCLUDES_START #include <cpr/cpr.h> #include <fmt/format.h> ZEN_THIRD_PARTY_INCLUDES_END +namespace zen { + +struct BodyLogFormatter +{ +private: + std::string_view ResponseText; + zen::ExtendableStringBuilder<128> ModifiedResponse; + +public: + explicit BodyLogFormatter(std::string_view InResponseText) : ResponseText(InResponseText) + { + using namespace std::literals; + + const int TextSizeLimit = 1024; + + // Trim invalid UTF8 + + auto InvalidIt = zen::FindFirstInvalidUtf8Byte(ResponseText); + + if (InvalidIt != end(ResponseText)) + { + ResponseText = ResponseText.substr(0, InvalidIt - begin(ResponseText)); + } + + if (ResponseText.empty()) + { + ResponseText = "<suppressed non-text response>"sv; + } + + if (ResponseText.size() > TextSizeLimit) + { + const auto TruncatedString = "[truncated response] "sv; + ModifiedResponse.Append(TruncatedString); + ModifiedResponse.Append(ResponseText.data(), TextSizeLimit - TruncatedString.size()); + + ResponseText = ModifiedResponse; + } + } + + inline std::string_view GetText() const { return ResponseText; } +}; + +} // namespace zen + template<> struct fmt::formatter<cpr::Response> { @@ -23,15 +68,19 @@ struct fmt::formatter<cpr::Response> { using namespace std::literals; - if (Response.status_code == 200 || Response.status_code == 201) + zen::NiceTimeSpanMs NiceResponseTime(uint64_t(Response.elapsed * 1000)); + + if (zen::IsHttpSuccessCode(Response.status_code)) { return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s", + "Url: {}, Status: {}, Error: '{}' ({}), Bytes: {}/{} (Up/Down), Elapsed: {}", Response.url.str(), Response.status_code, + Response.error.message, + int(Response.error.code), Response.uploaded_bytes, Response.downloaded_bytes, - Response.elapsed); + NiceResponseTime.c_str()); } else { @@ -45,27 +94,35 @@ struct fmt::formatter<cpr::Response> zen::ExtendableStringBuilder<256> Sb; std::string_view Json = Obj.ToJson(Sb).ToView(); - return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s, Response: '{}', Reason: '{}'", - Response.url.str(), - Response.status_code, - Response.uploaded_bytes, - Response.downloaded_bytes, - Response.elapsed, - Json, - Response.reason); + return fmt::format_to( + Ctx.out(), + "Url: {}, Status: {}, Error: '{}' ({}). Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}', Reason: '{}'", + Response.url.str(), + Response.status_code, + Response.error.message, + int(Response.error.code), + Response.uploaded_bytes, + Response.downloaded_bytes, + NiceResponseTime.c_str(), + Json, + Response.reason); } else { - return fmt::format_to(Ctx.out(), - "Url: {}, Status: {}, Bytes: {}/{} (Up/Down), Elapsed: {}s, Reponse: '{}', Reason: '{}'", - Response.url.str(), - Response.status_code, - Response.uploaded_bytes, - Response.downloaded_bytes, - Response.elapsed, - Response.text, - Response.reason); + zen::BodyLogFormatter Body(Response.text); + + return fmt::format_to( + Ctx.out(), + "Url: {}, Status: {}, Error: '{}' ({}), Bytes: {}/{} (Up/Down), Elapsed: {}, Response: '{}', Reason: '{}'", + Response.url.str(), + Response.status_code, + Response.error.message, + int(Response.error.code), + Response.uploaded_bytes, + Response.downloaded_bytes, + NiceResponseTime.c_str(), + Body.GetText(), + Response.reason); } } } diff --git a/src/zenhttp/include/zenhttp/httpclient.h b/src/zenhttp/include/zenhttp/httpclient.h index 1cf77d794..c991a71ea 100644 --- a/src/zenhttp/include/zenhttp/httpclient.h +++ b/src/zenhttp/include/zenhttp/httpclient.h @@ -34,7 +34,7 @@ struct HttpClientAccessToken using Clock = std::chrono::system_clock; using TimePoint = Clock::time_point; - static constexpr int64_t ExpireMarginInSeconds = 30; + static constexpr int64_t ExpireMarginInSeconds = 60 * 5; std::string Value; TimePoint ExpireTime; @@ -60,9 +60,6 @@ struct HttpClientSettings class HttpClient { public: - struct Settings - { - }; HttpClient(std::string_view BaseUri, const HttpClientSettings& Connectionsettings = {}); ~HttpClient(); @@ -100,7 +97,7 @@ public: HttpResponseCode StatusCode = HttpResponseCode::ImATeapot; IoBuffer ResponsePayload; // Note: this also includes the content type - // Contains the reponse headers + // Contains the response headers KeyValueMap Header; // The number of bytes sent as part of the request @@ -180,6 +177,7 @@ public: LoggerRef Logger() { return m_Log; } std::string_view GetBaseUri() const { return m_BaseUri; } bool Authenticate(); + std::string_view GetSessionId() const { return m_SessionId; } private: const std::optional<HttpClientAccessToken> GetAccessToken(); diff --git a/src/zenhttp/include/zenhttp/httpclientauth.h b/src/zenhttp/include/zenhttp/httpclientauth.h index aa07620ca..5b9b9d305 100644 --- a/src/zenhttp/include/zenhttp/httpclientauth.h +++ b/src/zenhttp/include/zenhttp/httpclientauth.h @@ -3,6 +3,7 @@ #pragma once #include <zenhttp/httpclient.h> +#include <optional> namespace zen { @@ -24,6 +25,9 @@ namespace httpclientauth { std::function<HttpClientAccessToken()> CreateFromOpenIdProvider(AuthMgr& AuthManager, std::string_view OpenIdProvider); std::function<HttpClientAccessToken()> CreateFromDefaultOpenIdProvider(AuthMgr& AuthManager); + + std::optional<std::function<HttpClientAccessToken()>> CreateFromOidcTokenExecutable(const std::filesystem::path& OidcExecutablePath, + std::string_view CloudHost); } // namespace httpclientauth } // namespace zen diff --git a/src/zenhttp/include/zenhttp/httpserver.h b/src/zenhttp/include/zenhttp/httpserver.h index 7b87cb84b..03e547bf3 100644 --- a/src/zenhttp/include/zenhttp/httpserver.h +++ b/src/zenhttp/include/zenhttp/httpserver.h @@ -184,12 +184,19 @@ public: virtual void Close() = 0; }; +struct HttpServerPluginConfig +{ + std::string PluginName; + std::vector<std::pair<std::string, std::string>> PluginOptions; +}; + struct HttpServerConfig { - bool IsDedicatedServer = false; // Should be set to true for shared servers - std::string ServerClass; // Choice of HTTP server implementation - bool ForceLoopback = false; - unsigned int ThreadCount = 0; + bool IsDedicatedServer = false; // Should be set to true for shared servers + std::string ServerClass; // Choice of HTTP server implementation + std::vector<HttpServerPluginConfig> PluginConfigs; + bool ForceLoopback = false; + unsigned int ThreadCount = 0; struct { @@ -208,7 +215,7 @@ class HttpRouterRequest public: HttpRouterRequest(HttpServerRequest& Request) : m_HttpRequest(Request) {} - ZENCORE_API std::string GetCapture(uint32_t Index) const; + std::string_view GetCapture(uint32_t Index) const; inline HttpServerRequest& ServerRequest() { return m_HttpRequest; } private: @@ -220,12 +227,14 @@ private: friend class HttpRequestRouter; }; -inline std::string +inline std::string_view HttpRouterRequest::GetCapture(uint32_t Index) const { ZEN_ASSERT(Index < m_Match.size()); - return m_Match[Index]; + const auto& Match = m_Match[Index]; + + return std::string_view(&*Match.first, Match.second - Match.first); } /** HTTP request router helper diff --git a/src/zenhttp/packageformat.cpp b/src/zenhttp/packageformat.cpp index 676fc73fd..9d423ecbc 100644 --- a/src/zenhttp/packageformat.cpp +++ b/src/zenhttp/packageformat.cpp @@ -19,6 +19,8 @@ #include <span> #include <vector> +#include <EASTL/fixed_vector.h> + #if ZEN_PLATFORM_WINDOWS # include <zencore/windows.h> #endif @@ -31,6 +33,10 @@ namespace zen { const std::string_view HandlePrefix(":?#:"); +typedef eastl::fixed_vector<IoBuffer, 16> IoBufferVec_t; + +IoBufferVec_t FormatPackageMessageInternal(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle); + std::vector<IoBuffer> FormatPackageMessage(const CbPackage& Data, void* TargetProcessHandle) { @@ -42,10 +48,18 @@ FormatPackageMessageBuffer(const CbPackage& Data, void* TargetProcessHandle) return FormatPackageMessageBuffer(Data, FormatFlags::kDefault, TargetProcessHandle); } +std::vector<IoBuffer> +FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) +{ + auto Vec = FormatPackageMessageInternal(Data, Flags, TargetProcessHandle); + return std::vector<IoBuffer>(begin(Vec), end(Vec)); +} + CompositeBuffer FormatPackageMessageBuffer(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) { - return CompositeBuffer(FormatPackageMessage(Data, Flags, TargetProcessHandle)); + auto Vec = FormatPackageMessageInternal(Data, Flags, TargetProcessHandle); + return CompositeBuffer(std::span{begin(Vec), end(Vec)}); } static void @@ -54,7 +68,7 @@ MarshalLocal(CbAttachmentEntry*& AttachmentInfo, CbAttachmentReferenceHeader& LocalRef, const IoHash& AttachmentHash, bool IsCompressed, - std::vector<IoBuffer>& ResponseBuffers) + IoBufferVec_t& ResponseBuffers) { IoBuffer RefBuffer(sizeof(CbAttachmentReferenceHeader) + Path8.size()); @@ -146,8 +160,8 @@ IsLocalRef(tsl::robin_map<void*, std::string>& FileNameMap, return true; }; -std::vector<IoBuffer> -FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) +IoBufferVec_t +FormatPackageMessageInternal(const CbPackage& Data, FormatFlags Flags, void* TargetProcessHandle) { ZEN_TRACE_CPU("FormatPackageMessage"); @@ -177,7 +191,7 @@ FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProce #endif // ZEN_PLATFORM_WINDOWS const std::span<const CbAttachment>& Attachments = Data.GetAttachments(); - std::vector<IoBuffer> ResponseBuffers; + IoBufferVec_t ResponseBuffers; ResponseBuffers.reserve(2 + Attachments.size()); // TODO: may want to use an additional fudge factor here to avoid growing since each // attachment is likely to consist of several buffers @@ -265,11 +279,10 @@ FormatPackageMessage(const CbPackage& Data, FormatFlags Flags, void* TargetProce { IoBuffer ObjIoBuffer = AttachmentObject.GetBuffer().AsIoBuffer(); ZEN_ASSERT(ObjIoBuffer.GetSize() > 0); - ResponseBuffers.emplace_back(std::move(ObjIoBuffer)); - *AttachmentInfo++ = {.PayloadSize = ObjIoBuffer.Size(), .Flags = CbAttachmentEntry::kIsObject, .AttachmentHash = Attachment.GetHash()}; + ResponseBuffers.emplace_back(std::move(ObjIoBuffer)); } else if (const CompositeBuffer& AttachmentBinary = Attachment.AsCompositeBinary()) { @@ -486,30 +499,25 @@ ParsePackageMessage(IoBuffer Payload, std::function<IoBuffer(const IoHash&, uint { if (Entry.Flags & CbAttachmentEntry::kIsObject) { + CompressedBuffer CompBuf(CompressedBuffer::FromCompressedNoValidate(IoBuffer(AttachmentBuffer))); + if (!CompBuf) + { + // First payload is always a compact binary object + MalformedAttachments.push_back( + std::make_pair(i, + fmt::format("Invalid format, expected compressed buffer for CbObject (size {}) for {}", + AttachmentBuffer.GetSize(), + Entry.AttachmentHash))); + } + CbObject AttachmentObject = LoadCompactBinaryObject(std::move(CompBuf)); if (i == 0) { - CompressedBuffer CompBuf(CompressedBuffer::FromCompressedNoValidate(IoBuffer(AttachmentBuffer))); - if (CompBuf) - { - Package.SetObject(LoadCompactBinaryObject(std::move(CompBuf))); - } - else - { - // First payload is always a compact binary object - MalformedAttachments.push_back( - std::make_pair(i, - fmt::format("Invalid format, expected compressed buffer for CbObject (size {}) for {}", - AttachmentBuffer.GetSize(), - Entry.AttachmentHash))); - } + // First payload is always a compact binary object + Package.SetObject(AttachmentObject); } else { - MalformedAttachments.push_back(std::make_pair( - i, - fmt::format("Invalid format, compressed object attachments are not currently supported (size {}) for {}", - AttachmentBuffer.GetSize(), - Entry.AttachmentHash))); + Attachments.emplace_back(CbAttachment(AttachmentObject, Entry.AttachmentHash)); } } else @@ -533,17 +541,14 @@ ParsePackageMessage(IoBuffer Payload, std::function<IoBuffer(const IoHash&, uint { if (Entry.Flags & CbAttachmentEntry::kIsObject) { + CbObject AttachmentObject = LoadCompactBinaryObject(AttachmentBuffer); if (i == 0) { - Package.SetObject(LoadCompactBinaryObject(AttachmentBuffer)); + Package.SetObject(AttachmentObject); } else { - MalformedAttachments.push_back( - std::make_pair(i, - fmt::format("Invalid format, object attachments are not currently supported (size {}) for {}", - AttachmentBuffer.GetSize(), - Entry.AttachmentHash))); + Attachments.emplace_back(CbAttachment(AttachmentObject, Entry.AttachmentHash)); } } else if (AttachmentSize > 0) diff --git a/src/zenhttp/servers/httpasio.cpp b/src/zenhttp/servers/httpasio.cpp index fe59e3a6f..c1b7294c9 100644 --- a/src/zenhttp/servers/httpasio.cpp +++ b/src/zenhttp/servers/httpasio.cpp @@ -678,7 +678,7 @@ struct HttpAcceptor if (BindErrorCode == asio::error::address_in_use) { // Do a retry after a short sleep on same port just to be sure - ZEN_INFO("Desired port %d is in use, retrying", BasePort); + ZEN_INFO("Desired port {} is in use, retrying", BasePort); Sleep(100); m_Acceptor.bind(asio::ip::tcp::endpoint(BindAddress, EffectivePort), BindErrorCode); } @@ -697,13 +697,20 @@ struct HttpAcceptor { ZEN_ERROR("Unable open asio service, error '{}'", BindErrorCode.message()); } - else if (BindAddress.is_loopback()) + else { - m_AlternateProtocolAcceptor.bind(asio::ip::tcp::endpoint(asio::ip::address_v4::loopback(), EffectivePort), BindErrorCode); - m_UseAlternateProtocolAcceptor = true; - ZEN_INFO("Registered local-only handler 'http://{}:{}/' - this is not accessible from remote hosts", - "localhost", - EffectivePort); + if (EffectivePort != BasePort) + { + ZEN_WARN("Desired port {} is in use, remapped to port {}", BasePort, EffectivePort); + } + if (BindAddress.is_loopback()) + { + m_AlternateProtocolAcceptor.bind(asio::ip::tcp::endpoint(asio::ip::address_v4::loopback(), EffectivePort), BindErrorCode); + m_UseAlternateProtocolAcceptor = true; + ZEN_INFO("Registered local-only handler 'http://{}:{}/' - this is not accessible from remote hosts", + "localhost", + EffectivePort); + } } #if ZEN_PLATFORM_WINDOWS diff --git a/src/zenhttp/servers/httpmulti.cpp b/src/zenhttp/servers/httpmulti.cpp index 2a6a90d2e..f4dc1e15b 100644 --- a/src/zenhttp/servers/httpmulti.cpp +++ b/src/zenhttp/servers/httpmulti.cpp @@ -103,6 +103,10 @@ HttpMultiServer::RequestExit() void HttpMultiServer::Close() { + for (auto& Server : m_Servers) + { + Server->Close(); + } } void diff --git a/src/zenhttp/servers/httpsys.cpp b/src/zenhttp/servers/httpsys.cpp index 87128c0c9..62dab02c4 100644 --- a/src/zenhttp/servers/httpsys.cpp +++ b/src/zenhttp/servers/httpsys.cpp @@ -16,6 +16,8 @@ #include <zencore/trace.h> #include <zenhttp/packageformat.h> +#include <EASTL/fixed_vector.h> + #if ZEN_WITH_HTTPSYS # define _WINSOCKAPI_ # include <zencore/windows.h> @@ -381,14 +383,14 @@ public: void SuppressResponseBody(); // typically used for HEAD requests private: - std::vector<HTTP_DATA_CHUNK> m_HttpDataChunks; - uint64_t m_TotalDataSize = 0; // Sum of all chunk sizes - uint16_t m_ResponseCode = 0; - uint32_t m_NextDataChunkOffset = 0; // Cursor used for very large chunk lists - uint32_t m_RemainingChunkCount = 0; // Backlog for multi-call sends - bool m_IsInitialResponse = true; - HttpContentType m_ContentType = HttpContentType::kBinary; - std::vector<IoBuffer> m_DataBuffers; + eastl::fixed_vector<HTTP_DATA_CHUNK, 16> m_HttpDataChunks; + uint64_t m_TotalDataSize = 0; // Sum of all chunk sizes + uint16_t m_ResponseCode = 0; + uint32_t m_NextDataChunkOffset = 0; // Cursor used for very large chunk lists + uint32_t m_RemainingChunkCount = 0; // Backlog for multi-call sends + bool m_IsInitialResponse = true; + HttpContentType m_ContentType = HttpContentType::kBinary; + eastl::fixed_vector<IoBuffer, 16> m_DataBuffers; void InitializeForPayload(uint16_t ResponseCode, std::span<IoBuffer> Blobs); }; @@ -533,7 +535,14 @@ HttpMessageResponseRequest::HandleCompletion(ULONG IoResult, ULONG_PTR NumberOfB if (IoResult != NO_ERROR) { - ZEN_WARN("response aborted due to error: {}", GetSystemErrorAsString(IoResult)); + ZEN_WARN("response '{}' ({}) aborted after transfering '{}', {} out of {} bytes, reason: {} ({})", + ReasonStringForHttpResultCode(m_ResponseCode), + m_ResponseCode, + ToString(m_ContentType), + NumberOfBytesTransferred, + m_TotalDataSize, + GetSystemErrorAsString(IoResult), + IoResult); // if one transmit failed there's really no need to go on return nullptr; @@ -682,7 +691,7 @@ HttpMessageResponseRequest::IssueRequest(std::error_code& ErrorCode) ); } - auto EmitReponseDetails = [&](StringBuilderBase& ResponseDetails) -> void { + auto EmitResponseDetails = [&](StringBuilderBase& ResponseDetails) -> void { for (int i = 0; i < ThisRequestChunkCount; ++i) { const HTTP_DATA_CHUNK Chunk = m_HttpDataChunks[ThisRequestChunkOffset + i]; @@ -765,7 +774,7 @@ HttpMessageResponseRequest::IssueRequest(std::error_code& ErrorCode) // Emit diagnostics ExtendableStringBuilder<256> ResponseDetails; - EmitReponseDetails(ResponseDetails); + EmitResponseDetails(ResponseDetails); ZEN_WARN("failed to send HTTP response (error {}: '{}'), request URL: '{}', ({}.{}) response: {}", SendResult, diff --git a/src/zenhttp/transports/dlltransport.cpp b/src/zenhttp/transports/dlltransport.cpp index e09e62ec5..fb3dd23b5 100644 --- a/src/zenhttp/transports/dlltransport.cpp +++ b/src/zenhttp/transports/dlltransport.cpp @@ -21,18 +21,31 @@ namespace zen { ////////////////////////////////////////////////////////////////////////// +class DllTransportLogger : public TransportLogger, public RefCounted +{ +public: + DllTransportLogger(std::string_view PluginName); + virtual ~DllTransportLogger() = default; + + void LogMessage(LogLevel Level, const char* Message) override; + +private: + std::string m_PluginName; +}; + struct LoadedDll { std::string Name; std::filesystem::path LoadedFromPath; + DllTransportLogger* Logger = nullptr; Ref<TransportPlugin> Plugin; }; class DllTransportPluginImpl : public DllTransportPlugin, RefCounted { public: - DllTransportPluginImpl(); - ~DllTransportPluginImpl(); + DllTransportPluginImpl() = default; + ~DllTransportPluginImpl() = default; virtual uint32_t AddRef() const override; virtual uint32_t Release() const override; @@ -42,7 +55,7 @@ public: virtual const char* GetDebugName() override; virtual bool IsAvailable() override; - virtual void LoadDll(std::string_view Name) override; + virtual bool LoadDll(std::string_view Name) override; virtual void ConfigureDll(std::string_view Name, const char* OptionTag, const char* OptionValue) override; private: @@ -51,12 +64,27 @@ private: std::vector<LoadedDll> m_Transports; }; -DllTransportPluginImpl::DllTransportPluginImpl() +DllTransportLogger::DllTransportLogger(std::string_view PluginName) : m_PluginName(PluginName) { } -DllTransportPluginImpl::~DllTransportPluginImpl() +void +DllTransportLogger::LogMessage(LogLevel PluginLogLevel, const char* Message) { + logging::level::LogLevel Level; + // clang-format off + switch (PluginLogLevel) + { + case LogLevel::Trace: Level = logging::level::Trace; break; + case LogLevel::Debug: Level = logging::level::Debug; break; + case LogLevel::Info: Level = logging::level::Info; break; + case LogLevel::Warn: Level = logging::level::Warn; break; + case LogLevel::Err: Level = logging::level::Err; break; + case LogLevel::Critical: Level = logging::level::Critical; break; + default: Level = logging::level::Off; break; + } + // clang-format on + ZEN_LOG(Log(), Level, "[{}] {}", m_PluginName, Message) } uint32_t @@ -109,6 +137,7 @@ DllTransportPluginImpl::Shutdown() try { Transport.Plugin->Shutdown(); + Transport.Logger->Release(); } catch (const std::exception&) { @@ -143,42 +172,73 @@ DllTransportPluginImpl::ConfigureDll(std::string_view Name, const char* OptionTa } } -void +bool DllTransportPluginImpl::LoadDll(std::string_view Name) { RwLock::ExclusiveLockScope _(m_Lock); - ExtendableStringBuilder<128> DllPath; - DllPath << Name << ".dll"; + ExtendableStringBuilder<1024> DllPath; + DllPath << Name; + if (!Name.ends_with(".dll")) + { + DllPath << ".dll"; + } + + std::string FileName = std::filesystem::path(DllPath.c_str()).filename().replace_extension().string(); + HMODULE DllHandle = LoadLibraryA(DllPath.c_str()); if (!DllHandle) { - std::error_code Ec = MakeErrorCodeFromLastError(); - - throw std::system_error(Ec, fmt::format("failed to load transport DLL from '{}'", DllPath)); + ZEN_WARN("Failed to load transport DLL from '{}' due to '{}'", DllPath, GetLastErrorAsString()) + return false; } - TransportPlugin* CreateTransportPlugin(); + PfnGetTransportPluginVersion GetVersion = (PfnGetTransportPluginVersion)GetProcAddress(DllHandle, "GetTransportPluginVersion"); + PfnCreateTransportPlugin CreatePlugin = (PfnCreateTransportPlugin)GetProcAddress(DllHandle, "CreateTransportPlugin"); + + uint32_t APIVersion = 0; + uint32_t PluginVersion = 0; + + if (GetVersion) + { + GetVersion(&APIVersion, &PluginVersion); + } - PfnCreateTransportPlugin CreatePlugin = (PfnCreateTransportPlugin)GetProcAddress(DllHandle, "CreateTransportPlugin"); + const bool bValidApiVersion = APIVersion == kTransportApiVersion; - if (!CreatePlugin) + if (!GetVersion || !CreatePlugin || !bValidApiVersion) { std::error_code Ec = MakeErrorCodeFromLastError(); FreeLibrary(DllHandle); - throw std::system_error(Ec, fmt::format("API mismatch detected in transport DLL loaded from '{}'", DllPath)); + if (GetVersion && !bValidApiVersion) + { + ZEN_WARN("Failed to load transport DLL from '{}' due to invalid API version {}, supported API version is {}", + DllPath, + APIVersion, + kTransportApiVersion) + } + else + { + ZEN_WARN("Failed to load transport DLL from '{}' due to not finding GetTransportPluginVersion or CreateTransportPlugin", + DllPath) + } + + return false; } LoadedDll NewDll; NewDll.Name = Name; NewDll.LoadedFromPath = DllPath.c_str(); - NewDll.Plugin = CreatePlugin(); + NewDll.Logger = new DllTransportLogger(FileName); + NewDll.Logger->AddRef(); + NewDll.Plugin = CreatePlugin(NewDll.Logger); m_Transports.emplace_back(std::move(NewDll)); + return true; } DllTransportPlugin* diff --git a/src/zenhttp/transports/dlltransport.h b/src/zenhttp/transports/dlltransport.h index 9346a10ce..c49f888da 100644 --- a/src/zenhttp/transports/dlltransport.h +++ b/src/zenhttp/transports/dlltransport.h @@ -15,7 +15,7 @@ namespace zen { class DllTransportPlugin : public TransportPlugin { public: - virtual void LoadDll(std::string_view Name) = 0; + virtual bool LoadDll(std::string_view Name) = 0; virtual void ConfigureDll(std::string_view Name, const char* OptionTag, const char* OptionValue) = 0; }; diff --git a/src/zennet-test/zennet-test.cpp b/src/zennet-test/zennet-test.cpp index 482d3c617..bac0dec8f 100644 --- a/src/zennet-test/zennet-test.cpp +++ b/src/zennet-test/zennet-test.cpp @@ -2,6 +2,7 @@ #include <zencore/filesystem.h> #include <zencore/logging.h> +#include <zencore/trace.h> #include <zennet/zennet.h> #include <zencore/memory/newdelete.h> @@ -17,6 +18,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char** argv) #if ZEN_WITH_TESTS zen::zennet_forcelinktests(); + zen::TraceInit("zennet-test"); zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenserver-test/zenserver-test.cpp b/src/zenserver-test/zenserver-test.cpp index 01c3144ad..301f93a55 100644 --- a/src/zenserver-test/zenserver-test.cpp +++ b/src/zenserver-test/zenserver-test.cpp @@ -100,6 +100,7 @@ main(int argc, char** argv) using namespace std::literals; using namespace zen; + zen::TraceInit("zenserver-test"); zen::logging::InitializeLogging(); zen::logging::SetLogLevel(zen::logging::level::Debug); @@ -3361,9 +3362,9 @@ TEST_CASE("project.remote") { cpr::Response StatusResponse = Session.Get(); CHECK(IsHttpSuccessCode(StatusResponse.status_code)); - CbObject ReponseObject = + CbObject ResponseObject = LoadCompactBinaryObject(IoBuffer(IoBuffer::Wrap, StatusResponse.text.data(), StatusResponse.text.size())); - std::string_view Status = ReponseObject["Status"sv].AsString(); + std::string_view Status = ResponseObject["Status"sv].AsString(); CHECK(Status != "Aborted"sv); if (Status == "Complete"sv) { @@ -3650,18 +3651,18 @@ GenerateFolderContent(const std::filesystem::path& RootPath) std::filesystem::path EmptyFolder(RootPath / "empty_folder"); std::filesystem::path FirstFolder(RootPath / "first_folder"); - std::filesystem::create_directory(FirstFolder); + CreateDirectories(FirstFolder); Result.push_back(std::make_pair(FirstFolder / "first_folder_blob1.bin", CreateRandomBlob(22))); Result.push_back(std::make_pair(FirstFolder / "first_folder_blob2.bin", CreateRandomBlob(122))); std::filesystem::path SecondFolder(RootPath / "second_folder"); - std::filesystem::create_directory(SecondFolder); + CreateDirectories(SecondFolder); Result.push_back(std::make_pair(SecondFolder / "second_folder_blob1.bin", CreateRandomBlob(522))); Result.push_back(std::make_pair(SecondFolder / "second_folder_blob2.bin", CreateRandomBlob(122))); Result.push_back(std::make_pair(SecondFolder / "second_folder_blob3.bin", CreateRandomBlob(225))); std::filesystem::path SecondFolderChild(SecondFolder / "child_in_second"); - std::filesystem::create_directory(SecondFolderChild); + CreateDirectories(SecondFolderChild); Result.push_back(std::make_pair(SecondFolderChild / "second_child_folder_blob1.bin", CreateRandomBlob(622))); for (const auto& It : Result) @@ -3817,7 +3818,7 @@ TEST_CASE("workspaces.create") while (true) { std::error_code Ec; - std::filesystem::remove_all(Root2Path / Share2Path, Ec); + DeleteDirectories(Root2Path / Share2Path, Ec); if (!Ec) break; } @@ -3974,7 +3975,7 @@ TEST_CASE("workspaces.lifetimes") } // Wipe system config - std::filesystem::remove_all(SystemRootPath); + DeleteDirectories(SystemRootPath); // Restart @@ -4040,8 +4041,8 @@ TEST_CASE("workspaces.share") uint64_t Size = FileObject["size"sv].AsUInt64(); std::u8string_view Path = FileObject["clientpath"sv].AsU8String(); std::filesystem::path AbsFilePath = SharePath / Path; - CHECK(std::filesystem::is_regular_file(AbsFilePath)); - CHECK(std::filesystem::file_size(AbsFilePath) == Size); + CHECK(IsFile(AbsFilePath)); + CHECK(FileSizeFromPath(AbsFilePath) == Size); Files.insert_or_assign(ChunkId, std::make_pair(AbsFilePath, Size)); } } @@ -4064,7 +4065,7 @@ TEST_CASE("workspaces.share") CHECK(ChunkId != Oid::Zero); std::u8string_view Path = FileObject["clientpath"sv].AsU8String(); std::filesystem::path AbsFilePath = SharePath / Path; - CHECK(std::filesystem::is_regular_file(AbsFilePath)); + CHECK(IsFile(AbsFilePath)); } } } @@ -4084,7 +4085,7 @@ TEST_CASE("workspaces.share") CHECK(ChunkId != Oid::Zero); std::u8string_view Path = FileObject["clientpath"sv].AsU8String(); std::filesystem::path AbsFilePath = SharePath / Path; - CHECK(std::filesystem::is_regular_file(AbsFilePath)); + CHECK(IsFile(AbsFilePath)); } } diff --git a/src/zenserver/admin/admin.cpp b/src/zenserver/admin/admin.cpp index 2888f5450..73166e608 100644 --- a/src/zenserver/admin/admin.cpp +++ b/src/zenserver/admin/admin.cpp @@ -20,6 +20,7 @@ #include <zenstore/cidstore.h> #include <zenstore/gc.h> +#include <zenstore/buildstore/buildstore.h> #include <zenstore/cache/structuredcachestore.h> #include <zenutil/workerpools.h> #include "config.h" @@ -39,7 +40,7 @@ struct DirStats DirStats GetStatsForDirectory(std::filesystem::path Dir) { - if (!std::filesystem::exists(Dir)) + if (!IsDir(Dir)) return {}; struct StatsTraversal : public GetDirectoryContentVisitor @@ -105,6 +106,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, ZenCacheStore* CacheStore, CidStore* CidStore, ProjectStore* ProjectStore, + BuildStore* BuildStore, const LogPaths& LogPaths, const ZenServerOptions& ServerOptions) : m_GcScheduler(Scheduler) @@ -112,6 +114,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, , m_CacheStore(CacheStore) , m_CidStore(CidStore) , m_ProjectStore(ProjectStore) +, m_BuildStore(BuildStore) , m_LogPaths(LogPaths) , m_ServerOptions(ServerOptions) { @@ -306,6 +309,7 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, Response << "Interval" << ToTimeSpan(State.Config.Interval); Response << "MaxCacheDuration" << ToTimeSpan(State.Config.MaxCacheDuration); Response << "MaxProjectStoreDuration" << ToTimeSpan(State.Config.MaxProjectStoreDuration); + Response << "MaxBuildStoreDuration" << ToTimeSpan(State.Config.MaxBuildStoreDuration); Response << "CollectSmallObjects" << State.Config.CollectSmallObjects; Response << "Enabled" << State.Config.Enabled; Response << "DiskReserveSize" << NiceBytes(State.Config.DiskReserveSize); @@ -401,6 +405,14 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, } } + if (auto Param = Params.GetValue("maxbuildstoreduration"); Param.empty() == false) + { + if (auto Value = ParseInt<uint64_t>(Param)) + { + GcParams.MaxBuildStoreDuration = std::chrono::seconds(Value.value()); + } + } + if (auto Param = Params.GetValue("disksizesoftlimit"); Param.empty() == false) { if (auto Value = ParseInt<uint64_t>(Param)) @@ -782,6 +794,10 @@ HttpAdminService::HttpAdminService(GcScheduler& Scheduler, { m_ProjectStore->Flush(); } + if (m_BuildStore) + { + m_BuildStore->Flush(); + } HttpReq.WriteResponse(HttpResponseCode::OK); }, HttpVerb::kPost); diff --git a/src/zenserver/admin/admin.h b/src/zenserver/admin/admin.h index 563c4f536..e7821dead 100644 --- a/src/zenserver/admin/admin.h +++ b/src/zenserver/admin/admin.h @@ -12,6 +12,7 @@ class JobQueue; class ZenCacheStore; class CidStore; class ProjectStore; +class BuildStore; struct ZenServerOptions; class HttpAdminService : public zen::HttpService @@ -28,6 +29,7 @@ public: ZenCacheStore* CacheStore, CidStore* CidStore, ProjectStore* ProjectStore, + BuildStore* BuildStore, const LogPaths& LogPaths, const ZenServerOptions& ServerOptions); ~HttpAdminService(); @@ -42,6 +44,7 @@ private: ZenCacheStore* m_CacheStore; CidStore* m_CidStore; ProjectStore* m_ProjectStore; + BuildStore* m_BuildStore; LogPaths m_LogPaths; const ZenServerOptions& m_ServerOptions; }; diff --git a/src/zenserver/buildstore/httpbuildstore.cpp b/src/zenserver/buildstore/httpbuildstore.cpp new file mode 100644 index 000000000..bcec74ce6 --- /dev/null +++ b/src/zenserver/buildstore/httpbuildstore.cpp @@ -0,0 +1,573 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include "httpbuildstore.h" + +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryvalidation.h> +#include <zencore/compactbinaryvalue.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/trace.h> +#include <zenhttp/packageformat.h> +#include <zenstore/buildstore/buildstore.h> +#include <zenutil/workerpools.h> + +#include <numeric> + +namespace zen { +using namespace std::literals; + +ZEN_DEFINE_LOG_CATEGORY_STATIC(LogBuilds, "builds"sv); + +HttpBuildStoreService::HttpBuildStoreService(HttpStatusService& StatusService, HttpStatsService& StatsService, BuildStore& Store) +: m_Log(logging::Get("builds")) +, m_StatusService(StatusService) +, m_StatsService(StatsService) +, m_BuildStore(Store) +{ + Initialize(); + + m_StatusService.RegisterHandler("builds", *this); + m_StatsService.RegisterHandler("builds", *this); +} + +HttpBuildStoreService::~HttpBuildStoreService() +{ + m_StatsService.UnregisterHandler("builds", *this); + m_StatusService.UnregisterHandler("builds", *this); +} + +const char* +HttpBuildStoreService::BaseUri() const +{ + return "/builds/"; +} + +void +HttpBuildStoreService::Initialize() +{ + ZEN_LOG_INFO(LogBuilds, "Initializing Builds Service"); + + m_Router.AddPattern("namespace", "([[:alnum:]-_.]+)"); + m_Router.AddPattern("bucket", "([[:alnum:]-_.]+)"); + m_Router.AddPattern("buildid", "([[:xdigit:]]{24})"); + m_Router.AddPattern("hash", "([[:xdigit:]]{40})"); + + m_Router.RegisterRoute( + "{namespace}/{bucket}/{buildid}/blobs/{hash}", + [this](HttpRouterRequest& Req) { PutBlobRequest(Req); }, + HttpVerb::kPut); + + m_Router.RegisterRoute( + "{namespace}/{bucket}/{buildid}/blobs/{hash}", + [this](HttpRouterRequest& Req) { GetBlobRequest(Req); }, + HttpVerb::kGet); + + m_Router.RegisterRoute( + "{namespace}/{bucket}/{buildid}/blobs/putBlobMetadata", + [this](HttpRouterRequest& Req) { PutMetadataRequest(Req); }, + HttpVerb::kPost); + + m_Router.RegisterRoute( + "{namespace}/{bucket}/{buildid}/blobs/getBlobMetadata", + [this](HttpRouterRequest& Req) { GetMetadatasRequest(Req); }, + HttpVerb::kPost); + + m_Router.RegisterRoute( + "{namespace}/{bucket}/{buildid}/blobs/exists", + [this](HttpRouterRequest& Req) { BlobsExistsRequest(Req); }, + HttpVerb::kPost); +} + +void +HttpBuildStoreService::HandleRequest(zen::HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::HandleRequest"); + metrics::OperationTiming::Scope $(m_HttpRequests); + + m_BuildStoreStats.RequestCount++; + if (m_Router.HandleRequest(Request) == false) + { + ZEN_LOG_WARN(LogBuilds, "No route found for {0}", Request.RelativeUri()); + return Request.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, "Not found"sv); + } +} + +void +HttpBuildStoreService::PutBlobRequest(HttpRouterRequest& Req) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::PutBlobRequest"); + HttpServerRequest& ServerRequest = Req.ServerRequest(); + const std::string_view Namespace = Req.GetCapture(1); + const std::string_view Bucket = Req.GetCapture(2); + const std::string_view BuildId = Req.GetCapture(3); + const std::string_view Hash = Req.GetCapture(4); + ZEN_UNUSED(Namespace, Bucket, BuildId); + IoHash BlobHash; + if (!IoHash::TryParse(Hash, BlobHash)) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Invalid blob hash '{}'", Hash)); + } + m_BuildStoreStats.BlobWriteCount++; + IoBuffer Payload = ServerRequest.ReadPayload(); + if (!Payload) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Payload blob {} is empty", Hash)); + } + if (Payload.GetContentType() != HttpContentType::kCompressedBinary) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Payload blob {} content type {} is invalid", Hash, ToString(Payload.GetContentType()))); + } + m_BuildStore.PutBlob(BlobHash, ServerRequest.ReadPayload()); + // ZEN_INFO("Stored blob {}. Size: {}", BlobHash, ServerRequest.ReadPayload().GetSize()); + return ServerRequest.WriteResponse(HttpResponseCode::OK); +} + +void +HttpBuildStoreService::GetBlobRequest(HttpRouterRequest& Req) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::GetBlobRequest"); + HttpServerRequest& ServerRequest = Req.ServerRequest(); + std::string_view Namespace = Req.GetCapture(1); + std::string_view Bucket = Req.GetCapture(2); + std::string_view BuildId = Req.GetCapture(3); + std::string_view Hash = Req.GetCapture(4); + ZEN_UNUSED(Namespace, Bucket, BuildId); + IoHash BlobHash; + if (!IoHash::TryParse(Hash, BlobHash)) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Invalid blob hash '{}'", Hash)); + } + zen::HttpRanges Ranges; + bool HasRange = ServerRequest.TryGetRanges(Ranges); + if (Ranges.size() > 1) + { + // Only a single range is supported + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + "Multiple ranges in blob request is not supported"); + } + + m_BuildStoreStats.BlobReadCount++; + IoBuffer Blob = m_BuildStore.GetBlob(BlobHash); + if (!Blob) + { + return ServerRequest.WriteResponse(HttpResponseCode::NotFound, + HttpContentType::kText, + fmt::format("Blob with hash '{}' could not be found", Hash)); + } + // ZEN_INFO("Fetched blob {}. Size: {}", BlobHash, Blob.GetSize()); + m_BuildStoreStats.BlobHitCount++; + if (HasRange) + { + const HttpRange& Range = Ranges.front(); + const uint64_t BlobSize = Blob.GetSize(); + const uint64_t MaxBlobSize = Range.Start < BlobSize ? Range.Start - BlobSize : 0; + const uint64_t RangeSize = Min(Range.End - Range.Start + 1, MaxBlobSize); + if (Range.Start + RangeSize > BlobSize) + { + return ServerRequest.WriteResponse(HttpResponseCode::NoContent); + } + Blob = IoBuffer(Blob, Range.Start, RangeSize); + return ServerRequest.WriteResponse(HttpResponseCode::OK, ZenContentType::kBinary, Blob); + } + else + { + return ServerRequest.WriteResponse(HttpResponseCode::OK, Blob.GetContentType(), Blob); + } +} + +void +HttpBuildStoreService::PutMetadataRequest(HttpRouterRequest& Req) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::PutMetadataRequest"); + HttpServerRequest& ServerRequest = Req.ServerRequest(); + std::string_view Namespace = Req.GetCapture(1); + std::string_view Bucket = Req.GetCapture(2); + std::string_view BuildId = Req.GetCapture(3); + + IoBuffer MetaPayload = ServerRequest.ReadPayload(); + if (MetaPayload.GetContentType() != ZenContentType::kCbPackage) + { + throw std::runtime_error(fmt::format("PutMetadataRequest payload has unexpected payload type '{}', expected '{}'", + ToString(MetaPayload.GetContentType()), + ToString(ZenContentType::kCbPackage))); + } + CbPackage Message = ParsePackageMessage(MetaPayload); + + CbObjectView MessageObject = Message.GetObject(); + if (!MessageObject) + { + throw std::runtime_error("PutMetadataRequest payload object is missing"); + } + CbArrayView BlobsArray = MessageObject["blobHashes"sv].AsArrayView(); + CbArrayView MetadataArray = MessageObject["metadatas"sv].AsArrayView(); + + const uint64_t BlobCount = BlobsArray.Num(); + if (BlobCount == 0) + { + throw std::runtime_error("PutMetadataRequest blobs array is empty"); + } + if (BlobCount != MetadataArray.Num()) + { + throw std::runtime_error( + fmt::format("PutMetadataRequest metadata array size {} does not match blobs array size {}", MetadataArray.Num(), BlobCount)); + } + + std::vector<IoHash> BlobHashes; + std::vector<IoBuffer> MetadataPayloads; + + BlobHashes.reserve(BlobCount); + MetadataPayloads.reserve(BlobCount); + + auto BlobsArrayIt = begin(BlobsArray); + auto MetadataArrayIt = begin(MetadataArray); + while (BlobsArrayIt != end(BlobsArray)) + { + const IoHash BlobHash = (*BlobsArrayIt).AsHash(); + const IoHash MetadataHash = (*MetadataArrayIt).AsAttachment(); + + const CbAttachment* Attachment = Message.FindAttachment(MetadataHash); + if (Attachment == nullptr) + { + throw std::runtime_error(fmt::format("Blob metadata attachment {} is missing", MetadataHash)); + } + BlobHashes.push_back(BlobHash); + if (Attachment->IsObject()) + { + MetadataPayloads.push_back(Attachment->AsObject().GetBuffer().MakeOwned().AsIoBuffer()); + MetadataPayloads.back().SetContentType(ZenContentType::kCbObject); + } + else if (Attachment->IsCompressedBinary()) + { + MetadataPayloads.push_back(Attachment->AsCompressedBinary().GetCompressed().Flatten().AsIoBuffer()); + MetadataPayloads.back().SetContentType(ZenContentType::kCompressedBinary); + } + else + { + ZEN_ASSERT(Attachment->IsBinary()); + MetadataPayloads.push_back(Attachment->AsBinary().AsIoBuffer()); + MetadataPayloads.back().SetContentType(ZenContentType::kBinary); + } + + BlobsArrayIt++; + MetadataArrayIt++; + } + m_BuildStore.PutMetadatas(BlobHashes, MetadataPayloads); + return ServerRequest.WriteResponse(HttpResponseCode::OK); +} + +void +HttpBuildStoreService::GetMetadatasRequest(HttpRouterRequest& Req) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::GetMetadatasRequest"); + HttpServerRequest& ServerRequest = Req.ServerRequest(); + std::string_view Namespace = Req.GetCapture(1); + std::string_view Bucket = Req.GetCapture(2); + std::string_view BuildId = Req.GetCapture(3); + ZEN_UNUSED(Namespace, Bucket, BuildId); + IoBuffer RequestPayload = ServerRequest.ReadPayload(); + if (!RequestPayload) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + "Expected compact binary body for metadata request, body is missing"); + } + if (RequestPayload.GetContentType() != HttpContentType::kCbObject) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Expected compact binary body for metadata request, got {}", ToString(RequestPayload.GetContentType()))); + } + if (CbValidateError ValidateError = ValidateCompactBinary(RequestPayload.GetView(), CbValidateMode::Default); + ValidateError != CbValidateError::None) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Compact binary body for metadata request is not valid, reason: {}", ToString(ValidateError))); + } + CbObject RequestObject = LoadCompactBinaryObject(RequestPayload); + CbArrayView BlobsArray = RequestObject["blobHashes"sv].AsArrayView(); + if (!BlobsArray) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + "Compact binary body for metadata request is missing 'blobHashes' array"); + } + const uint64_t BlobCount = BlobsArray.Num(); + + std::vector<IoHash> BlobRawHashes; + BlobRawHashes.reserve(BlobCount); + for (CbFieldView BlockHashView : BlobsArray) + { + BlobRawHashes.push_back(BlockHashView.AsHash()); + if (BlobRawHashes.back() == IoHash::Zero) + { + const uint8_t Type = (uint8_t)BlockHashView.GetValue().GetType(); + return ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Compact binary body for metadata 'blobHashes' array contains invalid field type: {}", Type)); + } + } + m_BuildStoreStats.BlobMetaReadCount += BlobRawHashes.size(); + std::vector<IoBuffer> BlockMetadatas = m_BuildStore.GetMetadatas(BlobRawHashes, &GetSmallWorkerPool(EWorkloadType::Burst)); + + CbPackage ResponsePackage; + std::vector<CbAttachment> Attachments; + tsl::robin_set<IoHash, IoHash::Hasher> AttachmentHashes; + Attachments.reserve(BlobCount); + AttachmentHashes.reserve(BlobCount); + { + CbObjectWriter ResponseWriter; + + ResponseWriter.BeginArray("blobHashes"); + for (size_t BlockHashIndex = 0; BlockHashIndex < BlobRawHashes.size(); BlockHashIndex++) + { + if (BlockMetadatas[BlockHashIndex]) + { + const IoHash& BlockHash = BlobRawHashes[BlockHashIndex]; + ResponseWriter.AddHash(BlockHash); + } + } + ResponseWriter.EndArray(); // blobHashes + + ResponseWriter.BeginArray("metadatas"); + + for (size_t BlockHashIndex = 0; BlockHashIndex < BlobRawHashes.size(); BlockHashIndex++) + { + if (IoBuffer Metadata = BlockMetadatas[BlockHashIndex]; Metadata) + { + switch (Metadata.GetContentType()) + { + case ZenContentType::kCbObject: + { + CbObject Object = CbObject(SharedBuffer(std::move(Metadata)).MakeOwned()); + const IoHash ObjectHash = Object.GetHash(); + ResponseWriter.AddBinaryAttachment(ObjectHash); + if (!AttachmentHashes.contains(ObjectHash)) + { + Attachments.push_back(CbAttachment(Object, ObjectHash)); + AttachmentHashes.insert(ObjectHash); + } + } + break; + case ZenContentType::kCompressedBinary: + { + IoHash RawHash; + uint64_t _; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(std::move(Metadata)), RawHash, _); + ResponseWriter.AddBinaryAttachment(RawHash); + if (!AttachmentHashes.contains(RawHash)) + { + Attachments.push_back(CbAttachment(Compressed, RawHash)); + AttachmentHashes.insert(RawHash); + } + } + break; + default: + { + const IoHash RawHash = IoHash::HashBuffer(Metadata); + ResponseWriter.AddBinaryAttachment(RawHash); + if (!AttachmentHashes.contains(RawHash)) + { + Attachments.push_back(CbAttachment(SharedBuffer(Metadata), RawHash)); + AttachmentHashes.insert(RawHash); + } + } + break; + } + } + } + + ResponseWriter.EndArray(); // metadatas + + ResponsePackage.SetObject(ResponseWriter.Save()); + } + ResponsePackage.AddAttachments(Attachments); + + CompositeBuffer RpcResponseBuffer = FormatPackageMessageBuffer(ResponsePackage); + ServerRequest.WriteResponse(HttpResponseCode::OK, HttpContentType::kCbPackage, RpcResponseBuffer); +} + +void +HttpBuildStoreService::BlobsExistsRequest(HttpRouterRequest& Req) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::BlobsExistsRequest"); + HttpServerRequest& ServerRequest = Req.ServerRequest(); + std::string_view Namespace = Req.GetCapture(1); + std::string_view Bucket = Req.GetCapture(2); + std::string_view BuildId = Req.GetCapture(3); + ZEN_UNUSED(Namespace, Bucket, BuildId); + IoBuffer RequestPayload = ServerRequest.ReadPayload(); + if (!RequestPayload) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + "Expected compact binary body for blob exists request, body is missing"); + } + if (RequestPayload.GetContentType() != HttpContentType::kCbObject) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Expected compact binary body for blob exists request, got {}", ToString(RequestPayload.GetContentType()))); + } + if (CbValidateError ValidateError = ValidateCompactBinary(RequestPayload.GetView(), CbValidateMode::Default); + ValidateError != CbValidateError::None) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Compact binary body for blob exists request is not valid, reason: {}", ToString(ValidateError))); + } + CbObject RequestObject = LoadCompactBinaryObject(RequestPayload); + CbArrayView BlobsArray = RequestObject["blobHashes"sv].AsArrayView(); + if (!BlobsArray) + { + m_BuildStoreStats.BadRequestCount++; + return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, + HttpContentType::kText, + "Compact binary body for blob exists request is missing 'blobHashes' array"); + } + + std::vector<IoHash> BlobRawHashes; + BlobRawHashes.reserve(BlobsArray.Num()); + for (CbFieldView BlockHashView : BlobsArray) + { + BlobRawHashes.push_back(BlockHashView.AsHash()); + if (BlobRawHashes.back() == IoHash::Zero) + { + const uint8_t Type = (uint8_t)BlockHashView.GetValue().GetType(); + return ServerRequest.WriteResponse( + HttpResponseCode::BadRequest, + HttpContentType::kText, + fmt::format("Compact binary body for blob exists request 'blobHashes' array contains invalid field type: {}", Type)); + } + } + + m_BuildStoreStats.BlobExistsCount += BlobRawHashes.size(); + std::vector<BuildStore::BlobExistsResult> BlobsExists = m_BuildStore.BlobsExists(BlobRawHashes); + CbObjectWriter ResponseWriter(9 * BlobsExists.size()); + ResponseWriter.BeginArray("blobExists"sv); + for (const BuildStore::BlobExistsResult& BlobExists : BlobsExists) + { + ResponseWriter.AddBool(BlobExists.HasBody); + if (BlobExists.HasBody) + { + m_BuildStoreStats.BlobExistsBodyHitCount++; + } + } + ResponseWriter.EndArray(); // blobExist + ResponseWriter.BeginArray("metadataExists"sv); + for (const BuildStore::BlobExistsResult& BlobExists : BlobsExists) + { + ResponseWriter.AddBool(BlobExists.HasBody); + if (BlobExists.HasMetadata) + { + m_BuildStoreStats.BlobExistsMetaHitCount++; + } + } + ResponseWriter.EndArray(); // metadataExists + CbObject ResponseObject = ResponseWriter.Save(); + return ServerRequest.WriteResponse(HttpResponseCode::OK, ResponseObject); +} + +void +HttpBuildStoreService::HandleStatsRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::Stats"); + CbObjectWriter Cbo; + + EmitSnapshot("requests", m_HttpRequests, Cbo); + + Cbo.BeginObject("builds"); + { + Cbo.BeginObject("blobs"); + { + Cbo << "readcount" << m_BuildStoreStats.BlobReadCount << "writecount" << m_BuildStoreStats.BlobWriteCount << "hitcount" + << m_BuildStoreStats.BlobHitCount; + } + Cbo.EndObject(); + + Cbo.BeginObject("metadata"); + { + Cbo << "readcount" << m_BuildStoreStats.BlobMetaReadCount << "writecount" << m_BuildStoreStats.BlobMetaWriteCount << "hitcount" + << m_BuildStoreStats.BlobMetaHitCount; + } + Cbo.EndObject(); + + Cbo << "requestcount" << m_BuildStoreStats.RequestCount; + Cbo << "badrequestcount" << m_BuildStoreStats.BadRequestCount; + } + Cbo.EndObject(); + + Cbo.BeginObject("size"); + { + BuildStore::StorageStats StorageStats = m_BuildStore.GetStorageStats(); + + Cbo << "count" << StorageStats.EntryCount; + Cbo << "bytes" << StorageStats.LargeBlobBytes + StorageStats.SmallBlobBytes + StorageStats.MetadataByteCount; + Cbo.BeginObject("blobs"); + { + Cbo << "count" << (StorageStats.LargeBlobCount + StorageStats.SmallBlobCount); + Cbo << "bytes" << (StorageStats.LargeBlobBytes + StorageStats.SmallBlobBytes); + Cbo.BeginObject("large"); + { + Cbo << "count" << StorageStats.LargeBlobCount; + Cbo << "bytes" << StorageStats.LargeBlobBytes; + } + Cbo.EndObject(); // large + Cbo.BeginObject("small"); + { + Cbo << "count" << StorageStats.SmallBlobCount; + Cbo << "bytes" << StorageStats.SmallBlobBytes; + } + Cbo.EndObject(); // small + } + Cbo.EndObject(); // blobs + + Cbo.BeginObject("metadata"); + { + Cbo << "count" << StorageStats.MetadataCount; + Cbo << "bytes" << StorageStats.MetadataByteCount; + } + Cbo.EndObject(); // metadata + } + Cbo.EndObject(); // size + + return Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void +HttpBuildStoreService::HandleStatusRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpBuildStoreService::Status"); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +} // namespace zen diff --git a/src/zenserver/buildstore/httpbuildstore.h b/src/zenserver/buildstore/httpbuildstore.h new file mode 100644 index 000000000..50cb5db12 --- /dev/null +++ b/src/zenserver/buildstore/httpbuildstore.h @@ -0,0 +1,68 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/stats.h> +#include <zenhttp/httpserver.h> +#include <zenhttp/httpstats.h> +#include <zenhttp/httpstatus.h> + +#include <filesystem> + +namespace zen { + +class BuildStore; + +class HttpBuildStoreService final : public zen::HttpService, public IHttpStatusProvider, public IHttpStatsProvider +{ +public: + HttpBuildStoreService(HttpStatusService& StatusService, HttpStatsService& StatsService, BuildStore& Store); + virtual ~HttpBuildStoreService(); + + virtual const char* BaseUri() const override; + virtual void HandleRequest(zen::HttpServerRequest& Request) override; + + virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual void HandleStatusRequest(HttpServerRequest& Request) override; + +private: + struct BuildStoreStats + { + std::atomic_uint64_t BlobReadCount{}; + std::atomic_uint64_t BlobHitCount{}; + std::atomic_uint64_t BlobWriteCount{}; + std::atomic_uint64_t BlobMetaReadCount{}; + std::atomic_uint64_t BlobMetaHitCount{}; + std::atomic_uint64_t BlobMetaWriteCount{}; + std::atomic_uint64_t BlobExistsCount{}; + std::atomic_uint64_t BlobExistsBodyHitCount{}; + std::atomic_uint64_t BlobExistsMetaHitCount{}; + std::atomic_uint64_t RequestCount{}; + std::atomic_uint64_t BadRequestCount{}; + }; + + void Initialize(); + + inline LoggerRef Log() { return m_Log; } + + LoggerRef m_Log; + + void PutBlobRequest(HttpRouterRequest& Req); + void GetBlobRequest(HttpRouterRequest& Req); + + void PutMetadataRequest(HttpRouterRequest& Req); + void GetMetadatasRequest(HttpRouterRequest& Req); + + void BlobsExistsRequest(HttpRouterRequest& Req); + + HttpRequestRouter m_Router; + + HttpStatusService& m_StatusService; + HttpStatsService& m_StatsService; + + BuildStore& m_BuildStore; + BuildStoreStats m_BuildStoreStats; + metrics::OperationTiming m_HttpRequests; +}; + +} // namespace zen diff --git a/src/zenserver/cache/httpstructuredcache.cpp b/src/zenserver/cache/httpstructuredcache.cpp index 224cc6678..acb6053d9 100644 --- a/src/zenserver/cache/httpstructuredcache.cpp +++ b/src/zenserver/cache/httpstructuredcache.cpp @@ -25,6 +25,7 @@ #include <zenutil/cache/cacherequests.h> #include <zenutil/cache/rpcrecording.h> #include <zenutil/jupiter/jupiterclient.h> +#include <zenutil/parallelwork.h> #include <zenutil/workerpools.h> #include "upstream/upstreamcache.h" @@ -1643,15 +1644,26 @@ HttpStructuredCacheService::ReplayRequestRecorder(const CacheRequestContext& Co WorkerThreadPool WorkerPool(ThreadCount); uint64_t RequestCount = Replayer.GetRequestCount(); Stopwatch Timer; - auto _ = MakeGuard([&]() { ZEN_INFO("Replayed {} requests in {}", RequestCount, NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); }); - Latch JobLatch(RequestCount); + auto _ = MakeGuard([&]() { ZEN_INFO("Replayed {} requests in {}", RequestCount, NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); }); + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); ZEN_INFO("Replaying {} requests", RequestCount); for (uint64_t RequestIndex = 0; RequestIndex < RequestCount; ++RequestIndex) { - WorkerPool.ScheduleWork([this, &Context, &JobLatch, &Replayer, RequestIndex]() { + if (AbortFlag) + { + break; + } + Work.ScheduleWork(WorkerPool, [this, &Context, &Replayer, RequestIndex](std::atomic<bool>& AbortFlag) { IoBuffer Body; zen::cache::RecordedRequestInfo RequestInfo = Replayer.GetRequest(RequestIndex, /* out */ Body); + if (AbortFlag) + { + return; + } + if (Body) { uint32_t AcceptMagic = 0; @@ -1692,16 +1704,15 @@ HttpStructuredCacheService::ReplayRequestRecorder(const CacheRequestContext& Co } } } - JobLatch.CountDown(); }); } - while (!JobLatch.Wait(10000)) - { + Work.Wait(10000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(IsAborted, IsPaused); ZEN_INFO("Replayed {} of {} requests, elapsed {}", - RequestCount - JobLatch.Remaining(), + RequestCount - PendingWork, RequestCount, NiceLatencyNs(Timer.GetElapsedTimeUs() * 1000)); - } + }); } void diff --git a/src/zenserver/config.cpp b/src/zenserver/config.cpp index c8949f5fd..d53bedad0 100644 --- a/src/zenserver/config.cpp +++ b/src/zenserver/config.cpp @@ -15,12 +15,15 @@ #include <zencore/logging.h> #include <zencore/string.h> #include <zenhttp/zenhttp.h> +#include <zenutil/commandlineoptions.h> +#include <zenutil/environmentoptions.h> ZEN_THIRD_PARTY_INCLUDES_START #include <fmt/format.h> #include <fmt/ranges.h> #include <zencore/logging.h> #include <cxxopts.hpp> +#include <json11.hpp> #include <sol/sol.hpp> ZEN_THIRD_PARTY_INCLUDES_END @@ -178,27 +181,6 @@ ParseBucketConfigs(std::span<std::string> Buckets) return Cfg; } -static std::string -MakeSafePath(const std::string_view Path) -{ -#if ZEN_PLATFORM_WINDOWS - if (Path.empty()) - { - return std::string(Path); - } - - std::string FixedPath(Path); - std::replace(FixedPath.begin(), FixedPath.end(), '/', '\\'); - if (!FixedPath.starts_with("\\\\?\\")) - { - FixedPath.insert(0, "\\\\?\\"); - } - return FixedPath; -#else - return std::string(Path); -#endif -}; - class CachePolicyOption : public LuaConfig::OptionValue { public: @@ -324,7 +306,7 @@ public: std::string Name = Bucket.value().get_or("name", std::string("Default")); std::string Directory = Bucket.value().get_or("directory", std::string()); - Value.Buckets.push_back({.Name = std::move(Name), .Directory = LuaConfig::MakeSafePath(Directory)}); + Value.Buckets.push_back({.Name = std::move(Name), .Directory = MakeSafeAbsolutePath(Directory)}); } } } @@ -332,6 +314,97 @@ public: ZenObjectStoreConfig& Value; }; +class ZenStructuredCacheBucketsConfigOption : public LuaConfig::OptionValue +{ +public: + ZenStructuredCacheBucketsConfigOption(std::vector<std::pair<std::string, ZenStructuredCacheBucketConfig>>& Value) : Value(Value) {} + virtual void Print(std::string_view Indent, zen::StringBuilderBase& StringBuilder) override + { + if (Value.empty()) + { + StringBuilder.Append("{}"); + return; + } + LuaConfig::LuaContainerWriter Writer(StringBuilder, Indent); + for (const std::pair<std::string, ZenStructuredCacheBucketConfig>& Bucket : Value) + { + Writer.BeginContainer(""); + { + Writer.WriteValue("name", Bucket.first); + const ZenStructuredCacheBucketConfig& BucketConfig = Bucket.second; + + Writer.WriteValue("maxblocksize", fmt::format("{}", BucketConfig.MaxBlockSize)); + Writer.BeginContainer("memlayer"); + { + Writer.WriteValue("sizethreshold", fmt::format("{}", BucketConfig.MemCacheSizeThreshold)); + } + Writer.EndContainer(); + + Writer.WriteValue("payloadalignment", fmt::format("{}", BucketConfig.PayloadAlignment)); + Writer.WriteValue("largeobjectthreshold", fmt::format("{}", BucketConfig.PayloadAlignment)); + } + Writer.EndContainer(); + } + } + virtual void Parse(sol::object Object) override + { + if (sol::optional<sol::table> Buckets = Object.as<sol::table>()) + { + for (const auto& Kv : Buckets.value()) + { + if (sol::optional<sol::table> Bucket = Kv.second.as<sol::table>()) + { + ZenStructuredCacheBucketConfig BucketConfig; + std::string Name = Kv.first.as<std::string>(); + if (Name.empty()) + { + throw zen::OptionParseException(fmt::format("cache bucket option must have a name.")); + } + + const uint64_t MaxBlockSize = Bucket.value().get_or("maxblocksize", BucketConfig.MaxBlockSize); + if (MaxBlockSize == 0) + { + throw zen::OptionParseException( + fmt::format("maxblocksize option for cache bucket '{}' is invalid. It must be non-zero.", Name)); + } + BucketConfig.MaxBlockSize = MaxBlockSize; + + if (sol::optional<sol::table> Memlayer = Bucket.value().get_or("memlayer", sol::table())) + { + const uint64_t MemCacheSizeThreshold = Bucket.value().get_or("sizethreshold", BucketConfig.MemCacheSizeThreshold); + if (MemCacheSizeThreshold == 0) + { + throw zen::OptionParseException( + fmt::format("memlayer.sizethreshold option for cache bucket '{}' is invalid. It must be non-zero.", Name)); + } + BucketConfig.MemCacheSizeThreshold = Bucket.value().get_or("sizethreshold", BucketConfig.MemCacheSizeThreshold); + } + + const uint32_t PayloadAlignment = Bucket.value().get_or("payloadalignment", BucketConfig.PayloadAlignment); + if (PayloadAlignment == 0 || !IsPow2(PayloadAlignment)) + { + throw zen::OptionParseException(fmt::format( + "payloadalignment option for cache bucket '{}' is invalid. It needs to be non-zero and a power of two.", + Name)); + } + BucketConfig.PayloadAlignment = PayloadAlignment; + + const uint64_t LargeObjectThreshold = Bucket.value().get_or("largeobjectthreshold", BucketConfig.LargeObjectThreshold); + if (LargeObjectThreshold == 0) + { + throw zen::OptionParseException( + fmt::format("largeobjectthreshold option for cache bucket '{}' is invalid. It must be non-zero.", Name)); + } + BucketConfig.LargeObjectThreshold = LargeObjectThreshold; + + Value.push_back(std::make_pair(std::move(Name), BucketConfig)); + } + } + } + } + std::vector<std::pair<std::string, ZenStructuredCacheBucketConfig>>& Value; +}; + std::shared_ptr<LuaConfig::OptionValue> MakeOption(zen::UpstreamCachePolicy& Value) { @@ -350,6 +423,35 @@ MakeOption(zen::ZenObjectStoreConfig& Value) return std::make_shared<ZenObjectStoreConfigOption>(Value); }; +std::shared_ptr<LuaConfig::OptionValue> +MakeOption(std::vector<std::pair<std::string, ZenStructuredCacheBucketConfig>>& Value) +{ + return std::make_shared<ZenStructuredCacheBucketsConfigOption>(Value); +}; + +void +ParseEnvVariables(ZenServerOptions& ServerOptions, const cxxopts::ParseResult& CmdLineResult) +{ + using namespace std::literals; + + EnvironmentOptions Options; + Options.AddOption("UE_ZEN_SENTRY_ALLOWPERSONALINFO"sv, ServerOptions.SentryConfig.AllowPII, "sentry-allow-personal-info"sv); + Options.AddOption("UE_ZEN_SENTRY_DSN"sv, ServerOptions.SentryConfig.Dsn, "sentry-dsn"sv); + Options.AddOption("UE_ZEN_SENTRY_ENVIRONMENT"sv, ServerOptions.SentryConfig.Environment, "sentry-environment"sv); + + bool EnvEnableSentry = !ServerOptions.SentryConfig.Disable; + Options.AddOption("UE_ZEN_SENTRY_ENABLED"sv, EnvEnableSentry, "no-sentry"sv); + + Options.AddOption("UE_ZEN_SENTRY_DEBUG"sv, ServerOptions.SentryConfig.Debug, "sentry-debug"sv); + + Options.Parse(CmdLineResult); + + if (EnvEnableSentry != !ServerOptions.SentryConfig.Disable) + { + ServerOptions.SentryConfig.Disable = !EnvEnableSentry; + } +} + void ParseConfigFile(const std::filesystem::path& Path, ZenServerOptions& ServerOptions, @@ -363,12 +465,16 @@ ParseConfigFile(const std::filesystem::path& Path, ////// server LuaOptions.AddOption("server.dedicated"sv, ServerOptions.IsDedicated, "dedicated"sv); LuaOptions.AddOption("server.logid"sv, ServerOptions.LogId, "log-id"sv); - LuaOptions.AddOption("server.sentry.disable"sv, ServerOptions.NoSentry, "no-sentry"sv); - LuaOptions.AddOption("server.sentry.allowpersonalinfo"sv, ServerOptions.SentryAllowPII, "sentry-allow-personal-info"sv); + LuaOptions.AddOption("server.sentry.disable"sv, ServerOptions.SentryConfig.Disable, "no-sentry"sv); + LuaOptions.AddOption("server.sentry.allowpersonalinfo"sv, ServerOptions.SentryConfig.AllowPII, "sentry-allow-personal-info"sv); + LuaOptions.AddOption("server.sentry.dsn"sv, ServerOptions.SentryConfig.Dsn, "sentry-dsn"sv); + LuaOptions.AddOption("server.sentry.environment"sv, ServerOptions.SentryConfig.Environment, "sentry-environment"sv); + LuaOptions.AddOption("server.sentry.debug"sv, ServerOptions.SentryConfig.Debug, "sentry-debug"sv); LuaOptions.AddOption("server.systemrootdir"sv, ServerOptions.SystemRootDir, "system-dir"sv); LuaOptions.AddOption("server.datadir"sv, ServerOptions.DataDir, "data-dir"sv); LuaOptions.AddOption("server.contentdir"sv, ServerOptions.ContentDir, "content-dir"sv); LuaOptions.AddOption("server.abslog"sv, ServerOptions.AbsLogFile, "abslog"sv); + LuaOptions.AddOption("server.pluginsconfigfile"sv, ServerOptions.PluginsConfigFile, "plugins-config"sv); LuaOptions.AddOption("server.debug"sv, ServerOptions.IsDebug, "debug"sv); LuaOptions.AddOption("server.clean"sv, ServerOptions.IsCleanStart, "clean"sv); LuaOptions.AddOption("server.noconsole"sv, ServerOptions.NoConsoleOutput, "quiet"sv); @@ -377,6 +483,10 @@ ParseConfigFile(const std::filesystem::path& Path, LuaOptions.AddOption("server.objectstore.enabled"sv, ServerOptions.ObjectStoreEnabled, "objectstore-enabled"sv); LuaOptions.AddOption("server.objectstore.buckets"sv, ServerOptions.ObjectStoreConfig); + ////// buildsstore + LuaOptions.AddOption("server.buildstore.enabled"sv, ServerOptions.BuildStoreConfig.Enabled, "buildstore-enabled"sv); + LuaOptions.AddOption("server.buildstore.disksizelimit"sv, ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit, "buildstore-disksizelimit"); + ////// network LuaOptions.AddOption("network.httpserverclass"sv, ServerOptions.HttpServerConfig.ServerClass, "http"sv); LuaOptions.AddOption("network.httpserverthreads"sv, ServerOptions.HttpServerConfig.ThreadCount, "http-threads"sv); @@ -411,7 +521,7 @@ ParseConfigFile(const std::filesystem::path& Path, LuaOptions.AddOption("cache.limitoverwrites"sv, ServerOptions.StructuredCacheConfig.LimitOverwrites, "cache-limit-overwrites"sv); LuaOptions.AddOption("cache.memlayer.sizethreshold"sv, - ServerOptions.StructuredCacheConfig.MemCacheSizeThreshold, + ServerOptions.StructuredCacheConfig.BucketConfig.MemCacheSizeThreshold, "cache-memlayer-sizethreshold"sv); LuaOptions.AddOption("cache.memlayer.targetfootprint"sv, ServerOptions.StructuredCacheConfig.MemTargetFootprintBytes, @@ -421,6 +531,19 @@ ParseConfigFile(const std::filesystem::path& Path, "cache-memlayer-triminterval"sv); LuaOptions.AddOption("cache.memlayer.maxage"sv, ServerOptions.StructuredCacheConfig.MemMaxAgeSeconds, "cache-memlayer-maxage"sv); + LuaOptions.AddOption("cache.bucket.maxblocksize"sv, + ServerOptions.StructuredCacheConfig.BucketConfig.MaxBlockSize, + "cache-bucket-maxblocksize"sv); + LuaOptions.AddOption("cache.bucket.memlayer.sizethreshold"sv, + ServerOptions.StructuredCacheConfig.BucketConfig.MemCacheSizeThreshold, + "cache-bucket-memlayer-sizethreshold"sv); + LuaOptions.AddOption("cache.bucket.payloadalignment"sv, + ServerOptions.StructuredCacheConfig.BucketConfig.PayloadAlignment, + "cache-bucket-payloadalignment"sv); + LuaOptions.AddOption("cache.bucket.largeobjectthreshold"sv, + ServerOptions.StructuredCacheConfig.BucketConfig.LargeObjectThreshold, + "cache-bucket-largeobjectthreshold"sv); + ////// cache.upstream LuaOptions.AddOption("cache.upstream.policy"sv, ServerOptions.UpstreamCacheConfig.CachePolicy, "upstream-cache-policy"sv); LuaOptions.AddOption("cache.upstream.upstreamthreadcount"sv, @@ -493,6 +616,9 @@ ParseConfigFile(const std::filesystem::path& Path, LuaOptions.AddOption("gc.projectstore.duration.seconds"sv, ServerOptions.GcConfig.ProjectStore.MaxDurationSeconds, "gc-projectstore-duration-seconds"); + LuaOptions.AddOption("gc.buildstore.duration.seconds"sv, + ServerOptions.GcConfig.BuildStore.MaxDurationSeconds, + "gc-buildstore-duration-seconds"); ////// security LuaOptions.AddOption("security.encryptionaeskey"sv, ServerOptions.EncryptionKey, "encryption-aes-key"sv); @@ -505,6 +631,8 @@ ParseConfigFile(const std::filesystem::path& Path, ServerOptions.WorksSpacesConfig.AllowConfigurationChanges, "workspaces-allow-changes"sv); + LuaOptions.AddOption("cache.buckets"sv, ServerOptions.StructuredCacheConfig.PerBucketConfigs, "cache.buckets"sv); + LuaOptions.Parse(Path, CmdLineResult); // These have special command line processing so we make sure we export them if they were configured on command line @@ -516,10 +644,14 @@ ParseConfigFile(const std::filesystem::path& Path, { LuaOptions.Touch("server.objectstore.buckets"sv); } + if (!ServerOptions.StructuredCacheConfig.PerBucketConfigs.empty()) + { + LuaOptions.Touch("cache.buckets"sv); + } if (!OutputConfigFile.empty()) { - std::filesystem::path WritePath(MakeSafePath(OutputConfigFile)); + std::filesystem::path WritePath(MakeSafeAbsolutePath(OutputConfigFile)); zen::ExtendableStringBuilder<512> ConfigStringBuilder; LuaOptions.Print(ConfigStringBuilder, CmdLineResult); zen::BasicFile Output; @@ -529,6 +661,68 @@ ParseConfigFile(const std::filesystem::path& Path, } void +ParsePluginsConfigFile(const std::filesystem::path& Path, ZenServerOptions& ServerOptions, int BasePort) +{ + using namespace std::literals; + + IoBuffer Body = IoBufferBuilder::MakeFromFile(Path); + std::string JsonText(reinterpret_cast<const char*>(Body.GetData()), Body.GetSize()); + std::string JsonError; + json11::Json PluginsInfo = json11::Json::parse(JsonText, JsonError); + if (!JsonError.empty()) + { + ZEN_WARN("failed parsing plugins config file '{}'. Reason: '{}'", Path, JsonError); + return; + } + for (const json11::Json& PluginInfo : PluginsInfo.array_items()) + { + if (!PluginInfo.is_object()) + { + ZEN_WARN("the json file '{}' does not contain a valid plugin definition, object expected, got '{}'", Path, PluginInfo.dump()); + continue; + } + + HttpServerPluginConfig Config = {}; + + bool bNeedsPort = true; + + for (const std::pair<const std::string, json11::Json>& Items : PluginInfo.object_items()) + { + if (!Items.second.is_string()) + { + ZEN_WARN("the json file '{}' does not contain a valid plugins definition, string expected, got '{}'", + Path, + Items.second.dump()); + continue; + } + + const std::string& Name = Items.first; + const std::string& Value = Items.second.string_value(); + + if (Name == "name"sv) + Config.PluginName = Value; + else + { + Config.PluginOptions.push_back({Name, Value}); + + if (Name == "port"sv) + { + bNeedsPort = false; + } + } + } + + // add a default base port in case if json config didn't provide one + if (bNeedsPort) + { + Config.PluginOptions.push_back({"port", std::to_string(BasePort)}); + } + + ServerOptions.HttpServerConfig.PluginConfigs.push_back(Config); + } +} + +void ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) { const char* DefaultHttp = "asio"; @@ -560,6 +754,7 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) std::string ContentDir; std::string AbsLogFile; std::string ConfigFile; + std::string PluginsConfigFile; std::string OutputConfigFile; std::string BaseSnapshotDir; @@ -587,13 +782,19 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) "Exit immediately after initialization is complete", cxxopts::value<bool>(ServerOptions.IsPowerCycle)); options.add_options()("config", "Path to Lua config file", cxxopts::value<std::string>(ConfigFile)); + options.add_options()("plugins-config", "Path to plugins config file", cxxopts::value<std::string>(PluginsConfigFile)); options.add_options()("write-config", "Path to output Lua config file", cxxopts::value<std::string>(OutputConfigFile)); options.add_options()("no-sentry", "Disable Sentry crash handler", - cxxopts::value<bool>(ServerOptions.NoSentry)->default_value("false")); + cxxopts::value<bool>(ServerOptions.SentryConfig.Disable)->default_value("false")); options.add_options()("sentry-allow-personal-info", "Allow personally identifiable information in sentry crash reports", - cxxopts::value<bool>(ServerOptions.SentryAllowPII)->default_value("false")); + cxxopts::value<bool>(ServerOptions.SentryConfig.AllowPII)->default_value("false")); + options.add_options()("sentry-dsn", "Sentry DSN to send events to", cxxopts::value<std::string>(ServerOptions.SentryConfig.Dsn)); + options.add_options()("sentry-environment", "Sentry environment", cxxopts::value<std::string>(ServerOptions.SentryConfig.Environment)); + options.add_options()("sentry-debug", + "Enable debug mode for Sentry", + cxxopts::value<bool>(ServerOptions.SentryConfig.Debug)->default_value("false")); options.add_options()("detach", "Indicate whether zenserver should detach from parent process group", cxxopts::value<bool>(ServerOptions.Detach)->default_value("true")); @@ -871,8 +1072,9 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) "cache", "", "cache-memlayer-sizethreshold", - "The largest size of a cache entry that may be cached in memory. Default set to 1024 (1 Kb). Set to 0 to disable memory caching.", - cxxopts::value<uint64_t>(ServerOptions.StructuredCacheConfig.MemCacheSizeThreshold)->default_value("1024"), + "The largest size of a cache entry that may be cached in memory. Default set to 1024 (1 Kb). Set to 0 to disable memory caching. " + "Obsolete, replaced by `--cache-bucket-memlayer-sizethreshold`", + cxxopts::value<uint64_t>(ServerOptions.StructuredCacheConfig.BucketConfig.MemCacheSizeThreshold)->default_value("1024"), ""); options.add_option("cache", @@ -896,6 +1098,36 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) cxxopts::value<uint64_t>(ServerOptions.StructuredCacheConfig.MemMaxAgeSeconds)->default_value("86400"), ""); + options.add_option("cache", + "", + "cache-bucket-maxblocksize", + "Max size of cache bucket blocks. Default set to 1073741824 (1GB).", + cxxopts::value<uint64_t>(ServerOptions.StructuredCacheConfig.BucketConfig.MaxBlockSize)->default_value("1073741824"), + ""); + + options.add_option("cache", + "", + "cache-bucket-payloadalignment", + "Payload alignement for cache bucket blocks. Default set to 16.", + cxxopts::value<uint32_t>(ServerOptions.StructuredCacheConfig.BucketConfig.PayloadAlignment)->default_value("16"), + ""); + + options.add_option( + "cache", + "", + "cache-bucket-largeobjectthreshold", + "Threshold for storing cache bucket values as loose files. Default set to 131072 (128 KB).", + cxxopts::value<uint64_t>(ServerOptions.StructuredCacheConfig.BucketConfig.LargeObjectThreshold)->default_value("131072"), + ""); + + options.add_option( + "cache", + "", + "cache-bucket-memlayer-sizethreshold", + "The largest size of a cache entry that may be cached in memory. Default set to 1024 (1 Kb). Set to 0 to disable memory caching.", + cxxopts::value<uint64_t>(ServerOptions.StructuredCacheConfig.BucketConfig.MemCacheSizeThreshold)->default_value("1024"), + ""); + options.add_option("gc", "", "gc-cache-attachment-store", @@ -968,6 +1200,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) options.add_option("gc", "", + "gc-buildstore-duration-seconds", + "Max duration in seconds before build store entries get evicted. Default set to 604800 (1 week)", + cxxopts::value<int32_t>(ServerOptions.GcConfig.BuildStore.MaxDurationSeconds)->default_value("604800"), + ""); + + options.add_option("gc", + "", "disk-reserve-size", "Size of gc disk reserve in bytes. Default set to 268435456 (256 Mb). Set to zero to disable.", cxxopts::value<uint64_t>(ServerOptions.GcConfig.DiskReserveSize)->default_value("268435456"), @@ -1039,6 +1278,19 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) cxxopts::value<std::vector<std::string>>(BucketConfigs), ""); + options.add_option("buildstore", + "", + "buildstore-enabled", + "Whether the builds store is enabled or not.", + cxxopts::value<bool>(ServerOptions.BuildStoreConfig.Enabled)->default_value("false"), + ""); + options.add_option("buildstore", + "", + "buildstore-disksizelimit", + "Max number of bytes before build store entries get evicted. Default set to 1099511627776 (1TB week)", + cxxopts::value<uint64_t>(ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit)->default_value("1099511627776"), + ""); + options.add_option("stats", "", "statsd", @@ -1091,12 +1343,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) } logging::RefreshLogLevels(); - ServerOptions.SystemRootDir = MakeSafePath(SystemRootDir); - ServerOptions.DataDir = MakeSafePath(DataDir); - ServerOptions.BaseSnapshotDir = MakeSafePath(BaseSnapshotDir); - ServerOptions.ContentDir = MakeSafePath(ContentDir); - ServerOptions.AbsLogFile = MakeSafePath(AbsLogFile); - ServerOptions.ConfigFile = MakeSafePath(ConfigFile); + ServerOptions.SystemRootDir = MakeSafeAbsolutePath(SystemRootDir); + ServerOptions.DataDir = MakeSafeAbsolutePath(DataDir); + ServerOptions.BaseSnapshotDir = MakeSafeAbsolutePath(BaseSnapshotDir); + ServerOptions.ContentDir = MakeSafeAbsolutePath(ContentDir); + ServerOptions.AbsLogFile = MakeSafeAbsolutePath(AbsLogFile); + ServerOptions.ConfigFile = MakeSafeAbsolutePath(ConfigFile); + ServerOptions.PluginsConfigFile = MakeSafeAbsolutePath(PluginsConfigFile); ServerOptions.UpstreamCacheConfig.CachePolicy = ParseUpstreamCachePolicy(UpstreamCachePolicyOptions); if (!BaseSnapshotDir.empty()) @@ -1104,7 +1357,7 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) if (DataDir.empty()) throw zen::OptionParseException("You must explicitly specify a data directory when specifying a base snapshot"); - if (!std::filesystem::is_directory(ServerOptions.BaseSnapshotDir)) + if (!IsDir(ServerOptions.BaseSnapshotDir)) throw OptionParseException(fmt::format("Snapshot directory must be a directory: '{}", BaseSnapshotDir)); } @@ -1121,6 +1374,8 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) ServerOptions.ObjectStoreConfig = ParseBucketConfigs(BucketConfigs); + ParseEnvVariables(ServerOptions, Result); + if (!ServerOptions.ConfigFile.empty()) { ParseConfigFile(ServerOptions.ConfigFile, ServerOptions, Result, OutputConfigFile); @@ -1130,6 +1385,11 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions) ParseConfigFile(ServerOptions.DataDir / "zen_cfg.lua", ServerOptions, Result, OutputConfigFile); } + if (!ServerOptions.PluginsConfigFile.empty()) + { + ParsePluginsConfigFile(ServerOptions.PluginsConfigFile, ServerOptions, ServerOptions.BasePort); + } + ValidateOptions(ServerOptions); } catch (const zen::OptionParseException& e) diff --git a/src/zenserver/config.h b/src/zenserver/config.h index 4ce265c78..7d90aa4c1 100644 --- a/src/zenserver/config.h +++ b/src/zenserver/config.h @@ -59,11 +59,17 @@ struct ZenProjectStoreEvictionPolicy int32_t MaxDurationSeconds = 7 * 24 * 60 * 60; }; +struct ZenBuildStoreEvictionPolicy +{ + int32_t MaxDurationSeconds = 3 * 24 * 60 * 60; +}; + struct ZenGcConfig { // ZenCasEvictionPolicy Cas; ZenCacheEvictionPolicy Cache; ZenProjectStoreEvictionPolicy ProjectStore; + ZenBuildStoreEvictionPolicy BuildStore; int32_t MonitorIntervalSeconds = 30; int32_t IntervalSeconds = 0; bool CollectSmallObjects = true; @@ -113,16 +119,25 @@ struct ZenStatsConfig int StatsdPort = 8125; }; +struct ZenStructuredCacheBucketConfig +{ + uint64_t MaxBlockSize = 1ull << 30; + uint32_t PayloadAlignment = 1u << 4; + uint64_t MemCacheSizeThreshold = 1 * 1024; + uint64_t LargeObjectThreshold = 128 * 1024; +}; + struct ZenStructuredCacheConfig { - bool Enabled = true; - bool WriteLogEnabled = false; - bool AccessLogEnabled = false; - bool LimitOverwrites = false; - uint64_t MemCacheSizeThreshold = 1 * 1024; - uint64_t MemTargetFootprintBytes = 512 * 1024 * 1024; - uint64_t MemTrimIntervalSeconds = 60; - uint64_t MemMaxAgeSeconds = gsl::narrow<uint64_t>(std::chrono::seconds(std::chrono::days(1)).count()); + bool Enabled = true; + bool WriteLogEnabled = false; + bool AccessLogEnabled = false; + bool LimitOverwrites = false; + std::vector<std::pair<std::string, ZenStructuredCacheBucketConfig>> PerBucketConfigs; + ZenStructuredCacheBucketConfig BucketConfig; + uint64_t MemTargetFootprintBytes = 512 * 1024 * 1024; + uint64_t MemTrimIntervalSeconds = 60; + uint64_t MemMaxAgeSeconds = gsl::narrow<uint64_t>(std::chrono::seconds(std::chrono::days(1)).count()); }; struct ZenProjectStoreConfig @@ -131,12 +146,27 @@ struct ZenProjectStoreConfig bool StoreProjectAttachmentMetaData = false; }; +struct ZenBuildStoreConfig +{ + bool Enabled = false; + uint64_t MaxDiskSpaceLimit = 1u * 1024u * 1024u * 1024u * 1024u; // 1TB +}; + struct ZenWorkspacesConfig { bool Enabled = false; bool AllowConfigurationChanges = false; }; +struct ZenSentryConfig +{ + bool Disable = false; + bool AllowPII = false; // Allow personally identifiable information in sentry crash reports + std::string Dsn; + std::string Environment; + bool Debug = false; // Enable debug mode for Sentry +}; + struct ZenServerOptions { ZenUpstreamCacheConfig UpstreamCacheConfig; @@ -146,13 +176,16 @@ struct ZenServerOptions zen::HttpServerConfig HttpServerConfig; ZenStructuredCacheConfig StructuredCacheConfig; ZenProjectStoreConfig ProjectStoreConfig; + ZenBuildStoreConfig BuildStoreConfig; ZenStatsConfig StatsConfig; ZenWorkspacesConfig WorksSpacesConfig; + ZenSentryConfig SentryConfig; std::filesystem::path SystemRootDir; // System root directory (used for machine level config) std::filesystem::path DataDir; // Root directory for state (used for testing) std::filesystem::path ContentDir; // Root directory for serving frontend content (experimental) std::filesystem::path AbsLogFile; // Absolute path to main log file std::filesystem::path ConfigFile; // Path to Lua config file + std::filesystem::path PluginsConfigFile; // Path to plugins config file std::filesystem::path BaseSnapshotDir; // Path to server state snapshot (will be copied into data dir on start) std::string ChildId; // Id assigned by parent process (used for lifetime management) std::string LogId; // Id for tagging log output @@ -169,9 +202,7 @@ struct ZenServerOptions bool IsDedicated = false; // Indicates a dedicated/shared instance, with larger resource requirements bool ShouldCrash = false; // Option for testing crash handling bool IsFirstRun = false; - bool NoSentry = false; - bool SentryAllowPII = false; // Allow personally identifiable information in sentry crash reports - bool Detach = true; // Whether zenserver should detach from existing process group (Mac/Linux) + bool Detach = true; // Whether zenserver should detach from existing process group (Mac/Linux) bool ObjectStoreEnabled = false; bool NoConsoleOutput = false; // Control default use of stdout for diagnostics std::string Loggers[zen::logging::level::LogLevelCount]; diff --git a/src/zenserver/config/luaconfig.cpp b/src/zenserver/config/luaconfig.cpp index f742fa34a..2c54de29e 100644 --- a/src/zenserver/config/luaconfig.cpp +++ b/src/zenserver/config/luaconfig.cpp @@ -4,27 +4,6 @@ namespace zen::LuaConfig { -std::string -MakeSafePath(const std::string_view Path) -{ -#if ZEN_PLATFORM_WINDOWS - if (Path.empty()) - { - return std::string(Path); - } - - std::string FixedPath(Path); - std::replace(FixedPath.begin(), FixedPath.end(), '/', '\\'); - if (!FixedPath.starts_with("\\\\?\\")) - { - FixedPath.insert(0, "\\\\?\\"); - } - return FixedPath; -#else - return std::string(Path); -#endif -}; - void EscapeBackslash(std::string& InOutString) { @@ -101,7 +80,7 @@ FilePathOption::Parse(sol::object Object) std::string Str = Object.as<std::string>(); if (!Str.empty()) { - Value = MakeSafePath(Str); + Value = MakeSafeAbsolutePath(Str); } } diff --git a/src/zenserver/config/luaconfig.h b/src/zenserver/config/luaconfig.h index 76b3088a3..ce7013a9a 100644 --- a/src/zenserver/config/luaconfig.h +++ b/src/zenserver/config/luaconfig.h @@ -4,10 +4,10 @@ #include <zenbase/concepts.h> #include <zencore/fmtutils.h> +#include <zenutil/commandlineoptions.h> ZEN_THIRD_PARTY_INCLUDES_START #include <fmt/format.h> -#include <cxxopts.hpp> #include <sol/sol.hpp> ZEN_THIRD_PARTY_INCLUDES_END @@ -20,8 +20,7 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen::LuaConfig { -std::string MakeSafePath(const std::string_view Path); -void EscapeBackslash(std::string& InOutString); +void EscapeBackslash(std::string& InOutString); class OptionValue { diff --git a/src/zenserver/frontend/frontend.cpp b/src/zenserver/frontend/frontend.cpp index 31d9e1c94..dfa710ae0 100644 --- a/src/zenserver/frontend/frontend.cpp +++ b/src/zenserver/frontend/frontend.cpp @@ -2,6 +2,7 @@ #include "frontend.h" +#include <zencore/compactbinarybuilder.h> #include <zencore/endian.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> @@ -26,7 +27,9 @@ static unsigned char gHtmlZipData[] = { namespace zen { //////////////////////////////////////////////////////////////////////////////// -HttpFrontendService::HttpFrontendService(std::filesystem::path Directory) : m_Directory(Directory) +HttpFrontendService::HttpFrontendService(std::filesystem::path Directory, HttpStatusService& StatusService) +: m_Directory(Directory) +, m_StatusService(StatusService) { std::filesystem::path SelfPath = GetRunningExecutablePath(); @@ -50,7 +53,7 @@ HttpFrontendService::HttpFrontendService(std::filesystem::path Directory) : m_Di { break; } - if (std::filesystem::is_regular_file(ParentPath / "xmake.lua", ErrorCode)) + if (IsFile(ParentPath / "xmake.lua", ErrorCode)) { if (ErrorCode) { @@ -59,7 +62,7 @@ HttpFrontendService::HttpFrontendService(std::filesystem::path Directory) : m_Di std::filesystem::path HtmlDir = ParentPath / "src" / "zenserver" / "frontend" / "html"; - if (std::filesystem::is_directory(HtmlDir, ErrorCode)) + if (IsDir(HtmlDir, ErrorCode)) { m_Directory = HtmlDir; } @@ -81,10 +84,12 @@ HttpFrontendService::HttpFrontendService(std::filesystem::path Directory) : m_Di { ZEN_INFO("front-end is NOT AVAILABLE"); } + m_StatusService.RegisterHandler("dashboard", *this); } HttpFrontendService::~HttpFrontendService() { + m_StatusService.UnregisterHandler("dashboard", *this); } const char* @@ -95,6 +100,14 @@ HttpFrontendService::BaseUri() const //////////////////////////////////////////////////////////////////////////////// void +HttpFrontendService::HandleStatusRequest(zen::HttpServerRequest& Request) +{ + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void HttpFrontendService::HandleRequest(zen::HttpServerRequest& Request) { using namespace std::literals; diff --git a/src/zenserver/frontend/frontend.h b/src/zenserver/frontend/frontend.h index 6eac20620..84ffaac42 100644 --- a/src/zenserver/frontend/frontend.h +++ b/src/zenserver/frontend/frontend.h @@ -3,23 +3,26 @@ #pragma once #include <zenhttp/httpserver.h> +#include <zenhttp/httpstatus.h> #include "zipfs.h" #include <filesystem> namespace zen { -class HttpFrontendService final : public zen::HttpService +class HttpFrontendService final : public zen::HttpService, public IHttpStatusProvider { public: - HttpFrontendService(std::filesystem::path Directory); + HttpFrontendService(std::filesystem::path Directory, HttpStatusService& StatusService); virtual ~HttpFrontendService(); virtual const char* BaseUri() const override; virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleStatusRequest(HttpServerRequest& Request) override; private: ZipFs m_ZipFs; std::filesystem::path m_Directory; + HttpStatusService& m_StatusService; }; } // namespace zen diff --git a/src/zenserver/frontend/html.zip b/src/zenserver/frontend/html.zip Binary files differindex e0a1888e6..5778fa3d2 100644 --- a/src/zenserver/frontend/html.zip +++ b/src/zenserver/frontend/html.zip diff --git a/src/zenserver/frontend/html/indexer/cache.js b/src/zenserver/frontend/html/indexer/cache.js index 390aa948d..b90194855 100644 --- a/src/zenserver/frontend/html/indexer/cache.js +++ b/src/zenserver/frontend/html/indexer/cache.js @@ -9,7 +9,7 @@ export class Cache { this._db_name = db_name; this._store_names = store_names; - this._version = 1; + this._version = 2; this._db = this._open(); } diff --git a/src/zenserver/frontend/html/indexer/indexer.js b/src/zenserver/frontend/html/indexer/indexer.js index 4412e3a57..688bc71b0 100644 --- a/src/zenserver/frontend/html/indexer/indexer.js +++ b/src/zenserver/frontend/html/indexer/indexer.js @@ -43,9 +43,10 @@ class Indexer *search(needle) { + var needleLwr = needle.toLowerCase(); for (const page of this._pages) for (const [_, name] of page) - if (name.indexOf(needle) >= 0) + if (name.toLowerCase().indexOf(needleLwr) >= 0) yield name; } @@ -60,7 +61,7 @@ class Indexer { for (const page of this._pages) for (const [_, name, size, raw_size] of page) - yield [name, size|0, raw_size|0]; + yield [name, size|0n, raw_size|0n]; } } diff --git a/src/zenserver/frontend/html/indexer/worker.js b/src/zenserver/frontend/html/indexer/worker.js index 25c8d7671..69ee234fa 100644 --- a/src/zenserver/frontend/html/indexer/worker.js +++ b/src/zenserver/frontend/html/indexer/worker.js @@ -77,11 +77,11 @@ async function map_id_to_key(project_id, oplog, start, end, page_size, stride) continue; var id = 0n; - var size = 0; - var raw_size = 0; + var size = 0n; + var raw_size = 0n; for (const item of pkg_data.as_array()) { - var found = 0, pkg_id; + var found = 0, pkg_id = undefined; for (const field of item.as_object()) { if (!id && field.is_named("id")) pkg_id = field.as_value(); diff --git a/src/zenserver/frontend/html/pages/entry.js b/src/zenserver/frontend/html/pages/entry.js index 65a3ef39b..54fb11c18 100644 --- a/src/zenserver/frontend/html/pages/entry.js +++ b/src/zenserver/frontend/html/pages/entry.js @@ -59,6 +59,81 @@ export class Page extends ZenPage } } + _find_iohash_field(container, name) + { + const found_field = container.find(name); + if (found_field != undefined) + { + var found_value = found_field.as_value(); + if (found_value instanceof Uint8Array) + { + var ret = ""; + for (var x of found_value) + ret += x.toString(16).padStart(2, "0"); + return ret; + } + } + return null; + } + + async _build_meta(section, entry) + { + var tree = {} + + for (const field of entry) + { + var visibleKey = undefined; + const name = field.get_name(); + if (name == "CookPackageArtifacts") + { + visibleKey = name; + } + else if (name.startsWith("meta.")) + { + visibleKey = name.slice(5); + } + + if (visibleKey != undefined) + { + var found_value = field.as_value(); + if (found_value instanceof Uint8Array) + { + var ret = ""; + for (var x of found_value) + ret += x.toString(16).padStart(2, "0"); + tree[visibleKey] = ret; + } + } + + } + + if (Object.keys(tree).length == 0) + return; + + const sub_section = section.add_section("meta"); + + const table = sub_section.add_widget( + Table, + ["name", "actions"], Table.Flag_PackRight + ); + for (const key in tree) + { + const row = table.add_row(key); + const value = tree[key]; + + const project = this.get_param("project"); + const oplog = this.get_param("oplog"); + const link = row.get_cell(0).link( + "/" + ["prj", project, "oplog", oplog, value+".json"].join("/") + ); + + const action_tb = new Toolbar(row.get_cell(-1), true); + action_tb.left().add("copy-hash").on_click(async (v) => { + await navigator.clipboard.writeText(v); + }, value); + } + } + async _build_page() { var entry = await this._entry; @@ -78,8 +153,16 @@ export class Page extends ZenPage delete tree["$id"]; - const sub_section = section.add_section("deps"); - this._build_deps(sub_section, tree); + if (Object.keys(tree).length != 0) + { + const sub_section = section.add_section("deps"); + this._build_deps(sub_section, tree); + } + } + + // meta + { + this._build_meta(section, entry); } // data @@ -128,7 +211,6 @@ export class Page extends ZenPage ); link.first_child().attr("download", `${io_hash}_${base_name}`); - const do_nothing = () => void(0); const action_tb = new Toolbar(row.get_cell(-1), true); action_tb.left().add("copy-hash").on_click(async (v) => { await navigator.clipboard.writeText(v); @@ -147,8 +229,11 @@ export class Page extends ZenPage _display_unsupported(section, entry) { + const replacer = (key, value) => + typeof value === "bigint" ? { $bigint: value.toString() } : value; + const object = entry.to_js_object(); - const text = JSON.stringify(object, null, " "); + const text = JSON.stringify(object, replacer, " "); section.tag("pre").text(text); } diff --git a/src/zenserver/frontend/html/pages/oplog.js b/src/zenserver/frontend/html/pages/oplog.js index f22c2a58f..bef5bacce 100644 --- a/src/zenserver/frontend/html/pages/oplog.js +++ b/src/zenserver/frontend/html/pages/oplog.js @@ -142,9 +142,12 @@ export class Page extends ZenPage async _search(needle) { - needle = needle.trim(); - if (needle.length < 3) + if (needle.length == 0) + { + this._build_table(this._index_start); return; + } + needle = needle.trim(); this._entry_table.clear(this._index_start); diff --git a/src/zenserver/frontend/html/pages/start.js b/src/zenserver/frontend/html/pages/start.js index 8c9df62f9..d1c13ccc7 100644 --- a/src/zenserver/frontend/html/pages/start.js +++ b/src/zenserver/frontend/html/pages/start.js @@ -5,6 +5,7 @@ import { ZenPage } from "./page.js" import { Fetcher } from "../util/fetcher.js" import { Friendly } from "../util/friendly.js" +import { Modal } from "../util/modal.js" import { Table, Toolbar } from "../util/widgets.js" //////////////////////////////////////////////////////////////////////////////// @@ -40,6 +41,41 @@ export class Page extends ZenPage action_tb.left().add("drop").on_click((x) => this.drop_project(x), project.Id); } + // cache + var section = this.add_section("z$"); + columns = [ + "namespace", + "dir", + "buckets", + "entries", + "size disk", + "size mem", + "actions", + ] + var zcache_info = new Fetcher().resource("/z$/").json(); + const cache_table = section.add_widget(Table, columns, Table.Flag_FitLeft|Table.Flag_PackRight); + for (const namespace of (await zcache_info)["Namespaces"]) + { + new Fetcher().resource(`/z$/${namespace}/`).json().then((data) => { + const row = cache_table.add_row( + "", + data["Configuration"]["RootDir"], + data["Buckets"].length, + data["EntryCount"], + Friendly.kib(data["StorageSize"].DiskSize), + Friendly.kib(data["StorageSize"].MemorySize) + ); + var cell = row.get_cell(0); + cell.tag().text(namespace).on_click(() => this.view_zcache(namespace)); + row.get_cell(1).tag().text(namespace); + + cell = row.get_cell(-1); + const action_tb = new Toolbar(cell, true); + action_tb.left().add("view").on_click(() => this.view_zcache(namespace)); + action_tb.left().add("drop").on_click(() => this.drop_zcache(namespace)); + }); + } + // stats section = this.add_section("stats"); columns = [ @@ -91,4 +127,23 @@ export class Page extends ZenPage .option("Yes", () => drop()) .option("No"); } + + view_zcache(namespace) + { + window.location = "?page=zcache&namespace=" + namespace; + } + + drop_zcache(namespace) + { + const drop = async () => { + await new Fetcher().resource("z$", namespace).delete(); + this.reload(); + }; + + new Modal() + .title("Confirmation") + .message(`Drop zcache '${namespace}'?`) + .option("Yes", () => drop()) + .option("No"); + } } diff --git a/src/zenserver/frontend/html/pages/zcache.js b/src/zenserver/frontend/html/pages/zcache.js new file mode 100644 index 000000000..974893b21 --- /dev/null +++ b/src/zenserver/frontend/html/pages/zcache.js @@ -0,0 +1,70 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +"use strict"; + +import { ZenPage } from "./page.js" +import { Fetcher } from "../util/fetcher.js" +import { Friendly } from "../util/friendly.js" +import { Modal } from "../util/modal.js" +import { Table, PropTable, Toolbar } from "../util/widgets.js" + +//////////////////////////////////////////////////////////////////////////////// +export class Page extends ZenPage +{ + async main() + { + const namespace = this.get_param("namespace"); + + var info = new Fetcher().resource(`/z$/${namespace}/`).json(); + + this.set_title("cache - " + namespace); + + var section = this.add_section("info"); + var cfg_table = section.add_section("config").add_widget(PropTable); + var storage_table = section.add_section("storage").add_widget(PropTable); + + info = await info; + + cfg_table.add_object(info["Configuration"], true); + + storage_table.add_property("disk", Friendly.kib(info["StorageSize"]["DiskSize"])); + storage_table.add_property("mem", Friendly.kib(info["StorageSize"]["MemorySize"])); + storage_table.add_property("entries", Friendly.sep(info["EntryCount"])); + + var column_names = ["name", "disk", "mem", "entries", "actions"]; + var bucket_table = this.add_section("buckets").add_widget( + Table, + column_names, + Table.Flag_BiasLeft + ); + for (const bucket of info["Buckets"]) + { + const row = bucket_table.add_row(bucket); + new Fetcher().resource(`/z$/${namespace}/${bucket}`).json().then((data) => { + row.get_cell(1).text(Friendly.kib(data["StorageSize"]["DiskSize"])); + row.get_cell(2).text(Friendly.kib(data["StorageSize"]["MemorySize"])); + row.get_cell(3).text(Friendly.sep(data["DiskEntryCount"])); + + const cell = row.get_cell(-1); + const action_tb = new Toolbar(cell, true); + action_tb.left().add("view") + action_tb.left().add("drop").on_click(() => this.drop_bucket(bucket)); + }); + } + } + + drop_bucket(bucket) + { + const drop = async () => { + const namespace = this.get_param("namespace"); + await new Fetcher().resource("z$", namespace, bucket).delete(); + this.reload(); + }; + + new Modal() + .title("Confirmation") + .message(`Drop bucket '${bucket}'?`) + .option("Yes", () => drop()) + .option("No"); + } +} diff --git a/src/zenserver/frontend/html/util/compactbinary.js b/src/zenserver/frontend/html/util/compactbinary.js index 366ec6aff..90e4249f6 100644 --- a/src/zenserver/frontend/html/util/compactbinary.js +++ b/src/zenserver/frontend/html/util/compactbinary.js @@ -284,7 +284,7 @@ CbFieldView.prototype.as_array = function() } //////////////////////////////////////////////////////////////////////////////// -CbFieldView.prototype.as_value = function(int_type=Number) +CbFieldView.prototype.as_value = function(int_type=BigInt) { switch (CbFieldTypeOps.get_type(this.get_type())) { @@ -388,8 +388,8 @@ CbObjectView.prototype.to_js_object = function() } if (node.is_string()) return node.as_value(); - if (node.is_float()) return node.as_value(); if (node.is_integer()) return node.as_value(); + if (node.is_float()) return node.as_value(); var ret = node.as_value(); if (ret instanceof Uint8Array) diff --git a/src/zenserver/frontend/html/util/friendly.js b/src/zenserver/frontend/html/util/friendly.js index b27721964..a15252faf 100644 --- a/src/zenserver/frontend/html/util/friendly.js +++ b/src/zenserver/frontend/html/util/friendly.js @@ -7,17 +7,17 @@ export class Friendly { static sep(value, prec=0) { - return (+value).toLocaleString("en", { + return (+Number(value)).toLocaleString("en", { style: "decimal", minimumFractionDigits : prec, maximumFractionDigits : prec, }); } - static k(x, p=0) { return Friendly.sep((x + 999) / Math.pow(10, 3)|0, p) + "K"; } - static m(x, p=1) { return Friendly.sep( x / Math.pow(10, 6), p) + "M"; } - static g(x, p=2) { return Friendly.sep( x / Math.pow(10, 9), p) + "G"; } - static kib(x, p=0) { return Friendly.sep((x + 1023) / (1 << 10)|0, p) + " KiB"; } - static mib(x, p=1) { return Friendly.sep( x / (1 << 20), p) + " MiB"; } - static gib(x, p=2) { return Friendly.sep( x / (1 << 30), p) + " GiB"; } + static k(x, p=0) { return Friendly.sep((BigInt(x) + 999n) / BigInt(Math.pow(10, 3))|0n, p) + "K"; } + static m(x, p=1) { return Friendly.sep( BigInt(x) / BigInt(Math.pow(10, 6)), p) + "M"; } + static g(x, p=2) { return Friendly.sep( BigInt(x) / BigInt(Math.pow(10, 9)), p) + "G"; } + static kib(x, p=0) { return Friendly.sep((BigInt(x) + 1023n) / (1n << 10n)|0n, p) + " KiB"; } + static mib(x, p=1) { return Friendly.sep( BigInt(x) / (1n << 20n), p) + " MiB"; } + static gib(x, p=2) { return Friendly.sep( BigInt(x) / (1n << 30n), p) + " GiB"; } } diff --git a/src/zenserver/frontend/html/util/widgets.js b/src/zenserver/frontend/html/util/widgets.js index d4f9875cd..32a3f4d28 100644 --- a/src/zenserver/frontend/html/util/widgets.js +++ b/src/zenserver/frontend/html/util/widgets.js @@ -173,7 +173,7 @@ export class PropTable extends Table continue; } - if (friendly && typeof value == "number") + if (friendly && ((typeof value == "number") || (typeof value == "bigint"))) { if (key.indexOf("memory") >= 0) value = Friendly.kib(value); else if (key.indexOf("disk") >= 0) value = Friendly.kib(value); diff --git a/src/zenserver/main.cpp b/src/zenserver/main.cpp index d5419d342..b0d945814 100644 --- a/src/zenserver/main.cpp +++ b/src/zenserver/main.cpp @@ -10,6 +10,7 @@ #include <zencore/fmtutils.h> #include <zencore/logging.h> #include <zencore/scopeguard.h> +#include <zencore/sentryintegration.h> #include <zencore/session.h> #include <zencore/string.h> #include <zencore/thread.h> @@ -25,7 +26,6 @@ #include "config.h" #include "diag/logging.h" -#include "sentryintegration.h" #if ZEN_PLATFORM_WINDOWS # include <zencore/windows.h> @@ -96,12 +96,18 @@ ZenEntryPoint::Run() #if ZEN_USE_SENTRY SentryIntegration Sentry; - if (m_ServerOptions.NoSentry == false) + if (m_ServerOptions.SentryConfig.Disable == false) { std::string SentryDatabasePath = (m_ServerOptions.DataDir / ".sentry-native").string(); std::string SentryAttachmentPath = m_ServerOptions.AbsLogFile.string(); - Sentry.Initialize(SentryDatabasePath, SentryAttachmentPath, m_ServerOptions.SentryAllowPII, m_ServerOptions.CommandLine); + Sentry.Initialize({.DatabasePath = SentryDatabasePath, + .AttachmentsPath = SentryAttachmentPath, + .Dsn = m_ServerOptions.SentryConfig.Dsn, + .Environment = m_ServerOptions.SentryConfig.Environment, + .AllowPII = m_ServerOptions.SentryConfig.AllowPII, + .Debug = m_ServerOptions.SentryConfig.Debug}, + m_ServerOptions.CommandLine); } #endif try @@ -406,17 +412,17 @@ main(int argc, char* argv[]) if (!DeleteReason.empty()) { - if (std::filesystem::exists(ServerOptions.DataDir)) + if (IsDir(ServerOptions.DataDir)) { ZEN_CONSOLE_INFO("deleting files from '{}' ({})", ServerOptions.DataDir, DeleteReason); DeleteDirectories(ServerOptions.DataDir); } } - if (!std::filesystem::exists(ServerOptions.DataDir)) + if (!IsDir(ServerOptions.DataDir)) { ServerOptions.IsFirstRun = true; - std::filesystem::create_directories(ServerOptions.DataDir); + CreateDirectories(ServerOptions.DataDir); } if (!ServerOptions.BaseSnapshotDir.empty()) diff --git a/src/zenserver/objectstore/objectstore.cpp b/src/zenserver/objectstore/objectstore.cpp index b0212ab07..8faf12165 100644 --- a/src/zenserver/objectstore/objectstore.cpp +++ b/src/zenserver/objectstore/objectstore.cpp @@ -219,13 +219,17 @@ private: StringBuilderBase& Builder; }; -HttpObjectStoreService::HttpObjectStoreService(ObjectStoreConfig Cfg) : m_Cfg(std::move(Cfg)) +HttpObjectStoreService::HttpObjectStoreService(HttpStatusService& StatusService, ObjectStoreConfig Cfg) +: m_StatusService(StatusService) +, m_Cfg(std::move(Cfg)) { Inititalize(); + m_StatusService.RegisterHandler("obj", *this); } HttpObjectStoreService::~HttpObjectStoreService() { + m_StatusService.UnregisterHandler("obj", *this); } const char* @@ -245,13 +249,21 @@ HttpObjectStoreService::HandleRequest(zen::HttpServerRequest& Request) } void +HttpObjectStoreService::HandleStatusRequest(HttpServerRequest& Request) +{ + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void HttpObjectStoreService::Inititalize() { namespace fs = std::filesystem; ZEN_LOG_INFO(LogObj, "Initialzing Object Store in '{}'", m_Cfg.RootDirectory); const fs::path BucketsPath = m_Cfg.RootDirectory / "buckets"; - if (!fs::exists(BucketsPath)) + if (!IsDir(BucketsPath)) { CreateDirectories(BucketsPath); } @@ -269,9 +281,9 @@ HttpObjectStoreService::Inititalize() m_Router.RegisterRoute( "bucket/{path}", [this](zen::HttpRouterRequest& Request) { - const std::string Path = Request.GetCapture(1); - const auto Sep = Path.find_last_of('.'); - const bool IsObject = Sep != std::string::npos && Path.size() - Sep > 0; + const std::string_view Path = Request.GetCapture(1); + const auto Sep = Path.find_last_of('.'); + const bool IsObject = Sep != std::string::npos && Path.size() - Sep > 0; if (IsObject) { @@ -324,7 +336,7 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request) const fs::path BucketPath = m_Cfg.RootDirectory / "buckets" / BucketName; { std::lock_guard _(BucketsMutex); - if (!fs::exists(BucketPath)) + if (!IsDir(BucketPath)) { CreateDirectories(BucketPath); ZEN_LOG_INFO(LogObj, "CREATE - new bucket '{}' OK", BucketName); @@ -337,18 +349,18 @@ HttpObjectStoreService::CreateBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string& Path) +HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; - const auto Sep = Path.find_first_of('/'); - const std::string BucketName = Sep == std::string::npos ? Path : Path.substr(0, Sep); + const auto Sep = Path.find_first_of('/'); + const std::string BucketName{Sep == std::string::npos ? Path : Path.substr(0, Sep)}; if (BucketName.empty()) { return Request.ServerRequest().WriteResponse(HttpResponseCode::BadRequest); } - std::string BucketPrefix = Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1); + std::string BucketPrefix{Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1)}; if (BucketPrefix.empty()) { const auto QueryParms = Request.ServerRequest().GetQueryParams(); @@ -376,7 +388,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s Writer.BeginArray("Contents"sv); } - void VisitFile(const fs::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + void VisitFile(const fs::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { const fs::path FullPath = Parent / fs::path(File); fs::path RelativePath = fs::relative(FullPath, BucketPath); @@ -406,7 +418,7 @@ HttpObjectStoreService::ListBucket(zen::HttpRouterRequest& Request, const std::s Visitor FileVisitor(BucketName, BucketRoot, RelativeBucketPath); FileSystemTraversal Traversal; - if (std::filesystem::exists(FullPath)) + if (IsDir(FullPath)) { std::lock_guard _(BucketsMutex); Traversal.TraverseFileSystem(FullPath, FileVisitor); @@ -450,14 +462,13 @@ HttpObjectStoreService::DeleteBucket(zen::HttpRouterRequest& Request) } void -HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string& Path) +HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::string_view Path) { namespace fs = std::filesystem; - const auto Sep = Path.find_first_of('/'); - const std::string BucketName = Sep == std::string::npos ? Path : Path.substr(0, Sep); - const std::string BucketPrefix = - Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1); + const auto Sep = Path.find_first_of('/'); + const std::string BucketName{Sep == std::string::npos ? Path : Path.substr(0, Sep)}; + const std::string BucketPrefix{Sep == std::string::npos || Sep == Path.size() - 1 ? std::string() : Path.substr(BucketName.size() + 1)}; const fs::path BucketDir = GetBucketDirectory(BucketName); @@ -476,7 +487,7 @@ HttpObjectStoreService::GetObject(zen::HttpRouterRequest& Request, const std::st } const fs::path FilePath = BucketDir / RelativeBucketPath; - if (!fs::exists(FilePath)) + if (!IsFile(FilePath)) { ZEN_LOG_DEBUG(LogObj, "GET - '{}/{}' [FAILED], doesn't exist", BucketName, FilePath); return Request.ServerRequest().WriteResponse(HttpResponseCode::NotFound); @@ -554,8 +565,8 @@ HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request) { namespace fs = std::filesystem; - const std::string& BucketName = Request.GetCapture(1); - const fs::path BucketDir = GetBucketDirectory(BucketName); + const std::string_view BucketName = Request.GetCapture(1); + const fs::path BucketDir = GetBucketDirectory(BucketName); if (BucketDir.empty()) { @@ -577,7 +588,7 @@ HttpObjectStoreService::PutObject(zen::HttpRouterRequest& Request) { std::lock_guard _(BucketsMutex); - if (!fs::exists(FileDirectory)) + if (!IsDir(FileDirectory)) { CreateDirectories(FileDirectory); } diff --git a/src/zenserver/objectstore/objectstore.h b/src/zenserver/objectstore/objectstore.h index c905ceab3..44e50e208 100644 --- a/src/zenserver/objectstore/objectstore.h +++ b/src/zenserver/objectstore/objectstore.h @@ -3,6 +3,7 @@ #pragma once #include <zenhttp/httpserver.h> +#include <zenhttp/httpstatus.h> #include <atomic> #include <filesystem> #include <mutex> @@ -23,24 +24,26 @@ struct ObjectStoreConfig std::vector<BucketConfig> Buckets; }; -class HttpObjectStoreService final : public zen::HttpService +class HttpObjectStoreService final : public zen::HttpService, public IHttpStatusProvider { public: - HttpObjectStoreService(ObjectStoreConfig Cfg); + HttpObjectStoreService(HttpStatusService& StatusService, ObjectStoreConfig Cfg); virtual ~HttpObjectStoreService(); virtual const char* BaseUri() const override; virtual void HandleRequest(zen::HttpServerRequest& Request) override; + virtual void HandleStatusRequest(HttpServerRequest& Request) override; private: void Inititalize(); std::filesystem::path GetBucketDirectory(std::string_view BucketName); void CreateBucket(zen::HttpRouterRequest& Request); - void ListBucket(zen::HttpRouterRequest& Request, const std::string& Path); + void ListBucket(zen::HttpRouterRequest& Request, const std::string_view Path); void DeleteBucket(zen::HttpRouterRequest& Request); - void GetObject(zen::HttpRouterRequest& Request, const std::string& Path); + void GetObject(zen::HttpRouterRequest& Request, const std::string_view Path); void PutObject(zen::HttpRouterRequest& Request); + HttpStatusService& m_StatusService; ObjectStoreConfig m_Cfg; std::mutex BucketsMutex; HttpRequestRouter m_Router; diff --git a/src/zenserver/projectstore/buildsremoteprojectstore.cpp b/src/zenserver/projectstore/buildsremoteprojectstore.cpp index 302b81729..ab96ae92d 100644 --- a/src/zenserver/projectstore/buildsremoteprojectstore.cpp +++ b/src/zenserver/projectstore/buildsremoteprojectstore.cpp @@ -3,6 +3,7 @@ #include "buildsremoteprojectstore.h" #include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryvalidation.h> #include <zencore/compress.h> #include <zencore/fmtutils.h> @@ -34,16 +35,10 @@ public: , m_BuildId(BuildId) , m_MetaData(MetaData) , m_TempFilePath(TempFilePath) + , m_EnableBlocks(!ForceDisableBlocks) + , m_UseTempBlocks(!ForceDisableTempBlocks) { m_MetaData.MakeOwned(); - if (ForceDisableBlocks) - { - m_EnableBlocks = false; - } - if (ForceDisableTempBlocks) - { - m_UseTempBlocks = false; - } } virtual RemoteStoreInfo GetInfo() const override @@ -70,7 +65,7 @@ public: { ZEN_ASSERT(m_OplogBuildPartId == Oid::Zero); - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); IoBuffer Payload = m_MetaData; Payload.SetContentType(ZenContentType::kCbObject); @@ -94,7 +89,7 @@ public: virtual SaveResult SaveContainer(const IoBuffer& Payload) override { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); PutBuildPartResult PutResult = Session.PutBuildPart(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, OplogContainerPartName, Payload); AddStats(PutResult); @@ -114,24 +109,25 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&& Block) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, + const IoHash& RawHash, + ChunkBlockDescription&& Block) override { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); JupiterResult PutResult = - Session.PutBuildBlob(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, ZenContentType::kCompressedBinary, Payload); + Session.PutBuildBlob(m_Namespace, m_Bucket, m_BuildId, RawHash, ZenContentType::kCompressedBinary, Payload); AddStats(PutResult); SaveAttachmentResult Result{ConvertResult(PutResult)}; if (Result.ErrorCode) { - Result.Reason = fmt::format("Failed saving oplog attachment to {}/{}/{}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed saving oplog attachment to {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, Result.Reason); return Result; @@ -139,57 +135,21 @@ public: if (Block.BlockHash == RawHash) { - ZEN_ASSERT(Block.ChunkLengths.size() == Block.ChunkHashes.size()); - CbObjectWriter Writer; - Writer.AddHash("rawHash"sv, RawHash); - Writer.BeginArray("rawHashes"sv); - { - for (const IoHash& ChunkHash : Block.ChunkHashes) - { - Writer.AddHash(ChunkHash); - } - } - Writer.EndArray(); - Writer.BeginArray("chunkLengths"); - { - for (uint32_t ChunkSize : Block.ChunkLengths) - { - Writer.AddInteger(ChunkSize); - } - } - Writer.EndArray(); - Writer.BeginArray("chunkOffsets"); - { - ZEN_ASSERT(Block.FirstChunkOffset != (uint32_t)-1); - uint32_t Offset = Block.FirstChunkOffset; - for (uint32_t ChunkSize : Block.ChunkLengths) - { - Writer.AddInteger(Offset); - Offset += ChunkSize; - } - } - Writer.EndArray(); + CbObjectWriter BlockMetaData; + BlockMetaData.AddString("createdBy", GetRunningExecutablePath().stem().string()); - Writer.BeginObject("metadata"sv); - { - Writer.AddString("createdBy", "zenserver"); - } - Writer.EndObject(); - - IoBuffer MetaPayload = Writer.Save().GetBuffer().AsIoBuffer(); + IoBuffer MetaPayload = BuildChunkBlockDescription(Block, BlockMetaData.Save()).GetBuffer().AsIoBuffer(); MetaPayload.SetContentType(ZenContentType::kCbObject); - JupiterResult PutMetaResult = - Session.PutBlockMetadata(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, MetaPayload); + JupiterResult PutMetaResult = Session.PutBlockMetadata(m_Namespace, m_Bucket, m_BuildId, RawHash, MetaPayload); AddStats(PutMetaResult); RemoteProjectStore::Result MetaDataResult = ConvertResult(PutMetaResult); if (MetaDataResult.ErrorCode) { - ZEN_WARN("Failed saving block attachment meta data to {}/{}/{}/{}/{}/{}. Reason: '{}'", + ZEN_WARN("Failed saving block attachment meta data to {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, MetaDataResult.Reason); } @@ -217,7 +177,7 @@ public: ZEN_UNUSED(RawHash); ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); FinalizeBuildPartResult FinalizeRefResult = Session.FinalizeBuildPart(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash); AddStats(FinalizeRefResult); @@ -256,7 +216,7 @@ public: { ZEN_ASSERT(m_OplogBuildPartId == Oid::Zero); - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); JupiterResult GetBuildResult = Session.GetBuild(m_Namespace, m_Bucket, m_BuildId); AddStats(GetBuildResult); LoadContainerResult Result{ConvertResult(GetBuildResult)}; @@ -341,52 +301,48 @@ public: virtual GetKnownBlocksResult GetKnownBlocks() override { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); - JupiterResult FindResult = Session.FindBlocks(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); + JupiterResult FindResult = Session.FindBlocks(m_Namespace, m_Bucket, m_BuildId, (uint64_t)-1); AddStats(FindResult); GetKnownBlocksResult Result{ConvertResult(FindResult)}; if (Result.ErrorCode) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed listing know blocks for {}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, Result.Reason); return Result; } - CbObject BlocksObject = LoadCompactBinaryObject(FindResult.Response); - if (!BlocksObject) + if (ValidateCompactBinary(FindResult.Response.GetView(), CbValidateMode::Default) != CbValidateError::None) { Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); - Result.Reason = fmt::format("The block list {}/{}/{}/{} is not formatted as a compact binary object"sv, + Result.Reason = fmt::format("The block list {}/{}/{} is not formatted as a compact binary object"sv, m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, - m_BuildId, - m_OplogBuildPartId); + m_BuildId); return Result; } - - CbArrayView Blocks = BlocksObject["blocks"].AsArrayView(); - Result.Blocks.reserve(Blocks.Num()); - for (CbFieldView BlockView : Blocks) + std::optional<std::vector<ChunkBlockDescription>> Blocks = + ParseChunkBlockDescriptionList(LoadCompactBinaryObject(FindResult.Response)); + if (!Blocks) { - CbObjectView BlockObject = BlockView.AsObjectView(); - IoHash BlockHash = BlockObject["rawHash"sv].AsHash(); - if (BlockHash != IoHash::Zero) - { - CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); - std::vector<IoHash> ChunkHashes; - ChunkHashes.reserve(ChunksArray.Num()); - for (CbFieldView ChunkView : ChunksArray) - { - ChunkHashes.push_back(ChunkView.AsHash()); - } - Result.Blocks.emplace_back(Block{.BlockHash = BlockHash, .ChunkHashes = ChunkHashes}); - } + Result.ErrorCode = gsl::narrow<int32_t>(HttpResponseCode::InternalServerError); + Result.Reason = fmt::format("The block list {}/{}/{} is not formatted as a list of blocks"sv, + m_JupiterClient->ServiceUrl(), + m_Namespace, + m_Bucket, + m_BuildId); + return Result; + } + Result.Blocks.reserve(Blocks.value().size()); + for (ChunkBlockDescription& BlockDescription : Blocks.value()) + { + Result.Blocks.push_back(ThinChunkBlockDescription{.BlockHash = BlockDescription.BlockHash, + .ChunkRawHashes = std::move(BlockDescription.ChunkRawHashes)}); } return Result; } @@ -394,19 +350,18 @@ public: virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { ZEN_ASSERT(m_OplogBuildPartId != Oid::Zero); - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); - JupiterResult GetResult = Session.GetBuildBlob(m_Namespace, m_Bucket, m_BuildId, m_OplogBuildPartId, RawHash, m_TempFilePath); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); + JupiterResult GetResult = Session.GetBuildBlob(m_Namespace, m_Bucket, m_BuildId, RawHash, m_TempFilePath); AddStats(GetResult); LoadAttachmentResult Result{ConvertResult(GetResult), std::move(GetResult.Response)}; if (GetResult.ErrorCode) { - Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}&{}/{}/{}. Reason: '{}'", + Result.Reason = fmt::format("Failed fetching oplog attachment from {}/{}/{}/{}/{}. Reason: '{}'", m_JupiterClient->ServiceUrl(), m_Namespace, m_Bucket, m_BuildId, - m_OplogBuildPartId, RawHash, Result.Reason); } @@ -491,8 +446,9 @@ private: IoBuffer m_MetaData; Oid m_OplogBuildPartId = Oid::Zero; std::filesystem::path m_TempFilePath; - bool m_EnableBlocks = true; - bool m_UseTempBlocks = true; + const bool m_EnableBlocks = true; + const bool m_UseTempBlocks = true; + const bool m_AllowRedirect = false; std::atomic_uint64_t m_SentBytes = {}; std::atomic_uint64_t m_ReceivedBytes = {}; @@ -533,7 +489,15 @@ CreateBuildsRemoteStore(const BuildsRemoteStoreOptions& Options, const std::file { TokenProvider = httpclientauth::CreateFromStaticToken(Options.AccessToken); } - else + else if (!Options.OidcExePath.empty()) + { + if (auto TokenProviderMaybe = httpclientauth::CreateFromOidcTokenExecutable(Options.OidcExePath, Url); TokenProviderMaybe) + { + TokenProvider = TokenProviderMaybe.value(); + } + } + + if (!TokenProvider) { TokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(Options.AuthManager); } diff --git a/src/zenserver/projectstore/buildsremoteprojectstore.h b/src/zenserver/projectstore/buildsremoteprojectstore.h index 8b2c6c8c8..c52b13886 100644 --- a/src/zenserver/projectstore/buildsremoteprojectstore.h +++ b/src/zenserver/projectstore/buildsremoteprojectstore.h @@ -10,17 +10,18 @@ class AuthMgr; struct BuildsRemoteStoreOptions : RemoteStoreOptions { - std::string Url; - std::string Namespace; - std::string Bucket; - Oid BuildId; - std::string OpenIdProvider; - std::string AccessToken; - AuthMgr& AuthManager; - bool ForceDisableBlocks = false; - bool ForceDisableTempBlocks = false; - bool AssumeHttp2 = false; - IoBuffer MetaData; + std::string Url; + std::string Namespace; + std::string Bucket; + Oid BuildId; + std::string OpenIdProvider; + std::string AccessToken; + AuthMgr& AuthManager; + std::filesystem::path OidcExePath; + bool ForceDisableBlocks = false; + bool ForceDisableTempBlocks = false; + bool AssumeHttp2 = false; + IoBuffer MetaData; }; std::shared_ptr<RemoteProjectStore> CreateBuildsRemoteStore(const BuildsRemoteStoreOptions& Options, diff --git a/src/zenserver/projectstore/fileremoteprojectstore.cpp b/src/zenserver/projectstore/fileremoteprojectstore.cpp index 0fe739a12..375e44e59 100644 --- a/src/zenserver/projectstore/fileremoteprojectstore.cpp +++ b/src/zenserver/projectstore/fileremoteprojectstore.cpp @@ -73,7 +73,7 @@ public: ContainerObject.IterateAttachments([&](CbFieldView FieldView) { IoHash AttachmentHash = FieldView.AsBinaryAttachment(); std::filesystem::path AttachmentPath = GetAttachmentPath(AttachmentHash); - if (!std::filesystem::exists(AttachmentPath)) + if (!IsFile(AttachmentPath)) { Result.Needs.insert(AttachmentHash); } @@ -106,12 +106,12 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { Stopwatch Timer; SaveAttachmentResult Result; std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); - if (!std::filesystem::exists(ChunkPath)) + if (!IsFile(ChunkPath)) { try { @@ -182,7 +182,7 @@ public: for (const IoHash& RawHash : BlockHashes) { std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); - if (std::filesystem::is_regular_file(ChunkPath)) + if (IsFile(ChunkPath)) { ExistingBlockHashes.push_back(RawHash); } @@ -192,7 +192,7 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; } - std::vector<RemoteProjectStore::Block> KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + std::vector<ThinChunkBlockDescription> KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); GetKnownBlocksResult Result{{.ElapsedSeconds = LoadResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000}}; Result.Blocks = std::move(KnownBlocks); return Result; @@ -203,7 +203,7 @@ public: Stopwatch Timer; LoadAttachmentResult Result; std::filesystem::path ChunkPath = GetAttachmentPath(RawHash); - if (!std::filesystem::is_regular_file(ChunkPath)) + if (!IsFile(ChunkPath)) { Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); Result.Reason = fmt::format("Failed loading oplog attachment from '{}'. Reason: 'The file does not exist'", ChunkPath.string()); @@ -246,7 +246,7 @@ private: LoadContainerResult Result; std::filesystem::path SourcePath = m_OutputPath; SourcePath.append(Name); - if (!std::filesystem::is_regular_file(SourcePath)) + if (!IsFile(SourcePath)) { Result.ErrorCode = gsl::narrow<int>(HttpResponseCode::NotFound); Result.Reason = fmt::format("Failed loading oplog container from '{}'. Reason: 'The file does not exist'", SourcePath.string()); diff --git a/src/zenserver/projectstore/httpprojectstore.cpp b/src/zenserver/projectstore/httpprojectstore.cpp index 0b8e5f13b..317a419eb 100644 --- a/src/zenserver/projectstore/httpprojectstore.cpp +++ b/src/zenserver/projectstore/httpprojectstore.cpp @@ -8,6 +8,7 @@ #include <zencore/compactbinarybuilder.h> #include <zencore/compactbinarypackage.h> #include <zencore/compactbinaryutil.h> +#include <zencore/compactbinaryvalidation.h> #include <zencore/filesystem.h> #include <zencore/fmtutils.h> #include <zencore/logging.h> @@ -234,10 +235,15 @@ namespace { ////////////////////////////////////////////////////////////////////////// -HttpProjectService::HttpProjectService(CidStore& Store, ProjectStore* Projects, HttpStatsService& StatsService, AuthMgr& AuthMgr) +HttpProjectService::HttpProjectService(CidStore& Store, + ProjectStore* Projects, + HttpStatusService& StatusService, + HttpStatsService& StatsService, + AuthMgr& AuthMgr) : m_Log(logging::Get("project")) , m_CidStore(Store) , m_ProjectStore(Projects) +, m_StatusService(StatusService) , m_StatsService(StatsService) , m_AuthMgr(AuthMgr) { @@ -245,8 +251,6 @@ HttpProjectService::HttpProjectService(CidStore& Store, ProjectStore* Projects, using namespace std::literals; - m_StatsService.RegisterHandler("prj", *this); - m_Router.AddPattern("project", "([[:alnum:]_.]+)"); m_Router.AddPattern("log", "([[:alnum:]_.]+)"); m_Router.AddPattern("op", "([[:digit:]]+?)"); @@ -365,11 +369,15 @@ HttpProjectService::HttpProjectService(CidStore& Store, ProjectStore* Projects, "details\\$/{project}/{log}/{chunk}", [this](HttpRouterRequest& Req) { HandleOplogOpDetailsRequest(Req); }, HttpVerb::kGet); + + m_StatusService.RegisterHandler("prj", *this); + m_StatsService.RegisterHandler("prj", *this); } HttpProjectService::~HttpProjectService() { m_StatsService.UnregisterHandler("prj", *this); + m_StatusService.UnregisterHandler("prj", *this); } const char* @@ -465,6 +473,15 @@ HttpProjectService::HandleStatsRequest(HttpServerRequest& HttpReq) } void +HttpProjectService::HandleStatusRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpProjectService::Status"); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void HttpProjectService::HandleProjectListRequest(HttpRouterRequest& Req) { ZEN_TRACE_CPU("ProjectService::ProjectList"); @@ -885,10 +902,63 @@ HttpProjectService::HandleChunkByCidRequest(HttpRouterRequest& Req) case HttpVerb::kGet: { IoBuffer Value; - std::pair<HttpResponseCode, std::string> Result = - m_ProjectStore->GetChunk(ProjectId, OplogId, Cid, AcceptType, Value, nullptr); + std::pair<HttpResponseCode, std::string> Result = m_ProjectStore->GetChunk(ProjectId, OplogId, Cid, Value, nullptr); if (Result.first == HttpResponseCode::OK) { + if (AcceptType == ZenContentType::kUnknownContentType || AcceptType == ZenContentType::kBinary || + AcceptType == ZenContentType::kJSON || AcceptType == ZenContentType::kYAML || + AcceptType == ZenContentType::kCbObject) + { + CompressedBuffer Compressed = CompressedBuffer::FromCompressedNoValidate(std::move(Value)); + IoBuffer DecompressedBuffer = Compressed.Decompress().AsIoBuffer(); + + if (DecompressedBuffer) + { + if (AcceptType == ZenContentType::kJSON || AcceptType == ZenContentType::kYAML || + AcceptType == ZenContentType::kCbObject) + { + CbValidateError CbErr = ValidateCompactBinary(DecompressedBuffer.GetView(), CbValidateMode::Default); + if (!!CbErr) + { + m_ProjectStats.BadRequestCount++; + ZEN_DEBUG( + "chunk - '{}/{}/{}' WRONGTYPE. Reason: `Requested {} format, but could not convert to object`", + ProjectId, + OplogId, + Cid, + ToString(AcceptType)); + return HttpReq.WriteResponse( + HttpResponseCode::NotAcceptable, + HttpContentType::kText, + fmt::format("Content format not supported, requested {} format, but could not convert to object", + ToString(AcceptType))); + } + + m_ProjectStats.ChunkHitCount++; + CbObject ContainerObject = LoadCompactBinaryObject(DecompressedBuffer); + return HttpReq.WriteResponse(HttpResponseCode::OK, ContainerObject); + } + else + { + Value = DecompressedBuffer; + Value.SetContentType(ZenContentType::kBinary); + } + } + else + { + m_ProjectStats.BadRequestCount++; + ZEN_DEBUG("chunk - '{}/{}/{}' WRONGTYPE. Reason: `Requested {} format, but could not decompress stored data`", + ProjectId, + OplogId, + Cid, + ToString(AcceptType)); + return HttpReq.WriteResponse( + HttpResponseCode::NotAcceptable, + HttpContentType::kText, + fmt::format("Content format not supported, requested {} format, but could not decompress stored data", + ToString(AcceptType))); + } + } m_ProjectStats.ChunkHitCount++; return HttpReq.WriteResponse(HttpResponseCode::OK, Value.GetContentType(), Value); } @@ -983,15 +1053,19 @@ HttpProjectService::HandleOplogOpPrepRequest(HttpRouterRequest& Req) IoBuffer Payload = HttpReq.ReadPayload(); CbObject RequestObject = LoadCompactBinaryObject(Payload); - std::vector<IoHash> ChunkList; - CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); - ChunkList.reserve(HaveList.Num()); - for (auto& Entry : HaveList) + std::vector<IoHash> NeedList; + { - ChunkList.push_back(Entry.AsHash()); - } + eastl::fixed_vector<IoHash, 16> ChunkList; + CbArrayView HaveList = RequestObject["have"sv].AsArrayView(); + ChunkList.reserve(HaveList.Num()); + for (auto& Entry : HaveList) + { + ChunkList.push_back(Entry.AsHash()); + } - std::vector<IoHash> NeedList = FoundLog->CheckPendingChunkReferences(ChunkList, std::chrono::minutes(2)); + NeedList = FoundLog->CheckPendingChunkReferences(std::span(begin(ChunkList), end(ChunkList)), std::chrono::minutes(2)); + } CbObjectWriter Cbo(1 + 1 + 5 + NeedList.size() * (1 + sizeof(IoHash::Hash)) + 1); Cbo.BeginArray("need"); @@ -1151,7 +1225,7 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) return HttpReq.WriteResponse(HttpResponseCode::BadRequest, HttpContentType::kText, "No oplog entry key specified"); } - std::vector<IoHash> ReferencedChunks; + eastl::fixed_vector<IoHash, 16> ReferencedChunks; Core.IterateAttachments([&ReferencedChunks](CbFieldView View) { ReferencedChunks.push_back(View.AsAttachment()); }); // Write core to oplog @@ -1169,7 +1243,7 @@ HttpProjectService::HandleOplogOpNewRequest(HttpRouterRequest& Req) // Once we stored the op, we no longer need to retain any chunks this op references if (!ReferencedChunks.empty()) { - FoundLog->RemovePendingChunkReferences(ReferencedChunks); + FoundLog->RemovePendingChunkReferences(std::span(begin(ReferencedChunks), end(ReferencedChunks))); } m_ProjectStats.OpWriteCount++; @@ -1301,9 +1375,9 @@ HttpProjectService::HandleOpLogOpRequest(HttpRouterRequest& Req) HttpServerRequest& HttpReq = Req.ServerRequest(); - const std::string& ProjectId = Req.GetCapture(1); - const std::string& OplogId = Req.GetCapture(2); - const std::string& OpIdString = Req.GetCapture(3); + const std::string_view ProjectId = Req.GetCapture(1); + const std::string_view OplogId = Req.GetCapture(2); + const std::string_view OpIdString = Req.GetCapture(3); Ref<ProjectStore::Project> Project = m_ProjectStore->OpenProject(ProjectId); if (!Project) @@ -1690,8 +1764,8 @@ HttpProjectService::HandleProjectRequest(HttpRouterRequest& Req) using namespace std::literals; - HttpServerRequest& HttpReq = Req.ServerRequest(); - const std::string ProjectId = Req.GetCapture(1); + HttpServerRequest& HttpReq = Req.ServerRequest(); + const std::string_view ProjectId = Req.GetCapture(1); switch (HttpReq.RequestVerb()) { diff --git a/src/zenserver/projectstore/httpprojectstore.h b/src/zenserver/projectstore/httpprojectstore.h index 8e74c57a5..295defa5c 100644 --- a/src/zenserver/projectstore/httpprojectstore.h +++ b/src/zenserver/projectstore/httpprojectstore.h @@ -5,6 +5,7 @@ #include <zencore/stats.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstats.h> +#include <zenhttp/httpstatus.h> #include <zenstore/cidstore.h> namespace zen { @@ -31,16 +32,21 @@ class ProjectStore; // refs: // -class HttpProjectService : public HttpService, public IHttpStatsProvider +class HttpProjectService : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider { public: - HttpProjectService(CidStore& Store, ProjectStore* InProjectStore, HttpStatsService& StatsService, AuthMgr& AuthMgr); + HttpProjectService(CidStore& Store, + ProjectStore* InProjectStore, + HttpStatusService& StatusService, + HttpStatsService& StatsService, + AuthMgr& AuthMgr); ~HttpProjectService(); virtual const char* BaseUri() const override; virtual void HandleRequest(HttpServerRequest& Request) override; virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual void HandleStatusRequest(HttpServerRequest& Request) override; private: struct ProjectStats @@ -89,6 +95,7 @@ private: CidStore& m_CidStore; HttpRequestRouter m_Router; Ref<ProjectStore> m_ProjectStore; + HttpStatusService& m_StatusService; HttpStatsService& m_StatsService; AuthMgr& m_AuthMgr; ProjectStats m_ProjectStats; diff --git a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp index e906127ff..3728babb5 100644 --- a/src/zenserver/projectstore/jupiterremoteprojectstore.cpp +++ b/src/zenserver/projectstore/jupiterremoteprojectstore.cpp @@ -31,15 +31,9 @@ public: , m_Key(Key) , m_OptionalBaseKey(OptionalBaseKey) , m_TempFilePath(TempFilePath) + , m_EnableBlocks(!ForceDisableBlocks) + , m_UseTempBlocks(!ForceDisableTempBlocks) { - if (ForceDisableBlocks) - { - m_EnableBlocks = false; - } - if (ForceDisableTempBlocks) - { - m_UseTempBlocks = false; - } } virtual RemoteStoreInfo GetInfo() const override @@ -75,7 +69,7 @@ public: virtual SaveResult SaveContainer(const IoBuffer& Payload) override { - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); PutRefResult PutResult = Session.PutRef(m_Namespace, m_Bucket, m_Key, Payload, ZenContentType::kCbObject); AddStats(PutResult); @@ -92,9 +86,9 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); JupiterResult PutResult = Session.PutCompressedBlob(m_Namespace, RawHash, Payload); AddStats(PutResult); @@ -127,7 +121,7 @@ public: virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) override { - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); FinalizeRefResult FinalizeRefResult = Session.FinalizeRef(m_Namespace, m_Bucket, m_Key, RawHash); AddStats(FinalizeRefResult); @@ -164,7 +158,7 @@ public: {.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds}}; } - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); JupiterExistsResult ExistsResult = Session.CompressedBlobExists(m_Namespace, std::set<IoHash>(BlockHashes.begin(), BlockHashes.end())); AddStats(ExistsResult); @@ -193,7 +187,7 @@ public: return GetKnownBlocksResult{{.ErrorCode = static_cast<int>(HttpResponseCode::NoContent), .ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds}}; } - std::vector<RemoteProjectStore::Block> KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); + std::vector<ThinChunkBlockDescription> KnownBlocks = GetBlocksFromOplog(LoadResult.ContainerObject, ExistingBlockHashes); GetKnownBlocksResult Result{ {.ElapsedSeconds = LoadResult.ElapsedSeconds + ExistsResult.ElapsedSeconds + Timer.GetElapsedTimeUs() * 1000.0}}; @@ -203,7 +197,7 @@ public: virtual LoadAttachmentResult LoadAttachment(const IoHash& RawHash) override { - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); JupiterResult GetResult = Session.GetCompressedBlob(m_Namespace, RawHash, m_TempFilePath); AddStats(GetResult); @@ -239,7 +233,7 @@ public: private: LoadContainerResult LoadContainer(const IoHash& Key) { - JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client()); + JupiterSession Session(m_JupiterClient->Logger(), m_JupiterClient->Client(), m_AllowRedirect); JupiterResult GetResult = Session.GetRef(m_Namespace, m_Bucket, Key, ZenContentType::kCbObject); AddStats(GetResult); if (GetResult.ErrorCode || !GetResult.Success) @@ -329,8 +323,9 @@ private: const IoHash m_Key; const IoHash m_OptionalBaseKey; std::filesystem::path m_TempFilePath; - bool m_EnableBlocks = true; - bool m_UseTempBlocks = true; + const bool m_EnableBlocks = true; + const bool m_UseTempBlocks = true; + const bool m_AllowRedirect = false; std::atomic_uint64_t m_SentBytes = {}; std::atomic_uint64_t m_ReceivedBytes = {}; @@ -371,7 +366,15 @@ CreateJupiterRemoteStore(const JupiterRemoteStoreOptions& Options, const std::fi { TokenProvider = httpclientauth::CreateFromStaticToken(Options.AccessToken); } - else + else if (!Options.OidcExePath.empty()) + { + if (auto TokenProviderMaybe = httpclientauth::CreateFromOidcTokenExecutable(Options.OidcExePath, Url); TokenProviderMaybe) + { + TokenProvider = TokenProviderMaybe.value(); + } + } + + if (!TokenProvider) { TokenProvider = httpclientauth::CreateFromDefaultOpenIdProvider(Options.AuthManager); } diff --git a/src/zenserver/projectstore/jupiterremoteprojectstore.h b/src/zenserver/projectstore/jupiterremoteprojectstore.h index 27f3d9b73..8bf79d563 100644 --- a/src/zenserver/projectstore/jupiterremoteprojectstore.h +++ b/src/zenserver/projectstore/jupiterremoteprojectstore.h @@ -10,17 +10,18 @@ class AuthMgr; struct JupiterRemoteStoreOptions : RemoteStoreOptions { - std::string Url; - std::string Namespace; - std::string Bucket; - IoHash Key; - IoHash OptionalBaseKey; - std::string OpenIdProvider; - std::string AccessToken; - AuthMgr& AuthManager; - bool ForceDisableBlocks = false; - bool ForceDisableTempBlocks = false; - bool AssumeHttp2 = false; + std::string Url; + std::string Namespace; + std::string Bucket; + IoHash Key; + IoHash OptionalBaseKey; + std::string OpenIdProvider; + std::string AccessToken; + AuthMgr& AuthManager; + std::filesystem::path OidcExePath; + bool ForceDisableBlocks = false; + bool ForceDisableTempBlocks = false; + bool AssumeHttp2 = false; }; std::shared_ptr<RemoteProjectStore> CreateJupiterRemoteStore(const JupiterRemoteStoreOptions& Options, diff --git a/src/zenserver/projectstore/projectstore.cpp b/src/zenserver/projectstore/projectstore.cpp index 46a236af9..53e687983 100644 --- a/src/zenserver/projectstore/projectstore.cpp +++ b/src/zenserver/projectstore/projectstore.cpp @@ -22,6 +22,7 @@ #include <zenstore/scrubcontext.h> #include <zenutil/cache/rpcrecording.h> #include <zenutil/openprocesscache.h> +#include <zenutil/parallelwork.h> #include <zenutil/referencemetadata.h> #include <zenutil/workerpools.h> @@ -58,7 +59,7 @@ namespace { std::filesystem::path DroppedBucketPath; do { - if (!std::filesystem::exists(Dir)) + if (!IsDir(Dir)) { return true; } @@ -68,7 +69,7 @@ namespace { std::string DroppedName = fmt::format("[dropped]{}({})", Dir.filename().string(), MovedId); DroppedBucketPath = Dir.parent_path() / DroppedName; - if (std::filesystem::exists(DroppedBucketPath)) + if (IsDir(DroppedBucketPath)) { if (!DeleteDirectories(DroppedBucketPath)) { @@ -77,7 +78,7 @@ namespace { Dir); continue; } - if (std::filesystem::exists(DroppedBucketPath)) + if (IsDir(DroppedBucketPath)) { ZEN_INFO("Drop directory '{}' for '{}' still exists after remove, attempting different name.", DroppedBucketPath, Dir); continue; @@ -88,13 +89,13 @@ namespace { do { std::error_code Ec; - std::filesystem::rename(Dir, DroppedBucketPath, Ec); + RenameDirectory(Dir, DroppedBucketPath, Ec); if (!Ec) { OutDeleteDir = DroppedBucketPath; return true; } - if (std::filesystem::exists(DroppedBucketPath)) + if (IsDir(DroppedBucketPath)) { ZEN_INFO("Can't rename '{}' to still existing drop directory '{}'. Reason: '{}'. Attempting different name.", Dir, @@ -210,6 +211,16 @@ namespace { AccessToken = GetEnvVariable(AccessTokenEnvVariable); } } + std::filesystem::path OidcExePath; + if (std::string_view OidcExePathString = Cloud["oidc-exe-path"].AsString(); !OidcExePathString.empty()) + { + std::filesystem::path OidcExePathMaybe(OidcExePathString); + if (!IsFile(OidcExePathMaybe)) + { + ZEN_WARN("Path to OidcToken executable '{}' can not be reached by server", OidcExePathString); + OidcExePath = std::move(OidcExePathMaybe); + } + } std::string_view KeyParam = Cloud["key"sv].AsString(); if (KeyParam.empty()) { @@ -252,6 +263,7 @@ namespace { std::string(OpenIdProvider), AccessToken, AuthManager, + OidcExePath, ForceDisableBlocks, ForceDisableTempBlocks, AssumeHttp2}; @@ -307,6 +319,16 @@ namespace { AccessToken = GetEnvVariable(AccessTokenEnvVariable); } } + std::filesystem::path OidcExePath; + if (std::string_view OidcExePathString = Builds["oidc-exe-path"].AsString(); !OidcExePathString.empty()) + { + std::filesystem::path OidcExePathMaybe(OidcExePathString); + if (!IsFile(OidcExePathMaybe)) + { + ZEN_WARN("Path to OidcToken executable '{}' can not be reached by server", OidcExePathString); + OidcExePath = std::move(OidcExePathMaybe); + } + } std::string_view BuildIdParam = Builds["buildsid"sv].AsString(); if (BuildIdParam.empty()) { @@ -337,6 +359,7 @@ namespace { std::string(OpenIdProvider), AccessToken, AuthManager, + OidcExePath, ForceDisableBlocks, ForceDisableTempBlocks, AssumeHttp2, @@ -423,9 +446,13 @@ ComputeOpKey(const CbObjectView& Op) { using namespace std::literals; - BinaryWriter KeyStream; + eastl::fixed_vector<uint8_t, 256> KeyData; - Op["key"sv].WriteToStream([&](const void* Data, size_t Size) { KeyStream.Write(Data, Size); }); + Op["key"sv].WriteToStream([&](const void* Data, size_t Size) { + auto Begin = reinterpret_cast<const uint8_t*>(Data); + auto End = Begin + Size; + KeyData.insert(KeyData.end(), Begin, End); + }); XXH3_128 KeyHash128; @@ -434,15 +461,15 @@ ComputeOpKey(const CbObjectView& Op) // path but longer paths are evaluated properly. In the future all key lengths // should be evaluated using the proper path, this is a temporary workaround to // maintain compatibility with existing disk state. - if (KeyStream.GetSize() < 240) + if (KeyData.size() < 240) { XXH3_128Stream_deprecated KeyHasher; - KeyHasher.Append(KeyStream.Data(), KeyStream.Size()); + KeyHasher.Append(KeyData.data(), KeyData.size()); KeyHash128 = KeyHasher.GetHash(); } else { - KeyHash128 = XXH3_128::HashMemory(KeyStream.GetView()); + KeyHash128 = XXH3_128::HashMemory(KeyData.data(), KeyData.size()); } Oid KeyHash; @@ -482,7 +509,7 @@ struct ProjectStore::OplogStorage : public RefCounted [[nodiscard]] bool Exists() const { return Exists(m_OplogStoragePath); } [[nodiscard]] static bool Exists(const std::filesystem::path& BasePath) { - return std::filesystem::exists(GetLogPath(BasePath)) && std::filesystem::exists(GetBlobsPath(BasePath)); + return IsFile(GetLogPath(BasePath)) && IsFile(GetBlobsPath(BasePath)); } [[nodiscard]] bool IsValid() const { return IsValid(m_OplogStoragePath); } [[nodiscard]] static bool IsValid(const std::filesystem::path& BasePath) @@ -492,13 +519,13 @@ struct ProjectStore::OplogStorage : public RefCounted void WipeState() const { std::error_code Ec; - std::filesystem::remove(GetLogPath(), Ec); - std::filesystem::remove(GetBlobsPath(), Ec); + RemoveFile(GetLogPath(), Ec); + RemoveFile(GetBlobsPath(), Ec); } static bool Delete(const std::filesystem::path& BasePath) { return DeleteDirectories(BasePath); } - uint64_t OpBlobsSize() const { return std::filesystem::file_size(GetBlobsPath()); } + uint64_t OpBlobsSize() const { return FileSizeFromPath(GetBlobsPath()); } uint64_t OpsSize() const { return OpsSize(m_OplogStoragePath); } static uint64_t OpsSize(const std::filesystem::path& BasePath) @@ -506,7 +533,7 @@ struct ProjectStore::OplogStorage : public RefCounted if (Exists(BasePath)) { std::error_code DummyEc; - return std::filesystem::file_size(GetLogPath(BasePath)) + std::filesystem::file_size(GetBlobsPath(BasePath)); + return FileSizeFromPath(GetLogPath(BasePath)) + FileSizeFromPath(GetBlobsPath(BasePath)); } return 0; } @@ -685,7 +712,7 @@ struct ProjectStore::OplogStorage : public RefCounted m_OpBlobs.Close(); Oplog.Close(); - std::filesystem::rename(OplogPath, GetLogPath(), Ec); + RenameFile(OplogPath, GetLogPath(), Ec); if (Ec) { throw std::system_error( @@ -698,9 +725,9 @@ struct ProjectStore::OplogStorage : public RefCounted if (Ec) { // We failed late - clean everything up as best we can - std::filesystem::remove(OpBlobs.GetPath(), Ec); - std::filesystem::remove(GetLogPath(), Ec); - std::filesystem::remove(GetBlobsPath(), Ec); + RemoveFile(OpBlobs.GetPath(), Ec); + RemoveFile(GetLogPath(), Ec); + RemoveFile(GetBlobsPath(), Ec); throw std::system_error(Ec, fmt::format("Oplog::Compact failed to rename temporary oplog file from '{}' to '{}'", OpBlobs.GetPath(), @@ -735,7 +762,7 @@ struct ProjectStore::OplogStorage : public RefCounted } catch (const std::exception& /*Ex*/) { - std::filesystem::remove(OpBlobs.GetPath(), Ec); + RemoveFile(OpBlobs.GetPath(), Ec); throw; } } @@ -983,8 +1010,8 @@ struct ProjectStore::OplogStorage : public RefCounted .OpCoreHash = OpData.OpCoreHash, .OpKeyHash = OpData.KeyHash}; - m_Oplog.Append(Entry); m_OpBlobs.Write(OpData.Buffer.GetData(), WriteSize, WriteOffset); + m_Oplog.Append(Entry); return Entry; } @@ -1104,7 +1131,7 @@ ProjectStore::Oplog::Oplog(std::string_view Id, ZEN_WARN("Invalid oplog found at '{}'. Wiping state for oplog.", m_BasePath); m_Storage->WipeState(); std::error_code DummyEc; - std::filesystem::remove(m_MetaPath, DummyEc); + RemoveFile(m_MetaPath, DummyEc); } } m_Storage->Open(/* IsCreate */ !StoreExists); @@ -1112,7 +1139,7 @@ ProjectStore::Oplog::Oplog(std::string_view Id, m_MetaPath = m_BasePath / "ops.meta"sv; m_MetaValid = !IsFileOlderThan(m_MetaPath, m_Storage->GetBlobsPath()); - CleanDirectory(m_TempPath); + CleanDirectory(m_TempPath, /*ForceRemoveReadOnlyFiles*/ false); } ProjectStore::Oplog::~Oplog() @@ -1138,7 +1165,7 @@ ProjectStore::Oplog::Flush() if (!m_MetaValid) { std::error_code DummyEc; - std::filesystem::remove(m_MetaPath, DummyEc); + RemoveFile(m_MetaPath, DummyEc); } uint64_t LogCount = m_Storage->LogCount(); @@ -1234,19 +1261,19 @@ ProjectStore::Oplog::TotalSize(const std::filesystem::path& BasePath) uint64_t Size = OplogStorage::OpsSize(BasePath); std::filesystem::path StateFilePath = BasePath / "oplog.zcb"sv; - if (std::filesystem::exists(StateFilePath)) + if (IsFile(StateFilePath)) { - Size += std::filesystem::file_size(StateFilePath); + Size += FileSizeFromPath(StateFilePath); } std::filesystem::path MetaFilePath = BasePath / "ops.meta"sv; - if (std::filesystem::exists(MetaFilePath)) + if (IsFile(MetaFilePath)) { - Size += std::filesystem::file_size(MetaFilePath); + Size += FileSizeFromPath(MetaFilePath); } std::filesystem::path IndexFilePath = BasePath / "ops.zidx"sv; - if (std::filesystem::exists(IndexFilePath)) + if (IsFile(IndexFilePath)) { - Size += std::filesystem::file_size(IndexFilePath); + Size += FileSizeFromPath(IndexFilePath); } return Size; @@ -1299,7 +1326,7 @@ ProjectStore::Oplog::ExistsAt(const std::filesystem::path& BasePath) using namespace std::literals; std::filesystem::path StateFilePath = BasePath / "oplog.zcb"sv; - return std::filesystem::is_regular_file(StateFilePath); + return IsFile(StateFilePath); } bool @@ -1333,7 +1360,7 @@ ProjectStore::Oplog::Read() if (!m_MetaValid) { std::error_code DummyEc; - std::filesystem::remove(m_MetaPath, DummyEc); + RemoveFile(m_MetaPath, DummyEc); } ReadIndexSnapshot(); @@ -1434,7 +1461,7 @@ ProjectStore::Oplog::Reset() m_Storage = new OplogStorage(this, m_BasePath); m_Storage->Open(true); m_MetaValid = false; - CleanDirectory(m_TempPath); + CleanDirectory(m_TempPath, /*ForceRemoveReadOnlyFiles*/ false); Write(); } // Erase content on disk @@ -1453,7 +1480,7 @@ ProjectStore::Oplog::ReadStateFile(const std::filesystem::path& BasePath, std::f using namespace std::literals; std::filesystem::path StateFilePath = BasePath / "oplog.zcb"sv; - if (std::filesystem::is_regular_file(StateFilePath)) + if (IsFile(StateFilePath)) { // ZEN_INFO("oplog '{}/{}': config read from '{}'", m_OuterProject->Identifier, m_OplogId, StateFilePath); @@ -1484,11 +1511,17 @@ ProjectStore::Oplog::Validate(std::atomic_bool& IsCancelledFlag, WorkerThreadPoo ValidationResult Result; + const size_t OpCount = OplogCount(); + std::vector<Oid> KeyHashes; std::vector<std::string> Keys; std::vector<std::vector<IoHash>> Attachments; std::vector<OplogEntryMapping> Mappings; + KeyHashes.reserve(OpCount); + Keys.reserve(OpCount); + Mappings.reserve(OpCount); + IterateOplogWithKey([&](uint32_t LSN, const Oid& Key, CbObjectView OpView) { Result.LSNLow = Min(Result.LSNLow, LSN); Result.LSNHigh = Max(Result.LSNHigh, LSN); @@ -1513,77 +1546,90 @@ ProjectStore::Oplog::Validate(std::atomic_bool& IsCancelledFlag, WorkerThreadPoo bool HasMissingEntries = false; for (const ChunkMapping& Chunk : Mapping.Chunks) { - if (IoBuffer Payload = m_CidStore.FindChunkByCid(Chunk.Hash); !Payload) + if (!m_CidStore.ContainsChunk(Chunk.Hash)) { ResultLock.WithExclusiveLock([&]() { Result.MissingChunks.push_back({KeyHash, Chunk}); }); HasMissingEntries = true; } } + for (const ChunkMapping& Meta : Mapping.Meta) { - if (IoBuffer Payload = m_CidStore.FindChunkByCid(Meta.Hash); !Payload) + if (!m_CidStore.ContainsChunk(Meta.Hash)) { ResultLock.WithExclusiveLock([&]() { Result.MissingMetas.push_back({KeyHash, Meta}); }); HasMissingEntries = true; } } + for (const FileMapping& File : Mapping.Files) { if (File.Hash == IoHash::Zero) { std::filesystem::path FilePath = m_OuterProject->RootDir / File.ServerPath; - if (!std::filesystem::is_regular_file(FilePath)) + if (!IsFile(FilePath)) { ResultLock.WithExclusiveLock([&]() { Result.MissingFiles.push_back({KeyHash, File}); }); HasMissingEntries = true; } } - else + else if (!m_CidStore.ContainsChunk(File.Hash)) { - if (IoBuffer Payload = m_CidStore.FindChunkByCid(File.Hash); !Payload) - { - ResultLock.WithExclusiveLock([&]() { Result.MissingFiles.push_back({KeyHash, File}); }); - HasMissingEntries = true; - } + ResultLock.WithExclusiveLock([&]() { Result.MissingFiles.push_back({KeyHash, File}); }); + HasMissingEntries = true; } } + const std::vector<IoHash>& OpAttachments = Attachments[OpIndex]; for (const IoHash& Attachment : OpAttachments) { - if (IoBuffer Payload = m_CidStore.FindChunkByCid(Attachment); !Payload) + if (!m_CidStore.ContainsChunk(Attachment)) { ResultLock.WithExclusiveLock([&]() { Result.MissingAttachments.push_back({KeyHash, Attachment}); }); HasMissingEntries = true; } } + if (HasMissingEntries) { ResultLock.WithExclusiveLock([&]() { Result.OpKeys.push_back({KeyHash, Key}); }); } }; - Latch WorkLatch(1); - - for (uint32_t OpIndex = 0; !IsCancelledFlag && OpIndex < Result.OpCount; OpIndex++) + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + try { - if (OptionalWorkerPool) - { - WorkLatch.AddCount(1); - OptionalWorkerPool->ScheduleWork([&, Index = OpIndex]() { - ZEN_MEMSCOPE(GetProjectstoreTag()); - - auto _ = MakeGuard([&WorkLatch] { WorkLatch.CountDown(); }); - ValidateOne(Index); - }); - } - else + for (uint32_t OpIndex = 0; !IsCancelledFlag && OpIndex < Result.OpCount; OpIndex++) { - ValidateOne(OpIndex); + if (AbortFlag) + { + break; + } + if (OptionalWorkerPool) + { + Work.ScheduleWork(*OptionalWorkerPool, [&ValidateOne, Index = OpIndex](std::atomic<bool>& AbortFlag) { + ZEN_MEMSCOPE(GetProjectstoreTag()); + if (AbortFlag) + { + return; + } + ValidateOne(Index); + }); + } + else + { + ValidateOne(OpIndex); + } } } - - WorkLatch.CountDown(); - WorkLatch.Wait(); + catch (const std::exception& Ex) + { + AbortFlag.store(true); + ZEN_WARN("Failed validating oplogs in {}. Reason: '{}'", m_BasePath, Ex.what()); + } + Work.Wait(); { // Check if we were deleted while we were checking the references without a lock... @@ -1617,31 +1663,9 @@ ProjectStore::Oplog::WriteIndexSnapshot() namespace fs = std::filesystem; - fs::path IndexPath = m_BasePath / "ops.zidx"; - fs::path TempIndexPath = m_BasePath / "ops.zidx.tmp"; - - // Move index away, we keep it if something goes wrong - if (fs::is_regular_file(TempIndexPath)) - { - std::error_code Ec; - if (!fs::remove(TempIndexPath, Ec) || Ec) - { - ZEN_WARN("oplog '{}/{}': snapshot failed to clean up temp snapshot at {}, reason: '{}'", - GetOuterProject()->Identifier, - m_OplogId, - TempIndexPath, - Ec.message()); - return; - } - } - + const fs::path IndexPath = m_BasePath / "ops.zidx"; try { - if (fs::is_regular_file(IndexPath)) - { - fs::rename(IndexPath, TempIndexPath); - } - // Write the current state of the location map to a new index state std::vector<uint32_t> LSNEntries; std::vector<Oid> Keys; @@ -1722,36 +1746,28 @@ ProjectStore::Oplog::WriteIndexSnapshot() uint64_t Offset = 0; IndexFile.Write(&Header, sizeof(OplogIndexHeader), Offset); - Offset += sizeof(OplogIndexHeader); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); IndexFile.Write(LSNEntries.data(), LSNEntries.size() * sizeof(uint32_t), Offset); - Offset += LSNEntries.size() * sizeof(uint32_t); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); IndexFile.Write(Keys.data(), Keys.size() * sizeof(Oid), Offset); - Offset += Keys.size() * sizeof(Oid); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); IndexFile.Write(AddressMapEntries.data(), AddressMapEntries.size() * sizeof(OplogEntryAddress), Offset); - Offset += AddressMapEntries.size() * sizeof(OplogEntryAddress); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); IndexFile.Write(LatestOpMapEntries.data(), LatestOpMapEntries.size() * sizeof(uint32_t), Offset); - Offset += LatestOpMapEntries.size() * sizeof(uint32_t); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); IndexFile.Write(ChunkMapEntries.data(), ChunkMapEntries.size() * sizeof(IoHash), Offset); - Offset += ChunkMapEntries.size() * sizeof(IoHash); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); IndexFile.Write(MetaMapEntries.data(), MetaMapEntries.size() * sizeof(IoHash), Offset); - Offset += MetaMapEntries.size() * sizeof(IoHash); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); IndexFile.Write(FilePathLengths.data(), FilePathLengths.size() * sizeof(uint32_t), Offset); - Offset += FilePathLengths.size() * sizeof(uint32_t); - Offset = RoundUp(Offset, OplogIndexHeader::DataAlignment); + Offset = IndexFile.AlignTo(OplogIndexHeader::DataAlignment); for (const auto& FilePath : FilePaths) { @@ -1763,7 +1779,11 @@ ProjectStore::Oplog::WriteIndexSnapshot() ObjectIndexFile.MoveTemporaryIntoPlace(IndexPath, Ec); if (Ec) { - throw std::system_error(Ec, fmt::format("Failed to move temp file '{}' to '{}'", ObjectIndexFile.GetPath(), IndexPath)); + throw std::system_error(Ec, + fmt::format("Snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'", + ObjectIndexFile.GetPath(), + IndexPath, + Ec.message())); } EntryCount = LSNEntries.size(); m_LogFlushPosition = IndexLogPosition; @@ -1771,35 +1791,6 @@ ProjectStore::Oplog::WriteIndexSnapshot() catch (const std::exception& Err) { ZEN_WARN("oplog '{}/{}': snapshot FAILED, reason: '{}'", m_OuterProject->Identifier, m_OplogId, Err.what()); - - // Restore any previous snapshot - - if (fs::is_regular_file(TempIndexPath)) - { - std::error_code Ec; - fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless - fs::rename(TempIndexPath, IndexPath, Ec); - if (Ec) - { - ZEN_WARN("oplog '{}/{}': snapshot failed to restore old snapshot from {}, reason: '{}'", - m_OuterProject->Identifier, - m_OplogId, - TempIndexPath, - Ec.message()); - } - } - } - if (fs::is_regular_file(TempIndexPath)) - { - std::error_code Ec; - if (!fs::remove(TempIndexPath, Ec) || Ec) - { - ZEN_WARN("oplog '{}/{}': snapshot failed to remove temporary file {}, reason: '{}'", - m_OuterProject->Identifier, - m_OplogId, - TempIndexPath, - Ec.message()); - } } } @@ -1809,8 +1800,8 @@ ProjectStore::Oplog::ReadIndexSnapshot() ZEN_MEMSCOPE(GetProjectstoreTag()); ZEN_TRACE_CPU("Oplog::ReadIndexSnapshot"); - std::filesystem::path IndexPath = m_BasePath / "ops.zidx"; - if (std::filesystem::is_regular_file(IndexPath)) + const std::filesystem::path IndexPath = m_BasePath / "ops.zidx"; + if (IsFile(IndexPath)) { uint64_t EntryCount = 0; Stopwatch Timer; @@ -2135,66 +2126,81 @@ ProjectStore::Oplog::IterateChunks(std::span<Oid> ChunkIds, } if (OptionalWorkerPool) { - std::atomic_bool Result = true; - Latch WorkLatch(1); - - for (size_t ChunkIndex = 0; ChunkIndex < FileChunkIndexes.size(); ChunkIndex++) + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + try { - if (Result.load() == false) + for (size_t ChunkIndex = 0; ChunkIndex < FileChunkIndexes.size(); ChunkIndex++) { - break; - } - WorkLatch.AddCount(1); - OptionalWorkerPool->ScheduleWork( - [this, &WorkLatch, &ChunkIds, IncludeModTag, ChunkIndex, &FileChunkIndexes, &FileChunkPaths, &AsyncCallback, &Result]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - if (Result.load() == false) - { - return; - } - size_t FileChunkIndex = FileChunkIndexes[ChunkIndex]; - const std::filesystem::path& FilePath = FileChunkPaths[ChunkIndex]; - try - { - IoBuffer Payload = IoBufferBuilder::MakeFromFile(FilePath); - if (!Payload) + if (AbortFlag) + { + break; + } + Work.ScheduleWork( + *OptionalWorkerPool, + [this, &ChunkIds, IncludeModTag, ChunkIndex, &FileChunkIndexes, &FileChunkPaths, &AsyncCallback]( + std::atomic<bool>& AbortFlag) { + if (AbortFlag) { - ZEN_WARN("Trying to fetch chunk {} using file path {} failed", ChunkIds[ChunkIndex], FilePath); + return; } + size_t FileChunkIndex = FileChunkIndexes[ChunkIndex]; + const std::filesystem::path& FilePath = FileChunkPaths[ChunkIndex]; + try + { + IoBuffer Payload = IoBufferBuilder::MakeFromFile(FilePath); + if (!Payload) + { + ZEN_WARN("Trying to fetch chunk {} using file path {} failed", ChunkIds[FileChunkIndex], FilePath); + } - if (!AsyncCallback(FileChunkIndex, Payload, IncludeModTag ? GetModificationTagFromModificationTime(Payload) : 0)) + if (!AsyncCallback(FileChunkIndex, + Payload, + IncludeModTag ? GetModificationTagFromModificationTime(Payload) : 0)) + { + AbortFlag.store(true); + } + } + catch (const std::exception& Ex) { - Result.store(false); + ZEN_WARN("oplog '{}/{}': exception caught when iterating file chunk {}, path '{}'. Reason: '{}'", + m_OuterProject->Identifier, + m_OplogId, + FileChunkIndex, + FilePath, + Ex.what()); } - } - catch (const std::exception& Ex) - { - ZEN_WARN("oplog '{}/{}': exception caught when iterating file chunk {}, path '{}'. Reason: '{}'", - m_OuterProject->Identifier, - m_OplogId, - FileChunkIndex, - FilePath, - Ex.what()); - } - }); - } + }); + } - if (!CidChunkHashes.empty()) + if (!CidChunkHashes.empty() && !AbortFlag) + { + m_CidStore.IterateChunks( + CidChunkHashes, + [&](size_t Index, const IoBuffer& Payload) { + size_t CidChunkIndex = CidChunkIndexes[Index]; + if (AbortFlag) + { + return false; + } + return AsyncCallback(CidChunkIndex, + Payload, + IncludeModTag ? GetModificationTagFromRawHash(CidChunkHashes[Index]) : 0); + }, + OptionalWorkerPool, + LargeSizeLimit); + } + } + catch (const std::exception& Ex) { - m_CidStore.IterateChunks( - CidChunkHashes, - [&](size_t Index, const IoBuffer& Payload) { - size_t CidChunkIndex = CidChunkIndexes[Index]; - return AsyncCallback(CidChunkIndex, Payload, IncludeModTag ? GetModificationTagFromRawHash(CidChunkHashes[Index]) : 0); - }, - OptionalWorkerPool, - LargeSizeLimit); + AbortFlag.store(true); + ZEN_WARN("Failed iterating oplog chunks in {}. Reason: '{}'", m_BasePath, Ex.what()); } - WorkLatch.CountDown(); - WorkLatch.Wait(); + Work.Wait(); - return Result.load(); + return !AbortFlag; } else { @@ -2735,7 +2741,7 @@ ProjectStore::Oplog::CheckPendingChunkReferences(std::span<const IoHash> ChunkHa MissingChunks.reserve(ChunkHashes.size()); for (const IoHash& FileHash : ChunkHashes) { - if (IoBuffer Payload = m_CidStore.FindChunkByCid(FileHash); !Payload) + if (!m_CidStore.ContainsChunk(FileHash)) { MissingChunks.push_back(FileHash); } @@ -3129,7 +3135,7 @@ ProjectStore::Project::~Project() bool ProjectStore::Project::Exists(const std::filesystem::path& BasePath) { - return std::filesystem::exists(BasePath / "Project.zcb"); + return IsFile(BasePath / "Project.zcb"); } void @@ -3203,7 +3209,7 @@ ProjectStore::Project::ReadAccessTimes() using namespace std::literals; std::filesystem::path ProjectAccessTimesFilePath = m_OplogStoragePath / "AccessTimes.zcb"sv; - if (!std::filesystem::exists(ProjectAccessTimesFilePath)) + if (!IsFile(ProjectAccessTimesFilePath)) { return; } @@ -3359,7 +3365,6 @@ ProjectStore::Project::OpenOplog(std::string_view OplogId, bool AllowCompact, bo ZEN_MEMSCOPE(GetProjectstoreTag()); ZEN_TRACE_CPU("Store::OpenOplog"); - std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); { RwLock::SharedLockScope ProjectLock(m_ProjectLock); @@ -3367,21 +3372,35 @@ ProjectStore::Project::OpenOplog(std::string_view OplogId, bool AllowCompact, bo if (OplogIt != m_Oplogs.end()) { - if (!VerifyPathOnDisk || Oplog::ExistsAt(OplogBasePath)) + bool ReOpen = false; + + if (VerifyPathOnDisk) { - return OplogIt->second.get(); + std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); + + if (!Oplog::ExistsAt(OplogBasePath)) + { + // Somebody deleted the oplog on disk behind our back + ProjectLock.ReleaseNow(); + std::filesystem::path DeletePath; + if (!RemoveOplog(OplogId, DeletePath)) + { + ZEN_WARN("Failed to clean up deleted oplog {}/{}", Identifier, OplogId, OplogBasePath); + } + + ReOpen = true; + } } - // Somebody deleted the oplog on disk behind our back - ProjectLock.ReleaseNow(); - std::filesystem::path DeletePath; - if (!RemoveOplog(OplogId, DeletePath)) + if (!ReOpen) { - ZEN_WARN("Failed to clean up deleted oplog {}/{}", Identifier, OplogId, OplogBasePath); + return OplogIt->second.get(); } } } + std::filesystem::path OplogBasePath = BasePathForOplog(OplogId); + RwLock::ExclusiveLockScope Lock(m_ProjectLock); if (auto It = m_Oplogs.find(std::string{OplogId}); It != m_Oplogs.end()) { @@ -3581,14 +3600,14 @@ ProjectStore::Project::TotalSize(const std::filesystem::path& BasePath) uint64_t Size = 0; std::filesystem::path AccessTimesFilePath = BasePath / "AccessTimes.zcb"sv; - if (std::filesystem::exists(AccessTimesFilePath)) + if (IsFile(AccessTimesFilePath)) { - Size += std::filesystem::file_size(AccessTimesFilePath); + Size += FileSizeFromPath(AccessTimesFilePath); } std::filesystem::path ProjectFilePath = BasePath / "Project.zcb"sv; - if (std::filesystem::exists(ProjectFilePath)) + if (IsFile(ProjectFilePath)) { - Size += std::filesystem::file_size(ProjectFilePath); + Size += FileSizeFromPath(ProjectFilePath); } return Size; @@ -3700,7 +3719,7 @@ ProjectStore::Project::IsExpired(const std::string& EntryName, if (!MarkerPath.empty()) { std::error_code Ec; - if (std::filesystem::exists(MarkerPath, Ec)) + if (IsFile(MarkerPath, Ec)) { if (Ec) { @@ -3853,7 +3872,7 @@ void ProjectStore::DiscoverProjects() { ZEN_MEMSCOPE(GetProjectstoreTag()); - if (!std::filesystem::exists(m_ProjectBasePath)) + if (!IsDir(m_ProjectBasePath)) { return; } @@ -3902,26 +3921,32 @@ ProjectStore::Flush() } WorkerThreadPool& WorkerPool = GetSmallWorkerPool(EWorkloadType::Burst); - Latch WorkLatch(1); - - for (const Ref<Project>& Project : Projects) + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + try { - WorkLatch.AddCount(1); - WorkerPool.ScheduleWork([this, &WorkLatch, Project]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - try - { - Project->Flush(); - } - catch (const std::exception& Ex) - { - ZEN_WARN("Exception while flushing project {}: {}", Project->Identifier, Ex.what()); - } - }); + for (const Ref<Project>& Project : Projects) + { + Work.ScheduleWork(WorkerPool, [this, Project](std::atomic<bool>&) { + try + { + Project->Flush(); + } + catch (const std::exception& Ex) + { + ZEN_WARN("Exception while flushing project {}: {}", Project->Identifier, Ex.what()); + } + }); + } + } + catch (const std::exception& Ex) + { + AbortFlag.store(true); + ZEN_WARN("Failed projects in {}. Reason: '{}'", m_ProjectBasePath, Ex.what()); } - WorkLatch.CountDown(); - WorkLatch.Wait(); + Work.Wait(); } void @@ -3962,7 +3987,7 @@ ProjectStore::StorageSize() const GcStorageSize Result; { - if (std::filesystem::exists(m_ProjectBasePath)) + if (IsDir(m_ProjectBasePath)) { DirectoryContent ProjectsFolderContent; GetDirectoryContent(m_ProjectBasePath, DirectoryContentFlags::IncludeDirs, ProjectsFolderContent); @@ -3970,7 +3995,7 @@ ProjectStore::StorageSize() const for (const std::filesystem::path& ProjectBasePath : ProjectsFolderContent.Directories) { std::filesystem::path ProjectStateFilePath = ProjectBasePath / "Project.zcb"sv; - if (std::filesystem::exists(ProjectStateFilePath)) + if (IsFile(ProjectStateFilePath)) { Result.DiskSize += Project::TotalSize(ProjectBasePath); DirectoryContent DirContent; @@ -4753,7 +4778,6 @@ std::pair<HttpResponseCode, std::string> ProjectStore::GetChunk(const std::string_view ProjectId, const std::string_view OplogId, const std::string_view Cid, - ZenContentType AcceptType, IoBuffer& OutChunk, uint64_t* OptionalInOutModificationTag) { @@ -4795,16 +4819,7 @@ ProjectStore::GetChunk(const std::string_view ProjectId, } } - if (AcceptType == ZenContentType::kUnknownContentType || AcceptType == ZenContentType::kBinary) - { - CompressedBuffer Compressed = CompressedBuffer::FromCompressedNoValidate(std::move(OutChunk)); - OutChunk = Compressed.Decompress().AsIoBuffer(); - OutChunk.SetContentType(ZenContentType::kBinary); - } - else - { - OutChunk.SetContentType(ZenContentType::kCompressedBinary); - } + OutChunk.SetContentType(ZenContentType::kCompressedBinary); return {HttpResponseCode::OK, {}}; } @@ -5347,7 +5362,7 @@ ProjectStore::ReadOplog(const std::string_view ProjectId, /* BuildBlocks */ false, /* IgnoreMissingAttachments */ false, /* AllowChunking*/ false, - [](CompressedBuffer&&, RemoteProjectStore::Block&&) {}, + [](CompressedBuffer&&, ChunkBlockDescription&&) {}, [](const IoHash&, TGetAttachmentBufferFunc&&) {}, [](std::vector<std::pair<IoHash, FetchChunkFunc>>&&) {}, /* EmbedLooseFiles*/ false); @@ -7236,7 +7251,7 @@ TEST_CASE("project.store.gc") CHECK(ProjectStore.OpenProject("proj2"sv)); } - std::filesystem::remove(Project1FilePath); + RemoveFile(Project1FilePath); { GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), @@ -7265,7 +7280,7 @@ TEST_CASE("project.store.gc") CHECK(ProjectStore.OpenProject("proj2"sv)); } - std::filesystem::remove(Project2Oplog1Path); + RemoveFile(Project2Oplog1Path); { GcSettings Settings = {.CacheExpireTime = GcClock::Now() - std::chrono::hours(24), .ProjectStoreExpireTime = GcClock::Now() - std::chrono::hours(24), @@ -7294,7 +7309,7 @@ TEST_CASE("project.store.gc") CHECK(ProjectStore.OpenProject("proj2"sv)); } - std::filesystem::remove(Project2FilePath); + RemoveFile(Project2FilePath); { GcSettings Settings = {.CacheExpireTime = GcClock::Now() + std::chrono::hours(24), .ProjectStoreExpireTime = GcClock::Now() + std::chrono::hours(24), @@ -8018,7 +8033,7 @@ TEST_CASE("project.store.rpc.getchunks") CompositeBuffer Buffer = Attachment->AsCompositeBinary(); CHECK_EQ(IoHash::HashBuffer(IoBuffer(ReadFile(FilesOpIdAttachments[0].second).Flatten(), 81823, 5434)), IoHash::HashBuffer(Buffer)); - CHECK_EQ(Chunk["Size"sv].AsUInt64(), std::filesystem::file_size(FilesOpIdAttachments[0].second)); + CHECK_EQ(Chunk["Size"sv].AsUInt64(), FileSizeFromPath(FilesOpIdAttachments[0].second)); CHECK(!Chunk.FindView("RawSize")); } { @@ -8499,12 +8514,7 @@ TEST_CASE("project.store.partial.read") uint64_t ModificationTag = 0; IoBuffer Chunk; CHECK(ProjectStore - .GetChunk("proj1"sv, - "oplog1"sv, - Attachments[OpIds[1]][0].second.DecodeRawHash().ToHexString(), - HttpContentType::kCompressedBinary, - Chunk, - &ModificationTag) + .GetChunk("proj1"sv, "oplog1"sv, Attachments[OpIds[1]][0].second.DecodeRawHash().ToHexString(), Chunk, &ModificationTag) .first == HttpResponseCode::OK); IoHash RawHash; uint64_t RawSize; @@ -8513,12 +8523,7 @@ TEST_CASE("project.store.partial.read") CHECK(ModificationTag != 0); CHECK(ProjectStore - .GetChunk("proj1"sv, - "oplog1"sv, - Attachments[OpIds[1]][0].second.DecodeRawHash().ToHexString(), - HttpContentType::kCompressedBinary, - Chunk, - &ModificationTag) + .GetChunk("proj1"sv, "oplog1"sv, Attachments[OpIds[1]][0].second.DecodeRawHash().ToHexString(), Chunk, &ModificationTag) .first == HttpResponseCode::NotModified); } @@ -8621,14 +8626,18 @@ TEST_CASE("project.store.block") Chunks.reserve(AttachmentSizes.size()); for (const auto& It : AttachmentsWithId) { - Chunks.push_back(std::make_pair(It.second.DecodeRawHash(), - [Buffer = It.second.GetCompressed().Flatten().AsIoBuffer()](const IoHash&) -> CompositeBuffer { - return CompositeBuffer(SharedBuffer(Buffer)); - })); - } - RemoteProjectStore::Block Block; - CompressedBuffer BlockBuffer = GenerateBlock(std::move(Chunks), Block); - CHECK(IterateBlock(BlockBuffer.Decompress(), [](CompressedBuffer&&, const IoHash&) {})); + Chunks.push_back( + std::make_pair(It.second.DecodeRawHash(), [Buffer = It.second](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { + return {Buffer.DecodeRawSize(), Buffer}; + })); + } + ChunkBlockDescription Block; + CompressedBuffer BlockBuffer = GenerateChunkBlock(std::move(Chunks), Block); + uint64_t HeaderSize; + CHECK(IterateChunkBlock( + BlockBuffer.Decompress(), + [](CompressedBuffer&&, const IoHash&) {}, + HeaderSize)); } TEST_CASE("project.store.iterateoplog") diff --git a/src/zenserver/projectstore/projectstore.h b/src/zenserver/projectstore/projectstore.h index 8f2d3ce0d..368da5ea4 100644 --- a/src/zenserver/projectstore/projectstore.h +++ b/src/zenserver/projectstore/projectstore.h @@ -449,7 +449,6 @@ public: std::pair<HttpResponseCode, std::string> GetChunk(const std::string_view ProjectId, const std::string_view OplogId, const std::string_view Cid, - ZenContentType AcceptType, IoBuffer& OutChunk, uint64_t* OptionalInOutModificationTag); diff --git a/src/zenserver/projectstore/remoteprojectstore.cpp b/src/zenserver/projectstore/remoteprojectstore.cpp index 0589fdc5f..f96b3e185 100644 --- a/src/zenserver/projectstore/remoteprojectstore.cpp +++ b/src/zenserver/projectstore/remoteprojectstore.cpp @@ -12,8 +12,8 @@ #include <zencore/stream.h> #include <zencore/timer.h> #include <zencore/workthreadpool.h> -#include <zenstore/chunkedfile.h> #include <zenstore/cidstore.h> +#include <zenutil/chunkedfile.h> #include <zenutil/workerpools.h> #include <unordered_map> @@ -143,7 +143,7 @@ namespace remotestore_impl { NiceBytes(Stats.m_PeakReceivedBytes)); } - size_t AddBlock(RwLock& BlocksLock, std::vector<RemoteProjectStore::Block>& Blocks) + size_t AddBlock(RwLock& BlocksLock, std::vector<ChunkBlockDescription>& Blocks) { size_t BlockIndex; { @@ -154,63 +154,6 @@ namespace remotestore_impl { return BlockIndex; } - IoBuffer WriteToTempFile(CompressedBuffer&& CompressedBuffer, std::filesystem::path Path) - { - if (std::filesystem::is_regular_file(Path)) - { - IoBuffer ExistingTempFile = IoBuffer(IoBufferBuilder::MakeFromFile(Path)); - if (ExistingTempFile && ExistingTempFile.GetSize() == CompressedBuffer.GetCompressedSize()) - { - ExistingTempFile.SetDeleteOnClose(true); - return ExistingTempFile; - } - } - IoBuffer BlockBuffer; - BasicFile BlockFile; - uint32_t RetriesLeft = 3; - BlockFile.Open(Path, BasicFile::Mode::kTruncateDelete, [&](std::error_code& Ec) { - if (RetriesLeft == 0) - { - return false; - } - ZEN_WARN("Failed to create temporary oplog block '{}': '{}', retries left: {}.", Path, Ec.message(), RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms - RetriesLeft--; - return true; - }); - uint64_t Offset = 0; - { - CompositeBuffer Compressed = std::move(CompressedBuffer).GetCompressed(); - for (const SharedBuffer& Segment : Compressed.GetSegments()) - { - size_t SegmentSize = Segment.GetSize(); - static const uint64_t BufferingSize = 256u * 1024u; - - IoBufferFileReference FileRef; - if (SegmentSize >= (BufferingSize + BufferingSize / 2) && Segment.GetFileReference(FileRef)) - { - ScanFile(FileRef.FileHandle, - FileRef.FileChunkOffset, - FileRef.FileChunkSize, - BufferingSize, - [&BlockFile, &Offset](const void* Data, size_t Size) { - BlockFile.Write(Data, Size, Offset); - Offset += Size; - }); - } - else - { - BlockFile.Write(Segment.GetData(), SegmentSize, Offset); - Offset += SegmentSize; - } - } - } - void* FileHandle = BlockFile.Detach(); - BlockBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, Offset, /*IsWholeFile*/ true); - BlockBuffer.SetDeleteOnClose(true); - return BlockBuffer; - } - RemoteProjectStore::Result WriteOplogSection(ProjectStore::Oplog& Oplog, const CbObjectView& SectionObject, JobContext* OptionalContext) { using namespace std::literals; @@ -573,21 +516,23 @@ namespace remotestore_impl { return; } - bool StoreChunksOK = IterateBlock( - BlockPayload, - [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info](CompressedBuffer&& Chunk, - const IoHash& AttachmentRawHash) { - if (WantedChunks.contains(AttachmentRawHash)) - { - WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); - IoHash RawHash; - uint64_t RawSize; - ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), RawHash, RawSize)); - ZEN_ASSERT(RawHash == AttachmentRawHash); - WriteRawHashes.emplace_back(AttachmentRawHash); - WantedChunks.erase(AttachmentRawHash); - } - }); + uint64_t BlockHeaderSize = 0; + bool StoreChunksOK = IterateChunkBlock( + BlockPayload, + [&WantedChunks, &WriteAttachmentBuffers, &WriteRawHashes, &Info](CompressedBuffer&& Chunk, + const IoHash& AttachmentRawHash) { + if (WantedChunks.contains(AttachmentRawHash)) + { + WriteAttachmentBuffers.emplace_back(Chunk.GetCompressed().Flatten().AsIoBuffer()); + IoHash RawHash; + uint64_t RawSize; + ZEN_ASSERT(CompressedBuffer::ValidateCompressedHeader(WriteAttachmentBuffers.back(), RawHash, RawSize)); + ZEN_ASSERT(RawHash == AttachmentRawHash); + WriteRawHashes.emplace_back(AttachmentRawHash); + WantedChunks.erase(AttachmentRawHash); + } + }, + BlockHeaderSize); if (!StoreChunksOK) { @@ -738,14 +683,14 @@ namespace remotestore_impl { }); }; - void CreateBlock(WorkerThreadPool& WorkerPool, - Latch& OpSectionsLatch, - std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, - RwLock& SectionsLock, - std::vector<RemoteProjectStore::Block>& Blocks, - size_t BlockIndex, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, - AsyncRemoteResult& RemoteResult) + void CreateBlock(WorkerThreadPool& WorkerPool, + Latch& OpSectionsLatch, + std::vector<std::pair<IoHash, FetchChunkFunc>>&& ChunksInBlock, + RwLock& SectionsLock, + std::vector<ChunkBlockDescription>& Blocks, + size_t BlockIndex, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, + AsyncRemoteResult& RemoteResult) { OpSectionsLatch.AddCount(1); WorkerPool.ScheduleWork([&Blocks, @@ -764,10 +709,10 @@ namespace remotestore_impl { try { ZEN_ASSERT(ChunkCount > 0); - Stopwatch Timer; - RemoteProjectStore::Block Block; - CompressedBuffer CompressedBlock = GenerateBlock(std::move(Chunks), Block); - IoHash BlockHash = CompressedBlock.DecodeRawHash(); + Stopwatch Timer; + ChunkBlockDescription Block; + CompressedBuffer CompressedBlock = GenerateChunkBlock(std::move(Chunks), Block); + IoHash BlockHash = CompressedBlock.DecodeRawHash(); { // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index RwLock::SharedLockScope __(SectionsLock); @@ -800,8 +745,8 @@ namespace remotestore_impl { struct CreatedBlock { - IoBuffer Payload; - RemoteProjectStore::Block Block; + IoBuffer Payload; + ChunkBlockDescription Block; }; void UploadAttachments(WorkerThreadPool& WorkerPool, @@ -931,8 +876,8 @@ namespace remotestore_impl { } try { - IoBuffer Payload; - RemoteProjectStore::Block Block; + IoBuffer Payload; + ChunkBlockDescription Block; if (auto BlockIt = CreatedBlocks.find(RawHash); BlockIt != CreatedBlocks.end()) { Payload = BlockIt->second.Payload; @@ -1058,7 +1003,7 @@ namespace remotestore_impl { { auto It = BulkBlockAttachmentsToUpload.find(Chunk); ZEN_ASSERT(It != BulkBlockAttachmentsToUpload.end()); - CompositeBuffer ChunkPayload = It->second(It->first); + CompressedBuffer ChunkPayload = It->second(It->first).second; if (!ChunkPayload) { RemoteResult.SetError(static_cast<int32_t>(HttpResponseCode::NotFound), @@ -1067,8 +1012,8 @@ namespace remotestore_impl { ChunkBuffers.clear(); break; } - ChunksSize += ChunkPayload.GetSize(); - ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).Flatten().AsIoBuffer())); + ChunksSize += ChunkPayload.GetCompressedSize(); + ChunkBuffers.emplace_back(SharedBuffer(std::move(ChunkPayload).GetCompressed().Flatten().AsIoBuffer())); } RemoteProjectStore::SaveAttachmentsResult Result = RemoteStore.SaveAttachments(ChunkBuffers); if (Result.ErrorCode) @@ -1139,54 +1084,13 @@ namespace remotestore_impl { } } // namespace remotestore_impl -bool -IterateBlock(const SharedBuffer& BlockPayload, std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor) -{ - ZEN_ASSERT(BlockPayload); - if (BlockPayload.GetSize() < 1) - { - return false; - } - - MemoryView BlockView = BlockPayload.GetView(); - const uint8_t* ReadPtr = reinterpret_cast<const uint8_t*>(BlockView.GetData()); - uint32_t NumberSize; - uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); - ReadPtr += NumberSize; - std::vector<uint64_t> ChunkSizes; - ChunkSizes.reserve(ChunkCount); - while (ChunkCount--) - { - ChunkSizes.push_back(ReadVarUInt(ReadPtr, NumberSize)); - ReadPtr += NumberSize; - } - ptrdiff_t TempBufferLength = std::distance(reinterpret_cast<const uint8_t*>(BlockView.GetData()), ReadPtr); - ZEN_ASSERT(TempBufferLength > 0); - for (uint64_t ChunkSize : ChunkSizes) - { - IoBuffer Chunk(IoBuffer::Wrap, ReadPtr, ChunkSize); - IoHash AttachmentRawHash; - uint64_t AttachmentRawSize; - CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize); - - if (!CompressedChunk) - { - ZEN_ERROR("Invalid chunk in block"); - return false; - } - Visitor(std::move(CompressedChunk), AttachmentRawHash); - ReadPtr += ChunkSize; - ZEN_ASSERT(ReadPtr <= BlockView.GetDataEnd()); - } - return true; -}; std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject) { using namespace std::literals; - std::vector<RemoteProjectStore::Block> Result; - CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); + std::vector<ChunkBlockDescription> Result; + CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); std::vector<IoHash> BlockHashes; BlockHashes.reserve(BlocksArray.Num()); @@ -1199,11 +1103,11 @@ GetBlockHashesFromOplog(CbObjectView ContainerObject) return BlockHashes; } -std::vector<RemoteProjectStore::Block> +std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes) { using namespace std::literals; - std::vector<RemoteProjectStore::Block> Result; + std::vector<ThinChunkBlockDescription> Result; CbArrayView BlocksArray = ContainerObject["blocks"sv].AsArrayView(); tsl::robin_set<IoHash, IoHash::Hasher> IncludeSet; IncludeSet.insert(IncludeBlockHashes.begin(), IncludeBlockHashes.end()); @@ -1226,53 +1130,12 @@ GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> Include { ChunkHashes.push_back(ChunkField.AsHash()); } - Result.push_back({.BlockHash = BlockHash, .ChunkHashes = std::move(ChunkHashes)}); + Result.push_back(ThinChunkBlockDescription{.BlockHash = BlockHash, .ChunkRawHashes = std::move(ChunkHashes)}); } } return Result; } -CompressedBuffer -GenerateBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, RemoteProjectStore::Block& OutBlock) -{ - const size_t ChunkCount = FetchChunks.size(); - - std::vector<SharedBuffer> ChunkSegments; - ChunkSegments.resize(1); - ChunkSegments.reserve(1 + ChunkCount); - OutBlock.ChunkHashes.reserve(ChunkCount); - OutBlock.ChunkLengths.reserve(ChunkCount); - { - IoBuffer TempBuffer(ChunkCount * 9); - MutableMemoryView View = TempBuffer.GetMutableView(); - uint8_t* BufferStartPtr = reinterpret_cast<uint8_t*>(View.GetData()); - uint8_t* BufferEndPtr = BufferStartPtr; - BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr); - for (const auto& It : FetchChunks) - { - CompositeBuffer Chunk = It.second(It.first); - uint64_t ChunkSize = 0; - std::span<const SharedBuffer> Segments = Chunk.GetSegments(); - for (const SharedBuffer& Segment : Segments) - { - ChunkSize += Segment.GetSize(); - ChunkSegments.push_back(Segment); - } - BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); - OutBlock.ChunkHashes.push_back(It.first); - OutBlock.ChunkLengths.push_back(gsl::narrow<uint32_t>(ChunkSize)); - } - ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); - ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); - ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow<size_t>(TempBufferLength))); - } - CompressedBuffer CompressedBlock = - CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); - OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); - OutBlock.FirstChunkOffset = gsl::narrow<uint32_t>(CompressedBuffer::GetHeaderSizeForNoneEncoder() + ChunkSegments[0].GetSize()); - return CompressedBlock; -} - CbObject BuildContainer(CidStore& ChunkStore, ProjectStore::Project& Project, @@ -1283,9 +1146,9 @@ BuildContainer(CidStore& ChunkStore, bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::vector<RemoteProjectStore::Block>& KnownBlocks, + const std::vector<ThinChunkBlockDescription>& KnownBlocks, WorkerThreadPool& WorkerPool, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles, @@ -1307,9 +1170,9 @@ BuildContainer(CidStore& ChunkStore, std::unordered_map<IoHash, FoundAttachment, IoHash::Hasher> UploadAttachments; - RwLock BlocksLock; - std::vector<RemoteProjectStore::Block> Blocks; - CompressedBuffer OpsBuffer; + RwLock BlocksLock; + std::vector<ChunkBlockDescription> Blocks; + CompressedBuffer OpsBuffer; std::filesystem::path AttachmentTempPath = Oplog.TempPath(); AttachmentTempPath.append(".pending"); @@ -1349,7 +1212,7 @@ BuildContainer(CidStore& ChunkStore, { std::string_view ServerPath = View["serverpath"sv].AsString(); std::filesystem::path FilePath = Project.RootDir / ServerPath; - if (!std::filesystem::is_regular_file(FilePath)) + if (!IsFile(FilePath)) { remotestore_impl::ReportMessage( OptionalContext, @@ -1525,7 +1388,7 @@ BuildContainer(CidStore& ChunkStore, return {}; } - auto FindReuseBlocks = [](const std::vector<RemoteProjectStore::Block>& KnownBlocks, + auto FindReuseBlocks = [](const std::vector<ThinChunkBlockDescription>& KnownBlocks, const std::unordered_set<IoHash, IoHash::Hasher>& Attachments, JobContext* OptionalContext) -> std::vector<size_t> { std::vector<size_t> ReuseBlockIndexes; @@ -1538,14 +1401,14 @@ BuildContainer(CidStore& ChunkStore, for (size_t KnownBlockIndex = 0; KnownBlockIndex < KnownBlocks.size(); KnownBlockIndex++) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; - size_t BlockAttachmentCount = KnownBlock.ChunkHashes.size(); + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + size_t BlockAttachmentCount = KnownBlock.ChunkRawHashes.size(); if (BlockAttachmentCount == 0) { continue; } size_t FoundAttachmentCount = 0; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (Attachments.contains(KnownHash)) { @@ -1586,8 +1449,8 @@ BuildContainer(CidStore& ChunkStore, std::vector<size_t> ReusedBlockIndexes = FindReuseBlocks(KnownBlocks, FoundHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (UploadAttachments.erase(KnownHash) == 1) { @@ -1605,10 +1468,7 @@ BuildContainer(CidStore& ChunkStore, }; std::vector<ChunkedFile> ChunkedFiles; - auto ChunkFile = [AttachmentTempPath](const IoHash& RawHash, - IoBuffer& RawData, - const IoBufferFileReference& FileRef, - JobContext*) -> ChunkedFile { + auto ChunkFile = [](const IoHash& RawHash, IoBuffer& RawData, const IoBufferFileReference& FileRef, JobContext*) -> ChunkedFile { ChunkedFile Chunked; Stopwatch Timer; @@ -1632,12 +1492,12 @@ BuildContainer(CidStore& ChunkStore, return Chunked; }; - RwLock ResolveLock; - std::unordered_set<IoHash, IoHash::Hasher> ChunkedHashes; - std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; - std::unordered_map<IoHash, size_t, IoHash::Hasher> ChunkedUploadAttachments; - std::unordered_map<IoHash, IoBuffer, IoHash::Hasher> LooseUploadAttachments; - std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; + RwLock ResolveLock; + std::unordered_set<IoHash, IoHash::Hasher> ChunkedHashes; + std::unordered_set<IoHash, IoHash::Hasher> LargeChunkHashes; + std::unordered_map<IoHash, size_t, IoHash::Hasher> ChunkedUploadAttachments; + std::unordered_map<IoHash, std::pair<uint64_t, IoBuffer>, IoHash::Hasher> LooseUploadAttachments; + std::unordered_set<IoHash, IoHash::Hasher> MissingHashes; remotestore_impl::ReportMessage(OptionalContext, fmt::format("Resolving {} attachments from {} ops", UploadAttachments.size(), TotalOpCount)); @@ -1717,9 +1577,7 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); - - IoBuffer TempAttachmentBuffer = - remotestore_impl::WriteToTempFile(std::move(Compressed), AttachmentPath); + IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), @@ -1730,7 +1588,7 @@ BuildContainer(CidStore& ChunkStore, } else { - size_t RawSize = RawData.GetSize(); + uint64_t RawSize = RawData.GetSize(); CompressedBuffer Compressed = CompressedBuffer::Compress(SharedBuffer(RawData), OodleCompressor::Mermaid, OodleCompressionLevel::VeryFast); @@ -1738,23 +1596,24 @@ BuildContainer(CidStore& ChunkStore, std::filesystem::path AttachmentPath = AttachmentTempPath; AttachmentPath.append(RawHash.ToHexString()); - IoBuffer TempAttachmentBuffer = remotestore_impl::WriteToTempFile(std::move(Compressed), AttachmentPath); + uint64_t CompressedSize = Compressed.GetCompressedSize(); + IoBuffer TempAttachmentBuffer = WriteToTempFile(std::move(Compressed).GetCompressed(), AttachmentPath); ZEN_INFO("Saved temp attachment to '{}', {} ({})", AttachmentPath, NiceBytes(RawSize), NiceBytes(TempAttachmentBuffer.GetSize())); - if (Compressed.GetCompressedSize() > MaxChunkEmbedSize) + if (CompressedSize > MaxChunkEmbedSize) { OnLargeAttachment(RawHash, [Data = std::move(TempAttachmentBuffer)](const IoHash&) { return Data; }); ResolveLock.WithExclusiveLock([RawHash, &LargeChunkHashes]() { LargeChunkHashes.insert(RawHash); }); } else { - UploadAttachment->Size = Compressed.GetCompressedSize(); + UploadAttachment->Size = CompressedSize; ResolveLock.WithExclusiveLock( - [RawHash, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { - LooseUploadAttachments.insert_or_assign(RawHash, std::move(Data)); + [RawHash, RawSize, &LooseUploadAttachments, Data = std::move(TempAttachmentBuffer)]() { + LooseUploadAttachments.insert_or_assign(RawHash, std::make_pair(RawSize, std::move(Data))); }); } } @@ -1927,8 +1786,8 @@ BuildContainer(CidStore& ChunkStore, std::vector<size_t> ReusedBlockFromChunking = FindReuseBlocks(KnownBlocks, ChunkedHashes, OptionalContext); for (size_t KnownBlockIndex : ReusedBlockIndexes) { - const RemoteProjectStore::Block& KnownBlock = KnownBlocks[KnownBlockIndex]; - for (const IoHash& KnownHash : KnownBlock.ChunkHashes) + const ThinChunkBlockDescription& KnownBlock = KnownBlocks[KnownBlockIndex]; + for (const IoHash& KnownHash : KnownBlock.ChunkRawHashes) { if (ChunkedHashes.erase(KnownHash) == 1) { @@ -1946,7 +1805,7 @@ BuildContainer(CidStore& ChunkStore, Blocks.reserve(ReuseBlockCount); for (auto It = ReusedBlockIndexes.begin(); It != UniqueKnownBlocksEnd; It++) { - Blocks.push_back(KnownBlocks[*It]); + Blocks.push_back({KnownBlocks[*It]}); } remotestore_impl::ReportMessage(OptionalContext, fmt::format("Reused {} attachments from {} blocks", ReusedAttachmentCount, ReuseBlockCount)); @@ -2062,9 +1921,9 @@ BuildContainer(CidStore& ChunkStore, { // We can share the lock as we are not resizing the vector and only touch BlockHash at our own index RwLock::SharedLockScope _(BlocksLock); - Blocks[BlockIndex].ChunkHashes.insert(Blocks[BlockIndex].ChunkHashes.end(), - BlockAttachmentHashes.begin(), - BlockAttachmentHashes.end()); + Blocks[BlockIndex].ChunkRawHashes.insert(Blocks[BlockIndex].ChunkRawHashes.end(), + BlockAttachmentHashes.begin(), + BlockAttachmentHashes.end()); } uint64_t NowMS = Timer.GetElapsedTimeMs(); ZEN_INFO("Assembled block {} with {} chunks in {} ({})", @@ -2109,16 +1968,25 @@ BuildContainer(CidStore& ChunkStore, { if (auto It = LooseUploadAttachments.find(RawHash); It != LooseUploadAttachments.end()) { - ChunksInBlock.emplace_back(std::make_pair(RawHash, [IoBuffer = SharedBuffer(It->second)](const IoHash&) { - return CompositeBuffer(IoBuffer); - })); + ChunksInBlock.emplace_back(std::make_pair( + RawHash, + [RawSize = It->second.first, + IoBuffer = SharedBuffer(It->second.second)](const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { + return std::make_pair(RawSize, CompressedBuffer::FromCompressedNoValidate(IoBuffer.AsIoBuffer())); + })); LooseUploadAttachments.erase(It); } else { - ChunksInBlock.emplace_back(std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) { - return CompositeBuffer(SharedBuffer(ChunkStore.FindChunkByCid(RawHash))); - })); + ChunksInBlock.emplace_back( + std::make_pair(RawHash, [&ChunkStore](const IoHash& RawHash) -> std::pair<uint64_t, CompressedBuffer> { + IoBuffer Chunk = ChunkStore.FindChunkByCid(RawHash); + IoHash _; + uint64_t RawSize = 0; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(std::move(Chunk)), _, RawSize); + ZEN_ASSERT(Compressed); + return {RawSize, Compressed}; + })); } BlockSize += PayloadSize; @@ -2169,14 +2037,15 @@ BuildContainer(CidStore& ChunkStore, if (BlockAttachmentHashes.insert(ChunkHash).second) { const ChunkSource& Source = Chunked.ChunkSources[ChunkIndex]; - ChunksInBlock.emplace_back(std::make_pair( - ChunkHash, - [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size](const IoHash&) { - return CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), - OodleCompressor::Mermaid, - OodleCompressionLevel::None) - .GetCompressed(); - })); + ChunksInBlock.emplace_back( + std::make_pair(ChunkHash, + [Source = ChunkedFile.Source, Offset = Source.Offset, Size = Source.Size]( + const IoHash&) -> std::pair<uint64_t, CompressedBuffer> { + return {Size, + CompressedBuffer::Compress(SharedBuffer(IoBuffer(Source, Offset, Size)), + OodleCompressor::Mermaid, + OodleCompressionLevel::None)}; + })); BlockSize += CompressedBuffer::GetHeaderSizeForNoneEncoder() + Source.Size; if (BuildBlocks) { @@ -2298,9 +2167,9 @@ BuildContainer(CidStore& ChunkStore, OplogContinerWriter.AddBinary("ops"sv, CompressedOpsSection.GetCompressed().Flatten().AsIoBuffer()); OplogContinerWriter.BeginArray("blocks"sv); { - for (const RemoteProjectStore::Block& B : Blocks) + for (const ChunkBlockDescription& B : Blocks) { - ZEN_ASSERT(!B.ChunkHashes.empty()); + ZEN_ASSERT(!B.ChunkRawHashes.empty()); if (BuildBlocks) { ZEN_ASSERT(B.BlockHash != IoHash::Zero); @@ -2310,7 +2179,7 @@ BuildContainer(CidStore& ChunkStore, OplogContinerWriter.AddBinaryAttachment("rawhash"sv, B.BlockHash); OplogContinerWriter.BeginArray("chunks"sv); { - for (const IoHash& RawHash : B.ChunkHashes) + for (const IoHash& RawHash : B.ChunkRawHashes) { OplogContinerWriter.AddHash(RawHash); } @@ -2326,7 +2195,7 @@ BuildContainer(CidStore& ChunkStore, { OplogContinerWriter.BeginArray("chunks"sv); { - for (const IoHash& RawHash : B.ChunkHashes) + for (const IoHash& RawHash : B.ChunkRawHashes) { OplogContinerWriter.AddBinaryAttachment(RawHash); } @@ -2392,7 +2261,7 @@ BuildContainer(CidStore& ChunkStore, bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles) @@ -2458,13 +2327,13 @@ SaveOplog(CidStore& ChunkStore, std::unordered_map<IoHash, remotestore_impl::CreatedBlock, IoHash::Hasher> CreatedBlocks; tsl::robin_map<IoHash, TGetAttachmentBufferFunc, IoHash::Hasher> LooseLargeFiles; - auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, - RemoteProjectStore::Block&& Block) { + auto MakeTempBlock = [AttachmentTempPath, &RemoteResult, &AttachmentsLock, &CreatedBlocks](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { std::filesystem::path BlockPath = AttachmentTempPath; BlockPath.append(Block.BlockHash.ToHexString()); try { - IoBuffer BlockBuffer = remotestore_impl::WriteToTempFile(std::move(CompressedBlock), BlockPath); + IoBuffer BlockBuffer = WriteToTempFile(std::move(CompressedBlock).GetCompressed(), BlockPath); RwLock::ExclusiveLockScope __(AttachmentsLock); CreatedBlocks.insert({Block.BlockHash, {.Payload = std::move(BlockBuffer), .Block = std::move(Block)}}); ZEN_DEBUG("Saved temp block to '{}', {}", AttachmentTempPath, NiceBytes(BlockBuffer.GetSize())); @@ -2478,8 +2347,8 @@ SaveOplog(CidStore& ChunkStore, } }; - auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, - RemoteProjectStore::Block&& Block) { + auto UploadBlock = [&RemoteStore, &RemoteResult, &Info, OptionalContext](CompressedBuffer&& CompressedBlock, + ChunkBlockDescription&& Block) { IoHash BlockHash = Block.BlockHash; RemoteProjectStore::SaveAttachmentResult Result = RemoteStore.SaveAttachment(CompressedBlock.GetCompressed(), BlockHash, std::move(Block)); @@ -2512,7 +2381,7 @@ SaveOplog(CidStore& ChunkStore, ZEN_DEBUG("Found attachment {}", AttachmentHash); }; - std::function<void(CompressedBuffer&&, RemoteProjectStore::Block &&)> OnBlock; + std::function<void(CompressedBuffer&&, ChunkBlockDescription &&)> OnBlock; if (RemoteStoreInfo.UseTempBlockFiles) { OnBlock = MakeTempBlock; @@ -2522,7 +2391,7 @@ SaveOplog(CidStore& ChunkStore, OnBlock = UploadBlock; } - std::vector<RemoteProjectStore::Block> KnownBlocks; + std::vector<ThinChunkBlockDescription> KnownBlocks; uint64_t TransferWallTimeMS = 0; @@ -3214,9 +3083,9 @@ LoadOplog(CidStore& ChunkStore, OptionalContext]() { auto _ = MakeGuard([&DechunkLatch, &TempFileName] { std::error_code Ec; - if (std::filesystem::exists(TempFileName, Ec)) + if (IsFile(TempFileName, Ec)) { - std::filesystem::remove(TempFileName, Ec); + RemoveFile(TempFileName, Ec); if (Ec) { ZEN_INFO("Failed to remove temporary file '{}'. Reason: {}", TempFileName, Ec.message()); diff --git a/src/zenserver/projectstore/remoteprojectstore.h b/src/zenserver/projectstore/remoteprojectstore.h index e05cb9923..1210afc7c 100644 --- a/src/zenserver/projectstore/remoteprojectstore.h +++ b/src/zenserver/projectstore/remoteprojectstore.h @@ -5,6 +5,8 @@ #include <zencore/jobqueue.h> #include "projectstore.h" +#include <zenutil/chunkblock.h> + #include <unordered_set> namespace zen { @@ -16,14 +18,6 @@ struct ChunkedInfo; class RemoteProjectStore { public: - struct Block - { - IoHash BlockHash; - std::vector<IoHash> ChunkHashes; - std::vector<uint32_t> ChunkLengths; - uint32_t FirstChunkOffset = (uint32_t)-1; - }; - struct Result { int32_t ErrorCode{}; @@ -72,7 +66,7 @@ public: struct GetKnownBlocksResult : public Result { - std::vector<Block> Blocks; + std::vector<ThinChunkBlockDescription> Blocks; }; struct RemoteStoreInfo @@ -101,11 +95,11 @@ public: virtual RemoteStoreInfo GetInfo() const = 0; virtual Stats GetStats() const = 0; - virtual CreateContainerResult CreateContainer() = 0; - virtual SaveResult SaveContainer(const IoBuffer& Payload) = 0; - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&& Block) = 0; - virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; - virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Payloads) = 0; + virtual CreateContainerResult CreateContainer() = 0; + virtual SaveResult SaveContainer(const IoBuffer& Payload) = 0; + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&& Block) = 0; + virtual FinalizeResult FinalizeContainer(const IoHash& RawHash) = 0; + virtual SaveAttachmentsResult SaveAttachments(const std::vector<SharedBuffer>& Payloads) = 0; virtual LoadContainerResult LoadContainer() = 0; virtual GetKnownBlocksResult GetKnownBlocks() = 0; @@ -125,7 +119,6 @@ struct RemoteStoreOptions }; typedef std::function<IoBuffer(const IoHash& AttachmentHash)> TGetAttachmentBufferFunc; -typedef std::function<CompositeBuffer(const IoHash& RawHash)> FetchChunkFunc; RemoteProjectStore::LoadContainerResult BuildContainer( CidStore& ChunkStore, @@ -137,7 +130,7 @@ RemoteProjectStore::LoadContainerResult BuildContainer( bool BuildBlocks, bool IgnoreMissingAttachments, bool AllowChunking, - const std::function<void(CompressedBuffer&&, RemoteProjectStore::Block&&)>& AsyncOnBlock, + const std::function<void(CompressedBuffer&&, ChunkBlockDescription&&)>& AsyncOnBlock, const std::function<void(const IoHash&, TGetAttachmentBufferFunc&&)>& OnLargeAttachment, const std::function<void(std::vector<std::pair<IoHash, FetchChunkFunc>>&&)>& OnBlockChunks, bool EmbedLooseFiles); @@ -173,9 +166,7 @@ RemoteProjectStore::Result LoadOplog(CidStore& ChunkStore, bool CleanOplog, JobContext* OptionalContext); -CompressedBuffer GenerateBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, RemoteProjectStore::Block& OutBlock); -bool IterateBlock(const SharedBuffer& BlockPayload, std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor); std::vector<IoHash> GetBlockHashesFromOplog(CbObjectView ContainerObject); -std::vector<RemoteProjectStore::Block> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes); +std::vector<ThinChunkBlockDescription> GetBlocksFromOplog(CbObjectView ContainerObject, std::span<const IoHash> IncludeBlockHashes); } // namespace zen diff --git a/src/zenserver/projectstore/zenremoteprojectstore.cpp b/src/zenserver/projectstore/zenremoteprojectstore.cpp index 42519b108..2ebf58a5d 100644 --- a/src/zenserver/projectstore/zenremoteprojectstore.cpp +++ b/src/zenserver/projectstore/zenremoteprojectstore.cpp @@ -93,7 +93,7 @@ public: return Result; } - virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, Block&&) override + virtual SaveAttachmentResult SaveAttachment(const CompositeBuffer& Payload, const IoHash& RawHash, ChunkBlockDescription&&) override { std::string SaveRequest = fmt::format("/{}/oplog/{}/{}"sv, m_Project, m_Oplog, RawHash); HttpClient::Response Response = m_Client.Post(SaveRequest, Payload, ZenContentType::kCompressedBinary); diff --git a/src/zenserver/upstream/upstreamcache.cpp b/src/zenserver/upstream/upstreamcache.cpp index e438a840a..744b861dd 100644 --- a/src/zenserver/upstream/upstreamcache.cpp +++ b/src/zenserver/upstream/upstreamcache.cpp @@ -134,7 +134,7 @@ namespace detail { return {.State = UpstreamEndpointState::kOk}; } - JupiterSession Session(m_Client->Logger(), m_Client->Client()); + JupiterSession Session(m_Client->Logger(), m_Client->Client(), m_AllowRedirect); const JupiterResult Result = Session.Authenticate(); if (Result.Success) @@ -181,7 +181,7 @@ namespace detail { try { - JupiterSession Session(m_Client->Logger(), m_Client->Client()); + JupiterSession Session(m_Client->Logger(), m_Client->Client(), m_AllowRedirect); JupiterResult Result; std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Namespace); @@ -301,7 +301,7 @@ namespace detail { { ZEN_TRACE_CPU("Upstream::Jupiter::GetCacheRecords"); - JupiterSession Session(m_Client->Logger(), m_Client->Client()); + JupiterSession Session(m_Client->Logger(), m_Client->Client(), m_AllowRedirect); GetUpstreamCacheResult Result; for (CacheKeyRequest* Request : Requests) @@ -365,7 +365,7 @@ namespace detail { try { - JupiterSession Session(m_Client->Logger(), m_Client->Client()); + JupiterSession Session(m_Client->Logger(), m_Client->Client(), m_AllowRedirect); std::string_view BlobStoreNamespace = GetActualBlobStoreNamespace(Namespace); const JupiterResult Result = Session.GetCompressedBlob(BlobStoreNamespace, ValueContentId); @@ -398,7 +398,7 @@ namespace detail { { ZEN_TRACE_CPU("Upstream::Jupiter::GetCacheChunks"); - JupiterSession Session(m_Client->Logger(), m_Client->Client()); + JupiterSession Session(m_Client->Logger(), m_Client->Client(), m_AllowRedirect); GetUpstreamCacheResult Result; for (CacheChunkRequest* RequestPtr : CacheChunkRequests) @@ -453,7 +453,7 @@ namespace detail { { ZEN_TRACE_CPU("Upstream::Jupiter::GetCacheValues"); - JupiterSession Session(m_Client->Logger(), m_Client->Client()); + JupiterSession Session(m_Client->Logger(), m_Client->Client(), m_AllowRedirect); GetUpstreamCacheResult Result; for (CacheValueRequest* RequestPtr : CacheValueRequests) @@ -533,7 +533,7 @@ namespace detail { try { - JupiterSession Session(m_Client->Logger(), m_Client->Client()); + JupiterSession Session(m_Client->Logger(), m_Client->Client(), m_AllowRedirect); if (CacheRecord.Type == ZenContentType::kBinary) { @@ -756,6 +756,7 @@ namespace detail { UpstreamStatus m_Status; UpstreamEndpointStats m_Stats; RefPtr<JupiterClient> m_Client; + const bool m_AllowRedirect = false; }; class ZenUpstreamEndpoint final : public UpstreamEndpoint diff --git a/src/zenserver/vfs/vfsservice.cpp b/src/zenserver/vfs/vfsservice.cpp index d302a10ec..bf761f8d1 100644 --- a/src/zenserver/vfs/vfsservice.cpp +++ b/src/zenserver/vfs/vfsservice.cpp @@ -61,7 +61,7 @@ GetContentAsCbObject(HttpServerRequest& HttpReq, CbObject& Cb) // echo {"method": "mount", "params": {"path": "d:\\VFS_ROOT"}} | curl.exe http://localhost:8558/vfs --data-binary @- // echo {"method": "unmount"} | curl.exe http://localhost:8558/vfs --data-binary @- -VfsService::VfsService() +VfsService::VfsService(HttpStatusService& StatusService) : m_StatusService(StatusService) { m_Impl = new Impl; @@ -136,10 +136,12 @@ VfsService::VfsService() } }, HttpVerb::kPost); + m_StatusService.RegisterHandler("vfs", *this); } VfsService::~VfsService() { + m_StatusService.UnregisterHandler("vfs", *this); delete m_Impl; } @@ -169,8 +171,9 @@ VfsService::AddService(Ref<ZenCacheStore>&& Z$) #else -VfsService::VfsService() +VfsService::VfsService(HttpStatusService& StatusService) : m_StatusService(StatusService) { + ZEN_UNUSED(StatusService); } VfsService::~VfsService() @@ -209,6 +212,14 @@ VfsService::BaseUri() const } void +VfsService::HandleStatusRequest(HttpServerRequest& Request) +{ + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void VfsService::HandleRequest(HttpServerRequest& HttpServiceRequest) { m_Router.HandleRequest(HttpServiceRequest); diff --git a/src/zenserver/vfs/vfsservice.h b/src/zenserver/vfs/vfsservice.h index dcdc71e81..0d0168e23 100644 --- a/src/zenserver/vfs/vfsservice.h +++ b/src/zenserver/vfs/vfsservice.h @@ -4,6 +4,7 @@ #include <zenbase/refcount.h> #include <zenhttp/httpserver.h> +#include <zenhttp/httpstatus.h> #include <zenvfs/vfs.h> #include <memory> @@ -24,10 +25,10 @@ class ZenCacheStore; */ -class VfsService : public HttpService +class VfsService : public HttpService, public IHttpStatusProvider { public: - VfsService(); + explicit VfsService(HttpStatusService& StatusService); ~VfsService(); void Mount(std::string_view MountPoint); @@ -39,12 +40,14 @@ public: protected: virtual const char* BaseUri() const override; virtual void HandleRequest(HttpServerRequest& HttpServiceRequest) override; + virtual void HandleStatusRequest(HttpServerRequest& Request) override; private: struct Impl; Impl* m_Impl = nullptr; - HttpRequestRouter m_Router; + HttpStatusService& m_StatusService; + HttpRequestRouter m_Router; friend struct VfsServiceDataSource; }; diff --git a/src/zenserver/workspaces/httpworkspaces.cpp b/src/zenserver/workspaces/httpworkspaces.cpp index 2d59c9357..7ef84743e 100644 --- a/src/zenserver/workspaces/httpworkspaces.cpp +++ b/src/zenserver/workspaces/httpworkspaces.cpp @@ -51,9 +51,9 @@ namespace { WriteWorkspaceConfig(Writer, WorkspaceConfig); if (std::optional<std::vector<Oid>> ShareIds = Workspaces.GetWorkspaceShares(WorkspaceConfig.Id); ShareIds) { - for (const Oid& ShareId : *ShareIds) + Writer.BeginArray("shares"); { - Writer.BeginArray("shares"); + for (const Oid& ShareId : *ShareIds) { if (std::optional<Workspaces::WorkspaceShareConfiguration> WorkspaceShareConfig = Workspaces.GetWorkspaceShareConfiguration(WorkspaceConfig.Id, ShareId); @@ -66,15 +66,19 @@ namespace { Writer.EndObject(); } } - Writer.EndArray(); } + Writer.EndArray(); } } } // namespace -HttpWorkspacesService::HttpWorkspacesService(HttpStatsService& StatsService, const WorkspacesServeConfig& Cfg, Workspaces& Workspaces) +HttpWorkspacesService::HttpWorkspacesService(HttpStatusService& StatusService, + HttpStatsService& StatsService, + const WorkspacesServeConfig& Cfg, + Workspaces& Workspaces) : m_Log(logging::Get("workspaces")) +, m_StatusService(StatusService) , m_StatsService(StatsService) , m_Config(Cfg) , m_Workspaces(Workspaces) @@ -84,7 +88,8 @@ HttpWorkspacesService::HttpWorkspacesService(HttpStatsService& StatsService, con HttpWorkspacesService::~HttpWorkspacesService() { - m_StatsService.UnregisterHandler("prj", *this); + m_StatsService.UnregisterHandler("ws", *this); + m_StatusService.UnregisterHandler("ws", *this); } const char* @@ -149,14 +154,21 @@ HttpWorkspacesService::HandleStatsRequest(HttpServerRequest& HttpReq) } void +HttpWorkspacesService::HandleStatusRequest(HttpServerRequest& Request) +{ + ZEN_TRACE_CPU("HttpWorkspacesService::Status"); + CbObjectWriter Cbo; + Cbo << "ok" << true; + Request.WriteResponse(HttpResponseCode::OK, Cbo.Save()); +} + +void HttpWorkspacesService::Initialize() { using namespace std::literals; ZEN_LOG_INFO(LogFs, "Initializing Workspaces Service"); - m_StatsService.RegisterHandler("ws", *this); - m_Router.AddPattern("workspace_id", "([[:xdigit:]]{24})"); m_Router.AddPattern("share_id", "([[:xdigit:]]{24})"); m_Router.AddPattern("chunk", "([[:xdigit:]]{24})"); @@ -238,6 +250,9 @@ HttpWorkspacesService::Initialize() HttpVerb::kGet); RefreshState(); + + m_StatsService.RegisterHandler("ws", *this); + m_StatusService.RegisterHandler("ws", *this); } std::filesystem::path @@ -589,7 +604,7 @@ void HttpWorkspacesService::ShareAliasFilesRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -608,7 +623,7 @@ void HttpWorkspacesService::ShareAliasChunkInfoRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -635,7 +650,7 @@ void HttpWorkspacesService::ShareAliasBatchRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -654,7 +669,7 @@ void HttpWorkspacesService::ShareAliasEntriesRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -673,7 +688,7 @@ void HttpWorkspacesService::ShareAliasChunkRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -700,7 +715,7 @@ void HttpWorkspacesService::ShareAliasRequest(HttpRouterRequest& Req) { HttpServerRequest& ServerRequest = Req.ServerRequest(); - std::string Alias = Req.GetCapture(1); + std::string_view Alias = Req.GetCapture(1); if (Alias.empty()) { return ServerRequest.WriteResponse(HttpResponseCode::BadRequest, @@ -1100,7 +1115,7 @@ HttpWorkspacesService::ShareRequest(HttpRouterRequest& Req, const Oid& Workspace } } - if (!std::filesystem::is_directory(Workspace.RootPath / NewConfig.SharePath)) + if (!IsDir(Workspace.RootPath / NewConfig.SharePath)) { return ServerRequest.WriteResponse(HttpResponseCode::NotFound, HttpContentType::kText, diff --git a/src/zenserver/workspaces/httpworkspaces.h b/src/zenserver/workspaces/httpworkspaces.h index f01f58b86..89a8e8bdc 100644 --- a/src/zenserver/workspaces/httpworkspaces.h +++ b/src/zenserver/workspaces/httpworkspaces.h @@ -5,6 +5,7 @@ #include <zencore/stats.h> #include <zenhttp/httpserver.h> #include <zenhttp/httpstats.h> +#include <zenhttp/httpstatus.h> namespace zen { @@ -16,16 +17,20 @@ struct WorkspacesServeConfig bool AllowConfigurationChanges = false; }; -class HttpWorkspacesService final : public HttpService, public IHttpStatsProvider +class HttpWorkspacesService final : public HttpService, public IHttpStatusProvider, public IHttpStatsProvider { public: - HttpWorkspacesService(HttpStatsService& StatsService, const WorkspacesServeConfig& Cfg, Workspaces& Workspaces); + HttpWorkspacesService(HttpStatusService& StatusService, + HttpStatsService& StatsService, + const WorkspacesServeConfig& Cfg, + Workspaces& Workspaces); virtual ~HttpWorkspacesService(); virtual const char* BaseUri() const override; virtual void HandleRequest(HttpServerRequest& Request) override; virtual void HandleStatsRequest(HttpServerRequest& Request) override; + virtual void HandleStatusRequest(HttpServerRequest& Request) override; private: struct WorkspacesStats @@ -80,6 +85,7 @@ private: void ChunkRequest(HttpRouterRequest& Req, const Oid& WorkspaceId, const Oid& ShareId, const Oid& ChunkId); void ShareRequest(HttpRouterRequest& Req, const Oid& WorkspaceId, const Oid& InShareId); + HttpStatusService& m_StatusService; HttpStatsService& m_StatsService; const WorkspacesServeConfig m_Config; HttpRequestRouter m_Router; diff --git a/src/zenserver/xmake.lua b/src/zenserver/xmake.lua index a3d7aa124..470fbd24e 100644 --- a/src/zenserver/xmake.lua +++ b/src/zenserver/xmake.lua @@ -28,8 +28,6 @@ target("zenserver") add_cxxflags("/bigobj") add_links("delayimp", "projectedfslib") add_ldflags("/delayload:ProjectedFSLib.dll") - - add_links("dbghelp", "winhttp", "version") -- for Sentry else remove_files("windows/**") end @@ -41,7 +39,6 @@ target("zenserver") add_ldflags("-framework Foundation") add_ldflags("-framework Security") add_ldflags("-framework SystemConfiguration") - add_syslinks("bsm") end add_options("compute") @@ -57,18 +54,6 @@ target("zenserver") "vcpkg::sol2" ) - if has_config("zensentry") then - add_packages("vcpkg::sentry-native") - end - - if is_plat("linux") then - -- As sentry_native uses symbols from breakpad_client, the latter must - -- be specified after the former with GCC-like toolchains. xmake however - -- is unaware of this and simply globs files from vcpkg's output. The - -- line below forces breakpad_client to be to the right of sentry_native - add_syslinks("breakpad_client") - end - -- to work around some unfortunate Ctrl-C behaviour on Linux/Mac due to -- our use of setsid() at startup we pass in `--no-detach` to zenserver -- ensure that it recieves signals when the user requests termination diff --git a/src/zenserver/zenserver.cpp b/src/zenserver/zenserver.cpp index cc1be13e2..7e3baa997 100644 --- a/src/zenserver/zenserver.cpp +++ b/src/zenserver/zenserver.cpp @@ -2,8 +2,6 @@ #include "zenserver.h" -#include "sentryintegration.h" - #include <zenbase/refcount.h> #include <zencore/basicfile.h> #include <zencore/compactbinarybuilder.h> @@ -16,6 +14,7 @@ #include <zencore/jobqueue.h> #include <zencore/logging.h> #include <zencore/scopeguard.h> +#include <zencore/sentryintegration.h> #include <zencore/session.h> #include <zencore/string.h> #include <zencore/thread.h> @@ -23,6 +22,7 @@ #include <zencore/trace.h> #include <zencore/workthreadpool.h> #include <zenhttp/httpserver.h> +#include <zenstore/buildstore/buildstore.h> #include <zenstore/cidstore.h> #include <zenstore/scrubcontext.h> #include <zenstore/workspaces.h> @@ -137,7 +137,7 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen return -1; } - m_UseSentry = ServerOptions.NoSentry == false; + m_UseSentry = ServerOptions.SentryConfig.Disable == false; m_ServerEntry = ServerEntry; m_DebugOptionForcedCrash = ServerOptions.ShouldCrash; m_IsPowerCycle = ServerOptions.IsPowerCycle; @@ -250,18 +250,27 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen *m_JobQueue, *m_OpenProcessCache, ProjectStore::Configuration{}); - m_HttpProjectService.reset(new HttpProjectService{*m_CidStore, m_ProjectStore, m_StatsService, *m_AuthMgr}); + m_HttpProjectService.reset(new HttpProjectService{*m_CidStore, m_ProjectStore, m_StatusService, m_StatsService, *m_AuthMgr}); if (ServerOptions.WorksSpacesConfig.Enabled) { m_Workspaces.reset(new Workspaces()); m_HttpWorkspacesService.reset( - new HttpWorkspacesService(m_StatsService, + new HttpWorkspacesService(m_StatusService, + m_StatsService, {.SystemRootDir = ServerOptions.SystemRootDir, .AllowConfigurationChanges = ServerOptions.WorksSpacesConfig.AllowConfigurationChanges}, *m_Workspaces)); } + if (ServerOptions.BuildStoreConfig.Enabled) + { + BuildStoreConfig BuildsCfg; + BuildsCfg.RootDirectory = m_DataRoot / "builds"; + BuildsCfg.MaxDiskSpaceLimit = ServerOptions.BuildStoreConfig.MaxDiskSpaceLimit; + m_BuildStore = std::make_unique<BuildStore>(std::move(BuildsCfg), m_GcManager); + } + if (ServerOptions.StructuredCacheConfig.Enabled) { InitializeStructuredCache(ServerOptions); @@ -287,7 +296,7 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen m_Http->RegisterService(*m_HttpWorkspacesService); } - m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot); + m_FrontendService = std::make_unique<HttpFrontendService>(m_ContentRoot, m_StatusService); if (m_FrontendService) { @@ -306,12 +315,18 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen ObjCfg.Buckets.push_back(std::move(NewBucket)); } - m_ObjStoreService = std::make_unique<HttpObjectStoreService>(std::move(ObjCfg)); + m_ObjStoreService = std::make_unique<HttpObjectStoreService>(m_StatusService, std::move(ObjCfg)); m_Http->RegisterService(*m_ObjStoreService); } + if (ServerOptions.BuildStoreConfig.Enabled) + { + m_BuildStoreService = std::make_unique<HttpBuildStoreService>(m_StatusService, m_StatsService, *m_BuildStore); + m_Http->RegisterService(*m_BuildStoreService); + } + #if ZEN_WITH_VFS - m_VfsService = std::make_unique<VfsService>(); + m_VfsService = std::make_unique<VfsService>(m_StatusService); m_VfsService->AddService(Ref<ProjectStore>(m_ProjectStore)); m_VfsService->AddService(Ref<ZenCacheStore>(m_CacheStore)); m_Http->RegisterService(*m_VfsService); @@ -327,6 +342,7 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen .Interval = std::chrono::seconds(ServerOptions.GcConfig.IntervalSeconds), .MaxCacheDuration = std::chrono::seconds(ServerOptions.GcConfig.Cache.MaxDurationSeconds), .MaxProjectStoreDuration = std::chrono::seconds(ServerOptions.GcConfig.ProjectStore.MaxDurationSeconds), + .MaxBuildStoreDuration = std::chrono::seconds(ServerOptions.GcConfig.BuildStore.MaxDurationSeconds), .CollectSmallObjects = ServerOptions.GcConfig.CollectSmallObjects, .Enabled = ServerOptions.GcConfig.Enabled, .DiskReserveSize = ServerOptions.GcConfig.DiskReserveSize, @@ -347,6 +363,7 @@ ZenServer::Initialize(const ZenServerOptions& ServerOptions, ZenServerState::Zen m_CacheStore.Get(), m_CidStore.get(), m_ProjectStore, + m_BuildStore.get(), HttpAdminService::LogPaths{.AbsLogPath = ServerOptions.AbsLogFile, .HttpLogPath = ServerOptions.DataDir / "logs" / "http.log", .CacheLogPath = ServerOptions.DataDir / "logs" / "z$.log"}, @@ -418,7 +435,7 @@ ZenServer::InitializeState(const ZenServerOptions& ServerOptions) if (ManifestVersion != ZEN_CFG_SCHEMA_VERSION) { std::filesystem::path ManifestSkipSchemaChangePath = m_DataRoot / "root_manifest.ignore_schema_mismatch"; - if (ManifestVersion != 0 && std::filesystem::is_regular_file(ManifestSkipSchemaChangePath)) + if (ManifestVersion != 0 && IsFile(ManifestSkipSchemaChangePath)) { ZEN_INFO( "Schema version {} found in '{}' does not match {}, ignoring mismatch due to existance of '{}' and updating " @@ -467,7 +484,7 @@ ZenServer::InitializeState(const ZenServerOptions& ServerOptions) { ZEN_INFO("Deleting '{}'", DirEntry.path()); - std::filesystem::remove_all(DirEntry.path(), Ec); + DeleteDirectories(DirEntry.path(), Ec); if (Ec) { @@ -530,11 +547,28 @@ ZenServer::InitializeStructuredCache(const ZenServerOptions& ServerOptions) Config.AllowAutomaticCreationOfNamespaces = true; Config.Logging = {.EnableWriteLog = ServerOptions.StructuredCacheConfig.WriteLogEnabled, .EnableAccessLog = ServerOptions.StructuredCacheConfig.AccessLogEnabled}; - Config.NamespaceConfig.DiskLayerConfig.BucketConfig.MemCacheSizeThreshold = ServerOptions.StructuredCacheConfig.MemCacheSizeThreshold; - Config.NamespaceConfig.DiskLayerConfig.BucketConfig.LimitOverwrites = ServerOptions.StructuredCacheConfig.LimitOverwrites; - Config.NamespaceConfig.DiskLayerConfig.MemCacheTargetFootprintBytes = ServerOptions.StructuredCacheConfig.MemTargetFootprintBytes; - Config.NamespaceConfig.DiskLayerConfig.MemCacheTrimIntervalSeconds = ServerOptions.StructuredCacheConfig.MemTrimIntervalSeconds; - Config.NamespaceConfig.DiskLayerConfig.MemCacheMaxAgeSeconds = ServerOptions.StructuredCacheConfig.MemMaxAgeSeconds; + + for (const auto& It : ServerOptions.StructuredCacheConfig.PerBucketConfigs) + { + const std::string& BucketName = It.first; + const ZenStructuredCacheBucketConfig& ZenBucketConfig = It.second; + ZenCacheDiskLayer::BucketConfiguration BucketConfig = {.MaxBlockSize = ZenBucketConfig.MaxBlockSize, + .PayloadAlignment = ZenBucketConfig.PayloadAlignment, + .MemCacheSizeThreshold = ZenBucketConfig.MemCacheSizeThreshold, + .LargeObjectThreshold = ZenBucketConfig.LargeObjectThreshold}; + Config.NamespaceConfig.DiskLayerConfig.BucketConfigMap.insert_or_assign(BucketName, BucketConfig); + } + Config.NamespaceConfig.DiskLayerConfig.BucketConfig.MaxBlockSize = ServerOptions.StructuredCacheConfig.BucketConfig.MaxBlockSize, + Config.NamespaceConfig.DiskLayerConfig.BucketConfig.PayloadAlignment = + ServerOptions.StructuredCacheConfig.BucketConfig.PayloadAlignment, + Config.NamespaceConfig.DiskLayerConfig.BucketConfig.MemCacheSizeThreshold = + ServerOptions.StructuredCacheConfig.BucketConfig.MemCacheSizeThreshold, + Config.NamespaceConfig.DiskLayerConfig.BucketConfig.LargeObjectThreshold = + ServerOptions.StructuredCacheConfig.BucketConfig.LargeObjectThreshold, + Config.NamespaceConfig.DiskLayerConfig.BucketConfig.LimitOverwrites = ServerOptions.StructuredCacheConfig.LimitOverwrites; + Config.NamespaceConfig.DiskLayerConfig.MemCacheTargetFootprintBytes = ServerOptions.StructuredCacheConfig.MemTargetFootprintBytes; + Config.NamespaceConfig.DiskLayerConfig.MemCacheTrimIntervalSeconds = ServerOptions.StructuredCacheConfig.MemTrimIntervalSeconds; + Config.NamespaceConfig.DiskLayerConfig.MemCacheMaxAgeSeconds = ServerOptions.StructuredCacheConfig.MemMaxAgeSeconds; if (ServerOptions.IsDedicated) { @@ -802,6 +836,9 @@ ZenServer::Cleanup() m_ObjStoreService.reset(); m_FrontendService.reset(); + m_BuildStoreService.reset(); + m_BuildStore = {}; + m_StructuredCacheService.reset(); m_UpstreamService.reset(); m_UpstreamCache.reset(); @@ -896,7 +933,7 @@ ZenServer::CheckStateMarker() std::filesystem::path StateMarkerPath = m_DataRoot / "state_marker"; try { - if (!std::filesystem::exists(StateMarkerPath)) + if (!IsFile(StateMarkerPath)) { ZEN_WARN("state marker at {} has been deleted, exiting", StateMarkerPath); RequestExit(1); diff --git a/src/zenserver/zenserver.h b/src/zenserver/zenserver.h index 80054dc35..5cfa04ba1 100644 --- a/src/zenserver/zenserver.h +++ b/src/zenserver/zenserver.h @@ -25,6 +25,7 @@ ZEN_THIRD_PARTY_INCLUDES_END #include <zenstore/cache/structuredcachestore.h> #include <zenstore/gc.h> #include "admin/admin.h" +#include "buildstore/httpbuildstore.h" #include "cache/httpstructuredcache.h" #include "diag/diagsvcs.h" #include "frontend/frontend.h" @@ -127,6 +128,8 @@ private: Ref<ZenCacheStore> m_CacheStore; std::unique_ptr<OpenProcessCache> m_OpenProcessCache; HttpTestService m_TestService; + std::unique_ptr<BuildStore> m_BuildStore; + #if ZEN_WITH_TESTS HttpTestingService m_TestingService; #endif @@ -140,6 +143,7 @@ private: HttpHealthService m_HealthService; std::unique_ptr<HttpFrontendService> m_FrontendService; std::unique_ptr<HttpObjectStoreService> m_ObjStoreService; + std::unique_ptr<HttpBuildStoreService> m_BuildStoreService; std::unique_ptr<VfsService> m_VfsService; std::unique_ptr<JobQueue> m_JobQueue; std::unique_ptr<HttpAdminService> m_AdminService; diff --git a/src/zenstore-test/zenstore-test.cpp b/src/zenstore-test/zenstore-test.cpp index e5b312984..04f5a8e10 100644 --- a/src/zenstore-test/zenstore-test.cpp +++ b/src/zenstore-test/zenstore-test.cpp @@ -2,6 +2,7 @@ #include <zencore/filesystem.h> #include <zencore/logging.h> +#include <zencore/trace.h> #include <zenstore/zenstore.h> #include <zencore/memory/newdelete.h> @@ -17,6 +18,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zenstore_forcelinktests(); + zen::TraceInit("zenstore-test"); zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenstore/blockstore.cpp b/src/zenstore/blockstore.cpp index e976c061d..7b56c64bd 100644 --- a/src/zenstore/blockstore.cpp +++ b/src/zenstore/blockstore.cpp @@ -70,7 +70,7 @@ BlockStoreFile::Open() return false; } ZEN_WARN("Failed to open cas block '{}', reason: '{}', retries left: {}.", m_Path, Ec.message(), RetriesLeft); - Sleep(100 - (3 - RetriesLeft) * 100); // Total 600 ms + Sleep(100 + (3 - RetriesLeft) * 100); // Total 600 ms RetriesLeft--; return true; }); @@ -85,7 +85,7 @@ BlockStoreFile::Create(uint64_t InitialSize) ZEN_TRACE_CPU("BlockStoreFile::Create"); auto ParentPath = m_Path.parent_path(); - if (!std::filesystem::is_directory(ParentPath)) + if (!IsDir(ParentPath)) { CreateDirectories(ParentPath); } @@ -153,14 +153,28 @@ void BlockStoreFile::Write(const void* Data, uint64_t Size, uint64_t FileOffset) { ZEN_TRACE_CPU("BlockStoreFile::Write"); +#if ZEN_BUILD_DEBUG + if (uint64_t CachedFileSize = m_CachedFileSize.load(); CachedFileSize > 0) + { + ZEN_ASSERT(FileOffset + Size <= CachedFileSize); + } +#endif // ZEN_BUILD_DEBUG m_File.Write(Data, Size, FileOffset); } void -BlockStoreFile::Flush() +BlockStoreFile::Flush(uint64_t FinalSize) { ZEN_TRACE_CPU("BlockStoreFile::Flush"); m_File.Flush(); + if (FinalSize != (uint64_t)-1) + { + uint64_t ExpectedSize = 0; + if (!m_CachedFileSize.compare_exchange_weak(ExpectedSize, FinalSize)) + { + ZEN_ASSERT(m_CachedFileSize.load() == FinalSize); + } + } } BasicFile& @@ -215,7 +229,7 @@ IsMetaDataValid(const std::filesystem::path& BlockPath, const std::filesystem::p } if (MetaWriteTime < BlockWriteTime) { - std::filesystem::remove(MetaPath, Ec); + RemoveFile(MetaPath, Ec); return false; } return true; @@ -239,7 +253,7 @@ BlockStoreFile::MetaSize() const if (IsMetaDataValid(m_Path, MetaPath)) { std::error_code DummyEc; - if (uint64_t Size = std::filesystem::file_size(MetaPath, DummyEc); !DummyEc) + if (uint64_t Size = FileSizeFromPath(MetaPath, DummyEc); !DummyEc) { return Size; } @@ -252,7 +266,7 @@ BlockStoreFile::RemoveMeta() { std::filesystem::path MetaPath = GetMetaPath(); std::error_code DummyEc; - std::filesystem::remove(MetaPath, DummyEc); + RemoveFile(MetaPath, DummyEc); } std::filesystem::path @@ -272,6 +286,14 @@ BlockStore::BlockStore() BlockStore::~BlockStore() { + try + { + Close(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("~BlockStore() failed with: ", Ex.what()); + } } void @@ -291,8 +313,9 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max m_MaxBlockSize = MaxBlockSize; m_MaxBlockCount = MaxBlockCount; - if (std::filesystem::is_directory(m_BlocksBasePath)) + if (IsDir(m_BlocksBasePath)) { + std::vector<std::filesystem::path> EmptyBlockFiles; uint32_t NextBlockIndex = 0; std::vector<std::filesystem::path> FoldersToScan; FoldersToScan.push_back(m_BlocksBasePath); @@ -320,6 +343,12 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max { continue; } + if (Entry.file_size() == 0) + { + EmptyBlockFiles.push_back(Path); + continue; + } + Ref<BlockStoreFile> BlockFile{new BlockStoreFile(Path)}; BlockFile->Open(); m_TotalSize.fetch_add(BlockFile->TotalSize(), std::memory_order::relaxed); @@ -333,6 +362,17 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max } ++FolderOffset; } + + for (const std::filesystem::path& EmptyBlockFile : EmptyBlockFiles) + { + std::error_code Ec; + RemoveFile(EmptyBlockFile, Ec); + if (Ec) + { + ZEN_WARN("Unable to remove empty block file {}. Reason: {}", EmptyBlockFile, Ec.message()); + } + } + m_WriteBlockIndex.store(NextBlockIndex, std::memory_order_release); } else @@ -341,7 +381,7 @@ BlockStore::Initialize(const std::filesystem::path& BlocksBasePath, uint64_t Max } } -void +BlockStore::BlockIndexSet BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks) { ZEN_MEMSCOPE(GetBlocksTag()); @@ -349,8 +389,8 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks) RwLock::ExclusiveLockScope InsertLock(m_InsertLock); - tsl::robin_set<uint32_t> MissingBlocks; - tsl::robin_set<uint32_t> DeleteBlocks; + BlockIndexSet MissingBlocks; + BlockIndexSet DeleteBlocks; DeleteBlocks.reserve(m_ChunkBlocks.size()); for (auto It : m_ChunkBlocks) { @@ -369,13 +409,6 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks) MissingBlocks.insert(BlockIndex); } } - for (std::uint32_t BlockIndex : MissingBlocks) - { - std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockIndex); - Ref<BlockStoreFile> NewBlockFile(new BlockStoreFile(BlockPath)); - NewBlockFile->Create(0); - m_ChunkBlocks[BlockIndex] = NewBlockFile; - } for (std::uint32_t BlockIndex : DeleteBlocks) { std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockIndex); @@ -386,6 +419,7 @@ BlockStore::SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks) } m_ChunkBlocks.erase(BlockIndex); } + return MissingBlocks; } BlockStore::BlockEntryCountMap @@ -500,7 +534,7 @@ BlockStore::GetFreeBlockIndex(uint32_t ProbeIndex, RwLock::ExclusiveLockScope&, { OutBlockPath = GetBlockPath(m_BlocksBasePath, ProbeIndex); std::error_code Ec; - bool Exists = std::filesystem::exists(OutBlockPath, Ec); + bool Exists = IsFile(OutBlockPath, Ec); if (Ec) { ZEN_WARN("Failed to probe existence of file '{}' when trying to allocate a new block. Reason: '{}'", @@ -540,7 +574,7 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint32_t Alignment, cons { if (m_WriteBlock) { - m_WriteBlock->Flush(); + m_WriteBlock->Flush(m_CurrentInsertOffset); m_WriteBlock = nullptr; } @@ -578,7 +612,7 @@ BlockStore::WriteChunk(const void* Data, uint64_t Size, uint32_t Alignment, cons } void -BlockStore::WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback) +BlockStore::WriteChunks(std::span<const IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback) { ZEN_MEMSCOPE(GetBlocksTag()); ZEN_TRACE_CPU("BlockStore::WriteChunks"); @@ -674,6 +708,27 @@ BlockStore::WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const Wri } } +bool +BlockStore::HasChunk(const BlockStoreLocation& Location) const +{ + ZEN_TRACE_CPU("BlockStore::TryGetChunk"); + RwLock::SharedLockScope InsertLock(m_InsertLock); + if (auto BlockIt = m_ChunkBlocks.find(Location.BlockIndex); BlockIt != m_ChunkBlocks.end()) + { + if (const Ref<BlockStoreFile>& Block = BlockIt->second; Block) + { + InsertLock.ReleaseNow(); + + const uint64_t BlockSize = Block->FileSize(); + if (Location.Offset + Location.Size <= BlockSize) + { + return true; + } + } + } + return false; +} + IoBuffer BlockStore::TryGetChunk(const BlockStoreLocation& Location) const { @@ -706,7 +761,7 @@ BlockStore::Flush(bool ForceNewBlock) { if (m_WriteBlock) { - m_WriteBlock->Flush(); + m_WriteBlock->Flush(m_CurrentInsertOffset); } m_WriteBlock = nullptr; m_CurrentInsertOffset = 0; @@ -735,6 +790,8 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, return true; } + ZEN_ASSERT(ChunkLocations.size() >= InChunkIndexes.size()); + if (LargeSizeLimit == 0) { LargeSizeLimit = DefaultIterateSmallChunkWindowSize; @@ -746,7 +803,10 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, IterateSmallChunkWindowSize = Min((LargeSizeLimit + IterateSmallChunkMaxGapSize) * ChunkLocations.size(), IterateSmallChunkWindowSize); - uint32_t BlockIndex = ChunkLocations[InChunkIndexes[0]].BlockIndex; + const size_t FirstLocationIndex = InChunkIndexes[0]; + ZEN_ASSERT(FirstLocationIndex < ChunkLocations.size()); + + const uint32_t BlockIndex = ChunkLocations[FirstLocationIndex].BlockIndex; std::vector<size_t> ChunkIndexes(InChunkIndexes.begin(), InChunkIndexes.end()); std::sort(ChunkIndexes.begin(), ChunkIndexes.end(), [&](size_t IndexA, size_t IndexB) -> bool { return ChunkLocations[IndexA].Offset < ChunkLocations[IndexB].Offset; @@ -756,8 +816,9 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, IterateSmallChunkWindowSize, IterateSmallChunkMaxGapSize, &ChunkLocations](uint64_t BlockFileSize, std::span<const size_t> ChunkIndexes, size_t StartIndexOffset) -> size_t { - size_t ChunkCount = 0; - size_t StartIndex = ChunkIndexes[StartIndexOffset]; + size_t ChunkCount = 0; + size_t StartIndex = ChunkIndexes[StartIndexOffset]; + ZEN_ASSERT(StartIndex < ChunkLocations.size()); const BlockStoreLocation& StartLocation = ChunkLocations[StartIndex]; uint64_t StartOffset = StartLocation.Offset; uint64_t LastEnd = StartOffset + StartLocation.Size; @@ -810,22 +871,26 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, ZEN_ASSERT(BlockFile); InsertLock.ReleaseNow(); + const size_t BlockSize = BlockFile->FileSize(); + IoBuffer ReadBuffer; void* BufferBase = nullptr; size_t LocationIndexOffset = 0; while (LocationIndexOffset < ChunkIndexes.size()) { - size_t ChunkIndex = ChunkIndexes[LocationIndexOffset]; + size_t ChunkIndex = ChunkIndexes[LocationIndexOffset]; + ZEN_ASSERT(ChunkIndex < ChunkLocations.size()); const BlockStoreLocation& FirstLocation = ChunkLocations[ChunkIndex]; + ZEN_ASSERT(FirstLocation.BlockIndex == BlockIndex); - const size_t BlockSize = BlockFile->FileSize(); const size_t RangeCount = GetNextRange(BlockSize, ChunkIndexes, LocationIndexOffset); if (RangeCount > 1) { - size_t LastChunkIndex = ChunkIndexes[LocationIndexOffset + RangeCount - 1]; - const BlockStoreLocation& LastLocation = ChunkLocations[LastChunkIndex]; - uint64_t Size = LastLocation.Offset + LastLocation.Size - FirstLocation.Offset; + size_t LastChunkIndex = ChunkIndexes[LocationIndexOffset + RangeCount - 1]; + ZEN_ASSERT(LastChunkIndex < ChunkLocations.size()); + const BlockStoreLocation& LastLocation = ChunkLocations[LastChunkIndex]; + uint64_t Size = LastLocation.Offset + LastLocation.Size - FirstLocation.Offset; if (ReadBuffer.GetSize() < Size) { ReadBuffer = IoBuffer(Min(Size * 2, IterateSmallChunkWindowSize)); @@ -834,8 +899,9 @@ BlockStore::IterateBlock(std::span<const BlockStoreLocation> ChunkLocations, BlockFile->Read(BufferBase, Size, FirstLocation.Offset); for (size_t RangeIndex = 0; RangeIndex < RangeCount; ++RangeIndex) { - size_t NextChunkIndex = ChunkIndexes[LocationIndexOffset + RangeIndex]; - const BlockStoreLocation& ChunkLocation = ChunkLocations[NextChunkIndex]; + size_t NextChunkIndex = ChunkIndexes[LocationIndexOffset + RangeIndex]; + ZEN_ASSERT(NextChunkIndex < ChunkLocations.size()); + const BlockStoreLocation& ChunkLocation = ChunkLocations[NextChunkIndex]; if (ChunkLocation.Size == 0 || ((ChunkLocation.Offset + ChunkLocation.Size) > BlockSize)) { ZEN_LOG_SCOPE("chunk [{},{}] out of bounds (block #{} file size = {})", @@ -958,6 +1024,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, uint32_t NewBlockIndex = 0; MovedChunksArray MovedChunks; + ChunkIndexArray ScrubbedChunks; uint64_t AddedSize = 0; uint64_t RemovedSize = 0; @@ -986,14 +1053,16 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, auto ReportChanges = [&]() -> bool { bool Continue = true; - if (!MovedChunks.empty() || RemovedSize > 0) + if (!MovedChunks.empty() || !ScrubbedChunks.empty() || RemovedSize > 0) { - Continue = ChangeCallback(MovedChunks, RemovedSize > AddedSize ? RemovedSize - AddedSize : 0); + Continue = ChangeCallback(MovedChunks, ScrubbedChunks, RemovedSize > AddedSize ? RemovedSize - AddedSize : 0); DeletedSize += RemovedSize; + m_TotalSize.fetch_add(AddedSize); RemovedSize = 0; AddedSize = 0; MovedCount += MovedChunks.size(); MovedChunks.clear(); + ScrubbedChunks.clear(); } return Continue; }; @@ -1022,6 +1091,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, LogPrefix, m_BlocksBasePath, BlockIndex); + ScrubbedChunks.insert(ScrubbedChunks.end(), KeepChunkIndexes.begin(), KeepChunkIndexes.end()); return true; } if (!It->second) @@ -1030,6 +1100,7 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, LogPrefix, m_BlocksBasePath, BlockIndex); + ScrubbedChunks.insert(ScrubbedChunks.end(), KeepChunkIndexes.begin(), KeepChunkIndexes.end()); return true; } OldBlockFile = It->second; @@ -1051,11 +1122,10 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, std::sort(SortedChunkIndexes.begin(), SortedChunkIndexes.end(), [&ChunkLocations](size_t Lhs, size_t Rhs) { return ChunkLocations[Lhs].Offset < ChunkLocations[Rhs].Offset; }); - BasicFileBuffer SourceFileBuffer(OldBlockFile->GetBasicFile(), Min(65536u, OldBlockSize)); + BasicFileBuffer SourceFileBuffer(OldBlockFile->GetBasicFile(), Min(256u * 1024u, OldBlockSize)); - uint64_t WrittenBytesToBlock = 0; - uint64_t MovedFromBlock = 0; - std::vector<uint8_t> Chunk; + uint64_t MovedFromBlock = 0; + std::vector<uint8_t> ChunkBuffer; for (const size_t& ChunkIndex : SortedChunkIndexes) { const BlockStoreLocation ChunkLocation = ChunkLocations[ChunkIndex]; @@ -1070,19 +1140,29 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, ChunkLocation.Size, OldBlockFile->GetPath(), OldBlockSize); + ScrubbedChunks.push_back(ChunkIndex); continue; } - Chunk.resize(ChunkLocation.Size); - SourceFileBuffer.Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset); + MemoryView ChunkView = SourceFileBuffer.MakeView(ChunkLocation.Size, ChunkLocation.Offset); + if (ChunkView.GetSize() != ChunkLocation.Size) + { + ChunkBuffer.resize(ChunkLocation.Size); + SourceFileBuffer.Read(ChunkBuffer.data(), ChunkLocation.Size, ChunkLocation.Offset); + ChunkView = MemoryView(ChunkBuffer.data(), ChunkLocation.Size); + } - if ((WriteOffset + Chunk.size()) > m_MaxBlockSize) + if ((WriteOffset + ChunkView.GetSize()) > m_MaxBlockSize) { - TargetFileBuffer.reset(); + if (TargetFileBuffer) + { + TargetFileBuffer->Flush(); + TargetFileBuffer.reset(); + } if (NewBlockFile) { ZEN_ASSERT_SLOW(NewBlockFile->IsOpen()); - NewBlockFile->Flush(); + NewBlockFile->Flush(WriteOffset); uint64_t NewBlockSize = NewBlockFile->FileSize(); MovedSize += NewBlockSize; NewBlockFile = nullptr; @@ -1161,22 +1241,23 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, NiceBytes(Space.Free + ReclaimedSpace)); } NewBlockFile->Create(m_MaxBlockSize); - NewBlockIndex = NextBlockIndex; - WriteOffset = 0; - AddedSize += WrittenBytesToBlock; - WrittenBytesToBlock = 0; - TargetFileBuffer = std::make_unique<BasicFileWriter>(NewBlockFile->GetBasicFile(), Min(65536u, m_MaxBlockSize)); + NewBlockIndex = NextBlockIndex; + WriteOffset = 0; + TargetFileBuffer = std::make_unique<BasicFileWriter>(NewBlockFile->GetBasicFile(), Min(256u * 1024u, m_MaxBlockSize)); } - TargetFileBuffer->Write(Chunk.data(), ChunkLocation.Size, WriteOffset); + const uint64_t OldWriteOffset = WriteOffset; + WriteOffset = TargetFileBuffer->AlignTo(PayloadAlignment); + + TargetFileBuffer->Write(ChunkView.GetData(), ChunkLocation.Size, WriteOffset); MovedChunks.push_back( {ChunkIndex, {.BlockIndex = NewBlockIndex, .Offset = gsl::narrow<uint32_t>(WriteOffset), .Size = ChunkLocation.Size}}); - WrittenBytesToBlock = WriteOffset + ChunkLocation.Size; + WriteOffset += ChunkLocation.Size; MovedFromBlock += RoundUp(ChunkLocation.Offset + ChunkLocation.Size, PayloadAlignment) - ChunkLocation.Offset; - WriteOffset = RoundUp(WriteOffset + ChunkLocation.Size, PayloadAlignment); + uint64_t WrittenBytes = WriteOffset - OldWriteOffset; + AddedSize += WrittenBytes; } - AddedSize += WrittenBytesToBlock; ZEN_INFO("{}moved {} chunks ({}) from '{}' to new block, freeing {}", LogPrefix, KeepChunkIndexes.size(), @@ -1209,10 +1290,16 @@ BlockStore::CompactBlocks(const BlockStoreCompactState& CompactState, return true; }); + if (TargetFileBuffer) + { + TargetFileBuffer->Flush(); + TargetFileBuffer.reset(); + } + if (NewBlockFile) { ZEN_ASSERT_SLOW(NewBlockFile->IsOpen()); - NewBlockFile->Flush(); + NewBlockFile->Flush(WriteOffset); uint64_t NewBlockSize = NewBlockFile->FileSize(); MovedSize += NewBlockSize; NewBlockFile = nullptr; @@ -1343,6 +1430,8 @@ TEST_CASE("blockstore.blockfile") CHECK(std::string(Boop) == "boop"); File1.Flush(); CHECK(File1.FileSize() == 10); + File1.Flush(10); + CHECK(File1.FileSize() == 10); } { BlockStoreFile File1(RootDirectory / "1"); @@ -1375,14 +1464,14 @@ TEST_CASE("blockstore.blockfile") BoopChunk = File1.GetChunk(5, 5); } - CHECK(std::filesystem::exists(RootDirectory / "1")); + CHECK(IsFile(RootDirectory / "1")); const char* Data = static_cast<const char*>(DataChunk.GetData()); CHECK(std::string(Data) == "data"); const char* Boop = static_cast<const char*>(BoopChunk.GetData()); CHECK(std::string(Boop) == "boop"); } - CHECK(std::filesystem::exists(RootDirectory / "1")); + CHECK(IsFile(RootDirectory / "1")); { IoBuffer DataChunk; @@ -1401,7 +1490,7 @@ TEST_CASE("blockstore.blockfile") const char* Boop = static_cast<const char*>(BoopChunk.GetData()); CHECK(std::string(Boop) == "boop"); } - CHECK(!std::filesystem::exists(RootDirectory / "1")); + CHECK(!IsFile(RootDirectory / "1")); } namespace blockstore::impl { @@ -1800,7 +1889,7 @@ TEST_CASE("blockstore.compact.blocks") Store.CompactBlocks( State, Alignment, - [&](const BlockStore::MovedChunksArray&, uint64_t) { + [&](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&, uint64_t) { CHECK(false); return true; }, @@ -1825,9 +1914,10 @@ TEST_CASE("blockstore.compact.blocks") Store.CompactBlocks( State, Alignment, - [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) { + [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) { RemovedSize += Removed; CHECK(Moved.empty()); + CHECK(Scrubbed.empty()); return true; }, []() { return 0; }); @@ -1850,9 +1940,10 @@ TEST_CASE("blockstore.compact.blocks") Store.CompactBlocks( State, Alignment, - [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) { + [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) { RemovedSize += Removed; CHECK(Moved.empty()); + CHECK(Scrubbed.empty()); return true; }, []() { return 0; }); @@ -1860,7 +1951,7 @@ TEST_CASE("blockstore.compact.blocks") CHECK_LE(Store.TotalSize(), 1088); CHECK_GT(Store.TotalSize(), 0); } - SUBCASE("keep everthing") + SUBCASE("keep everything") { Store.Flush(true); @@ -1873,7 +1964,7 @@ TEST_CASE("blockstore.compact.blocks") Store.CompactBlocks( State, Alignment, - [&](const BlockStore::MovedChunksArray&, uint64_t) { + [&](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray&, uint64_t) { CHECK(false); return true; }, @@ -1904,8 +1995,9 @@ TEST_CASE("blockstore.compact.blocks") Store.CompactBlocks( State, Alignment, - [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) { + [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) { CHECK(Moved.empty()); + CHECK(Scrubbed.empty()); RemovedSize += Removed; return true; }, @@ -1939,7 +2031,8 @@ TEST_CASE("blockstore.compact.blocks") Store.CompactBlocks( State, Alignment, - [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) { + [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) { + CHECK(Scrubbed.empty()); for (const auto& Move : Moved) { const BlockStoreLocation& OldLocation = State.GetLocation(Move.first); @@ -2016,7 +2109,8 @@ TEST_CASE("blockstore.compact.blocks") Store.CompactBlocks( State, Alignment, - [&](const BlockStore::MovedChunksArray& Moved, uint64_t Removed) { + [&](const BlockStore::MovedChunksArray& Moved, const BlockStore::ChunkIndexArray& Scrubbed, uint64_t Removed) { + CHECK(Scrubbed.empty()); for (const auto& Move : Moved) { const BlockStoreLocation& OldLocation = State.GetLocation(Move.first); @@ -2051,6 +2145,42 @@ TEST_CASE("blockstore.compact.blocks") } CHECK_LT(Store.TotalSize(), PreSize); } + SUBCASE("scrub") + { + Store.Flush(true); + + BlockStoreCompactState State; + for (const BlockStoreLocation& Location : ChunkLocations) + { + State.IncludeBlock(Location.BlockIndex); + CHECK(State.AddKeepLocation(Location)); + } + State.IncludeBlock(0); + State.IncludeBlock(999); + std::vector<size_t> ExpectedScrubbedIndexes; + ExpectedScrubbedIndexes.push_back(ChunkLocations.size() + 0); + State.AddKeepLocation(BlockStoreLocation{.BlockIndex = 0, .Offset = 2000, .Size = 322}); + ExpectedScrubbedIndexes.push_back(ChunkLocations.size() + 1); + State.AddKeepLocation(BlockStoreLocation{.BlockIndex = 0, .Offset = 10, .Size = 3220}); + ExpectedScrubbedIndexes.push_back(ChunkLocations.size() + 2); + State.AddKeepLocation(BlockStoreLocation{.BlockIndex = 999, .Offset = 2, .Size = 40}); + + std::vector<size_t> ScrubbedIndexes; + + Store.CompactBlocks( + State, + Alignment, + [&](const BlockStore::MovedChunksArray&, const BlockStore::ChunkIndexArray& ScrubbedArray, uint64_t) { + ScrubbedIndexes.insert(ScrubbedIndexes.end(), ScrubbedArray.begin(), ScrubbedArray.end()); + return true; + }, + []() { + CHECK(false); + return 0; + }); + std::sort(ScrubbedIndexes.begin(), ScrubbedIndexes.end()); + CHECK_EQ(ExpectedScrubbedIndexes, ScrubbedIndexes); + } } #endif diff --git a/src/zenstore/buildstore/buildstore.cpp b/src/zenstore/buildstore/buildstore.cpp new file mode 100644 index 000000000..20dc55bca --- /dev/null +++ b/src/zenstore/buildstore/buildstore.cpp @@ -0,0 +1,2053 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenstore/buildstore/buildstore.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/memory/llm.h> +#include <zencore/scopeguard.h> +#include <zencore/trace.h> +#include <zencore/workthreadpool.h> +#include <zenutil/parallelwork.h> + +#include <zencore/uid.h> +#include <zencore/xxhash.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +#if ZEN_WITH_TESTS +# include <zencore/compactbinarybuilder.h> +# include <zencore/compress.h> +# include <zencore/testing.h> +# include <zencore/testutils.h> +# include <zenutil/workerpools.h> +#endif // ZEN_WITH_TESTS + +namespace zen { +const FLLMTag& +GetBuildstoreTag() +{ + static FLLMTag _("store", FLLMTag("builds")); + + return _; +} + +using namespace std::literals; + +namespace blobstore::impl { + + const std::string BaseName = "builds"; + const std::string ManifestExtension = ".cbo"; + const char* IndexExtension = ".uidx"; + const char* LogExtension = ".slog"; + const char* AccessTimeExtension = ".zacs"; + + const uint32_t ManifestVersion = (1 << 16) | (0 << 8) | (0); + + std::filesystem::path GetManifestPath(const std::filesystem::path& RootDirectory) + { + return RootDirectory / (BaseName + ManifestExtension); + } + + std::filesystem::path GetBlobIndexPath(const std::filesystem::path& RootDirectory) + { + return RootDirectory / (BaseName + IndexExtension); + } + + std::filesystem::path GetBlobLogPath(const std::filesystem::path& RootDirectory) { return RootDirectory / (BaseName + LogExtension); } + + std::filesystem::path GetMetaIndexPath(const std::filesystem::path& RootDirectory) + { + return RootDirectory / (BaseName + "_meta" + IndexExtension); + } + + std::filesystem::path GetMetaLogPath(const std::filesystem::path& RootDirectory) + { + return RootDirectory / (BaseName + "_meta" + LogExtension); + } + + std::filesystem::path GetAccessTimesPath(const std::filesystem::path& RootDirectory) + { + return RootDirectory / (BaseName + AccessTimeExtension); + } + + struct AccessTimeRecord + { + IoHash Key; + std::uint32_t SecondsSinceEpoch = 0; + }; + + static_assert(sizeof(AccessTimeRecord) == 24); + +#pragma pack(push) +#pragma pack(1) + struct AccessTimesHeader + { + static constexpr uint32_t ExpectedMagic = 0x7363617a; // 'zacs'; + static constexpr uint32_t CurrentVersion = 1; + static constexpr uint64_t DataAlignment = 8; + + uint32_t Magic = ExpectedMagic; + uint32_t Version = CurrentVersion; + uint32_t AccessTimeCount = 0; + uint32_t Checksum = 0; + + static uint32_t ComputeChecksum(const AccessTimesHeader& Header) + { + return XXH32(&Header.Magic, sizeof(AccessTimesHeader) - sizeof(uint32_t), 0xC0C0'BABA); + } + }; +#pragma pack(pop) + + static_assert(sizeof(AccessTimesHeader) == 16); + +} // namespace blobstore::impl + +BuildStore::BuildStore(const BuildStoreConfig& Config, GcManager& Gc) +: m_Log(logging::Get("builds")) +, m_Config(Config) +, m_Gc(Gc) +, m_LargeBlobStore(m_Gc) +, m_SmallBlobStore(Gc) +, m_MetadataBlockStore() +{ + ZEN_TRACE_CPU("BuildStore::BuildStore"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + try + { + bool IsNew = true; + Stopwatch Timer; + const auto _ = MakeGuard([&] { + ZEN_INFO("{} build store at {} in {}", + IsNew ? "Initialized" : "Read", + m_Config.RootDirectory, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + std::filesystem::path BlobLogPath = blobstore::impl::GetBlobLogPath(Config.RootDirectory); + std::filesystem::path MetaLogPath = blobstore::impl::GetMetaLogPath(Config.RootDirectory); + std::filesystem::path ManifestPath = blobstore::impl::GetManifestPath(Config.RootDirectory); + std::filesystem::path AccessTimesPath = blobstore::impl::GetAccessTimesPath(Config.RootDirectory); + if (IsFile(ManifestPath) && IsFile(BlobLogPath) && IsFile(MetaLogPath)) + { + IsNew = false; + } + + if (!IsNew) + { + RwLock::ExclusiveLockScope Lock(m_Lock); + + CbObject ManifestReader = LoadCompactBinaryObject(ReadFile(ManifestPath).Flatten()); + Oid ManifestId = ManifestReader["id"].AsObjectId(); + uint32_t Version = ManifestReader["version"].AsUInt32(); + DateTime CreationDate = ManifestReader["createdAt"].AsDateTime(); + ZEN_UNUSED(CreationDate); + if (ManifestId == Oid::Zero || Version != blobstore::impl::ManifestVersion) + { + ZEN_WARN("Invalid manifest at {}, wiping state", ManifestPath); + IsNew = true; + } + else + { + m_BlobLogFlushPosition = ReadPayloadLog(Lock, BlobLogPath, 0); + m_MetaLogFlushPosition = ReadMetadataLog(Lock, MetaLogPath, 0); + if (IsFile(AccessTimesPath)) + { + ReadAccessTimes(Lock, AccessTimesPath); + } + } + } + + if (IsNew) + { + CleanDirectory(Config.RootDirectory, false); + CbObjectWriter ManifestWriter; + ManifestWriter.AddObjectId("id", Oid::NewOid()); + ManifestWriter.AddInteger("version", blobstore::impl::ManifestVersion); + ManifestWriter.AddDateTime("createdAt", DateTime::Now()); + TemporaryFile::SafeWriteFile(ManifestPath, ManifestWriter.Save().GetBuffer().AsIoBuffer()); + } + m_LargeBlobStore.Initialize(Config.RootDirectory / "file_cas", IsNew); + m_SmallBlobStore.Initialize(Config.RootDirectory, + "blob_cas", + m_Config.SmallBlobBlockStoreMaxBlockSize, + m_Config.SmallBlobBlockStoreAlignement, + IsNew); + m_MetadataBlockStore.Initialize(Config.RootDirectory / "metadata", m_Config.MetadataBlockStoreMaxBlockSize, 1u << 20); + + BlockStore::BlockIndexSet KnownBlocks; + for (const BlobEntry& Blob : m_BlobEntries) + { + if (const MetadataIndex MetaIndex = Blob.Metadata; MetaIndex) + { + const MetadataEntry& Metadata = m_MetadataEntries[MetaIndex]; + KnownBlocks.insert(Metadata.Location.BlockIndex); + } + } + BlockStore::BlockIndexSet MissingBlocks = m_MetadataBlockStore.SyncExistingBlocksOnDisk(KnownBlocks); + + m_PayloadlogFile.Open(BlobLogPath, CasLogFile::Mode::kWrite); + m_MetadatalogFile.Open(MetaLogPath, CasLogFile::Mode::kWrite); + + if (!MissingBlocks.empty()) + { + std::vector<MetadataDiskEntry> MissingMetadatas; + for (auto& It : m_BlobLookup) + { + const IoHash& BlobHash = It.first; + const BlobIndex ReadBlobIndex = It.second; + const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex]; + if (ReadBlobEntry.Metadata) + { + const MetadataEntry& MetaData = m_MetadataEntries[ReadBlobEntry.Metadata]; + if (MissingBlocks.contains(MetaData.Location.BlockIndex)) + { + MissingMetadatas.push_back( + MetadataDiskEntry{.Entry = m_MetadataEntries[ReadBlobEntry.Metadata], .BlobHash = BlobHash}); + MissingMetadatas.back().Entry.Flags |= MetadataEntry::kTombStone; + m_MetadataEntries[ReadBlobEntry.Metadata] = {}; + m_BlobEntries[ReadBlobIndex].Metadata = {}; + } + } + } + ZEN_ASSERT(!MissingMetadatas.empty()); + + for (const MetadataDiskEntry& Entry : MissingMetadatas) + { + auto It = m_BlobLookup.find(Entry.BlobHash); + ZEN_ASSERT(It != m_BlobLookup.end()); + + const BlobIndex ReadBlobIndex = It->second; + const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex]; + if (!ReadBlobEntry.Payload) + { + m_BlobLookup.erase(It); + } + } + m_MetadatalogFile.Append(MissingMetadatas); + CompactState(); + } + + m_Gc.AddGcReferencer(*this); + m_Gc.AddGcReferenceLocker(*this); + m_Gc.AddGcStorage(this); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed to initialize build store. Reason: '{}'", Ex.what()); + m_Gc.RemoveGcStorage(this); + m_Gc.RemoveGcReferenceLocker(*this); + m_Gc.RemoveGcReferencer(*this); + } +} + +BuildStore::~BuildStore() +{ + try + { + ZEN_TRACE_CPU("BuildStore::~BuildStore"); + m_Gc.RemoveGcStorage(this); + m_Gc.RemoveGcReferenceLocker(*this); + m_Gc.RemoveGcReferencer(*this); + Flush(); + m_MetadatalogFile.Close(); + m_PayloadlogFile.Close(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("~BuildStore() threw exception: {}", Ex.what()); + } +} + +void +BuildStore::PutBlob(const IoHash& BlobHash, const IoBuffer& Payload) +{ + ZEN_TRACE_CPU("BuildStore::PutBlob"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCompressedBinary); + { + RwLock::SharedLockScope _(m_Lock); + if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex BlobIndex = It->second; + if (m_BlobEntries[BlobIndex].Payload) + { + return; + } + } + } + + uint64_t PayloadSize = Payload.GetSize(); + PayloadEntry Entry; + if (Payload.GetSize() > m_Config.SmallBlobBlockStoreMaxBlockEmbedSize) + { + CasStore::InsertResult Result = m_LargeBlobStore.InsertChunk(Payload, BlobHash); + ZEN_UNUSED(Result); + Entry = PayloadEntry(PayloadEntry::kStandalone, PayloadSize); + } + else + { + CasStore::InsertResult Result = m_SmallBlobStore.InsertChunk(Payload, BlobHash); + ZEN_UNUSED(Result); + Entry = PayloadEntry(0, PayloadSize); + } + + { + RwLock::ExclusiveLockScope _(m_Lock); + if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex ExistingBlobIndex = It->second; + BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex]; + if (Blob.Payload) + { + m_PayloadEntries[Blob.Payload] = Entry; + } + else + { + Blob.Payload = PayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size())); + m_PayloadEntries.push_back(Entry); + } + Blob.LastAccessTime = GcClock::TickCount(); + } + else + { + PayloadIndex NewPayloadIndex = PayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size())); + m_PayloadEntries.push_back(Entry); + + const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size())); + // we only remove during GC and compact this then... + m_BlobEntries.push_back(BlobEntry{.Payload = NewPayloadIndex, .LastAccessTime = AccessTime(GcClock::TickCount())}); + m_BlobLookup.insert({BlobHash, NewBlobIndex}); + } + } + m_PayloadlogFile.Append(PayloadDiskEntry{.Entry = Entry, .BlobHash = BlobHash}); + m_LastAccessTimeUpdateCount++; +} + +IoBuffer +BuildStore::GetBlob(const IoHash& BlobHash) +{ + ZEN_TRACE_CPU("BuildStore::GetBlob"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + RwLock::SharedLockScope Lock(m_Lock); + if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex ExistingBlobIndex = It->second; + BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex]; + Blob.LastAccessTime = GcClock::TickCount(); + if (Blob.Payload) + { + const PayloadEntry& Entry = m_PayloadEntries[Blob.Payload]; + const bool IsStandalone = (Entry.GetFlags() & PayloadEntry::kStandalone) != 0; + Lock.ReleaseNow(); + + IoBuffer Chunk; + if (IsStandalone) + { + ZEN_TRACE_CPU("GetLarge"); + Chunk = m_LargeBlobStore.FindChunk(BlobHash); + } + else + { + ZEN_TRACE_CPU("GetSmall"); + Chunk = m_SmallBlobStore.FindChunk(BlobHash); + } + if (Chunk) + { + Chunk.SetContentType(ZenContentType::kCompressedBinary); + return Chunk; + } + else + { + ZEN_WARN("Inconsistencies in build store, {} is in index but not {}", BlobHash, IsStandalone ? "on disk" : "in block"); + } + } + } + return {}; +} + +std::vector<BuildStore::BlobExistsResult> +BuildStore::BlobsExists(std::span<const IoHash> BlobHashes) +{ + ZEN_TRACE_CPU("BuildStore::BlobsExists"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + std::vector<BuildStore::BlobExistsResult> Result; + Result.reserve(BlobHashes.size()); + RwLock::SharedLockScope _(m_Lock); + for (const IoHash& BlobHash : BlobHashes) + { + if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex ExistingBlobIndex = It->second; + BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex]; + bool HasPayload = !!Blob.Payload; + bool HasMetadata = !!Blob.Metadata; + Result.push_back(BlobExistsResult{.HasBody = HasPayload, .HasMetadata = HasMetadata}); + } + else + { + Result.push_back({}); + } + } + return Result; +} + +void +BuildStore::PutMetadatas(std::span<const IoHash> BlobHashes, std::span<const IoBuffer> MetaDatas) +{ + ZEN_TRACE_CPU("BuildStore::PutMetadatas"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + size_t WriteBlobIndex = 0; + m_MetadataBlockStore.WriteChunks(MetaDatas, m_Config.MetadataBlockStoreAlignement, [&](std::span<BlockStoreLocation> Locations) { + RwLock::ExclusiveLockScope _(m_Lock); + for (size_t LocationIndex = 0; LocationIndex < Locations.size(); LocationIndex++) + { + const IoBuffer& Data = MetaDatas[WriteBlobIndex]; + const IoHash& BlobHash = BlobHashes[WriteBlobIndex]; + const BlockStoreLocation& Location = Locations[LocationIndex]; + + MetadataEntry Entry = {.Location = Location, .ContentType = Data.GetContentType(), .Flags = 0}; + + if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex ExistingBlobIndex = It->second; + BlobEntry& Blob = m_BlobEntries[ExistingBlobIndex]; + if (Blob.Metadata) + { + m_MetadataEntries[Blob.Metadata] = Entry; + } + else + { + Blob.Metadata = MetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size())); + m_MetadataEntries.push_back(Entry); + } + Blob.LastAccessTime = GcClock::TickCount(); + } + else + { + MetadataIndex NewMetadataIndex = MetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size())); + m_MetadataEntries.push_back(Entry); + + const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size())); + m_BlobEntries.push_back(BlobEntry{.Metadata = NewMetadataIndex, .LastAccessTime = AccessTime(GcClock::TickCount())}); + m_BlobLookup.insert({BlobHash, NewBlobIndex}); + } + + m_MetadatalogFile.Append(MetadataDiskEntry{.Entry = Entry, .BlobHash = BlobHash}); + + m_LastAccessTimeUpdateCount++; + WriteBlobIndex++; + if (m_TrackedCacheKeys) + { + m_TrackedCacheKeys->insert(BlobHash); + } + } + }); +} + +std::vector<IoBuffer> +BuildStore::GetMetadatas(std::span<const IoHash> BlobHashes, WorkerThreadPool* OptionalWorkerPool) +{ + ZEN_TRACE_CPU("BuildStore::GetMetadatas"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + std::vector<BlockStoreLocation> MetaLocations; + std::vector<size_t> MetaLocationResultIndexes; + MetaLocations.reserve(BlobHashes.size()); + MetaLocationResultIndexes.reserve(BlobHashes.size()); + tsl::robin_set<uint32_t> ReferencedBlocks; + + std::vector<IoBuffer> Result; + std::vector<ZenContentType> ResultContentTypes; + Result.resize(BlobHashes.size()); + ResultContentTypes.resize(BlobHashes.size(), ZenContentType::kUnknownContentType); + { + RwLock::SharedLockScope _(m_Lock); + for (size_t Index = 0; Index < BlobHashes.size(); Index++) + { + const IoHash& BlobHash = BlobHashes[Index]; + if (auto It = m_BlobLookup.find(BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex ExistingBlobIndex = It->second; + BlobEntry& ExistingBlobEntry = m_BlobEntries[ExistingBlobIndex]; + if (ExistingBlobEntry.Metadata) + { + const MetadataEntry& ExistingMetadataEntry = m_MetadataEntries[ExistingBlobEntry.Metadata]; + MetaLocations.push_back(ExistingMetadataEntry.Location); + MetaLocationResultIndexes.push_back(Index); + ReferencedBlocks.insert(ExistingMetadataEntry.Location.BlockIndex); + ResultContentTypes[Index] = ExistingMetadataEntry.ContentType; + } + ExistingBlobEntry.LastAccessTime = AccessTime(GcClock::TickCount()); + m_LastAccessTimeUpdateCount++; + } + } + } + + auto DoOneBlock = [this](std::span<const BlockStoreLocation> MetaLocations, + std::span<const size_t> MetaLocationResultIndexes, + std::span<const size_t> ChunkIndexes, + std::vector<IoBuffer>& Result) { + if (ChunkIndexes.size() < 4) + { + for (size_t ChunkIndex : ChunkIndexes) + { + IoBuffer Chunk = m_MetadataBlockStore.TryGetChunk(MetaLocations[ChunkIndex]); + if (Chunk) + { + size_t ResultIndex = MetaLocationResultIndexes[ChunkIndex]; + Result[ResultIndex] = std::move(Chunk); + } + } + return true; + } + return m_MetadataBlockStore.IterateBlock( + MetaLocations, + ChunkIndexes, + [&MetaLocationResultIndexes, &Result](size_t ChunkIndex, const void* Data, uint64_t Size) { + if (Data != nullptr) + { + size_t ResultIndex = MetaLocationResultIndexes[ChunkIndex]; + Result[ResultIndex] = IoBuffer(IoBuffer::Clone, Data, Size); + } + return true; + }, + [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) { + size_t ResultIndex = MetaLocationResultIndexes[ChunkIndex]; + Result[ResultIndex] = File.GetChunk(Offset, Size); + return true; + }, + 8u * 1024u); + }; + + if (!MetaLocations.empty()) + { + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + + try + { + m_MetadataBlockStore.IterateChunks( + MetaLocations, + [this, OptionalWorkerPool, &Work, &Result, &MetaLocations, &MetaLocationResultIndexes, &ReferencedBlocks, DoOneBlock]( + uint32_t BlockIndex, + std::span<const size_t> ChunkIndexes) -> bool { + ZEN_UNUSED(BlockIndex); + if (ChunkIndexes.size() == MetaLocations.size() || OptionalWorkerPool == nullptr || ReferencedBlocks.size() == 1) + { + return DoOneBlock(MetaLocations, MetaLocationResultIndexes, ChunkIndexes, Result); + } + else + { + ZEN_ASSERT(OptionalWorkerPool != nullptr); + std::vector<size_t> TmpChunkIndexes(ChunkIndexes.begin(), ChunkIndexes.end()); + Work.ScheduleWork( + *OptionalWorkerPool, + [this, + &Result, + &MetaLocations, + &MetaLocationResultIndexes, + DoOneBlock, + ChunkIndexes = std::move(TmpChunkIndexes)](std::atomic<bool>& AbortFlag) { + if (AbortFlag) + { + return; + } + try + { + if (!DoOneBlock(MetaLocations, MetaLocationResultIndexes, ChunkIndexes, Result)) + { + AbortFlag.store(true); + } + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed getting metadata for {} chunks. Reason: {}", ChunkIndexes.size(), Ex.what()); + } + }); + return !Work.IsAborted(); + } + }); + } + catch (const std::exception& Ex) + { + AbortFlag.store(true); + ZEN_WARN("Failed iterating block metadata chunks in {}. Reason: '{}'", m_Config.RootDirectory, Ex.what()); + } + + Work.Wait(); + } + for (size_t Index = 0; Index < Result.size(); Index++) + { + if (Result[Index]) + { + Result[Index].SetContentType(ResultContentTypes[Index]); + } + } + return Result; +} + +void +BuildStore::Flush() +{ + ZEN_TRACE_CPU("BuildStore::Flush"); + try + { + Stopwatch Timer; + const auto _ = MakeGuard( + [&] { ZEN_INFO("Flushed build store at {} in {}", m_Config.RootDirectory, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); }); + + m_LargeBlobStore.Flush(); + m_SmallBlobStore.Flush(); + m_MetadataBlockStore.Flush(false); + + m_PayloadlogFile.Flush(); + m_MetadatalogFile.Flush(); + + if (uint64_t LastAccessTimeUpdateCount = m_LastAccessTimeUpdateCount.load(); LastAccessTimeUpdateCount > 0) + { + m_LastAccessTimeUpdateCount -= LastAccessTimeUpdateCount; + RwLock::ExclusiveLockScope UpdateLock(m_Lock); + WriteAccessTimes(UpdateLock, blobstore::impl::GetAccessTimesPath(m_Config.RootDirectory)); + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("BuildStore::Flush failed. Reason: {}", Ex.what()); + } +} + +BuildStore::StorageStats +BuildStore::GetStorageStats() const +{ + StorageStats Result; + { + RwLock::SharedLockScope _(m_Lock); + Result.EntryCount = m_BlobLookup.size(); + + for (auto LookupIt : m_BlobLookup) + { + const BlobIndex ReadBlobIndex = LookupIt.second; + const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex]; + if (ReadBlobEntry.Payload) + { + const PayloadEntry& Payload = m_PayloadEntries[ReadBlobEntry.Payload]; + uint64_t Size = Payload.GetSize(); + if ((Payload.GetFlags() & PayloadEntry::kStandalone) != 0) + { + Result.LargeBlobCount++; + Result.LargeBlobBytes += Size; + } + else + { + Result.SmallBlobCount++; + Result.SmallBlobBytes += Size; + } + } + if (ReadBlobEntry.Metadata) + { + const MetadataEntry& Metadata = m_MetadataEntries[ReadBlobEntry.Metadata]; + Result.MetadataCount++; + Result.MetadataByteCount += Metadata.Location.Size; + } + } + } + return Result; +} + +#if ZEN_WITH_TESTS +std::optional<AccessTime> +BuildStore::GetLastAccessTime(const IoHash& Key) const +{ + RwLock::SharedLockScope _(m_Lock); + if (auto It = m_BlobLookup.find(Key); It != m_BlobLookup.end()) + { + const BlobIndex Index = It->second; + const BlobEntry& Entry = m_BlobEntries[Index]; + return Entry.LastAccessTime; + } + return {}; +} + +bool +BuildStore::SetLastAccessTime(const IoHash& Key, const AccessTime& Time) +{ + RwLock::SharedLockScope _(m_Lock); + if (auto It = m_BlobLookup.find(Key); It != m_BlobLookup.end()) + { + const BlobIndex Index = It->second; + BlobEntry& Entry = m_BlobEntries[Index]; + Entry.LastAccessTime = Time; + return true; + } + return false; +} +#endif // ZEN_WITH_TESTS + +void +BuildStore::CompactState() +{ + ZEN_TRACE_CPU("BuildStore::CompactState"); + + std::vector<BlobEntry> BlobEntries; + std::vector<PayloadEntry> PayloadEntries; + std::vector<MetadataEntry> MetadataEntries; + + tsl::robin_map<IoHash, BlobIndex, IoHash::Hasher> BlobLookup; + + RwLock::ExclusiveLockScope _(m_Lock); + const size_t EntryCount = m_BlobLookup.size(); + BlobLookup.reserve(EntryCount); + const size_t PayloadCount = m_PayloadEntries.size(); + PayloadEntries.reserve(PayloadCount); + const size_t MetadataCount = m_MetadataEntries.size(); + MetadataEntries.reserve(MetadataCount); + + for (auto LookupIt : m_BlobLookup) + { + const IoHash& BlobHash = LookupIt.first; + const BlobIndex ReadBlobIndex = LookupIt.second; + const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex]; + + const BlobIndex WriteBlobIndex(gsl::narrow<uint32_t>(BlobEntries.size())); + BlobEntries.push_back(ReadBlobEntry); + BlobEntry& WriteBlobEntry = BlobEntries.back(); + + if (WriteBlobEntry.Payload) + { + const PayloadEntry& ReadPayloadEntry = m_PayloadEntries[ReadBlobEntry.Payload]; + WriteBlobEntry.Payload = PayloadIndex(gsl::narrow<uint32_t>(PayloadEntries.size())); + PayloadEntries.push_back(ReadPayloadEntry); + } + if (ReadBlobEntry.Metadata) + { + const MetadataEntry& ReadMetadataEntry = m_MetadataEntries[ReadBlobEntry.Metadata]; + WriteBlobEntry.Metadata = MetadataIndex(gsl::narrow<uint32_t>(MetadataEntries.size())); + MetadataEntries.push_back(ReadMetadataEntry); + } + + BlobLookup.insert({BlobHash, WriteBlobIndex}); + } + m_BlobEntries.swap(BlobEntries); + m_PayloadEntries.swap(PayloadEntries); + m_MetadataEntries.swap(MetadataEntries); + m_BlobLookup.swap(BlobLookup); +} + +uint64_t +BuildStore::ReadPayloadLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount) +{ + ZEN_TRACE_CPU("BuildStore::ReadPayloadLog"); + if (!IsFile(LogPath)) + { + return 0; + } + + uint64_t LogEntryCount = 0; + Stopwatch Timer; + const auto _ = MakeGuard([&] { + ZEN_INFO("read build store '{}' payload log containing {} entries in {}", + LogPath, + LogEntryCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + TCasLogFile<PayloadDiskEntry> CasLog; + if (!CasLog.IsValid(LogPath)) + { + RemoveFile(LogPath); + return 0; + } + CasLog.Open(LogPath, CasLogFile::Mode::kRead); + if (!CasLog.Initialize()) + { + return 0; + } + + const uint64_t EntryCount = CasLog.GetLogCount(); + if (EntryCount < SkipEntryCount) + { + ZEN_WARN("reading full payload log at '{}', reason: Log position from index snapshot is out of range", LogPath); + SkipEntryCount = 0; + } + + LogEntryCount = EntryCount - SkipEntryCount; + uint64_t InvalidEntryCount = 0; + + CasLog.Replay( + [&](const PayloadDiskEntry& Record) { + std::string InvalidEntryReason; + if (Record.Entry.GetFlags() & PayloadEntry::kTombStone) + { + // Note: this leaves m_BlobLookup and other arrays with 'holes' in them, this will get clean up in compact gc operation + if (auto ExistingIt = m_BlobLookup.find(Record.BlobHash); ExistingIt != m_BlobLookup.end()) + { + if (!m_BlobEntries[ExistingIt->second].Metadata) + { + m_BlobLookup.erase(ExistingIt); + } + else + { + m_BlobEntries[ExistingIt->second].Payload = {}; + } + } + return; + } + + if (!ValidatePayloadDiskEntry(Record, InvalidEntryReason)) + { + ZEN_WARN("skipping invalid payload entry in '{}', reason: '{}'", LogPath, InvalidEntryReason); + ++InvalidEntryCount; + return; + } + if (auto It = m_BlobLookup.find(Record.BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex ExistingBlobIndex = It->second; + BlobEntry& ExistingBlob = m_BlobEntries[ExistingBlobIndex]; + if (ExistingBlob.Payload) + { + const PayloadIndex ExistingPayloadIndex = ExistingBlob.Payload; + m_PayloadEntries[ExistingPayloadIndex] = Record.Entry; + } + else + { + const PayloadIndex NewPayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size())); + m_PayloadEntries.push_back(Record.Entry); + ExistingBlob.Payload = NewPayloadIndex; + } + } + else + { + const PayloadIndex NewPayloadIndex(gsl::narrow<uint32_t>(m_PayloadEntries.size())); + m_PayloadEntries.push_back(Record.Entry); + + const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size())); + m_BlobEntries.push_back(BlobEntry{.Payload = NewPayloadIndex, .LastAccessTime = AccessTime(GcClock::TickCount())}); + m_BlobLookup.insert_or_assign(Record.BlobHash, NewBlobIndex); + } + }, + SkipEntryCount); + + if (InvalidEntryCount) + { + ZEN_WARN("found {} invalid payload entries in '{}'", InvalidEntryCount, LogPath); + } + + return LogEntryCount; +} + +uint64_t +BuildStore::ReadMetadataLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount) +{ + ZEN_TRACE_CPU("BuildStore::ReadMetadataLog"); + if (!IsFile(LogPath)) + { + return 0; + } + + uint64_t LogEntryCount = 0; + Stopwatch Timer; + const auto _ = MakeGuard([&] { + ZEN_INFO("read build store '{}' metadata log containing {} entries in {}", + LogPath, + LogEntryCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + TCasLogFile<MetadataDiskEntry> CasLog; + if (!CasLog.IsValid(LogPath)) + { + RemoveFile(LogPath); + return 0; + } + CasLog.Open(LogPath, CasLogFile::Mode::kRead); + if (!CasLog.Initialize()) + { + return 0; + } + + const uint64_t EntryCount = CasLog.GetLogCount(); + if (EntryCount < SkipEntryCount) + { + ZEN_WARN("reading full metadata log at '{}', reason: Log position from index snapshot is out of range", LogPath); + SkipEntryCount = 0; + } + + LogEntryCount = EntryCount - SkipEntryCount; + uint64_t InvalidEntryCount = 0; + + CasLog.Replay( + [&](const MetadataDiskEntry& Record) { + std::string InvalidEntryReason; + if (Record.Entry.Flags & MetadataEntry::kTombStone) + { + // Note: this leaves m_BlobLookup and other arrays with 'holes' in them, this will get clean up in compact gc operation + // Note: this leaves m_BlobLookup and other arrays with 'holes' in them, this will get clean up in compact gc operation + if (auto ExistingIt = m_BlobLookup.find(Record.BlobHash); ExistingIt != m_BlobLookup.end()) + { + if (!m_BlobEntries[ExistingIt->second].Payload) + { + m_BlobLookup.erase(ExistingIt); + } + else + { + m_BlobEntries[ExistingIt->second].Metadata = {}; + } + } + return; + } + + if (!ValidateMetadataDiskEntry(Record, InvalidEntryReason)) + { + ZEN_WARN("skipping invalid metadata entry in '{}', reason: '{}'", LogPath, InvalidEntryReason); + ++InvalidEntryCount; + return; + } + if (auto It = m_BlobLookup.find(Record.BlobHash); It != m_BlobLookup.end()) + { + const BlobIndex ExistingBlobIndex = It->second; + BlobEntry& ExistingBlob = m_BlobEntries[ExistingBlobIndex]; + if (ExistingBlob.Metadata) + { + const MetadataIndex ExistingMetadataIndex = ExistingBlob.Metadata; + m_MetadataEntries[ExistingMetadataIndex] = Record.Entry; + } + else + { + const MetadataIndex NewMetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size())); + m_MetadataEntries.push_back(Record.Entry); + ExistingBlob.Metadata = NewMetadataIndex; + } + } + else + { + const MetadataIndex NewMetadataIndex(gsl::narrow<uint32_t>(m_MetadataEntries.size())); + m_MetadataEntries.push_back(Record.Entry); + + const BlobIndex NewBlobIndex(gsl::narrow<uint32_t>(m_BlobEntries.size())); + m_BlobEntries.push_back(BlobEntry{.Metadata = NewMetadataIndex, .LastAccessTime = AccessTime(GcClock::TickCount())}); + m_BlobLookup.insert_or_assign(Record.BlobHash, NewBlobIndex); + } + }, + SkipEntryCount); + + if (InvalidEntryCount) + { + ZEN_WARN("found {} invalid metadata entries in '{}'", InvalidEntryCount, LogPath); + } + + return LogEntryCount; +} + +void +BuildStore::ReadAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath) +{ + ZEN_TRACE_CPU("BuildStore::ReadAccessTimes"); + + using namespace blobstore::impl; + + BasicFile AccessTimesFile; + AccessTimesFile.Open(AccessTimesPath, BasicFile::Mode::kRead); + uint64_t Size = AccessTimesFile.FileSize(); + if (Size >= sizeof(AccessTimesHeader)) + { + AccessTimesHeader Header; + uint64_t Offset = 0; + AccessTimesFile.Read(&Header, sizeof(Header), 0); + Offset += sizeof(AccessTimesHeader); + Offset = RoundUp(Offset, AccessTimesHeader::DataAlignment); + if ((Header.Magic == AccessTimesHeader::ExpectedMagic) && (Header.Version == AccessTimesHeader::CurrentVersion) && + (Header.Checksum == AccessTimesHeader::ComputeChecksum(Header))) + { + uint64_t RecordsSize = sizeof(AccessTimeRecord) * Header.AccessTimeCount; + if (AccessTimesFile.FileSize() >= Offset + RecordsSize) + { + std::vector<AccessTimeRecord> AccessRecords(Header.AccessTimeCount); + AccessTimesFile.Read(AccessRecords.data(), RecordsSize, Offset); + for (const AccessTimeRecord& Record : AccessRecords) + { + const IoHash& Key = Record.Key; + const uint32_t SecondsSinceEpoch = Record.SecondsSinceEpoch; + if (auto It = m_BlobLookup.find(Key); It != m_BlobLookup.end()) + { + const BlobIndex Index = It->second; + BlobEntry& Entry = m_BlobEntries[Index]; + Entry.LastAccessTime.SetSecondsSinceEpoch(SecondsSinceEpoch); + } + else + { + m_LastAccessTimeUpdateCount++; + } + } + } + else + { + m_LastAccessTimeUpdateCount++; + } + } + else + { + m_LastAccessTimeUpdateCount++; + } + } + else + { + m_LastAccessTimeUpdateCount++; + } +} + +void +BuildStore::WriteAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath) +{ + ZEN_TRACE_CPU("BuildStore::WriteAccessTimes"); + + using namespace blobstore::impl; + + uint32_t Count = gsl::narrow<uint32_t>(m_BlobLookup.size()); + AccessTimesHeader Header = {.AccessTimeCount = Count}; + Header.Checksum = AccessTimesHeader::ComputeChecksum(Header); + + TemporaryFile TempFile; + std::error_code Ec; + if (TempFile.CreateTemporary(AccessTimesPath.parent_path(), Ec); Ec) + { + throw std::runtime_error(fmt::format("Failed to create temporary file {} to write access times. Reason ({}) {}", + TempFile.GetPath(), + Ec.value(), + Ec.message())); + } + { + uint64_t Offset = 0; + TempFile.Write(&Header, sizeof(AccessTimesHeader), Offset); + Offset += sizeof(AccessTimesHeader); + Offset = RoundUp(Offset, AccessTimesHeader::DataAlignment); + + std::vector<AccessTimeRecord> AccessRecords; + AccessRecords.reserve(Header.AccessTimeCount); + + for (auto It : m_BlobLookup) + { + const IoHash& Key = It.first; + const BlobIndex Index = It.second; + const BlobEntry& Entry = m_BlobEntries[Index]; + const uint32_t SecondsSinceEpoch = Entry.LastAccessTime.GetSecondsSinceEpoch(); + AccessRecords.emplace_back(AccessTimeRecord{.Key = Key, .SecondsSinceEpoch = SecondsSinceEpoch}); + } + uint64_t RecordsSize = sizeof(AccessTimeRecord) * Header.AccessTimeCount; + TempFile.Write(AccessRecords.data(), RecordsSize, Offset); + Offset += sizeof(AccessTimesHeader) * Header.AccessTimeCount; + } + if (TempFile.MoveTemporaryIntoPlace(AccessTimesPath, Ec); Ec) + { + throw std::runtime_error(fmt::format("Failed to move temporary file {} to {} when write access times. Reason ({}) {}", + TempFile.GetPath(), + AccessTimesPath, + Ec.value(), + Ec.message())); + } +} + +bool +BuildStore::ValidatePayloadDiskEntry(const PayloadDiskEntry& Entry, std::string& OutReason) +{ + if (Entry.BlobHash == IoHash::Zero) + { + OutReason = fmt::format("Invalid blob hash {}", Entry.BlobHash.ToHexString()); + return false; + } + if (Entry.Entry.GetFlags() & ~(PayloadEntry::kTombStone | PayloadEntry::kStandalone)) + { + OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Entry.GetFlags(), Entry.BlobHash.ToHexString()); + return false; + } + if (Entry.Entry.GetFlags() & PayloadEntry::kTombStone) + { + return true; + } + if (Entry.Entry.GetSize() == 0 || Entry.Entry.GetSize() == 0x00ffffffffffffffu) + { + OutReason = fmt::format("Invalid size field {} for meta entry {}", Entry.Entry.GetSize(), Entry.BlobHash.ToHexString()); + return false; + } + return true; +} + +bool +BuildStore::ValidateMetadataDiskEntry(const MetadataDiskEntry& Entry, std::string& OutReason) +{ + if (Entry.BlobHash == IoHash::Zero) + { + OutReason = fmt::format("Invalid blob hash {} for meta entry", Entry.BlobHash.ToHexString()); + return false; + } + if (Entry.Entry.Location.Size == 0) + { + OutReason = fmt::format("Invalid meta blob size {} for meta entry", Entry.Entry.Location.Size); + return false; + } + if (Entry.Entry.Reserved1 != 0 || Entry.Entry.Reserved2 != 0) + { + OutReason = fmt::format("Invalid reserved fields for meta entry {}", Entry.BlobHash.ToHexString()); + return false; + } + if (Entry.Entry.Flags & MetadataEntry::kTombStone) + { + return true; + } + if (Entry.Entry.ContentType == ZenContentType::kCOUNT) + { + OutReason = fmt::format("Invalid content type for meta entry {}", Entry.BlobHash.ToHexString()); + return false; + } + if (Entry.Reserved1 != 0 || Entry.Reserved2 != 0 || Entry.Reserved3 != 0 || Entry.Reserved4 != 0) + { + OutReason = fmt::format("Invalid reserved fields for meta entry {}", Entry.BlobHash.ToHexString()); + return false; + } + return true; +} + +class BuildStoreGcReferenceChecker : public GcReferenceChecker +{ +public: + BuildStoreGcReferenceChecker(BuildStore& Store) : m_Store(Store) {} + virtual std::string GetGcName(GcCtx& Ctx) override + { + ZEN_UNUSED(Ctx); + return fmt::format("buildstore: '{}'", m_Store.m_Config.RootDirectory.string()); + } + + virtual void PreCache(GcCtx& Ctx) override { ZEN_UNUSED(Ctx); } + + virtual void UpdateLockedState(GcCtx& Ctx) override + { + ZEN_TRACE_CPU("Builds::UpdateLockedState"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + + auto Log = [&Ctx]() { return Ctx.Logger; }; + + m_References.reserve(m_Store.m_BlobLookup.size()); + for (const auto& It : m_Store.m_BlobLookup) + { + const BuildStore::BlobIndex ExistingBlobIndex = It.second; + if (m_Store.m_BlobEntries[ExistingBlobIndex].Payload) + { + m_References.push_back(It.first); + } + } + FilterReferences(Ctx, fmt::format("buildstore [LOCKSTATE] '{}'", "buildstore"), m_References); + } + + virtual std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override + { + ZEN_UNUSED(Ctx); + ZEN_TRACE_CPU("Builds::GetUnusedReferences"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + + auto Log = [&Ctx]() { return Ctx.Logger; }; + + size_t InitialCount = IoCids.size(); + size_t UsedCount = InitialCount; + + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: buildstore [FILTER REFERENCES] '{}': filtered out {} used references out of {} in {}", + "buildstore", + UsedCount, + InitialCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + std::span<IoHash> UnusedReferences = KeepUnusedReferences(m_References, IoCids); + UsedCount = IoCids.size() - UnusedReferences.size(); + return UnusedReferences; + } + +private: + BuildStore& m_Store; + std::vector<IoHash> m_References; +}; + +std::string +BuildStore::GetGcName(GcCtx& Ctx) +{ + ZEN_UNUSED(Ctx); + ZEN_MEMSCOPE(GetBuildstoreTag()); + + return fmt::format("buildstore: '{}'", m_Config.RootDirectory.string()); +} + +class BuildStoreGcCompator : public GcStoreCompactor +{ + using BlobEntry = BuildStore::BlobEntry; + using PayloadEntry = BuildStore::PayloadEntry; + using MetadataEntry = BuildStore::MetadataEntry; + using MetadataDiskEntry = BuildStore::MetadataDiskEntry; + using BlobIndex = BuildStore::BlobIndex; + using PayloadIndex = BuildStore::PayloadIndex; + using MetadataIndex = BuildStore::MetadataIndex; + +public: + BuildStoreGcCompator(BuildStore& Store, std::vector<IoHash>&& RemovedBlobs) : m_Store(Store), m_RemovedBlobs(std::move(RemovedBlobs)) {} + + virtual void CompactStore(GcCtx& Ctx, GcCompactStoreStats& Stats, const std::function<uint64_t()>& ClaimDiskReserveCallback) override + { + ZEN_UNUSED(ClaimDiskReserveCallback); + ZEN_TRACE_CPU("Builds::CompactStore"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + + auto Log = [&Ctx]() { return Ctx.Logger; }; + + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (!Ctx.Settings.Verbose) + { + return; + } + ZEN_INFO("GCV2: buildstore [COMPACT] '{}': RemovedDisk: {} in {}", + m_Store.m_Config.RootDirectory, + NiceBytes(Stats.RemovedDisk), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + + const auto __ = MakeGuard([&] { m_Store.Flush(); }); + + if (!m_RemovedBlobs.empty()) + { + if (Ctx.Settings.CollectSmallObjects) + { + m_Store.m_Lock.WithExclusiveLock([this]() { m_Store.m_TrackedCacheKeys = std::make_unique<HashSet>(); }); + auto __ = MakeGuard([this]() { m_Store.m_Lock.WithExclusiveLock([&]() { m_Store.m_TrackedCacheKeys.reset(); }); }); + + BlockStore::BlockUsageMap BlockUsage; + { + RwLock::SharedLockScope __(m_Store.m_Lock); + + for (auto LookupIt : m_Store.m_BlobLookup) + { + const BlobIndex ReadBlobIndex = LookupIt.second; + const BlobEntry& ReadBlobEntry = m_Store.m_BlobEntries[ReadBlobIndex]; + + if (ReadBlobEntry.Metadata) + { + const MetadataEntry& ReadMetadataEntry = m_Store.m_MetadataEntries[ReadBlobEntry.Metadata]; + + uint32_t BlockIndex = ReadMetadataEntry.Location.BlockIndex; + uint64_t ChunkSize = RoundUp(ReadMetadataEntry.Location.Size, m_Store.m_Config.MetadataBlockStoreAlignement); + + if (auto BlockUsageIt = BlockUsage.find(BlockIndex); BlockUsageIt != BlockUsage.end()) + { + BlockStore::BlockUsageInfo& Info = BlockUsageIt.value(); + Info.EntryCount++; + Info.DiskUsage += ChunkSize; + } + else + { + BlockUsage.insert_or_assign(BlockIndex, + BlockStore::BlockUsageInfo{.DiskUsage = ChunkSize, .EntryCount = 1}); + } + } + } + } + + BlockStore::BlockEntryCountMap BlocksToCompact = m_Store.m_MetadataBlockStore.GetBlocksToCompact(BlockUsage, 90); + BlockStoreCompactState BlockCompactState; + std::vector<IoHash> BlockCompactStateKeys; + BlockCompactState.IncludeBlocks(BlocksToCompact); + + if (BlocksToCompact.size() > 0) + { + { + RwLock::SharedLockScope ___(m_Store.m_Lock); + for (const auto& Entry : m_Store.m_BlobLookup) + { + BlobIndex Index = Entry.second; + + if (MetadataIndex Meta = m_Store.m_BlobEntries[Index].Metadata; Meta) + { + if (BlockCompactState.AddKeepLocation(m_Store.m_MetadataEntries[Meta].Location)) + { + BlockCompactStateKeys.push_back(Entry.first); + } + } + } + } + + if (Ctx.Settings.IsDeleteMode) + { + if (Ctx.Settings.Verbose) + { + ZEN_INFO("GCV2: buildstore [COMPACT] '{}': compacting {} blocks", + m_Store.m_Config.RootDirectory, + BlocksToCompact.size()); + } + + m_Store.m_MetadataBlockStore.CompactBlocks( + BlockCompactState, + m_Store.m_Config.MetadataBlockStoreAlignement, + [&](const BlockStore::MovedChunksArray& MovedArray, + const BlockStore::ChunkIndexArray& ScrubbedArray, + uint64_t FreedDiskSpace) { + std::vector<MetadataDiskEntry> MovedEntries; + MovedEntries.reserve(MovedArray.size()); + RwLock::ExclusiveLockScope _(m_Store.m_Lock); + for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) + { + size_t ChunkIndex = Moved.first; + const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; + + ZEN_ASSERT(m_Store.m_TrackedCacheKeys); + if (m_Store.m_TrackedCacheKeys->contains(Key)) + { + continue; + } + + if (auto It = m_Store.m_BlobLookup.find(Key); It != m_Store.m_BlobLookup.end()) + { + const BlobIndex Index = It->second; + + if (MetadataIndex Meta = m_Store.m_BlobEntries[Index].Metadata; Meta) + { + m_Store.m_MetadataEntries[Meta].Location = Moved.second; + MovedEntries.push_back( + MetadataDiskEntry{.Entry = m_Store.m_MetadataEntries[Meta], .BlobHash = Key}); + } + } + } + + for (size_t Scrubbed : ScrubbedArray) + { + const IoHash& Key = BlockCompactStateKeys[Scrubbed]; + if (auto It = m_Store.m_BlobLookup.find(Key); It != m_Store.m_BlobLookup.end()) + { + const BlobIndex Index = It->second; + + if (MetadataIndex Meta = m_Store.m_BlobEntries[Index].Metadata; Meta) + { + MovedEntries.push_back( + MetadataDiskEntry{.Entry = m_Store.m_MetadataEntries[Meta], .BlobHash = Key}); + MovedEntries.back().Entry.Flags |= MetadataEntry::kTombStone; + m_Store.m_MetadataEntries[Meta] = {}; + m_Store.m_BlobEntries[Index].Metadata = {}; + } + } + } + + m_Store.m_MetadatalogFile.Append(MovedEntries); + + Stats.RemovedDisk += FreedDiskSpace; + if (Ctx.IsCancelledFlag.load()) + { + return false; + } + return true; + }, + ClaimDiskReserveCallback, + fmt::format("GCV2: buildstore [COMPACT] '{}': ", m_Store.m_Config.RootDirectory)); + } + else + { + if (Ctx.Settings.Verbose) + { + ZEN_INFO("GCV2: buildstore [COMPACT] '{}': skipped compacting of {} eligible blocks", + m_Store.m_Config.RootDirectory, + BlocksToCompact.size()); + } + } + } + } + } + } + + virtual std::string GetGcName(GcCtx& Ctx) override + { + ZEN_UNUSED(Ctx); + ZEN_MEMSCOPE(GetBuildstoreTag()); + + return fmt::format("buildstore: '{}'", m_Store.m_Config.RootDirectory.string()); + } + +private: + BuildStore& m_Store; + const std::vector<IoHash> m_RemovedBlobs; +}; + +GcStoreCompactor* +BuildStore::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) +{ + ZEN_TRACE_CPU("Builds::RemoveExpiredData"); + ZEN_MEMSCOPE(GetBuildstoreTag()); + + auto Log = [&Ctx]() { return Ctx.Logger; }; + + Stopwatch Timer; + const auto _ = MakeGuard([&] { + if (Ctx.Settings.Verbose) + { + ZEN_INFO("GCV2: buildstore [REMOVE EXPIRED] '{}': Count: {}, Expired: {}, Deleted: {}, FreedMemory: {} in {}", + m_Config.RootDirectory, + Stats.CheckedCount, + Stats.FoundCount, + Stats.DeletedCount, + NiceBytes(Stats.FreedMemory), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + } + }); + + const GcClock::Tick ExpireTicks = Ctx.Settings.BuildStoreExpireTime.time_since_epoch().count(); + std::vector<IoHash> ExpiredBlobs; + tsl::robin_set<IoHash, IoHash::Hasher> SizeDroppedBlobs; + + { + struct SizeInfo + { + const IoHash Key; + uint32_t SecondsSinceEpoch = 0; + uint64_t BlobSize = 0; + }; + + bool DiskSizeExceeded = false; + const uint64_t CurrentDiskSize = + m_LargeBlobStore.StorageSize().DiskSize + m_SmallBlobStore.StorageSize().DiskSize + m_MetadataBlockStore.TotalSize(); + if (CurrentDiskSize > m_Config.MaxDiskSpaceLimit) + { + DiskSizeExceeded = true; + } + + uint64_t ExpiredDataSize = 0; + + std::vector<SizeInfo> NonExpiredBlobSizeInfos; + + { + RwLock::SharedLockScope __(m_Lock); + if (DiskSizeExceeded) + { + NonExpiredBlobSizeInfos.reserve(m_BlobLookup.size()); + } + for (const auto& It : m_BlobLookup) + { + const BlobIndex ReadBlobIndex = It.second; + const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex]; + uint64_t Size = 0; + if (ReadBlobEntry.Payload) + { + const PayloadEntry& Payload = m_PayloadEntries[ReadBlobEntry.Payload]; + Size += Payload.GetSize(); + } + if (ReadBlobEntry.Metadata) + { + const MetadataEntry& Metadata = m_MetadataEntries[ReadBlobEntry.Metadata]; + Size += Metadata.Location.Size; + } + + const GcClock::Tick AccessTick = ReadBlobEntry.LastAccessTime; + if (AccessTick < ExpireTicks) + { + ExpiredBlobs.push_back(It.first); + ExpiredDataSize += ExpiredDataSize; + } + else if (DiskSizeExceeded) + { + NonExpiredBlobSizeInfos.emplace_back(SizeInfo{.Key = It.first, + .SecondsSinceEpoch = ReadBlobEntry.LastAccessTime.GetSecondsSinceEpoch(), + .BlobSize = Size}); + } + } + Stats.CheckedCount += m_BlobLookup.size(); + Stats.FoundCount += ExpiredBlobs.size(); + } + + if (DiskSizeExceeded) + { + const uint64_t NewSizeLimit = + m_Config.MaxDiskSpaceLimit - + (m_Config.MaxDiskSpaceLimit >> 4); // Remove a bit more than just below the limit so we have some space to grow + if ((CurrentDiskSize - ExpiredDataSize) > NewSizeLimit) + { + std::vector<size_t> NonExpiredOrder; + NonExpiredOrder.resize(NonExpiredBlobSizeInfos.size()); + for (size_t Index = 0; Index < NonExpiredOrder.size(); Index++) + { + NonExpiredOrder[Index] = Index; + } + std::sort(NonExpiredOrder.begin(), NonExpiredOrder.end(), [&NonExpiredBlobSizeInfos](const size_t Lhs, const size_t Rhs) { + const SizeInfo& LhsInfo = NonExpiredBlobSizeInfos[Lhs]; + const SizeInfo& RhsInfo = NonExpiredBlobSizeInfos[Rhs]; + return LhsInfo.SecondsSinceEpoch < RhsInfo.SecondsSinceEpoch; + }); + + auto It = NonExpiredOrder.begin(); + while (It != NonExpiredOrder.end()) + { + const SizeInfo& Info = NonExpiredBlobSizeInfos[*It]; + if ((CurrentDiskSize - ExpiredDataSize) < NewSizeLimit) + { + break; + } + ExpiredDataSize += Info.BlobSize; + ExpiredBlobs.push_back(Info.Key); + SizeDroppedBlobs.insert(Info.Key); + It++; + } + } + } + } + + std::vector<IoHash> RemovedBlobs; + if (!ExpiredBlobs.empty()) + { + if (Ctx.Settings.IsDeleteMode) + { + RemovedBlobs.reserve(ExpiredBlobs.size()); + + std::vector<PayloadDiskEntry> RemovedPayloads; + std::vector<MetadataDiskEntry> RemoveMetadatas; + + RwLock::ExclusiveLockScope __(m_Lock); + if (Ctx.IsCancelledFlag.load()) + { + return nullptr; + } + + for (const IoHash& ExpiredBlob : ExpiredBlobs) + { + if (auto It = m_BlobLookup.find(ExpiredBlob); It != m_BlobLookup.end()) + { + const BlobIndex ReadBlobIndex = It->second; + const BlobEntry& ReadBlobEntry = m_BlobEntries[ReadBlobIndex]; + + const GcClock::Tick AccessTick = ReadBlobEntry.LastAccessTime; + + if (SizeDroppedBlobs.contains(ExpiredBlob) || (AccessTick < ExpireTicks)) + { + if (ReadBlobEntry.Payload) + { + RemovedPayloads.push_back( + PayloadDiskEntry{.Entry = m_PayloadEntries[ReadBlobEntry.Payload], .BlobHash = ExpiredBlob}); + RemovedPayloads.back().Entry.AddFlag(PayloadEntry::kTombStone); + m_PayloadEntries[ReadBlobEntry.Payload] = {}; + m_BlobEntries[ReadBlobIndex].Payload = {}; + } + if (ReadBlobEntry.Metadata) + { + RemoveMetadatas.push_back( + MetadataDiskEntry{.Entry = m_MetadataEntries[ReadBlobEntry.Metadata], .BlobHash = ExpiredBlob}); + RemoveMetadatas.back().Entry.Flags |= MetadataEntry::kTombStone; + m_MetadataEntries[ReadBlobEntry.Metadata] = {}; + m_BlobEntries[ReadBlobIndex].Metadata = {}; + } + + m_BlobLookup.erase(It); + m_LastAccessTimeUpdateCount++; + + RemovedBlobs.push_back(ExpiredBlob); + Stats.DeletedCount++; + } + } + } + if (!RemovedPayloads.empty()) + { + m_PayloadlogFile.Append(RemovedPayloads); + } + if (!RemoveMetadatas.empty()) + { + m_MetadatalogFile.Append(RemoveMetadatas); + } + } + } + + if (!RemovedBlobs.empty()) + { + CompactState(); + } + + return new BuildStoreGcCompator(*this, std::move(RemovedBlobs)); +} + +std::vector<GcReferenceChecker*> +BuildStore::CreateReferenceCheckers(GcCtx& Ctx) +{ + ZEN_UNUSED(Ctx); + ZEN_MEMSCOPE(GetBuildstoreTag()); + return {new BuildStoreGcReferenceChecker(*this)}; +} + +std::vector<GcReferenceValidator*> +BuildStore::CreateReferenceValidators(GcCtx& Ctx) +{ + ZEN_UNUSED(Ctx); + return {}; +} + +std::vector<RwLock::SharedLockScope> +BuildStore::LockState(GcCtx& Ctx) +{ + ZEN_UNUSED(Ctx); + std::vector<RwLock::SharedLockScope> Locks; + Locks.emplace_back(RwLock::SharedLockScope(m_Lock)); + return Locks; +} + +void +BuildStore::ScrubStorage(ScrubContext& ScrubCtx) +{ + ZEN_UNUSED(ScrubCtx); + // TODO +} + +GcStorageSize +BuildStore::StorageSize() const +{ + GcStorageSize Result; + Result.DiskSize = m_MetadataBlockStore.TotalSize(); + return Result; +} + +/* + ___________ __ + \__ ___/___ _______/ |_ ______ + | |_/ __ \ / ___/\ __\/ ___/ + | |\ ___/ \___ \ | | \___ \ + |____| \___ >____ > |__| /____ > + \/ \/ \/ +*/ + +#if ZEN_WITH_TESTS + +TEST_CASE("BuildStore.Blobs") +{ + ScopedTemporaryDirectory _; + + BuildStoreConfig Config; + Config.RootDirectory = _.Path() / "build_store"; + + std::vector<IoHash> CompressedBlobsHashes; + { + GcManager Gc; + BuildStore Store(Config, Gc); + + for (size_t I = 0; I < 5; I++) + { + IoBuffer Blob = CreateSemiRandomBlob(4711 + I * 7); + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob))); + CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash()); + IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCompressedBinary); + + Store.PutBlob(CompressedBlobsHashes.back(), Payload); + } + + for (const IoHash& RawHash : CompressedBlobsHashes) + { + IoBuffer Payload = Store.GetBlob(RawHash); + CHECK(Payload); + CHECK(Payload.GetContentType() == ZenContentType::kCompressedBinary); + IoHash VerifyRawHash; + uint64_t VerifyRawSize; + CompressedBuffer CompressedBlob = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(Payload)), VerifyRawHash, VerifyRawSize); + CHECK(CompressedBlob); + CHECK(VerifyRawHash == RawHash); + IoBuffer Decompressed = CompressedBlob.Decompress().AsIoBuffer(); + CHECK(IoHash::HashBuffer(Decompressed) == RawHash); + } + } + { + GcManager Gc; + BuildStore Store(Config, Gc); + for (const IoHash& RawHash : CompressedBlobsHashes) + { + IoBuffer Payload = Store.GetBlob(RawHash); + CHECK(Payload); + CHECK(Payload.GetContentType() == ZenContentType::kCompressedBinary); + IoHash VerifyRawHash; + uint64_t VerifyRawSize; + CompressedBuffer CompressedBlob = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(Payload)), VerifyRawHash, VerifyRawSize); + CHECK(CompressedBlob); + CHECK(VerifyRawHash == RawHash); + IoBuffer Decompressed = CompressedBlob.Decompress().AsIoBuffer(); + CHECK(IoHash::HashBuffer(Decompressed) == RawHash); + } + + for (size_t I = 0; I < 5; I++) + { + IoBuffer Blob = CreateSemiRandomBlob(5713 + I * 7); + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob))); + CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash()); + IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCompressedBinary); + + Store.PutBlob(CompressedBlobsHashes.back(), Payload); + } + } + { + GcManager Gc; + BuildStore Store(Config, Gc); + for (const IoHash& RawHash : CompressedBlobsHashes) + { + IoBuffer Payload = Store.GetBlob(RawHash); + CHECK(Payload); + CHECK(Payload.GetContentType() == ZenContentType::kCompressedBinary); + IoHash VerifyRawHash; + uint64_t VerifyRawSize; + CompressedBuffer CompressedBlob = + CompressedBuffer::FromCompressed(SharedBuffer(std::move(Payload)), VerifyRawHash, VerifyRawSize); + CHECK(CompressedBlob); + CHECK(VerifyRawHash == RawHash); + IoBuffer Decompressed = CompressedBlob.Decompress().AsIoBuffer(); + CHECK(IoHash::HashBuffer(Decompressed) == RawHash); + } + } +} + +namespace blockstore::testing { + IoBuffer MakeMetaData(const IoHash& BlobHash, const std::vector<std::pair<std::string, std::string>>& KeyValues) + { + CbObjectWriter Writer; + Writer.AddHash("rawHash"sv, BlobHash); + Writer.BeginObject("values"); + { + for (const auto& V : KeyValues) + { + Writer.AddString(V.first, V.second); + } + } + Writer.EndObject(); // values + return Writer.Save().GetBuffer().AsIoBuffer(); + }; + +} // namespace blockstore::testing + +TEST_CASE("BuildStore.Metadata") +{ + using namespace blockstore::testing; + + ScopedTemporaryDirectory _; + + WorkerThreadPool& WorkerPool = GetSmallWorkerPool(EWorkloadType::Burst); + + BuildStoreConfig Config; + Config.RootDirectory = _.Path() / "build_store"; + + std::vector<IoHash> BlobHashes; + std::vector<IoBuffer> MetaPayloads; + { + GcManager Gc; + BuildStore Store(Config, Gc); + + for (size_t I = 0; I < 5; I++) + { + BlobHashes.push_back(IoHash::HashBuffer(&I, sizeof(I))); + MetaPayloads.push_back(MakeMetaData(BlobHashes.back(), {{"index", fmt::format("{}", I)}})); + MetaPayloads.back().SetContentType(ZenContentType::kCbObject); + } + Store.PutMetadatas(BlobHashes, MetaPayloads); + + std::vector<IoBuffer> ValidateMetaPayloads = Store.GetMetadatas(BlobHashes, &WorkerPool); + CHECK(ValidateMetaPayloads.size() == MetaPayloads.size()); + for (size_t I = 0; I < ValidateMetaPayloads.size(); I++) + { + const IoHash ExpectedHash = IoHash::HashBuffer(MetaPayloads[I]); + CHECK_EQ(IoHash::HashBuffer(ValidateMetaPayloads[I]), ExpectedHash); + } + } + { + GcManager Gc; + BuildStore Store(Config, Gc); + std::vector<IoBuffer> ValidateMetaPayloads = Store.GetMetadatas(BlobHashes, &WorkerPool); + CHECK(ValidateMetaPayloads.size() == MetaPayloads.size()); + for (size_t I = 0; I < ValidateMetaPayloads.size(); I++) + { + const IoHash ExpectedHash = IoHash::HashBuffer(MetaPayloads[I]); + CHECK_EQ(IoHash::HashBuffer(ValidateMetaPayloads[I]), ExpectedHash); + } + for (const IoHash& BlobHash : BlobHashes) + { + CHECK(!Store.GetBlob(BlobHash)); + } + } + std::vector<IoHash> CompressedBlobsHashes; + { + GcManager Gc; + BuildStore Store(Config, Gc); + for (size_t I = 0; I < 5; I++) + { + IoBuffer Blob = CreateSemiRandomBlob(4711 + I * 7); + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob))); + CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash()); + IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCompressedBinary); + + Store.PutBlob(CompressedBlobsHashes.back(), Payload); + } + std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool); + for (const auto& MetadataIt : MetadataPayloads) + { + CHECK(!MetadataIt); + } + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + IoBuffer Blob = Store.GetBlob(BlobHash); + CHECK(Blob); + IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer(); + CHECK(DecompressedBlob); + CHECK_EQ(IoHash::HashBuffer(DecompressedBlob), BlobHash); + } + } + + std::vector<IoBuffer> BlobMetaPayloads; + { + GcManager Gc; + BuildStore Store(Config, Gc); + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + BlobMetaPayloads.push_back(MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}})); + BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject); + } + Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads); + + std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool); + CHECK(MetadataPayloads.size() == BlobMetaPayloads.size()); + for (size_t I = 0; I < MetadataPayloads.size(); I++) + { + const IoBuffer& MetadataPayload = MetadataPayloads[I]; + CHECK_EQ(IoHash::HashBuffer(MetadataPayload), IoHash::HashBuffer(BlobMetaPayloads[I])); + } + } + + { + GcManager Gc; + BuildStore Store(Config, Gc); + + std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool); + CHECK(MetadataPayloads.size() == BlobMetaPayloads.size()); + for (size_t I = 0; I < MetadataPayloads.size(); I++) + { + const IoBuffer& MetadataPayload = MetadataPayloads[I]; + CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I])); + } + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + IoBuffer Blob = Store.GetBlob(BlobHash); + CHECK(Blob); + IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer(); + CHECK(DecompressedBlob); + CHECK_EQ(IoHash::HashBuffer(DecompressedBlob), BlobHash); + } + + BlobMetaPayloads.clear(); + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + BlobMetaPayloads.push_back( + MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}, {"replaced", fmt::format("{}", true)}})); + BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject); + } + Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads); + } + { + GcManager Gc; + BuildStore Store(Config, Gc); + + std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, &WorkerPool); + CHECK(MetadataPayloads.size() == BlobMetaPayloads.size()); + for (size_t I = 0; I < MetadataPayloads.size(); I++) + { + const IoBuffer& MetadataPayload = MetadataPayloads[I]; + CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I])); + } + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + IoBuffer Blob = Store.GetBlob(BlobHash); + CHECK(Blob); + IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer(); + CHECK(DecompressedBlob); + CHECK_EQ(IoHash::HashBuffer(DecompressedBlob), BlobHash); + } + } +} + +TEST_CASE("BuildStore.GC") +{ + using namespace blockstore::testing; + + ScopedTemporaryDirectory _; + + BuildStoreConfig Config; + Config.RootDirectory = _.Path() / "build_store"; + + std::vector<IoHash> CompressedBlobsHashes; + std::vector<IoBuffer> BlobMetaPayloads; + { + GcManager Gc; + BuildStore Store(Config, Gc); + for (size_t I = 0; I < 5; I++) + { + IoBuffer Blob = CreateSemiRandomBlob(4711 + I * 7); + CompressedBuffer CompressedBlob = CompressedBuffer::Compress(SharedBuffer(std::move(Blob))); + CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash()); + IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCompressedBinary); + + Store.PutBlob(CompressedBlobsHashes.back(), Payload); + } + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + BlobMetaPayloads.push_back(MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}})); + BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject); + } + Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads); + } + { + GcManager Gc; + BuildStore Store(Config, Gc); + + { + GcResult Result = Gc.CollectGarbage(GcSettings{.BuildStoreExpireTime = GcClock::Now() - std::chrono::hours(1), + .CollectSmallObjects = false, + .IsDeleteMode = false, + .Verbose = true}); + CHECK(!Result.WasCancelled); + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + IoBuffer Blob = Store.GetBlob(BlobHash); + CHECK(Blob); + IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer(); + CHECK(DecompressedBlob); + CHECK(IoHash::HashBuffer(DecompressedBlob) == BlobHash); + } + + std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, nullptr); + CHECK(MetadataPayloads.size() == BlobMetaPayloads.size()); + for (size_t I = 0; I < MetadataPayloads.size(); I++) + { + const IoBuffer& MetadataPayload = MetadataPayloads[I]; + CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I])); + } + } + { + GcResult Result = Gc.CollectGarbage(GcSettings{.BuildStoreExpireTime = GcClock::Now() + std::chrono::hours(1), + .CollectSmallObjects = true, + .IsDeleteMode = true, + .Verbose = true}); + CHECK(!Result.WasCancelled); + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + IoBuffer Blob = Store.GetBlob(BlobHash); + CHECK(!Blob); + } + + std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, nullptr); + CHECK(MetadataPayloads.size() == BlobMetaPayloads.size()); + for (size_t I = 0; I < MetadataPayloads.size(); I++) + { + const IoBuffer& MetadataPayload = MetadataPayloads[I]; + CHECK(!MetadataPayload); + } + } + } +} + +TEST_CASE("BuildStore.SizeLimit") +{ + using namespace blockstore::testing; + + ScopedTemporaryDirectory _; + + BuildStoreConfig Config = {.MaxDiskSpaceLimit = 1024u * 1024u}; + Config.RootDirectory = _.Path() / "build_store"; + + std::vector<IoHash> CompressedBlobsHashes; + std::vector<IoBuffer> BlobMetaPayloads; + { + GcManager Gc; + BuildStore Store(Config, Gc); + for (size_t I = 0; I < 64; I++) + { + IoBuffer Blob = CreateSemiRandomBlob(65537 + I * 7); + CompressedBuffer CompressedBlob = + CompressedBuffer::Compress(SharedBuffer(std::move(Blob)), OodleCompressor::Mermaid, OodleCompressionLevel::None); + CompressedBlobsHashes.push_back(CompressedBlob.DecodeRawHash()); + IoBuffer Payload = std::move(CompressedBlob).GetCompressed().Flatten().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCompressedBinary); + Store.PutBlob(CompressedBlobsHashes.back(), Payload); + } + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + BlobMetaPayloads.push_back(MakeMetaData(BlobHash, {{"blobHash", fmt::format("{}", BlobHash)}})); + BlobMetaPayloads.back().SetContentType(ZenContentType::kCbObject); + } + Store.PutMetadatas(CompressedBlobsHashes, BlobMetaPayloads); + + { + for (size_t I = 0; I < 64; I++) + { + const IoHash& Key = CompressedBlobsHashes[I]; + GcClock::Tick AccessTick = (GcClock::Now() + std::chrono::minutes(I)).time_since_epoch().count(); + + Store.SetLastAccessTime(Key, AccessTime(AccessTick)); + } + } + } + { + GcManager Gc; + BuildStore Store(Config, Gc); + + { + GcResult Result = Gc.CollectGarbage(GcSettings{.BuildStoreExpireTime = GcClock::Now() - std::chrono::hours(1), + .CollectSmallObjects = true, + .IsDeleteMode = true, + .Verbose = true}); + + uint32_t DeletedBlobs = 0; + + CHECK(!Result.WasCancelled); + for (const IoHash& BlobHash : CompressedBlobsHashes) + { + IoBuffer Blob = Store.GetBlob(BlobHash); + if (!Blob) + { + DeletedBlobs++; + } + else + { + IoBuffer DecompressedBlob = CompressedBuffer::FromCompressedNoValidate(std::move(Blob)).Decompress().AsIoBuffer(); + CHECK(DecompressedBlob); + CHECK(IoHash::HashBuffer(DecompressedBlob) == BlobHash); + } + } + CHECK(DeletedBlobs == 50); + + std::vector<IoBuffer> MetadataPayloads = Store.GetMetadatas(CompressedBlobsHashes, nullptr); + CHECK(MetadataPayloads.size() == BlobMetaPayloads.size()); + for (size_t I = 0; I < MetadataPayloads.size(); I++) + { + const IoBuffer& MetadataPayload = MetadataPayloads[I]; + if (I < DeletedBlobs) + { + CHECK(!MetadataPayload); + } + else + { + CHECK(IoHash::HashBuffer(MetadataPayload) == IoHash::HashBuffer(BlobMetaPayloads[I])); + } + } + } + } +} + +void +buildstore_forcelink() +{ +} + +#endif + +} // namespace zen diff --git a/src/zenstore/cache/cachedisklayer.cpp b/src/zenstore/cache/cachedisklayer.cpp index 72a767645..1ebb8f144 100644 --- a/src/zenstore/cache/cachedisklayer.cpp +++ b/src/zenstore/cache/cachedisklayer.cpp @@ -14,6 +14,7 @@ #include <zencore/trace.h> #include <zencore/workthreadpool.h> #include <zencore/xxhash.h> +#include <zenutil/parallelwork.h> #include <zenutil/referencemetadata.h> #include <zenutil/workerpools.h> @@ -195,34 +196,33 @@ namespace cache::impl { return true; } - bool MoveAndDeleteDirectory(const std::filesystem::path& Dir) + std::filesystem::path MoveDroppedDirectory(const std::filesystem::path& Dir) { int DropIndex = 0; do { - if (!std::filesystem::exists(Dir)) + if (!IsDir(Dir)) { - return false; + return {}; } std::string DroppedName = fmt::format("[dropped]{}({})", Dir.filename().string(), DropIndex); std::filesystem::path DroppedBucketPath = Dir.parent_path() / DroppedName; - if (std::filesystem::exists(DroppedBucketPath)) + if (IsDir(DroppedBucketPath)) { DropIndex++; continue; } std::error_code Ec; - std::filesystem::rename(Dir, DroppedBucketPath, Ec); + RenameDirectory(Dir, DroppedBucketPath, Ec); if (!Ec) { - DeleteDirectories(DroppedBucketPath); - return true; + return DroppedBucketPath; } - // TODO: Do we need to bail at some point? zen::Sleep(100); - } while (true); + } while (DropIndex < 10); + return {}; } } // namespace cache::impl @@ -373,10 +373,10 @@ private: #pragma pack(4) struct ManifestData { - uint32_t RawSize; // 4 - AccessTime Timestamp; // 4 - IoHash RawHash; // 20 - IoHash Key; // 20 + uint32_t RawSize; // 4 + uint32_t SecondsSinceEpoch; // 4 + IoHash RawHash; // 20 + IoHash Key; // 20 }; #pragma pack(pop) @@ -658,7 +658,7 @@ BucketManifestSerializer::ReadSidecarFile(RwLock::ExclusiveLockScope& B ZenCacheDiskLayer::CacheBucket::BucketPayload& PayloadEntry = Payloads[PlIndex]; - AccessTimes[PlIndex] = Entry->Timestamp; + AccessTimes[PlIndex].SetSecondsSinceEpoch(Entry->SecondsSinceEpoch); if (Entry->RawSize && Entry->RawHash != IoHash::Zero) { @@ -685,6 +685,16 @@ BucketManifestSerializer::WriteSidecarFile(RwLock::SharedLockScope&, { ZEN_TRACE_CPU("Z$::WriteSidecarFile"); + ZEN_DEBUG("writing store sidecar for '{}'", SidecarPath); + const uint64_t EntryCount = Index.size(); + Stopwatch Timer; + const auto _ = MakeGuard([&] { + ZEN_INFO("wrote store sidecar for '{}' containing {} entries in {}", + SidecarPath, + EntryCount, + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); + }); + BucketMetaHeader Header; Header.EntryCount = m_ManifestEntryCount; Header.LogPosition = SnapshotLogPosition; @@ -702,43 +712,44 @@ BucketManifestSerializer::WriteSidecarFile(RwLock::SharedLockScope&, SidecarFile.Write(&Header, sizeof Header, 0); - // TODO: make this batching for better performance { uint64_t WriteOffset = sizeof Header; - // BasicFileWriter SidecarWriter(SidecarFile, 128 * 1024); + const size_t MaxManifestDataBufferCount = (512u * 1024u) / sizeof(ManifestData); - std::vector<ManifestData> ManifestDataBuffer; - const size_t MaxManifestDataBufferCount = Min(Index.size(), 8192u); // 512 Kb - ManifestDataBuffer.reserve(MaxManifestDataBufferCount); + std::vector<ManifestData> ManifestDataBuffer(Min(m_ManifestEntryCount, MaxManifestDataBufferCount)); + auto WriteIt = ManifestDataBuffer.begin(); for (auto& Kv : Index) { - const IoHash& Key = Kv.first; - const PayloadIndex PlIndex = Kv.second; + ManifestData& Data = *WriteIt++; - IoHash RawHash = IoHash::Zero; - uint32_t RawSize = 0; + const PayloadIndex PlIndex = Kv.second; + Data.Key = Kv.first; + Data.SecondsSinceEpoch = AccessTimes[PlIndex].GetSecondsSinceEpoch(); if (const MetaDataIndex MetaIndex = Payloads[PlIndex].MetaData) { - RawHash = MetaDatas[MetaIndex].RawHash; - RawSize = MetaDatas[MetaIndex].RawSize; + Data.RawHash = MetaDatas[MetaIndex].RawHash; + Data.RawSize = MetaDatas[MetaIndex].RawSize; + } + else + { + Data.RawHash = IoHash::Zero; + Data.RawSize = 0; } - ManifestDataBuffer.emplace_back( - ManifestData{.RawSize = RawSize, .Timestamp = AccessTimes[PlIndex], .RawHash = RawHash, .Key = Key}); - if (ManifestDataBuffer.size() == MaxManifestDataBufferCount) + if (WriteIt == ManifestDataBuffer.end()) { - const uint64_t WriteSize = sizeof(ManifestData) * ManifestDataBuffer.size(); + uint64_t WriteSize = std::distance(ManifestDataBuffer.begin(), WriteIt) * sizeof(ManifestData); SidecarFile.Write(ManifestDataBuffer.data(), WriteSize, WriteOffset); WriteOffset += WriteSize; - ManifestDataBuffer.clear(); - ManifestDataBuffer.reserve(MaxManifestDataBufferCount); + WriteIt = ManifestDataBuffer.begin(); } } - if (ManifestDataBuffer.size() > 0) + if (WriteIt != ManifestDataBuffer.begin()) { - SidecarFile.Write(ManifestDataBuffer.data(), sizeof(ManifestData) * ManifestDataBuffer.size(), WriteOffset); + uint64_t WriteSize = std::distance(ManifestDataBuffer.begin(), WriteIt) * sizeof(ManifestData); + SidecarFile.Write(ManifestDataBuffer.data(), WriteSize, WriteOffset); } } @@ -763,11 +774,11 @@ namespace zen { ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, - std::string BucketName, + std::string_view BucketName, const BucketConfiguration& Config) : m_Gc(Gc) , m_OuterCacheMemoryUsage(OuterCacheMemoryUsage) -, m_BucketName(std::move(BucketName)) +, m_BucketName(BucketName) , m_Configuration(Config) , m_BucketId(Oid::Zero) { @@ -795,6 +806,16 @@ ZenCacheDiskLayer::CacheBucket::CacheBucket(GcManager& Gc, ZenCacheDiskLayer::CacheBucket::~CacheBucket() { + try + { + m_SlogFile.Flush(); + m_SlogFile.Close(); + m_BlockStore.Close(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("~CacheBucket() failed with: ", Ex.what()); + } m_Gc.RemoveGcReferencer(*this); } @@ -868,12 +889,13 @@ ZenCacheDiskLayer::CacheBucket::OpenOrCreate(std::filesystem::path BucketDir, bo } void -ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(bool FlushLockPosition, const std::function<uint64_t()>& ClaimDiskReserveFunc) +ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(uint64_t LogPosition, + bool ResetLog, + const std::function<uint64_t()>& ClaimDiskReserveFunc) { ZEN_TRACE_CPU("Z$::Bucket::WriteIndexSnapshot"); - const uint64_t LogCount = FlushLockPosition ? 0 : m_SlogFile.GetLogCount(); - if (m_LogFlushPosition == LogCount) + if (m_LogFlushPosition == LogPosition) { return; } @@ -890,7 +912,7 @@ ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(bool FlushLockPosition, namespace fs = std::filesystem; - fs::path IndexPath = cache::impl::GetIndexPath(m_BucketDir, m_BucketName); + const fs::path IndexPath = cache::impl::GetIndexPath(m_BucketDir, m_BucketName); try { @@ -922,66 +944,70 @@ ZenCacheDiskLayer::CacheBucket::WriteIndexSnapshotLocked(bool FlushLockPosition, throw std::system_error(Ec, fmt::format("failed to create new snapshot file in '{}'", m_BucketDir)); } - { - // This is in a separate scope just to ensure IndexWriter goes out - // of scope before the file is flushed/closed, in order to ensure - // all data is written to the file - BasicFileWriter IndexWriter(ObjectIndexFile, 128 * 1024); + const uint64_t IndexLogPosition = ResetLog ? 0 : LogPosition; - cache::impl::CacheBucketIndexHeader Header = {.EntryCount = EntryCount, - .LogPosition = LogCount, - .PayloadAlignment = gsl::narrow<uint32_t>(m_Configuration.PayloadAlignment)}; + cache::impl::CacheBucketIndexHeader Header = {.EntryCount = EntryCount, + .LogPosition = IndexLogPosition, + .PayloadAlignment = gsl::narrow<uint32_t>(m_Configuration.PayloadAlignment)}; - Header.Checksum = cache::impl::CacheBucketIndexHeader::ComputeChecksum(Header); - IndexWriter.Write(&Header, sizeof(cache::impl::CacheBucketIndexHeader), 0); + Header.Checksum = cache::impl::CacheBucketIndexHeader::ComputeChecksum(Header); + ObjectIndexFile.Write(&Header, sizeof(cache::impl::CacheBucketIndexHeader), 0); + if (EntryCount > 0) + { uint64_t IndexWriteOffset = sizeof(cache::impl::CacheBucketIndexHeader); + size_t MaxWriteEntryCount = (512u * 1024u) / sizeof(DiskIndexEntry); + std::vector<DiskIndexEntry> DiskEntryBuffer(Min(m_Index.size(), MaxWriteEntryCount)); + + auto WriteIt = DiskEntryBuffer.begin(); for (auto& Entry : m_Index) { - DiskIndexEntry IndexEntry; - IndexEntry.Key = Entry.first; - IndexEntry.Location = m_Payloads[Entry.second].Location; - IndexWriter.Write(&IndexEntry, sizeof(DiskIndexEntry), IndexWriteOffset); - - IndexWriteOffset += sizeof(DiskIndexEntry); + *WriteIt++ = {.Key = Entry.first, .Location = m_Payloads[Entry.second].Location}; + if (WriteIt == DiskEntryBuffer.end()) + { + uint64_t WriteSize = std::distance(DiskEntryBuffer.begin(), WriteIt) * sizeof(DiskIndexEntry); + ObjectIndexFile.Write(DiskEntryBuffer.data(), WriteSize, IndexWriteOffset); + IndexWriteOffset += WriteSize; + WriteIt = DiskEntryBuffer.begin(); + } } - IndexWriter.Flush(); + if (WriteIt != DiskEntryBuffer.begin()) + { + uint64_t WriteSize = std::distance(DiskEntryBuffer.begin(), WriteIt) * sizeof(DiskIndexEntry); + ObjectIndexFile.Write(DiskEntryBuffer.data(), WriteSize, IndexWriteOffset); + } } ObjectIndexFile.Flush(); ObjectIndexFile.MoveTemporaryIntoPlace(IndexPath, Ec); if (Ec) { - std::filesystem::path TempFilePath = ObjectIndexFile.GetPath(); - ZEN_WARN("snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'", TempFilePath, IndexPath, Ec.message()); + throw std::system_error(Ec, + fmt::format("Snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'", + ObjectIndexFile.GetPath(), + IndexPath, + Ec.message())); } - else + + if (ResetLog) { - // We must only update the log flush position once the snapshot write succeeds - if (FlushLockPosition) - { - std::filesystem::path LogPath = cache::impl::GetLogPath(m_BucketDir, m_BucketName); + const std::filesystem::path LogPath = cache::impl::GetLogPath(m_BucketDir, m_BucketName); - if (std::filesystem::is_regular_file(LogPath)) + if (IsFile(LogPath)) + { + m_SlogFile.Close(); + if (!RemoveFile(LogPath, Ec) || Ec) { - if (!std::filesystem::remove(LogPath, Ec) || Ec) - { - ZEN_WARN("snapshot failed to clean log file '{}', removing index at '{}', reason: '{}'", - LogPath, - IndexPath, - Ec.message()); - std::error_code RemoveIndexEc; - std::filesystem::remove(IndexPath, RemoveIndexEc); - } + // This is non-critical, it only means that we will replay the events of the log over the snapshot - inefficent but in + // the end it will be the same result + ZEN_WARN("snapshot failed to clean log file '{}', reason: '{}'", LogPath, IndexPath, Ec.message()); } - } - if (!Ec) - { - m_LogFlushPosition = LogCount; + m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite); } } + m_LogFlushPosition = IndexLogPosition; } catch (const std::exception& Err) { @@ -994,7 +1020,7 @@ ZenCacheDiskLayer::CacheBucket::ReadIndexFile(RwLock::ExclusiveLockScope&, const { ZEN_TRACE_CPU("Z$::Bucket::ReadIndexFile"); - if (!std::filesystem::is_regular_file(IndexPath)) + if (!IsFile(IndexPath)) { return 0; } @@ -1078,7 +1104,7 @@ ZenCacheDiskLayer::CacheBucket::ReadLog(RwLock::ExclusiveLockScope&, const std:: { ZEN_TRACE_CPU("Z$::Bucket::ReadLog"); - if (!std::filesystem::is_regular_file(LogPath)) + if (!IsFile(LogPath)) { return 0; } @@ -1158,47 +1184,40 @@ ZenCacheDiskLayer::CacheBucket::InitializeIndexFromDisk(RwLock::ExclusiveLockSco if (IsNew) { - fs::remove(LogPath); - fs::remove(IndexPath); - fs::remove_all(m_BlocksBasePath); + RemoveFile(LogPath); + RemoveFile(IndexPath); + DeleteDirectories(m_BlocksBasePath); } CreateDirectories(m_BucketDir); m_BlockStore.Initialize(m_BlocksBasePath, m_Configuration.MaxBlockSize, BlockStoreDiskLocation::MaxBlockIndex + 1); - if (std::filesystem::is_regular_file(IndexPath)) + if (IsFile(IndexPath)) { uint32_t IndexVersion = 0; m_LogFlushPosition = ReadIndexFile(IndexLock, IndexPath, IndexVersion); if (IndexVersion == 0) { ZEN_WARN("removing invalid index file at '{}'", IndexPath); - std::filesystem::remove(IndexPath); + RemoveFile(IndexPath); } } uint64_t LogEntryCount = 0; - if (std::filesystem::is_regular_file(LogPath)) + if (IsFile(LogPath)) { if (TCasLogFile<DiskIndexEntry>::IsValid(LogPath)) { LogEntryCount = ReadLog(IndexLock, LogPath, m_LogFlushPosition); } - else if (fs::is_regular_file(LogPath)) + else if (IsFile(LogPath)) { ZEN_WARN("removing invalid log at '{}'", LogPath); - std::filesystem::remove(LogPath); + RemoveFile(LogPath); } } - if (IsNew || LogEntryCount > 0 || m_LogFlushPosition != 0) - { - WriteIndexSnapshot(IndexLock, /*Flush log*/ true); - } - - m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite); - BlockStore::BlockIndexSet KnownBlocks; for (const auto& Entry : m_Index) { @@ -1216,7 +1235,53 @@ ZenCacheDiskLayer::CacheBucket::InitializeIndexFromDisk(RwLock::ExclusiveLockSco KnownBlocks.insert(BlockIndex); } } - m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks); + BlockStore::BlockIndexSet MissingBlocks = m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks); + m_SlogFile.Open(LogPath, CasLogFile::Mode::kWrite); + + bool RemovedEntries = false; + if (!MissingBlocks.empty()) + { + std::vector<DiskIndexEntry> MissingEntries; + + for (auto& It : m_Index) + { + BucketPayload& Payload = m_Payloads[It.second]; + DiskLocation Location = Payload.Location; + if (!Location.IsFlagSet(DiskLocation::kStandaloneFile)) + { + if (MissingBlocks.contains(Location.Location.BlockLocation.GetBlockIndex())) + { + RemoveMemCachedData(IndexLock, Payload); + RemoveMetaData(IndexLock, Payload); + } + } + Location.Flags |= DiskLocation::kTombStone; + MissingEntries.push_back(DiskIndexEntry{.Key = It.first, .Location = Location}); + } + + ZEN_ASSERT(!MissingEntries.empty()); + + for (const DiskIndexEntry& Entry : MissingEntries) + { + m_Index.erase(Entry.Key); + } + m_SlogFile.Append(MissingEntries); + m_SlogFile.Flush(); + { + std::vector<BucketPayload> Payloads; + std::vector<AccessTime> AccessTimes; + std::vector<BucketMetaData> MetaDatas; + std::vector<MemCacheData> MemCachedPayloads; + IndexMap Index; + CompactState(IndexLock, Payloads, AccessTimes, MetaDatas, MemCachedPayloads, Index); + } + RemovedEntries = true; + } + + if (IsNew || LogEntryCount > 0 || m_LogFlushPosition != 0 || RemovedEntries) + { + WriteIndexSnapshot(IndexLock, m_SlogFile.GetLogCount(), /*Flush log*/ true); + } } void @@ -1384,7 +1449,7 @@ ZenCacheDiskLayer::CacheBucket::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) { Keys.reserve(OutResults.capacity()); ResultIndexes.reserve(OutResults.capacity()); @@ -1395,11 +1460,11 @@ struct ZenCacheDiskLayer::CacheBucket::GetBatchHandle std::vector<IoHash> Keys; std::vector<size_t> ResultIndexes; - std::vector<ZenCacheValue>& OutResults; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::CacheBucket::GetBatchHandle* -ZenCacheDiskLayer::CacheBucket::BeginGetBatch(std::vector<ZenCacheValue>& OutResult) +ZenCacheDiskLayer::CacheBucket::BeginGetBatch(ZenCacheValueVec_t& OutResult) { ZEN_TRACE_CPU("Z$::Bucket::BeginGetBatch"); return new GetBatchHandle(OutResult); @@ -1419,13 +1484,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!Batch->ResultIndexes.empty()) { - std::vector<DiskLocation> StandaloneDiskLocations; - std::vector<size_t> StandaloneKeyIndexes; - std::vector<size_t> MemCachedKeyIndexes; - std::vector<DiskLocation> InlineDiskLocations; - std::vector<BlockStoreLocation> InlineBlockLocations; - std::vector<size_t> InlineKeyIndexes; - std::vector<bool> FillRawHashAndRawSize(Batch->Keys.size(), false); + eastl::fixed_vector<DiskLocation, 16> StandaloneDiskLocations; + eastl::fixed_vector<size_t, 16> StandaloneKeyIndexes; + eastl::fixed_vector<size_t, 16> MemCachedKeyIndexes; + eastl::fixed_vector<DiskLocation, 16> InlineDiskLocations; + eastl::fixed_vector<BlockStoreLocation, 16> InlineBlockLocations; + eastl::fixed_vector<size_t, 16> InlineKeyIndexes; + eastl::fixed_vector<bool, 16> FillRawHashAndRawSize(Batch->Keys.size(), false); { RwLock::SharedLockScope IndexLock(m_IndexLock); for (size_t KeyIndex = 0; KeyIndex < Batch->Keys.size(); KeyIndex++) @@ -1479,6 +1544,13 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept } } } + else + { + if (m_Configuration.MemCacheSizeThreshold > 0) + { + m_MemoryMissCount++; + } + } } } @@ -1487,7 +1559,7 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept // Often we will find the metadata due to the thread setting the mem cached part doing it before us so it is worth // checking if it is present once more before spending time fetching and setting the RawHash and RawSize in metadata - auto FillOne = [&](const DiskLocation& Location, size_t KeyIndex, IoBuffer&& Value) { + auto FillOne = [&](const DiskLocation& Location, size_t KeyIndex, IoBuffer&& Value, bool UsesTemporaryMemory) { if (!Value) { return; @@ -1510,6 +1582,12 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept } } + if (AddToMemCache || UsesTemporaryMemory) + { + // We need to own it if we want to add it to the memcache or the buffer is just a range of the block iteration buffer + OutValue.Value.MakeOwned(); + } + if (SetMetaInfo) { // See ZenCacheDiskLayer::CacheBucket::Get - it sets the memcache part first and then if it needs to it set the @@ -1581,33 +1659,42 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept if (!InlineDiskLocations.empty()) { ZEN_TRACE_CPU("Z$::Bucket::EndGetBatch::ReadInline"); - m_BlockStore.IterateChunks(InlineBlockLocations, [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool { - // Only read into memory the IoBuffers we could potentially add to memcache - const uint64_t LargeChunkSizeLimit = Max(m_Configuration.MemCacheSizeThreshold, 1u * 1024u); - m_BlockStore.IterateBlock( - InlineBlockLocations, - ChunkIndexes, - [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - const void* Data, - uint64_t Size) -> bool { - if (Data != nullptr) - { - FillOne(InlineDiskLocations[ChunkIndex], - InlineKeyIndexes[ChunkIndex], - IoBufferBuilder::MakeCloneFromMemory(Data, Size)); - } - return true; - }, - [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, - BlockStoreFile& File, - uint64_t Offset, - uint64_t Size) -> bool { - FillOne(InlineDiskLocations[ChunkIndex], InlineKeyIndexes[ChunkIndex], File.GetChunk(Offset, Size)); - return true; - }, - LargeChunkSizeLimit); - return true; - }); + m_BlockStore.IterateChunks(std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + [&](uint32_t, std::span<const size_t> ChunkIndexes) -> bool { + // Up to 8KB or m_Configuration.MemCacheSizeThreshold depending on configuration + const uint64_t LargeChunkSizeLimit = + m_Configuration.MemCacheSizeThreshold == 0 + ? Min(m_Configuration.LargeObjectThreshold, 8u * 1024u) + : Max(m_Configuration.MemCacheSizeThreshold, 8u * 1024u); + + m_BlockStore.IterateBlock( + std::span{begin(InlineBlockLocations), end(InlineBlockLocations)}, + ChunkIndexes, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + const void* Data, + uint64_t Size) -> bool { + if (Data != nullptr) + { + FillOne(InlineDiskLocations[ChunkIndex], + InlineKeyIndexes[ChunkIndex], + IoBufferBuilder::MakeFromMemory(MemoryView(Data, Size)), + /*UsesTemporaryMemory*/ true); + } + return true; + }, + [this, &FillOne, &InlineDiskLocations, &InlineKeyIndexes](size_t ChunkIndex, + BlockStoreFile& File, + uint64_t Offset, + uint64_t Size) -> bool { + FillOne(InlineDiskLocations[ChunkIndex], + InlineKeyIndexes[ChunkIndex], + File.GetChunk(Offset, Size), + /*UsesTemporaryMemory*/ false); + return true; + }, + LargeChunkSizeLimit); + return true; + }); } if (!StandaloneDiskLocations.empty()) @@ -1617,7 +1704,7 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept { size_t KeyIndex = StandaloneKeyIndexes[Index]; const DiskLocation& Location = StandaloneDiskLocations[Index]; - FillOne(Location, KeyIndex, GetStandaloneCacheValue(Location, Batch->Keys[KeyIndex])); + FillOne(Location, KeyIndex, GetStandaloneCacheValue(Location, Batch->Keys[KeyIndex]), /*UsesTemporaryMemory*/ false); } } @@ -1697,10 +1784,6 @@ ZenCacheDiskLayer::CacheBucket::EndGetBatch(GetBatchHandle* Batch) noexcept else { m_DiskMissCount++; - if (m_Configuration.MemCacheSizeThreshold > 0) - { - m_MemoryMissCount++; - } } } } @@ -2029,11 +2112,13 @@ ZenCacheDiskLayer::CacheBucket::GetUsageByAccess(GcClock::TimePoint Now, GcClock } } -bool +std::function<void()> ZenCacheDiskLayer::CacheBucket::Drop() { ZEN_TRACE_CPU("Z$::Bucket::Drop"); + m_Gc.RemoveGcReferencer(*this); + RwLock::ExclusiveLockScope _(m_IndexLock); std::vector<std::unique_ptr<RwLock::ExclusiveLockScope>> ShardLocks; @@ -2045,7 +2130,7 @@ ZenCacheDiskLayer::CacheBucket::Drop() m_BlockStore.Close(); m_SlogFile.Close(); - const bool Deleted = cache::impl::MoveAndDeleteDirectory(m_BucketDir); + std::filesystem::path DroppedPath = cache::impl::MoveDroppedDirectory(m_BucketDir); m_Index.clear(); m_Payloads.clear(); @@ -2058,7 +2143,21 @@ ZenCacheDiskLayer::CacheBucket::Drop() m_OuterCacheMemoryUsage.fetch_sub(m_MemCachedSize.load()); m_MemCachedSize.store(0); - return Deleted; + if (DroppedPath.empty()) + { + return {}; + } + else + { + return [DroppedPath = std::move(DroppedPath)]() { + std::error_code Ec; + (void)DeleteDirectories(DroppedPath, Ec); + if (Ec) + { + ZEN_WARN("Failed to clean up dropped bucket directory '{}', reason: '{}'", DroppedPath, Ec.message()); + } + }; + } } void @@ -2093,6 +2192,9 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl ZEN_TRACE_CPU("Z$::Bucket::SaveSnapshot"); try { + // Be defensive regarding log position as it is written to without acquiring m_LocationMapLock + const uint64_t LogPosition = m_SlogFile.GetLogCount(); + bool UseLegacyScheme = false; IoBuffer Buffer; @@ -2107,7 +2209,7 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl { RwLock::SharedLockScope IndexLock(m_IndexLock); - WriteIndexSnapshot(IndexLock, /*Flush log*/ false); + WriteIndexSnapshot(IndexLock, LogPosition, /*Flush log*/ false); // Note: this copy could be eliminated on shutdown to // reduce memory usage and execution time Index = m_Index; @@ -2147,7 +2249,7 @@ ZenCacheDiskLayer::CacheBucket::SaveSnapshot(const std::function<uint64_t()>& Cl else { RwLock::SharedLockScope IndexLock(m_IndexLock); - WriteIndexSnapshot(IndexLock, /*Flush log*/ false); + WriteIndexSnapshot(IndexLock, LogPosition, /*Flush log*/ false); const uint64_t EntryCount = m_Index.size(); Buffer = ManifestWriter.MakeSidecarManifest(m_BucketId, EntryCount); uint64_t SidecarSize = ManifestWriter.GetSidecarSize(); @@ -2257,7 +2359,7 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) RwLock::SharedLockScope ValueLock(LockForHash(HashKey)); std::error_code Ec; - uintmax_t size = std::filesystem::file_size(DataFilePath.ToPath(), Ec); + uintmax_t size = FileSizeFromPath(DataFilePath.ToPath(), Ec); if (Ec) { ReportBadKey(HashKey); @@ -2398,11 +2500,11 @@ ZenCacheDiskLayer::CacheBucket::ScrubStorage(ScrubContext& Ctx) BuildPath(Path, Entry.Key); fs::path FilePath = Path.ToPath(); RwLock::ExclusiveLockScope ValueLock(LockForHash(Entry.Key)); - if (fs::is_regular_file(FilePath)) + if (IsFile(FilePath)) { ZEN_DEBUG("deleting bad standalone cache file '{}'", Path.ToUtf8()); std::error_code Ec; - fs::remove(FilePath, Ec); // We don't care if we fail, we are no longer tracking this file... + RemoveFile(FilePath, Ec); // We don't care if we fail, we are no longer tracking this file... } } } @@ -2535,7 +2637,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c if (CleanUpTempFile) { std::error_code Ec; - std::filesystem::remove(DataFile.GetPath(), Ec); + RemoveFile(DataFile.GetPath(), Ec); if (Ec) { ZEN_WARN("Failed to clean up temporary file '{}' for put in '{}', reason '{}'", @@ -2563,7 +2665,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c RwLock::ExclusiveLockScope ValueLock(LockForHash(HashKey)); // We do a speculative remove of the file instead of probing with a exists call and check the error code instead - std::filesystem::remove(FsPath, Ec); + RemoveFile(FsPath, Ec); if (Ec) { if (Ec.value() != ENOENT) @@ -2571,7 +2673,7 @@ ZenCacheDiskLayer::CacheBucket::PutStandaloneCacheValue(const IoHash& HashKey, c ZEN_WARN("Failed to remove file '{}' for put in '{}', reason: '{}', retrying.", FsPath, m_BucketDir, Ec.message()); Sleep(100); Ec.clear(); - std::filesystem::remove(FsPath, Ec); + RemoveFile(FsPath, Ec); if (Ec && Ec.value() != ENOENT) { throw std::system_error(Ec, fmt::format("Failed to remove file '{}' for put in '{}'", FsPath, m_BucketDir)); @@ -2796,7 +2898,6 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, ZEN_MEMSCOPE(GetCacheDiskTag()); ZEN_TRACE_CPU("Z$::Bucket::UpdateLocation"); DiskLocation Location(BlockStoreLocation, m_Configuration.PayloadAlignment, EntryFlags); - m_SlogFile.Append({.Key = HashKey, .Location = Location}); RwLock::ExclusiveLockScope IndexLock(m_IndexLock); if (m_TrackedCacheKeys) @@ -2826,6 +2927,7 @@ ZenCacheDiskLayer::CacheBucket::PutInlineCacheValue(const IoHash& HashKey, m_AccessTimes.emplace_back(GcClock::TickCount()); m_Index.insert_or_assign(HashKey, EntryIndex); } + m_SlogFile.Append({.Key = HashKey, .Location = Location}); }); } @@ -2842,9 +2944,10 @@ class DiskBucketStoreCompactor : public GcStoreCompactor using CacheBucket = ZenCacheDiskLayer::CacheBucket; public: - DiskBucketStoreCompactor(CacheBucket& Bucket, std::vector<std::pair<IoHash, uint64_t>>&& ExpiredStandaloneKeys) + DiskBucketStoreCompactor(CacheBucket& Bucket, std::vector<std::pair<IoHash, uint64_t>>&& ExpiredStandaloneKeys, bool FlushBucket) : m_Bucket(Bucket) , m_ExpiredStandaloneKeys(std::move(ExpiredStandaloneKeys)) + , m_FlushBucket(FlushBucket) { m_ExpiredStandaloneKeys.shrink_to_fit(); } @@ -2902,7 +3005,7 @@ public: ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': deleting standalone cache file '{}'", m_Bucket.m_BucketDir, Path.ToUtf8()); std::error_code Ec; - if (!fs::remove(FilePath, Ec)) + if (!RemoveFile(FilePath, Ec)) { continue; } @@ -2923,7 +3026,7 @@ public: ZEN_DEBUG("GCV2: cachebucket [COMPACT] '{}': checking standalone cache file '{}'", m_Bucket.m_BucketDir, Path.ToUtf8()); std::error_code Ec; - bool Existed = std::filesystem::is_regular_file(FilePath, Ec); + bool Existed = IsFile(FilePath, Ec); if (Ec) { ZEN_WARN("GCV2: cachebucket [COMPACT] '{}': failed checking cache payload file '{}'. Reason '{}'", @@ -3023,10 +3126,12 @@ public: m_Bucket.m_BlockStore.CompactBlocks( BlockCompactState, m_Bucket.m_Configuration.PayloadAlignment, - [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { + [&](const BlockStore::MovedChunksArray& MovedArray, + const BlockStore::ChunkIndexArray& ScrubbedArray, + uint64_t FreedDiskSpace) { std::vector<DiskIndexEntry> MovedEntries; MovedEntries.reserve(MovedArray.size()); - RwLock::ExclusiveLockScope _(m_Bucket.m_IndexLock); + RwLock::ExclusiveLockScope IndexLock(m_Bucket.m_IndexLock); for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) { size_t ChunkIndex = Moved.first; @@ -3048,6 +3153,24 @@ public: MovedEntries.push_back({.Key = Key, .Location = Payload.Location}); } } + + for (size_t ScrubbedIndex : ScrubbedArray) + { + const IoHash& Key = BlockCompactStateKeys[ScrubbedIndex]; + + if (auto It = m_Bucket.m_Index.find(Key); It != m_Bucket.m_Index.end()) + { + BucketPayload& Payload = m_Bucket.m_Payloads[It->second]; + DiskLocation Location = Payload.Location; + + m_Bucket.RemoveMemCachedData(IndexLock, Payload); + m_Bucket.RemoveMetaData(IndexLock, Payload); + + Location.Flags |= DiskLocation::kTombStone; + MovedEntries.push_back(DiskIndexEntry{.Key = Key, .Location = Location}); + } + } + m_Bucket.m_SlogFile.Append(MovedEntries); Stats.RemovedDisk += FreedDiskSpace; if (Ctx.IsCancelledFlag.load()) @@ -3071,6 +3194,10 @@ public: } } } + if (m_FlushBucket) + { + m_Bucket.Flush(); + } } virtual std::string GetGcName(GcCtx& Ctx) override { return m_Bucket.GetGcName(Ctx); } @@ -3078,6 +3205,7 @@ public: private: ZenCacheDiskLayer::CacheBucket& m_Bucket; std::vector<std::pair<IoHash, uint64_t>> m_ExpiredStandaloneKeys; + bool m_FlushBucket = false; }; GcStoreCompactor* @@ -3101,24 +3229,6 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) NiceBytes(Stats.FreedMemory), NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } - if (Stats.DeletedCount > 0) - { - bool Expected = false; - if (m_IsFlushing || !m_IsFlushing.compare_exchange_strong(Expected, true)) - { - return; - } - auto FlushingGuard = MakeGuard([&] { m_IsFlushing.store(false); }); - - try - { - SaveSnapshot([]() { return 0; }); - } - catch (const std::exception& Ex) - { - ZEN_WARN("Failed to write index and manifest after RemoveExpiredData in '{}'. Reason: '{}'", m_BucketDir, Ex.what()); - } - } }); const GcClock::Tick ExpireTicks = Ctx.Settings.CacheExpireTime.time_since_epoch().count(); @@ -3170,7 +3280,7 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) return nullptr; } - if (Ctx.Settings.IsDeleteMode) + if (Ctx.Settings.IsDeleteMode && !ExpiredEntries.empty()) { for (const DiskIndexEntry& Entry : ExpiredEntries) { @@ -3205,7 +3315,7 @@ ZenCacheDiskLayer::CacheBucket::RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) return nullptr; } - return new DiskBucketStoreCompactor(*this, std::move(ExpiredStandaloneKeys)); + return new DiskBucketStoreCompactor(*this, std::move(ExpiredStandaloneKeys), /*FlushBucket*/ Stats.DeletedCount > 0); } bool @@ -3395,7 +3505,7 @@ ZenCacheDiskLayer::CacheBucket::GetReferences(const LoggerRef& Logger, CaptureAttachments(ChunkIndex, File.GetChunk(Offset, Size).GetView()); return !IsCancelledFlag.load(); }, - 0); + 32u * 1024); if (Continue) { @@ -3698,11 +3808,11 @@ ZenCacheDiskLayer::CacheBucket* ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) { ZEN_TRACE_CPU("Z$::GetOrCreateBucket"); - const auto BucketName = std::string(InBucket); { RwLock::SharedLockScope SharedLock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), eastl::equal_to_2<std::string, std::string_view>()); + It != m_Buckets.end()) { return It->second.get(); } @@ -3710,31 +3820,40 @@ ZenCacheDiskLayer::GetOrCreateBucket(std::string_view InBucket) // We create the bucket without holding a lock since contructor calls GcManager::AddGcReferencer which takes an exclusive lock. // This can cause a deadlock, if GC is running we would block while holding ZenCacheDiskLayer::m_Lock - std::unique_ptr<CacheBucket> Bucket( - std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig)); + BucketConfiguration* BucketConfig = &m_Configuration.BucketConfig; + if (auto It = m_Configuration.BucketConfigMap.find_as(InBucket, + std::hash<std::string_view>(), + eastl::equal_to_2<std::string, std::string_view>()); + It != m_Configuration.BucketConfigMap.end()) + { + BucketConfig = &It->second; + } + std::unique_ptr<CacheBucket> Bucket(std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, InBucket, *BucketConfig)); RwLock::ExclusiveLockScope Lock(m_Lock); - if (auto It = m_Buckets.find(BucketName); It != m_Buckets.end()) + if (auto It = m_Buckets.find_as(InBucket, std::hash<std::string_view>(), eastl::equal_to_2<std::string, std::string_view>()); + It != m_Buckets.end()) { return It->second.get(); } std::filesystem::path BucketPath = m_RootDir; - BucketPath /= BucketName; + BucketPath /= InBucket; try { if (!Bucket->OpenOrCreate(BucketPath)) { - ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir); + ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", InBucket, m_RootDir); return nullptr; } } catch (const std::exception& Err) { - ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); + ZEN_WARN("Creating bucket '{}' in '{}' FAILED, reason: '{}'", InBucket, BucketPath, Err.what()); throw; } + std::string BucketName{InBucket}; CacheBucket* Result = Bucket.get(); m_Buckets.emplace(BucketName, std::move(Bucket)); if (m_CapturedBuckets) @@ -3833,7 +3952,7 @@ ZenCacheDiskLayer::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheDiskLayer::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResults) : OutResults(OutResults) {} + GetBatchHandle(ZenCacheValueVec_t& OutResults) : OutResults(OutResults) {} struct BucketHandle { CacheBucket* Bucket; @@ -3893,13 +4012,13 @@ struct ZenCacheDiskLayer::GetBatchHandle return NewBucketHandle; } - RwLock Lock; - std::vector<BucketHandle> BucketHandles; - std::vector<ZenCacheValue>& OutResults; + RwLock Lock; + eastl::fixed_vector<BucketHandle, 4> BucketHandles; + ZenCacheValueVec_t& OutResults; }; ZenCacheDiskLayer::GetBatchHandle* -ZenCacheDiskLayer::BeginGetBatch(std::vector<ZenCacheValue>& OutResults) +ZenCacheDiskLayer::BeginGetBatch(ZenCacheValueVec_t& OutResults) { return new GetBatchHandle(OutResults); } @@ -3994,7 +4113,11 @@ ZenCacheDiskLayer::DiscoverBuckets() if (IsKnownBadBucketName(BucketName)) { BadBucketDirectories.push_back(BucketPath); - + continue; + } + else if (BucketName.starts_with("[dropped]")) + { + BadBucketDirectories.push_back(BucketPath); continue; } @@ -4027,50 +4150,66 @@ ZenCacheDiskLayer::DiscoverBuckets() RwLock SyncLock; WorkerThreadPool& Pool = GetLargeWorkerPool(EWorkloadType::Burst); - Latch WorkLatch(1); - for (auto& BucketPath : FoundBucketDirectories) + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + try { - WorkLatch.AddCount(1); - Pool.ScheduleWork([this, &WorkLatch, &SyncLock, BucketPath]() { - ZEN_MEMSCOPE(GetCacheDiskTag()); - - auto _ = MakeGuard([&]() { WorkLatch.CountDown(); }); - const std::string BucketName = PathToUtf8(BucketPath.stem()); - try - { - std::unique_ptr<CacheBucket> NewBucket = - std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, m_Configuration.BucketConfig); + for (auto& BucketPath : FoundBucketDirectories) + { + Work.ScheduleWork(Pool, [this, &SyncLock, BucketPath](std::atomic<bool>&) { + ZEN_MEMSCOPE(GetCacheDiskTag()); - CacheBucket* Bucket = nullptr; + const std::string BucketName = PathToUtf8(BucketPath.stem()); + try { - RwLock::ExclusiveLockScope __(SyncLock); - auto InsertResult = m_Buckets.emplace(BucketName, std::move(NewBucket)); - Bucket = InsertResult.first->second.get(); - } - ZEN_ASSERT(Bucket); + BucketConfiguration* BucketConfig = &m_Configuration.BucketConfig; + if (auto It = m_Configuration.BucketConfigMap.find_as(std::string_view(BucketName), + std::hash<std::string_view>(), + eastl::equal_to_2<std::string, std::string_view>()); + It != m_Configuration.BucketConfigMap.end()) + { + BucketConfig = &It->second; + } - if (!Bucket->OpenOrCreate(BucketPath, /* AllowCreate */ false)) - { - ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir); + std::unique_ptr<CacheBucket> NewBucket = + std::make_unique<CacheBucket>(m_Gc, m_TotalMemCachedSize, BucketName, *BucketConfig); + CacheBucket* Bucket = nullptr; { RwLock::ExclusiveLockScope __(SyncLock); - m_Buckets.erase(BucketName); + auto InsertResult = m_Buckets.emplace(BucketName, std::move(NewBucket)); + Bucket = InsertResult.first->second.get(); + } + ZEN_ASSERT(Bucket); + + if (!Bucket->OpenOrCreate(BucketPath, /* AllowCreate */ false)) + { + ZEN_WARN("Found directory '{}' in our base directory '{}' but it is not a valid bucket", BucketName, m_RootDir); + + { + RwLock::ExclusiveLockScope __(SyncLock); + m_Buckets.erase(BucketName); + } } } - } - catch (const std::exception& Err) - { - ZEN_ERROR("Opening bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); - return; - } - }); + catch (const std::exception& Err) + { + ZEN_ERROR("Opening bucket '{}' in '{}' FAILED, reason: '{}'", BucketName, BucketPath, Err.what()); + return; + } + }); + } + } + catch (const std::exception& Ex) + { + AbortFlag.store(true); + ZEN_WARN("Failed discovering buckets in {}. Reason: '{}'", m_RootDir, Ex.what()); } - WorkLatch.CountDown(); - WorkLatch.Wait(); + Work.Wait(); } -bool +std::function<void()> ZenCacheDiskLayer::DropBucket(std::string_view InBucket) { ZEN_TRACE_CPU("Z$::DropBucket"); @@ -4088,33 +4227,72 @@ ZenCacheDiskLayer::DropBucket(std::string_view InBucket) return Bucket.Drop(); } - // Make sure we remove the folder even if we don't know about the bucket std::filesystem::path BucketPath = m_RootDir; BucketPath /= std::string(InBucket); - return cache::impl::MoveAndDeleteDirectory(BucketPath); + std::filesystem::path DroppedPath = cache::impl::MoveDroppedDirectory(BucketPath); + if (DroppedPath.empty()) + { + return {}; + } + else + { + return [DroppedPath = std::move(DroppedPath)]() { + std::error_code Ec; + (void)DeleteDirectories(DroppedPath, Ec); + if (Ec) + { + ZEN_WARN("Failed to clean up dropped bucket directory '{}', reason: '{}'", DroppedPath, Ec.message()); + } + }; + } } -bool +std::function<void()> ZenCacheDiskLayer::Drop() { ZEN_TRACE_CPU("Z$::Drop"); - RwLock::ExclusiveLockScope _(m_Lock); - - std::vector<std::unique_ptr<CacheBucket>> Buckets; - Buckets.reserve(m_Buckets.size()); - while (!m_Buckets.empty()) + std::vector<std::function<void()>> PostDropOps; { - const auto& It = m_Buckets.begin(); - CacheBucket& Bucket = *It->second; - m_DroppedBuckets.push_back(std::move(It->second)); - m_Buckets.erase(It->first); - if (!Bucket.Drop()) + RwLock::ExclusiveLockScope _(m_Lock); + PostDropOps.reserve(m_Buckets.size()); + while (!m_Buckets.empty()) { - return false; + const auto& It = m_Buckets.begin(); + CacheBucket& Bucket = *It->second; + m_DroppedBuckets.push_back(std::move(It->second)); + m_Buckets.erase(It->first); + if (std::function<void()> PostDropOp = Bucket.Drop(); !PostDropOp) + { + return {}; + } + else + { + PostDropOps.emplace_back(std::move(PostDropOp)); + } } } - return cache::impl::MoveAndDeleteDirectory(m_RootDir); + + std::filesystem::path DroppedPath = cache::impl::MoveDroppedDirectory(m_RootDir); + if (DroppedPath.empty()) + { + return {}; + } + else + { + return [DroppedPath = std::move(DroppedPath), PostDropOps = std::move(PostDropOps)]() { + for (auto& PostDropOp : PostDropOps) + { + PostDropOp(); + } + std::error_code Ec; + (void)DeleteDirectories(DroppedPath, Ec); + if (Ec) + { + ZEN_WARN("Failed to clean up dropped bucket directory '{}', reason: '{}'", DroppedPath, Ec.message()); + } + }; + } } void @@ -4144,16 +4322,16 @@ ZenCacheDiskLayer::Flush() } { WorkerThreadPool& Pool = GetMediumWorkerPool(EWorkloadType::Burst); - Latch WorkLatch(1); + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); try { for (auto& Bucket : Buckets) { - WorkLatch.AddCount(1); - Pool.ScheduleWork([&WorkLatch, Bucket]() { + Work.ScheduleWork(Pool, [Bucket](std::atomic<bool>&) { ZEN_MEMSCOPE(GetCacheDiskTag()); - auto _ = MakeGuard([&]() { WorkLatch.CountDown(); }); try { Bucket->Flush(); @@ -4167,13 +4345,14 @@ ZenCacheDiskLayer::Flush() } catch (const std::exception& Ex) { + AbortFlag.store(true); ZEN_ERROR("Failed to flush buckets at '{}'. Reason: '{}'", m_RootDir, Ex.what()); } - WorkLatch.CountDown(); - while (!WorkLatch.Wait(1000)) - { - ZEN_DEBUG("Waiting for {} buckets at '{}' to flush", WorkLatch.Remaining(), m_RootDir); - } + + Work.Wait(1000, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t RemainingWork) { + ZEN_UNUSED(IsAborted, IsPaused); + ZEN_DEBUG("Waiting for {} buckets at '{}' to flush", RemainingWork, m_RootDir); + }); } } diff --git a/src/zenstore/cache/cacherpc.cpp b/src/zenstore/cache/cacherpc.cpp index 20c244250..436e8a083 100644 --- a/src/zenstore/cache/cacherpc.cpp +++ b/src/zenstore/cache/cacherpc.cpp @@ -20,6 +20,8 @@ #include <zencore/memory/llm.h> +#include <EASTL/fixed_vector.h> + ////////////////////////////////////////////////////////////////////////// namespace zen { @@ -89,7 +91,7 @@ GetRpcRequestCacheKey(const CbObjectView& KeyView, CacheKey& Key) return false; } IoHash Hash = HashField.AsHash(); - Key = CacheKey::Create(*Bucket, Hash); + Key = CacheKey::CreateValidated(std::move(*Bucket), Hash); return true; } @@ -218,6 +220,11 @@ CacheRpcHandler::HandleRpcRequest(const CacheRequestContext& Context, ZEN_WARN("Content format not supported, expected package message format"); return RpcResponseCode::BadRequest; } + if (CbValidateError Error = ValidateCompactBinary(Object.GetView(), CbValidateMode::Default); Error != CbValidateError::None) + { + ZEN_WARN("Content format is corrupt, compact binary format validation failed. Reason: '{}'", ToString(Error)); + return RpcResponseCode::BadRequest; + } } if (!UriNamespace.empty()) @@ -305,7 +312,7 @@ CacheRpcHandler::HandleRpcPutCacheRecords(const CacheRequestContext& Context, co } DefaultPolicy = !PolicyText.empty() ? ParseCachePolicy(PolicyText) : CachePolicy::Default; - std::vector<bool> Results; + eastl::fixed_vector<bool, 32> Results; CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); for (CbFieldView RequestField : RequestsArray) @@ -495,16 +502,15 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb bool Exists = false; bool ReadFromUpstream = false; }; - struct RecordRequestData + struct RecordRequestData : public CacheKeyRequest { - CacheKeyRequest Upstream; - CbObjectView RecordObject; - IoBuffer RecordCacheValue; - CacheRecordPolicy DownstreamPolicy; - std::vector<ValueRequestData> Values; - bool Complete = false; - const UpstreamEndpointInfo* Source = nullptr; - uint64_t ElapsedTimeUs; + CbObjectView RecordObject; + IoBuffer RecordCacheValue; + CacheRecordPolicy DownstreamPolicy; + eastl::fixed_vector<ValueRequestData, 4> Values; + bool Complete = false; + const UpstreamEndpointInfo* Source = nullptr; + uint64_t ElapsedTimeUs; }; std::string_view PolicyText = Params["DefaultPolicy"sv].AsString(); @@ -517,8 +523,8 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector<RecordRequestData> Requests; - std::vector<size_t> UpstreamIndexes; + eastl::fixed_vector<RecordRequestData, 16> Requests; + eastl::fixed_vector<size_t, 16> UpstreamIndexes; auto ParseValues = [](RecordRequestData& Request) { CbArrayView ValuesArray = Request.RecordObject["Values"sv].AsArrayView(); @@ -549,7 +555,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbObjectView RequestObject = RequestField.AsObjectView(); CbObjectView KeyObject = RequestObject["Key"sv].AsObjectView(); - CacheKey& Key = Request.Upstream.Key; + CacheKey& Key = Request.Key; if (!GetRpcRequestCacheKey(KeyObject, Key)) { return CbPackage{}; @@ -571,6 +577,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb { FoundLocalInvalid = true; } + else if (CbValidateError Error = ValidateCompactBinary(Request.RecordCacheValue.GetView(), CbValidateMode::Default); + Error != CbValidateError::None) + { + ZEN_WARN("HandleRpcGetCacheRecords stored record is corrupt, compact binary format validation failed. Reason: '{}'", + ToString(Error)); + FoundLocalInvalid = true; + } else { Request.RecordObject = CbObjectView(Request.RecordCacheValue.GetData()); @@ -654,7 +667,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb { m_CidStore.IterateChunks( CidHashes, - [this, &Request, ValueCount, &RequestValueIndexes](size_t Index, const IoBuffer& Payload) -> bool { + [this, &Request, &RequestValueIndexes](size_t Index, const IoBuffer& Payload) -> bool { try { const size_t ValueIndex = RequestValueIndexes[Index]; @@ -721,7 +734,7 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb for (size_t Index : UpstreamIndexes) { RecordRequestData& Request = Requests[Index]; - UpstreamRequests.push_back(&Request.Upstream); + UpstreamRequests.push_back(&Request); if (Request.Values.size()) { @@ -735,13 +748,13 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb UpstreamPolicy |= !Value.ReadFromUpstream ? CachePolicy::SkipData : CachePolicy::None; Builder.AddValuePolicy(Value.ValueId, UpstreamPolicy); } - Request.Upstream.Policy = Builder.Build(); + Request.Policy = Builder.Build(); } else { // We don't know which Values exist in the Record; ask the upstrem for all values that the client wants, // and convert the CacheRecordPolicy to an upstream policy - Request.Upstream.Policy = Request.DownstreamPolicy.ConvertToUpstream(); + Request.Policy = Request.DownstreamPolicy.ConvertToUpstream(); } } @@ -751,10 +764,9 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb return; } - RecordRequestData& Request = - *reinterpret_cast<RecordRequestData*>(reinterpret_cast<char*>(&Params.Request) - offsetof(RecordRequestData, Upstream)); + RecordRequestData& Request = *static_cast<RecordRequestData*>(&Params.Request); Request.ElapsedTimeUs += static_cast<uint64_t>(Params.ElapsedSeconds * 1000000.0); - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; Stopwatch Timer; auto TimeGuard = MakeGuard([&Timer, &Request]() { Request.ElapsedTimeUs += Timer.GetElapsedTimeUs(); }); if (!Request.RecordObject) @@ -852,10 +864,12 @@ CacheRpcHandler::HandleRpcGetCacheRecords(const CacheRequestContext& Context, Cb CbPackage ResponsePackage; CbObjectWriter ResponseObject{2048}; + ResponsePackage.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (RecordRequestData& Request : Requests) { - const CacheKey& Key = Request.Upstream.Key; + const CacheKey& Key = Request.Key; if (Request.Complete || (Request.RecordObject && EnumHasAllFlags(Request.DownstreamPolicy.GetRecordPolicy(), CachePolicy::PartialRecord))) { @@ -930,11 +944,12 @@ CacheRpcHandler::HandleRpcPutCacheValues(const CacheRequestContext& Context, con const bool HasUpstream = m_UpstreamCache.IsActive(); CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector<ZenCacheStore::PutResult> BatchResults; - std::vector<size_t> BatchResultIndexes; - std::vector<ZenCacheStore::PutResult> Results; - std::vector<CacheKey> UpstreamCacheKeys; - uint64_t RequestCount = RequestsArray.Num(); + std::vector<ZenCacheStore::PutResult> BatchResults; + eastl::fixed_vector<size_t, 32> BatchResultIndexes; + eastl::fixed_vector<ZenCacheStore::PutResult, 32> Results; + eastl::fixed_vector<CacheKey, 32> UpstreamCacheKeys; + + uint64_t RequestCount = RequestsArray.Num(); { Results.reserve(RequestCount); std::unique_ptr<ZenCacheStore::PutBatch> Batch; @@ -1145,15 +1160,15 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO uint64_t RawSize = 0; CompressedBuffer Result; }; - std::vector<RequestData> Requests; + eastl::fixed_vector<RequestData, 16> Requests; - std::vector<size_t> RemoteRequestIndexes; + eastl::fixed_vector<size_t, 16> RemoteRequestIndexes; const bool HasUpstream = m_UpstreamCache.IsActive(); - CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); - std::vector<ZenCacheValue> CacheValues; - const uint64_t RequestCount = RequestsArray.Num(); + CbArrayView RequestsArray = Params["Requests"sv].AsArrayView(); + ZenCacheValueVec_t CacheValues; + const uint64_t RequestCount = RequestsArray.Num(); CacheValues.reserve(RequestCount); { std::unique_ptr<ZenCacheStore::GetBatch> Batch; @@ -1182,7 +1197,6 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO CacheKey& Key = Request.Key; CachePolicy Policy = Request.Policy; - ZenCacheValue CacheValue; if (EnumHasAllFlags(Policy, CachePolicy::QueryLocal)) { if (Batch) @@ -1328,6 +1342,9 @@ CacheRpcHandler::HandleRpcGetCacheValues(const CacheRequestContext& Context, CbO ZEN_TRACE_CPU("Z$::RpcGetCacheValues::Response"); CbPackage RpcResponse; CbObjectWriter ResponseObject{1024}; + + RpcResponse.ReserveAttachments(Requests.size()); + ResponseObject.BeginArray("Result"sv); for (const RequestData& Request : Requests) { @@ -1622,18 +1639,27 @@ CacheRpcHandler::GetLocalCacheRecords(const CacheRequestContext& Context, Record.ValuesRead = true; if (Record.CacheValue && Record.CacheValue.GetContentType() == ZenContentType::kCbObject) { - CbObjectView RecordObject = CbObjectView(Record.CacheValue.GetData()); - CbArrayView ValuesArray = RecordObject["Values"sv].AsArrayView(); - Record.Values.reserve(ValuesArray.Num()); - for (CbFieldView ValueField : ValuesArray) + if (CbValidateError Error = ValidateCompactBinary(Record.CacheValue.GetView(), CbValidateMode::Default); + Error != CbValidateError::None) { - CbObjectView ValueObject = ValueField.AsObjectView(); - Oid ValueId = ValueObject["Id"sv].AsObjectId(); - CbFieldView RawHashField = ValueObject["RawHash"sv]; - IoHash RawHash = RawHashField.AsBinaryAttachment(); - if (ValueId && !RawHashField.HasError()) + ZEN_WARN("GetLocalCacheRecords stored record for is corrupt, compact binary format validation failed. Reason: '{}'", + ToString(Error)); + } + else + { + CbObjectView RecordObject = CbObjectView(Record.CacheValue.GetData()); + CbArrayView ValuesArray = RecordObject["Values"sv].AsArrayView(); + Record.Values.reserve(ValuesArray.Num()); + for (CbFieldView ValueField : ValuesArray) { - Record.Values.push_back({ValueId, RawHash, ValueObject["RawSize"sv].AsUInt64()}); + CbObjectView ValueObject = ValueField.AsObjectView(); + Oid ValueId = ValueObject["Id"sv].AsObjectId(); + CbFieldView RawHashField = ValueObject["RawHash"sv]; + IoHash RawHash = RawHashField.AsBinaryAttachment(); + if (ValueId && !RawHashField.HasError()) + { + Record.Values.push_back({ValueId, RawHash, ValueObject["RawSize"sv].AsUInt64()}); + } } } } @@ -1706,7 +1732,7 @@ CacheRpcHandler::GetLocalCacheValues(const CacheRequestContext& Context, using namespace cache::detail; const bool HasUpstream = m_UpstreamCache.IsActive(); - std::vector<ZenCacheValue> Chunks; + ZenCacheValueVec_t Chunks; Chunks.reserve(ValueRequests.size()); { std::unique_ptr<ZenCacheStore::GetBatch> Batch; @@ -1866,6 +1892,8 @@ CacheRpcHandler::WriteGetCacheChunksResponse([[maybe_unused]] const CacheRequest CbPackage RpcResponse; CbObjectWriter Writer{1024}; + RpcResponse.ReserveAttachments(Requests.size()); + Writer.BeginArray("Result"sv); for (ChunkRequest& Request : Requests) { diff --git a/src/zenstore/cache/structuredcachestore.cpp b/src/zenstore/cache/structuredcachestore.cpp index a3f80099f..973af52b2 100644 --- a/src/zenstore/cache/structuredcachestore.cpp +++ b/src/zenstore/cache/structuredcachestore.cpp @@ -178,13 +178,13 @@ ZenCacheNamespace::EndPutBatch(PutBatchHandle* Batch) noexcept struct ZenCacheNamespace::GetBatchHandle { - GetBatchHandle(std::vector<ZenCacheValue>& OutResult) : Results(OutResult) {} - std::vector<ZenCacheValue>& Results; + GetBatchHandle(ZenCacheValueVec_t& OutResult) : Results(OutResult) {} + ZenCacheValueVec_t& Results; ZenCacheDiskLayer::GetBatchHandle* DiskLayerHandle = nullptr; }; ZenCacheNamespace::GetBatchHandle* -ZenCacheNamespace::BeginGetBatch(std::vector<ZenCacheValue>& OutResult) +ZenCacheNamespace::BeginGetBatch(ZenCacheValueVec_t& OutResult) { ZenCacheNamespace::GetBatchHandle* Handle = new ZenCacheNamespace::GetBatchHandle(OutResult); Handle->DiskLayerHandle = m_DiskLayer.BeginGetBatch(OutResult); @@ -282,11 +282,14 @@ ZenCacheNamespace::DropBucket(std::string_view Bucket) { ZEN_INFO("dropping bucket '{}'", Bucket); - const bool Dropped = m_DiskLayer.DropBucket(Bucket); - - ZEN_INFO("bucket '{}' was {}", Bucket, Dropped ? "dropped" : "not found"); - - return Dropped; + std::function<void()> PostDropOp = m_DiskLayer.DropBucket(Bucket); + if (!PostDropOp) + { + ZEN_INFO("bucket '{}' was not found in {}", Bucket, m_RootDir); + return false; + } + PostDropOp(); + return true; } void @@ -296,9 +299,10 @@ ZenCacheNamespace::EnumerateBucketContents(std::string_view m_DiskLayer.EnumerateBucketContents(Bucket, Fn); } -bool +std::function<void()> ZenCacheNamespace::Drop() { + m_Gc.RemoveGcStorage(this); return m_DiskLayer.Drop(); } @@ -585,7 +589,7 @@ ZenCacheStore::PutBatch::~PutBatch() } } -ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, std::vector<ZenCacheValue>& OutResult) +ZenCacheStore::GetBatch::GetBatch(ZenCacheStore& CacheStore, std::string_view InNamespace, ZenCacheValueVec_t& OutResult) : m_CacheStore(CacheStore) , Results(OutResult) { @@ -800,16 +804,27 @@ ZenCacheStore::DropBucket(std::string_view Namespace, std::string_view Bucket) bool ZenCacheStore::DropNamespace(std::string_view InNamespace) { - RwLock::SharedLockScope _(m_NamespacesLock); - if (auto It = m_Namespaces.find(std::string(InNamespace)); It != m_Namespaces.end()) + std::function<void()> PostDropOp; + { + RwLock::SharedLockScope _(m_NamespacesLock); + if (auto It = m_Namespaces.find(std::string(InNamespace)); It != m_Namespaces.end()) + { + ZenCacheNamespace& Namespace = *It->second; + m_DroppedNamespaces.push_back(std::move(It->second)); + m_Namespaces.erase(It); + PostDropOp = Namespace.Drop(); + } + else + { + ZEN_WARN("request for unknown namespace '{}' in ZenCacheStore::DropNamespace", InNamespace); + return false; + } + } + if (PostDropOp) { - ZenCacheNamespace& Namespace = *It->second; - m_DroppedNamespaces.push_back(std::move(It->second)); - m_Namespaces.erase(It); - return Namespace.Drop(); + PostDropOp(); } - ZEN_WARN("request for unknown namespace '{}' in ZenCacheStore::DropNamespace", InNamespace); - return false; + return true; } void diff --git a/src/zenstore/cas.cpp b/src/zenstore/cas.cpp index 73c10a6db..460f0e10d 100644 --- a/src/zenstore/cas.cpp +++ b/src/zenstore/cas.cpp @@ -118,7 +118,7 @@ CasImpl::Initialize(const CidStoreConfiguration& InConfig) // Ensure root directory exists - create if it doesn't exist already - std::filesystem::create_directories(m_Config.RootDirectory); + CreateDirectories(m_Config.RootDirectory); // Open or create manifest @@ -412,6 +412,7 @@ CasImpl::IterateChunks(std::span<IoHash> DecompressedIds, uint64_t LargeSizeLimit) { ZEN_TRACE_CPU("CAS::IterateChunks"); + if (!m_SmallStrategy.IterateChunks( DecompressedIds, [&](size_t Index, const IoBuffer& Payload) { @@ -420,10 +421,11 @@ CasImpl::IterateChunks(std::span<IoHash> DecompressedIds, return AsyncCallback(Index, Payload); }, OptionalWorkerPool, - LargeSizeLimit)) + LargeSizeLimit == 0 ? m_Config.HugeValueThreshold : Min(LargeSizeLimit, m_Config.HugeValueThreshold))) { return false; } + if (!m_TinyStrategy.IterateChunks( DecompressedIds, [&](size_t Index, const IoBuffer& Payload) { @@ -432,10 +434,11 @@ CasImpl::IterateChunks(std::span<IoHash> DecompressedIds, return AsyncCallback(Index, Payload); }, OptionalWorkerPool, - LargeSizeLimit)) + LargeSizeLimit == 0 ? m_Config.TinyValueThreshold : Min(LargeSizeLimit, m_Config.TinyValueThreshold))) { return false; } + if (!m_LargeStrategy.IterateChunks( DecompressedIds, [&](size_t Index, const IoBuffer& Payload) { diff --git a/src/zenstore/caslog.cpp b/src/zenstore/caslog.cpp index 6c7b1b297..492ce9317 100644 --- a/src/zenstore/caslog.cpp +++ b/src/zenstore/caslog.cpp @@ -37,7 +37,7 @@ CasLogFile::~CasLogFile() bool CasLogFile::IsValid(std::filesystem::path FileName, size_t RecordSize) { - if (!std::filesystem::is_regular_file(FileName)) + if (!IsFile(FileName)) { return false; } diff --git a/src/zenstore/compactcas.cpp b/src/zenstore/compactcas.cpp index 2be0542db..b00abb2cb 100644 --- a/src/zenstore/compactcas.cpp +++ b/src/zenstore/compactcas.cpp @@ -15,6 +15,7 @@ #include <zencore/trace.h> #include <zencore/workthreadpool.h> #include <zenstore/scrubcontext.h> +#include <zenutil/parallelwork.h> #include <gsl/gsl-lite.hpp> @@ -144,6 +145,16 @@ CasContainerStrategy::CasContainerStrategy(GcManager& Gc) : m_Log(logging::Get(" CasContainerStrategy::~CasContainerStrategy() { + try + { + m_BlockStore.Close(); + m_CasLog.Flush(); + m_CasLog.Close(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("~CasContainerStrategy failed with: ", Ex.what()); + } m_Gc.RemoveGcReferenceStore(*this); m_Gc.RemoveGcStorage(this); } @@ -203,12 +214,12 @@ CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const ZEN_TRACE_CPU("CasContainer::UpdateLocation"); BlockStoreDiskLocation DiskLocation(Location, m_PayloadAlignment); const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = DiskLocation}; - m_CasLog.Append(IndexEntry); { RwLock::ExclusiveLockScope _(m_LocationMapLock); m_LocationMap.emplace(ChunkHash, m_Locations.size()); m_Locations.push_back(DiskLocation); } + m_CasLog.Append(IndexEntry); }); return CasStore::InsertResult{.New = true}; @@ -226,7 +237,7 @@ CasContainerStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash) } std::vector<CasStore::InsertResult> -CasContainerStrategy::InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash> ChunkHashes) +CasContainerStrategy::InsertChunks(std::span<const IoBuffer> Chunks, std::span<const IoHash> ChunkHashes) { ZEN_MEMSCOPE(GetCasContainerTag()); @@ -272,7 +283,6 @@ CasContainerStrategy::InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash> IndexEntries.emplace_back( CasDiskIndexEntry{.Key = ChunkHashes[ChunkIndex], .Location = BlockStoreDiskLocation(Location, m_PayloadAlignment)}); } - m_CasLog.Append(IndexEntries); { RwLock::ExclusiveLockScope _(m_LocationMapLock); for (const CasDiskIndexEntry& DiskIndexEntry : IndexEntries) @@ -281,6 +291,7 @@ CasContainerStrategy::InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash> m_Locations.push_back(DiskIndexEntry.Location); } } + m_CasLog.Append(IndexEntries); }); return Result; } @@ -306,7 +317,12 @@ bool CasContainerStrategy::HaveChunk(const IoHash& ChunkHash) { RwLock::SharedLockScope _(m_LocationMapLock); - return m_LocationMap.contains(ChunkHash); + if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end()) + { + const BlockStoreLocation& Location = m_Locations[KeyIt->second].Get(m_PayloadAlignment); + return m_BlockStore.HasChunk(Location); + } + return false; } void @@ -323,7 +339,7 @@ CasContainerStrategy::FilterChunks(HashKeySet& InOutChunks) } bool -CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes, +CasContainerStrategy::IterateChunks(std::span<const IoHash> ChunkHashes, const std::function<bool(size_t Index, const IoBuffer& Payload)>& AsyncCallback, WorkerThreadPool* OptionalWorkerPool, uint64_t LargeSizeLimit) @@ -360,7 +376,11 @@ CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes, return true; } - auto DoOneBlock = [&](std::span<const size_t> ChunkIndexes) { + auto DoOneBlock = [this](const std::function<bool(size_t Index, const IoBuffer& Payload)>& AsyncCallback, + uint64_t LargeSizeLimit, + std::span<const size_t> FoundChunkIndexes, + std::span<const BlockStoreLocation> FoundChunkLocations, + std::span<const size_t> ChunkIndexes) { if (ChunkIndexes.size() < 4) { for (size_t ChunkIndex : ChunkIndexes) @@ -376,57 +396,96 @@ CasContainerStrategy::IterateChunks(std::span<IoHash> ChunkHashes, return m_BlockStore.IterateBlock( FoundChunkLocations, ChunkIndexes, - [&](size_t ChunkIndex, const void* Data, uint64_t Size) { + [AsyncCallback, FoundChunkIndexes](size_t ChunkIndex, const void* Data, uint64_t Size) { if (Data == nullptr) { return AsyncCallback(FoundChunkIndexes[ChunkIndex], IoBuffer()); } return AsyncCallback(FoundChunkIndexes[ChunkIndex], IoBuffer(IoBuffer::Wrap, Data, Size)); }, - [&](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) { + [AsyncCallback, FoundChunkIndexes](size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size) { return AsyncCallback(FoundChunkIndexes[ChunkIndex], File.GetChunk(Offset, Size)); }, LargeSizeLimit); }; - Latch WorkLatch(1); - std::atomic_bool AsyncContinue = true; - bool Continue = m_BlockStore.IterateChunks(FoundChunkLocations, [&](uint32_t BlockIndex, std::span<const size_t> ChunkIndexes) { - if (OptionalWorkerPool && (ChunkIndexes.size() > 3)) + std::atomic<bool> AbortFlag; + { + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + try { - WorkLatch.AddCount(1); - OptionalWorkerPool->ScheduleWork([&, ChunkIndexes = std::vector<size_t>(ChunkIndexes.begin(), ChunkIndexes.end())]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - if (!AsyncContinue) - { - return; - } - try - { - bool Continue = DoOneBlock(ChunkIndexes); - if (!Continue) + const bool Continue = m_BlockStore.IterateChunks( + FoundChunkLocations, + [this, + &Work, + &AbortFlag, + &AsyncCallback, + LargeSizeLimit, + DoOneBlock, + &FoundChunkIndexes, + &FoundChunkLocations, + OptionalWorkerPool](uint32_t BlockIndex, std::span<const size_t> ChunkIndexes) { + if (OptionalWorkerPool && (ChunkIndexes.size() > 3)) { - AsyncContinue.store(false); - } - } - catch (const std::exception& Ex) - { - ZEN_WARN("Failed iterating chunks for cas root path {}, block {}. Reason: '{}'", - m_RootDirectory, + std::vector<size_t> TmpChunkIndexes(ChunkIndexes.begin(), ChunkIndexes.end()); + Work.ScheduleWork( + *OptionalWorkerPool, + [this, + &AsyncCallback, + LargeSizeLimit, + DoOneBlock, BlockIndex, - Ex.what()); - } - }); - return AsyncContinue.load(); + &FoundChunkIndexes, + &FoundChunkLocations, + ChunkIndexes = std::move(TmpChunkIndexes)](std::atomic<bool>& AbortFlag) { + if (AbortFlag) + { + return; + } + try + { + bool Continue = + DoOneBlock(AsyncCallback, LargeSizeLimit, FoundChunkIndexes, FoundChunkLocations, ChunkIndexes); + if (!Continue) + { + AbortFlag.store(true); + } + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed iterating chunks for cas root path {}, block {}. Reason: '{}'", + m_RootDirectory, + BlockIndex, + Ex.what()); + AbortFlag.store(true); + } + }); + return !AbortFlag.load(); + } + else + { + if (!DoOneBlock(AsyncCallback, LargeSizeLimit, FoundChunkIndexes, FoundChunkLocations, ChunkIndexes)) + { + AbortFlag.store(true); + } + return !AbortFlag.load(); + } + }); + if (!Continue) + { + AbortFlag.store(true); + } } - else + catch (const std::exception& Ex) { - return DoOneBlock(ChunkIndexes); + AbortFlag.store(true); + ZEN_WARN("Failed iterating chunks for cas root path {}. Reason: '{}'", m_RootDirectory, Ex.what()); } - }); - WorkLatch.CountDown(); - WorkLatch.Wait(); - return AsyncContinue.load() && Continue; + + Work.Wait(); + } + return !AbortFlag.load(); } void @@ -437,7 +496,7 @@ CasContainerStrategy::Flush() ZEN_TRACE_CPU("CasContainer::Flush"); m_BlockStore.Flush(/*ForceNewBlock*/ false); m_CasLog.Flush(); - MakeIndexSnapshot(); + MakeIndexSnapshot(/*ResetLog*/ false); } void @@ -677,7 +736,9 @@ public: m_CasContainerStrategy.m_BlockStore.CompactBlocks( BlockCompactState, m_CasContainerStrategy.m_PayloadAlignment, - [&](const BlockStore::MovedChunksArray& MovedArray, uint64_t FreedDiskSpace) { + [&](const BlockStore::MovedChunksArray& MovedArray, + const BlockStore::ChunkIndexArray& ScrubbedArray, + uint64_t FreedDiskSpace) { std::vector<CasDiskIndexEntry> MovedEntries; RwLock::ExclusiveLockScope _(m_CasContainerStrategy.m_LocationMapLock); for (const std::pair<size_t, BlockStoreLocation>& Moved : MovedArray) @@ -702,7 +763,27 @@ public: MovedEntries.push_back(CasDiskIndexEntry{.Key = Key, .Location = Location}); } } + for (size_t ChunkIndex : ScrubbedArray) + { + const IoHash& Key = BlockCompactStateKeys[ChunkIndex]; + if (auto It = m_CasContainerStrategy.m_LocationMap.find(Key); + It != m_CasContainerStrategy.m_LocationMap.end()) + { + BlockStoreDiskLocation& Location = m_CasContainerStrategy.m_Locations[It->second]; + const BlockStoreLocation& OldLocation = BlockCompactState.GetLocation(ChunkIndex); + if (Location.Get(m_CasContainerStrategy.m_PayloadAlignment) != OldLocation) + { + // Someone has moved our chunk so lets just skip the new location we were provided, it will be + // GC:d at a later time + continue; + } + MovedEntries.push_back( + CasDiskIndexEntry{.Key = Key, .Location = Location, .Flags = CasDiskIndexEntry::kTombstone}); + m_CasContainerStrategy.m_LocationMap.erase(It); + } + } m_CasContainerStrategy.m_CasLog.Append(MovedEntries); + m_CasContainerStrategy.m_CasLog.Flush(); Stats.RemovedDisk += FreedDiskSpace; if (Ctx.IsCancelledFlag.load()) { @@ -900,13 +981,12 @@ CasContainerStrategy::StorageSize() const } void -CasContainerStrategy::MakeIndexSnapshot() +CasContainerStrategy::MakeIndexSnapshot(bool ResetLog) { ZEN_MEMSCOPE(GetCasContainerTag()); ZEN_TRACE_CPU("CasContainer::MakeIndexSnapshot"); - uint64_t LogCount = m_CasLog.GetLogCount(); - if (m_LogFlushPosition == LogCount) + if (m_LogFlushPosition == m_CasLog.GetLogCount()) { return; } @@ -923,34 +1003,17 @@ CasContainerStrategy::MakeIndexSnapshot() namespace fs = std::filesystem; - fs::path IndexPath = cas::impl::GetIndexPath(m_RootDirectory, m_ContainerBaseName); - fs::path TempIndexPath = cas::impl::GetTempIndexPath(m_RootDirectory, m_ContainerBaseName); - - // Move index away, we keep it if something goes wrong - if (fs::is_regular_file(TempIndexPath)) - { - std::error_code Ec; - if (!fs::remove(TempIndexPath, Ec) || Ec) - { - ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", TempIndexPath, Ec.message()); - return; - } - } + const fs::path IndexPath = cas::impl::GetIndexPath(m_RootDirectory, m_ContainerBaseName); try { - if (fs::is_regular_file(IndexPath)) - { - fs::rename(IndexPath, TempIndexPath); - } - // Write the current state of the location map to a new index state std::vector<CasDiskIndexEntry> Entries; - uint64_t IndexLogPosition = 0; + // Be defensive regarding log position as it is written to without acquiring m_LocationMapLock + const uint64_t IndexLogPosition = ResetLog ? 0 : m_CasLog.GetLogCount(); { RwLock::SharedLockScope ___(m_LocationMapLock); - IndexLogPosition = m_CasLog.GetLogCount(); Entries.resize(m_LocationMap.size()); uint64_t EntryIndex = 0; @@ -960,6 +1023,7 @@ CasContainerStrategy::MakeIndexSnapshot() IndexEntry.Key = Entry.first; IndexEntry.Location = m_Locations[Entry.second]; } + EntryCount = m_LocationMap.size(); } TemporaryFile ObjectIndexFile; @@ -969,7 +1033,7 @@ CasContainerStrategy::MakeIndexSnapshot() { throw std::system_error(Ec, fmt::format("Failed to create temp file for index snapshot at '{}'", IndexPath)); } - CasDiskIndexHeader Header = {.EntryCount = Entries.size(), + CasDiskIndexHeader Header = {.EntryCount = EntryCount, .LogPosition = IndexLogPosition, .PayloadAlignment = gsl::narrow<uint32_t>(m_PayloadAlignment)}; @@ -981,35 +1045,34 @@ CasContainerStrategy::MakeIndexSnapshot() ObjectIndexFile.MoveTemporaryIntoPlace(IndexPath, Ec); if (Ec) { - throw std::system_error(Ec, fmt::format("Failed to move temp file '{}' to '{}'", ObjectIndexFile.GetPath(), IndexPath)); + throw std::system_error(Ec, + fmt::format("Snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'", + ObjectIndexFile.GetPath(), + IndexPath, + Ec.message())); } - EntryCount = Entries.size(); - m_LogFlushPosition = IndexLogPosition; - } - catch (const std::exception& Err) - { - ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what()); - - // Restore any previous snapshot - if (fs::is_regular_file(TempIndexPath)) + if (ResetLog) { - std::error_code Ec; - fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless - fs::rename(TempIndexPath, IndexPath, Ec); - if (Ec) + const std::filesystem::path LogPath = cas::impl::GetLogPath(m_RootDirectory, m_ContainerBaseName); + + if (IsFile(LogPath)) { - ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", TempIndexPath, Ec.message()); + m_CasLog.Close(); + if (!RemoveFile(LogPath, Ec) || Ec) + { + // This is non-critical, it only means that we will replay the events of the log over the snapshot - inefficent but in + // the end it will be the same result + ZEN_WARN("Snapshot failed to clean log file '{}', reason: '{}'", LogPath, IndexPath, Ec.message()); + } + m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); } } + m_LogFlushPosition = IndexLogPosition; } - if (fs::is_regular_file(TempIndexPath)) + catch (const std::exception& Err) { - std::error_code Ec; - if (!fs::remove(TempIndexPath, Ec) || Ec) - { - ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", TempIndexPath, Ec.message()); - } + ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what()); } } @@ -1092,7 +1155,7 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski if (!TCasLogFile<CasDiskIndexEntry>::IsValid(LogPath)) { ZEN_WARN("removing invalid cas log at '{}'", LogPath); - std::filesystem::remove(LogPath); + RemoveFile(LogPath); return 0; } @@ -1115,7 +1178,7 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski ZEN_WARN("reading full log at '{}', reason: Log position from index snapshot is out of range", LogPath); SkipEntryCount = 0; } - LogEntryCount = EntryCount - SkipEntryCount; + LogEntryCount = SkipEntryCount; CasLog.Replay( [&](const CasDiskIndexEntry& Record) { LogEntryCount++; @@ -1134,7 +1197,6 @@ CasContainerStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t Ski m_Locations.push_back(Record.Location); }, SkipEntryCount); - return LogEntryCount; } return 0; @@ -1155,7 +1217,7 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) if (IsNewStore) { - std::filesystem::remove_all(BasePath); + DeleteDirectories(BasePath); } CreateDirectories(BasePath); @@ -1165,19 +1227,19 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) std::filesystem::path LogPath = cas::impl::GetLogPath(m_RootDirectory, m_ContainerBaseName); std::filesystem::path IndexPath = cas::impl::GetIndexPath(m_RootDirectory, m_ContainerBaseName); - if (std::filesystem::is_regular_file(IndexPath)) + if (IsFile(IndexPath)) { uint32_t IndexVersion = 0; m_LogFlushPosition = ReadIndexFile(IndexPath, IndexVersion); if (IndexVersion == 0) { ZEN_WARN("removing invalid index file at '{}'", IndexPath); - std::filesystem::remove(IndexPath); + RemoveFile(IndexPath); } } uint64_t LogEntryCount = 0; - if (std::filesystem::is_regular_file(LogPath)) + if (IsFile(LogPath)) { if (TCasLogFile<CasDiskIndexEntry>::IsValid(LogPath)) { @@ -1186,12 +1248,10 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) else { ZEN_WARN("removing invalid cas log at '{}'", LogPath); - std::filesystem::remove(LogPath); + RemoveFile(LogPath); } } - m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); - BlockStore::BlockIndexSet KnownBlocks; for (const auto& Entry : m_LocationMap) @@ -1201,11 +1261,41 @@ CasContainerStrategy::OpenContainer(bool IsNewStore) KnownBlocks.insert(BlockIndex); } - m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks); + BlockStore::BlockIndexSet MissingBlocks = m_BlockStore.SyncExistingBlocksOnDisk(KnownBlocks); + + m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); + + bool RemovedEntries = false; + if (!MissingBlocks.empty()) + { + std::vector<CasDiskIndexEntry> MissingEntries; + for (auto& It : m_LocationMap) + { + const uint32_t BlockIndex = m_Locations[It.second].GetBlockIndex(); + if (MissingBlocks.contains(BlockIndex)) + { + MissingEntries.push_back({.Key = It.first, .Location = m_Locations[It.second], .Flags = CasDiskIndexEntry::kTombstone}); + } + } + ZEN_ASSERT(!MissingEntries.empty()); + + for (const CasDiskIndexEntry& Entry : MissingEntries) + { + m_LocationMap.erase(Entry.Key); + } + m_CasLog.Append(MissingEntries); + m_CasLog.Flush(); + + { + RwLock::ExclusiveLockScope IndexLock(m_LocationMapLock); + CompactIndex(IndexLock); + } + RemovedEntries = true; + } - if (IsNewStore || (LogEntryCount > 0)) + if (IsNewStore || (LogEntryCount > 0) || RemovedEntries) { - MakeIndexSnapshot(); + MakeIndexSnapshot(/*ResetLog*/ true); } // TODO: should validate integrity of container files here @@ -1573,6 +1663,423 @@ TEST_CASE("compactcas.threadedinsert") } } +TEST_CASE("compactcas.restart") +{ + uint64_t ExpectedSize = 0; + + auto GenerateChunks = [&](CasContainerStrategy& Cas, size_t ChunkCount, uint64_t ChunkSize, std::vector<IoHash>& Hashes) { + WorkerThreadPool ThreadPool(Max(std::thread::hardware_concurrency() - 1u, 2u), "put"); + + Latch WorkLatch(1); + tsl::robin_set<IoHash, IoHash::Hasher> ChunkHashesLookup; + ChunkHashesLookup.reserve(ChunkCount); + RwLock InsertLock; + for (size_t Offset = 0; Offset < ChunkCount;) + { + size_t BatchCount = Min<size_t>(ChunkCount - Offset, 512u); + WorkLatch.AddCount(1); + ThreadPool.ScheduleWork( + [&WorkLatch, &InsertLock, &ChunkHashesLookup, &ExpectedSize, &Hashes, &Cas, Offset, BatchCount, ChunkSize]() { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + + std::vector<IoBuffer> BatchBlobs; + std::vector<IoHash> BatchHashes; + BatchBlobs.reserve(BatchCount); + BatchHashes.reserve(BatchCount); + + while (BatchBlobs.size() < BatchCount) + { + IoBuffer Chunk = + CreateSemiRandomBlob(ChunkSize + ((BatchHashes.size() % 100) + (BatchHashes.size() % 7) * 315u + Offset % 377)); + IoHash Hash = IoHash::HashBuffer(Chunk); + { + RwLock::ExclusiveLockScope __(InsertLock); + if (ChunkHashesLookup.contains(Hash)) + { + continue; + } + ChunkHashesLookup.insert(Hash); + ExpectedSize += Chunk.Size(); + } + + BatchBlobs.emplace_back(CompressedBuffer::Compress(SharedBuffer(Chunk)).GetCompressed().Flatten().AsIoBuffer()); + BatchHashes.push_back(Hash); + } + + Cas.InsertChunks(BatchBlobs, BatchHashes); + { + RwLock::ExclusiveLockScope __(InsertLock); + Hashes.insert(Hashes.end(), BatchHashes.begin(), BatchHashes.end()); + } + }); + Offset += BatchCount; + } + WorkLatch.CountDown(); + WorkLatch.Wait(); + }; + + ScopedTemporaryDirectory TempDir; + std::filesystem::path CasPath = TempDir.Path(); + CreateDirectories(CasPath); + + bool Generate = false; + if (!Generate) + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(CasPath, "test", 65536 * 128, 8, false); + } + + const uint64_t kChunkSize = 1048 + 395; + const size_t kChunkCount = 7167; + + std::vector<IoHash> Hashes; + Hashes.reserve(kChunkCount); + + auto ValidateChunks = [&](CasContainerStrategy& Cas, std::span<const IoHash> Hashes, bool ShouldExist) { + for (const IoHash& Hash : Hashes) + { + if (ShouldExist) + { + CHECK(Cas.HaveChunk(Hash)); + IoBuffer Buffer = Cas.FindChunk(Hash); + CHECK(Buffer); + IoHash ValidateHash; + uint64_t ValidateRawSize; + CompressedBuffer Compressed = CompressedBuffer::FromCompressed(SharedBuffer(Buffer), ValidateHash, ValidateRawSize); + CHECK(Compressed); + CHECK(ValidateHash == Hash); + } + else + { + CHECK(!Cas.HaveChunk(Hash)); + IoBuffer Buffer = Cas.FindChunk(Hash); + CHECK(!Buffer); + } + } + }; + + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(CasPath, "test", 65536 * 128, 8, true); + GenerateChunks(Cas, kChunkCount, kChunkSize, Hashes); + ValidateChunks(Cas, Hashes, true); + Cas.Flush(); + ValidateChunks(Cas, Hashes, true); + } + + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(CasPath, "test", 65536 * 128, 8, false); + ValidateChunks(Cas, Hashes, true); + GenerateChunks(Cas, kChunkCount, kChunkSize / 4, Hashes); + ValidateChunks(Cas, Hashes, true); + } + + class GcRefChecker : public GcReferenceChecker + { + public: + explicit GcRefChecker(std::vector<IoHash>&& HashesToKeep) : m_HashesToKeep(std::move(HashesToKeep)) {} + ~GcRefChecker() {} + std::string GetGcName(GcCtx& Ctx) override + { + ZEN_UNUSED(Ctx); + return "test"; + } + void PreCache(GcCtx& Ctx) override { FilterReferences(Ctx, "test", m_HashesToKeep); } + void UpdateLockedState(GcCtx& Ctx) override { ZEN_UNUSED(Ctx); } + std::span<IoHash> GetUnusedReferences(GcCtx& Ctx, std::span<IoHash> IoCids) override + { + ZEN_UNUSED(Ctx); + return KeepUnusedReferences(m_HashesToKeep, IoCids); + } + + private: + std::vector<IoHash> m_HashesToKeep; + }; + + class GcRef : public GcReferencer + { + public: + GcRef(GcManager& Gc, std::span<const IoHash> HashesToKeep) : m_Gc(Gc) + { + m_HashesToKeep.insert(m_HashesToKeep.begin(), HashesToKeep.begin(), HashesToKeep.end()); + m_Gc.AddGcReferencer(*this); + } + ~GcRef() { m_Gc.RemoveGcReferencer(*this); } + std::string GetGcName(GcCtx& Ctx) override + { + ZEN_UNUSED(Ctx); + return "test"; + } + GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override + { + ZEN_UNUSED(Ctx, Stats); + return nullptr; + } + std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override + { + ZEN_UNUSED(Ctx); + return {new GcRefChecker(std::move(m_HashesToKeep))}; + } + std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override + { + ZEN_UNUSED(Ctx); + return {}; + } + + private: + GcManager& m_Gc; + std::vector<IoHash> m_HashesToKeep; + }; + + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(CasPath, "test", 65536 * 128, 8, false); + GenerateChunks(Cas, kChunkCount, kChunkSize / 5, Hashes); + } + + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(CasPath, "test", 65536 * 128, 8, false); + ValidateChunks(Cas, Hashes, true); + GenerateChunks(Cas, kChunkCount, kChunkSize / 2, Hashes); + ValidateChunks(Cas, Hashes, true); + if (true) + { + std::vector<IoHash> DropHashes; + std::vector<IoHash> KeepHashes; + for (size_t Index = 0; Index < Hashes.size(); Index++) + { + if (Index % 5 == 0) + { + KeepHashes.push_back(Hashes[Index]); + } + else + { + DropHashes.push_back(Hashes[Index]); + } + } + // std::span<const IoHash> KeepHashes(Hashes); + // ZEN_ASSERT(ExpectedGcCount < Hashes.size()); + // KeepHashes = KeepHashes.subspan(ExpectedGcCount); + GcRef Ref(Gc, KeepHashes); + Gc.CollectGarbage(GcSettings{.CollectSmallObjects = true, .IsDeleteMode = true}); + ValidateChunks(Cas, KeepHashes, true); + ValidateChunks(Cas, DropHashes, false); + Hashes = KeepHashes; + } + GenerateChunks(Cas, kChunkCount, kChunkSize / 3, Hashes); + } + + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(CasPath, "test", 65536 * 128, 8, false); + ValidateChunks(Cas, Hashes, true); + Cas.Flush(); + ValidateChunks(Cas, Hashes, true); + } + + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + Cas.Initialize(CasPath, "test", 65536 * 128, 8, false); + ValidateChunks(Cas, Hashes, true); + } +} + +TEST_CASE("compactcas.iteratechunks") +{ + std::atomic<size_t> WorkCompleted = 0; + WorkerThreadPool ThreadPool(Max(std::thread::hardware_concurrency() - 1u, 2u), "put"); + + const uint64_t kChunkSize = 1048 + 395; + const size_t kChunkCount = 63840; + + for (uint32_t N = 0; N < 4; N++) + { + GcManager Gc; + CasContainerStrategy Cas(Gc); + ScopedTemporaryDirectory TempDir; + Cas.Initialize(TempDir.Path(), "test", 65536 * 128, 8, true); + + CHECK(Cas.IterateChunks( + {}, + [](size_t Index, const IoBuffer& Payload) { + ZEN_UNUSED(Index, Payload); + return true; + }, + &ThreadPool, + 2048u)); + + uint64_t ExpectedSize = 0; + + std::vector<IoHash> Hashes; + Hashes.reserve(kChunkCount); + + { + Latch WorkLatch(1); + tsl::robin_set<IoHash, IoHash::Hasher> ChunkHashesLookup; + ChunkHashesLookup.reserve(kChunkCount); + RwLock InsertLock; + for (size_t Offset = 0; Offset < kChunkCount;) + { + size_t BatchCount = Min<size_t>(kChunkCount - Offset, 512u); + WorkLatch.AddCount(1); + ThreadPool.ScheduleWork( + [N, &WorkLatch, &InsertLock, &ChunkHashesLookup, &ExpectedSize, &Hashes, &Cas, Offset, BatchCount]() { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + + std::vector<IoBuffer> BatchBlobs; + std::vector<IoHash> BatchHashes; + BatchBlobs.reserve(BatchCount); + BatchHashes.reserve(BatchCount); + + while (BatchBlobs.size() < BatchCount) + { + IoBuffer Chunk = CreateRandomBlob( + N + kChunkSize + ((BatchHashes.size() % 100) + (BatchHashes.size() % 7) * 315u + Offset % 377)); + IoHash Hash = IoHash::HashBuffer(Chunk); + { + RwLock::ExclusiveLockScope __(InsertLock); + if (ChunkHashesLookup.contains(Hash)) + { + continue; + } + ChunkHashesLookup.insert(Hash); + ExpectedSize += Chunk.Size(); + } + + BatchBlobs.emplace_back(std::move(Chunk)); + BatchHashes.push_back(Hash); + } + + Cas.InsertChunks(BatchBlobs, BatchHashes); + { + RwLock::ExclusiveLockScope __(InsertLock); + Hashes.insert(Hashes.end(), BatchHashes.begin(), BatchHashes.end()); + } + }); + Offset += BatchCount; + } + WorkLatch.CountDown(); + WorkLatch.Wait(); + } + + WorkerThreadPool BatchWorkerPool(Max(std::thread::hardware_concurrency() - 1u, 2u), "fetch"); + { + std::vector<std::atomic<bool>> FetchedFlags(Hashes.size()); + std::atomic<uint64_t> FetchedSize = 0; + CHECK(Cas.IterateChunks( + Hashes, + [&Hashes, &FetchedFlags, &FetchedSize](size_t Index, const IoBuffer& Payload) { + CHECK(FetchedFlags[Index].load() == false); + FetchedFlags[Index].store(true); + const IoHash& Hash = Hashes[Index]; + CHECK(Hash == IoHash::HashBuffer(Payload)); + FetchedSize += Payload.GetSize(); + return true; + }, + &BatchWorkerPool, + 2048u)); + for (const auto& Flag : FetchedFlags) + { + CHECK(Flag.load()); + } + CHECK(FetchedSize == ExpectedSize); + } + + Latch WorkLatch(1); + for (size_t I = 0; I < 2; I++) + { + WorkLatch.AddCount(1); + ThreadPool.ScheduleWork([&Cas, &Hashes, &BatchWorkerPool, &WorkLatch, I]() { + auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); + std::vector<IoHash> PartialHashes; + PartialHashes.reserve(Hashes.size() / 4); + for (size_t Index = 0; Index < Hashes.size(); Index++) + { + size_t TestIndex = Index + I; + if ((TestIndex % 7 == 1) || (TestIndex % 13 == 1) || (TestIndex % 17 == 1)) + { + PartialHashes.push_back(Hashes[Index]); + } + } + std::reverse(PartialHashes.begin(), PartialHashes.end()); + + std::vector<IoHash> NoFoundHashes; + std::vector<size_t> NoFindIndexes; + + NoFoundHashes.reserve(9); + for (size_t J = 0; J < 9; J++) + { + std::string Data = fmt::format("oh no, we don't exist {}", J + 1); + NoFoundHashes.push_back(IoHash::HashBuffer(Data.data(), Data.length())); + } + + NoFindIndexes.reserve(9); + + // Sprinkle in chunks that are not found! + auto It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 0, NoFoundHashes[0]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 0 + 1, NoFoundHashes[1]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 1, NoFoundHashes[2]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 1 + 1, NoFoundHashes[3]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 2, NoFoundHashes[4]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 3, NoFoundHashes[5]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 3 + 1, NoFoundHashes[6]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.begin() + (PartialHashes.size() / 4) * 4, NoFoundHashes[7]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + It = PartialHashes.insert(PartialHashes.end(), NoFoundHashes[8]); + NoFindIndexes.push_back(std::distance(PartialHashes.begin(), It)); + + std::vector<std::atomic<bool>> FoundFlags(PartialHashes.size() + NoFoundHashes.size()); + std::vector<std::atomic<uint32_t>> FetchedCounts(PartialHashes.size() + NoFoundHashes.size()); + + CHECK(Cas.IterateChunks( + PartialHashes, + [&PartialHashes, &FoundFlags, &FetchedCounts, &NoFindIndexes](size_t Index, const IoBuffer& Payload) { + CHECK_EQ(NoFindIndexes.end(), std::find(NoFindIndexes.begin(), NoFindIndexes.end(), Index)); + uint32_t PreviousCount = FetchedCounts[Index].fetch_add(1); + CHECK(PreviousCount == 0); + FoundFlags[Index] = !!Payload; + const IoHash& Hash = PartialHashes[Index]; + CHECK(Hash == IoHash::HashBuffer(Payload)); + return true; + }, + &BatchWorkerPool, + 2048u)); + + for (size_t FoundIndex = 0; FoundIndex < PartialHashes.size(); FoundIndex++) + { + CHECK(FetchedCounts[FoundIndex].load() <= 1); + if (std::find(NoFindIndexes.begin(), NoFindIndexes.end(), FoundIndex) == NoFindIndexes.end()) + { + CHECK(FoundFlags[FoundIndex]); + } + else + { + CHECK(!FoundFlags[FoundIndex]); + } + } + }); + } + WorkLatch.CountDown(); + WorkLatch.Wait(); + } +} + #endif void diff --git a/src/zenstore/compactcas.h b/src/zenstore/compactcas.h index 07e620086..15e4cbf81 100644 --- a/src/zenstore/compactcas.h +++ b/src/zenstore/compactcas.h @@ -52,11 +52,11 @@ struct CasContainerStrategy final : public GcStorage, public GcReferenceStore ~CasContainerStrategy(); CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash); - std::vector<CasStore::InsertResult> InsertChunks(std::span<IoBuffer> Chunks, std::span<IoHash> ChunkHashes); + std::vector<CasStore::InsertResult> InsertChunks(std::span<const IoBuffer> Chunks, std::span<const IoHash> ChunkHashes); IoBuffer FindChunk(const IoHash& ChunkHash); bool HaveChunk(const IoHash& ChunkHash); void FilterChunks(HashKeySet& InOutChunks); - bool IterateChunks(std::span<IoHash> ChunkHashes, + bool IterateChunks(std::span<const IoHash> ChunkHashes, const std::function<bool(size_t Index, const IoBuffer& Payload)>& AsyncCallback, WorkerThreadPool* OptionalWorkerPool, uint64_t LargeSizeLimit); @@ -77,7 +77,7 @@ struct CasContainerStrategy final : public GcStorage, public GcReferenceStore private: CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash); - void MakeIndexSnapshot(); + void MakeIndexSnapshot(bool ResetLog); uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion); uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t SkipEntryCount); void OpenContainer(bool IsNewStore); diff --git a/src/zenstore/filecas.cpp b/src/zenstore/filecas.cpp index 14123528c..68644be2d 100644 --- a/src/zenstore/filecas.cpp +++ b/src/zenstore/filecas.cpp @@ -20,6 +20,7 @@ #include <zencore/workthreadpool.h> #include <zenstore/gc.h> #include <zenstore/scrubcontext.h> +#include <zenutil/parallelwork.h> #if ZEN_WITH_TESTS # include <zencore/compactbinarybuilder.h> @@ -176,16 +177,16 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN if (IsNewStore) { - std::filesystem::remove(LogPath); - std::filesystem::remove(IndexPath); + RemoveFile(LogPath); + RemoveFile(IndexPath); - if (std::filesystem::is_directory(m_RootDirectory)) + if (IsDir(m_RootDirectory)) { // We need to explicitly only delete sharded root folders as the cas manifest, tinyobject and smallobject cas folders may reside // in this folder as well struct Visitor : public FileSystemTraversal::TreeVisitor { - virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t) override + virtual void VisitFile(const std::filesystem::path&, const path_view&, uint64_t, uint32_t, uint64_t) override { // We don't care about files } @@ -211,24 +212,24 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN Traversal.TraverseFileSystem(m_RootDirectory, CasVisitor); for (const std::filesystem::path& SharededRoot : CasVisitor.ShardedRoots) { - std::filesystem::remove_all(SharededRoot); + DeleteDirectories(SharededRoot); } } } - if (std::filesystem::is_regular_file(IndexPath)) + if (IsFile(IndexPath)) { uint32_t IndexVersion = 0; m_LogFlushPosition = ReadIndexFile(IndexPath, IndexVersion); if (IndexVersion == 0) { ZEN_WARN("removing invalid index file at '{}'", IndexPath); - std::filesystem::remove(IndexPath); + RemoveFile(IndexPath); } } uint64_t LogEntryCount = 0; - if (std::filesystem::is_regular_file(LogPath)) + if (IsFile(LogPath)) { if (TCasLogFile<FileCasIndexEntry>::IsValid(LogPath)) { @@ -237,7 +238,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN else { ZEN_WARN("removing invalid cas log at '{}'", LogPath); - std::filesystem::remove(LogPath); + RemoveFile(LogPath); } } @@ -251,7 +252,7 @@ FileCasStrategy::Initialize(const std::filesystem::path& RootDirectory, bool IsN if (IsNewStore || LogEntryCount > 0) { - MakeIndexSnapshot(); + MakeIndexSnapshot(/*ResetLog*/ true); } } @@ -327,7 +328,7 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash, CasStore:: { std::filesystem::path TempPath(ChunkPath.parent_path() / Oid::NewOid().ToString()); std::error_code Ec; - std::filesystem::rename(ChunkPath, TempPath, Ec); + RenameFile(ChunkPath, TempPath, Ec); if (Ec) { throw std::system_error(Ec, fmt::format("unable to move existing CAS file {} to {}", ChunkPath, TempPath)); @@ -452,7 +453,7 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash, CasStore:: { PayloadFile.Close(); std::error_code DummyEc; - std::filesystem::remove(ChunkPath, DummyEc); + RemoveFile(ChunkPath, DummyEc); throw; } bool IsNew = UpdateIndex(ChunkHash, Chunk.Size()); @@ -503,7 +504,7 @@ FileCasStrategy::SafeOpenChunk(const IoHash& ChunkHash, uint64 ExpectedSize) { std::error_code Ec; std::filesystem::path TempPath(ChunkPath.parent_path() / Oid::NewOid().ToString()); - std::filesystem::rename(ChunkPath, TempPath, Ec); + RenameFile(ChunkPath, TempPath, Ec); if (!Ec) { Chunk.SetDeleteOnClose(true); @@ -563,8 +564,24 @@ FileCasStrategy::HaveChunk(const IoHash& ChunkHash) { ZEN_ASSERT(m_IsInitialized); - RwLock::SharedLockScope _(m_Lock); - return m_Index.contains(ChunkHash); + { + RwLock::SharedLockScope _(m_Lock); + if (auto It = m_Index.find(ChunkHash); It == m_Index.end()) + { + return false; + } + } + + ShardingHelper Name(m_RootDirectory, ChunkHash); + const std::filesystem::path ChunkPath = Name.ShardedPath.ToPath(); + RwLock::SharedLockScope ShardLock(LockForHash(ChunkHash)); + + if (IsFile(ChunkPath)) + { + return true; + } + + return false; } void @@ -574,7 +591,7 @@ FileCasStrategy::DeleteChunk(const IoHash& ChunkHash, std::error_code& Ec) ShardingHelper Name(m_RootDirectory, ChunkHash); const std::filesystem::path ChunkPath = Name.ShardedPath.ToPath(); - uint64_t FileSize = static_cast<uint64_t>(std::filesystem::file_size(ChunkPath, Ec)); + uint64_t FileSize = static_cast<uint64_t>(FileSizeFromPath(ChunkPath, Ec)); if (Ec) { ZEN_WARN("get file size FAILED, file cas '{}'", ChunkPath); @@ -582,9 +599,9 @@ FileCasStrategy::DeleteChunk(const IoHash& ChunkHash, std::error_code& Ec) } ZEN_DEBUG("deleting CAS payload file '{}' {}", ChunkPath, NiceBytes(FileSize)); - std::filesystem::remove(ChunkPath, Ec); + RemoveFile(ChunkPath, Ec); - if (!Ec || !std::filesystem::exists(ChunkPath)) + if (!Ec || !IsFile(ChunkPath)) { { RwLock::ExclusiveLockScope _(m_Lock); @@ -632,10 +649,11 @@ FileCasStrategy::IterateChunks(std::span<IoHash> ChunkHashes, } } } - std::atomic_bool Continue = true; + std::atomic<bool> AsyncContinue = true; if (!FoundChunkIndexes.empty()) { - auto ProcessOne = [this, &ChunkHashes, &Continue, &AsyncCallback](size_t ChunkIndex, uint64_t ExpectedSize) { + auto ProcessOne = [this, &ChunkHashes, &AsyncCallback](size_t ChunkIndex, uint64_t ExpectedSize) { + ZEN_ASSERT(ChunkIndex < ChunkHashes.size()); const IoHash& ChunkHash = ChunkHashes[ChunkIndex]; IoBuffer Payload = SafeOpenChunk(ChunkHash, ExpectedSize); if (!AsyncCallback(ChunkIndex, std::move(Payload))) @@ -645,49 +663,70 @@ FileCasStrategy::IterateChunks(std::span<IoHash> ChunkHashes, return true; }; - Latch WorkLatch(1); - for (size_t Index = 0; Index < FoundChunkIndexes.size(); Index++) + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + try { - size_t ChunkIndex = FoundChunkIndexes[Index]; - uint64_t ExpectedSize = FoundChunkExpectedSizes[Index]; - if (!Continue) + for (size_t Index = 0; Index < FoundChunkIndexes.size(); Index++) { - break; - } - if (OptionalWorkerPool) - { - WorkLatch.AddCount(1); - OptionalWorkerPool->ScheduleWork([this, &WorkLatch, &ProcessOne, &ChunkHashes, ChunkIndex, ExpectedSize, &Continue]() { - auto _ = MakeGuard([&WorkLatch]() { WorkLatch.CountDown(); }); - if (!Continue) - { - return; - } - try - { - if (!ProcessOne(ChunkIndex, ExpectedSize)) - { - Continue = false; - } - } - catch (const std::exception& Ex) + if (AbortFlag) + { + AsyncContinue.store(false); + } + if (!AsyncContinue) + { + break; + } + size_t ChunkIndex = FoundChunkIndexes[Index]; + uint64_t ExpectedSize = FoundChunkExpectedSizes[Index]; + if (OptionalWorkerPool) + { + Work.ScheduleWork( + *OptionalWorkerPool, + [this, &ProcessOne, &ChunkHashes, ChunkIndex, ExpectedSize, &AsyncContinue](std::atomic<bool>& AbortFlag) { + if (AbortFlag) + { + AsyncContinue.store(false); + } + if (!AsyncContinue) + { + return; + } + try + { + if (!ProcessOne(ChunkIndex, ExpectedSize)) + { + AsyncContinue.store(false); + } + } + catch (const std::exception& Ex) + { + ZEN_WARN("Failed iterating chunks for cas root path {}, chunk {}. Reason: '{}'", + m_RootDirectory, + ChunkHashes[ChunkIndex], + Ex.what()); + AsyncContinue.store(false); + } + }); + } + else + { + if (!ProcessOne(ChunkIndex, ExpectedSize)) { - ZEN_WARN("Failed iterating chunks for cas root path {}, chunk {}. Reason: '{}'", - m_RootDirectory, - ChunkHashes[ChunkIndex], - Ex.what()); + AsyncContinue.store(false); } - }); - } - else - { - Continue = Continue && ProcessOne(ChunkIndex, ExpectedSize); + } } } - WorkLatch.CountDown(); - WorkLatch.Wait(); + catch (const std::exception& Ex) + { + AbortFlag.store(true); + ZEN_WARN("Failed iterating chunks in {}. Reason: '{}'", this->m_RootDirectory, Ex.what()); + } + Work.Wait(); } - return Continue; + return AsyncContinue.load(); } void @@ -727,7 +766,7 @@ FileCasStrategy::Flush() ZEN_TRACE_CPU("FileCas::Flush"); m_CasLog.Flush(); - MakeIndexSnapshot(); + MakeIndexSnapshot(/*ResetLog*/ false); } void @@ -912,15 +951,14 @@ FileCasStrategy::ValidateEntry(const FileCasIndexEntry& Entry, std::string& OutR } void -FileCasStrategy::MakeIndexSnapshot() +FileCasStrategy::MakeIndexSnapshot(bool ResetLog) { ZEN_MEMSCOPE(GetFileCasTag()); ZEN_TRACE_CPU("FileCas::MakeIndexSnapshot"); using namespace filecas::impl; - uint64_t LogCount = m_CasLog.GetLogCount(); - if (m_LogFlushPosition == LogCount) + if (m_LogFlushPosition == m_CasLog.GetLogCount()) { return; } @@ -937,34 +975,17 @@ FileCasStrategy::MakeIndexSnapshot() namespace fs = std::filesystem; - fs::path IndexPath = GetIndexPath(m_RootDirectory); - fs::path STmpIndexPath = GetTempIndexPath(m_RootDirectory); - - // Move index away, we keep it if something goes wrong - if (fs::is_regular_file(STmpIndexPath)) - { - std::error_code Ec; - if (!fs::remove(STmpIndexPath, Ec) || Ec) - { - ZEN_WARN("snapshot failed to clean up temp snapshot at {}, reason: '{}'", STmpIndexPath, Ec.message()); - return; - } - } + const fs::path IndexPath = GetIndexPath(m_RootDirectory); try { - if (fs::is_regular_file(IndexPath)) - { - fs::rename(IndexPath, STmpIndexPath); - } - // Write the current state of the location map to a new index state std::vector<FileCasIndexEntry> Entries; - uint64_t IndexLogPosition = 0; + // Be defensive regarding log position as it is written to without acquiring m_LocationMapLock + const uint64_t IndexLogPosition = ResetLog ? 0 : m_CasLog.GetLogCount(); { RwLock::SharedLockScope __(m_Lock); - IndexLogPosition = m_CasLog.GetLogCount(); Entries.resize(m_Index.size()); uint64_t EntryIndex = 0; @@ -974,6 +995,7 @@ FileCasStrategy::MakeIndexSnapshot() IndexEntry.Key = Entry.first; IndexEntry.Size = Entry.second.Size; } + EntryCount = m_Index.size(); } TemporaryFile ObjectIndexFile; @@ -983,47 +1005,47 @@ FileCasStrategy::MakeIndexSnapshot() { throw std::system_error(Ec, fmt::format("Failed to create temp file for index snapshot at '{}'", IndexPath)); } - filecas::impl::FileCasIndexHeader Header = {.EntryCount = Entries.size(), .LogPosition = IndexLogPosition}; + filecas::impl::FileCasIndexHeader Header = {.EntryCount = EntryCount, .LogPosition = IndexLogPosition}; Header.Checksum = filecas::impl::FileCasIndexHeader::ComputeChecksum(Header); ObjectIndexFile.Write(&Header, sizeof(filecas::impl::FileCasIndexHeader), 0); - ObjectIndexFile.Write(Entries.data(), Entries.size() * sizeof(FileCasIndexEntry), sizeof(filecas::impl::FileCasIndexHeader)); + ObjectIndexFile.Write(Entries.data(), EntryCount * sizeof(FileCasIndexEntry), sizeof(filecas::impl::FileCasIndexHeader)); ObjectIndexFile.Flush(); ObjectIndexFile.MoveTemporaryIntoPlace(IndexPath, Ec); if (Ec) { - throw std::system_error(Ec, fmt::format("Failed to move temp file '{}' to '{}'", ObjectIndexFile.GetPath(), IndexPath)); + throw std::system_error(Ec, + fmt::format("Snapshot failed to rename new snapshot '{}' to '{}', reason: '{}'", + ObjectIndexFile.GetPath(), + IndexPath, + Ec.message())); } - EntryCount = Entries.size(); - m_LogFlushPosition = IndexLogPosition; - } - catch (const std::exception& Err) - { - ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what()); - - // Restore any previous snapshot - if (fs::is_regular_file(STmpIndexPath)) + if (ResetLog) { - std::error_code Ec; - fs::remove(IndexPath, Ec); // We don't care if this fails, we try to move the old temp file regardless - fs::rename(STmpIndexPath, IndexPath, Ec); - if (Ec) + const std::filesystem::path LogPath = GetLogPath(m_RootDirectory); + + if (IsFile(LogPath)) { - ZEN_WARN("snapshot failed to restore old snapshot from {}, reason: '{}'", STmpIndexPath, Ec.message()); + m_CasLog.Close(); + if (!RemoveFile(LogPath, Ec) || Ec) + { + // This is non-critical, it only means that we will replay the events of the log over the snapshot - inefficent but in + // the end it will be the same result + ZEN_WARN("Snapshot failed to clean log file '{}', reason: '{}'", LogPath, IndexPath, Ec.message()); + } + m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite); } } + m_LogFlushPosition = IndexLogPosition; } - if (fs::is_regular_file(STmpIndexPath)) + catch (const std::exception& Err) { - std::error_code Ec; - if (!fs::remove(STmpIndexPath, Ec) || Ec) - { - ZEN_WARN("snapshot failed to remove temporary file {}, reason: '{}'", STmpIndexPath, Ec.message()); - } + ZEN_WARN("snapshot FAILED, reason: '{}'", Err.what()); } } + uint64_t FileCasStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion) { @@ -1032,7 +1054,7 @@ FileCasStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& using namespace filecas::impl; std::vector<FileCasIndexEntry> Entries; - if (std::filesystem::is_regular_file(IndexPath)) + if (IsFile(IndexPath)) { Stopwatch Timer; const auto _ = MakeGuard([&] { @@ -1077,7 +1099,7 @@ FileCasStrategy::ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& return 0; } - if (std::filesystem::is_directory(m_RootDirectory)) + if (IsDir(m_RootDirectory)) { ZEN_INFO("missing index for file cas, scanning for cas files in {}", m_RootDirectory); TCasLogFile<FileCasIndexEntry> CasLog; @@ -1116,7 +1138,7 @@ FileCasStrategy::ReadLog(const std::filesystem::path& LogPath, uint64_t SkipEntr using namespace filecas::impl; - if (std::filesystem::is_regular_file(LogPath)) + if (IsFile(LogPath)) { uint64_t LogEntryCount = 0; Stopwatch Timer; @@ -1174,7 +1196,7 @@ FileCasStrategy::ScanFolderForCasFiles(const std::filesystem::path& RootDir) struct Visitor : public FileSystemTraversal::TreeVisitor { Visitor(const std::filesystem::path& RootDir, std::vector<FileCasIndexEntry>& Entries) : RootDirectory(RootDir), Entries(Entries) {} - virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t) override + virtual void VisitFile(const std::filesystem::path& Parent, const path_view& File, uint64_t FileSize, uint32_t, uint64_t) override { std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory); @@ -1274,12 +1296,12 @@ public: ChunkPath); } std::error_code Ec; - uint64_t SizeOnDisk = std::filesystem::file_size(ChunkPath, Ec); + uint64_t SizeOnDisk = FileSizeFromPath(ChunkPath, Ec); if (Ec) { SizeOnDisk = 0; } - bool Existed = std::filesystem::remove(ChunkPath, Ec); + bool Existed = RemoveFile(ChunkPath, Ec); if (Ec) { // Target file may be open for read, attempt to move it to a temp file and mark it delete on close @@ -1290,7 +1312,7 @@ public: if (OldChunk) { std::filesystem::path TempPath(ChunkPath.parent_path() / Oid::NewOid().ToString()); - std::filesystem::rename(ChunkPath, TempPath, Ec); + RenameFile(ChunkPath, TempPath, Ec); if (!Ec) { OldChunk.SetDeleteOnClose(true); @@ -1317,7 +1339,7 @@ public: else { std::error_code Ec; - bool Existed = std::filesystem::is_regular_file(ChunkPath, Ec); + bool Existed = IsFile(ChunkPath, Ec); if (Ec) { if (Ctx.Settings.Verbose) @@ -1516,7 +1538,7 @@ TEST_CASE("cas.chunk.moveoverwrite") Payload1.SetDeleteOnClose(true); CasStore::InsertResult Result = FileCas.InsertChunk(Payload1, CompressedPayload1.DecodeRawHash()); CHECK_EQ(Result.New, true); - CHECK(!std::filesystem::exists(Payload1Path)); + CHECK(!IsFile(Payload1Path)); } { std::filesystem::path Payload1BPath{TempDir.Path() / "payload_1"}; @@ -1526,9 +1548,9 @@ TEST_CASE("cas.chunk.moveoverwrite") CasStore::InsertResult Result = FileCas.InsertChunk(Payload1B, CompressedPayload1.DecodeRawHash()); CHECK_EQ(Result.New, false); - CHECK(std::filesystem::exists(Payload1BPath)); + CHECK(IsFile(Payload1BPath)); Payload1B = {}; - CHECK(!std::filesystem::exists(Payload1BPath)); + CHECK(!IsFile(Payload1BPath)); } IoBuffer FetchedPayload = FileCas.FindChunk(CompressedPayload1.DecodeRawHash()); @@ -1554,7 +1576,7 @@ TEST_CASE("cas.chunk.moveoverwrite") } Payload2 = {}; - CHECK(!std::filesystem::exists(Payload2Path)); + CHECK(!IsFile(Payload2Path)); { IoHash RawHash; @@ -1598,9 +1620,9 @@ TEST_CASE("cas.chunk.copyoverwrite") CasStore::InsertResult Result = FileCas.InsertChunk(Payload1, CompressedPayload1.DecodeRawHash(), CasStore::InsertMode::kCopyOnly); CHECK_EQ(Result.New, true); - CHECK(std::filesystem::exists(Payload1Path)); + CHECK(IsFile(Payload1Path)); Payload1 = {}; - CHECK(!std::filesystem::exists(Payload1Path)); + CHECK(!IsFile(Payload1Path)); } { std::filesystem::path Payload1BPath{TempDir.Path() / "payload_1"}; @@ -1611,9 +1633,9 @@ TEST_CASE("cas.chunk.copyoverwrite") CasStore::InsertResult Result = FileCas.InsertChunk(Payload1B, CompressedPayload1.DecodeRawHash(), CasStore::InsertMode::kCopyOnly); CHECK_EQ(Result.New, false); - CHECK(std::filesystem::exists(Payload1BPath)); + CHECK(IsFile(Payload1BPath)); Payload1B = {}; - CHECK(!std::filesystem::exists(Payload1BPath)); + CHECK(!IsFile(Payload1BPath)); } IoBuffer FetchedPayload = FileCas.FindChunk(CompressedPayload1.DecodeRawHash()); @@ -1640,7 +1662,7 @@ TEST_CASE("cas.chunk.copyoverwrite") } Payload2 = {}; - CHECK(!std::filesystem::exists(Payload2Path)); + CHECK(!IsFile(Payload2Path)); { IoHash RawHash; diff --git a/src/zenstore/filecas.h b/src/zenstore/filecas.h index 21d8c3b9e..e93356927 100644 --- a/src/zenstore/filecas.h +++ b/src/zenstore/filecas.h @@ -50,7 +50,7 @@ struct FileCasStrategy final : public GcStorage, public GcReferenceStore virtual GcReferencePruner* CreateReferencePruner(GcCtx& Ctx, GcReferenceStoreStats& Stats) override; private: - void MakeIndexSnapshot(); + void MakeIndexSnapshot(bool ResetLog); uint64_t ReadIndexFile(const std::filesystem::path& IndexPath, uint32_t& OutVersion); uint64_t ReadLog(const std::filesystem::path& LogPath, uint64_t LogPosition); LoggerRef Log() { return m_Log; } diff --git a/src/zenstore/gc.cpp b/src/zenstore/gc.cpp index 7ac10d613..a15a2e084 100644 --- a/src/zenstore/gc.cpp +++ b/src/zenstore/gc.cpp @@ -62,11 +62,11 @@ namespace { { if (Size == 0) { - std::filesystem::remove(Path); + RemoveFile(Path); return std::error_code{}; } CreateDirectories(Path.parent_path()); - if (std::filesystem::is_regular_file(Path) && std::filesystem::file_size(Path) == Size) + if (IsFile(Path) && FileSizeFromPath(Path) == Size) { return std::error_code(); } @@ -709,7 +709,7 @@ GcManager::CollectGarbage(const GcSettings& Settings) RwLock StoreCompactorsLock; std::unordered_map<std::unique_ptr<GcReferenceValidator>, size_t> ReferenceValidators; RwLock ReferenceValidatorsLock; - WorkerThreadPool& PreCachePhaseThreadPool = + WorkerThreadPool& ParallelWorkThreadPool = Settings.SingleThread ? GetSyncWorkerPool() : GetSmallWorkerPool(EWorkloadType::Background); if (!m_GcReferencers.empty()) @@ -721,7 +721,6 @@ GcManager::CollectGarbage(const GcSettings& Settings) ZEN_INFO("GCV2: Removing expired data from {} referencers", m_GcReferencers.size()); ZEN_TRACE_CPU("GcV2::RemoveExpiredData"); - Latch WorkLeft(1); { // First remove any cache keys that may own references SCOPED_TIMER(Result.RemoveExpiredDataMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { @@ -733,39 +732,45 @@ GcManager::CollectGarbage(const GcSettings& Settings) { if (CheckGCCancel()) { - WorkLeft.CountDown(); - WorkLeft.Wait(); return Sum(Result, true); } GcReferencer* Owner = m_GcReferencers[Index]; std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, &WorkLeft, Owner, Stats, &StoreCompactorsLock, &StoreCompactors]() { - ZEN_MEMSCOPE(GetGcTag()); - - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - try + try + { + Stats->first = Owner->GetGcName(Ctx); + SCOPED_TIMER(Stats->second.RemoveExpiredDataStats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + std::unique_ptr<GcStoreCompactor> StoreCompactor( + Owner->RemoveExpiredData(Ctx, Stats->second.RemoveExpiredDataStats)); + if (StoreCompactor) { - Stats->first = Owner->GetGcName(Ctx); - SCOPED_TIMER(Stats->second.RemoveExpiredDataStats.ElapsedMS = - std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - std::unique_ptr<GcStoreCompactor> StoreCompactor( - Owner->RemoveExpiredData(Ctx, Stats->second.RemoveExpiredDataStats)); - if (StoreCompactor) - { - RwLock::ExclusiveLockScope __(StoreCompactorsLock); - StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->second.CompactStoreStats); - } + RwLock::ExclusiveLockScope __(StoreCompactorsLock); + StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->second.CompactStoreStats); } - catch (const std::exception& Ex) + } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what()); + } + else { ZEN_ERROR("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what()); - SetCancelGC(true); } - }); + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("GCV2: Failed removing expired data for {}. Reason: '{}'", Owner->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } } - WorkLeft.CountDown(); - WorkLeft.Wait(); } } @@ -810,7 +815,7 @@ GcManager::CollectGarbage(const GcSettings& Settings) GcReferenceStore* ReferenceStore = m_GcReferenceStores[Index]; std::pair<std::string, GcReferenceStoreStats>* Stats = &Result.ReferenceStoreStats[Index]; WorkLeft.AddCount(1); - PreCachePhaseThreadPool.ScheduleWork( + ParallelWorkThreadPool.ScheduleWork( [this, &Ctx, ReferenceStore, Stats, Index, &WorkLeft, &ReferencePrunersLock, &ReferencePruners]() { ZEN_MEMSCOPE(GetGcTag()); @@ -832,6 +837,29 @@ GcManager::CollectGarbage(const GcSettings& Settings) ReferencePruners.insert_or_assign(Index, std::move(ReferencePruner)); } } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed creating reference pruners for {}. Reason: '{}'", + ReferenceStore->GetGcName(Ctx), + Ex.what()); + } + else + { + ZEN_ERROR("GCV2: Failed creating reference pruners for {}. Reason: '{}'", + ReferenceStore->GetGcName(Ctx), + Ex.what()); + } + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed creating reference pruners for {}. Reason: '{}'", + ReferenceStore->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed creating reference pruners for {}. Reason: '{}'", @@ -885,41 +913,70 @@ GcManager::CollectGarbage(const GcSettings& Settings) GcReferencer* Referencer = m_GcReferencers[Index]; std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index]; WorkLeft.AddCount(1); - PreCachePhaseThreadPool.ScheduleWork( + ParallelWorkThreadPool.ScheduleWork( [this, &Ctx, &WorkLeft, Referencer, Index, Stats, &ReferenceCheckersLock, &ReferenceCheckers]() { ZEN_MEMSCOPE(GetGcTag()); auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - // The Referencer will create a reference checker that guarantees that the references do not change - // as long as it lives - std::vector<GcReferenceChecker*> Checkers; - try + if (!CheckGCCancel()) { + // The Referencer will create a reference checker that guarantees that the references do not + // change as long as it lives + std::vector<GcReferenceChecker*> Checkers; + auto __ = MakeGuard([&Checkers]() { + while (!Checkers.empty()) + { + delete Checkers.back(); + Checkers.pop_back(); + } + }); + try { - SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS = - std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checkers = Referencer->CreateReferenceCheckers(Ctx); + { + SCOPED_TIMER(Stats->second.CreateReferenceCheckersMS = + std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checkers = Referencer->CreateReferenceCheckers(Ctx); + } + if (!Checkers.empty()) + { + RwLock::ExclusiveLockScope __(ReferenceCheckersLock); + for (auto& Checker : Checkers) + { + ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), + Index); + Checker = nullptr; + } + } } - if (!Checkers.empty()) + catch (const std::system_error& Ex) { - RwLock::ExclusiveLockScope __(ReferenceCheckersLock); - for (auto& Checker : Checkers) + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed creating reference checkers for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + } + else { - ReferenceCheckers.insert_or_assign(std::unique_ptr<GcReferenceChecker>(Checker), Index); - Checker = nullptr; + ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); } + SetCancelGC(true); } - } - catch (const std::exception& Ex) - { - ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'", - Referencer->GetGcName(Ctx), - Ex.what()); - SetCancelGC(true); - while (!Checkers.empty()) + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + } + catch (const std::exception& Ex) { - delete Checkers.back(); - Checkers.pop_back(); + ZEN_ERROR("GCV2: Failed creating reference checkers for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); } } }); @@ -962,19 +1019,26 @@ GcManager::CollectGarbage(const GcSettings& Settings) GcReferencer* Referencer = m_GcReferencers[Index]; std::pair<std::string, GcReferencerStats>* ReferemcerStats = &Result.ReferencerStats[Index]; WorkLeft.AddCount(1); - PreCachePhaseThreadPool.ScheduleWork([this, - &Ctx, - &WorkLeft, - Referencer, - Index, - Result = &Result, - ReferemcerStats, - &ReferenceValidatorsLock, - &ReferenceValidators]() { + ParallelWorkThreadPool.ScheduleWork([this, + &Ctx, + &WorkLeft, + Referencer, + Index, + Result = &Result, + ReferemcerStats, + &ReferenceValidatorsLock, + &ReferenceValidators]() { ZEN_MEMSCOPE(GetGcTag()); auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); std::vector<GcReferenceValidator*> Validators; + auto __ = MakeGuard([&Validators]() { + while (!Validators.empty()) + { + delete Validators.back(); + Validators.pop_back(); + } + }); try { { @@ -995,17 +1059,35 @@ GcManager::CollectGarbage(const GcSettings& Settings) } } } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed creating reference validators for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + } + else + { + ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + } + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'", + Referencer->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed creating reference validators for {}. Reason: '{}'", Referencer->GetGcName(Ctx), Ex.what()); SetCancelGC(true); - while (!Validators.empty()) - { - delete Validators.back(); - Validators.pop_back(); - } } }); } @@ -1023,8 +1105,6 @@ GcManager::CollectGarbage(const GcSettings& Settings) ZEN_INFO("GCV2: Precaching state for {} reference checkers", ReferenceCheckers.size()); ZEN_TRACE_CPU("GcV2::PreCache"); - Latch WorkLeft(1); - { SCOPED_TIMER(Result.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs()); if (Ctx.Settings.Verbose) { @@ -1036,33 +1116,40 @@ GcManager::CollectGarbage(const GcSettings& Settings) { if (CheckGCCancel()) { - WorkLeft.CountDown(); - WorkLeft.Wait(); return Sum(Result, true); } GcReferenceChecker* Checker = It.first.get(); size_t Index = It.second; std::pair<std::string, GcReferencerStats>* Stats = &Result.ReferencerStats[Index]; - WorkLeft.AddCount(1); - PreCachePhaseThreadPool.ScheduleWork([this, &Ctx, Checker, Index, Stats, &WorkLeft]() { - ZEN_MEMSCOPE(GetGcTag()); - - auto _ = MakeGuard([&WorkLeft]() { WorkLeft.CountDown(); }); - try + try + { + SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); + Checker->PreCache(Ctx); + } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) { - SCOPED_TIMER(Stats->second.PreCacheStateMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); - Checker->PreCache(Ctx); + ZEN_WARN("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); } - catch (const std::exception& Ex) + else { ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); - SetCancelGC(true); } - }); + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("GCV2: Failed precaching for {}. Reason: '{}'", Checker->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } } - WorkLeft.CountDown(); - WorkLeft.Wait(); } } @@ -1081,7 +1168,7 @@ GcManager::CollectGarbage(const GcSettings& Settings) ZEN_INFO("GCV2: Locking state for {} reference checkers", ReferenceCheckers.size()); { ZEN_TRACE_CPU("GcV2::LockReferencers"); - // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until + // From this point we have blocked all writes to all References (DiskBucket/ProjectStore/BuildStore) until // we delete the ReferenceLockers Latch WorkLeft(1); { @@ -1108,7 +1195,7 @@ GcManager::CollectGarbage(const GcSettings& Settings) ZEN_TRACE_CPU("GcV2::UpdateLockedState"); // Locking all references checkers so we have a steady state of which references are used - // From this point we have blocked all writes to all References (DiskBucket/ProjectStore) until + // From this point we have blocked all writes to all References (DiskBucket/ProjectStore/BuildStore) until // we delete the ReferenceCheckers Latch WorkLeft(1); @@ -1142,6 +1229,29 @@ GcManager::CollectGarbage(const GcSettings& Settings) std::chrono::milliseconds(Timer.GetElapsedTimeMs());); Checker->UpdateLockedState(Ctx); } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed Updating locked state for {}. Reason: '{}'", + Checker->GetGcName(Ctx), + Ex.what()); + } + else + { + ZEN_ERROR("GCV2: Failed Updating locked state for {}. Reason: '{}'", + Checker->GetGcName(Ctx), + Ex.what()); + } + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_WARN("GCV2: Failed Updating locked state for {}. Reason: '{}'", + Checker->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed Updating locked state for {}. Reason: '{}'", @@ -1231,6 +1341,29 @@ GcManager::CollectGarbage(const GcSettings& Settings) StoreCompactors.insert_or_assign(std::move(StoreCompactor), &Stats->CompactStoreStats); } } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed removing unused data for {}. Reason: '{}'", + Pruner->GetGcName(Ctx), + Ex.what()); + } + else + { + ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'", + Pruner->GetGcName(Ctx), + Ex.what()); + } + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'", + Pruner->GetGcName(Ctx), + Ex.what()); + SetCancelGC(true); + } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed removing unused data for {}. Reason: '{}'", @@ -1262,12 +1395,12 @@ GcManager::CollectGarbage(const GcSettings& Settings) ZEN_TRACE_CPU("GcV2::CompactStores"); auto ClaimDiskReserve = [&]() -> uint64_t { - if (!std::filesystem::is_regular_file(Settings.DiskReservePath)) + if (!IsFile(Settings.DiskReservePath)) { return 0; } - uint64_t ReclaimedSize = std::filesystem::file_size(Settings.DiskReservePath); - if (std::filesystem::remove(Settings.DiskReservePath)) + uint64_t ReclaimedSize = FileSizeFromPath(Settings.DiskReservePath); + if (RemoveFile(Settings.DiskReservePath)) { return ReclaimedSize; } @@ -1294,6 +1427,23 @@ GcManager::CollectGarbage(const GcSettings& Settings) SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); Compactor->CompactStore(Ctx, Stats, ClaimDiskReserve); } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what()); + } + else + { + ZEN_ERROR("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what()); + } + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed compacting store {}. Reason: '{}'", Compactor->GetGcName(Ctx), Ex.what()); @@ -1335,6 +1485,23 @@ GcManager::CollectGarbage(const GcSettings& Settings) SCOPED_TIMER(Stats.ElapsedMS = std::chrono::milliseconds(Timer.GetElapsedTimeMs());); ReferenceValidator->Validate(Ctx, Stats); } + catch (const std::system_error& Ex) + { + if (IsOOD(Ex) || IsOOM(Ex)) + { + ZEN_WARN("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what()); + } + else + { + ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what()); + } + SetCancelGC(true); + } + catch (const std::bad_alloc& Ex) + { + ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what()); + SetCancelGC(true); + } catch (const std::exception& Ex) { ZEN_ERROR("GCV2: Failed validating referencer {}. Reason: '{}'", ReferenceValidator->GetGcName(Ctx), Ex.what()); @@ -1557,7 +1724,7 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config) m_Config.LightweightInterval = m_Config.MonitorInterval; } - std::filesystem::create_directories(Config.RootDirectory); + CreateDirectories(Config.RootDirectory); std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize); if (Ec) @@ -1739,6 +1906,7 @@ GcScheduler::AppendGCLog(std::string_view Id, GcClock::TimePoint StartTime, cons { Writer << "CacheExpireTime"sv << ToDateTime(Settings.CacheExpireTime); Writer << "ProjectStoreExpireTime"sv << ToDateTime(Settings.ProjectStoreExpireTime); + Writer << "BuildStoreExpireTime"sv << ToDateTime(Settings.BuildStoreExpireTime); Writer << "CollectSmallObjects"sv << Settings.CollectSmallObjects; Writer << "IsDeleteMode"sv << Settings.IsDeleteMode; Writer << "SkipCidDelete"sv << Settings.SkipCidDelete; @@ -1849,7 +2017,7 @@ GcScheduler::GetState() const if (Result.Config.DiskReserveSize != 0) { Ec.clear(); - Result.HasDiskReserve = std::filesystem::is_regular_file(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec; + Result.HasDiskReserve = IsFile(Result.Config.RootDirectory / "reserve.gc", Ec) && !Ec; } if (Result.Status != GcSchedulerStatus::kRunning) @@ -1900,17 +2068,46 @@ GcScheduler::SchedulerThread() ZEN_MEMSCOPE(GetGcTag()); SetCurrentThreadName("GcScheduler"); - std::chrono::seconds WaitTime{0}; - - bool SilenceErrors = false; + std::chrono::seconds WaitTime{0}; + const std::chrono::seconds ShortWaitTime{5}; + bool SilenceErrors = false; for (;;) { - bool Timeout = false; + (void)CheckDiskSpace(); + + std::chrono::seconds WaitedTime{0}; + bool Timeout = false; { ZEN_ASSERT(WaitTime.count() >= 0); std::unique_lock Lock(m_GcMutex); - Timeout = std::cv_status::timeout == m_GcSignal.wait_for(Lock, WaitTime); + while (!Timeout) + { + std::chrono::seconds ShortWait = Min(WaitTime, ShortWaitTime); + bool ShortTimeout = std::cv_status::timeout == m_GcSignal.wait_for(Lock, ShortWait); + if (ShortTimeout) + { + if (WaitTime > ShortWaitTime) + { + DiskSpace Space = CheckDiskSpace(); + if (!AreDiskWritesAllowed()) + { + ZEN_INFO("Triggering GC due to low disk space ({}) on {}", NiceBytes(Space.Free), m_Config.RootDirectory); + Timeout = true; + } + WaitTime -= ShortWaitTime; + } + else + { + Timeout = true; + } + } + else + { + // We got a signal + break; + } + } } if (Status() == GcSchedulerStatus::kStopped) @@ -1940,7 +2137,9 @@ GcScheduler::SchedulerThread() std::chrono::seconds LightweightGcInterval = m_Config.LightweightInterval; std::chrono::seconds MaxCacheDuration = m_Config.MaxCacheDuration; std::chrono::seconds MaxProjectStoreDuration = m_Config.MaxProjectStoreDuration; + std::chrono::seconds MaxBuildStoreDuration = m_Config.MaxBuildStoreDuration; uint64_t DiskSizeSoftLimit = m_Config.DiskSizeSoftLimit; + uint64_t MinimumFreeDiskSpaceToAllowWrites = m_Config.MinimumFreeDiskSpaceToAllowWrites; bool SkipCid = false; GcVersion UseGCVersion = m_Config.UseGCVersion; uint32_t CompactBlockUsageThresholdPercent = m_Config.CompactBlockUsageThresholdPercent; @@ -1955,8 +2154,9 @@ GcScheduler::SchedulerThread() uint8_t NextAttachmentPassIndex = ComputeAttachmentRange(m_AttachmentPassIndex, m_Config.AttachmentPassCount, AttachmentRangeMin, AttachmentRangeMax); - bool DiskSpaceGCTriggered = false; - bool TimeBasedGCTriggered = false; + bool LowDiskSpaceGCTriggered = false; + bool HighDiskSpaceUsageGCTriggered = false; + bool TimeBasedGCTriggered = false; GcClock::TimePoint Now = GcClock::Now(); @@ -1975,6 +2175,10 @@ GcScheduler::SchedulerThread() { MaxProjectStoreDuration = TriggerParams.MaxProjectStoreDuration; } + if (TriggerParams.MaxBuildStoreDuration != std::chrono::seconds::max()) + { + MaxBuildStoreDuration = TriggerParams.MaxBuildStoreDuration; + } if (TriggerParams.DiskSizeSoftLimit != 0) { DiskSizeSoftLimit = TriggerParams.DiskSizeSoftLimit; @@ -2046,6 +2250,8 @@ GcScheduler::SchedulerThread() MaxCacheDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxCacheDuration; GcClock::TimePoint ProjectStoreExpireTime = MaxProjectStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxProjectStoreDuration; + GcClock::TimePoint BuildStoreExpireTime = + MaxBuildStoreDuration == GcClock::Duration::max() ? GcClock::TimePoint::min() : Now - MaxBuildStoreDuration; const GcStorageSize TotalSize = m_GcManager.TotalStorageSize(); @@ -2087,12 +2293,32 @@ GcScheduler::SchedulerThread() } } - uint64_t GcDiskSpaceGoal = 0; + uint64_t MaximumDiskUseGcSpaceGoal = 0; + uint64_t MinimumFreeDiskGcSpaceGoal = 0; + if (DiskSizeSoftLimit != 0 && TotalSize.DiskSize > DiskSizeSoftLimit) { - GcDiskSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit; + MaximumDiskUseGcSpaceGoal = TotalSize.DiskSize - DiskSizeSoftLimit; + HighDiskSpaceUsageGCTriggered = true; + } + + if (MinimumFreeDiskSpaceToAllowWrites != 0 && Space.Free < MinimumFreeDiskSpaceToAllowWrites) + { + MinimumFreeDiskGcSpaceGoal = MinimumFreeDiskSpaceToAllowWrites - Space.Free; + if (MinimumFreeDiskGcSpaceGoal > MaximumDiskUseGcSpaceGoal) + { + LowDiskSpaceGCTriggered = true; + EnableValidation = false; + } + } + + if (MaximumDiskUseGcSpaceGoal > 0 || MinimumFreeDiskGcSpaceGoal > 0) + { + const uint64_t GcDiskSpaceRemoveGoal = Max(MaximumDiskUseGcSpaceGoal, MinimumFreeDiskGcSpaceGoal); + std::unique_lock Lock(m_GcMutex); - GcClock::Tick AgeTick = m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceGoal, Now.time_since_epoch().count()); + GcClock::Tick AgeTick = + m_DiskUsageWindow.FindTimepointThatRemoves(GcDiskSpaceRemoveGoal, Now.time_since_epoch().count()); GcClock::TimePoint SizeBasedExpireTime = GcClock::TimePointFromTick(AgeTick); if (SizeBasedExpireTime > CacheExpireTime) { @@ -2102,6 +2328,10 @@ GcScheduler::SchedulerThread() { ProjectStoreExpireTime = SizeBasedExpireTime; } + if (SizeBasedExpireTime > BuildStoreExpireTime) + { + BuildStoreExpireTime = SizeBasedExpireTime; + } } std::chrono::seconds RemainingTimeUntilGc = @@ -2130,29 +2360,33 @@ GcScheduler::SchedulerThread() RemainingTimeUntilLightweightGc = RemainingTimeUntilGc; } - if (GcDiskSpaceGoal > 0) - { - DiskSpaceGCTriggered = true; - } - else if (RemainingTimeUntilGc.count() == 0) - { - TimeBasedGCTriggered = true; - } - else if (RemainingTimeUntilLightweightGc.count() == 0) + if (MaximumDiskUseGcSpaceGoal == 0 && MinimumFreeDiskGcSpaceGoal == 0) { - TimeBasedGCTriggered = true; - SkipCid = true; + if (RemainingTimeUntilGc.count() == 0) + { + TimeBasedGCTriggered = true; + } + else if (RemainingTimeUntilLightweightGc.count() == 0) + { + TimeBasedGCTriggered = true; + SkipCid = true; + } } std::string NextTriggerStatus; - if (GcInterval.count() != 0 || LightweightGcInterval.count() != 0 || DiskSizeSoftLimit != 0) { ExtendableStringBuilder<256> Sb; - if (DiskSpaceGCTriggered) + if (LowDiskSpaceGCTriggered) + { + Sb.Append(fmt::format(" Free disk space is below {}, trying to reclaim {}.", + NiceBytes(MinimumFreeDiskSpaceToAllowWrites), + NiceBytes(MinimumFreeDiskGcSpaceGoal))); + } + else if (HighDiskSpaceUsageGCTriggered) { Sb.Append(fmt::format(" Disk space exceeds {}, trying to reclaim {}.", NiceBytes(DiskSizeSoftLimit), - NiceBytes(GcDiskSpaceGoal))); + NiceBytes(MaximumDiskUseGcSpaceGoal))); } else if (TimeBasedGCTriggered) { @@ -2182,6 +2416,10 @@ GcScheduler::SchedulerThread() { Sb.Append(fmt::format(" Disk usage GC in {}.", NiceBytes(DiskSizeSoftLimit - TotalSize.DiskSize))); } + else if (MinimumFreeDiskSpaceToAllowWrites != 0 && Space.Free > MinimumFreeDiskSpaceToAllowWrites) + { + Sb.Append(fmt::format(" Disk usage GC in {}.", NiceBytes(Space.Free - MinimumFreeDiskSpaceToAllowWrites))); + } } NextTriggerStatus = Sb; } @@ -2198,7 +2436,7 @@ GcScheduler::SchedulerThread() NiceBytes(MaxLoad / uint64_t(std::chrono::seconds(m_Config.MonitorInterval).count())), NextTriggerStatus); - if (!DiskSpaceGCTriggered && !TimeBasedGCTriggered) + if (!HighDiskSpaceUsageGCTriggered && !LowDiskSpaceGCTriggered && !TimeBasedGCTriggered) { WaitTime = m_Config.MonitorInterval; if (RemainingTimeUntilGc < WaitTime) @@ -2227,6 +2465,7 @@ GcScheduler::SchedulerThread() bool GcSuccess = CollectGarbage(CacheExpireTime, ProjectStoreExpireTime, + BuildStoreExpireTime, DoDelete, CollectSmallObjects, SkipCid, @@ -2333,6 +2572,7 @@ GcScheduler::ScrubStorage(bool DoDelete, bool SkipCid, std::chrono::seconds Time bool GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime, + const GcClock::TimePoint& BuildStoreExpireTime, bool Delete, bool CollectSmallObjects, bool SkipCid, @@ -2375,12 +2615,12 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, { // We are low on disk, check if we can release our extra storage reserve, if we can't bail from doing GC auto ClaimDiskReserve = [&]() -> uint64_t { - if (!std::filesystem::is_regular_file(DiskReservePath)) + if (!IsFile(DiskReservePath)) { return 0; } - uint64_t ReclaimedSize = std::filesystem::file_size(DiskReservePath); - if (std::filesystem::remove(DiskReservePath)) + uint64_t ReclaimedSize = FileSizeFromPath(DiskReservePath); + if (RemoveFile(DiskReservePath)) { return ReclaimedSize; } @@ -2416,6 +2656,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcSettings Settings = {.CacheExpireTime = CacheExpireTime, .ProjectStoreExpireTime = ProjectStoreExpireTime, + .BuildStoreExpireTime = BuildStoreExpireTime, .CollectSmallObjects = CollectSmallObjects, .IsDeleteMode = Delete, .SkipCidDelete = SkipCid, @@ -2447,6 +2688,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, } SB.Append(fmt::format(" Cache cutoff time: {}\n", Settings.CacheExpireTime)); SB.Append(fmt::format(" Project store cutoff time: {}\n", Settings.ProjectStoreExpireTime)); + SB.Append(fmt::format(" Build store cutoff time: {}\n", Settings.BuildStoreExpireTime)); }; { @@ -2522,7 +2764,11 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, ZEN_INFO("GCV2: {}", SB.ToView()); - AppendGCLog(GcId, GcStartTime, Settings, Result); + CheckDiskSpace(); + if (!m_AreDiskWritesBlocked.load()) + { + AppendGCLog(GcId, GcStartTime, Settings, Result); + } if (SkipCid) { @@ -2552,6 +2798,7 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, if (Delete) { GcClock::TimePoint KeepRangeStart = Min(CacheExpireTime, ProjectStoreExpireTime); + KeepRangeStart = Min(KeepRangeStart, BuildStoreExpireTime); m_LastGcExpireTime = KeepRangeStart; std::unique_lock Lock(m_GcMutex); m_DiskUsageWindow.KeepRange(KeepRangeStart.time_since_epoch().count(), GcClock::Duration::max().count()); @@ -2563,65 +2810,69 @@ GcScheduler::CollectGarbage(const GcClock::TimePoint& CacheExpireTime, m_LastFullGCDiff = Diff; } - for (uint32_t RetryCount = 0; RetryCount < 3; RetryCount++) + CheckDiskSpace(); + if (!m_AreDiskWritesBlocked.load()) { - if (RetryCount > 0) + for (uint32_t RetryCount = 0; RetryCount < 3; RetryCount++) { - ZEN_INFO("Writing GC state failed {} time(s), pausing and trying again", RetryCount); - Sleep(250); - } - try - { - const fs::path Path = m_Config.RootDirectory / "gc_state"; - ZEN_DEBUG("saving scheduler state to '{}'", Path); - CbObjectWriter SchedulerState; - SchedulerState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count()); - SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count()); - SchedulerState << "AttachmentPassIndex"sv << m_AttachmentPassIndex; - - SaveCompactBinaryObject(Path, SchedulerState.Save()); if (RetryCount > 0) { - ZEN_INFO("Writing GC state succeeded after {} attempts", RetryCount + 1); - } - break; - } - catch (const std::system_error& SystemError) - { - if (IsOOM(SystemError.code())) - { - ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what()); + ZEN_INFO("Writing GC state failed {} time(s), pausing and trying again", RetryCount); + Sleep(250); } - else if (IsOOD(SystemError.code())) - { - ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what()); - } - if (RetryCount == 0) + try { - ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}' ({})", - SystemError.what(), - SystemError.code().value()); + const fs::path Path = m_Config.RootDirectory / "gc_state"; + ZEN_DEBUG("saving scheduler state to '{}'", Path); + CbObjectWriter SchedulerState; + SchedulerState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count()); + SchedulerState << "LastGcExpireTime"sv << static_cast<int64_t>(m_LastGcExpireTime.time_since_epoch().count()); + SchedulerState << "AttachmentPassIndex"sv << m_AttachmentPassIndex; + + SaveCompactBinaryObject(Path, SchedulerState.Save()); + if (RetryCount > 0) + { + ZEN_INFO("Writing GC state succeeded after {} attempts", RetryCount + 1); + } + break; } - else + catch (const std::system_error& SystemError) { - ZEN_WARN("writing gc scheduler state failed with system error exception: '{}' ({})", - SystemError.what(), - SystemError.code().value()); + if (IsOOM(SystemError.code())) + { + ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", SystemError.what()); + } + else if (IsOOD(SystemError.code())) + { + ZEN_WARN("writing gc scheduler state ran out of disk space: '{}'", SystemError.what()); + } + else if (RetryCount == 0) + { + ZEN_ERROR("writing gc scheduler state failed with system error exception: '{}' ({})", + SystemError.what(), + SystemError.code().value()); + } + else + { + ZEN_WARN("writing gc scheduler state failed with system error exception: '{}' ({})", + SystemError.what(), + SystemError.code().value()); + } } - } - catch (const std::bad_alloc& BadAlloc) - { - ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what()); - } - catch (const std::exception& Ex) - { - if (RetryCount == 0) + catch (const std::bad_alloc& BadAlloc) { - ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what()); + ZEN_WARN("writing gc scheduler state ran out of memory: '{}'", BadAlloc.what()); } - else + catch (const std::exception& Ex) { - ZEN_WARN("writing gc scheduler state failed with: '{}'", Ex.what()); + if (RetryCount == 0) + { + ZEN_ERROR("writing gc scheduler state failed with: '{}'", Ex.what()); + } + else + { + ZEN_WARN("writing gc scheduler state failed with: '{}'", Ex.what()); + } } } } diff --git a/src/zenstore/include/zenstore/accesstime.h b/src/zenstore/include/zenstore/accesstime.h new file mode 100644 index 000000000..e53937b52 --- /dev/null +++ b/src/zenstore/include/zenstore/accesstime.h @@ -0,0 +1,53 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zenstore/gc.h> + +#include <gsl/gsl-lite.hpp> + +namespace zen { + +// This store the access time as seconds since epoch internally in a 32-bit value giving is a range of 136 years since epoch +struct AccessTime +{ + explicit AccessTime(GcClock::Tick Tick) noexcept : SecondsSinceEpoch(ToSecondsSinceEpoch(Tick)) {} + AccessTime& operator=(GcClock::Tick Tick) noexcept + { + SecondsSinceEpoch.store(ToSecondsSinceEpoch(Tick), std::memory_order_relaxed); + return *this; + } + operator GcClock::Tick() const noexcept + { + return std::chrono::duration_cast<GcClock::Duration>(std::chrono::seconds(SecondsSinceEpoch.load(std::memory_order_relaxed))) + .count(); + } + + AccessTime(AccessTime&& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {} + AccessTime(const AccessTime& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {} + AccessTime& operator=(AccessTime&& Rhs) noexcept + { + SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed); + return *this; + } + AccessTime& operator=(const AccessTime& Rhs) noexcept + { + SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed); + return *this; + } + + void SetSecondsSinceEpoch(uint32_t InSecondsSinceEpoch) { SecondsSinceEpoch.store(InSecondsSinceEpoch); } + + uint32_t GetSecondsSinceEpoch() const { return SecondsSinceEpoch.load(); } + +private: + AccessTime(uint32_t InSecondsSinceEpoch) noexcept : SecondsSinceEpoch(InSecondsSinceEpoch) {} + + static uint32_t ToSecondsSinceEpoch(GcClock::Tick Tick) + { + return gsl::narrow<uint32_t>(std::chrono::duration_cast<std::chrono::seconds>(GcClock::Duration(Tick)).count()); + } + std::atomic_uint32_t SecondsSinceEpoch; +}; + +} // namespace zen diff --git a/src/zenstore/include/zenstore/blockstore.h b/src/zenstore/include/zenstore/blockstore.h index 97357e5cb..fce05766f 100644 --- a/src/zenstore/include/zenstore/blockstore.h +++ b/src/zenstore/include/zenstore/blockstore.h @@ -94,7 +94,7 @@ struct BlockStoreFile : public RefCounted IoBuffer GetChunk(uint64_t Offset, uint64_t Size); void Read(void* Data, uint64_t Size, uint64_t FileOffset); void Write(const void* Data, uint64_t Size, uint64_t FileOffset); - void Flush(); + void Flush(uint64_t FinalSize = (uint64_t)-1); BasicFile& GetBasicFile(); void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun); bool IsOpen() const; @@ -107,7 +107,7 @@ private: const std::filesystem::path m_Path; IoBuffer m_IoBuffer; BasicFile m_File; - uint64_t m_CachedFileSize = 0; + std::atomic<uint64_t> m_CachedFileSize = 0; }; class BlockStoreCompactState; @@ -127,7 +127,8 @@ public: typedef std::vector<std::pair<size_t, BlockStoreLocation>> MovedChunksArray; typedef std::vector<size_t> ChunkIndexArray; - typedef std::function<bool(const MovedChunksArray& MovedChunks, uint64_t FreedDiskSpace)> CompactCallback; + typedef std::function<bool(const MovedChunksArray& MovedChunks, const ChunkIndexArray& ScrubbedChunks, uint64_t FreedDiskSpace)> + CompactCallback; typedef std::function<uint64_t()> ClaimDiskReserveCallback; typedef std::function<bool(size_t ChunkIndex, const void* Data, uint64_t Size)> IterateChunksSmallSizeCallback; typedef std::function<bool(size_t ChunkIndex, BlockStoreFile& File, uint64_t Offset, uint64_t Size)> IterateChunksLargeSizeCallback; @@ -146,18 +147,19 @@ public: typedef tsl::robin_set<uint32_t> BlockIndexSet; - // Ask the store to create empty blocks for all locations that does not have a block // Remove any block that is not referenced - void SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks); - BlockEntryCountMap GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent); + // Return a list of blocks that are not present + [[nodiscard]] BlockIndexSet SyncExistingBlocksOnDisk(const BlockIndexSet& KnownBlocks); + BlockEntryCountMap GetBlocksToCompact(const BlockUsageMap& BlockUsage, uint32_t BlockUsageThresholdPercent); void Close(); void WriteChunk(const void* Data, uint64_t Size, uint32_t Alignment, const WriteChunkCallback& Callback); typedef std::function<void(std::span<BlockStoreLocation> Locations)> WriteChunksCallback; - void WriteChunks(std::span<IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback); + void WriteChunks(std::span<const IoBuffer> Datas, uint32_t Alignment, const WriteChunksCallback& Callback); + bool HasChunk(const BlockStoreLocation& Location) const; IoBuffer TryGetChunk(const BlockStoreLocation& Location) const; void Flush(bool ForceNewBlock); @@ -172,7 +174,7 @@ public: void CompactBlocks( const BlockStoreCompactState& CompactState, uint32_t PayloadAlignment, - const CompactCallback& ChangeCallback = [](const MovedChunksArray&, uint64_t) { return true; }, + const CompactCallback& ChangeCallback = [](const MovedChunksArray&, const ChunkIndexArray&, uint64_t) { return true; }, const ClaimDiskReserveCallback& DiskReserveCallback = []() { return 0; }, std::string_view LogPrefix = {}); diff --git a/src/zenstore/include/zenstore/buildstore/buildstore.h b/src/zenstore/include/zenstore/buildstore/buildstore.h new file mode 100644 index 000000000..adf48dc26 --- /dev/null +++ b/src/zenstore/include/zenstore/buildstore/buildstore.h @@ -0,0 +1,228 @@ + +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenstore/blockstore.h> + +#include <zencore/iohash.h> +#include <zenstore/accesstime.h> +#include <zenstore/caslog.h> +#include <zenstore/gc.h> +#include "../compactcas.h" +#include "../filecas.h" + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +struct BuildStoreConfig +{ + std::filesystem::path RootDirectory; + uint32_t SmallBlobBlockStoreMaxBlockSize = 256 * 1024 * 1024; + uint64_t SmallBlobBlockStoreMaxBlockEmbedSize = 1 * 1024 * 1024; + uint32_t SmallBlobBlockStoreAlignement = 16; + uint32_t MetadataBlockStoreMaxBlockSize = 64 * 1024 * 1024; + uint32_t MetadataBlockStoreAlignement = 8; + uint64_t MaxDiskSpaceLimit = 1u * 1024u * 1024u * 1024u * 1024u; // 1TB +}; + +class BuildStore : public GcReferencer, public GcReferenceLocker, public GcStorage +{ +public: + explicit BuildStore(const BuildStoreConfig& Config, GcManager& Gc); + virtual ~BuildStore(); + + void PutBlob(const IoHash& BlobHashes, const IoBuffer& Payload); + IoBuffer GetBlob(const IoHash& BlobHashes); + + struct BlobExistsResult + { + bool HasBody = 0; + bool HasMetadata = 0; + }; + + std::vector<BlobExistsResult> BlobsExists(std::span<const IoHash> BlobHashes); + + void PutMetadatas(std::span<const IoHash> BlobHashes, std::span<const IoBuffer> MetaDatas); + std::vector<IoBuffer> GetMetadatas(std::span<const IoHash> BlobHashes, WorkerThreadPool* OptionalWorkerPool); + + void Flush(); + + struct StorageStats + { + uint64_t EntryCount = 0; + uint64_t LargeBlobCount = 0; + uint64_t LargeBlobBytes = 0; + uint64_t SmallBlobCount = 0; + uint64_t SmallBlobBytes = 0; + uint64_t MetadataCount = 0; + uint64_t MetadataByteCount = 0; + }; + + StorageStats GetStorageStats() const; + +#if ZEN_WITH_TESTS + std::optional<AccessTime> GetLastAccessTime(const IoHash& Key) const; + bool SetLastAccessTime(const IoHash& Key, const AccessTime& Time); +#endif // ZEN_WITH_TESTS + +private: + LoggerRef Log() { return m_Log; } + + void CompactState(); + + uint64_t ReadPayloadLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount); + uint64_t ReadMetadataLog(const RwLock::ExclusiveLockScope&, const std::filesystem::path& LogPath, uint64_t SkipEntryCount); + void WriteAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath); + void ReadAccessTimes(const RwLock::ExclusiveLockScope&, const std::filesystem::path& AccessTimesPath); + + //////// GcReferencer + virtual std::string GetGcName(GcCtx& Ctx) override; + virtual GcStoreCompactor* RemoveExpiredData(GcCtx& Ctx, GcStats& Stats) override; + virtual std::vector<GcReferenceChecker*> CreateReferenceCheckers(GcCtx& Ctx) override; + virtual std::vector<GcReferenceValidator*> CreateReferenceValidators(GcCtx& Ctx) override; + + //////// GcReferenceLocker + virtual std::vector<RwLock::SharedLockScope> LockState(GcCtx& Ctx) override; + + //////// GcStorage + virtual void ScrubStorage(ScrubContext& ScrubCtx) override; + virtual GcStorageSize StorageSize() const override; + +#pragma pack(push) +#pragma pack(1) + struct PayloadEntry + { + PayloadEntry() {} + PayloadEntry(uint64_t Flags, uint64_t Size) + { + ZEN_ASSERT((Size & 0x00ffffffffffffffu) == Size); + ZEN_ASSERT((Flags & (kTombStone | kStandalone)) == Flags); + FlagsAndSize = (Size << 8) | Flags; + } + static const uint8_t kTombStone = 0x10u; // Represents a deleted key/value + static const uint8_t kStandalone = 0x20u; // This payload is stored as a standalone value + + uint64_t FlagsAndSize = 0; + uint64_t GetSize() const { return FlagsAndSize >> 8; } + uint8_t GetFlags() const { return uint8_t(FlagsAndSize & 0xff); } + void AddFlag(uint8_t Flag) { FlagsAndSize |= Flag; } + void SetSize(uint64_t Size) + { + ZEN_ASSERT((Size & 0x00ffffffffffffffu) == Size); + FlagsAndSize = (Size << 8) | (FlagsAndSize & 0xff); + } + void SetFlags(uint8_t Flags) { FlagsAndSize = (FlagsAndSize & 0xffffffffffffff00u) | Flags; } + }; + static_assert(sizeof(PayloadEntry) == 8); + + struct PayloadDiskEntry + { + PayloadEntry Entry; // 8 bytes + IoHash BlobHash; // 20 bytes + }; + static_assert(sizeof(PayloadDiskEntry) == 28); + + struct MetadataEntry + { + BlockStoreLocation Location; // 12 bytes + + ZenContentType ContentType = ZenContentType::kCOUNT; // 1 byte + static const uint8_t kTombStone = 0x10u; // Represents a deleted key/value + uint8_t Flags = 0; // 1 byte + + uint8_t Reserved1 = 0; + uint8_t Reserved2 = 0; + }; + static_assert(sizeof(MetadataEntry) == 16); + + struct MetadataDiskEntry + { + MetadataEntry Entry; // 16 bytes + IoHash BlobHash; // 20 bytes + uint8_t Reserved1 = 0; + uint8_t Reserved2 = 0; + uint8_t Reserved3 = 0; + uint8_t Reserved4 = 0; + }; + static_assert(sizeof(MetadataDiskEntry) == 40); + +#pragma pack(pop) + + static bool ValidatePayloadDiskEntry(const PayloadDiskEntry& Entry, std::string& OutReason); + static bool ValidateMetadataDiskEntry(const MetadataDiskEntry& Entry, std::string& OutReason); + + struct PayloadIndex + { + uint32_t Index = std::numeric_limits<uint32_t>::max(); + + operator bool() const { return Index != std::numeric_limits<uint32_t>::max(); }; + PayloadIndex() = default; + explicit PayloadIndex(size_t InIndex) : Index(uint32_t(InIndex)) {} + operator size_t() const { return Index; }; + inline auto operator<=>(const PayloadIndex& Other) const = default; + }; + + struct MetadataIndex + { + uint32_t Index = std::numeric_limits<uint32_t>::max(); + + operator bool() const { return Index != std::numeric_limits<uint32_t>::max(); }; + MetadataIndex() = default; + explicit MetadataIndex(size_t InIndex) : Index(uint32_t(InIndex)) {} + operator size_t() const { return Index; }; + inline auto operator<=>(const MetadataIndex& Other) const = default; + }; + + struct BlobIndex + { + uint32_t Index = std::numeric_limits<uint32_t>::max(); + + operator bool() const { return Index != std::numeric_limits<uint32_t>::max(); }; + BlobIndex() = default; + explicit BlobIndex(size_t InIndex) : Index(uint32_t(InIndex)) {} + operator size_t() const { return Index; }; + inline auto operator<=>(const BlobIndex& Other) const = default; + }; + + struct BlobEntry + { + PayloadIndex Payload; + MetadataIndex Metadata; + AccessTime LastAccessTime; + }; + static_assert(sizeof(BlobEntry) == 12); + + LoggerRef m_Log; + const BuildStoreConfig m_Config; + GcManager& m_Gc; + + mutable RwLock m_Lock; + + std::vector<PayloadEntry> m_PayloadEntries; + std::vector<MetadataEntry> m_MetadataEntries; + + std::vector<BlobEntry> m_BlobEntries; + tsl::robin_map<IoHash, BlobIndex, IoHash::Hasher> m_BlobLookup; + + FileCasStrategy m_LargeBlobStore; + CasContainerStrategy m_SmallBlobStore; + BlockStore m_MetadataBlockStore; + + TCasLogFile<PayloadDiskEntry> m_PayloadlogFile; + TCasLogFile<MetadataDiskEntry> m_MetadatalogFile; + uint64_t m_BlobLogFlushPosition = 0; + uint64_t m_MetaLogFlushPosition = 0; + + std::unique_ptr<HashSet> m_TrackedCacheKeys; + std::atomic<uint64_t> m_LastAccessTimeUpdateCount; + + friend class BuildStoreGcReferenceChecker; + friend class BuildStoreGcReferencePruner; + friend class BuildStoreGcCompator; +}; + +void buildstore_forcelink(); + +} // namespace zen diff --git a/src/zenstore/include/zenstore/cache/cachedisklayer.h b/src/zenstore/include/zenstore/cache/cachedisklayer.h index 4f5c905ee..11d13bede 100644 --- a/src/zenstore/include/zenstore/cache/cachedisklayer.h +++ b/src/zenstore/include/zenstore/cache/cachedisklayer.h @@ -5,6 +5,7 @@ #include "cacheshared.h" #include <zencore/stats.h> +#include <zenstore/accesstime.h> #include <zenstore/blockstore.h> #include <zenstore/caslog.h> @@ -12,8 +13,9 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <tsl/robin_map.h> ZEN_THIRD_PARTY_INCLUDES_END +#include <EASTL/string.h> +#include <EASTL/unordered_map.h> #include <filesystem> -#include <unordered_map> namespace zen { @@ -118,6 +120,9 @@ public: struct Configuration { + typedef eastl::unordered_map<std::string, BucketConfiguration, std::hash<std::string>, std::equal_to<std::string>> + BucketConfigMap_t; + BucketConfigMap_t BucketConfigMap; BucketConfiguration BucketConfig; uint64_t MemCacheTargetFootprintBytes = 512 * 1024 * 1024; uint64_t MemCacheTrimIntervalSeconds = 60; @@ -176,7 +181,7 @@ public: ~ZenCacheDiskLayer(); struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); void Get(std::string_view Bucket, const IoHash& HashKey, GetBatchHandle& BatchHandle); @@ -185,16 +190,16 @@ public: PutBatchHandle* BeginPutBatch(std::vector<PutResult>& OutResult); void EndPutBatch(PutBatchHandle* Batch) noexcept; - PutResult Put(std::string_view Bucket, - const IoHash& HashKey, - const ZenCacheValue& Value, - std::span<IoHash> References, - bool Overwrite, - PutBatchHandle* OptionalBatchHandle); - bool Drop(); - bool DropBucket(std::string_view Bucket); - void Flush(); - void ScrubStorage(ScrubContext& Ctx); + PutResult Put(std::string_view Bucket, + const IoHash& HashKey, + const ZenCacheValue& Value, + std::span<IoHash> References, + bool Overwrite, + PutBatchHandle* OptionalBatchHandle); + std::function<void()> Drop(); + std::function<void()> DropBucket(std::string_view Bucket); + void Flush(); + void ScrubStorage(ScrubContext& Ctx); void DiscoverBuckets(); GcStorageSize StorageSize() const; @@ -224,27 +229,30 @@ public: */ struct CacheBucket : public GcReferencer { - CacheBucket(GcManager& Gc, std::atomic_uint64_t& OuterCacheMemoryUsage, std::string BucketName, const BucketConfiguration& Config); + CacheBucket(GcManager& Gc, + std::atomic_uint64_t& OuterCacheMemoryUsage, + std::string_view BucketName, + const BucketConfiguration& Config); ~CacheBucket(); bool OpenOrCreate(std::filesystem::path BucketDir, bool AllowCreate = true); struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResult); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResult); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(const IoHash& HashKey, ZenCacheValue& OutValue); void Get(const IoHash& HashKey, GetBatchHandle& BatchHandle); struct PutBatchHandle; - PutBatchHandle* BeginPutBatch(std::vector<ZenCacheDiskLayer::PutResult>& OutResult); - void EndPutBatch(PutBatchHandle* Batch) noexcept; + PutBatchHandle* BeginPutBatch(std::vector<ZenCacheDiskLayer::PutResult>& OutResult); + void EndPutBatch(PutBatchHandle* Batch) noexcept; PutResult Put(const IoHash& HashKey, const ZenCacheValue& Value, std::span<IoHash> References, bool Overwrite, PutBatchHandle* OptionalBatchHandle); - uint64_t MemCacheTrim(GcClock::TimePoint ExpireTime); - bool Drop(); + uint64_t MemCacheTrim(GcClock::TimePoint ExpireTime); + std::function<void()> Drop(); void Flush(); void ScrubStorage(ScrubContext& Ctx); RwLock::SharedLockScope GetGcReferencerLock(); @@ -413,20 +421,23 @@ public: void SaveSnapshot(const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; }); void WriteIndexSnapshot( RwLock::ExclusiveLockScope&, - bool FlushLockPosition, + uint64_t LogPosition, + bool ResetLog, const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; }) { - WriteIndexSnapshotLocked(FlushLockPosition, ClaimDiskReserveFunc); + WriteIndexSnapshotLocked(LogPosition, ResetLog, ClaimDiskReserveFunc); } void WriteIndexSnapshot( RwLock::SharedLockScope&, - bool FlushLockPosition, + uint64_t LogPosition, + bool ResetLog, const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; }) { - WriteIndexSnapshotLocked(FlushLockPosition, ClaimDiskReserveFunc); + WriteIndexSnapshotLocked(LogPosition, ResetLog, ClaimDiskReserveFunc); } void WriteIndexSnapshotLocked( - bool FlushLockPosition, + uint64_t LogPosition, + bool ResetLog, const std::function<uint64_t()>& ClaimDiskReserveFunc = []() { return 0; }); void CompactState(RwLock::ExclusiveLockScope& IndexLock, @@ -498,18 +509,20 @@ private: bool StartAsyncMemCacheTrim(); void MemCacheTrim(); - GcManager& m_Gc; - JobQueue& m_JobQueue; - std::filesystem::path m_RootDir; - Configuration m_Configuration; - std::atomic_uint64_t m_TotalMemCachedSize{}; - std::atomic_bool m_IsMemCacheTrimming = false; - std::atomic<GcClock::Tick> m_NextAllowedTrimTick; - mutable RwLock m_Lock; - std::unordered_map<std::string, std::unique_ptr<CacheBucket>> m_Buckets; - std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; - uint32_t m_UpdateCaptureRefCounter = 0; - std::unique_ptr<std::vector<std::string>> m_CapturedBuckets; + typedef eastl::unordered_map<std::string, std::unique_ptr<CacheBucket>, std::hash<std::string>, std::equal_to<std::string>> BucketMap_t; + + GcManager& m_Gc; + JobQueue& m_JobQueue; + std::filesystem::path m_RootDir; + Configuration m_Configuration; + std::atomic_uint64_t m_TotalMemCachedSize{}; + std::atomic_bool m_IsMemCacheTrimming = false; + std::atomic<GcClock::Tick> m_NextAllowedTrimTick; + mutable RwLock m_Lock; + BucketMap_t m_Buckets; + std::vector<std::unique_ptr<CacheBucket>> m_DroppedBuckets; + uint32_t m_UpdateCaptureRefCounter = 0; + std::unique_ptr<std::vector<std::string>> m_CapturedBuckets; ZenCacheDiskLayer(const ZenCacheDiskLayer&) = delete; ZenCacheDiskLayer& operator=(const ZenCacheDiskLayer&) = delete; diff --git a/src/zenstore/include/zenstore/cache/cacheshared.h b/src/zenstore/include/zenstore/cache/cacheshared.h index dc0c341d0..8f40ae727 100644 --- a/src/zenstore/include/zenstore/cache/cacheshared.h +++ b/src/zenstore/include/zenstore/cache/cacheshared.h @@ -6,6 +6,8 @@ #include <zencore/iohash.h> #include <zenstore/gc.h> +#include <EASTL/fixed_vector.h> + #include <gsl/gsl-lite.hpp> #include <unordered_map> @@ -32,6 +34,8 @@ struct ZenCacheValue IoHash RawHash = IoHash::Zero; }; +typedef eastl::fixed_vector<ZenCacheValue, 16> ZenCacheValueVec_t; + struct CacheValueDetails { struct ValueDetails @@ -76,42 +80,4 @@ enum class PutStatus bool IsKnownBadBucketName(std::string_view BucketName); bool ValidateIoBuffer(ZenContentType ContentType, IoBuffer Buffer); -////////////////////////////////////////////////////////////////////////// - -// This store the access time as seconds since epoch internally in a 32-bit value giving is a range of 136 years since epoch -struct AccessTime -{ - explicit AccessTime(GcClock::Tick Tick) noexcept : SecondsSinceEpoch(ToSeconds(Tick)) {} - AccessTime& operator=(GcClock::Tick Tick) noexcept - { - SecondsSinceEpoch.store(ToSeconds(Tick), std::memory_order_relaxed); - return *this; - } - operator GcClock::Tick() const noexcept - { - return std::chrono::duration_cast<GcClock::Duration>(std::chrono::seconds(SecondsSinceEpoch.load(std::memory_order_relaxed))) - .count(); - } - - AccessTime(AccessTime&& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {} - AccessTime(const AccessTime& Rhs) noexcept : SecondsSinceEpoch(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed)) {} - AccessTime& operator=(AccessTime&& Rhs) noexcept - { - SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed); - return *this; - } - AccessTime& operator=(const AccessTime& Rhs) noexcept - { - SecondsSinceEpoch.store(Rhs.SecondsSinceEpoch.load(std::memory_order_relaxed), std::memory_order_relaxed); - return *this; - } - -private: - static uint32_t ToSeconds(GcClock::Tick Tick) - { - return gsl::narrow<uint32_t>(std::chrono::duration_cast<std::chrono::seconds>(GcClock::Duration(Tick)).count()); - } - std::atomic_uint32_t SecondsSinceEpoch; -}; - } // namespace zen diff --git a/src/zenstore/include/zenstore/cache/structuredcachestore.h b/src/zenstore/include/zenstore/cache/structuredcachestore.h index 581f7861b..b6e8e7565 100644 --- a/src/zenstore/include/zenstore/cache/structuredcachestore.h +++ b/src/zenstore/include/zenstore/cache/structuredcachestore.h @@ -88,7 +88,7 @@ public: void EndPutBatch(PutBatchHandle* Batch) noexcept; struct GetBatchHandle; - GetBatchHandle* BeginGetBatch(std::vector<ZenCacheValue>& OutResults); + GetBatchHandle* BeginGetBatch(ZenCacheValueVec_t& OutResults); void EndGetBatch(GetBatchHandle* Batch) noexcept; bool Get(std::string_view Bucket, const IoHash& HashKey, ZenCacheValue& OutValue); @@ -104,8 +104,8 @@ public: void EnumerateBucketContents(std::string_view Bucket, std::function<void(const IoHash& Key, const CacheValueDetails::ValueDetails& Details)>& Fn) const; - bool Drop(); - void Flush(); + std::function<void()> Drop(); + void Flush(); // GcStorage virtual void ScrubStorage(ScrubContext& ScrubCtx) override; @@ -225,14 +225,14 @@ public: class GetBatch { public: - GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, std::vector<ZenCacheValue>& OutResult); + GetBatch(ZenCacheStore& CacheStore, std::string_view Namespace, ZenCacheValueVec_t& OutResult); ~GetBatch(); private: ZenCacheStore& m_CacheStore; ZenCacheNamespace* m_Store = nullptr; ZenCacheNamespace::GetBatchHandle* m_NamespaceBatchHandle = nullptr; - std::vector<ZenCacheValue>& Results; + ZenCacheValueVec_t& Results; friend class ZenCacheStore; }; diff --git a/src/zenstore/include/zenstore/gc.h b/src/zenstore/include/zenstore/gc.h index 3daae0a93..3223fba39 100644 --- a/src/zenstore/include/zenstore/gc.h +++ b/src/zenstore/include/zenstore/gc.h @@ -55,6 +55,7 @@ struct GcSettings { GcClock::TimePoint CacheExpireTime = GcClock::Now(); GcClock::TimePoint ProjectStoreExpireTime = GcClock::Now(); + GcClock::TimePoint BuildStoreExpireTime = GcClock::Now(); bool CollectSmallObjects = false; bool IsDeleteMode = false; bool SkipCidDelete = false; @@ -412,6 +413,7 @@ struct GcSchedulerConfig std::chrono::seconds Interval{}; std::chrono::seconds MaxCacheDuration{86400}; std::chrono::seconds MaxProjectStoreDuration{604800}; + std::chrono::seconds MaxBuildStoreDuration{604800}; bool CollectSmallObjects = true; bool Enabled = true; uint64_t DiskReserveSize = 1ul << 28; @@ -496,6 +498,7 @@ public: bool CollectSmallObjects = false; std::chrono::seconds MaxCacheDuration = std::chrono::seconds::max(); std::chrono::seconds MaxProjectStoreDuration = std::chrono::seconds::max(); + std::chrono::seconds MaxBuildStoreDuration = std::chrono::seconds::max(); uint64_t DiskSizeSoftLimit = 0; bool SkipCid = false; bool SkipDelete = false; @@ -528,6 +531,7 @@ private: void SchedulerThread(); bool CollectGarbage(const GcClock::TimePoint& CacheExpireTime, const GcClock::TimePoint& ProjectStoreExpireTime, + const GcClock::TimePoint& BuildStoreExpireTime, bool Delete, bool CollectSmallObjects, bool SkipCid, @@ -582,16 +586,3 @@ private: void gc_forcelink(); } // namespace zen - -template<> -struct fmt::formatter<zen::GcClock::TimePoint> : formatter<string_view> -{ - template<typename FormatContext> - auto format(const zen::GcClock::TimePoint& TimePoint, FormatContext& ctx) const - { - std::time_t Time = std::chrono::system_clock::to_time_t(TimePoint); - char TimeString[std::size("yyyy-mm-ddThh:mm:ss")]; - std::strftime(std::data(TimeString), std::size(TimeString), "%FT%T", std::localtime(&Time)); - return fmt::format_to(ctx.out(), "{}", TimeString); - } -}; diff --git a/src/zenstore/workspaces.cpp b/src/zenstore/workspaces.cpp index 02a83d2a6..0ca2adab2 100644 --- a/src/zenstore/workspaces.cpp +++ b/src/zenstore/workspaces.cpp @@ -444,7 +444,7 @@ Workspaces::RefreshWorkspaceShares(const Oid& WorkspaceId) { const std::filesystem::path& RootPath = Workspace->GetConfig().RootPath; std::filesystem::path ConfigPath = RootPath / WorkspaceConfigName; - if (std::filesystem::exists(ConfigPath)) + if (IsFile(ConfigPath)) { std::string Error; std::vector<Workspaces::WorkspaceShareConfiguration> WorkspaceShares = ReadWorkspaceConfig(m_Log, RootPath, Error); @@ -458,7 +458,7 @@ Workspaces::RefreshWorkspaceShares(const Oid& WorkspaceId) { const std::filesystem::path& SharePath = Configuration.SharePath; - if (std::filesystem::is_directory(RootPath / SharePath)) + if (IsDir(RootPath / SharePath)) { DeletedShares.erase(Configuration.Id); @@ -808,7 +808,7 @@ Workspaces::ReadConfig(const LoggerRef& InLog, const std::filesystem::path& Work ZEN_DEBUG("Reading workspaces state from {}", WorkspaceStatePath); const std::filesystem::path ConfigPath = WorkspaceStatePath / WorkspacesConfigName; - if (std::filesystem::exists(ConfigPath)) + if (IsFile(ConfigPath)) { std::vector<Workspaces::WorkspaceConfiguration> Workspaces = WorkspacesFromJson(IoBufferBuilder::MakeFromFile(ConfigPath), OutError); @@ -847,7 +847,7 @@ Workspaces::ReadWorkspaceConfig(const LoggerRef& InLog, const std::filesystem::p ZEN_DEBUG("Reading workspace state from {}", WorkspaceRoot); std::filesystem::path ConfigPath = WorkspaceRoot / WorkspaceConfigName; - if (std::filesystem::exists(ConfigPath)) + if (IsFile(ConfigPath)) { std::vector<Workspaces::WorkspaceShareConfiguration> WorkspaceShares = WorkspaceSharesFromJson(IoBufferBuilder::MakeFromFile(ConfigPath), OutError); @@ -886,7 +886,7 @@ Workspaces::AddWorkspace(const LoggerRef& Log, const std::filesystem::path& Work { throw std::invalid_argument(fmt::format("invalid root path '{}' for workspace {}", Configuration.RootPath, Configuration.Id)); } - if (!std::filesystem::is_directory(Configuration.RootPath)) + if (!IsDir(Configuration.RootPath)) { throw std::invalid_argument( fmt::format("workspace root path '{}' does not exist for workspace '{}'", Configuration.RootPath, Configuration.Id)); @@ -965,7 +965,7 @@ Workspaces::AddWorkspaceShare(const LoggerRef& Log, throw std::invalid_argument( fmt::format("workspace share path '{}' is not a sub-path of workspace path '{}'", Configuration.SharePath, WorkspaceRoot)); } - if (!std::filesystem::is_directory(WorkspaceRoot / Configuration.SharePath)) + if (!IsDir(WorkspaceRoot / Configuration.SharePath)) { throw std::invalid_argument( fmt::format("workspace share path '{}' does not exist in workspace path '{}'", Configuration.SharePath, WorkspaceRoot)); @@ -1244,7 +1244,7 @@ Workspaces::FindWorkspaceShare(const Oid& WorkspaceId, const Oid& ShareId, bool const Workspaces::WorkspaceConfiguration& WorkspaceConfig = Workspace->GetConfig(); const Workspaces::WorkspaceShareConfiguration& ShareConfig = Share->GetConfig(); std::filesystem::path FullSharePath = WorkspaceConfig.RootPath / ShareConfig.SharePath; - if (std::filesystem::is_directory(FullSharePath)) + if (IsDir(FullSharePath)) { if (ForceRefresh || !Share->IsInitialized()) { @@ -1306,18 +1306,18 @@ namespace { std::filesystem::path EmptyFolder(RootPath / "empty_folder"); std::filesystem::path FirstFolder(RootPath / "first_folder"); - std::filesystem::create_directory(FirstFolder); + CreateDirectories(FirstFolder); Result.push_back(std::make_pair(FirstFolder / "first_folder_blob1.bin", CreateRandomBlob(22))); Result.push_back(std::make_pair(FirstFolder / "first_folder_blob2.bin", CreateRandomBlob(122))); std::filesystem::path SecondFolder(RootPath / "second_folder"); - std::filesystem::create_directory(SecondFolder); + CreateDirectories(SecondFolder); Result.push_back(std::make_pair(SecondFolder / "second_folder_blob1.bin", CreateRandomBlob(522))); Result.push_back(std::make_pair(SecondFolder / "second_folder_blob2.bin", CreateRandomBlob(122))); Result.push_back(std::make_pair(SecondFolder / "second_folder_blob3.bin", CreateRandomBlob(225))); std::filesystem::path SecondFolderChild(SecondFolder / "child_in_second"); - std::filesystem::create_directory(SecondFolderChild); + CreateDirectories(SecondFolderChild); Result.push_back(std::make_pair(SecondFolderChild / "second_child_folder_blob1.bin", CreateRandomBlob(622))); for (const auto& It : Result) @@ -1365,13 +1365,13 @@ TEST_CASE("workspaces.scanfolder") Structure->IterateEntries([&](const Oid& Id, const FolderStructure::FileEntry& Entry) { std::filesystem::path AbsPath = RootPath / Entry.RelativePath; - CHECK(std::filesystem::is_regular_file(AbsPath)); - CHECK(std::filesystem::file_size(AbsPath) == Entry.Size); + CHECK(IsFile(AbsPath)); + CHECK(FileSizeFromPath(AbsPath) == Entry.Size); const FolderStructure::FileEntry* FindEntry = Structure->FindEntry(Id); CHECK(FindEntry); std::filesystem::path Path = RootPath / FindEntry->RelativePath; CHECK(AbsPath == Path); - CHECK(std::filesystem::file_size(AbsPath) == FindEntry->Size); + CHECK(FileSizeFromPath(AbsPath) == FindEntry->Size); }); } diff --git a/src/zenstore/xmake.lua b/src/zenstore/xmake.lua index f0bd64d2e..031a66829 100644 --- a/src/zenstore/xmake.lua +++ b/src/zenstore/xmake.lua @@ -8,3 +8,4 @@ target('zenstore') add_includedirs("include", {public=true}) add_deps("zencore", "zenutil") add_packages("vcpkg::robin-map") + add_packages("vcpkg::eastl", {public=true}); diff --git a/src/zenstore/zenstore.cpp b/src/zenstore/zenstore.cpp index c697647d2..654fb3510 100644 --- a/src/zenstore/zenstore.cpp +++ b/src/zenstore/zenstore.cpp @@ -5,6 +5,7 @@ #if ZEN_WITH_TESTS # include <zenstore/blockstore.h> +# include <zenstore/buildstore/buildstore.h> # include <zenstore/cache/structuredcachestore.h> # include <zenstore/workspaces.h> # include <zenstore/gc.h> @@ -19,6 +20,7 @@ namespace zen { void zenstore_forcelinktests() { + buildstore_forcelink(); CAS_forcelink(); filecas_forcelink(); blockstore_forcelink(); diff --git a/src/zentest-appstub/zentest-appstub.cpp b/src/zentest-appstub/zentest-appstub.cpp index 66e6e03fd..24cf21e97 100644 --- a/src/zentest-appstub/zentest-appstub.cpp +++ b/src/zentest-appstub/zentest-appstub.cpp @@ -1,6 +1,7 @@ // Copyright Epic Games, Inc. All Rights Reserved. #include <stdio.h> +#include <chrono> #include <cstdlib> #include <cstring> #include <thread> diff --git a/src/zenutil-test/zenutil-test.cpp b/src/zenutil-test/zenutil-test.cpp index fadaf0995..ca8208314 100644 --- a/src/zenutil-test/zenutil-test.cpp +++ b/src/zenutil-test/zenutil-test.cpp @@ -2,6 +2,7 @@ #include <zencore/filesystem.h> #include <zencore/logging.h> +#include <zencore/trace.h> #include <zenutil/zenutil.h> #include <zencore/memory/newdelete.h> @@ -17,6 +18,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) #if ZEN_WITH_TESTS zen::zenutil_forcelinktests(); + zen::TraceInit("zencore-test"); zen::logging::InitializeLogging(); zen::MaximizeOpenFileCount(); diff --git a/src/zenutil/bufferedwritefilecache.cpp b/src/zenutil/bufferedwritefilecache.cpp new file mode 100644 index 000000000..a52850314 --- /dev/null +++ b/src/zenutil/bufferedwritefilecache.cpp @@ -0,0 +1,177 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/bufferedwritefilecache.h> + +#include <zencore/logging.h> +#include <zencore/trace.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +BufferedWriteFileCache::BufferedWriteFileCache() : m_CacheHitCount(0), m_CacheMissCount(0), m_OpenHandleCount(0), m_DroppedHandleCount(0) +{ +} + +BufferedWriteFileCache::~BufferedWriteFileCache() +{ + ZEN_TRACE_CPU("~BufferedWriteFileCache()"); + + try + { + for (TOpenHandles& OpenHandles : m_OpenFiles) + { + while (BasicFile* File = OpenHandles.Pop()) + { + std::unique_ptr<BasicFile> FileToClose(File); + m_OpenHandleCount--; + } + } + m_OpenFiles.clear(); + m_ChunkWriters.clear(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("~BufferedWriteFileCache() threw exeption: {}", Ex.what()); + } +} + +std::unique_ptr<BasicFile> +BufferedWriteFileCache::Get(uint32_t FileIndex) +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::Get"); + + RwLock::ExclusiveLockScope _(m_WriterLock); + if (auto It = m_ChunkWriters.find(FileIndex); It != m_ChunkWriters.end()) + { + const uint32_t HandleIndex = It->second; + TOpenHandles& OpenHandles = m_OpenFiles[HandleIndex]; + if (BasicFile* File = OpenHandles.Pop(); File != nullptr) + { + m_OpenHandleCount--; + m_CacheHitCount++; + return std::unique_ptr<BasicFile>(File); + } + } + m_CacheMissCount++; + return nullptr; +} + +void +BufferedWriteFileCache::Put(uint32_t FileIndex, std::unique_ptr<BasicFile>&& Writer) +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::Put"); + + if (m_OpenHandleCount.load() >= MaxBufferedCount) + { + m_DroppedHandleCount++; + return; + } + RwLock::ExclusiveLockScope _(m_WriterLock); + if (auto It = m_ChunkWriters.find(FileIndex); It != m_ChunkWriters.end()) + { + const uint32_t HandleIndex = It->second; + TOpenHandles& OpenHandles = m_OpenFiles[HandleIndex]; + if (OpenHandles.Push(Writer.get())) + { + Writer.release(); + m_OpenHandleCount++; + } + else + { + m_DroppedHandleCount++; + } + } + else + { + const uint32_t HandleIndex = gsl::narrow<uint32_t>(m_OpenFiles.size()); + m_OpenFiles.push_back(TOpenHandles{}); + m_OpenFiles.back().Push(Writer.release()); + m_ChunkWriters.insert_or_assign(FileIndex, HandleIndex); + m_OpenHandleCount++; + } +} + +void +BufferedWriteFileCache::Close(std::span<uint32_t> FileIndexes) +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::Close"); + + std::vector<std::unique_ptr<BasicFile>> FilesToClose; + FilesToClose.reserve(FileIndexes.size()); + { + RwLock::ExclusiveLockScope _(m_WriterLock); + for (uint32_t FileIndex : FileIndexes) + { + if (auto It = m_ChunkWriters.find(FileIndex); It != m_ChunkWriters.end()) + { + const uint32_t HandleIndex = It->second; + TOpenHandles& OpenHandles = m_OpenFiles[HandleIndex]; + while (BasicFile* File = OpenHandles.Pop()) + { + FilesToClose.emplace_back(std::unique_ptr<BasicFile>(File)); + m_OpenHandleCount--; + } + m_ChunkWriters.erase(It); + } + } + } + FilesToClose.clear(); +} + +BufferedWriteFileCache::Local::Local(BufferedWriteFileCache& Cache) : m_Cache(Cache) +{ +} + +BufferedWriteFileCache::Local::Writer* +BufferedWriteFileCache::Local::GetWriter(uint32_t FileIndex) +{ + if (auto It = m_FileIndexToWriterIndex.find(FileIndex); It != m_FileIndexToWriterIndex.end()) + { + return m_ChunkWriters[It->second].get(); + } + std::unique_ptr<BasicFile> File = m_Cache.Get(FileIndex); + if (File) + { + const uint32_t WriterIndex = gsl::narrow<uint32_t>(m_ChunkWriters.size()); + m_FileIndexToWriterIndex.insert_or_assign(FileIndex, WriterIndex); + m_ChunkWriters.emplace_back(std::make_unique<Writer>(Writer{.File = std::move(File)})); + return m_ChunkWriters.back().get(); + } + return nullptr; +} + +BufferedWriteFileCache::Local::Writer* +BufferedWriteFileCache::Local::PutWriter(uint32_t FileIndex, std::unique_ptr<Writer> Writer) +{ + ZEN_ASSERT(!m_FileIndexToWriterIndex.contains(FileIndex)); + const uint32_t WriterIndex = gsl::narrow<uint32_t>(m_ChunkWriters.size()); + m_FileIndexToWriterIndex.insert_or_assign(FileIndex, WriterIndex); + m_ChunkWriters.emplace_back(std::move(Writer)); + return m_ChunkWriters.back().get(); +} + +BufferedWriteFileCache::Local::~Local() +{ + ZEN_TRACE_CPU("BufferedWriteFileCache::~Local()"); + try + { + for (auto& It : m_FileIndexToWriterIndex) + { + const uint32_t FileIndex = It.first; + const uint32_t WriterIndex = It.second; + m_ChunkWriters[WriterIndex]->Writer.reset(); + std::unique_ptr<BasicFile> File; + File.swap(m_ChunkWriters[WriterIndex]->File); + m_Cache.Put(FileIndex, std::move(File)); + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("BufferedWriteFileCache::~Local() threw exeption: {}", Ex.what()); + } +} + +} // namespace zen diff --git a/src/zenutil/buildstoragecache.cpp b/src/zenutil/buildstoragecache.cpp new file mode 100644 index 000000000..88238effd --- /dev/null +++ b/src/zenutil/buildstoragecache.cpp @@ -0,0 +1,407 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/buildstoragecache.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryvalidation.h> +#include <zencore/fmtutils.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> +#include <zencore/workthreadpool.h> +#include <zenhttp/httpclient.h> +#include <zenhttp/packageformat.h> +#include <zenutil/workerpools.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +class ZenBuildStorageCache : public BuildStorageCache +{ +public: + explicit ZenBuildStorageCache(HttpClient& HttpClient, + BuildStorageCache::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath, + bool BoostBackgroundThreadCount) + : m_HttpClient(HttpClient) + , m_Stats(Stats) + , m_Namespace(Namespace.empty() ? "none" : Namespace) + , m_Bucket(Bucket.empty() ? "none" : Bucket) + , m_TempFolderPath(std::filesystem::path(TempFolderPath).make_preferred()) + , m_BoostBackgroundThreadCount(BoostBackgroundThreadCount) + , m_BackgroundWorkPool(m_BoostBackgroundThreadCount ? GetSmallWorkerPool(EWorkloadType::Background) + : GetTinyWorkerPool(EWorkloadType::Background)) + , m_PendingBackgroundWorkCount(1) + , m_CancelBackgroundWork(false) + { + } + + virtual ~ZenBuildStorageCache() + { + try + { + m_CancelBackgroundWork.store(true); + if (!IsFlushed) + { + m_PendingBackgroundWorkCount.CountDown(); + m_PendingBackgroundWorkCount.Wait(); + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("~ZenBuildStorageCache() failed with: {}", Ex.what()); + } + } + + void ScheduleBackgroundWork(std::function<void()>&& Work) + { + m_PendingBackgroundWorkCount.AddCount(1); + try + { + m_BackgroundWorkPool.ScheduleWork([this, Work = std::move(Work)]() { + ZEN_TRACE_CPU("ZenBuildStorageCache::BackgroundWork"); + auto _ = MakeGuard([this]() { m_PendingBackgroundWorkCount.CountDown(); }); + if (!m_CancelBackgroundWork) + { + try + { + Work(); + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Failed executing background upload to build cache. Reason: {}", Ex.what()); + } + } + }); + } + catch (const std::exception& Ex) + { + m_PendingBackgroundWorkCount.CountDown(); + ZEN_ERROR("Failed scheduling background upload to build cache. Reason: {}", Ex.what()); + } + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + ZEN_ASSERT(!IsFlushed); + ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); + ScheduleBackgroundWork( + [this, BuildId = Oid(BuildId), RawHash = IoHash(RawHash), ContentType, Payload = CompositeBuffer(Payload)]() { + ZEN_TRACE_CPU("ZenBuildStorageCache::PutBuildBlob"); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + + HttpClient::Response CacheResponse = + m_HttpClient.Upload(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()), + Payload, + ContentType); + AddStatistic(CacheResponse); + if (!CacheResponse.IsSuccess()) + { + ZEN_DEBUG("Failed posting blob to cache: {}", CacheResponse.ErrorMessage(""sv)); + } + }); + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override + { + ZEN_TRACE_CPU("ZenBuildStorageCache::GetBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + + HttpClient::KeyValueMap Headers; + if (RangeOffset != 0 || RangeBytes != (uint64_t)-1) + { + Headers.Entries.insert({"Range", fmt::format("bytes={}-{}", RangeOffset, RangeOffset + RangeBytes - 1)}); + } + CreateDirectories(m_TempFolderPath); + HttpClient::Response CacheResponse = + m_HttpClient.Download(fmt::format("/builds/{}/{}/{}/blobs/{}", m_Namespace, m_Bucket, BuildId, RawHash.ToHexString()), + m_TempFolderPath, + Headers); + AddStatistic(CacheResponse); + if (CacheResponse.IsSuccess()) + { + return CacheResponse.ResponsePayload; + } + return {}; + } + + virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) override + { + ZEN_ASSERT(!IsFlushed); + ScheduleBackgroundWork([this, + BuildId = Oid(BuildId), + BlobRawHashes = std::vector<IoHash>(BlobHashes.begin(), BlobHashes.end()), + MetaDatas = std::vector<CbObject>(MetaDatas.begin(), MetaDatas.end())]() { + ZEN_TRACE_CPU("ZenBuildStorageCache::PutBlobMetadatas"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + + const uint64_t BlobCount = BlobRawHashes.size(); + + CbPackage RequestPackage; + std::vector<CbAttachment> Attachments; + tsl::robin_set<IoHash, IoHash::Hasher> AttachmentHashes; + Attachments.reserve(BlobCount); + AttachmentHashes.reserve(BlobCount); + { + CbObjectWriter RequestWriter; + RequestWriter.BeginArray("blobHashes"); + for (size_t BlockHashIndex = 0; BlockHashIndex < BlobRawHashes.size(); BlockHashIndex++) + { + RequestWriter.AddHash(BlobRawHashes[BlockHashIndex]); + } + RequestWriter.EndArray(); // blobHashes + + RequestWriter.BeginArray("metadatas"); + for (size_t BlockHashIndex = 0; BlockHashIndex < BlobRawHashes.size(); BlockHashIndex++) + { + const IoHash ObjectHash = MetaDatas[BlockHashIndex].GetHash(); + RequestWriter.AddBinaryAttachment(ObjectHash); + if (!AttachmentHashes.contains(ObjectHash)) + { + Attachments.push_back(CbAttachment(MetaDatas[BlockHashIndex], ObjectHash)); + AttachmentHashes.insert(ObjectHash); + } + } + + RequestWriter.EndArray(); // metadatas + + RequestPackage.SetObject(RequestWriter.Save()); + } + RequestPackage.AddAttachments(Attachments); + + CompositeBuffer RpcRequestBuffer = FormatPackageMessageBuffer(RequestPackage); + + HttpClient::Response CacheResponse = + m_HttpClient.Post(fmt::format("/builds/{}/{}/{}/blobs/putBlobMetadata", m_Namespace, m_Bucket, BuildId), + RpcRequestBuffer, + ZenContentType::kCbPackage); + AddStatistic(CacheResponse); + if (!CacheResponse.IsSuccess()) + { + ZEN_DEBUG("Failed posting blob metadata to cache: {}", CacheResponse.ErrorMessage(""sv)); + } + }); + } + + virtual std::vector<CbObject> GetBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes) override + { + ZEN_TRACE_CPU("ZenBuildStorageCache::GetBlobMetadatas"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + + CbObjectWriter Request; + + Request.BeginArray("blobHashes"sv); + for (const IoHash& BlobHash : BlobHashes) + { + Request.AddHash(BlobHash); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + + HttpClient::Response Response = + m_HttpClient.Post(fmt::format("/builds/{}/{}/{}/blobs/getBlobMetadata", m_Namespace, m_Bucket, BuildId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + AddStatistic(Response); + if (Response.IsSuccess()) + { + std::vector<CbObject> Result; + + CbPackage ResponsePackage = ParsePackageMessage(Response.ResponsePayload); + CbObject ResponseObject = ResponsePackage.GetObject(); + + CbArrayView BlobHashArray = ResponseObject["blobHashes"sv].AsArrayView(); + CbArrayView MetadatasArray = ResponseObject["metadatas"sv].AsArrayView(); + Result.reserve(MetadatasArray.Num()); + auto BlobHashesIt = BlobHashes.begin(); + auto BlobHashArrayIt = begin(BlobHashArray); + auto MetadataArrayIt = begin(MetadatasArray); + while (MetadataArrayIt != end(MetadatasArray)) + { + const IoHash BlobHash = (*BlobHashArrayIt).AsHash(); + while (BlobHash != *BlobHashesIt) + { + ZEN_ASSERT(BlobHashesIt != BlobHashes.end()); + BlobHashesIt++; + } + + ZEN_ASSERT(BlobHash == *BlobHashesIt); + + const IoHash MetaHash = (*MetadataArrayIt).AsAttachment(); + const CbAttachment* MetaAttachment = ResponsePackage.FindAttachment(MetaHash); + ZEN_ASSERT(MetaAttachment); + + CbObject Metadata = MetaAttachment->AsObject(); + Result.emplace_back(std::move(Metadata)); + + BlobHashArrayIt++; + MetadataArrayIt++; + BlobHashesIt++; + } + return Result; + } + return {}; + } + + virtual std::vector<BlobExistsResult> BlobsExists(const Oid& BuildId, std::span<const IoHash> BlobHashes) override + { + ZEN_TRACE_CPU("ZenBuildStorageCache::BlobsExists"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + + CbObjectWriter Request; + + Request.BeginArray("blobHashes"sv); + for (const IoHash& BlobHash : BlobHashes) + { + Request.AddHash(BlobHash); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + + HttpClient::Response Response = m_HttpClient.Post(fmt::format("/builds/{}/{}/{}/blobs/exists", m_Namespace, m_Bucket, BuildId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + AddStatistic(Response); + if (Response.IsSuccess()) + { + CbObject ResponseObject = LoadCompactBinaryObject(Response.ResponsePayload); + if (!ResponseObject) + { + throw std::runtime_error("BlobExists reponse is invalid, failed to load payload as compact binary object"); + } + CbArrayView BlobsExistsArray = ResponseObject["blobExists"sv].AsArrayView(); + if (!BlobsExistsArray) + { + throw std::runtime_error("BlobExists reponse is invalid, 'blobExists' array is missing"); + } + if (BlobsExistsArray.Num() != BlobHashes.size()) + { + throw std::runtime_error(fmt::format("BlobExists reponse is invalid, 'blobExists' array contains {} entries, expected {}", + BlobsExistsArray.Num(), + BlobHashes.size())); + } + + CbArrayView MetadatasExistsArray = ResponseObject["metadataExists"sv].AsArrayView(); + if (!MetadatasExistsArray) + { + throw std::runtime_error("BlobExists reponse is invalid, 'metadataExists' array is missing"); + } + if (MetadatasExistsArray.Num() != BlobHashes.size()) + { + throw std::runtime_error( + fmt::format("BlobExists reponse is invalid, 'metadataExists' array contains {} entries, expected {}", + MetadatasExistsArray.Num(), + BlobHashes.size())); + } + + std::vector<BlobExistsResult> Result; + Result.reserve(BlobHashes.size()); + auto BlobExistsIt = begin(BlobsExistsArray); + auto MetadataExistsIt = begin(MetadatasExistsArray); + while (BlobExistsIt != end(BlobsExistsArray)) + { + ZEN_ASSERT(MetadataExistsIt != end(MetadatasExistsArray)); + + const bool HasBody = (*BlobExistsIt).AsBool(); + const bool HasMetadata = (*MetadataExistsIt).AsBool(); + + Result.push_back({.HasBody = HasBody, .HasMetadata = HasMetadata}); + + BlobExistsIt++; + MetadataExistsIt++; + } + return Result; + } + return {}; + } + + virtual void Flush(int32_t UpdateIntervalMS, std::function<bool(intptr_t Remaining)>&& UpdateCallback) override + { + if (IsFlushed) + { + return; + } + if (!IsFlushed) + { + m_PendingBackgroundWorkCount.CountDown(); + IsFlushed = true; + } + if (m_PendingBackgroundWorkCount.Wait(100)) + { + return; + } + while (true) + { + intptr_t Remaining = m_PendingBackgroundWorkCount.Remaining(); + if (UpdateCallback(Remaining)) + { + if (m_PendingBackgroundWorkCount.Wait(UpdateIntervalMS)) + { + UpdateCallback(0); + return; + } + } + else + { + m_CancelBackgroundWork.store(true); + } + } + } + +private: + void AddStatistic(const HttpClient::Response& Result) + { + m_Stats.TotalBytesWritten += Result.UploadedBytes; + m_Stats.TotalBytesRead += Result.DownloadedBytes; + m_Stats.TotalRequestTimeUs += uint64_t(Result.ElapsedSeconds * 1000000.0); + m_Stats.TotalRequestCount++; + } + + HttpClient& m_HttpClient; + BuildStorageCache::Statistics& m_Stats; + const std::string m_Namespace; + const std::string m_Bucket; + const std::filesystem::path m_TempFolderPath; + const bool m_BoostBackgroundThreadCount; + bool IsFlushed = false; + + WorkerThreadPool& m_BackgroundWorkPool; + Latch m_PendingBackgroundWorkCount; + std::atomic<bool> m_CancelBackgroundWork; +}; + +std::unique_ptr<BuildStorageCache> +CreateZenBuildStorageCache(HttpClient& HttpClient, + BuildStorageCache::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath, + bool BoostBackgroundThreadCount) +{ + return std::make_unique<ZenBuildStorageCache>(HttpClient, Stats, Namespace, Bucket, TempFolderPath, BoostBackgroundThreadCount); +} + +} // namespace zen diff --git a/src/zenutil/cache/rpcrecording.cpp b/src/zenutil/cache/rpcrecording.cpp index 1f951167d..46e80f6b7 100644 --- a/src/zenutil/cache/rpcrecording.cpp +++ b/src/zenutil/cache/rpcrecording.cpp @@ -46,7 +46,7 @@ struct RecordedRequestsWriter void BeginWrite(const std::filesystem::path& BasePath) { m_BasePath = BasePath; - std::filesystem::create_directories(m_BasePath); + CreateDirectories(m_BasePath); } void EndWrite() @@ -366,6 +366,7 @@ private: }; std::unique_ptr<std::thread> m_WriterThread; + std::atomic_bool m_IsWriterReady{false}; std::atomic_bool m_IsActive{false}; std::atomic_int64_t m_PendingRequests{0}; RwLock m_RequestQueueLock; @@ -426,7 +427,7 @@ RecordedRequestsSegmentWriter::BeginWrite(const std::filesystem::path& BasePath, m_BasePath = BasePath; m_SegmentIndex = SegmentIndex; m_RequestBaseIndex = RequestBaseIndex; - std::filesystem::create_directories(m_BasePath); + CreateDirectories(m_BasePath); } void @@ -658,6 +659,8 @@ RecordedRequestsWriter::BeginWrite(const std::filesystem::path& BasePath) m_IsActive = true; m_WriterThread.reset(new std::thread(&RecordedRequestsWriter::WriterThreadMain, this)); + + m_IsWriterReady.wait(false); } void @@ -707,6 +710,9 @@ RecordedRequestsWriter::WriterThreadMain() SetCurrentThreadName("rpc_writer"); EnsureCurrentSegment(); + m_IsWriterReady.store(true); + m_IsWriterReady.notify_all(); + while (m_IsActive) { m_PendingRequests.wait(0); @@ -1051,14 +1057,14 @@ public: static bool IsCompatible(const std::filesystem::path& BasePath) { - if (std::filesystem::exists(BasePath / "rpc_recording_info.zcb")) + if (IsFile(BasePath / "rpc_recording_info.zcb")) { return true; } const std::filesystem::path SegmentZero = BasePath / MakeSegmentPath(0); - if (std::filesystem::exists(SegmentZero / "rpc_segment_info.zcb") && std::filesystem::exists(SegmentZero / "index.bin")) + if (IsFile(SegmentZero / "rpc_segment_info.zcb") && IsFile(SegmentZero / "index.bin")) { // top-level metadata is missing, possibly because of premature exit // on the recording side diff --git a/src/zenutil/chunkblock.cpp b/src/zenutil/chunkblock.cpp new file mode 100644 index 000000000..abfc0fb63 --- /dev/null +++ b/src/zenutil/chunkblock.cpp @@ -0,0 +1,257 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/chunkblock.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> + +#include <vector> + +namespace zen { + +using namespace std::literals; + +ChunkBlockDescription +ParseChunkBlockDescription(const CbObjectView& BlockObject) +{ + ChunkBlockDescription Result; + Result.BlockHash = BlockObject["rawHash"sv].AsHash(); + if (Result.BlockHash != IoHash::Zero) + { + Result.HeaderSize = BlockObject["headerSize"sv].AsUInt64(); + CbArrayView ChunksArray = BlockObject["rawHashes"sv].AsArrayView(); + Result.ChunkRawHashes.reserve(ChunksArray.Num()); + for (CbFieldView ChunkView : ChunksArray) + { + Result.ChunkRawHashes.push_back(ChunkView.AsHash()); + } + + CbArrayView ChunkRawLengthsArray = BlockObject["chunkRawLengths"sv].AsArrayView(); + Result.ChunkRawLengths.reserve(ChunkRawLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkRawLengthsArray) + { + Result.ChunkRawLengths.push_back(ChunkView.AsUInt32()); + } + + CbArrayView ChunkCompressedLengthsArray = BlockObject["chunkCompressedLengths"sv].AsArrayView(); + Result.ChunkCompressedLengths.reserve(ChunkCompressedLengthsArray.Num()); + for (CbFieldView ChunkView : ChunkCompressedLengthsArray) + { + Result.ChunkCompressedLengths.push_back(ChunkView.AsUInt32()); + } + } + return Result; +} + +std::vector<ChunkBlockDescription> +ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject) +{ + if (!BlocksObject) + { + return {}; + } + std::vector<ChunkBlockDescription> Result; + CbArrayView Blocks = BlocksObject["blocks"sv].AsArrayView(); + Result.reserve(Blocks.Num()); + for (CbFieldView BlockView : Blocks) + { + CbObjectView BlockObject = BlockView.AsObjectView(); + Result.emplace_back(ParseChunkBlockDescription(BlockObject)); + } + return Result; +} + +CbObject +BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData) +{ + ZEN_ASSERT(Block.BlockHash != IoHash::Zero); + ZEN_ASSERT(Block.HeaderSize > 0); + ZEN_ASSERT(Block.ChunkRawLengths.size() == Block.ChunkRawHashes.size()); + ZEN_ASSERT(Block.ChunkCompressedLengths.size() == Block.ChunkRawHashes.size()); + + CbObjectWriter Writer; + Writer.AddHash("rawHash"sv, Block.BlockHash); + Writer.AddInteger("headerSize"sv, Block.HeaderSize); + Writer.BeginArray("rawHashes"sv); + { + for (const IoHash& ChunkHash : Block.ChunkRawHashes) + { + Writer.AddHash(ChunkHash); + } + } + Writer.EndArray(); + + Writer.BeginArray("chunkRawLengths"); + { + for (uint32_t ChunkSize : Block.ChunkRawLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + + Writer.BeginArray("chunkCompressedLengths"); + { + for (uint32_t ChunkSize : Block.ChunkCompressedLengths) + { + Writer.AddInteger(ChunkSize); + } + } + Writer.EndArray(); + + Writer.AddObject("metadata", MetaData); + + return Writer.Save(); +} + +ChunkBlockDescription +GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash) +{ + ChunkBlockDescription BlockDescription = {{.BlockHash = IoHash::HashBuffer(BlockPayload)}}; + if (BlockDescription.BlockHash != RawHash) + { + throw std::runtime_error(fmt::format("Block {} content hash {} does not match block hash", RawHash, BlockDescription.BlockHash)); + } + if (IterateChunkBlock( + BlockPayload, + [&BlockDescription, RawHash](CompressedBuffer&& Chunk, const IoHash& AttachmentHash) { + if (CompositeBuffer Decompressed = Chunk.DecompressToComposite(); Decompressed) + { + IoHash ChunkHash = IoHash::HashBuffer(Decompressed.Flatten()); + if (ChunkHash != AttachmentHash) + { + throw std::runtime_error( + fmt::format("Chunk {} in block {} content hash {} does not match chunk", AttachmentHash, RawHash, ChunkHash)); + } + BlockDescription.ChunkRawHashes.push_back(AttachmentHash); + BlockDescription.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Decompressed.GetSize())); + BlockDescription.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(Chunk.GetCompressedSize())); + } + else + { + throw std::runtime_error(fmt::format("Chunk {} in block {} is not a compressed buffer", AttachmentHash, RawHash)); + } + }, + BlockDescription.HeaderSize)) + { + return BlockDescription; + } + else + { + throw std::runtime_error(fmt::format("Block {} is malformed", RawHash)); + } +} + +CompressedBuffer +GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock) +{ + const size_t ChunkCount = FetchChunks.size(); + + std::vector<SharedBuffer> ChunkSegments; + ChunkSegments.resize(1); + ChunkSegments.reserve(1 + ChunkCount); + OutBlock.ChunkRawHashes.reserve(ChunkCount); + OutBlock.ChunkRawLengths.reserve(ChunkCount); + OutBlock.ChunkCompressedLengths.reserve(ChunkCount); + { + IoBuffer TempBuffer(ChunkCount * 9); + MutableMemoryView View = TempBuffer.GetMutableView(); + uint8_t* BufferStartPtr = reinterpret_cast<uint8_t*>(View.GetData()); + uint8_t* BufferEndPtr = BufferStartPtr; + BufferEndPtr += WriteVarUInt(gsl::narrow<uint64_t>(ChunkCount), BufferEndPtr); + for (const auto& It : FetchChunks) + { + std::pair<uint64_t, CompressedBuffer> Chunk = It.second(It.first); + uint64_t ChunkSize = 0; + std::span<const SharedBuffer> Segments = Chunk.second.GetCompressed().GetSegments(); + for (const SharedBuffer& Segment : Segments) + { + ZEN_ASSERT(Segment.IsOwned()); + ChunkSize += Segment.GetSize(); + ChunkSegments.push_back(Segment); + } + BufferEndPtr += WriteVarUInt(ChunkSize, BufferEndPtr); + OutBlock.ChunkRawHashes.push_back(It.first); + OutBlock.ChunkRawLengths.push_back(gsl::narrow<uint32_t>(Chunk.first)); + OutBlock.ChunkCompressedLengths.push_back(gsl::narrow<uint32_t>(ChunkSize)); + } + ZEN_ASSERT(BufferEndPtr <= View.GetDataEnd()); + ptrdiff_t TempBufferLength = std::distance(BufferStartPtr, BufferEndPtr); + ChunkSegments[0] = SharedBuffer(IoBuffer(TempBuffer, 0, gsl::narrow<size_t>(TempBufferLength))); + OutBlock.HeaderSize = TempBufferLength; + } + CompressedBuffer CompressedBlock = + CompressedBuffer::Compress(CompositeBuffer(std::move(ChunkSegments)), OodleCompressor::Mermaid, OodleCompressionLevel::None); + OutBlock.BlockHash = CompressedBlock.DecodeRawHash(); + return CompressedBlock; +} + +std::vector<uint32_t> +ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize) +{ + const uint8_t* ReadPtr = reinterpret_cast<const uint8_t*>(BlockView.GetData()); + uint32_t NumberSize; + uint64_t ChunkCount = ReadVarUInt(ReadPtr, NumberSize); + ReadPtr += NumberSize; + std::vector<uint32_t> ChunkSizes; + ChunkSizes.reserve(ChunkCount); + while (ChunkCount--) + { + if (ReadPtr >= BlockView.GetDataEnd()) + { + throw std::runtime_error("Invalid block header, block data ended unexpectedly"); + } + uint64_t ChunkSize = ReadVarUInt(ReadPtr, NumberSize); + if (ChunkSize > std::numeric_limits<uint32_t>::max()) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + if (ChunkSize < 1) + { + throw std::runtime_error("Invalid block header, header data is corrupt"); + } + ChunkSizes.push_back(gsl::narrow<uint32_t>(ChunkSize)); + ReadPtr += NumberSize; + } + uint64_t Offset = std::distance((const uint8_t*)BlockView.GetData(), ReadPtr); + OutHeaderSize = Offset; + return ChunkSizes; +} + +bool +IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor, + uint64_t& OutHeaderSize) +{ + ZEN_ASSERT(BlockPayload); + if (BlockPayload.GetSize() < 1) + { + return false; + } + + MemoryView BlockView = BlockPayload.GetView(); + + std::vector<uint32_t> ChunkSizes = ReadChunkBlockHeader(BlockView, OutHeaderSize); + uint64_t Offset = OutHeaderSize; + OutHeaderSize = Offset; + for (uint64_t ChunkSize : ChunkSizes) + { + IoBuffer Chunk(BlockPayload.AsIoBuffer(), Offset, ChunkSize); + IoHash AttachmentRawHash; + uint64_t AttachmentRawSize; + CompressedBuffer CompressedChunk = CompressedBuffer::FromCompressed(SharedBuffer(Chunk), AttachmentRawHash, AttachmentRawSize); + ZEN_ASSERT_SLOW(IoHash::HashBuffer(CompressedChunk.DecompressToComposite()) == AttachmentRawHash); + if (!CompressedChunk) + { + ZEN_ERROR("Invalid chunk in block"); + return false; + } + Visitor(std::move(CompressedChunk), AttachmentRawHash); + Offset += ChunkSize; + ZEN_ASSERT(Offset <= BlockView.GetSize()); + } + return true; +}; + +} // namespace zen diff --git a/src/zenutil/chunkedcontent.cpp b/src/zenutil/chunkedcontent.cpp new file mode 100644 index 000000000..cd1bf7dd7 --- /dev/null +++ b/src/zenutil/chunkedcontent.cpp @@ -0,0 +1,951 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/chunkedcontent.h> + +#include <zencore/filesystem.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> + +#include <zenutil/chunkedfile.h> +#include <zenutil/chunkingcontroller.h> +#include <zenutil/parallelwork.h> +#include <zenutil/workerpools.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_set.h> +#include <gsl/gsl-lite.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +namespace { + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + const IoHash& RawHash, + std::span<const uint32_t> ChunkSequence, + std::span<const IoHash> ChunkHashes, + std::span<const uint64_t> ChunkRawSizes) + { + ZEN_ASSERT(ChunkHashes.size() == ChunkRawSizes.size()); + InOutChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(ChunkSequence.size())); + InOutChunkedContent.ChunkOrders.reserve(InOutChunkedContent.ChunkOrders.size() + ChunkSequence.size()); + + for (uint32_t ChunkedSequenceIndex : ChunkSequence) + { + const IoHash& ChunkHash = ChunkHashes[ChunkedSequenceIndex]; + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(ChunkHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(ChunkHash); + InOutChunkedContent.ChunkRawSizes.push_back(ChunkRawSizes[ChunkedSequenceIndex]); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += ChunkRawSizes[ChunkedSequenceIndex]; + } + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + void AddChunkSequence(ChunkingStatistics& Stats, + ChunkedContentData& InOutChunkedContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + const IoHash& RawHash, + const uint64_t RawSize) + { + InOutChunkedContent.ChunkCounts.push_back(1); + + if (auto It = ChunkHashToChunkIndex.find(RawHash); It != ChunkHashToChunkIndex.end()) + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(It->second); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + } + else + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(InOutChunkedContent.ChunkHashes.size()); + ChunkHashToChunkIndex.insert_or_assign(RawHash, ChunkIndex); + InOutChunkedContent.ChunkHashes.push_back(RawHash); + InOutChunkedContent.ChunkRawSizes.push_back(RawSize); + InOutChunkedContent.ChunkOrders.push_back(ChunkIndex); + Stats.UniqueChunksFound++; + Stats.UniqueBytesFound += RawSize; + } + InOutChunkedContent.SequenceRawHashes.push_back(RawHash); + Stats.UniqueSequencesFound++; + } + + IoHash HashOneFile(ChunkingStatistics& Stats, + const ChunkingController& InChunkingController, + ChunkedFolderContent& OutChunkedContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, + RwLock& Lock, + const std::filesystem::path& FolderPath, + uint32_t PathIndex, + std::atomic<bool>& AbortFlag) + { + ZEN_TRACE_CPU("ChunkFolderContent"); + + const uint64_t RawSize = OutChunkedContent.RawSizes[PathIndex]; + const std::filesystem::path& Path = OutChunkedContent.Paths[PathIndex]; + + if (RawSize == 0) + { + return IoHash::Zero; + } + else + { + ChunkedInfoWithSource Chunked; + const bool DidChunking = + InChunkingController.ProcessFile((FolderPath / Path).make_preferred(), RawSize, Chunked, Stats.BytesHashed, AbortFlag); + if (DidChunking) + { + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Chunked.Info.RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {Chunked.Info.RawHash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + std::vector<uint64_t> ChunkSizes; + ChunkSizes.reserve(Chunked.ChunkSources.size()); + for (const ChunkSource& Source : Chunked.ChunkSources) + { + ChunkSizes.push_back(Source.Size); + } + AddChunkSequence(Stats, + OutChunkedContent.ChunkedContent, + ChunkHashToChunkIndex, + Chunked.Info.RawHash, + Chunked.Info.ChunkSequence, + Chunked.Info.ChunkHashes, + ChunkSizes); + Stats.UniqueSequencesFound++; + } + }); + Stats.FilesChunked++; + return Chunked.Info.RawHash; + } + else + { + ZEN_TRACE_CPU("HashOnly"); + + IoBuffer Buffer = IoBufferBuilder::MakeFromFile((FolderPath / Path).make_preferred()); + if (Buffer.GetSize() != RawSize) + { + throw std::runtime_error(fmt::format("Failed opening file '{}' for hashing", FolderPath / Path)); + } + const IoHash Hash = IoHash::HashBuffer(Buffer, &Stats.BytesHashed); + + Lock.WithExclusiveLock([&]() { + if (!RawHashToSequenceRawHashIndex.contains(Hash)) + { + RawHashToSequenceRawHashIndex.insert( + {Hash, gsl::narrow<uint32_t>(OutChunkedContent.ChunkedContent.SequenceRawHashes.size())}); + AddChunkSequence(Stats, OutChunkedContent.ChunkedContent, ChunkHashToChunkIndex, Hash, RawSize); + Stats.UniqueSequencesFound++; + } + }); + return Hash; + } + } + } + + std::string PathCompareString(const std::filesystem::path& Path) { return ToLower(Path.generic_string()); } + +} // namespace + +std::string_view FolderContentSourcePlatformNames[(size_t)SourcePlatform::_Count] = {"Windows"sv, "Linux"sv, "MacOS"sv}; + +std::string_view +ToString(SourcePlatform Platform) +{ + return FolderContentSourcePlatformNames[(size_t)Platform]; +} + +SourcePlatform +FromString(std::string_view Platform, SourcePlatform Default) +{ + for (size_t Index = 0; Index < (size_t)SourcePlatform::_Count; Index++) + { + if (Platform == FolderContentSourcePlatformNames[Index]) + { + return (SourcePlatform)Index; + } + } + return Default; +} + +SourcePlatform +GetSourceCurrentPlatform() +{ +#if ZEN_PLATFORM_WINDOWS + return SourcePlatform::Windows; +#endif +#if ZEN_PLATFORM_MAC + return SourcePlatform::MacOS; +#endif +#if ZEN_PLATFORM_LINUX + return SourcePlatform::Linux; +#endif +} + +bool +FolderContent::AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs) +{ +#if ZEN_PLATFORM_WINDOWS + return (Lhs & 0xff) == (Rhs & 0xff); +#endif +#if ZEN_PLATFORM_MAC + return Lhs == Rhs; +#endif +#if ZEN_PLATFORM_LINUX + return Lhs == Rhs; +#endif +} + +bool +FolderContent::operator==(const FolderContent& Rhs) const +{ + if ((Platform == Rhs.Platform) && (RawSizes == Rhs.RawSizes) && (Attributes == Rhs.Attributes) && + (ModificationTicks == Rhs.ModificationTicks) && (Paths.size() == Rhs.Paths.size())) + { + size_t PathCount = 0; + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (Paths[PathIndex].generic_string() != Rhs.Paths[PathIndex].generic_string()) + { + return false; + } + } + return true; + } + return false; +} + +bool +FolderContent::AreKnownFilesEqual(const FolderContent& Rhs) const +{ + ZEN_TRACE_CPU("FolderContent::AreKnownFilesEqual"); + tsl::robin_map<std::string, size_t> RhsPathToIndex; + const size_t RhsPathCount = Rhs.Paths.size(); + RhsPathToIndex.reserve(RhsPathCount); + for (size_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + const size_t PathCount = Paths.size(); + for (size_t PathIndex = 0; PathIndex < PathCount; PathIndex++) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const size_t RhsPathIndex = It->second; + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (!AreFileAttributesEqual(Attributes[PathIndex], Rhs.Attributes[RhsPathIndex])) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + return false; + } + } + else + { + return false; + } + } + return true; +} + +void +FolderContent::UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& OutPathIndexesOufOfDate) +{ + ZEN_TRACE_CPU("FolderContent::UpdateState"); + tsl::robin_map<std::string, uint32_t> RhsPathToIndex; + const uint32_t RhsPathCount = gsl::narrow<uint32_t>(Rhs.Paths.size()); + RhsPathToIndex.reserve(RhsPathCount); + for (uint32_t RhsPathIndex = 0; RhsPathIndex < RhsPathCount; RhsPathIndex++) + { + RhsPathToIndex.insert({Rhs.Paths[RhsPathIndex].generic_string(), RhsPathIndex}); + } + uint32_t PathCount = gsl::narrow<uint32_t>(Paths.size()); + for (uint32_t PathIndex = 0; PathIndex < PathCount;) + { + if (auto It = RhsPathToIndex.find(Paths[PathIndex].generic_string()); It != RhsPathToIndex.end()) + { + const uint32_t RhsPathIndex = It->second; + + if ((RawSizes[PathIndex] != Rhs.RawSizes[RhsPathIndex]) || + (ModificationTicks[PathIndex] != Rhs.ModificationTicks[RhsPathIndex])) + { + RawSizes[PathIndex] = Rhs.RawSizes[RhsPathIndex]; + ModificationTicks[PathIndex] = Rhs.ModificationTicks[RhsPathIndex]; + OutPathIndexesOufOfDate.push_back(PathIndex); + } + Attributes[PathIndex] = Rhs.Attributes[RhsPathIndex]; + PathIndex++; + } + else + { + Paths.erase(Paths.begin() + PathIndex); + RawSizes.erase(RawSizes.begin() + PathIndex); + Attributes.erase(Attributes.begin() + PathIndex); + ModificationTicks.erase(ModificationTicks.begin() + PathIndex); + PathCount--; + } + } +} + +FolderContent +GetUpdatedContent(const FolderContent& Old, const FolderContent& New, std::vector<std::filesystem::path>& OutDeletedPaths) +{ + ZEN_TRACE_CPU("FolderContent::GetUpdatedContent"); + + const uint32_t NewPathCount = gsl::narrow<uint32_t>(New.Paths.size()); + + FolderContent Result = {.Platform = Old.Platform}; + Result.Paths.reserve(NewPathCount); + Result.RawSizes.reserve(NewPathCount); + Result.Attributes.reserve(NewPathCount); + Result.ModificationTicks.reserve(NewPathCount); + + tsl::robin_map<std::string, uint32_t> NewPathToIndex; + NewPathToIndex.reserve(NewPathCount); + for (uint32_t NewPathIndex = 0; NewPathIndex < NewPathCount; NewPathIndex++) + { + NewPathToIndex.insert({New.Paths[NewPathIndex].generic_string(), NewPathIndex}); + } + + uint32_t OldPathCount = gsl::narrow<uint32_t>(Old.Paths.size()); + for (uint32_t OldPathIndex = 0; OldPathIndex < OldPathCount; OldPathIndex++) + { + if (auto It = NewPathToIndex.find(Old.Paths[OldPathIndex].generic_string()); It != NewPathToIndex.end()) + { + const uint32_t NewPathIndex = It->second; + + if ((Old.RawSizes[OldPathIndex] != New.RawSizes[NewPathIndex]) || + (Old.ModificationTicks[OldPathIndex] != New.ModificationTicks[NewPathIndex])) + { + Result.Paths.push_back(New.Paths[NewPathIndex]); + Result.RawSizes.push_back(New.RawSizes[NewPathIndex]); + Result.Attributes.push_back(New.Attributes[NewPathIndex]); + Result.ModificationTicks.push_back(New.ModificationTicks[NewPathIndex]); + } + } + else + { + OutDeletedPaths.push_back(Old.Paths[OldPathIndex]); + } + } + return Result; +} + +void +SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output) +{ + ZEN_TRACE_CPU("SaveFolderContentToCompactBinary"); + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.ModificationTicks, "modificationTimes"sv, Output); +} + +FolderContent +LoadFolderContentToCompactBinary(CbObjectView Input) +{ + ZEN_TRACE_CPU("LoadFolderContentToCompactBinary"); + FolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("modificationTimes"sv, Input, Content.ModificationTicks); + return Content; +} + +FolderContent +GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateIntervalMS, + std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag) +{ + ZEN_TRACE_CPU("GetFolderContent"); + + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + FolderContent Content; + struct AsyncVisitor : public GetDirectoryContentVisitor + { + AsyncVisitor(GetFolderContentStatistics& Stats, + std::atomic<bool>& AbortFlag, + FolderContent& Content, + std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile) + : m_Stats(Stats) + , m_AbortFlag(AbortFlag) + , m_FoundContent(Content) + , m_AcceptDirectory(std::move(AcceptDirectory)) + , m_AcceptFile(std::move(AcceptFile)) + { + } + virtual void AsyncVisitDirectory(const std::filesystem::path& RelativeRoot, DirectoryContent&& Content) override + { + if (!m_AbortFlag) + { + m_Stats.FoundFileCount += Content.FileNames.size(); + for (uint64_t FileSize : Content.FileSizes) + { + m_Stats.FoundFileByteCount += FileSize; + } + std::string RelativeDirectoryPath = RelativeRoot.generic_string(); + if (m_AcceptDirectory(RelativeDirectoryPath)) + { + std::vector<std::filesystem::path> Paths; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + std::vector<uint64_t> ModificatonTicks; + Paths.reserve(Content.FileNames.size()); + RawSizes.reserve(Content.FileNames.size()); + Attributes.reserve(Content.FileNames.size()); + ModificatonTicks.reserve(Content.FileModificationTicks.size()); + + for (size_t FileIndex = 0; FileIndex < Content.FileNames.size(); FileIndex++) + { + const std::filesystem::path& FileName = Content.FileNames[FileIndex]; + std::string RelativePath = (RelativeRoot / FileName).generic_string(); + std::replace(RelativePath.begin(), RelativePath.end(), '\\', '/'); + if (m_AcceptFile(RelativePath, Content.FileSizes[FileIndex], Content.FileAttributes[FileIndex])) + { + Paths.emplace_back(std::move(RelativePath)); + RawSizes.emplace_back(Content.FileSizes[FileIndex]); + Attributes.emplace_back(Content.FileAttributes[FileIndex]); + ModificatonTicks.emplace_back(Content.FileModificationTicks[FileIndex]); + + m_Stats.AcceptedFileCount++; + m_Stats.AcceptedFileByteCount += Content.FileSizes[FileIndex]; + } + } + m_Lock.WithExclusiveLock([&]() { + m_FoundContent.Paths.insert(m_FoundContent.Paths.end(), Paths.begin(), Paths.end()); + m_FoundContent.RawSizes.insert(m_FoundContent.RawSizes.end(), RawSizes.begin(), RawSizes.end()); + m_FoundContent.Attributes.insert(m_FoundContent.Attributes.end(), Attributes.begin(), Attributes.end()); + m_FoundContent.ModificationTicks.insert(m_FoundContent.ModificationTicks.end(), + ModificatonTicks.begin(), + ModificatonTicks.end()); + }); + } + } + } + + GetFolderContentStatistics& m_Stats; + std::atomic<bool>& m_AbortFlag; + RwLock m_Lock; + FolderContent& m_FoundContent; + std::function<bool(const std::string_view& RelativePath)> m_AcceptDirectory; + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)> m_AcceptFile; + } Visitor(Stats, AbortFlag, Content, std::move(AcceptDirectory), std::move(AcceptFile)); + + Latch PendingWork(1); + GetDirectoryContent(RootPath, + DirectoryContentFlags::IncludeFiles | DirectoryContentFlags::Recursive | DirectoryContentFlags::IncludeFileSizes | + DirectoryContentFlags::IncludeAttributes | DirectoryContentFlags::IncludeModificationTick, + Visitor, + WorkerPool, + PendingWork); + PendingWork.CountDown(); + while (!PendingWork.Wait(UpdateIntervalMS)) + { + UpdateCallback(AbortFlag.load(), PendingWork.Remaining()); + } + std::vector<size_t> Order; + size_t PathCount = Content.Paths.size(); + Order.resize(Content.Paths.size()); + std::vector<std::string> Parents; + Parents.reserve(PathCount); + std::vector<std::string> Filenames; + Filenames.reserve(PathCount); + for (size_t OrderIndex = 0; OrderIndex < PathCount; OrderIndex++) + { + Order[OrderIndex] = OrderIndex; + Parents.emplace_back(Content.Paths[OrderIndex].parent_path().generic_string()); + Filenames.emplace_back(Content.Paths[OrderIndex].filename().generic_string()); + } + std::sort(Order.begin(), Order.end(), [&Parents, &Filenames](size_t Lhs, size_t Rhs) { + const std::string& LhsParent = Parents[Lhs]; + const std::string& RhsParent = Parents[Rhs]; + if (LhsParent < RhsParent) + { + return true; + } + else if (LhsParent > RhsParent) + { + return false; + } + return Filenames[Lhs] < Filenames[Rhs]; + }); + FolderContent OrderedContent; + OrderedContent.Paths.reserve(PathCount); + OrderedContent.RawSizes.reserve(PathCount); + OrderedContent.Attributes.reserve(PathCount); + OrderedContent.ModificationTicks.reserve(PathCount); + for (size_t OrderIndex : Order) + { + OrderedContent.Paths.emplace_back(std::move(Content.Paths[OrderIndex])); + OrderedContent.RawSizes.emplace_back(Content.RawSizes[OrderIndex]); + OrderedContent.Attributes.emplace_back(Content.Attributes[OrderIndex]); + OrderedContent.ModificationTicks.emplace_back(Content.ModificationTicks[OrderIndex]); + } + return OrderedContent; +} + +void +SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output) +{ + ZEN_TRACE_CPU("SaveChunkedFolderContentToCompactBinary"); + Output.AddString("platform"sv, ToString(Content.Platform)); + compactbinary_helpers::WriteArray(Content.Paths, "paths"sv, Output); + compactbinary_helpers::WriteArray(Content.RawSizes, "rawSizes"sv, Output); + compactbinary_helpers::WriteArray(Content.Attributes, "attributes"sv, Output); + compactbinary_helpers::WriteArray(Content.RawHashes, "rawHashes"sv, Output); + + Output.BeginObject("chunkedContent"); + compactbinary_helpers::WriteArray(Content.ChunkedContent.SequenceRawHashes, "sequenceRawHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkCounts, "chunkCounts"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkOrders, "chunkOrders"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkHashes, "chunkHashes"sv, Output); + compactbinary_helpers::WriteArray(Content.ChunkedContent.ChunkRawSizes, "chunkRawSizes"sv, Output); + Output.EndObject(); // chunkedContent +} + +ChunkedFolderContent +LoadChunkedFolderContentToCompactBinary(CbObjectView Input) +{ + ZEN_TRACE_CPU("LoadChunkedFolderContentToCompactBinary"); + ChunkedFolderContent Content; + Content.Platform = FromString(Input["platform"sv].AsString(), GetSourceCurrentPlatform()); + compactbinary_helpers::ReadArray("paths"sv, Input, Content.Paths); + compactbinary_helpers::ReadArray("rawSizes"sv, Input, Content.RawSizes); + compactbinary_helpers::ReadArray("attributes"sv, Input, Content.Attributes); + compactbinary_helpers::ReadArray("rawHashes"sv, Input, Content.RawHashes); + + CbObjectView ChunkedContentView = Input["chunkedContent"sv].AsObjectView(); + compactbinary_helpers::ReadArray("sequenceRawHashes"sv, ChunkedContentView, Content.ChunkedContent.SequenceRawHashes); + compactbinary_helpers::ReadArray("chunkCounts"sv, ChunkedContentView, Content.ChunkedContent.ChunkCounts); + compactbinary_helpers::ReadArray("chunkOrders"sv, ChunkedContentView, Content.ChunkedContent.ChunkOrders); + compactbinary_helpers::ReadArray("chunkHashes"sv, ChunkedContentView, Content.ChunkedContent.ChunkHashes); + compactbinary_helpers::ReadArray("chunkRawSizes"sv, ChunkedContentView, Content.ChunkedContent.ChunkRawSizes); + return Content; +} + +ChunkedFolderContent +MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays) +{ + ZEN_TRACE_CPU("MergeChunkedFolderContents"); + + ZEN_ASSERT(!Overlays.empty()); + + ChunkedFolderContent Result; + const size_t BasePathCount = Base.Paths.size(); + Result.Paths.reserve(BasePathCount); + Result.RawSizes.reserve(BasePathCount); + Result.Attributes.reserve(BasePathCount); + Result.RawHashes.reserve(BasePathCount); + + const size_t BaseChunkCount = Base.ChunkedContent.ChunkHashes.size(); + Result.ChunkedContent.SequenceRawHashes.reserve(Base.ChunkedContent.SequenceRawHashes.size()); + Result.ChunkedContent.ChunkCounts.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkOrders.reserve(Base.ChunkedContent.ChunkOrders.size()); + + tsl::robin_map<std::string, std::filesystem::path> GenericPathToActualPath; + for (const std::filesystem::path& Path : Base.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + for (const ChunkedFolderContent& Overlay : Overlays) + { + for (const std::filesystem::path& Path : Overlay.Paths) + { + GenericPathToActualPath.insert({PathCompareString(Path), Path}); + } + } + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; + + auto BuildOverlayPaths = [](std::span<const ChunkedFolderContent> Overlays) -> tsl::robin_set<std::string> { + tsl::robin_set<std::string> Result; + for (const ChunkedFolderContent& OverlayContent : Overlays) + { + for (const std::filesystem::path& Path : OverlayContent.Paths) + { + Result.insert(PathCompareString(Path)); + } + } + return Result; + }; + + auto AddContent = [&BuildOverlayPaths](ChunkedFolderContent& Result, + const ChunkedFolderContent& OverlayContent, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& ChunkHashToChunkIndex, + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher>& RawHashToSequenceRawHashIndex, + const tsl::robin_map<std::string, std::filesystem::path>& GenericPathToActualPath, + std::span<const ChunkedFolderContent> Overlays) { + const ChunkedContentLookup OverlayLookup = BuildChunkedContentLookup(OverlayContent); + tsl::robin_set<std::string> BaseOverlayPaths = BuildOverlayPaths(Overlays); + for (uint32_t PathIndex = 0; PathIndex < OverlayContent.Paths.size(); PathIndex++) + { + std::string GenericPath = PathCompareString(OverlayContent.Paths[PathIndex]); + if (!BaseOverlayPaths.contains(GenericPath)) + { + // This asset will not be overridden by a later layer - add it + + const std::filesystem::path OriginalPath = GenericPathToActualPath.at(GenericPath); + Result.Paths.push_back(OriginalPath); + const IoHash& RawHash = OverlayContent.RawHashes[PathIndex]; + Result.RawSizes.push_back(OverlayContent.RawSizes[PathIndex]); + Result.Attributes.push_back(OverlayContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + + if (OverlayContent.RawSizes[PathIndex] > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = OverlayLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = OverlayLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = OverlayContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + ChunkingStatistics Stats; + std::span<const uint32_t> OriginalChunkOrder = + std::span<const uint32_t>(OverlayContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + AddChunkSequence(Stats, + Result.ChunkedContent, + ChunkHashToChunkIndex, + RawHash, + OriginalChunkOrder, + OverlayContent.ChunkedContent.ChunkHashes, + OverlayContent.ChunkedContent.ChunkRawSizes); + Stats.UniqueSequencesFound++; + } + } + } + } + }; + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> MergedChunkHashToChunkIndex; + AddContent(Result, Base, MergedChunkHashToChunkIndex, RawHashToSequenceRawHashIndex, GenericPathToActualPath, Overlays); + for (uint32_t OverlayIndex = 0; OverlayIndex < Overlays.size(); OverlayIndex++) + { + AddContent(Result, + Overlays[OverlayIndex], + MergedChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + GenericPathToActualPath, + Overlays.subspan(OverlayIndex + 1)); + } + return Result; +} + +ChunkedFolderContent +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, + const ChunkedContentLookup& BaseContentLookup, + std::span<const std::filesystem::path> DeletedPaths) +{ + ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); + + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); + ChunkedFolderContent Result = {.Platform = BaseContent.Platform}; + if (DeletedPaths.size() < BaseContent.Paths.size()) + { + tsl::robin_set<std::string> DeletedPathSet; + DeletedPathSet.reserve(DeletedPaths.size()); + for (const std::filesystem::path& DeletedPath : DeletedPaths) + { + DeletedPathSet.insert(PathCompareString(DeletedPath)); + } + + const size_t BaseChunkCount = BaseContent.ChunkedContent.ChunkHashes.size(); + std::vector<uint32_t> NewChunkIndexes(BaseChunkCount, (uint32_t)-1); + + const size_t ExpectedPathCount = BaseContent.Paths.size() - DeletedPaths.size(); + Result.Paths.reserve(ExpectedPathCount); + Result.RawSizes.reserve(ExpectedPathCount); + Result.Attributes.reserve(ExpectedPathCount); + Result.RawHashes.reserve(ExpectedPathCount); + + Result.ChunkedContent.ChunkHashes.reserve(BaseChunkCount); + Result.ChunkedContent.ChunkRawSizes.reserve(BaseChunkCount); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; + for (uint32_t PathIndex = 0; PathIndex < BaseContent.Paths.size(); PathIndex++) + { + const std::filesystem::path& Path = BaseContent.Paths[PathIndex]; + if (!DeletedPathSet.contains(PathCompareString(Path))) + { + const IoHash& RawHash = BaseContent.RawHashes[PathIndex]; + const uint64_t RawSize = BaseContent.RawSizes[PathIndex]; + Result.Paths.push_back(Path); + Result.RawSizes.push_back(RawSize); + Result.Attributes.push_back(BaseContent.Attributes[PathIndex]); + Result.RawHashes.push_back(RawHash); + if (RawSize > 0) + { + if (!RawHashToSequenceRawHashIndex.contains(RawHash)) + { + RawHashToSequenceRawHashIndex.insert( + {RawHash, gsl::narrow<uint32_t>(Result.ChunkedContent.SequenceRawHashes.size())}); + const uint32_t SequenceRawHashIndex = BaseContentLookup.RawHashToSequenceIndex.at(RawHash); + const uint32_t OrderIndexOffset = BaseContentLookup.SequenceIndexChunkOrderOffset[SequenceRawHashIndex]; + const uint32_t ChunkCount = BaseContent.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + + std::span<const uint32_t> OriginalChunkOrder = + std::span<const uint32_t>(BaseContent.ChunkedContent.ChunkOrders).subspan(OrderIndexOffset, ChunkCount); + + Result.ChunkedContent.ChunkCounts.push_back(gsl::narrow<uint32_t>(OriginalChunkOrder.size())); + + for (uint32_t OldChunkIndex : OriginalChunkOrder) + { + if (uint32_t FoundChunkIndex = NewChunkIndexes[OldChunkIndex]; FoundChunkIndex != (uint32_t)-1) + { + Result.ChunkedContent.ChunkOrders.push_back(FoundChunkIndex); + } + else + { + const uint32_t NewChunkIndex = gsl::narrow<uint32_t>(Result.ChunkedContent.ChunkHashes.size()); + NewChunkIndexes[OldChunkIndex] = NewChunkIndex; + const IoHash& ChunkHash = BaseContent.ChunkedContent.ChunkHashes[OldChunkIndex]; + const uint64_t OldChunkSize = BaseContent.ChunkedContent.ChunkRawSizes[OldChunkIndex]; + Result.ChunkedContent.ChunkHashes.push_back(ChunkHash); + Result.ChunkedContent.ChunkRawSizes.push_back(OldChunkSize); + Result.ChunkedContent.ChunkOrders.push_back(NewChunkIndex); + } + } + Result.ChunkedContent.SequenceRawHashes.push_back(RawHash); + } + } + } + } + } + return Result; +} + +ChunkedFolderContent +DeletePathsFromChunkedContent(const ChunkedFolderContent& BaseContent, std::span<const std::filesystem::path> DeletedPaths) +{ + ZEN_TRACE_CPU("DeletePathsFromChunkedContent"); + ZEN_ASSERT(DeletedPaths.size() <= BaseContent.Paths.size()); + if (DeletedPaths.size() == BaseContent.Paths.size()) + { + return {}; + } + const ChunkedContentLookup BaseLookup = BuildChunkedContentLookup(BaseContent); + return DeletePathsFromChunkedContent(BaseContent, BaseLookup, DeletedPaths); +} + +ChunkedFolderContent +ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateIntervalMS, + std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag) +{ + ZEN_TRACE_CPU("ChunkFolderContent"); + + Stopwatch Timer; + auto _ = MakeGuard([&Stats, &Timer]() { Stats.ElapsedWallTimeUS = Timer.GetElapsedTimeUs(); }); + + ChunkedFolderContent Result = {.Platform = Content.Platform, + .Paths = Content.Paths, + .RawSizes = Content.RawSizes, + .Attributes = Content.Attributes}; + const size_t ItemCount = Result.Paths.size(); + Result.RawHashes.resize(ItemCount, IoHash::Zero); + Result.ChunkedContent.SequenceRawHashes.reserve(ItemCount); // Up to 1 per file, maybe less + Result.ChunkedContent.ChunkCounts.reserve(ItemCount); // Up to one per file + Result.ChunkedContent.ChunkOrders.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkHashes.reserve(ItemCount); // At least 1 per file, maybe more + Result.ChunkedContent.ChunkRawSizes.reserve(ItemCount); // At least 1 per file, maybe more + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToChunkSequenceIndex; + RawHashToChunkSequenceIndex.reserve(ItemCount); + ChunkHashToChunkIndex.reserve(ItemCount); + { + std::vector<uint32_t> Order; + Order.resize(ItemCount); + for (uint32_t I = 0; I < ItemCount; I++) + { + Order[I] = I; + } + + // Handle the biggest files first so we don't end up with one straggling large file at the end + // std::sort(Order.begin(), Order.end(), [&](uint32_t Lhs, uint32_t Rhs) { return Result.RawSizes[Lhs] > Result.RawSizes[Rhs]; + //}); + + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceRawHashIndex; + RawHashToSequenceRawHashIndex.reserve(ItemCount); + + RwLock Lock; + + ParallelWork Work(AbortFlag, PauseFlag); + + for (uint32_t PathIndex : Order) + { + if (Work.IsAborted()) + { + break; + } + Work.ScheduleWork(WorkerPool, // GetSyncWorkerPool() + [&, PathIndex](std::atomic<bool>& AbortFlag) { + if (!AbortFlag) + { + IoHash RawHash = HashOneFile(Stats, + InChunkingController, + Result, + ChunkHashToChunkIndex, + RawHashToSequenceRawHashIndex, + Lock, + RootPath, + PathIndex, + AbortFlag); + Lock.WithExclusiveLock([&]() { Result.RawHashes[PathIndex] = RawHash; }); + Stats.FilesProcessed++; + } + }); + } + + Work.Wait(UpdateIntervalMS, [&](bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork) { + ZEN_UNUSED(PendingWork); + UpdateCallback(IsAborted, IsPaused, Work.PendingWork().Remaining()); + }); + } + return Result; +} + +ChunkedContentLookup +BuildChunkedContentLookup(const ChunkedFolderContent& Content) +{ + ZEN_TRACE_CPU("BuildChunkedContentLookup"); + + struct ChunkLocationReference + { + uint32_t ChunkIndex = (uint32_t)-1; + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; + }; + + ChunkedContentLookup Result; + { + const uint32_t SequenceRawHashesCount = gsl::narrow<uint32_t>(Content.ChunkedContent.SequenceRawHashes.size()); + Result.RawHashToSequenceIndex.reserve(SequenceRawHashesCount); + Result.SequenceIndexChunkOrderOffset.reserve(SequenceRawHashesCount); + uint32_t OrderOffset = 0; + for (uint32_t SequenceRawHashIndex = 0; SequenceRawHashIndex < Content.ChunkedContent.SequenceRawHashes.size(); + SequenceRawHashIndex++) + { + Result.RawHashToSequenceIndex.insert({Content.ChunkedContent.SequenceRawHashes[SequenceRawHashIndex], SequenceRawHashIndex}); + Result.SequenceIndexChunkOrderOffset.push_back(OrderOffset); + OrderOffset += Content.ChunkedContent.ChunkCounts[SequenceRawHashIndex]; + } + } + + std::vector<ChunkLocationReference> Locations; + Locations.reserve(Content.ChunkedContent.ChunkOrders.size()); + for (uint32_t SequenceIndex = 0; SequenceIndex < Content.ChunkedContent.SequenceRawHashes.size(); SequenceIndex++) + { + const uint32_t OrderOffset = Result.SequenceIndexChunkOrderOffset[SequenceIndex]; + const uint32_t ChunkCount = Content.ChunkedContent.ChunkCounts[SequenceIndex]; + uint64_t LocationOffset = 0; + for (size_t OrderIndex = OrderOffset; OrderIndex < OrderOffset + ChunkCount; OrderIndex++) + { + uint32_t ChunkIndex = Content.ChunkedContent.ChunkOrders[OrderIndex]; + + Locations.push_back(ChunkLocationReference{.ChunkIndex = ChunkIndex, .SequenceIndex = SequenceIndex, .Offset = LocationOffset}); + + LocationOffset += Content.ChunkedContent.ChunkRawSizes[ChunkIndex]; + } + } + + std::sort(Locations.begin(), Locations.end(), [](const ChunkLocationReference& Lhs, const ChunkLocationReference& Rhs) { + if (Lhs.ChunkIndex < Rhs.ChunkIndex) + { + return true; + } + if (Lhs.ChunkIndex > Rhs.ChunkIndex) + { + return false; + } + if (Lhs.SequenceIndex < Rhs.SequenceIndex) + { + return true; + } + if (Lhs.SequenceIndex > Rhs.SequenceIndex) + { + return false; + } + return Lhs.Offset < Rhs.Offset; + }); + + Result.ChunkSequenceLocations.reserve(Locations.size()); + const uint32_t ChunkCount = gsl::narrow<uint32_t>(Content.ChunkedContent.ChunkHashes.size()); + Result.ChunkHashToChunkIndex.reserve(ChunkCount); + size_t RangeOffset = 0; + for (uint32_t ChunkIndex = 0; ChunkIndex < ChunkCount; ChunkIndex++) + { + Result.ChunkHashToChunkIndex.insert({Content.ChunkedContent.ChunkHashes[ChunkIndex], ChunkIndex}); + uint32_t Count = 0; + while ((RangeOffset + Count < Locations.size()) && (Locations[RangeOffset + Count].ChunkIndex == ChunkIndex)) + { + const ChunkLocationReference& LocationReference = Locations[RangeOffset + Count]; + Result.ChunkSequenceLocations.push_back( + ChunkedContentLookup::ChunkSequenceLocation{.SequenceIndex = LocationReference.SequenceIndex, + .Offset = LocationReference.Offset}); + Count++; + } + Result.ChunkSequenceLocationOffset.push_back(RangeOffset); + Result.ChunkSequenceLocationCounts.push_back(Count); + RangeOffset += Count; + } + + Result.SequenceIndexFirstPathIndex.resize(Content.ChunkedContent.SequenceRawHashes.size(), (uint32_t)-1); + Result.PathExtensionHash.resize(Content.Paths.size()); + for (uint32_t PathIndex = 0; PathIndex < Content.Paths.size(); PathIndex++) + { + std::string LowercaseExtension = Content.Paths[PathIndex].extension().string(); + std::transform(LowercaseExtension.begin(), LowercaseExtension.end(), LowercaseExtension.begin(), ::tolower); + Result.PathExtensionHash[PathIndex] = HashStringDjb2(LowercaseExtension); + if (Content.RawSizes[PathIndex] > 0) + { + const IoHash& RawHash = Content.RawHashes[PathIndex]; + auto SequenceIndexIt = Result.RawHashToSequenceIndex.find(RawHash); + ZEN_ASSERT(SequenceIndexIt != Result.RawHashToSequenceIndex.end()); + const uint32_t SequenceIndex = SequenceIndexIt->second; + if (Result.SequenceIndexFirstPathIndex[SequenceIndex] == (uint32_t)-1) + { + Result.SequenceIndexFirstPathIndex[SequenceIndex] = PathIndex; + } + } + } + + return Result; +} + +} // namespace zen diff --git a/src/zenstore/chunkedfile.cpp b/src/zenutil/chunkedfile.cpp index f200bc1ec..a2c041ffd 100644 --- a/src/zenstore/chunkedfile.cpp +++ b/src/zenutil/chunkedfile.cpp @@ -1,7 +1,9 @@ // Copyright Epic Games, Inc. All Rights Reserved. +#include <zenutil/chunkedfile.h> + #include <zencore/basicfile.h> -#include <zenstore/chunkedfile.h> +#include <zencore/trace.h> #include "chunking.h" @@ -32,6 +34,7 @@ namespace { IoBuffer SerializeChunkedInfo(const ChunkedInfo& Info) { + ZEN_TRACE_CPU("SerializeChunkedInfo"); size_t HeaderSize = RoundUp(sizeof(ChunkedHeader), 16) + RoundUp(sizeof(uint32_t) * Info.ChunkSequence.size(), 16) + RoundUp(sizeof(IoHash) * Info.ChunkHashes.size(), 16); IoBuffer HeaderData(HeaderSize); @@ -64,6 +67,7 @@ SerializeChunkedInfo(const ChunkedInfo& Info) ChunkedInfo DeserializeChunkedInfo(IoBuffer& Buffer) { + ZEN_TRACE_CPU("DeserializeChunkedInfo"); MemoryView View = Buffer.GetView(); ChunkedHeader Header; { @@ -98,6 +102,7 @@ DeserializeChunkedInfo(IoBuffer& Buffer) void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk) { + ZEN_TRACE_CPU("Reconstruct"); BasicFile Reconstructed; Reconstructed.Open(TargetPath, BasicFile::Mode::kTruncate); BasicFileWriter ReconstructedWriter(Reconstructed, 64 * 1024); @@ -111,8 +116,15 @@ Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, st } ChunkedInfoWithSource -ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params) +ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params, + std::atomic<uint64_t>* BytesProcessed, + std::atomic<bool>* AbortFlag) { + ZEN_TRACE_CPU("ChunkData"); + ChunkedInfoWithSource Result; tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> FoundChunks; @@ -120,7 +132,7 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para Chunker.SetUseThreshold(Params.UseThreshold); Chunker.SetChunkSize(Params.MinSize, Params.MaxSize, Params.AvgSize); size_t End = Offset + Size; - const size_t ScanBufferSize = 1u * 1024 * 1024; // (Params.MaxSize * 9) / 3;//1 * 1024 * 1024; + const size_t ScanBufferSize = Max(1u * 1024 * 1024, Params.MaxSize); BasicFileBuffer RawBuffer(RawData, ScanBufferSize); MemoryView SliceView = RawBuffer.MakeView(Min(End - Offset, ScanBufferSize), Offset); ZEN_ASSERT(!SliceView.IsEmpty()); @@ -128,6 +140,10 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para IoHashStream RawHashStream; while (Offset < End) { + if (AbortFlag != nullptr && AbortFlag->load()) + { + return {}; + } size_t ScanLength = Chunker.ScanChunk(SliceView.GetData(), SliceSize); if (ScanLength == ZenChunkHelper::kNoBoundaryFound) { @@ -163,6 +179,10 @@ ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Para SliceSize = SliceView.GetSize(); Offset += ChunkLength; + if (BytesProcessed != nullptr) + { + BytesProcessed->fetch_add(ChunkLength); + } } Result.Info.RawSize = Size; Result.Info.RawHash = RawHashStream.GetHash(); diff --git a/src/zenstore/chunking.cpp b/src/zenutil/chunking.cpp index 30edd322a..30edd322a 100644 --- a/src/zenstore/chunking.cpp +++ b/src/zenutil/chunking.cpp diff --git a/src/zenstore/chunking.h b/src/zenutil/chunking.h index 09c56454f..09c56454f 100644 --- a/src/zenstore/chunking.h +++ b/src/zenutil/chunking.h diff --git a/src/zenutil/chunkingcontroller.cpp b/src/zenutil/chunkingcontroller.cpp new file mode 100644 index 000000000..6fb4182c0 --- /dev/null +++ b/src/zenutil/chunkingcontroller.cpp @@ -0,0 +1,359 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/chunkingcontroller.h> + +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/filesystem.h> +#include <zencore/trace.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { +using namespace std::literals; + +namespace { + std::vector<std::string> ReadStringArray(CbArrayView StringArray) + { + std::vector<std::string> Result; + Result.reserve(StringArray.Num()); + for (CbFieldView FieldView : StringArray) + { + Result.emplace_back(FieldView.AsString()); + } + return Result; + } + + ChunkedParams ReadChunkParams(CbObjectView Params) + { + bool UseThreshold = Params["UseThreshold"sv].AsBool(true); + size_t MinSize = Params["MinSize"sv].AsUInt64(DefaultChunkedParams.MinSize); + size_t MaxSize = Params["MaxSize"sv].AsUInt64(DefaultChunkedParams.MaxSize); + size_t AvgSize = Params["AvgSize"sv].AsUInt64(DefaultChunkedParams.AvgSize); + + return ChunkedParams{.UseThreshold = UseThreshold, .MinSize = MinSize, .MaxSize = MaxSize, .AvgSize = AvgSize}; + } + + void WriteChunkParams(CbObjectWriter& Writer, const ChunkedParams& Params) + { + Writer.BeginObject("ChunkingParams"sv); + { + Writer.AddBool("UseThreshold"sv, Params.UseThreshold); + + Writer.AddInteger("MinSize"sv, (uint64_t)Params.MinSize); + Writer.AddInteger("MaxSize"sv, (uint64_t)Params.MaxSize); + Writer.AddInteger("AvgSize"sv, (uint64_t)Params.AvgSize); + } + Writer.EndObject(); // ChunkingParams + } + + bool IsElfFile(BasicFile& Buffer) + { + if (Buffer.FileSize() > 4) + { + uint32_t ElfCheck = 0; + Buffer.Read(&ElfCheck, 4, 0); + if (ElfCheck == 0x464c457f) + { + return true; + } + } + return false; + } + + bool IsMachOFile(BasicFile& Buffer) + { + if (Buffer.FileSize() > 4) + { + uint32_t MachOCheck = 0; + Buffer.Read(&MachOCheck, 4, 0); + if ((MachOCheck == 0xfeedface) || (MachOCheck == 0xcefaedfe)) + { + return true; + } + } + return false; + } +} // namespace + +class BasicChunkingController : public ChunkingController +{ +public: + BasicChunkingController(const BasicChunkingControllerSettings& Settings) : m_Settings(Settings) {} + + BasicChunkingController(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic<uint64_t>& BytesProcessed, + std::atomic<bool>& AbortFlag) const override + { + ZEN_TRACE_CPU("BasicChunkingController::ProcessFile"); + const bool ExcludeFromChunking = + std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != + m_Settings.ExcludeExtensions.end(); + + if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) + { + return false; + } + + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) + { + return false; + } + if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) + { + return false; + } + + OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); + return true; + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("ChunkExcludeExtensions"sv); + { + for (const std::string& Extension : m_Settings.ExcludeExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + + Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); + Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); + Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); + + WriteChunkParams(Writer, m_Settings.ChunkingParams); + + return Writer.Save(); + } + static constexpr std::string_view Name = "BasicChunkingController"sv; + +private: + static BasicChunkingControllerSettings ReadSettings(CbObjectView Parameters) + { + return BasicChunkingControllerSettings{ + .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), + .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), + .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), + .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), + .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView())}; + } + + const BasicChunkingControllerSettings m_Settings; +}; + +class ChunkingControllerWithFixedChunking : public ChunkingController +{ +public: + ChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Settings) : m_Settings(Settings) {} + + ChunkingControllerWithFixedChunking(CbObjectView Parameters) : m_Settings(ReadSettings(Parameters)) {} + + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic<uint64_t>& BytesProcessed, + std::atomic<bool>& AbortFlag) const override + { + ZEN_TRACE_CPU("ChunkingControllerWithFixedChunking::ProcessFile"); + const bool ExcludeFromChunking = + std::find(m_Settings.ExcludeExtensions.begin(), m_Settings.ExcludeExtensions.end(), InputPath.extension()) != + m_Settings.ExcludeExtensions.end(); + + if (ExcludeFromChunking || (RawSize < m_Settings.ChunkFileSizeLimit)) + { + return false; + } + + const bool FixedChunkingExtension = + std::find(m_Settings.FixedChunkingExtensions.begin(), m_Settings.FixedChunkingExtensions.end(), InputPath.extension()) != + m_Settings.FixedChunkingExtensions.end(); + + if (FixedChunkingExtension) + { + if (RawSize < m_Settings.MinSizeForFixedChunking) + { + return false; + } + ZEN_TRACE_CPU("FixedChunking"); + IoHashStream FullHasher; + BasicFile Source(InputPath, BasicFile::Mode::kRead); + uint64_t Offset = 0; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + const uint64_t ExpectedChunkCount = 1 + (RawSize / m_Settings.FixedChunkingChunkSize); + ChunkHashToChunkIndex.reserve(ExpectedChunkCount); + OutChunked.Info.ChunkHashes.reserve(ExpectedChunkCount); + OutChunked.Info.ChunkSequence.reserve(ExpectedChunkCount); + OutChunked.ChunkSources.reserve(ExpectedChunkCount); + + static const uint64_t BufferingSize = 256u * 1024u; + + IoHashStream ChunkHasher; + + while (Offset < RawSize) + { + if (AbortFlag) + { + return false; + } + + ChunkHasher.Reset(); + + uint64_t ChunkSize = std::min<uint64_t>(RawSize - Offset, m_Settings.FixedChunkingChunkSize); + if (ChunkSize >= (BufferingSize + BufferingSize / 2)) + { + ScanFile(Source.Handle(), + Offset, + ChunkSize, + BufferingSize, + [&FullHasher, &ChunkHasher, &BytesProcessed](const void* Data, size_t Size) { + FullHasher.Append(Data, Size); + ChunkHasher.Append(Data, Size); + BytesProcessed.fetch_add(Size); + }); + } + else + { + IoBuffer ChunkData = Source.ReadRange(Offset, ChunkSize); + FullHasher.Append(ChunkData); + ChunkHasher.Append(ChunkData); + BytesProcessed.fetch_add(ChunkSize); + } + + const IoHash ChunkHash = ChunkHasher.GetHash(); + if (auto It = ChunkHashToChunkIndex.find(ChunkHash); It != ChunkHashToChunkIndex.end()) + { + OutChunked.Info.ChunkSequence.push_back(It->second); + } + else + { + uint32_t ChunkIndex = gsl::narrow<uint32_t>(OutChunked.Info.ChunkHashes.size()); + OutChunked.Info.ChunkHashes.push_back(ChunkHash); + OutChunked.Info.ChunkSequence.push_back(ChunkIndex); + OutChunked.ChunkSources.push_back({.Offset = Offset, .Size = gsl::narrow<uint32_t>(ChunkSize)}); + } + Offset += ChunkSize; + } + OutChunked.Info.RawSize = RawSize; + OutChunked.Info.RawHash = FullHasher.GetHash(); + return true; + } + else + { + BasicFile Buffer(InputPath, BasicFile::Mode::kRead); + if (m_Settings.ExcludeElfFiles && IsElfFile(Buffer)) + { + return false; + } + if (m_Settings.ExcludeMachOFiles && IsMachOFile(Buffer)) + { + return false; + } + + OutChunked = ChunkData(Buffer, 0, RawSize, m_Settings.ChunkingParams, &BytesProcessed, &AbortFlag); + return true; + } + } + + virtual std::string_view GetName() const override { return Name; } + + virtual CbObject GetParameters() const override + { + CbObjectWriter Writer; + Writer.BeginArray("FixedChunkingExtensions"); + { + for (const std::string& Extension : m_Settings.FixedChunkingExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + + Writer.BeginArray("ChunkExcludeExtensions"sv); + { + for (const std::string& Extension : m_Settings.ExcludeExtensions) + { + Writer.AddString(Extension); + } + } + Writer.EndArray(); // ChunkExcludeExtensions + + Writer.AddBool("ExcludeElfFiles"sv, m_Settings.ExcludeElfFiles); + Writer.AddBool("ExcludeMachOFiles"sv, m_Settings.ExcludeMachOFiles); + + Writer.AddInteger("ChunkFileSizeLimit"sv, m_Settings.ChunkFileSizeLimit); + + WriteChunkParams(Writer, m_Settings.ChunkingParams); + + Writer.AddInteger("FixedChunkingChunkSize"sv, m_Settings.FixedChunkingChunkSize); + Writer.AddInteger("MinSizeForFixedChunking"sv, m_Settings.MinSizeForFixedChunking); + return Writer.Save(); + } + + static constexpr std::string_view Name = "ChunkingControllerWithFixedChunking"sv; + +private: + static ChunkingControllerWithFixedChunkingSettings ReadSettings(CbObjectView Parameters) + { + return ChunkingControllerWithFixedChunkingSettings{ + .FixedChunkingExtensions = ReadStringArray(Parameters["FixedChunkingExtensions"sv].AsArrayView()), + .ExcludeExtensions = ReadStringArray(Parameters["ChunkExcludeExtensions"sv].AsArrayView()), + .ExcludeElfFiles = Parameters["ExcludeElfFiles"sv].AsBool(DefaultChunkingExcludeElfFiles), + .ExcludeMachOFiles = Parameters["ExcludeMachOFiles"sv].AsBool(DefaultChunkingExcludeMachOFiles), + .ChunkFileSizeLimit = Parameters["ChunkFileSizeLimit"sv].AsUInt64(DefaultChunkingFileSizeLimit), + .ChunkingParams = ReadChunkParams(Parameters["ChunkingParams"sv].AsObjectView()), + .FixedChunkingChunkSize = Parameters["FixedChunkingChunkSize"sv].AsUInt64(DefaultFixedChunkingChunkSize), + .MinSizeForFixedChunking = Parameters["MinSizeForFixedChunking"sv].AsUInt64(DefaultFixedChunkingChunkSize)}; + } + + const ChunkingControllerWithFixedChunkingSettings m_Settings; +}; + +std::unique_ptr<ChunkingController> +CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings) +{ + return std::make_unique<BasicChunkingController>(Settings); +} +std::unique_ptr<ChunkingController> +CreateBasicChunkingController(CbObjectView Parameters) +{ + return std::make_unique<BasicChunkingController>(Parameters); +} + +std::unique_ptr<ChunkingController> +CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting) +{ + return std::make_unique<ChunkingControllerWithFixedChunking>(Setting); +} +std::unique_ptr<ChunkingController> +CreateChunkingControllerWithFixedChunking(CbObjectView Parameters) +{ + return std::make_unique<ChunkingControllerWithFixedChunking>(Parameters); +} + +std::unique_ptr<ChunkingController> +CreateChunkingController(std::string_view Name, CbObjectView Parameters) +{ + if (Name == BasicChunkingController::Name) + { + return CreateBasicChunkingController(Parameters); + } + else if (Name == ChunkingControllerWithFixedChunking::Name) + { + return CreateChunkingControllerWithFixedChunking(Parameters); + } + return {}; +} + +} // namespace zen diff --git a/src/zenutil/commandlineoptions.cpp b/src/zenutil/commandlineoptions.cpp new file mode 100644 index 000000000..afef7f6f2 --- /dev/null +++ b/src/zenutil/commandlineoptions.cpp @@ -0,0 +1,221 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/commandlineoptions.h> + +#include <filesystem> +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +#endif // ZEN_WITH_TESTS + +void +cxxopts::values::parse_value(const std::string& text, std::filesystem::path& value) +{ + value = zen::StringToPath(text); +} + +namespace zen { + +std::vector<std::string> +ParseCommandLine(std::string_view CommandLine) +{ + auto IsWhitespaceOrEnd = [](std::string_view CommandLine, std::string::size_type Pos) { + if (Pos == CommandLine.length()) + { + return true; + } + if (CommandLine[Pos] == ' ') + { + return true; + } + return false; + }; + + bool IsParsingArg = false; + bool IsInQuote = false; + + std::string::size_type Pos = 0; + std::string::size_type ArgStart = 0; + std::vector<std::string> Args; + while (Pos < CommandLine.length()) + { + if (IsInQuote) + { + if (CommandLine[Pos] == '"' && IsWhitespaceOrEnd(CommandLine, Pos + 1)) + { + Args.push_back(std::string(CommandLine.substr(ArgStart, Pos - ArgStart + 1))); + Pos++; + IsInQuote = false; + IsParsingArg = false; + } + else + { + Pos++; + } + } + else if (IsParsingArg) + { + ZEN_ASSERT(Pos > ArgStart); + if (CommandLine[Pos] == ' ') + { + Args.push_back(std::string(CommandLine.substr(ArgStart, Pos - ArgStart))); + Pos++; + IsParsingArg = false; + } + else if (CommandLine[Pos] == '"') + { + IsInQuote = true; + Pos++; + } + else + { + Pos++; + } + } + else if (CommandLine[Pos] == '"') + { + IsInQuote = true; + IsParsingArg = true; + ArgStart = Pos; + Pos++; + } + else if (CommandLine[Pos] != ' ') + { + IsParsingArg = true; + ArgStart = Pos; + Pos++; + } + else + { + Pos++; + } + } + if (IsParsingArg) + { + ZEN_ASSERT(Pos > ArgStart); + Args.push_back(std::string(CommandLine.substr(ArgStart))); + } + + return Args; +} + +std::vector<char*> +StripCommandlineQuotes(std::vector<std::string>& InOutArgs) +{ + std::vector<char*> RawArgs; + RawArgs.reserve(InOutArgs.size()); + for (std::string& Arg : InOutArgs) + { + std::string::size_type EscapedQuotePos = Arg.find("\\\"", 1); + while (EscapedQuotePos != std::string::npos && Arg.rfind('\"', EscapedQuotePos - 1) != std::string::npos) + { + Arg.erase(EscapedQuotePos, 1); + EscapedQuotePos = Arg.find("\\\"", EscapedQuotePos); + } + + if (Arg.starts_with("\"")) + { + if (Arg.find('"', 1) == Arg.length() - 1) + { + if (Arg.find(' ', 1) == std::string::npos) + { + Arg = Arg.substr(1, Arg.length() - 2); + } + } + } + RawArgs.push_back(const_cast<char*>(Arg.c_str())); + } + return RawArgs; +} + +void +MakeSafeAbsolutePathÍnPlace(std::filesystem::path& Path) +{ + if (!Path.empty()) + { + std::filesystem::path AbsolutePath = std::filesystem::absolute(Path).make_preferred(); +#if ZEN_PLATFORM_WINDOWS + const std::string_view Prefix = "\\\\?\\"; + const std::u8string PrefixU8(Prefix.begin(), Prefix.end()); + std::u8string PathString = AbsolutePath.u8string(); + if (!PathString.empty() && !PathString.starts_with(PrefixU8)) + { + PathString.insert(0, PrefixU8); + Path = PathString; + } +#endif // ZEN_PLATFORM_WINDOWS + } +} + +std::filesystem::path +MakeSafeAbsolutePath(const std::filesystem::path& Path) +{ + std::filesystem::path Tmp(Path); + MakeSafeAbsolutePathÍnPlace(Tmp); + return Tmp; +} + +std::filesystem::path +StringToPath(const std::string_view& Path) +{ + std::string_view UnquotedPath = RemoveQuotes(Path); + + if (UnquotedPath.ends_with('/') || UnquotedPath.ends_with('\\') || UnquotedPath.ends_with(std::filesystem::path::preferred_separator)) + { + UnquotedPath = UnquotedPath.substr(0, UnquotedPath.length() - 1); + } + + return std::filesystem::path(UnquotedPath).make_preferred(); +} + +std::string_view +RemoveQuotes(const std::string_view& Arg) +{ + if (Arg.length() > 2) + { + if (Arg[0] == '"' && Arg[Arg.length() - 1] == '"') + { + return Arg.substr(1, Arg.length() - 2); + } + } + return Arg; +} + +#if ZEN_WITH_TESTS + +void +commandlineoptions_forcelink() +{ +} + +TEST_CASE("CommandLine") +{ + std::vector<std::string> v1 = ParseCommandLine("c:\\my\\exe.exe \"quoted arg\" \"one\",two,\"three\\\""); + CHECK_EQ(v1[0], "c:\\my\\exe.exe"); + CHECK_EQ(v1[1], "\"quoted arg\""); + CHECK_EQ(v1[2], "\"one\",two,\"three\\\""); + + std::vector<std::string> v2 = ParseCommandLine( + "--tracehost 127.0.0.1 builds download --url=https://jupiter.devtools.epicgames.com --namespace=ue.oplog " + "--bucket=citysample.packaged-build.fortnite-main.windows \"c:\\just\\a\\path\" " + "--access-token-path=\"C:\\Users\\dan.engelbrecht\\jupiter-token.json\" \"D:\\Dev\\Spaced Folder\\Target\\\" " + "--alt-path=\"D:\\Dev\\Spaced Folder2\\Target\\\" 07dn23ifiwesnvoasjncasab --build-part-name win64,linux,ps5"); + + std::vector<char*> v2Stripped = StripCommandlineQuotes(v2); + CHECK_EQ(v2Stripped[0], std::string("--tracehost")); + CHECK_EQ(v2Stripped[1], std::string("127.0.0.1")); + CHECK_EQ(v2Stripped[2], std::string("builds")); + CHECK_EQ(v2Stripped[3], std::string("download")); + CHECK_EQ(v2Stripped[4], std::string("--url=https://jupiter.devtools.epicgames.com")); + CHECK_EQ(v2Stripped[5], std::string("--namespace=ue.oplog")); + CHECK_EQ(v2Stripped[6], std::string("--bucket=citysample.packaged-build.fortnite-main.windows")); + CHECK_EQ(v2Stripped[7], std::string("c:\\just\\a\\path")); + CHECK_EQ(v2Stripped[8], std::string("--access-token-path=\"C:\\Users\\dan.engelbrecht\\jupiter-token.json\"")); + CHECK_EQ(v2Stripped[9], std::string("\"D:\\Dev\\Spaced Folder\\Target\"")); + CHECK_EQ(v2Stripped[10], std::string("--alt-path=\"D:\\Dev\\Spaced Folder2\\Target\"")); + CHECK_EQ(v2Stripped[11], std::string("07dn23ifiwesnvoasjncasab")); + CHECK_EQ(v2Stripped[12], std::string("--build-part-name")); + CHECK_EQ(v2Stripped[13], std::string("win64,linux,ps5")); +} + +#endif +} // namespace zen diff --git a/src/zenutil/environmentoptions.cpp b/src/zenutil/environmentoptions.cpp new file mode 100644 index 000000000..1b7ce8029 --- /dev/null +++ b/src/zenutil/environmentoptions.cpp @@ -0,0 +1,84 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/environmentoptions.h> + +#include <zencore/filesystem.h> + +namespace zen { + +EnvironmentOptions::StringOption::StringOption(std::string& Value) : RefValue(Value) +{ +} +void +EnvironmentOptions::StringOption::Parse(std::string_view Value) +{ + RefValue = std::string(Value); +} + +EnvironmentOptions::FilePathOption::FilePathOption(std::filesystem::path& Value) : RefValue(Value) +{ +} +void +EnvironmentOptions::FilePathOption::Parse(std::string_view Value) +{ + RefValue = MakeSafeAbsolutePath(Value); +} + +EnvironmentOptions::BoolOption::BoolOption(bool& Value) : RefValue(Value) +{ +} +void +EnvironmentOptions::BoolOption::Parse(std::string_view Value) +{ + const std::string Lower = ToLower(Value); + if (Lower == "true" || Lower == "y" || Lower == "yes") + { + RefValue = true; + } + else if (Lower == "false" || Lower == "n" || Lower == "no") + { + RefValue = false; + } +} + +std::shared_ptr<EnvironmentOptions::OptionValue> +EnvironmentOptions::MakeOption(std::string& Value) +{ + return std::make_shared<StringOption>(Value); +} + +std::shared_ptr<EnvironmentOptions::OptionValue> +EnvironmentOptions::MakeOption(std::filesystem::path& Value) +{ + return std::make_shared<FilePathOption>(Value); +} + +std::shared_ptr<EnvironmentOptions::OptionValue> +EnvironmentOptions::MakeOption(bool& Value) +{ + return std::make_shared<BoolOption>(Value); +} + +EnvironmentOptions::EnvironmentOptions() +{ +} + +void +EnvironmentOptions::Parse(const cxxopts::ParseResult& CmdLineResult) +{ + for (auto& It : OptionMap) + { + std::string_view EnvName = It.first; + const Option& Opt = It.second; + if (CmdLineResult.count(Opt.CommandLineOptionName) == 0) + { + std::string EnvValue = GetEnvVariable(It.first); + if (!EnvValue.empty()) + { + Opt.Value->Parse(EnvValue); + } + } + } +} + +} // namespace zen diff --git a/src/zenutil/filebuildstorage.cpp b/src/zenutil/filebuildstorage.cpp new file mode 100644 index 000000000..c2cc5ab3c --- /dev/null +++ b/src/zenutil/filebuildstorage.cpp @@ -0,0 +1,726 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/filebuildstorage.h> + +#include <zencore/basicfile.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compactbinaryvalidation.h> +#include <zencore/fmtutils.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> + +namespace zen { + +using namespace std::literals; + +class FileBuildStorage : public BuildStorage +{ +public: + explicit FileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) + : m_StoragePath(StoragePath) + , m_Stats(Stats) + , m_EnableJsonOutput(EnableJsonOutput) + , m_LatencySec(LatencySec) + , m_DelayPerKBSec(DelayPerKBSec) + { + CreateDirectories(GetBuildsFolder()); + CreateDirectories(GetBlobsFolder()); + CreateDirectories(GetBlobsMetadataFolder()); + } + + virtual ~FileBuildStorage() {} + + virtual CbObject ListNamespaces(bool bRecursive) override + { + ZEN_TRACE_CPU("FileBuildStorage::ListNamespaces"); + ZEN_UNUSED(bRecursive); + + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObjectWriter Writer; + Writer.BeginArray("results"); + { + } + Writer.EndArray(); // results + Writer.Save(); + SimulateLatency(Writer.GetSaveSize(), 0); + return Writer.Save(); + } + + virtual CbObject ListBuilds(CbObject Query) override + { + ZEN_TRACE_CPU("FileBuildStorage::ListBuilds"); + ZEN_UNUSED(Query); + + SimulateLatency(Query.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildFolder = GetBuildsFolder(); + DirectoryContent Content; + GetDirectoryContent(BuildFolder, DirectoryContentFlags::IncludeDirs, Content); + CbObjectWriter Writer; + Writer.BeginArray("results"); + { + for (const std::filesystem::path& BuildPath : Content.Directories) + { + Oid BuildId = Oid::TryFromHexString(BuildPath.stem().string()); + if (BuildId != Oid::Zero) + { + Writer.BeginObject(); + { + Writer.AddObjectId("buildId", BuildId); + Writer.AddObject("metadata", ReadBuild(BuildId)["metadata"sv].AsObjectView()); + } + Writer.EndObject(); + } + } + } + Writer.EndArray(); // results + Writer.Save(); + SimulateLatency(Writer.GetSaveSize(), 0); + return Writer.Save(); + } + + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBuild"); + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObjectWriter BuildObject; + BuildObject.AddObject("metadata", MetaData); + BuildObject.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + WriteBuild(BuildId, BuildObject.Save()); + + CbObjectWriter BuildResponse; + BuildResponse.AddInteger("chunkSize"sv, 32u * 1024u * 1024u); + BuildResponse.Save(); + + SimulateLatency(0, BuildResponse.GetSaveSize()); + return BuildResponse.Save(); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuild"); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObject Build = ReadBuild(BuildId); + SimulateLatency(0, Build.GetSize()); + return Build; + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuild"); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + } + + virtual std::pair<IoHash, std::vector<IoHash>> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildPart"); + SimulateLatency(MetaData.GetSize(), 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + CreateDirectories(BuildPartDataPath.parent_path()); + + TemporaryFile::SafeWriteFile(BuildPartDataPath, MetaData.GetView()); + m_WrittenBytes += MetaData.GetSize(); + WriteAsJson(BuildPartDataPath, MetaData); + + IoHash RawHash = IoHash::HashBuffer(MetaData.GetView()); + + CbObjectWriter Writer; + { + CbObject BuildObject = ReadBuild(BuildId); + CbObjectView PartsObject = BuildObject["parts"sv].AsObjectView(); + CbObjectView MetaDataView = BuildObject["metadata"sv].AsObjectView(); + + Writer.AddObject("metadata"sv, MetaDataView); + Writer.BeginObject("parts"sv); + { + for (CbFieldView PartView : PartsObject) + { + if (PartView.GetName() != PartName) + { + Writer.AddObjectId(PartView.GetName(), PartView.AsObjectId()); + } + } + Writer.AddObjectId(PartName, BuildPartId); + } + Writer.EndObject(); // parts + } + WriteBuild(BuildId, Writer.Save()); + + std::vector<IoHash> NeededAttachments = GetNeededAttachments(MetaData); + + SimulateLatency(0, sizeof(IoHash) * NeededAttachments.size()); + + return std::make_pair(RawHash, std::move(NeededAttachments)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildPart"); + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + + SimulateLatency(0, BuildPartObject.GetSize()); + + return BuildPartObject; + } + + virtual std::vector<IoHash> FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + ZEN_TRACE_CPU("FileBuildStorage::FinalizeBuildPart"); + SimulateLatency(0, 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BuildPartDataPath = GetBuildPartPath(BuildId, BuildPartId); + IoBuffer Payload = ReadFile(BuildPartDataPath).Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + IoHash RawHash = IoHash::HashBuffer(Payload.GetView()); + if (RawHash != PartHash) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part {}: Expected hash {}, got {}", BuildPartId, PartHash, RawHash)); + } + + CbObject BuildPartObject = CbObject(SharedBuffer(Payload)); + std::vector<IoHash> NeededAttachments(GetNeededAttachments(BuildPartObject)); + + SimulateLatency(0, NeededAttachments.size() * sizeof(IoHash)); + + return NeededAttachments; + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBuildBlob"); + ZEN_UNUSED(BuildId); + ZEN_ASSERT(ContentType == ZenContentType::kCompressedBinary); + SimulateLatency(Payload.GetSize(), 0); + + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, Payload)); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!IsFile(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockPath, Payload.Flatten().GetView()); + } + m_Stats.TotalBytesWritten += Payload.GetSize(); + SimulateLatency(0, 0); + } + + virtual std::vector<std::function<void()>> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::function<void(uint64_t, bool)>&& OnSentBytes) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob"); + ZEN_UNUSED(BuildId); + ZEN_UNUSED(ContentType); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (!IsFile(BlockPath)) + { + CreateDirectories(BlockPath.parent_path()); + + struct WorkloadData + { + std::function<IoBuffer(uint64_t Offset, uint64_t Size)> Transmitter; + std::function<void(uint64_t, bool)> OnSentBytes; + TemporaryFile TempFile; + std::atomic<size_t> PartsLeft; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + Workload->Transmitter = std::move(Transmitter); + Workload->OnSentBytes = std::move(OnSentBytes); + std::error_code Ec; + Workload->TempFile.CreateTemporary(BlockPath.parent_path(), Ec); + + if (Ec) + { + throw std::runtime_error( + fmt::format("Failed opening temporary file '{}': {} ({})", Workload->TempFile.GetPath(), Ec.message(), Ec.value())); + } + + std::vector<std::function<void()>> WorkItems; + uint64_t Offset = 0; + while (Offset < PayloadSize) + { + uint64_t Size = Min(32u * 1024u * 1024u, PayloadSize - Offset); + + WorkItems.push_back([this, RawHash, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::PutLargeBuildBlob_Work"); + IoBuffer PartPayload = Workload->Transmitter(Offset, Size); + SimulateLatency(PartPayload.GetSize(), 0); + + std::error_code Ec; + Workload->TempFile.Write(PartPayload, Offset, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed writing to temporary file '{}': {} ({})", + Workload->TempFile.GetPath(), + Ec.message(), + Ec.value())); + } + uint64_t BytesWritten = PartPayload.GetSize(); + m_Stats.TotalBytesWritten += BytesWritten; + const bool IsLastPart = Workload->PartsLeft.fetch_sub(1) == 1; + if (IsLastPart) + { + Workload->TempFile.Flush(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(Workload->TempFile.ReadAll()))); + Workload->TempFile.MoveTemporaryIntoPlace(BlockPath, Ec); + if (Ec) + { + throw std::runtime_error(fmt::format("Failed moving temporary file '{}' to '{}': {} ({})", + Workload->TempFile.GetPath(), + BlockPath, + Ec.message(), + Ec.value())); + } + } + Workload->OnSentBytes(BytesWritten, IsLastPart); + SimulateLatency(0, 0); + }); + + Offset += Size; + } + Workload->PartsLeft.store(WorkItems.size()); + + SimulateLatency(0, 0); + return WorkItems; + } + SimulateLatency(0, 0); + return {}; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBuildBlob"); + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (IsFile(BlockPath)) + { + BasicFile File(BlockPath, BasicFile::Mode::kRead); + IoBuffer Payload; + if (RangeOffset != 0 || RangeBytes != (uint64_t)-1) + { + Payload = IoBuffer(RangeBytes); + File.Read(Payload.GetMutableView().GetData(), RangeBytes, RangeOffset); + } + else + { + Payload = File.ReadAll(); + ZEN_ASSERT_SLOW(ValidateCompressedBuffer(RawHash, CompositeBuffer(SharedBuffer(Payload)))); + } + Payload.SetContentType(ZenContentType::kCompressedBinary); + m_Stats.TotalBytesRead += Payload.GetSize(); + SimulateLatency(0, Payload.GetSize()); + return Payload; + } + SimulateLatency(0, 0); + return IoBuffer{}; + } + + virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive, + std::function<void()>&& OnComplete) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob"); + ZEN_UNUSED(BuildId); + SimulateLatency(0, 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockPath = GetBlobPayloadPath(RawHash); + if (IsFile(BlockPath)) + { + struct WorkloadData + { + std::atomic<uint64_t> BytesRemaining; + BasicFile BlobFile; + std::function<void(uint64_t Offset, const IoBuffer& Chunk)> OnReceive; + std::function<void()> OnComplete; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + Workload->BlobFile.Open(BlockPath, BasicFile::Mode::kRead); + const uint64_t BlobSize = Workload->BlobFile.FileSize(); + + Workload->OnReceive = std::move(OnReceive); + Workload->OnComplete = std::move(OnComplete); + Workload->BytesRemaining = BlobSize; + + std::vector<std::function<void()>> WorkItems; + uint64_t Offset = 0; + while (Offset < BlobSize) + { + uint64_t Size = Min(ChunkSize, BlobSize - Offset); + WorkItems.push_back([this, BlockPath, Workload, Offset, Size]() { + ZEN_TRACE_CPU("FileBuildStorage::GetLargeBuildBlob_Work"); + SimulateLatency(0, 0); + IoBuffer PartPayload(Size); + Workload->BlobFile.Read(PartPayload.GetMutableView().GetData(), Size, Offset); + m_Stats.TotalBytesRead += PartPayload.GetSize(); + Workload->OnReceive(Offset, PartPayload); + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Size); + if (ByteRemaning == Size) + { + Workload->OnComplete(); + } + SimulateLatency(Size, PartPayload.GetSize()); + }); + + Offset += Size; + } + SimulateLatency(0, 0); + return WorkItems; + } + return {}; + } + + virtual bool PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + ZEN_TRACE_CPU("FileBuildStorage::PutBlockMetadata"); + ZEN_UNUSED(BuildId); + + SimulateLatency(MetaData.GetSize(), 0); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + const std::filesystem::path BlockMetaDataPath = GetBlobMetadataPath(BlockRawHash); + CreateDirectories(BlockMetaDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BlockMetaDataPath, MetaData.GetView()); + m_Stats.TotalBytesWritten += MetaData.GetSize(); + WriteAsJson(BlockMetaDataPath, MetaData); + SimulateLatency(0, 0); + return true; + } + + virtual CbObject FindBlocks(const Oid& BuildId, uint64_t MaxBlockCount) override + { + ZEN_TRACE_CPU("FileBuildStorage::FindBlocks"); + ZEN_UNUSED(BuildId); + SimulateLatency(sizeof(BuildId), 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + uint64_t FoundCount = 0; + + DirectoryContent Content; + GetDirectoryContent(GetBlobsMetadataFolder(), DirectoryContentFlags::IncludeFiles, Content); + CbObjectWriter Writer; + Writer.BeginArray("blocks"); + for (const std::filesystem::path& MetaDataFile : Content.Files) + { + IoHash ChunkHash; + if (IoHash::TryParse(MetaDataFile.stem().string(), ChunkHash)) + { + std::filesystem::path BlockPath = GetBlobPayloadPath(ChunkHash); + if (IsFile(BlockPath)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Writer.AddObject(BlockObject); + FoundCount++; + if (FoundCount == MaxBlockCount) + { + break; + } + } + } + } + Writer.EndArray(); // blocks + CbObject Result = Writer.Save(); + SimulateLatency(0, Result.GetSize()); + return Result; + } + + virtual CbObject GetBlockMetadatas(const Oid& BuildId, std::span<const IoHash> BlockHashes) override + { + ZEN_TRACE_CPU("FileBuildStorage::GetBlockMetadata"); + ZEN_UNUSED(BuildId); + SimulateLatency(sizeof(Oid) + sizeof(IoHash) * BlockHashes.size(), 0); + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + m_Stats.TotalRequestCount++; + + CbObjectWriter Writer; + Writer.BeginArray("blocks"); + + for (const IoHash& BlockHash : BlockHashes) + { + std::filesystem::path MetaDataFile = GetBlobMetadataPath(BlockHash); + if (IsFile(MetaDataFile)) + { + IoBuffer BlockMetaDataPayload = ReadFile(MetaDataFile).Flatten(); + + m_Stats.TotalBytesRead += BlockMetaDataPayload.GetSize(); + + CbObject BlockObject = CbObject(SharedBuffer(BlockMetaDataPayload)); + Writer.AddObject(BlockObject); + } + } + Writer.EndArray(); // blocks + CbObject Result = Writer.Save(); + SimulateLatency(0, Result.GetSize()); + return Result; + } + + virtual void PutBuildPartStats(const Oid& BuildId, + const Oid& BuildPartId, + const tsl::robin_map<std::string, double>& FloatStats) override + { + CbObjectWriter Request; + Request.BeginObject("floatStats"sv); + for (auto It : FloatStats) + { + Request.AddFloat(It.first, It.second); + } + Request.EndObject(); + CbObject Payload = Request.Save(); + + SimulateLatency(Payload.GetSize(), 0); + + const std::filesystem::path BuildPartStatsDataPath = GetBuildPartStatsPath(BuildId, BuildPartId); + CreateDirectories(BuildPartStatsDataPath.parent_path()); + + TemporaryFile::SafeWriteFile(BuildPartStatsDataPath, Payload.GetView()); + WriteAsJson(BuildPartStatsDataPath, Payload); + + SimulateLatency(0, 0); + } + +protected: + std::filesystem::path GetBuildsFolder() const { return m_StoragePath / "builds"; } + std::filesystem::path GetBlobsFolder() const { return m_StoragePath / "blobs"; } + std::filesystem::path GetBlobsMetadataFolder() const { return m_StoragePath / "blocks"; } + std::filesystem::path GetBuildFolder(const Oid& BuildId) const { return GetBuildsFolder() / BuildId.ToString(); } + + std::filesystem::path GetBuildPath(const Oid& BuildId) const { return GetBuildFolder(BuildId) / "metadata.cb"; } + + std::filesystem::path GetBuildPartFolder(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildFolder(BuildId) / "parts" / BuildPartId.ToString(); + } + + std::filesystem::path GetBuildPartPath(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildPartFolder(BuildId, BuildPartId) / "metadata.cb"; + } + + std::filesystem::path GetBuildPartStatsPath(const Oid& BuildId, const Oid& BuildPartId) const + { + return GetBuildPartFolder(BuildId, BuildPartId) / fmt::format("stats_{}.cb", Oid::NewOid()); + } + + std::filesystem::path GetBlobPayloadPath(const IoHash& RawHash) const { return GetBlobsFolder() / fmt::format("{}.cbz", RawHash); } + + std::filesystem::path GetBlobMetadataPath(const IoHash& RawHash) const + { + return GetBlobsMetadataFolder() / fmt::format("{}.cb", RawHash); + } + + void SimulateLatency(uint64_t ReceiveSize, uint64_t SendSize) + { + double SleepSec = m_LatencySec; + if (m_DelayPerKBSec > 0.0) + { + SleepSec += m_DelayPerKBSec * (double(SendSize + ReceiveSize) / 1024u); + } + if (SleepSec > 0) + { + Sleep(int(SleepSec * 1000)); + } + } + + void WriteAsJson(const std::filesystem::path& OriginalPath, CbObjectView Data) const + { + if (m_EnableJsonOutput) + { + ExtendableStringBuilder<128> SB; + CompactBinaryToJson(Data, SB); + std::filesystem::path JsonPath = OriginalPath; + JsonPath.replace_extension(".json"); + std::string_view JsonMetaData = SB.ToView(); + TemporaryFile::SafeWriteFile(JsonPath, MemoryView(JsonMetaData.data(), JsonMetaData.length())); + } + } + + void WriteBuild(const Oid& BuildId, CbObjectView Data) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + CreateDirectories(BuildDataPath.parent_path()); + TemporaryFile::SafeWriteFile(BuildDataPath, Data.GetView()); + m_Stats.TotalBytesWritten += Data.GetSize(); + WriteAsJson(BuildDataPath, Data); + } + + CbObject ReadBuild(const Oid& BuildId) + { + const std::filesystem::path BuildDataPath = GetBuildPath(BuildId); + FileContents Content = ReadFile(BuildDataPath); + if (Content.ErrorCode) + { + throw std::runtime_error(fmt::format("Failed reading build '{}' from '{}': {} ({})", + BuildId, + BuildDataPath, + Content.ErrorCode.message(), + Content.ErrorCode.value())); + } + IoBuffer Payload = Content.Flatten(); + m_Stats.TotalBytesRead += Payload.GetSize(); + ZEN_ASSERT(ValidateCompactBinary(Payload.GetView(), CbValidateMode::Default) == CbValidateError::None); + CbObject BuildObject = CbObject(SharedBuffer(Payload)); + return BuildObject; + } + + std::vector<IoHash> GetNeededAttachments(CbObjectView BuildPartObject) + { + std::vector<IoHash> NeededAttachments; + BuildPartObject.IterateAttachments([&](CbFieldView FieldView) { + const IoHash AttachmentHash = FieldView.AsBinaryAttachment(); + const std::filesystem::path BlockPath = GetBlobPayloadPath(AttachmentHash); + if (!IsFile(BlockPath)) + { + NeededAttachments.push_back(AttachmentHash); + } + }); + return NeededAttachments; + } + + bool ValidateCompressedBuffer(const IoHash& RawHash, const CompositeBuffer& Payload) + { + IoHash VerifyHash; + uint64_t VerifySize; + CompressedBuffer ValidateBuffer = CompressedBuffer::FromCompressed(Payload, VerifyHash, VerifySize); + if (!ValidateBuffer) + { + return false; + } + if (VerifyHash != RawHash) + { + return false; + } + + IoHashStream Hash; + bool CouldDecompress = ValidateBuffer.DecompressToStream( + 0, + (uint64_t)-1, + [&](uint64_t SourceOffset, uint64_t SourceSize, uint64_t Offset, const CompositeBuffer& RangeBuffer) { + ZEN_UNUSED(SourceOffset, SourceSize, Offset); + for (const SharedBuffer& Segment : RangeBuffer.GetSegments()) + { + Hash.Append(Segment.GetView()); + } + return true; + }); + if (!CouldDecompress) + { + return false; + } + if (Hash.GetHash() != VerifyHash) + { + return false; + } + return true; + } + +private: + const std::filesystem::path m_StoragePath; + BuildStorage::Statistics& m_Stats; + const bool m_EnableJsonOutput = false; + std::atomic<uint64_t> m_WrittenBytes; + + const double m_LatencySec = 0.0; + const double m_DelayPerKBSec = 0.0; +}; + +std::unique_ptr<BuildStorage> +CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec, + double DelayPerKBSec) +{ + return std::make_unique<FileBuildStorage>(StoragePath, Stats, EnableJsonOutput, LatencySec, DelayPerKBSec); +} + +} // namespace zen diff --git a/src/zenutil/include/zenutil/bufferedwritefilecache.h b/src/zenutil/include/zenutil/bufferedwritefilecache.h new file mode 100644 index 000000000..68d6c375e --- /dev/null +++ b/src/zenutil/include/zenutil/bufferedwritefilecache.h @@ -0,0 +1,106 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/basicfile.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CompositeBuffer; + +class BufferedWriteFileCache +{ +public: + BufferedWriteFileCache(const BufferedWriteFileCache&) = delete; + BufferedWriteFileCache& operator=(const BufferedWriteFileCache&) = delete; + + BufferedWriteFileCache(); + + ~BufferedWriteFileCache(); + + std::unique_ptr<BasicFile> Get(uint32_t FileIndex); + + void Put(uint32_t FileIndex, std::unique_ptr<BasicFile>&& Writer); + + void Close(std::span<uint32_t> FileIndexes); + + class Local + { + public: + struct Writer + { + std::unique_ptr<BasicFile> File; + std::unique_ptr<BasicFileWriter> Writer; + + inline void Write(const CompositeBuffer& Chunk, uint64_t FileOffset) + { + if (Writer) + { + Writer->Write(Chunk, FileOffset); + } + else + { + File->Write(Chunk, FileOffset); + } + } + }; + + Local(const Local&) = delete; + Local& operator=(const Local&) = delete; + + explicit Local(BufferedWriteFileCache& Cache); + ~Local(); + + Writer* GetWriter(uint32_t FileIndex); + Writer* PutWriter(uint32_t FileIndex, std::unique_ptr<Writer> Writer); + + private: + tsl::robin_map<uint32_t, uint32_t> m_FileIndexToWriterIndex; + std::vector<std::unique_ptr<Writer>> m_ChunkWriters; + BufferedWriteFileCache& m_Cache; + }; + +private: + static constexpr size_t MaxHandlesPerPath = 7; + static constexpr size_t MaxBufferedCount = 1024; + struct TOpenHandles + { + BasicFile* Files[MaxHandlesPerPath]; + uint64_t Size = 0; + inline BasicFile* Pop() + { + if (Size > 0) + { + return Files[--Size]; + } + else + { + return nullptr; + } + } + inline bool Push(BasicFile* File) + { + if (Size < MaxHandlesPerPath) + { + Files[Size++] = File; + return true; + } + return false; + } + }; + static_assert(sizeof(TOpenHandles) == 64); + + RwLock m_WriterLock; + tsl::robin_map<uint32_t, uint32_t> m_ChunkWriters; + std::vector<TOpenHandles> m_OpenFiles; + std::atomic<uint32_t> m_CacheHitCount; + std::atomic<uint32_t> m_CacheMissCount; + std::atomic<uint32_t> m_OpenHandleCount; + std::atomic<uint32_t> m_DroppedHandleCount; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/buildstorage.h b/src/zenutil/include/zenutil/buildstorage.h new file mode 100644 index 000000000..f49d4b42a --- /dev/null +++ b/src/zenutil/include/zenutil/buildstorage.h @@ -0,0 +1,65 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/compactbinary.h> +#include <zenutil/chunkblock.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class BuildStorage +{ +public: + struct Statistics + { + std::atomic<uint64_t> TotalBytesRead = 0; + std::atomic<uint64_t> TotalBytesWritten = 0; + std::atomic<uint64_t> TotalRequestCount = 0; + std::atomic<uint64_t> TotalRequestTimeUs = 0; + std::atomic<uint64_t> TotalExecutionTimeUs = 0; + }; + + virtual ~BuildStorage() {} + + virtual CbObject ListNamespaces(bool bRecursive = false) = 0; + virtual CbObject ListBuilds(CbObject Query) = 0; + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) = 0; + virtual CbObject GetBuild(const Oid& BuildId) = 0; + virtual void FinalizeBuild(const Oid& BuildId) = 0; + + virtual std::pair<IoHash, std::vector<IoHash>> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) = 0; + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) = 0; + virtual std::vector<IoHash> FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) = 0; + virtual void PutBuildBlob(const Oid& BuildId, const IoHash& RawHash, ZenContentType ContentType, const CompositeBuffer& Payload) = 0; + virtual std::vector<std::function<void()>> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::function<void(uint64_t, bool)>&& OnSentBytes) = 0; + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t RangeOffset = 0, + uint64_t RangeBytes = (uint64_t)-1) = 0; + virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive, + std::function<void()>&& OnComplete) = 0; + + [[nodiscard]] virtual bool PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) = 0; + virtual CbObject FindBlocks(const Oid& BuildId, uint64_t MaxBlockCount) = 0; + virtual CbObject GetBlockMetadatas(const Oid& BuildId, std::span<const IoHash> BlockHashes) = 0; + + virtual void PutBuildPartStats(const Oid& BuildId, const Oid& BuildPartId, const tsl::robin_map<std::string, double>& FloatStats) = 0; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/buildstoragecache.h b/src/zenutil/include/zenutil/buildstoragecache.h new file mode 100644 index 000000000..e1fb73fd4 --- /dev/null +++ b/src/zenutil/include/zenutil/buildstoragecache.h @@ -0,0 +1,57 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/logging.h> + +#include <zencore/compactbinary.h> +#include <zencore/compositebuffer.h> +#include <zenutil/chunkblock.h> + +namespace zen { + +class HttpClient; + +class BuildStorageCache +{ +public: + struct Statistics + { + std::atomic<uint64_t> TotalBytesRead = 0; + std::atomic<uint64_t> TotalBytesWritten = 0; + std::atomic<uint64_t> TotalRequestCount = 0; + std::atomic<uint64_t> TotalRequestTimeUs = 0; + std::atomic<uint64_t> TotalExecutionTimeUs = 0; + }; + + virtual ~BuildStorageCache() {} + + virtual void PutBuildBlob(const Oid& BuildId, const IoHash& RawHash, ZenContentType ContentType, const CompositeBuffer& Payload) = 0; + virtual IoBuffer GetBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t RangeOffset = 0, + uint64_t RangeBytes = (uint64_t)-1) = 0; + + virtual void PutBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes, std::span<const CbObject> MetaDatas) = 0; + virtual std::vector<CbObject> GetBlobMetadatas(const Oid& BuildId, std::span<const IoHash> BlobHashes) = 0; + + struct BlobExistsResult + { + bool HasBody = 0; + bool HasMetadata = 0; + }; + + virtual std::vector<BlobExistsResult> BlobsExists(const Oid& BuildId, std::span<const IoHash> BlobHashes) = 0; + + virtual void Flush( + int32_t UpdateIntervalMS, + std::function<bool(intptr_t Remaining)>&& UpdateCallback = [](intptr_t) { return true; }) = 0; +}; + +std::unique_ptr<BuildStorageCache> CreateZenBuildStorageCache(HttpClient& HttpClient, + BuildStorageCache::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + const std::filesystem::path& TempFolderPath, + bool BoostBackgroundThreadCount); +} // namespace zen diff --git a/src/zenutil/include/zenutil/cache/cachekey.h b/src/zenutil/include/zenutil/cache/cachekey.h index 741375946..0ab05f4f1 100644 --- a/src/zenutil/include/zenutil/cache/cachekey.h +++ b/src/zenutil/include/zenutil/cache/cachekey.h @@ -17,6 +17,12 @@ struct CacheKey static CacheKey Create(std::string_view Bucket, const IoHash& Hash) { return {.Bucket = ToLower(Bucket), .Hash = Hash}; } + // This should be used whenever the bucket name has already been validated to avoid redundant ToLower calls + static CacheKey CreateValidated(std::string&& BucketValidated, const IoHash& Hash) + { + return {.Bucket = std::move(BucketValidated), .Hash = Hash}; + } + auto operator<=>(const CacheKey& that) const { if (auto b = caseSensitiveCompareStrings(Bucket, that.Bucket); b != std::strong_ordering::equal) diff --git a/src/zenutil/include/zenutil/chunkblock.h b/src/zenutil/include/zenutil/chunkblock.h new file mode 100644 index 000000000..277580c74 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkblock.h @@ -0,0 +1,40 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/iohash.h> + +#include <zencore/compactbinary.h> +#include <zencore/compress.h> + +#include <optional> +#include <vector> + +namespace zen { + +struct ThinChunkBlockDescription +{ + IoHash BlockHash; + std::vector<IoHash> ChunkRawHashes; +}; + +struct ChunkBlockDescription : public ThinChunkBlockDescription +{ + uint64_t HeaderSize; + std::vector<uint32_t> ChunkRawLengths; + std::vector<uint32_t> ChunkCompressedLengths; +}; + +std::vector<ChunkBlockDescription> ParseChunkBlockDescriptionList(const CbObjectView& BlocksObject); +ChunkBlockDescription ParseChunkBlockDescription(const CbObjectView& BlockObject); +CbObject BuildChunkBlockDescription(const ChunkBlockDescription& Block, CbObjectView MetaData); +ChunkBlockDescription GetChunkBlockDescription(const SharedBuffer& BlockPayload, const IoHash& RawHash); +typedef std::function<std::pair<uint64_t, CompressedBuffer>(const IoHash& RawHash)> FetchChunkFunc; + +CompressedBuffer GenerateChunkBlock(std::vector<std::pair<IoHash, FetchChunkFunc>>&& FetchChunks, ChunkBlockDescription& OutBlock); +bool IterateChunkBlock(const SharedBuffer& BlockPayload, + std::function<void(CompressedBuffer&& Chunk, const IoHash& AttachmentHash)> Visitor, + uint64_t& OutHeaderSize); +std::vector<uint32_t> ReadChunkBlockHeader(const MemoryView BlockView, uint64_t& OutHeaderSize); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/chunkedcontent.h b/src/zenutil/include/zenutil/chunkedcontent.h new file mode 100644 index 000000000..306a5d990 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkedcontent.h @@ -0,0 +1,288 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/compactbinary.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/iohash.h> + +#include <filesystem> +#include <vector> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +class CbWriter; +class ChunkingController; +class WorkerThreadPool; + +enum class SourcePlatform +{ + Windows = 0, + Linux = 1, + MacOS = 2, + _Count +}; + +std::string_view ToString(SourcePlatform Platform); +SourcePlatform FromString(std::string_view Platform, SourcePlatform Default); +SourcePlatform GetSourceCurrentPlatform(); + +struct FolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector<std::filesystem::path> Paths; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + std::vector<uint64_t> ModificationTicks; + + bool operator==(const FolderContent& Rhs) const; + + bool AreKnownFilesEqual(const FolderContent& Rhs) const; + void UpdateState(const FolderContent& Rhs, std::vector<uint32_t>& PathIndexesOufOfDate); + static bool AreFileAttributesEqual(const uint32_t Lhs, const uint32_t Rhs); +}; + +FolderContent GetUpdatedContent(const FolderContent& Old, + const FolderContent& New, + std::vector<std::filesystem::path>& OutDeletedPathIndexes); + +void SaveFolderContentToCompactBinary(const FolderContent& Content, CbWriter& Output); +FolderContent LoadFolderContentToCompactBinary(CbObjectView Input); + +struct GetFolderContentStatistics +{ + std::atomic<uint64_t> FoundFileCount = 0; + std::atomic<uint64_t> FoundFileByteCount = 0; + std::atomic<uint64_t> AcceptedFileCount = 0; + std::atomic<uint64_t> AcceptedFileByteCount = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +FolderContent GetFolderContent(GetFolderContentStatistics& Stats, + const std::filesystem::path& RootPath, + std::function<bool(const std::string_view& RelativePath)>&& AcceptDirectory, + std::function<bool(std::string_view RelativePath, uint64_t Size, uint32_t Attributes)>&& AcceptFile, + WorkerThreadPool& WorkerPool, + int32_t UpdateIntervalMS, + std::function<void(bool IsAborted, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag); + +struct ChunkedContentData +{ + // To describe one asset with a particular RawHash, find the index of the hash in SequenceRawHashes + // ChunkCounts for that index will be the number of indexes in ChunkOrders that describe + // the sequence of chunks required to reconstruct the asset. + // Offset into ChunkOrders is based on how many entries in ChunkOrders the previous [n - 1] SequenceRawHashes uses + std::vector<IoHash> SequenceRawHashes; // Raw hash for Chunk sequence + std::vector<uint32_t> ChunkCounts; // Chunk count of ChunkOrder for SequenceRawHashes[n] + std::vector<uint32_t> ChunkOrders; // Chunk sequence indexed into ChunkHashes, ChunkCounts[n] indexes per SequenceRawHashes[n] + std::vector<IoHash> ChunkHashes; // Unique chunk hashes + std::vector<uint64_t> ChunkRawSizes; // Unique chunk raw size for ChunkHash[n] +}; + +struct ChunkedFolderContent +{ + SourcePlatform Platform = GetSourceCurrentPlatform(); + std::vector<std::filesystem::path> Paths; + std::vector<uint64_t> RawSizes; + std::vector<uint32_t> Attributes; + std::vector<IoHash> RawHashes; + ChunkedContentData ChunkedContent; +}; + +struct ChunkedContentLookup +{ + struct ChunkSequenceLocation + { + uint32_t SequenceIndex = (uint32_t)-1; + uint64_t Offset = (uint64_t)-1; + }; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> ChunkHashToChunkIndex; + tsl::robin_map<IoHash, uint32_t, IoHash::Hasher> RawHashToSequenceIndex; + std::vector<uint32_t> SequenceIndexChunkOrderOffset; + std::vector<ChunkSequenceLocation> ChunkSequenceLocations; + std::vector<size_t> + ChunkSequenceLocationOffset; // ChunkSequenceLocations[ChunkLocationOffset[ChunkIndex]] -> start of sources for ChunkIndex + std::vector<uint32_t> ChunkSequenceLocationCounts; // ChunkSequenceLocationCounts[ChunkIndex] count of chunk locations for ChunkIndex + std::vector<uint32_t> SequenceIndexFirstPathIndex; // SequenceIndexFirstPathIndex[SequenceIndex] -> first path index with that RawHash + std::vector<uint32_t> PathExtensionHash; +}; + +void SaveChunkedFolderContentToCompactBinary(const ChunkedFolderContent& Content, CbWriter& Output); +ChunkedFolderContent LoadChunkedFolderContentToCompactBinary(CbObjectView Input); + +ChunkedFolderContent MergeChunkedFolderContents(const ChunkedFolderContent& Base, std::span<const ChunkedFolderContent> Overlays); +ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, + const ChunkedContentLookup& BaseContentLookup, + std::span<const std::filesystem::path> DeletedPaths); +ChunkedFolderContent DeletePathsFromChunkedContent(const ChunkedFolderContent& Base, std::span<const std::filesystem::path> DeletedPaths); + +struct ChunkingStatistics +{ + std::atomic<uint64_t> FilesProcessed = 0; + std::atomic<uint64_t> FilesChunked = 0; + std::atomic<uint64_t> BytesHashed = 0; + std::atomic<uint64_t> UniqueChunksFound = 0; + std::atomic<uint64_t> UniqueSequencesFound = 0; + std::atomic<uint64_t> UniqueBytesFound = 0; + uint64_t ElapsedWallTimeUS = 0; +}; + +ChunkedFolderContent ChunkFolderContent(ChunkingStatistics& Stats, + WorkerThreadPool& WorkerPool, + const std::filesystem::path& RootPath, + const FolderContent& Content, + const ChunkingController& InChunkingController, + int32_t UpdateIntervalMS, + std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)>&& UpdateCallback, + std::atomic<bool>& AbortFlag, + std::atomic<bool>& PauseFlag); + +ChunkedContentLookup BuildChunkedContentLookup(const ChunkedFolderContent& Content); + +inline std::pair<size_t, uint32_t> +GetChunkSequenceLocationRange(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + return std::make_pair(Lookup.ChunkSequenceLocationOffset[ChunkIndex], Lookup.ChunkSequenceLocationCounts[ChunkIndex]); +} + +inline std::span<const ChunkedContentLookup::ChunkSequenceLocation> +GetChunkSequenceLocations(const ChunkedContentLookup& Lookup, uint32_t ChunkIndex) +{ + std::pair<size_t, uint32_t> Range = GetChunkSequenceLocationRange(Lookup, ChunkIndex); + return std::span<const ChunkedContentLookup::ChunkSequenceLocation>(Lookup.ChunkSequenceLocations).subspan(Range.first, Range.second); +} + +inline uint32_t +GetSequenceIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetChunkIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + return Lookup.RawHashToSequenceIndex.at(RawHash); +} + +inline uint32_t +GetFirstPathIndexForSeqeuenceIndex(const ChunkedContentLookup& Lookup, const uint32_t SequenceIndex) +{ + return Lookup.SequenceIndexFirstPathIndex[SequenceIndex]; +} + +inline uint32_t +GetFirstPathIndexForRawHash(const ChunkedContentLookup& Lookup, const IoHash& RawHash) +{ + const uint32_t SequenceIndex = GetSequenceIndexForRawHash(Lookup, RawHash); + return GetFirstPathIndexForSeqeuenceIndex(Lookup, SequenceIndex); +} + +namespace compactbinary_helpers { + template<typename Type> + void WriteArray(std::span<const Type> Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const Type Value : Values) + { + Output << Value; + } + Output.EndArray(); + } + + template<typename Type> + void WriteArray(const std::vector<Type>& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span<const Type>(Values), ArrayName, Output); + } + + template<> + inline void WriteArray(std::span<const std::filesystem::path> Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const std::filesystem::path& Path : Values) + { + Output.AddString((const char*)Path.generic_u8string().c_str()); + } + Output.EndArray(); + } + + template<> + inline void WriteArray(const std::vector<std::filesystem::path>& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span<const std::filesystem::path>(Values), ArrayName, Output); + } + + inline void WriteBinaryAttachmentArray(std::span<const IoHash> Values, std::string_view ArrayName, CbWriter& Output) + { + Output.BeginArray(ArrayName); + for (const IoHash& Hash : Values) + { + Output.AddBinaryAttachment(Hash); + } + Output.EndArray(); + } + + inline void WriteBinaryAttachmentArray(const std::vector<IoHash>& Values, std::string_view ArrayName, CbWriter& Output) + { + WriteArray(std::span<const IoHash>(Values), ArrayName, Output); + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint32_t>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt32()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<uint64_t>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsUInt64()); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<std::filesystem::path>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + std::u8string_view U8Path = ItemView.AsU8String(); + Result.push_back(std::filesystem::path(U8Path)); + } + } + + inline void ReadArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsHash()); + } + } + + inline void ReadBinaryAttachmentArray(std::string_view ArrayName, CbObjectView Input, std::vector<IoHash>& Result) + { + CbArrayView Array = Input[ArrayName].AsArrayView(); + Result.reserve(Array.Num()); + for (CbFieldView ItemView : Array) + { + Result.push_back(ItemView.AsBinaryAttachment()); + } + } + +} // namespace compactbinary_helpers + +} // namespace zen diff --git a/src/zenstore/include/zenstore/chunkedfile.h b/src/zenutil/include/zenutil/chunkedfile.h index c6330bdbd..4cec80fdb 100644 --- a/src/zenstore/include/zenstore/chunkedfile.h +++ b/src/zenutil/include/zenutil/chunkedfile.h @@ -43,7 +43,12 @@ struct ChunkedParams static const ChunkedParams UShaderByteCodeParams = {.UseThreshold = true, .MinSize = 17280, .MaxSize = 139264, .AvgSize = 36340}; -ChunkedInfoWithSource ChunkData(BasicFile& RawData, uint64_t Offset, uint64_t Size, ChunkedParams Params = {}); +ChunkedInfoWithSource ChunkData(BasicFile& RawData, + uint64_t Offset, + uint64_t Size, + ChunkedParams Params = {}, + std::atomic<uint64_t>* BytesProcessed = nullptr, + std::atomic<bool>* AbortFlag = nullptr); void Reconstruct(const ChunkedInfo& Info, const std::filesystem::path& TargetPath, std::function<IoBuffer(const IoHash& ChunkHash)> GetChunk); diff --git a/src/zenutil/include/zenutil/chunkingcontroller.h b/src/zenutil/include/zenutil/chunkingcontroller.h new file mode 100644 index 000000000..315502265 --- /dev/null +++ b/src/zenutil/include/zenutil/chunkingcontroller.h @@ -0,0 +1,75 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/compactbinary.h> + +#include <zenutil/chunkedfile.h> + +#include <atomic> +#include <filesystem> + +namespace zen { + +const std::vector<std::string> DefaultChunkingExcludeExtensions = + {".exe", ".dll", ".pdb", ".self", ".mp4", ".zip", ".7z", ".bzip", ".rar", ".gzip"}; +const std::vector<std::string> DefaultFixedChunkingExtensions = {".apk", ".nsp", ".xvc", ".pkg", ".dmg", ".ipa"}; +const bool DefaultChunkingExcludeElfFiles = true; +const bool DefaultChunkingExcludeMachOFiles = true; + +const ChunkedParams DefaultChunkedParams = {.MinSize = ((8u * 1u) * 1024u) - 128u, + .MaxSize = 128u * 1024u, + .AvgSize = ((8u * 4u) * 1024u) + 128u}; + +const size_t DefaultChunkingFileSizeLimit = DefaultChunkedParams.MaxSize; + +const uint64_t DefaultFixedChunkingChunkSize = 32u * 1024u * 1024u; +const uint64_t DefaultMinSizeForFixedChunking = DefaultFixedChunkingChunkSize * 8u; + +struct ChunkedInfoWithSource; + +class ChunkingController +{ +public: + virtual ~ChunkingController() {} + + // Return true if the input file was processed. If true is returned OutChunked will contain the chunked info + virtual bool ProcessFile(const std::filesystem::path& InputPath, + uint64_t RawSize, + ChunkedInfoWithSource& OutChunked, + std::atomic<uint64_t>& BytesProcessed, + std::atomic<bool>& AbortFlag) const = 0; + virtual std::string_view GetName() const = 0; + virtual CbObject GetParameters() const = 0; +}; + +struct BasicChunkingControllerSettings +{ + std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; + bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; + bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; + ChunkedParams ChunkingParams = DefaultChunkedParams; +}; + +std::unique_ptr<ChunkingController> CreateBasicChunkingController(const BasicChunkingControllerSettings& Settings); +std::unique_ptr<ChunkingController> CreateBasicChunkingController(CbObjectView Parameters); + +struct ChunkingControllerWithFixedChunkingSettings +{ + std::vector<std::string> FixedChunkingExtensions = DefaultFixedChunkingExtensions; + std::vector<std::string> ExcludeExtensions = DefaultChunkingExcludeExtensions; + bool ExcludeElfFiles = DefaultChunkingExcludeElfFiles; + bool ExcludeMachOFiles = DefaultChunkingExcludeMachOFiles; + uint64_t ChunkFileSizeLimit = DefaultChunkingFileSizeLimit; + ChunkedParams ChunkingParams = DefaultChunkedParams; + uint64_t FixedChunkingChunkSize = DefaultFixedChunkingChunkSize; + uint64_t MinSizeForFixedChunking = DefaultMinSizeForFixedChunking; +}; + +std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(const ChunkingControllerWithFixedChunkingSettings& Setting); +std::unique_ptr<ChunkingController> CreateChunkingControllerWithFixedChunking(CbObjectView Parameters); + +std::unique_ptr<ChunkingController> CreateChunkingController(std::string_view Name, CbObjectView Parameters); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/commandlineoptions.h b/src/zenutil/include/zenutil/commandlineoptions.h new file mode 100644 index 000000000..f927d41e5 --- /dev/null +++ b/src/zenutil/include/zenutil/commandlineoptions.h @@ -0,0 +1,29 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/zencore.h> +#include <filesystem> + +ZEN_THIRD_PARTY_INCLUDES_START + +namespace cxxopts::values { +// We declare this specialization before including cxxopts to make it stick +void parse_value(const std::string& text, std::filesystem::path& value); +} // namespace cxxopts::values + +#include <cxxopts.hpp> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +std::vector<std::string> ParseCommandLine(std::string_view CommandLine); +std::vector<char*> StripCommandlineQuotes(std::vector<std::string>& InOutArgs); +void MakeSafeAbsolutePathÍnPlace(std::filesystem::path& Path); +[[nodiscard]] std::filesystem::path MakeSafeAbsolutePath(const std::filesystem::path& Path); +std::filesystem::path StringToPath(const std::string_view& Path); +std::string_view RemoveQuotes(const std::string_view& Arg); + +void commandlineoptions_forcelink(); // internal + +} // namespace zen diff --git a/src/zenutil/include/zenutil/environmentoptions.h b/src/zenutil/include/zenutil/environmentoptions.h new file mode 100644 index 000000000..7418608e4 --- /dev/null +++ b/src/zenutil/include/zenutil/environmentoptions.h @@ -0,0 +1,92 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/string.h> +#include <zenutil/commandlineoptions.h> + +namespace zen { + +class EnvironmentOptions +{ +public: + class OptionValue + { + public: + virtual void Parse(std::string_view Value) = 0; + + virtual ~OptionValue() {} + }; + + class StringOption : public OptionValue + { + public: + explicit StringOption(std::string& Value); + virtual void Parse(std::string_view Value) override; + std::string& RefValue; + }; + + class FilePathOption : public OptionValue + { + public: + explicit FilePathOption(std::filesystem::path& Value); + virtual void Parse(std::string_view Value) override; + std::filesystem::path& RefValue; + }; + + class BoolOption : public OptionValue + { + public: + explicit BoolOption(bool& Value); + virtual void Parse(std::string_view Value); + bool& RefValue; + }; + + template<Integral T> + class NumberOption : public OptionValue + { + public: + explicit NumberOption(T& Value) : RefValue(Value) {} + virtual void Parse(std::string_view Value) override + { + if (std::optional<T> OptionalValue = ParseInt<T>(Value); OptionalValue.has_value()) + { + RefValue = OptionalValue.value(); + } + } + T& RefValue; + }; + + struct Option + { + std::string CommandLineOptionName; + std::shared_ptr<OptionValue> Value; + }; + + std::shared_ptr<OptionValue> MakeOption(std::string& Value); + std::shared_ptr<OptionValue> MakeOption(std::filesystem::path& Value); + + template<Integral T> + std::shared_ptr<OptionValue> MakeOption(T& Value) + { + return std::make_shared<NumberOption<T>>(Value); + }; + + std::shared_ptr<OptionValue> MakeOption(bool& Value); + + template<typename T> + void AddOption(std::string_view EnvName, T& Value, std::string_view CommandLineOptionName = "") + { + OptionMap.insert_or_assign(std::string(EnvName), + Option{.CommandLineOptionName = std::string(CommandLineOptionName), .Value = MakeOption(Value)}); + }; + + EnvironmentOptions(); + + void Parse(const cxxopts::ParseResult& CmdLineResult); + +private: + std::unordered_map<std::string, Option> OptionMap; +}; + +} // namespace zen diff --git a/src/zenutil/include/zenutil/filebuildstorage.h b/src/zenutil/include/zenutil/filebuildstorage.h new file mode 100644 index 000000000..c95fb32e6 --- /dev/null +++ b/src/zenutil/include/zenutil/filebuildstorage.h @@ -0,0 +1,16 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/logging.h> +#include <zenutil/buildstorage.h> + +namespace zen { +class HttpClient; + +std::unique_ptr<BuildStorage> CreateFileBuildStorage(const std::filesystem::path& StoragePath, + BuildStorage::Statistics& Stats, + bool EnableJsonOutput, + double LatencySec = 0.0, + double DelayPerKBSec = 0.0); +} // namespace zen diff --git a/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h new file mode 100644 index 000000000..bbf070993 --- /dev/null +++ b/src/zenutil/include/zenutil/jupiter/jupiterbuildstorage.h @@ -0,0 +1,18 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/logging.h> +#include <zenutil/buildstorage.h> + +namespace zen { +class HttpClient; + +std::unique_ptr<BuildStorage> CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + bool AllowRedirect, + const std::filesystem::path& TempFolderPath); +} // namespace zen diff --git a/src/zenutil/include/zenutil/jupiter/jupiterclient.h b/src/zenutil/include/zenutil/jupiter/jupiterclient.h index defe50edc..8a51bd60a 100644 --- a/src/zenutil/include/zenutil/jupiter/jupiterclient.h +++ b/src/zenutil/include/zenutil/jupiter/jupiterclient.h @@ -44,12 +44,11 @@ public: HttpClient& Client() { return m_HttpClient; } private: - LoggerRef m_Log; - const std::string m_DefaultDdcNamespace; - const std::string m_DefaultBlobStoreNamespace; - const std::string m_ComputeCluster; - std::function<HttpClientAccessToken()> m_TokenProvider; - HttpClient m_HttpClient; + LoggerRef m_Log; + const std::string m_DefaultDdcNamespace; + const std::string m_DefaultBlobStoreNamespace; + const std::string m_ComputeCluster; + HttpClient m_HttpClient; friend class JupiterSession; }; diff --git a/src/zenutil/include/zenutil/jupiter/jupitersession.h b/src/zenutil/include/zenutil/jupiter/jupitersession.h index 6a80332f4..b79790f25 100644 --- a/src/zenutil/include/zenutil/jupiter/jupitersession.h +++ b/src/zenutil/include/zenutil/jupiter/jupitersession.h @@ -65,7 +65,7 @@ struct FinalizeBuildPartResult : JupiterResult class JupiterSession { public: - JupiterSession(LoggerRef InLog, HttpClient& InHttpClient); + JupiterSession(LoggerRef InLog, HttpClient& InHttpClient, bool AllowRedirect); ~JupiterSession(); JupiterResult Authenticate(); @@ -102,33 +102,52 @@ public: std::vector<IoHash> Filter(std::string_view Namespace, std::string_view BucketId, const std::vector<IoHash>& ChunkHashes); - JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); - JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); - PutBuildPartResult PutBuildPart(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - std::string_view PartName, - const IoBuffer& Payload); - JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); - JupiterResult PutBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - ZenContentType ContentType, - const CompositeBuffer& Payload); - JupiterResult GetBuildBlob(std::string_view Namespace, - std::string_view BucketId, - const Oid& BuildId, - const Oid& PartId, - const IoHash& Hash, - std::filesystem::path TempFolderPath); + JupiterResult ListBuildNamespaces(); + JupiterResult ListBuildBuckets(std::string_view Namespace); + JupiterResult ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload); + JupiterResult PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload); + JupiterResult GetBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + JupiterResult FinalizeBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId); + PutBuildPartResult PutBuildPart(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& PartId, + std::string_view PartName, + const IoBuffer& Payload); + JupiterResult GetBuildPart(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult PutBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + ZenContentType ContentType, + const CompositeBuffer& Payload); + JupiterResult GetBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + std::filesystem::path TempFolderPath, + uint64_t Offset = 0, + uint64_t Size = (uint64_t)-1); + + JupiterResult PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::vector<std::function<JupiterResult(bool& OutIsComplete)>>& OutWorkItems); + JupiterResult GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive, + std::function<void()>&& OnComplete, + std::vector<std::function<JupiterResult()>>& OutWorkItems); JupiterResult PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload); FinalizeBuildPartResult FinalizeBuildPart(std::string_view Namespace, @@ -136,7 +155,14 @@ public: const Oid& BuildId, const Oid& PartId, const IoHash& RawHash); - JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId); + JupiterResult FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, uint64_t MaxBlockCount); + JupiterResult GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload); + + JupiterResult PutBuildPartStats(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& BuildPartId, + IoBuffer Payload); private: inline LoggerRef Log() { return m_Log; } @@ -147,6 +173,7 @@ private: LoggerRef m_Log; HttpClient& m_HttpClient; + const bool m_AllowRedirect = false; }; } // namespace zen diff --git a/src/zenutil/include/zenutil/logging.h b/src/zenutil/include/zenutil/logging.h index ebf6372fc..d64eef207 100644 --- a/src/zenutil/include/zenutil/logging.h +++ b/src/zenutil/include/zenutil/logging.h @@ -32,6 +32,7 @@ struct LoggingOptions bool IsDebug = false; bool IsVerbose = false; bool IsTest = false; + bool AllowAsync = true; bool NoConsoleOutput = false; std::filesystem::path AbsLogFile; // Absolute path to main log file std::string LogId; diff --git a/src/zenutil/include/zenutil/logging/fullformatter.h b/src/zenutil/include/zenutil/logging/fullformatter.h index 07ad408fa..0326870e5 100644 --- a/src/zenutil/include/zenutil/logging/fullformatter.h +++ b/src/zenutil/include/zenutil/logging/fullformatter.h @@ -45,6 +45,8 @@ public: std::chrono::seconds TimestampSeconds; + std::chrono::milliseconds millis; + if (m_UseFullDate) { TimestampSeconds = std::chrono::duration_cast<std::chrono::seconds>(msg.time.time_since_epoch()); @@ -69,6 +71,8 @@ public: spdlog::details::fmt_helper::pad2(m_CachedLocalTm.tm_sec, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = spdlog::details::fmt_helper::time_fraction<std::chrono::milliseconds>(msg.time); } else { @@ -97,6 +101,8 @@ public: spdlog::details::fmt_helper::pad2(LogSecs, m_CachedDatetime); m_CachedDatetime.push_back('.'); } + + millis = std::chrono::duration_cast<std::chrono::milliseconds>(ElapsedTime - TimestampSeconds); } { @@ -104,7 +110,6 @@ public: OutBuffer.append(m_CachedDatetime.begin(), m_CachedDatetime.end()); } - auto millis = spdlog::details::fmt_helper::time_fraction<std::chrono::milliseconds>(msg.time); spdlog::details::fmt_helper::pad3(static_cast<uint32_t>(millis.count()), OutBuffer); OutBuffer.push_back(']'); OutBuffer.push_back(' '); diff --git a/src/zenutil/include/zenutil/logging/rotatingfilesink.h b/src/zenutil/include/zenutil/logging/rotatingfilesink.h index 758722156..cd28bdcb2 100644 --- a/src/zenutil/include/zenutil/logging/rotatingfilesink.h +++ b/src/zenutil/include/zenutil/logging/rotatingfilesink.h @@ -27,7 +27,6 @@ public: { ZEN_MEMSCOPE(ELLMTag::Logging); - ZEN_MEMSCOPE(ELLMTag::Logging); std::error_code Ec; if (RotateOnOpen) { diff --git a/src/zenutil/include/zenutil/parallelwork.h b/src/zenutil/include/zenutil/parallelwork.h new file mode 100644 index 000000000..639c6968c --- /dev/null +++ b/src/zenutil/include/zenutil/parallelwork.h @@ -0,0 +1,77 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#pragma once + +#include <zencore/scopeguard.h> +#include <zencore/thread.h> +#include <zencore/workthreadpool.h> + +#include <atomic> + +namespace zen { + +class ParallelWork +{ +public: + ParallelWork(std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag); + + ~ParallelWork(); + + typedef std::function<void(std::atomic<bool>& AbortFlag)> WorkCallback; + typedef std::function<void(std::exception_ptr Ex, std::atomic<bool>& AbortFlag)> ExceptionCallback; + typedef std::function<void(bool IsAborted, bool IsPaused, std::ptrdiff_t PendingWork)> UpdateCallback; + + void ScheduleWork(WorkerThreadPool& WorkerPool, WorkCallback&& Work, ExceptionCallback&& OnError = {}) + { + m_PendingWork.AddCount(1); + try + { + WorkerPool.ScheduleWork([this, Work = std::move(Work), OnError = OnError ? std::move(OnError) : DefaultErrorFunction()] { + auto _ = MakeGuard([this]() { m_PendingWork.CountDown(); }); + try + { + while (m_PauseFlag && !m_AbortFlag) + { + Sleep(2000); + } + Work(m_AbortFlag); + } + catch (...) + { + OnError(std::current_exception(), m_AbortFlag); + } + }); + } + catch (const std::exception&) + { + m_PendingWork.CountDown(); + throw; + } + } + + void Abort() { m_AbortFlag = true; } + + bool IsAborted() const { return m_AbortFlag.load(); } + + void Wait(int32_t UpdateIntervalMS, UpdateCallback&& UpdateCallback); + + void Wait(); + + Latch& PendingWork() { return m_PendingWork; } + +private: + ExceptionCallback DefaultErrorFunction(); + void RethrowErrors(); + + std::atomic<bool>& m_AbortFlag; + std::atomic<bool>& m_PauseFlag; + bool m_DispatchComplete = false; + Latch m_PendingWork; + + RwLock m_ErrorLock; + std::vector<std::exception_ptr> m_Errors; +}; + +void parallellwork_forcelink(); + +} // namespace zen diff --git a/src/zenutil/include/zenutil/workerpools.h b/src/zenutil/include/zenutil/workerpools.h index 9683ad720..df2033bca 100644 --- a/src/zenutil/include/zenutil/workerpools.h +++ b/src/zenutil/include/zenutil/workerpools.h @@ -21,6 +21,9 @@ WorkerThreadPool& GetMediumWorkerPool(EWorkloadType WorkloadType); // Worker pool with std::thread::hardware_concurrency() / 8 worker threads, but at least one thread WorkerThreadPool& GetSmallWorkerPool(EWorkloadType WorkloadType); +// Worker pool with minimum number of worker threads, but at least one thread +WorkerThreadPool& GetTinyWorkerPool(EWorkloadType WorkloadType); + // Special worker pool that does not use worker thread but issues all scheduled work on the calling thread // This is useful for debugging when multiple async thread can make stepping in debugger complicated WorkerThreadPool& GetSyncWorkerPool(); diff --git a/src/zenutil/jupiter/jupiterbuildstorage.cpp b/src/zenutil/jupiter/jupiterbuildstorage.cpp new file mode 100644 index 000000000..9974725ff --- /dev/null +++ b/src/zenutil/jupiter/jupiterbuildstorage.cpp @@ -0,0 +1,479 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/jupiter/jupiterbuildstorage.h> + +#include <zencore/compactbinarybuilder.h> +#include <zencore/fmtutils.h> +#include <zencore/scopeguard.h> +#include <zencore/timer.h> +#include <zencore/trace.h> +#include <zenutil/jupiter/jupitersession.h> + +ZEN_THIRD_PARTY_INCLUDES_START +#include <tsl/robin_map.h> +ZEN_THIRD_PARTY_INCLUDES_END + +namespace zen { + +using namespace std::literals; + +class JupiterBuildStorage : public BuildStorage +{ +public: + JupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + bool AllowRedirect, + const std::filesystem::path& TempFolderPath) + : m_Session(InLog, InHttpClient, AllowRedirect) + , m_Stats(Stats) + , m_Namespace(Namespace) + , m_Bucket(Bucket) + , m_TempFolderPath(TempFolderPath) + { + } + virtual ~JupiterBuildStorage() {} + + virtual CbObject ListNamespaces(bool bRecursive) override + { + ZEN_TRACE_CPU("Jupiter::ListNamespaces"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult ListResult = m_Session.ListBuildNamespaces(); + AddStatistic(ListResult); + if (!ListResult.Success) + { + throw std::runtime_error(fmt::format("Failed listing namespaces: {} ({})", ListResult.Reason, ListResult.ErrorCode)); + } + CbObject NamespaceResponse = PayloadToCbObject("Failed listing namespaces"sv, ListResult.Response); + + CbObjectWriter Response; + Response.BeginArray("results"sv); + for (CbFieldView NamespaceField : NamespaceResponse["namespaces"]) + { + std::string_view Namespace = NamespaceField.AsString(); + if (!Namespace.empty()) + { + Response.BeginObject(); + Response.AddString("name", Namespace); + + if (bRecursive) + { + JupiterResult BucketsResult = m_Session.ListBuildBuckets(Namespace); + AddStatistic(BucketsResult); + if (!BucketsResult.Success) + { + throw std::runtime_error( + fmt::format("Failed listing namespaces: {} ({})", BucketsResult.Reason, BucketsResult.ErrorCode)); + } + CbObject BucketResponse = PayloadToCbObject("Failed listing namespaces"sv, BucketsResult.Response); + + Response.BeginArray("items"); + for (CbFieldView BucketField : BucketResponse["buckets"]) + { + std::string_view Bucket = BucketField.AsString(); + if (!Bucket.empty()) + { + Response.AddString(Bucket); + } + } + Response.EndArray(); + } + + Response.EndObject(); + } + } + Response.EndArray(); + + return Response.Save(); + } + + virtual CbObject ListBuilds(CbObject Query) override + { + ZEN_TRACE_CPU("Jupiter::ListBuilds"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = Query.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult ListResult = m_Session.ListBuilds(m_Namespace, m_Bucket, Payload); + AddStatistic(ListResult); + if (!ListResult.Success) + { + throw std::runtime_error(fmt::format("Failed listing builds: {} ({})", ListResult.Reason, ListResult.ErrorCode)); + } + return PayloadToCbObject("Failed listing builds"sv, ListResult.Response); + } + + virtual CbObject PutBuild(const Oid& BuildId, const CbObject& MetaData) override + { + ZEN_TRACE_CPU("Jupiter::PutBuild"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutResult = m_Session.PutBuild(m_Namespace, m_Bucket, BuildId, Payload); + AddStatistic(PutResult); + if (!PutResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build: {} ({})", PutResult.Reason, PutResult.ErrorCode)); + } + return PayloadToCbObject(fmt::format("Failed creating build: {}", BuildId), PutResult.Response); + } + + virtual CbObject GetBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("Jupiter::GetBuild"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildResult = m_Session.GetBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(GetBuildResult); + if (!GetBuildResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build: {} ({})", GetBuildResult.Reason, GetBuildResult.ErrorCode)); + } + return PayloadToCbObject(fmt::format("Failed fetching build {}:", BuildId), GetBuildResult.Response); + } + + virtual void FinalizeBuild(const Oid& BuildId) override + { + ZEN_TRACE_CPU("Jupiter::FinalizeBuild"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FinalizeBuildResult = m_Session.FinalizeBuild(m_Namespace, m_Bucket, BuildId); + AddStatistic(FinalizeBuildResult); + if (!FinalizeBuildResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizeBuildResult.Reason, FinalizeBuildResult.ErrorCode)); + } + } + + virtual std::pair<IoHash, std::vector<IoHash>> PutBuildPart(const Oid& BuildId, + const Oid& BuildPartId, + std::string_view PartName, + const CbObject& MetaData) override + { + ZEN_TRACE_CPU("Jupiter::PutBuildPart"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + PutBuildPartResult PutPartResult = m_Session.PutBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartName, Payload); + AddStatistic(PutPartResult); + if (!PutPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed creating build part: {} ({})", PutPartResult.Reason, PutPartResult.ErrorCode)); + } + return std::make_pair(PutPartResult.RawHash, std::move(PutPartResult.Needs)); + } + + virtual CbObject GetBuildPart(const Oid& BuildId, const Oid& BuildPartId) override + { + ZEN_TRACE_CPU("Jupiter::GetBuildPart"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult GetBuildPartResult = m_Session.GetBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId); + AddStatistic(GetBuildPartResult); + if (!GetBuildPartResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching build part {}: {} ({})", + BuildPartId, + GetBuildPartResult.Reason, + GetBuildPartResult.ErrorCode)); + } + return PayloadToCbObject(fmt::format("Failed fetching build part {}:", BuildPartId), GetBuildPartResult.Response); + } + + virtual std::vector<IoHash> FinalizeBuildPart(const Oid& BuildId, const Oid& BuildPartId, const IoHash& PartHash) override + { + ZEN_TRACE_CPU("Jupiter::FinalizeBuildPart"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + FinalizeBuildPartResult FinalizePartResult = m_Session.FinalizeBuildPart(m_Namespace, m_Bucket, BuildId, BuildPartId, PartHash); + AddStatistic(FinalizePartResult); + if (!FinalizePartResult.Success) + { + throw std::runtime_error( + fmt::format("Failed finalizing build part: {} ({})", FinalizePartResult.Reason, FinalizePartResult.ErrorCode)); + } + return std::move(FinalizePartResult.Needs); + } + + virtual void PutBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + const CompositeBuffer& Payload) override + { + ZEN_TRACE_CPU("Jupiter::PutBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PutBlobResult = m_Session.PutBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, ContentType, Payload); + AddStatistic(PutBlobResult); + if (!PutBlobResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PutBlobResult.Reason, PutBlobResult.ErrorCode)); + } + } + + virtual std::vector<std::function<void()>> PutLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::function<void(uint64_t, bool)>&& OnSentBytes) override + { + ZEN_TRACE_CPU("Jupiter::PutLargeBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector<std::function<JupiterResult(bool&)>> WorkItems; + JupiterResult PutMultipartBlobResult = m_Session.PutMultipartBuildBlob(m_Namespace, + m_Bucket, + BuildId, + RawHash, + ContentType, + PayloadSize, + std::move(Transmitter), + WorkItems); + AddStatistic(PutMultipartBlobResult); + if (!PutMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed putting build part: {} ({})", PutMultipartBlobResult.Reason, PutMultipartBlobResult.ErrorCode)); + } + OnSentBytes(PutMultipartBlobResult.SentBytes, WorkItems.empty()); + + std::vector<std::function<void()>> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem), OnSentBytes]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + bool IsComplete = false; + JupiterResult PartResult = WorkItem(IsComplete); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed putting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + OnSentBytes(PartResult.SentBytes, IsComplete); + }); + } + return WorkList; + } + + virtual IoBuffer GetBuildBlob(const Oid& BuildId, const IoHash& RawHash, uint64_t RangeOffset, uint64_t RangeBytes) override + { + ZEN_TRACE_CPU("Jupiter::GetBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CreateDirectories(m_TempFolderPath); + JupiterResult GetBuildBlobResult = + m_Session.GetBuildBlob(m_Namespace, m_Bucket, BuildId, RawHash, m_TempFolderPath, RangeOffset, RangeBytes); + AddStatistic(GetBuildBlobResult); + if (!GetBuildBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching build blob {}: {} ({})", RawHash, GetBuildBlobResult.Reason, GetBuildBlobResult.ErrorCode)); + } + return std::move(GetBuildBlobResult.Response); + } + + virtual std::vector<std::function<void()>> GetLargeBuildBlob(const Oid& BuildId, + const IoHash& RawHash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive, + std::function<void()>&& OnComplete) override + { + ZEN_TRACE_CPU("Jupiter::GetLargeBuildBlob"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + std::vector<std::function<JupiterResult()>> WorkItems; + JupiterResult GetMultipartBlobResult = m_Session.GetMultipartBuildBlob(m_Namespace, + m_Bucket, + BuildId, + RawHash, + ChunkSize, + std::move(OnReceive), + std::move(OnComplete), + WorkItems); + + AddStatistic(GetMultipartBlobResult); + if (!GetMultipartBlobResult.Success) + { + throw std::runtime_error( + fmt::format("Failed getting build part: {} ({})", GetMultipartBlobResult.Reason, GetMultipartBlobResult.ErrorCode)); + } + std::vector<std::function<void()>> WorkList; + for (auto& WorkItem : WorkItems) + { + WorkList.emplace_back([this, WorkItem = std::move(WorkItem)]() { + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult PartResult = WorkItem(); + AddStatistic(PartResult); + if (!PartResult.Success) + { + throw std::runtime_error(fmt::format("Failed getting build part: {} ({})", PartResult.Reason, PartResult.ErrorCode)); + } + }); + } + return WorkList; + } + + virtual bool PutBlockMetadata(const Oid& BuildId, const IoHash& BlockRawHash, const CbObject& MetaData) override + { + ZEN_TRACE_CPU("Jupiter::PutBlockMetadata"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + IoBuffer Payload = MetaData.GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutMetaResult = m_Session.PutBlockMetadata(m_Namespace, m_Bucket, BuildId, BlockRawHash, Payload); + AddStatistic(PutMetaResult); + if (!PutMetaResult.Success) + { + if (PutMetaResult.ErrorCode == int32_t(HttpResponseCode::NotFound)) + { + return false; + } + throw std::runtime_error( + fmt::format("Failed putting build block metadata: {} ({})", PutMetaResult.Reason, PutMetaResult.ErrorCode)); + } + return true; + } + + virtual CbObject FindBlocks(const Oid& BuildId, uint64_t MaxBlockCount) override + { + ZEN_TRACE_CPU("Jupiter::FindBlocks"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + JupiterResult FindResult = m_Session.FindBlocks(m_Namespace, m_Bucket, BuildId, MaxBlockCount); + AddStatistic(FindResult); + if (!FindResult.Success) + { + throw std::runtime_error(fmt::format("Failed fetching known blocks: {} ({})", FindResult.Reason, FindResult.ErrorCode)); + } + return PayloadToCbObject("Failed fetching known blocks"sv, FindResult.Response); + } + + virtual CbObject GetBlockMetadatas(const Oid& BuildId, std::span<const IoHash> BlockHashes) override + { + ZEN_TRACE_CPU("Jupiter::GetBlockMetadata"); + + Stopwatch ExecutionTimer; + auto _ = MakeGuard([&]() { m_Stats.TotalExecutionTimeUs += ExecutionTimer.GetElapsedTimeUs(); }); + CbObjectWriter Request; + + Request.BeginArray("blocks"sv); + for (const IoHash& BlockHash : BlockHashes) + { + Request.AddHash(BlockHash); + } + Request.EndArray(); + + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult GetBlockMetadataResult = m_Session.GetBlockMetadata(m_Namespace, m_Bucket, BuildId, Payload); + AddStatistic(GetBlockMetadataResult); + if (!GetBlockMetadataResult.Success) + { + throw std::runtime_error( + fmt::format("Failed fetching block metadatas: {} ({})", GetBlockMetadataResult.Reason, GetBlockMetadataResult.ErrorCode)); + } + return PayloadToCbObject("Failed fetching block metadatas", GetBlockMetadataResult.Response); + } + + virtual void PutBuildPartStats(const Oid& BuildId, + const Oid& BuildPartId, + const tsl::robin_map<std::string, double>& FloatStats) override + { + ZEN_UNUSED(BuildId, BuildPartId, FloatStats); + CbObjectWriter Request; + Request.BeginObject("floatStats"sv); + for (auto It : FloatStats) + { + Request.AddFloat(It.first, It.second); + } + Request.EndObject(); + IoBuffer Payload = Request.Save().GetBuffer().AsIoBuffer(); + Payload.SetContentType(ZenContentType::kCbObject); + JupiterResult PutBuildPartStatsResult = m_Session.PutBuildPartStats(m_Namespace, m_Bucket, BuildId, BuildPartId, Payload); + AddStatistic(PutBuildPartStatsResult); + if (!PutBuildPartStatsResult.Success) + { + throw std::runtime_error(fmt::format("Failed posting build part statistics: {} ({})", + PutBuildPartStatsResult.Reason, + PutBuildPartStatsResult.ErrorCode)); + } + } + +private: + static CbObject PayloadToCbObject(std::string_view Context, const IoBuffer& Payload) + { + if (Payload.GetContentType() == ZenContentType::kJSON) + { + std::string_view Json(reinterpret_cast<const char*>(Payload.GetData()), Payload.GetSize()); + return LoadCompactBinaryFromJson(Json).AsObject(); + } + else if (Payload.GetContentType() == ZenContentType::kCbObject) + { + return LoadCompactBinaryObject(Payload); + } + else if (Payload.GetContentType() == ZenContentType::kCompressedBinary) + { + IoHash RawHash; + uint64_t RawSize; + return LoadCompactBinaryObject(CompressedBuffer::FromCompressed(SharedBuffer(Payload), RawHash, RawSize)); + } + else + { + throw std::runtime_error( + fmt::format("{}: {} ({})", "Unsupported response format", Context, ToString(Payload.GetContentType()))); + } + } + + void AddStatistic(const JupiterResult& Result) + { + m_Stats.TotalBytesWritten += Result.SentBytes; + m_Stats.TotalBytesRead += Result.ReceivedBytes; + m_Stats.TotalRequestTimeUs += uint64_t(Result.ElapsedSeconds * 1000000.0); + m_Stats.TotalRequestCount++; + } + + JupiterSession m_Session; + Statistics& m_Stats; + const std::string m_Namespace; + const std::string m_Bucket; + const std::filesystem::path m_TempFolderPath; +}; + +std::unique_ptr<BuildStorage> +CreateJupiterBuildStorage(LoggerRef InLog, + HttpClient& InHttpClient, + BuildStorage::Statistics& Stats, + std::string_view Namespace, + std::string_view Bucket, + bool AllowRedirect, + const std::filesystem::path& TempFolderPath) +{ + ZEN_TRACE_CPU("CreateJupiterBuildStorage"); + + return std::make_unique<JupiterBuildStorage>(InLog, InHttpClient, Stats, Namespace, Bucket, AllowRedirect, TempFolderPath); +} + +} // namespace zen diff --git a/src/zenutil/jupiter/jupiterclient.cpp b/src/zenutil/jupiter/jupiterclient.cpp index 5e5da3750..dbac218a4 100644 --- a/src/zenutil/jupiter/jupiterclient.cpp +++ b/src/zenutil/jupiter/jupiterclient.cpp @@ -11,7 +11,6 @@ JupiterClient::JupiterClient(const JupiterClientOptions& Options, std::function< , m_DefaultDdcNamespace(Options.DdcNamespace) , m_DefaultBlobStoreNamespace(Options.BlobStoreNamespace) , m_ComputeCluster(Options.ComputeCluster) -, m_TokenProvider(std::move(TokenProvider)) , m_HttpClient(Options.ServiceUrl, HttpClientSettings{.ConnectTimeout = Options.ConnectTimeout, .Timeout = Options.Timeout, diff --git a/src/zenutil/jupiter/jupitersession.cpp b/src/zenutil/jupiter/jupitersession.cpp index f706a7efc..1fd59acdf 100644 --- a/src/zenutil/jupiter/jupitersession.cpp +++ b/src/zenutil/jupiter/jupitersession.cpp @@ -3,6 +3,9 @@ #include <zenutil/jupiter/jupitersession.h> #include <zencore/compactbinary.h> +#include <zencore/compactbinarybuilder.h> +#include <zencore/compositebuffer.h> +#include <zencore/compress.h> #include <zencore/fmtutils.h> #include <zencore/trace.h> @@ -46,7 +49,10 @@ namespace detail { } } // namespace detail -JupiterSession::JupiterSession(LoggerRef InLog, HttpClient& InHttpClient) : m_Log(InLog), m_HttpClient(InHttpClient) +JupiterSession::JupiterSession(LoggerRef InLog, HttpClient& InHttpClient, bool AllowRedirect) +: m_Log(InLog) +, m_HttpClient(InHttpClient) +, m_AllowRedirect(AllowRedirect) { } @@ -355,6 +361,32 @@ JupiterSession::CacheTypeExists(std::string_view Namespace, std::string_view Typ } JupiterResult +JupiterSession::ListBuildNamespaces() +{ + HttpClient::Response Response = m_HttpClient.Get(fmt::format("/api/v2/builds"), {HttpClient::Accept(ZenContentType::kJSON)}); + return detail::ConvertResponse(Response, "JupiterSession::ListBuildNamespaces"sv); +} + +JupiterResult +JupiterSession::ListBuildBuckets(std::string_view Namespace) +{ + HttpClient::Response Response = + m_HttpClient.Get(fmt::format("/api/v2/builds/{}", Namespace), {HttpClient::Accept(ZenContentType::kJSON)}); + return detail::ConvertResponse(Response, "JupiterSession::ListBuildBuckets"sv); +} + +JupiterResult +JupiterSession::ListBuilds(std::string_view Namespace, std::string_view BucketId, const IoBuffer& Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + std::string OptionalBucketPath = BucketId.empty() ? "" : fmt::format("/{}", BucketId); + HttpClient::Response Response = m_HttpClient.Post(fmt::format("/api/v2/builds/{}{}/search", Namespace, OptionalBucketPath), + Payload, + {HttpClient::Accept(ZenContentType::kCbObject)}); + return detail::ConvertResponse(Response, "JupiterSession::ListBuilds"sv); +} + +JupiterResult JupiterSession::PutBuild(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const IoBuffer& Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); @@ -424,29 +456,330 @@ JupiterResult JupiterSession::PutBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, ZenContentType ContentType, const CompositeBuffer& Payload) { - HttpClient::Response Response = m_HttpClient.Upload( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload, - ContentType); + HttpClient::Response Response = + m_HttpClient.Upload(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload, + ContentType); return detail::ConvertResponse(Response, "JupiterSession::PutBuildBlob"sv); } JupiterResult +JupiterSession::PutMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + ZenContentType ContentType, + uint64_t PayloadSize, + std::function<IoBuffer(uint64_t Offset, uint64_t Size)>&& Transmitter, + std::vector<std::function<JupiterResult(bool& OutIsComplete)>>& OutWorkItems) +{ + struct MultipartUploadResponse + { + struct Part + { + uint64_t FirstByte; + uint64_t LastByte; + std::string PartId; + std::string QueryString; + }; + + std::string UploadId; + std::string BlobName; + std::vector<Part> Parts; + + static MultipartUploadResponse Parse(CbObject& Payload) + { + MultipartUploadResponse Result; + Result.UploadId = Payload["uploadId"sv].AsString(); + Result.BlobName = Payload["blobName"sv].AsString(); + CbArrayView PartsArray = Payload["parts"sv].AsArrayView(); + Result.Parts.reserve(PartsArray.Num()); + for (CbFieldView PartView : PartsArray) + { + CbObjectView PartObject = PartView.AsObjectView(); + Result.Parts.emplace_back(Part{ + .FirstByte = PartObject["firstByte"sv].AsUInt64(), + .LastByte = PartObject["lastByte"sv].AsUInt64(), + .PartId = std::string(PartObject["partId"sv].AsString()), + .QueryString = std::string(PartObject["queryString"sv].AsString()), + }); + } + return Result; + } + }; + + CbObjectWriter StartMultipartPayloadWriter; + StartMultipartPayloadWriter.AddInteger("blobLength"sv, PayloadSize); + CbObject StartMultipartPayload = StartMultipartPayloadWriter.Save(); + + std::string StartMultipartResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/startMultipartUpload", Namespace, BucketId, BuildId, Hash.ToHexString()); + // ZEN_INFO("POST: {}", StartMultipartResponseRequestString); + HttpClient::Response StartMultipartResponse = + m_HttpClient.Post(StartMultipartResponseRequestString, StartMultipartPayload, HttpClient::Accept(ZenContentType::kCbObject)); + if (!StartMultipartResponse.IsSuccess()) + { + ZEN_WARN("{}", StartMultipartResponse.ErrorMessage("startMultipartUpload: ")); + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + CbObject ResponseObject = LoadCompactBinaryObject(StartMultipartResponse.ResponsePayload); + + struct WorkloadData + { + MultipartUploadResponse PartDescription; + std::function<IoBuffer(uint64_t Offset, uint64_t Size)> Transmitter; + std::atomic<size_t> PartsLeft; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + + Workload->PartDescription = MultipartUploadResponse::Parse(ResponseObject); + Workload->Transmitter = std::move(Transmitter); + Workload->PartsLeft = Workload->PartDescription.Parts.size(); + + for (size_t PartIndex = 0; PartIndex < Workload->PartDescription.Parts.size(); PartIndex++) + { + OutWorkItems.emplace_back([this, Namespace, BucketId, BuildId, Hash, ContentType, Workload, PartIndex]( + bool& OutIsComplete) -> JupiterResult { + const MultipartUploadResponse::Part& Part = Workload->PartDescription.Parts[PartIndex]; + IoBuffer PartPayload = Workload->Transmitter(Part.FirstByte, Part.LastByte - Part.FirstByte); + std::string MultipartUploadResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}&supportsRedirect={}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + Part.QueryString, + m_AllowRedirect ? "true"sv : "false"sv); + // ZEN_INFO("PUT: {}", MultipartUploadResponseRequestString); + HttpClient::Response MultipartUploadResponse = m_HttpClient.Put(MultipartUploadResponseRequestString, PartPayload); + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(MultipartUploadResponseRequestString)); + } + OutIsComplete = Workload->PartsLeft.fetch_sub(1) == 1; + if (OutIsComplete) + { + int64_t TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + int64_t TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + double TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + HttpClient::Response MultipartEndResponse = MultipartUploadResponse; + while (MultipartEndResponse.IsSuccess()) + { + CbObjectWriter CompletePayloadWriter; + CompletePayloadWriter.AddString("blobName"sv, Workload->PartDescription.BlobName); + CompletePayloadWriter.AddString("uploadId"sv, Workload->PartDescription.UploadId); + CompletePayloadWriter.AddBool("isCompressed"sv, ContentType == ZenContentType::kCompressedBinary); + CompletePayloadWriter.BeginArray("partIds"sv); + std::unordered_map<std::string, size_t> PartNameToIndex; + for (size_t UploadPartIndex = 0; UploadPartIndex < Workload->PartDescription.Parts.size(); UploadPartIndex++) + { + const MultipartUploadResponse::Part& PartDescription = Workload->PartDescription.Parts[UploadPartIndex]; + PartNameToIndex.insert({PartDescription.PartId, UploadPartIndex}); + CompletePayloadWriter.AddString(PartDescription.PartId); + } + CompletePayloadWriter.EndArray(); // "partIds" + CbObject CompletePayload = CompletePayloadWriter.Save(); + + std::string MultipartEndResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/completeMultipart", Namespace, BucketId, BuildId, Hash.ToHexString()); + + MultipartEndResponse = m_HttpClient.Post(MultipartEndResponseRequestString, + CompletePayload, + HttpClient::Accept(ZenContentType::kCbObject)); + TotalUploadedBytes += MultipartEndResponse.UploadedBytes; + TotalDownloadedBytes += MultipartEndResponse.DownloadedBytes; + TotalElapsedSeconds += MultipartEndResponse.ElapsedSeconds; + if (MultipartEndResponse.IsSuccess()) + { + CbObject ResponseObject = MultipartEndResponse.AsObject(); + CbArrayView MissingPartsArrayView = ResponseObject["missingParts"sv].AsArrayView(); + if (MissingPartsArrayView.Num() == 0) + { + break; + } + else + { + for (CbFieldView PartIdView : MissingPartsArrayView) + { + std::string RetryPartId(PartIdView.AsString()); + size_t RetryPartIndex = PartNameToIndex.at(RetryPartId); + const MultipartUploadResponse::Part& RetryPart = Workload->PartDescription.Parts[RetryPartIndex]; + IoBuffer RetryPartPayload = + Workload->Transmitter(RetryPart.FirstByte, RetryPart.LastByte - RetryPart.FirstByte - 1); + std::string RetryMultipartUploadResponseRequestString = + fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}/uploadMultipart{}&supportsRedirect={}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + RetryPart.QueryString, + m_AllowRedirect ? "true"sv : "false"sv); + + MultipartUploadResponse = m_HttpClient.Put(RetryMultipartUploadResponseRequestString, RetryPartPayload); + TotalUploadedBytes = MultipartUploadResponse.UploadedBytes; + TotalDownloadedBytes = MultipartUploadResponse.DownloadedBytes; + TotalElapsedSeconds = MultipartUploadResponse.ElapsedSeconds; + if (!MultipartUploadResponse.IsSuccess()) + { + ZEN_WARN("{}", MultipartUploadResponse.ErrorMessage(RetryMultipartUploadResponseRequestString)); + MultipartEndResponse = MultipartUploadResponse; + } + } + } + } + else + { + ZEN_WARN("{}", MultipartEndResponse.ErrorMessage(MultipartEndResponseRequestString)); + } + } + MultipartEndResponse.UploadedBytes = TotalUploadedBytes; + MultipartEndResponse.DownloadedBytes = TotalDownloadedBytes; + MultipartEndResponse.ElapsedSeconds = TotalElapsedSeconds; + return detail::ConvertResponse(MultipartEndResponse, "JupiterSession::PutMultipartBuildBlob"sv); + } + return detail::ConvertResponse(MultipartUploadResponse, "JupiterSession::PutMultipartBuildBlob"sv); + }); + } + return detail::ConvertResponse(StartMultipartResponse, "JupiterSession::PutMultipartBuildBlob"sv); +} + +JupiterResult +JupiterSession::GetMultipartBuildBlob(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const IoHash& Hash, + uint64_t ChunkSize, + std::function<void(uint64_t Offset, const IoBuffer& Chunk)>&& OnReceive, + std::function<void()>&& OnComplete, + std::vector<std::function<JupiterResult()>>& OutWorkItems) +{ + std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect={}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + m_AllowRedirect ? "true"sv : "false"sv); + HttpClient::Response Response = + m_HttpClient.Get(RequestUrl, HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", 0, ChunkSize - 1)}})); + if (Response.IsSuccess()) + { + if (std::string_view ContentRange = Response.Header.Entries["Content-Range"]; !ContentRange.empty()) + { + if (std::string_view::size_type SizeDelimiterPos = ContentRange.find('/'); SizeDelimiterPos != std::string_view::npos) + { + if (std::optional<uint64_t> TotalSizeMaybe = ParseInt<uint64_t>(ContentRange.substr(SizeDelimiterPos + 1)); + TotalSizeMaybe.has_value()) + { + uint64_t TotalSize = TotalSizeMaybe.value(); + uint64_t PayloadSize = Response.ResponsePayload.GetSize(); + + OnReceive(0, Response.ResponsePayload); + + if (TotalSize > PayloadSize) + { + struct WorkloadData + { + std::function<void(uint64_t Offset, const IoBuffer& Chunk)> OnReceive; + std::function<void()> OnComplete; + std::atomic<uint64_t> BytesRemaining; + }; + + std::shared_ptr<WorkloadData> Workload(std::make_shared<WorkloadData>()); + Workload->OnReceive = std::move(OnReceive); + Workload->OnComplete = std::move(OnComplete); + Workload->BytesRemaining = TotalSize - PayloadSize; + + uint64_t Offset = PayloadSize; + while (Offset < TotalSize) + { + uint64_t PartSize = Min(ChunkSize, TotalSize - Offset); + OutWorkItems.emplace_back([this, + Namespace = std::string(Namespace), + BucketId = std::string(BucketId), + BuildId = Oid(BuildId), + Hash = IoHash(Hash), + TotalSize, + Workload, + Offset, + PartSize]() -> JupiterResult { + std::string RequestUrl = fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect={}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + m_AllowRedirect ? "true"sv : "false"sv); + HttpClient::Response Response = m_HttpClient.Get( + RequestUrl, + HttpClient::KeyValueMap({{"Range", fmt::format("bytes={}-{}", Offset, Offset + PartSize - 1)}})); + if (Response.IsSuccess()) + { + Workload->OnReceive(Offset, Response.ResponsePayload); + uint64_t ByteRemaning = Workload->BytesRemaining.fetch_sub(Response.ResponsePayload.GetSize()); + if (ByteRemaning == Response.ResponsePayload.GetSize()) + { + Workload->OnComplete(); + } + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + }); + Offset += PartSize; + } + } + else + { + OnComplete(); + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); + } + } + } + OnReceive(0, Response.ResponsePayload); + OnComplete(); + } + return detail::ConvertResponse(Response, "JupiterSession::GetMultipartBuildBlob"sv); +} + +JupiterResult JupiterSession::GetBuildBlob(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, - std::filesystem::path TempFolderPath) + std::filesystem::path TempFolderPath, + uint64_t Offset, + uint64_t Size) { - HttpClient::Response Response = m_HttpClient.Download( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blobs/{}", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - TempFolderPath); + HttpClient::KeyValueMap Headers; + if (Offset != 0 || Size != (uint64_t)-1) + { + Headers.Entries.insert({"Range", fmt::format("bytes={}-{}", Offset, Offset + Size - 1)}); + } + HttpClient::Response Response = m_HttpClient.Download(fmt::format("/api/v2/builds/{}/{}/{}/blobs/{}?supportsRedirect={}", + Namespace, + BucketId, + BuildId, + Hash.ToHexString(), + m_AllowRedirect ? "true"sv : "false"sv), + TempFolderPath, + Headers); + if (Response.IsSuccess()) + { + // If we get a redirect to S3 or a non-Jupiter endpoint the content type will not be correct, validate it and set it + if (m_AllowRedirect && (Response.ResponsePayload.GetContentType() == HttpContentType::kBinary)) + { + IoHash ValidateRawHash; + uint64_t ValidateRawSize = 0; + ZEN_ASSERT_SLOW(CompressedBuffer::ValidateCompressedHeader(Response.ResponsePayload, ValidateRawHash, ValidateRawSize)); + ZEN_ASSERT_SLOW(ValidateRawHash == Hash); + ZEN_ASSERT_SLOW(ValidateRawSize > 0); + ZEN_UNUSED(ValidateRawHash, ValidateRawSize); + Response.ResponsePayload.SetContentType(ZenContentType::kCompressedBinary); + } + } return detail::ConvertResponse(Response, "JupiterSession::GetBuildBlob"sv); } @@ -454,14 +787,13 @@ JupiterResult JupiterSession::PutBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, - const Oid& PartId, const IoHash& Hash, const IoBuffer& Payload) { ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); - HttpClient::Response Response = m_HttpClient.Put( - fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, PartId, Hash.ToHexString()), - Payload); + HttpClient::Response Response = + m_HttpClient.Put(fmt::format("/api/v2/builds/{}/{}/{}/blocks/{}/metadata", Namespace, BucketId, BuildId, Hash.ToHexString()), + Payload); return detail::ConvertResponse(Response, "JupiterSession::PutBlockMetadata"sv); } @@ -494,12 +826,39 @@ JupiterSession::FinalizeBuildPart(std::string_view Namespace, } JupiterResult -JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, const Oid& PartId) +JupiterSession::FindBlocks(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, uint64_t MaxBlockCount) { + const std::string Parameters = MaxBlockCount == (uint64_t)-1 ? "" : fmt::format("?count={}", MaxBlockCount); HttpClient::Response Response = - m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/blocks/listBlocks", Namespace, BucketId, BuildId, PartId), + m_HttpClient.Get(fmt::format("/api/v2/builds/{}/{}/{}/blocks/listBlocks{}", Namespace, BucketId, BuildId, Parameters), HttpClient::Accept(ZenContentType::kCbObject)); return detail::ConvertResponse(Response, "JupiterSession::FindBlocks"sv); } +JupiterResult +JupiterSession::GetBlockMetadata(std::string_view Namespace, std::string_view BucketId, const Oid& BuildId, IoBuffer Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = + m_HttpClient.Post(fmt::format("/api/v2/builds/{}/{}/{}/blocks/getBlockMetadata", Namespace, BucketId, BuildId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + return detail::ConvertResponse(Response, "JupiterSession::GetBlockMetadata"sv); +} + +JupiterResult +JupiterSession::PutBuildPartStats(std::string_view Namespace, + std::string_view BucketId, + const Oid& BuildId, + const Oid& BuildPartId, + IoBuffer Payload) +{ + ZEN_ASSERT(Payload.GetContentType() == ZenContentType::kCbObject); + HttpClient::Response Response = + m_HttpClient.Put(fmt::format("/api/v2/builds/{}/{}/{}/parts/{}/stats", Namespace, BucketId, BuildId, BuildPartId), + Payload, + HttpClient::Accept(ZenContentType::kCbObject)); + return detail::ConvertResponse(Response, "JupiterSession::PutBuildPartStats"sv); +} + } // namespace zen diff --git a/src/zenutil/logging.cpp b/src/zenutil/logging.cpp index 6314c407f..cb0fd6679 100644 --- a/src/zenutil/logging.cpp +++ b/src/zenutil/logging.cpp @@ -10,11 +10,13 @@ ZEN_THIRD_PARTY_INCLUDES_START #include <spdlog/spdlog.h> ZEN_THIRD_PARTY_INCLUDES_END +#include <zencore/callstack.h> #include <zencore/compactbinary.h> #include <zencore/filesystem.h> #include <zencore/logging.h> #include <zencore/memory/llm.h> #include <zencore/string.h> +#include <zencore/timer.h> #include <zenutil/logging/fullformatter.h> #include <zenutil/logging/jsonformatter.h> #include <zenutil/logging/rotatingfilesink.h> @@ -48,7 +50,7 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) zen::logging::InitializeLogging(); zen::logging::EnableVTMode(); - bool IsAsync = true; + bool IsAsync = LogOptions.AllowAsync; if (LogOptions.IsDebug) { @@ -97,7 +99,13 @@ BeginInitializeLogging(const LoggingOptions& LogOptions) } } - std::set_terminate([]() { ZEN_CRITICAL("Program exited abnormally via std::terminate()"); }); + std::set_terminate([]() { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_CRITICAL("Program exited abnormally via std::terminate()\n{}", CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + }); // Default @@ -174,7 +182,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) LogLevel = logging::level::Debug; } - if (LogOptions.IsTest) + if (LogOptions.IsTest || LogOptions.IsVerbose) { LogLevel = logging::level::Trace; } @@ -184,21 +192,25 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) logging::RefreshLogLevels(LogLevel); spdlog::flush_on(spdlog::level::err); spdlog::flush_every(std::chrono::seconds{2}); - spdlog::set_formatter( - std::make_unique<logging::full_formatter>(LogOptions.LogId, std::chrono::system_clock::now())); // default to duration prefix + spdlog::set_formatter(std::make_unique<logging::full_formatter>( + LogOptions.LogId, + std::chrono::system_clock::now() - std::chrono::milliseconds(GetTimeSinceProcessStart()))); // default to duration prefix - if (LogOptions.AbsLogFile.extension() == ".json") + if (g_FileSink) { - g_FileSink->set_formatter(std::make_unique<logging::json_formatter>(LogOptions.LogId)); - } - else - { - g_FileSink->set_formatter(std::make_unique<logging::full_formatter>(LogOptions.LogId)); // this will have a date prefix - } + if (LogOptions.AbsLogFile.extension() == ".json") + { + g_FileSink->set_formatter(std::make_unique<logging::json_formatter>(LogOptions.LogId)); + } + else + { + g_FileSink->set_formatter(std::make_unique<logging::full_formatter>(LogOptions.LogId)); // this will have a date prefix + } - const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); + const std::string StartLogTime = zen::DateTime::Now().ToIso8601(); - spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + spdlog::apply_all([&](auto Logger) { Logger->info("log starting at {}", StartLogTime); }); + } g_IsLoggingInitialized = true; } @@ -206,7 +218,7 @@ FinishInitializeLogging(const LoggingOptions& LogOptions) void ShutdownLogging() { - if (g_IsLoggingInitialized) + if (g_IsLoggingInitialized && g_FileSink) { auto DefaultLogger = zen::logging::Default(); ZEN_LOG_INFO(DefaultLogger, "log ending at {}", zen::DateTime::Now().ToIso8601()); diff --git a/src/zenutil/parallelwork.cpp b/src/zenutil/parallelwork.cpp new file mode 100644 index 000000000..aa806438b --- /dev/null +++ b/src/zenutil/parallelwork.cpp @@ -0,0 +1,225 @@ +// Copyright Epic Games, Inc. All Rights Reserved. + +#include <zenutil/parallelwork.h> + +#include <zencore/callstack.h> +#include <zencore/except.h> +#include <zencore/fmtutils.h> +#include <zencore/logging.h> + +#include <typeinfo> + +#if ZEN_WITH_TESTS +# include <zencore/testing.h> +#endif // ZEN_WITH_TESTS + +namespace zen { + +ParallelWork::ParallelWork(std::atomic<bool>& AbortFlag, std::atomic<bool>& PauseFlag) +: m_AbortFlag(AbortFlag) +, m_PauseFlag(PauseFlag) +, m_PendingWork(1) +{ +} + +ParallelWork::~ParallelWork() +{ + try + { + if (!m_DispatchComplete) + { + ZEN_ASSERT(m_PendingWork.Remaining() > 0); + ZEN_WARN( + "ParallelWork disposed without explicit wait for completion, likely caused by an exception, waiting for dispatched threads " + "to complete"); + m_PendingWork.CountDown(); + } + m_PendingWork.Wait(); + ptrdiff_t RemainingWork = m_PendingWork.Remaining(); + if (RemainingWork != 0) + { + void* Frames[8]; + uint32_t FrameCount = GetCallstack(2, 8, Frames); + CallstackFrames* Callstack = CreateCallstack(FrameCount, Frames); + ZEN_ERROR("ParallelWork destructor waited for outstanding work but pending work count is {} instead of 0\n{}", + RemainingWork, + CallstackToString(Callstack, " ")); + FreeCallstack(Callstack); + + uint32_t WaitedMs = 0; + while (m_PendingWork.Remaining() > 0 && WaitedMs < 2000) + { + Sleep(50); + WaitedMs += 50; + } + RemainingWork = m_PendingWork.Remaining(); + if (RemainingWork != 0) + { + ZEN_WARN("ParallelWork destructor safety wait failed, pending work count at {}", RemainingWork) + } + else + { + ZEN_INFO("ParallelWork destructor safety wait succeeded"); + } + } + } + catch (const std::exception& Ex) + { + ZEN_ERROR("Exception in ~ParallelWork: {}", Ex.what()); + } +} + +ParallelWork::ExceptionCallback +ParallelWork::DefaultErrorFunction() +{ + return [&](std::exception_ptr Ex, std::atomic<bool>& AbortFlag) { + m_ErrorLock.WithExclusiveLock([&]() { m_Errors.push_back(Ex); }); + AbortFlag = true; + }; +} + +void +ParallelWork::Wait(int32_t UpdateIntervalMS, UpdateCallback&& UpdateCallback) +{ + ZEN_ASSERT(!m_DispatchComplete); + m_DispatchComplete = true; + + ZEN_ASSERT(m_PendingWork.Remaining() > 0); + m_PendingWork.CountDown(); + + while (!m_PendingWork.Wait(UpdateIntervalMS)) + { + UpdateCallback(m_AbortFlag.load(), m_PauseFlag.load(), m_PendingWork.Remaining()); + } + + RethrowErrors(); +} + +void +ParallelWork::Wait() +{ + ZEN_ASSERT(!m_DispatchComplete); + m_DispatchComplete = true; + + ZEN_ASSERT(m_PendingWork.Remaining() > 0); + m_PendingWork.CountDown(); + m_PendingWork.Wait(); + + RethrowErrors(); +} + +void +ParallelWork::RethrowErrors() +{ + if (!m_Errors.empty()) + { + if (m_Errors.size() > 1) + { + ZEN_INFO("Multiple exceptions thrown during ParallelWork execution, dropping the following exceptions:"); + auto It = m_Errors.begin() + 1; + while (It != m_Errors.end()) + { + try + { + std::rethrow_exception(*It); + } + catch (const std::exception& Ex) + { + ZEN_INFO(" {}", Ex.what()); + } + It++; + } + } + std::exception_ptr Ex = m_Errors.front(); + m_Errors.clear(); + std::rethrow_exception(Ex); + } +} + +#if ZEN_WITH_TESTS + +TEST_CASE("parallellwork.nowork") +{ + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + Work.Wait(); +} + +TEST_CASE("parallellwork.basic") +{ + WorkerThreadPool WorkerPool(2); + + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + for (uint32_t I = 0; I < 5; I++) + { + Work.ScheduleWork(WorkerPool, [](std::atomic<bool>& AbortFlag) { CHECK(!AbortFlag); }); + } + Work.Wait(); +} + +TEST_CASE("parallellwork.throws_in_work") +{ + WorkerThreadPool WorkerPool(2); + + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + for (uint32_t I = 0; I < 10; I++) + { + Work.ScheduleWork(WorkerPool, [I](std::atomic<bool>& AbortFlag) { + ZEN_UNUSED(AbortFlag); + if (I > 3) + { + throw std::runtime_error("We throw in async thread"); + } + else + { + Sleep(10); + } + }); + } + CHECK_THROWS_WITH(Work.Wait(), "We throw in async thread"); +} + +TEST_CASE("parallellwork.throws_in_dispatch") +{ + WorkerThreadPool WorkerPool(2); + std::atomic<uint32_t> ExecutedCount; + try + { + std::atomic<bool> AbortFlag; + std::atomic<bool> PauseFlag; + ParallelWork Work(AbortFlag, PauseFlag); + for (uint32_t I = 0; I < 5; I++) + { + Work.ScheduleWork(WorkerPool, [I, &ExecutedCount](std::atomic<bool>& AbortFlag) { + if (AbortFlag.load()) + { + return; + } + ExecutedCount++; + }); + if (I == 3) + { + throw std::runtime_error("We throw in dispatcher thread"); + } + } + CHECK(false); + } + catch (const std::runtime_error& Ex) + { + CHECK_EQ("We throw in dispatcher thread", std::string(Ex.what())); + CHECK_LE(ExecutedCount.load(), 4); + } +} + +void +parallellwork_forcelink() +{ +} +#endif // ZEN_WITH_TESTS + +} // namespace zen diff --git a/src/zenutil/workerpools.cpp b/src/zenutil/workerpools.cpp index e3165e838..797034978 100644 --- a/src/zenutil/workerpools.cpp +++ b/src/zenutil/workerpools.cpp @@ -11,9 +11,10 @@ ZEN_THIRD_PARTY_INCLUDES_END namespace zen { namespace { - const int LargeWorkerThreadPoolTreadCount = gsl::narrow<int>(std::thread::hardware_concurrency()); + const int LargeWorkerThreadPoolTreadCount = gsl::narrow<int>(Max(std::thread::hardware_concurrency() - 1u, 2u)); const int MediumWorkerThreadPoolTreadCount = gsl::narrow<int>(Max((std::thread::hardware_concurrency() / 4u), 2u)); const int SmallWorkerThreadPoolTreadCount = gsl::narrow<int>(Max((std::thread::hardware_concurrency() / 8u), 1u)); + const int TinyWorkerThreadPoolTreadCount = 1; static bool IsShutDown = false; @@ -35,6 +36,9 @@ namespace { WorkerPool BurstSmallWorkerPool = {.TreadCount = SmallWorkerThreadPoolTreadCount, .Name = "SmallThreadPool(burst)"}; WorkerPool BackgroundSmallWorkerPool = {.TreadCount = SmallWorkerThreadPoolTreadCount, .Name = "SmallThreadPool(bkg)"}; + WorkerPool BurstTinyWorkerPool = {.TreadCount = TinyWorkerThreadPoolTreadCount, .Name = "TinyThreadPool(burst)"}; + WorkerPool BackgroundTinyWorkerPool = {.TreadCount = TinyWorkerThreadPoolTreadCount, .Name = "TinyThreadPool(bkg)"}; + WorkerPool SyncWorkerPool = {.TreadCount = 0, .Name = "SyncThreadPool"}; WorkerThreadPool& EnsurePoolPtr(WorkerPool& Pool) @@ -75,6 +79,12 @@ GetSmallWorkerPool(EWorkloadType WorkloadType) } WorkerThreadPool& +GetTinyWorkerPool(EWorkloadType WorkloadType) +{ + return EnsurePoolPtr(WorkloadType == EWorkloadType::Burst ? BurstTinyWorkerPool : BackgroundTinyWorkerPool); +} + +WorkerThreadPool& GetSyncWorkerPool() { return EnsurePoolPtr(SyncWorkerPool); @@ -91,6 +101,8 @@ ShutdownWorkerPools() BackgroundMediumWorkerPool.Pool.reset(); BurstSmallWorkerPool.Pool.reset(); BackgroundSmallWorkerPool.Pool.reset(); + BurstTinyWorkerPool.Pool.reset(); + BackgroundTinyWorkerPool.Pool.reset(); SyncWorkerPool.Pool.reset(); } } // namespace zen diff --git a/src/zenutil/zenserverprocess.cpp b/src/zenutil/zenserverprocess.cpp index b36f11741..a5b342cb0 100644 --- a/src/zenutil/zenserverprocess.cpp +++ b/src/zenutil/zenserverprocess.cpp @@ -534,7 +534,7 @@ ZenServerEnvironment::CreateNewTestDir() TestDir << "test"sv << int64_t(ZenServerTestCounter.fetch_add(1)); std::filesystem::path TestPath = m_TestBaseDir / TestDir.c_str(); - ZEN_ASSERT(!std::filesystem::exists(TestPath)); + ZEN_ASSERT(!IsDir(TestPath)); ZEN_INFO("Creating new test dir @ '{}'", TestPath); @@ -568,7 +568,7 @@ ZenServerInstance::~ZenServerInstance() { Shutdown(); std::error_code DummyEc; - std::filesystem::remove(std::filesystem::temp_directory_path() / ("zenserver_" + m_Name + ".log"), DummyEc); + RemoveFile(std::filesystem::temp_directory_path() / ("zenserver_" + m_Name + ".log"), DummyEc); } catch (const std::exception& Err) { @@ -619,6 +619,7 @@ ZenServerInstance::Shutdown() std::error_code Ec; if (SignalShutdown(Ec)) { + Stopwatch Timer; ZEN_DEBUG("Waiting for zenserver process {} ({}) to shut down", m_Name, m_Process.Pid()); while (!m_Process.Wait(1000)) { @@ -632,7 +633,10 @@ ZenServerInstance::Shutdown() ZEN_WARN("Wait abandoned by exited process"); return 0; } - ZEN_WARN("Waiting for zenserver process {} ({}) timed out", m_Name, m_Process.Pid()); + ZEN_WARN("Waited for zenserver process {} ({}) to exit for {}", + m_Name, + m_Process.Pid(), + NiceTimeSpanMs(Timer.GetElapsedTimeMs())); } ZEN_DEBUG("zenserver process {} ({}) exited", m_Name, m_Process.Pid()); int ExitCode = m_Process.GetExitCode(); @@ -1033,7 +1037,7 @@ std::string ZenServerInstance::GetLogOutput() const { std::filesystem::path OutputPath = std::filesystem::temp_directory_path() / ("zenserver_" + m_Name + ".log"); - if (std::filesystem::is_regular_file(OutputPath)) + if (IsFile(OutputPath)) { FileContents Contents = ReadFile(OutputPath); if (!Contents.ErrorCode) @@ -1055,7 +1059,7 @@ ZenServerInstance::Terminate() const std::filesystem::path BaseDir = m_Env.ProgramBaseDir(); const std::filesystem::path Executable = BaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL; ProcessHandle RunningProcess; - std::error_code Ec = FindProcess(Executable, RunningProcess); + std::error_code Ec = FindProcess(Executable, RunningProcess, /*IncludeSelf*/ false); if (Ec) { throw std::system_error(Ec, fmt::format("failed to look up running server executable '{}'", Executable)); @@ -1123,7 +1127,7 @@ ValidateLockFileInfo(const LockFileInfo& Info, std::string& OutReason) OutReason = fmt::format("listen port ({}) is not valid", Info.EffectiveListenPort); return false; } - if (!std::filesystem::is_directory(Info.DataDir)) + if (!IsDir(Info.DataDir)) { OutReason = fmt::format("data directory ('{}') does not exist", Info.DataDir); return false; diff --git a/src/zenutil/zenutil.cpp b/src/zenutil/zenutil.cpp index c54144549..fe23b00c1 100644 --- a/src/zenutil/zenutil.cpp +++ b/src/zenutil/zenutil.cpp @@ -6,6 +6,9 @@ # include <zenutil/cache/cacherequests.h> # include <zenutil/cache/rpcrecording.h> +# include <zenutil/chunkedfile.h> +# include <zenutil/commandlineoptions.h> +# include <zenutil/parallelwork.h> namespace zen { @@ -15,6 +18,9 @@ zenutil_forcelinktests() cachepolicy_forcelink(); cache::rpcrecord_forcelink(); cacherequests_forcelink(); + chunkedfile_forcelink(); + commandlineoptions_forcelink(); + parallellwork_forcelink(); } } // namespace zen diff --git a/src/zenvfs/vfsprovider.cpp b/src/zenvfs/vfsprovider.cpp index a3cfe9d15..9cec5372a 100644 --- a/src/zenvfs/vfsprovider.cpp +++ b/src/zenvfs/vfsprovider.cpp @@ -373,13 +373,12 @@ VfsProvider::Initialize() std::filesystem::path ManifestPath = Root / ".zen_vfs"; bool HaveManifest = false; - if (std::filesystem::exists(Root)) + if (IsFile(Root)) + { + throw std::runtime_error("specified VFS root exists but is not a directory"); + } + if (IsDir(Root)) { - if (!std::filesystem::is_directory(Root)) - { - throw std::runtime_error("specified VFS root exists but is not a directory"); - } - std::error_code Ec; m_RootPath = WideToUtf8(CanonicalPath(Root, Ec).c_str()); @@ -388,7 +387,7 @@ VfsProvider::Initialize() throw std::system_error(Ec); } - if (std::filesystem::exists(ManifestPath)) + if (IsFile(ManifestPath)) { FileContents ManifestData = zen::ReadFile(ManifestPath); CbObject Manifest = LoadCompactBinaryObject(ManifestData.Flatten()); |