1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "zencompute/computeservice.h"
#if ZEN_WITH_COMPUTE_SERVICES
# include "functionrunner.h"
# include <zencore/compactbinarypackage.h>
# include <zencore/logging.h>
# include <zencore/uid.h>
# include <zencore/workthreadpool.h>
# include <zencore/zencore.h>
# include <zenhttp/httpclient.h>
# include <zenhttp/httpwsclient.h>
# include <atomic>
# include <filesystem>
# include <memory>
# include <thread>
# include <unordered_map>
// Forward declaration only: CidStore is declared here as an incomplete type and is
// not defined in this header.
namespace zen {
class CidStore;
}
namespace zen::compute {
/** HTTP-based runner

This implements a DDC remote compute execution strategy via REST API.

The class derives publicly from FunctionRunner and privately from IWsClientHandler,
so the WebSocket callbacks (OnWsOpen / OnWsMessage / OnWsClose) are not part of its
public interface. Move construction and move assignment are deleted; because a move
operation is user-declared, the implicit copy operations are deleted as well, making
instances neither movable nor copyable.
*/
class RemoteHttpRunner : public FunctionRunner, private IWsClientHandler
{
// Non-movable (and therefore, implicitly, non-copyable).
RemoteHttpRunner(RemoteHttpRunner&&) = delete;
RemoteHttpRunner& operator=(RemoteHttpRunner&&) = delete;
public:
// Constructs a runner for the given host.
// InChunkResolver and InWorkerPool are taken by reference and the class stores
// reference members of the same types (m_ChunkResolver, m_WorkerPool) — presumably
// these are bound here, in which case both objects must outlive the runner
// (TODO confirm in the .cpp).
// NOTE(review): how BaseDir and HostName are used is not visible from this header.
RemoteHttpRunner(ChunkResolver& InChunkResolver,
const std::filesystem::path& BaseDir,
std::string_view HostName,
WorkerThreadPool& InWorkerPool);
~RemoteHttpRunner();
// FunctionRunner interface overrides. The value-returning ones are [[nodiscard]],
// so callers are expected to inspect the result.
virtual void Shutdown() override;
[[nodiscard]] virtual bool RegisterWorker(const CbPackage& WorkerPackage) override;
[[nodiscard]] virtual SubmitResult SubmitAction(Ref<RunnerAction> Action) override;
[[nodiscard]] virtual bool IsHealthy() override;
[[nodiscard]] virtual size_t GetSubmittedActionCount() override;
[[nodiscard]] virtual size_t QueryCapacity() override;
// Batch variant of SubmitAction; returns a vector of SubmitResult.
// NOTE(review): presumably one result per input action, in order — confirm in the .cpp.
[[nodiscard]] virtual std::vector<SubmitResult> SubmitActions(const std::vector<Ref<RunnerAction>>& Actions) override;
virtual void CancelRemoteQueue(int QueueId) override;
// Returns a non-owning view of m_HostName; the view is only valid while this
// runner is alive and m_HostName is not modified.
std::string_view GetHostName() const { return m_HostName; }
protected:
// Accessor for this runner's logger (returns m_Log).
LoggerRef Log() { return m_Log; }
private:
LoggerRef m_Log;
// Reference members: not owned by this class.
ChunkResolver& m_ChunkResolver;
WorkerThreadPool& m_WorkerPool;
std::string m_HostName;
// NOTE(review): presumably derived from the host name at construction — confirm.
std::string m_BaseUrl;
HttpClient m_Http;
// Atomic flag, initially true.
// NOTE(review): presumably cleared on Shutdown() to reject further submissions — confirm.
std::atomic<bool> m_AcceptNewActions{true};
int32_t m_MaxRunningActions = 256; // arbitrary limit for testing
// NOTE(review): presumably the maximum number of actions sent in one batch submit — confirm.
int32_t m_MaxBatchSize = 50;
// Per-action record; this is the value type stored in m_RemoteRunningMap.
struct HttpRunningAction
{
Ref<RunnerAction> Action;
int RemoteActionLsn = 0; // Remote LSN
// Defaults to Failed until explicitly set to something else.
RunnerAction::State RemoteState = RunnerAction::State::Failed;
CbPackage ActionResults;
};
// NOTE(review): presumably guards m_RemoteRunningMap — confirm in the .cpp.
RwLock m_RunningLock;
std::unordered_map<int, HttpRunningAction> m_RemoteRunningMap; // Note that this is keyed on the *REMOTE* lsn
// Monitor thread state: the thread object, an atomic enable flag (initially true)
// and an event.
// NOTE(review): presumably the thread runs MonitorThreadFunction() and the event
// is used to wake it — confirm in the .cpp.
std::thread m_MonitorThread;
std::atomic<bool> m_MonitorThreadEnabled{true};
Event m_MonitorThreadEvent;
void MonitorThreadFunction();
// NOTE(review): the meaning of the returned size_t is not visible here — confirm.
size_t SweepRunningActions();
// NOTE(review): presumably guards m_RemoteQueueTokens — confirm in the .cpp.
RwLock m_QueueTokenLock;
std::unordered_map<int, Oid> m_RemoteQueueTokens; // local QueueId → remote queue token
// Stable identity for this runner instance, used as part of the idempotency key when
// creating remote queues. Generated once at construction and never changes.
Oid m_InstanceId;
// WebSocket completion notification client
std::unique_ptr<HttpWsClient> m_WsClient;
// Atomic flag, initially false.
// NOTE(review): presumably toggled by OnWsOpen()/OnWsClose() — confirm.
std::atomic<bool> m_WsConnected{false};
// IWsClientHandler
void OnWsOpen() override;
void OnWsMessage(const WebSocketMessage& Msg) override;
void OnWsClose(uint16_t Code, std::string_view Reason) override;
// Takes a local QueueId plus metadata/config objects and returns an Oid.
// NOTE(review): presumably the remote queue token cached in m_RemoteQueueTokens — confirm.
Oid EnsureRemoteQueue(int QueueId, const CbObject& Metadata, const CbObject& Config);
// Private helpers for batch submission; each returns a vector of SubmitResult for
// the given actions.
std::vector<SubmitResult> SubmitActionBatch(const std::string& SubmitUrl, const std::vector<Ref<RunnerAction>>& Actions);
std::vector<SubmitResult> ParseBatchResponse(const HttpClient::Response& Response, const std::vector<Ref<RunnerAction>>& Actions);
std::vector<SubmitResult> FallbackToIndividualSubmit(const std::vector<Ref<RunnerAction>>& Actions);
};
} // namespace zen::compute
#endif
|