aboutsummaryrefslogtreecommitdiff
path: root/src/zencompute/runners/remotehttprunner.h
blob: c17d0cf2af8aa054cbd77e44433b1a8d1fe5bec7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "zencompute/computeservice.h"

#if ZEN_WITH_COMPUTE_SERVICES

#	include "functionrunner.h"

#	include <zencore/compactbinarypackage.h>
#	include <zencore/logging.h>
#	include <zencore/uid.h>
#	include <zencore/workthreadpool.h>
#	include <zencore/zencore.h>
#	include <zenhttp/httpclient.h>
#	include <zenhttp/httpwsclient.h>

#	include <atomic>
#	include <cstddef>
#	include <cstdint>
#	include <filesystem>
#	include <memory>
#	include <string>
#	include <string_view>
#	include <thread>
#	include <unordered_map>
#	include <vector>

// Forward declaration: lets this header name zen::CidStore without including the
// header that defines it.
namespace zen {
class CidStore;
}  // namespace zen

namespace zen::compute {

/** HTTP-based runner

	This implements a DDC remote compute execution strategy via REST API.

	The class is a FunctionRunner and, privately, an IWsClientHandler: it owns an
	HttpWsClient (m_WsClient) used as a WebSocket completion notification client and
	receives that client's callbacks through the private OnWs* overrides below.

	Instances are neither copyable nor movable.

  */
class RemoteHttpRunner : public FunctionRunner, private IWsClientHandler
{
	// Copy and move are both disabled. Deleting the move operations already suppresses
	// the implicit copy operations; they are spelled out here so the intent is explicit.
	RemoteHttpRunner(const RemoteHttpRunner&) = delete;
	RemoteHttpRunner& operator=(const RemoteHttpRunner&) = delete;
	RemoteHttpRunner(RemoteHttpRunner&&) = delete;
	RemoteHttpRunner& operator=(RemoteHttpRunner&&) = delete;

public:
	/** Create a runner for the given remote host.

		@param InChunkResolver Chunk resolver; presumably bound to m_ChunkResolver, in which
							   case it must outlive this object - confirm against the definition.
		@param BaseDir		   Base directory; its use is not visible in this header.
		@param HostName		   Remote host; presumably stored in m_HostName (see GetHostName()).
		@param InWorkerPool	   Worker pool; presumably bound to m_WorkerPool, in which case it
							   must outlive this object - confirm against the definition.
	  */
	RemoteHttpRunner(ChunkResolver&				  InChunkResolver,
					 const std::filesystem::path& BaseDir,
					 std::string_view			  HostName,
					 WorkerThreadPool&			  InWorkerPool);
	~RemoteHttpRunner();

	// Runner interface overrides. `override` already implies `virtual`, so the keyword is
	// not repeated. The contracts are defined by the base class; the implementations are
	// not part of this header.

	void Shutdown() override;

	// Registers a worker described by WorkerPackage; the result must not be ignored.
	[[nodiscard]] bool RegisterWorker(const CbPackage& WorkerPackage) override;

	// Submits a single action; the result must not be ignored.
	[[nodiscard]] SubmitResult SubmitAction(Ref<RunnerAction> Action) override;

	[[nodiscard]] bool IsHealthy() override;

	[[nodiscard]] size_t GetSubmittedActionCount() override;

	[[nodiscard]] size_t QueryCapacity() override;

	// Submits several actions at once and returns a vector of SubmitResult.
	// NOTE(review): presumably one result per entry of Actions, in the same order - confirm.
	[[nodiscard]] std::vector<SubmitResult> SubmitActions(const std::vector<Ref<RunnerAction>>& Actions) override;

	// QueueId is the local queue id (see m_RemoteQueueTokens for the local -> remote mapping).
	void CancelRemoteQueue(int QueueId) override;

	// Returns a non-owning view of m_HostName. The view refers to this object's storage,
	// so it must not be used after the runner has been destroyed.
	std::string_view GetHostName() const { return m_HostName; }

protected:
	// Logger accessor; returns m_Log.
	LoggerRef Log() { return m_Log; }

private:
	LoggerRef		  m_Log;
	ChunkResolver&	  m_ChunkResolver;
	WorkerThreadPool& m_WorkerPool;
	std::string		  m_HostName;
	std::string		  m_BaseUrl;
	HttpClient		  m_Http;

	// NOTE(review): presumably cleared on shutdown to reject further submissions - confirm.
	std::atomic<bool> m_AcceptNewActions{true};
	int32_t			  m_MaxRunningActions = 256;  // arbitrary limit for testing
	int32_t			  m_MaxBatchSize	  = 50;	  // NOTE(review): presumably max actions per batch request - confirm

	// Bookkeeping for one action that has been handed to the remote host.
	struct HttpRunningAction
	{
		Ref<RunnerAction>	Action;									  // The local action object
		int					RemoteActionLsn = 0;					  // Remote LSN
		RunnerAction::State RemoteState		= RunnerAction::State::Failed;  // Defaults to Failed until set otherwise
		CbPackage			ActionResults;
	};

	// NOTE(review): m_RunningLock presumably guards m_RemoteRunningMap - confirm in the implementation.
	RwLock									   m_RunningLock;
	std::unordered_map<int, HttpRunningAction> m_RemoteRunningMap;	// Note that this is keyed on the *REMOTE* lsn

	// Monitor thread state.
	// NOTE(review): members are initialized in declaration order, so m_MonitorThread is
	// constructed before m_MonitorThreadEnabled and m_MonitorThreadEvent. Confirm the thread
	// is started from the constructor body rather than from the member-initializer list.
	std::thread		  m_MonitorThread;
	std::atomic<bool> m_MonitorThreadEnabled{true};
	Event			  m_MonitorThreadEvent;
	void			  MonitorThreadFunction();
	size_t			  SweepRunningActions();

	// NOTE(review): m_QueueTokenLock presumably guards m_RemoteQueueTokens - confirm in the implementation.
	RwLock						 m_QueueTokenLock;
	std::unordered_map<int, Oid> m_RemoteQueueTokens;  // local QueueId -> remote queue token

	// Stable identity for this runner instance, used as part of the idempotency key when
	// creating remote queues. Generated once at construction and never changes.
	Oid m_InstanceId;

	// WebSocket completion notification client
	std::unique_ptr<HttpWsClient> m_WsClient;
	std::atomic<bool>			  m_WsConnected{false};

	// IWsClientHandler callbacks. They are private because the base is inherited privately.
	void OnWsOpen() override;
	void OnWsMessage(const WebSocketMessage& Msg) override;
	void OnWsClose(uint16_t Code, std::string_view Reason) override;

	// Returns the remote queue token (an Oid) for the local QueueId.
	// NOTE(review): the name suggests the remote queue is created on demand from Metadata and
	// Config and cached in m_RemoteQueueTokens - confirm in the implementation.
	Oid EnsureRemoteQueue(int QueueId, const CbObject& Metadata, const CbObject& Config);

	// Batch submission helpers; each returns a vector of SubmitResult for the given Actions.
	std::vector<SubmitResult> SubmitActionBatch(const std::string& SubmitUrl, const std::vector<Ref<RunnerAction>>& Actions);
	std::vector<SubmitResult> ParseBatchResponse(const HttpClient::Response& Response, const std::vector<Ref<RunnerAction>>& Actions);
	std::vector<SubmitResult> FallbackToIndividualSubmit(const std::vector<Ref<RunnerAction>>& Actions);
};

}  // namespace zen::compute

#endif