Skip to content

Commit 81ba9cf

Browse files
edoakes authored and tohtana committed
[core] Remove gcs_rpc_server.h dependency from GcsNodeManager (ray-project#55886)
Drops the compilation time of the `gcs_node_manager` target from ~24s to ~14s (measured locally on a change to `gcs_node_manager.cc`).
---------
Signed-off-by: Edward Oakes <[email protected]>
Signed-off-by: Masahiro Tanaka <[email protected]>
1 parent 91adf2e commit 81ba9cf

File tree

7 files changed

+211
-77
lines changed

7 files changed

+211
-77
lines changed

src/ray/gcs/gcs_server/BUILD.bazel

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ ray_cc_library(
7474
deps = [
7575
":gcs_init_data",
7676
":gcs_table_storage",
77+
":grpc_service_interfaces",
7778
"//src/ray/common:asio",
7879
"//src/ray/common:id",
7980
"//src/ray/common:ray_config",
8081
"//src/ray/gcs/pubsub:gcs_pub_sub_lib",
8182
"//src/ray/protobuf:gcs_service_cc_proto",
8283
"//src/ray/protobuf:ray_syncer_cc_proto",
83-
"//src/ray/rpc:gcs_server",
8484
"//src/ray/rpc:node_manager_client",
8585
"//src/ray/util:event",
8686
"//src/ray/util:logging",
@@ -219,6 +219,38 @@ ray_cc_library(
219219
],
220220
)
221221

222+
# Header-only target (it lists `hdrs` but no `srcs`) exposing grpc_service_interfaces.h.
# Visibility is private to this package; its only deps are the common status library
# and the generated gRPC code for gcs_service.
ray_cc_library(
223+
name = "grpc_service_interfaces",
224+
hdrs = [
225+
"grpc_service_interfaces.h",
226+
],
227+
visibility = ["//visibility:private"],
228+
deps = [
229+
"//src/ray/common:status",
230+
"//src/ray/protobuf:gcs_service_cc_grpc",
231+
],
232+
)
233+
234+
# Target for grpc_services.{h,cc}. It builds on `:grpc_service_interfaces` and
# additionally depends on the rpc grpc_server / server_call targets and grpc++.
# Visibility is private to this package.
ray_cc_library(
235+
name = "grpc_services",
236+
srcs = [
237+
"grpc_services.cc",
238+
],
239+
hdrs = [
240+
"grpc_services.h",
241+
],
242+
visibility = ["//visibility:private"],
243+
deps = [
244+
":grpc_service_interfaces",
245+
"//src/ray/common:asio",
246+
"//src/ray/common:id",
247+
"//src/ray/protobuf:gcs_service_cc_grpc",
248+
"//src/ray/rpc:grpc_server",
249+
"//src/ray/rpc:server_call",
250+
"@com_github_grpc_grpc//:grpc++",
251+
],
252+
)
253+
222254
ray_cc_library(
223255
name = "gcs_server_lib",
224256
srcs = [
@@ -255,6 +287,8 @@ ray_cc_library(
255287
":gcs_task_manager",
256288
":gcs_usage_stats_client",
257289
":gcs_worker_manager",
290+
":grpc_service_interfaces",
291+
":grpc_services",
258292
"//src/ray/gcs/pubsub:gcs_pub_sub_lib",
259293
"//src/ray/gcs/store_client",
260294
"//src/ray/gcs/store_client:in_memory_store_client",

src/ray/gcs/gcs_server/gcs_node_manager.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
#include "ray/common/id.h"
2525
#include "ray/gcs/gcs_server/gcs_init_data.h"
2626
#include "ray/gcs/gcs_server/gcs_table_storage.h"
27+
#include "ray/gcs/gcs_server/grpc_service_interfaces.h"
2728
#include "ray/gcs/pubsub/gcs_pub_sub.h"
28-
#include "ray/rpc/gcs/gcs_rpc_server.h"
2929
#include "ray/rpc/node_manager/raylet_client_pool.h"
3030
#include "ray/util/event.h"
3131
#include "src/ray/protobuf/gcs.pb.h"
@@ -39,7 +39,7 @@ class GcsStateTest;
3939
/// GcsNodeManager is responsible for managing and monitoring nodes as well as handling
4040
/// node and resource related rpc requests.
4141
/// This class is not thread-safe.
42-
class GcsNodeManager : public rpc::NodeInfoHandler {
42+
class GcsNodeManager : public rpc::NodeInfoGcsServiceHandler {
4343
public:
4444
/// Create a GcsNodeManager.
4545
///

src/ray/gcs/gcs_server/gcs_server.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "ray/gcs/gcs_server/gcs_placement_group_mgr.h"
3030
#include "ray/gcs/gcs_server/gcs_resource_manager.h"
3131
#include "ray/gcs/gcs_server/gcs_worker_manager.h"
32+
#include "ray/gcs/gcs_server/grpc_services.h"
3233
#include "ray/gcs/gcs_server/store_client_kv.h"
3334
#include "ray/gcs/store_client/in_memory_store_client.h"
3435
#include "ray/gcs/store_client/observable_store_client.h"
@@ -356,7 +357,9 @@ void GcsServer::InitGcsNodeManager(const GcsInitData &gcs_init_data) {
356357
// Initialize by gcs tables data.
357358
gcs_node_manager_->Initialize(gcs_init_data);
358359
rpc_server_.RegisterService(std::make_unique<rpc::NodeInfoGrpcService>(
359-
io_context_provider_.GetDefaultIOContext(), *gcs_node_manager_));
360+
io_context_provider_.GetDefaultIOContext(),
361+
*gcs_node_manager_,
362+
RayConfig::instance().gcs_max_active_rpcs_per_handler()));
360363
}
361364

362365
void GcsServer::InitGcsHealthCheckManager(const GcsInitData &gcs_init_data) {
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copyright 2025 The Ray Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
/*
16+
* This file defines the gRPC service *INTERFACES* only.
17+
* The subcomponent that handles a given interface should inherit from the relevant
18+
* class. The target for the subcomponent should depend only on this file, not on
19+
* grpc_services.h.
20+
*/
21+
22+
#pragma once
23+
24+
#include "ray/common/status.h"
25+
#include "src/ray/protobuf/gcs_service.grpc.pb.h"
26+
27+
namespace ray {
28+
namespace rpc {
29+
30+
/// Type of the `send_reply_callback` argument taken by every handler method declared
/// in this header. It is called with a `Status` and two optional `void()` callbacks
/// named `success` and `failure`.
/// NOTE(review): `std::function` is used here, but this header does not include
/// <functional> directly -- presumably it arrives transitively; confirm.
using SendReplyCallback = std::function<void(
31+
Status status, std::function<void()> success, std::function<void()> failure)>;
32+
33+
/// Copies the code and message of `status` into the status field of `reply`, then
/// invokes `send_reply_callback` with `ray::Status::OK()` and null success/failure
/// callbacks.
/// NOTE(review): this expands to three separate statements with no do/while wrapper,
/// so it must not be used as the body of an unbraced `if`/`for`. `status` is also
/// evaluated twice, so avoid passing an expression with side effects.
#define GCS_RPC_SEND_REPLY(send_reply_callback, reply, status) \
34+
reply->mutable_status()->set_code(static_cast<int>(status.code())); \
35+
reply->mutable_status()->set_message(status.message()); \
36+
send_reply_callback(ray::Status::OK(), nullptr, nullptr)
37+
38+
/// Abstract handler interface for the node-info GCS RPCs.
///
/// It declares one pure-virtual `Handle<Rpc>` method per RPC: GetClusterId,
/// RegisterNode, UnregisterNode, CheckAlive, DrainNode and GetAllNodeInfo.
/// Every method has the same shape: the request message is passed by value, `reply`
/// is a pointer to the reply message, and `send_reply_callback` is a
/// `SendReplyCallback`.
/// NOTE(review): presumably implementations fill in `reply` and then invoke
/// `send_reply_callback` exactly once -- confirm against the implementing class.
class NodeInfoGcsServiceHandler {
39+
public:
40+
/// Virtual so that implementations can be destroyed through this interface.
virtual ~NodeInfoGcsServiceHandler() = default;
41+
42+
/// Handles the GetClusterId RPC.
virtual void HandleGetClusterId(GetClusterIdRequest request,
43+
GetClusterIdReply *reply,
44+
SendReplyCallback send_reply_callback) = 0;
45+
46+
/// Handles the RegisterNode RPC.
virtual void HandleRegisterNode(RegisterNodeRequest request,
47+
RegisterNodeReply *reply,
48+
SendReplyCallback send_reply_callback) = 0;
49+
50+
/// Handles the UnregisterNode RPC.
virtual void HandleUnregisterNode(UnregisterNodeRequest request,
51+
UnregisterNodeReply *reply,
52+
SendReplyCallback send_reply_callback) = 0;
53+
54+
/// Handles the CheckAlive RPC.
virtual void HandleCheckAlive(CheckAliveRequest request,
55+
CheckAliveReply *reply,
56+
SendReplyCallback send_reply_callback) = 0;
57+
58+
/// Handles the DrainNode RPC.
virtual void HandleDrainNode(DrainNodeRequest request,
59+
DrainNodeReply *reply,
60+
SendReplyCallback send_reply_callback) = 0;
61+
62+
/// Handles the GetAllNodeInfo RPC.
virtual void HandleGetAllNodeInfo(GetAllNodeInfoRequest request,
63+
GetAllNodeInfoReply *reply,
64+
SendReplyCallback send_reply_callback) = 0;
65+
};
66+
67+
} // namespace rpc
68+
} // namespace ray
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Copyright 2025 The Ray Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
#include "ray/gcs/gcs_server/grpc_services.h"
15+
16+
#include <memory>
17+
#include <vector>
18+
19+
namespace ray {
20+
namespace rpc {
21+
22+
/// Sets up the server-side call handling for each NodeInfoGcsService RPC.
///
/// One handler macro is expanded per RPC (GetClusterId, RegisterNode, UnregisterNode,
/// DrainNode, GetAllNodeInfo, CheckAlive), each receiving
/// `max_active_rpcs_per_handler_` as its limit argument.
/// NOTE(review): `cq`, `server_call_factories` and `cluster_id` are not referenced
/// directly in this body; presumably the RPC_SERVICE_HANDLER* macros (defined
/// elsewhere) use them by name to append factories -- confirm against the macro
/// definitions.
void NodeInfoGrpcService::InitServerCallFactories(
23+
const std::unique_ptr<grpc::ServerCompletionQueue> &cq,
24+
std::vector<std::unique_ptr<ServerCallFactory>> *server_call_factories,
25+
const ClusterID &cluster_id) {
26+
// We only allow one cluster ID in the lifetime of a client.
27+
// So, if a client connects, it should not have a pre-existing different ID.
28+
// GetClusterId is the only RPC registered with the custom-auth macro variant; it is
// given AuthType::EMPTY_AUTH.
RPC_SERVICE_HANDLER_CUSTOM_AUTH(NodeInfoGcsService,
29+
GetClusterId,
30+
max_active_rpcs_per_handler_,
31+
AuthType::EMPTY_AUTH);
32+
// The remaining RPCs all use the plain handler macro.
RPC_SERVICE_HANDLER(NodeInfoGcsService, RegisterNode, max_active_rpcs_per_handler_)
33+
RPC_SERVICE_HANDLER(NodeInfoGcsService, UnregisterNode, max_active_rpcs_per_handler_)
34+
RPC_SERVICE_HANDLER(NodeInfoGcsService, DrainNode, max_active_rpcs_per_handler_)
35+
RPC_SERVICE_HANDLER(NodeInfoGcsService, GetAllNodeInfo, max_active_rpcs_per_handler_)
36+
RPC_SERVICE_HANDLER(NodeInfoGcsService, CheckAlive, max_active_rpcs_per_handler_)
37+
}
38+
39+
} // namespace rpc
40+
} // namespace ray
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright 2025 The Ray Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
/*
16+
* This file defines the gRPC service handlers for the GCS server binary.
17+
* Subcomponents that implement a given interface should inherit from the relevant
18+
* class in grpc_service_interfaces.h.
19+
*
20+
* The GCS server main binary should be the only user of this target.
21+
*/
22+
23+
#pragma once
24+
25+
#include <memory>
26+
#include <vector>
27+
28+
#include "ray/common/asio/instrumented_io_context.h"
29+
#include "ray/common/id.h"
30+
#include "ray/gcs/gcs_server/grpc_service_interfaces.h"
31+
#include "ray/rpc/grpc_server.h"
32+
#include "ray/rpc/server_call.h"
33+
#include "src/ray/protobuf/gcs_service.grpc.pb.h"
34+
35+
namespace ray {
36+
namespace rpc {
37+
38+
/// The `GrpcService` for `NodeInfoGcsService`.
///
/// Owns the generated async service object and keeps a reference to the handler
/// object plus a per-handler active-RPC limit supplied by the caller.
class NodeInfoGrpcService : public GrpcService {
39+
public:
40+
/// Constructor.
///
/// \param[in] io_service Forwarded to the `GrpcService` base class.
/// \param[in] service_handler The handler object. It is stored by reference and is
/// not owned by this class.
/// \param[in] max_active_rpcs_per_handler Stored in `max_active_rpcs_per_handler_`.
explicit NodeInfoGrpcService(instrumented_io_context &io_service,
41+
NodeInfoGcsServiceHandler &service_handler,
42+
int64_t max_active_rpcs_per_handler)
43+
: GrpcService(io_service),
44+
service_handler_(service_handler),
45+
max_active_rpcs_per_handler_(max_active_rpcs_per_handler){};
46+
47+
protected:
48+
/// Returns the generated async service object held by this class.
grpc::Service &GetGrpcService() override { return service_; }
49+
50+
/// Declared here only; the definition is out of line.
void InitServerCallFactories(
51+
const std::unique_ptr<grpc::ServerCompletionQueue> &cq,
52+
std::vector<std::unique_ptr<ServerCallFactory>> *server_call_factories,
53+
const ClusterID &cluster_id) override;
54+
55+
private:
56+
/// The grpc async service object.
NodeInfoGcsService::AsyncService service_;
57+
/// The service handler; held by reference, not owned.
NodeInfoGcsServiceHandler &service_handler_;
58+
/// Active-RPC limit passed in at construction.
int64_t max_active_rpcs_per_handler_;
59+
};
60+
61+
} // namespace rpc
62+
} // namespace ray

src/ray/rpc/gcs/gcs_rpc_server.h

Lines changed: 0 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,6 @@ namespace rpc {
122122
HANDLER, \
123123
RayConfig::instance().gcs_max_active_rpcs_per_handler())
124124

125-
#define NODE_INFO_SERVICE_RPC_HANDLER(HANDLER) \
126-
RPC_SERVICE_HANDLER(NodeInfoGcsService, \
127-
HANDLER, \
128-
RayConfig::instance().gcs_max_active_rpcs_per_handler())
129-
130125
#define TASK_INFO_SERVICE_RPC_HANDLER(HANDLER) \
131126
RPC_SERVICE_HANDLER(TaskInfoGcsService, \
132127
HANDLER, \
@@ -319,73 +314,6 @@ class ActorInfoGrpcService : public GrpcService {
319314
ActorInfoGcsServiceHandler &service_handler_;
320315
};
321316

322-
class NodeInfoGcsServiceHandler {
323-
public:
324-
virtual ~NodeInfoGcsServiceHandler() = default;
325-
326-
virtual void HandleGetClusterId(rpc::GetClusterIdRequest request,
327-
rpc::GetClusterIdReply *reply,
328-
rpc::SendReplyCallback send_reply_callback) = 0;
329-
330-
virtual void HandleRegisterNode(RegisterNodeRequest request,
331-
RegisterNodeReply *reply,
332-
SendReplyCallback send_reply_callback) = 0;
333-
334-
virtual void HandleUnregisterNode(UnregisterNodeRequest request,
335-
UnregisterNodeReply *reply,
336-
SendReplyCallback send_reply_callback) = 0;
337-
338-
virtual void HandleCheckAlive(CheckAliveRequest request,
339-
CheckAliveReply *reply,
340-
SendReplyCallback send_reply_callback) = 0;
341-
342-
virtual void HandleDrainNode(DrainNodeRequest request,
343-
DrainNodeReply *reply,
344-
SendReplyCallback send_reply_callback) = 0;
345-
346-
virtual void HandleGetAllNodeInfo(GetAllNodeInfoRequest request,
347-
GetAllNodeInfoReply *reply,
348-
SendReplyCallback send_reply_callback) = 0;
349-
};
350-
351-
/// The `GrpcService` for `NodeInfoGcsService`.
352-
class NodeInfoGrpcService : public GrpcService {
353-
public:
354-
/// Constructor.
355-
///
356-
/// \param[in] handler The service handler that actually handle the requests.
357-
explicit NodeInfoGrpcService(instrumented_io_context &io_service,
358-
NodeInfoGcsServiceHandler &handler)
359-
: GrpcService(io_service), service_handler_(handler){};
360-
361-
protected:
362-
grpc::Service &GetGrpcService() override { return service_; }
363-
364-
void InitServerCallFactories(
365-
const std::unique_ptr<grpc::ServerCompletionQueue> &cq,
366-
std::vector<std::unique_ptr<ServerCallFactory>> *server_call_factories,
367-
const ClusterID &cluster_id) override {
368-
// We only allow one cluster ID in the lifetime of a client.
369-
// So, if a client connects, it should not have a pre-existing different ID.
370-
RPC_SERVICE_HANDLER_CUSTOM_AUTH(
371-
NodeInfoGcsService,
372-
GetClusterId,
373-
RayConfig::instance().gcs_max_active_rpcs_per_handler(),
374-
AuthType::EMPTY_AUTH);
375-
NODE_INFO_SERVICE_RPC_HANDLER(RegisterNode);
376-
NODE_INFO_SERVICE_RPC_HANDLER(UnregisterNode);
377-
NODE_INFO_SERVICE_RPC_HANDLER(DrainNode);
378-
NODE_INFO_SERVICE_RPC_HANDLER(GetAllNodeInfo);
379-
NODE_INFO_SERVICE_RPC_HANDLER(CheckAlive);
380-
}
381-
382-
private:
383-
/// The grpc async service object.
384-
NodeInfoGcsService::AsyncService service_;
385-
/// The service handler that actually handle the requests.
386-
NodeInfoGcsServiceHandler &service_handler_;
387-
};
388-
389317
class NodeResourceInfoGcsServiceHandler {
390318
public:
391319
virtual ~NodeResourceInfoGcsServiceHandler() = default;
@@ -767,7 +695,6 @@ class InternalPubSubGrpcService : public GrpcService {
767695

768696
using JobInfoHandler = JobInfoGcsServiceHandler;
769697
using ActorInfoHandler = ActorInfoGcsServiceHandler;
770-
using NodeInfoHandler = NodeInfoGcsServiceHandler;
771698
using NodeResourceInfoHandler = NodeResourceInfoGcsServiceHandler;
772699
using WorkerInfoHandler = WorkerInfoGcsServiceHandler;
773700
using PlacementGroupInfoHandler = PlacementGroupInfoGcsServiceHandler;

0 commit comments

Comments
 (0)