26
26
#include " ray/common/id.h"
27
27
#include " ray/core_worker/store_provider/memory_store/memory_store.h"
28
28
#include " ray/core_worker/task_event_buffer.h"
29
- #include " ray/core_worker/task_finisher .h"
29
+ #include " ray/core_worker/task_manager_interface .h"
30
30
#include " ray/stats/metric_defs.h"
31
31
#include " ray/util/counter_map.h"
32
32
#include " src/ray/protobuf/common.pb.h"
@@ -38,14 +38,6 @@ namespace core {
38
38
39
39
class ActorManager ;
40
40
41
- class TaskResubmissionInterface {
42
- public:
43
- virtual std::optional<rpc::ErrorType> ResubmitTask (
44
- const TaskID &task_id, std::vector<ObjectID> *task_deps) = 0;
45
-
46
- virtual ~TaskResubmissionInterface () = default ;
47
- };
48
-
49
41
using TaskStatusCounter = CounterMap<std::tuple<std::string, rpc::TaskStatus, bool >>;
50
42
using PutInLocalPlasmaCallback =
51
43
std::function<void (const RayObject &object, const ObjectID &object_id)>;
@@ -173,7 +165,7 @@ class ObjectRefStream {
173
165
int64_t total_num_object_consumed_{};
174
166
};
175
167
176
- class TaskManager : public TaskFinisherInterface , public TaskResubmissionInterface {
168
+ class TaskManager : public TaskManagerInterface {
177
169
public:
178
170
TaskManager (CoreWorkerMemoryStore &in_memory_store,
179
171
ReferenceCounter &reference_counter,
@@ -208,36 +200,11 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
208
200
});
209
201
}
210
202
211
- // / Add a task that is pending execution.
212
- // /
213
- // / The local ref count for all return refs (excluding actor creation tasks)
214
- // / will be initialized to 1 so that the ref is considered in scope before
215
- // / returning to the language frontend. The caller is responsible for
216
- // / decrementing the ref count once the frontend ref has gone out of scope.
217
- // /
218
- // / \param[in] caller_address The rpc address of the calling task.
219
- // / \param[in] spec The spec of the pending task.
220
- // / \param[in] max_retries Number of times this task may be retried
221
- // / on failure.
222
- // / \return ObjectRefs returned by this task.
223
203
std::vector<rpc::ObjectReference> AddPendingTask (const rpc::Address &caller_address,
224
204
const TaskSpecification &spec,
225
205
const std::string &call_site,
226
- int max_retries = 0 );
227
-
228
- // / Resubmit a task that has completed execution before. This is used to
229
- // / reconstruct objects stored in Plasma that were lost.
230
- // /
231
- // / \param[in] task_id The ID of the task to resubmit.
232
- // / \param[out] task_deps The object dependencies of the resubmitted task,
233
- // / i.e. all arguments that were not inlined in the task spec. The caller is
234
- // / responsible for making sure that these dependencies become available, so
235
- // / that the resubmitted task can run. This is only populated if the task was
236
- // / not already pending and was successfully resubmitted.
237
- // / \return nullopt if the task was successfully resubmitted (task or actor being
238
- // / scheduled, but no guarantee on completion), or was already pending. Return the
239
- // / appopriate error type to propagate for the object if the task was not successfully
240
- // / resubmitted.
206
+ int max_retries = 0 ) override ;
207
+
241
208
std::optional<rpc::ErrorType> ResubmitTask (const TaskID &task_id,
242
209
std::vector<ObjectID> *task_deps) override ;
243
210
@@ -246,13 +213,6 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
246
213
// / \param shutdown The shutdown callback to call.
247
214
void DrainAndShutdown (std::function<void ()> shutdown);
248
215
249
- // / Write return objects for a pending task to the memory store.
250
- // /
251
- // / \param[in] task_id ID of the pending task.
252
- // / \param[in] reply Proto response to a direct actor or task call.
253
- // / \param[in] worker_addr Address of the worker that executed the task.
254
- // / \param[in] is_application_error Whether this is an Exception return.
255
- // / \return Void.
256
216
void CompletePendingTask (const TaskID &task_id,
257
217
const rpc::PushTaskReply &reply,
258
218
const rpc::Address &worker_addr,
@@ -434,50 +394,18 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
434
394
std::pair<ObjectID, bool > PeekObjectRefStream (const ObjectID &generator_id)
435
395
ABSL_LOCKS_EXCLUDED(mu_);
436
396
437
- // / Called by submitter when a generator task marked for resubmission for intermediate
438
- // / object recovery comes back from the executing worker. We mark the attempt as failed
439
- // / and resubmit it, so we can recover the intermediate return.
440
397
void MarkGeneratorFailedAndResubmit (const TaskID &task_id) override ;
441
398
442
- // / Returns true if task can be retried.
443
- // /
444
- // / \param[in] task_id ID of the task to be retried.
445
- // / \return true if task is scheduled to be retried.
446
399
bool RetryTaskIfPossible (const TaskID &task_id,
447
400
const rpc::RayErrorInfo &error_info) override ;
448
401
449
- // / A pending task failed. This will either retry the task or mark the task
450
- // / as failed if there are no retries left.
451
- // /
452
- // / \param[in] task_id ID of the pending task.
453
- // / \param[in] error_type The type of the specific error.
454
- // / \param[in] status Optional status message.
455
- // / \param[in] ray_error_info The error information of a given error type.
456
- // / Nullptr means that there's no error information.
457
- // / TODO(sang): Remove nullptr case. Every error message should have metadata.
458
- // / \param[in] mark_task_object_failed whether or not it marks the task
459
- // / return object as failed. If this is set to false, then the caller is
460
- // / responsible for later failing or completing the task.
461
- // / \param[in] fail_immediately whether to fail the task and ignore
462
- // / the retries that are available.
463
- // / \return Whether the task will be retried or not.
464
402
bool FailOrRetryPendingTask (const TaskID &task_id,
465
403
rpc::ErrorType error_type,
466
404
const Status *status = nullptr ,
467
405
const rpc::RayErrorInfo *ray_error_info = nullptr ,
468
406
bool mark_task_object_failed = true ,
469
407
bool fail_immediately = false ) override ;
470
408
471
- // / A pending task failed. This will mark the task as failed.
472
- // / This doesn't always mark the return object as failed
473
- // / depending on mark_task_object_failed.
474
- // /
475
- // / \param[in] task_id ID of the pending task.
476
- // / \param[in] error_type The type of the specific error.
477
- // / \param[in] status Optional status message.
478
- // / \param[in] ray_error_info The error information of a given error type.
479
- // / \param[in] mark_task_object_failed whether or not it marks the task
480
- // / return object as failed.
481
409
void FailPendingTask (const TaskID &task_id,
482
410
rpc::ErrorType error_type,
483
411
const Status *status = nullptr ,
@@ -495,24 +423,11 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
495
423
const rpc::RayErrorInfo *ray_error_info,
496
424
const absl::flat_hash_set<ObjectID> &store_in_plasma_ids) ABSL_LOCKS_EXCLUDED(mu_);
497
425
498
- // / A task's dependencies were inlined in the task spec. This will decrement
499
- // / the ref count for the dependency IDs. If the dependencies contained other
500
- // / ObjectIDs, then the ref count for these object IDs will be incremented.
501
- // /
502
- // / \param[in] inlined_dependency_ids The args that were originally passed by
503
- // / reference into the task, but have now been inlined.
504
- // / \param[in] contained_ids Any ObjectIDs that were newly inlined in the
505
- // / task spec, because a serialized copy of the ID was contained in one of
506
- // / the inlined dependencies.
507
426
void OnTaskDependenciesInlined (const std::vector<ObjectID> &inlined_dependency_ids,
508
427
const std::vector<ObjectID> &contained_ids) override ;
509
428
510
- // / Set the task state to be canceled. Set the number of retries to zero.
511
- // /
512
- // / \param[in] task_id to cancel.
513
429
void MarkTaskCanceled (const TaskID &task_id) override ;
514
430
515
- // / Return the spec for a pending task.
516
431
std::optional<TaskSpecification> GetTaskSpec (const TaskID &task_id) const override ;
517
432
518
433
// / Return specs for pending children tasks of the given parent task.
@@ -524,10 +439,6 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
524
439
// / \return Whether the task can be submitted for execution.
525
440
bool IsTaskSubmissible (const TaskID &task_id) const ;
526
441
527
- // / Return whether the task is pending.
528
- // /
529
- // / \param[in] task_id ID of the task to query.
530
- // / \return Whether the task is pending.
531
442
bool IsTaskPending (const TaskID &task_id) const override ;
532
443
533
444
// / Return whether the task is scheduled and waiting for execution.
@@ -549,17 +460,8 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
549
460
return total_lineage_footprint_bytes_;
550
461
}
551
462
552
- // / Record that the given task's dependencies have been created and the task
553
- // / can now be scheduled for execution.
554
- // /
555
- // / \param[in] task_id The task that is now scheduled.
556
463
void MarkDependenciesResolved (const TaskID &task_id) override ;
557
464
558
- // / Record that the given task is scheduled and wait for execution.
559
- // /
560
- // / \param[in] task_id The task that is will be running.
561
- // / \param[in] node_id The node id that this task wil be running.
562
- // / \param[in] worker_id The worker id that this task wil be running.
563
465
void MarkTaskWaitingForExecution (const TaskID &task_id,
564
466
const NodeID &node_id,
565
467
const WorkerID &worker_id) override ;
0 commit comments