Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions qa/L0_batcher/batcher_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

# Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -344,7 +344,7 @@ def test_static_batch_lt_any_preferred(self):
self.check_response(
trial,
1,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
precreated_shm_regions=precreated_shm_regions,
)
self.check_deferred_exception()
Expand All @@ -369,7 +369,7 @@ def test_static_batch_not_preferred(self):
self.check_response(
trial,
3,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
precreated_shm_regions=precreated_shm_regions,
)
self.check_deferred_exception()
Expand Down Expand Up @@ -491,7 +491,7 @@ def test_multi_batch_different_shape(self):
args=(
trial,
1,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"input_size": 8,
Expand Down Expand Up @@ -540,7 +540,7 @@ def test_multi_batch_not_preferred(self):
args=(
trial,
1,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"shm_region_names": shm0_region_names,
Expand Down Expand Up @@ -619,13 +619,15 @@ def test_multi_batch_not_preferred_different_shape(self):
},
)
)
# Add some delay to ensure the first two requests arrive before the third
time.sleep(2)
[Reviewer comment — Contributor]: Should this 2 seconds be some function of the _max_queue_delay_ms instead?

threads.append(
threading.Thread(
target=self.check_response,
args=(
trial,
1,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"input_size": 8,
Expand Down Expand Up @@ -828,7 +830,7 @@ def test_multi_batch_sum_gt_max_preferred(self):
args=(
trial,
4,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"shm_region_names": shm1_region_names,
Expand Down Expand Up @@ -1105,7 +1107,7 @@ def test_multi_batch_delayed_sum_gt_max_preferred(self):
args=(
trial,
4,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"shm_region_names": shm1_region_names,
Expand Down Expand Up @@ -1164,7 +1166,7 @@ def test_multi_batch_delayed_use_max_batch(self):
args=(
trial,
3,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"shm_region_names": shm0_region_names,
Expand All @@ -1178,7 +1180,7 @@ def test_multi_batch_delayed_use_max_batch(self):
args=(
trial,
4,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"shm_region_names": shm1_region_names,
Expand Down Expand Up @@ -1464,7 +1466,7 @@ def test_multi_batch_use_best_preferred(self):
args=(
trial,
1,
(_max_queue_delay_ms * 1.5, _max_queue_delay_ms),
(_max_queue_delay_ms * 2, _max_queue_delay_ms),
),
kwargs={
"shm_region_names": shm2_region_names,
Expand Down Expand Up @@ -1895,7 +1897,7 @@ def test_preferred_batch_only_use_no_preferred_size(self):
def test_max_queue_delay_only_non_default(self):
# Send 12 requests with batch size 1. The max_queue_delay is set
# to non-zero. Depending upon the timing of the requests arrival
# there can be either 1 or 2 model executions.
# there can be either 1 or multiple model executions.
[Reviewer comment — Contributor]: Can you explain why the originally intended behavior is not consistent and needs to be relaxed here? (different for zero and non-zero queue delay scenarios?)

model_base = "custom"
dtype = np.float32
shapes = (
Expand Down Expand Up @@ -1934,15 +1936,16 @@ def test_max_queue_delay_only_non_default(self):
t.join()
self.check_deferred_exception()
model_name = tu.get_zero_model_name(model_base, len(shapes), dtype)
self.check_status(model_name, None, 12, 12, (1, 2))
self.check_status(model_name, None, 12, 12, (1, 2, 3, 4))
except Exception as ex:
self.assertTrue(False, "unexpected error {}".format(ex))

def test_max_queue_delay_only_default(self):
# Send 12 requests with batch size 1. The max_queue_delay is set
# to default value of 0. There should be two distinct model
# executions. The first few requests will form a first batch
# and the remaining requests will form the second batch.
# and the remaining requests will either form the second batch
# or more batches depending on their arrival time.
[Reviewer comment — Contributor, on lines 1945-1948]: Can you explain why the originally intended behavior is not consistent and needs to be relaxed here?

model_base = "custom"
dtype = np.float32
shapes = (
Expand Down Expand Up @@ -1981,7 +1984,7 @@ def test_max_queue_delay_only_default(self):
t.join()
self.check_deferred_exception()
model_name = tu.get_zero_model_name(model_base, len(shapes), dtype)
self.check_status(model_name, None, 12, 12, (2,))
self.check_status(model_name, None, 12, 12, (2, 3, 4, 5, 6))
except Exception as ex:
self.assertTrue(False, "unexpected error {}".format(ex))

Expand Down
Loading