Skip to content

Commit aa01b9d

Browse files
committed
init
Signed-off-by: Yan Chunwei <[email protected]>
1 parent 09349cc commit aa01b9d

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

tensorrt_llm/llmapi/trtllm-llmapi-launch

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -24,17 +24,9 @@ function mpi_world_size {
2424
}
2525

2626
function export_free_tcp_addr_for_spawn_proxy_process {
27-
# find free port starting from 10012
28-
local free_port=$(python -c 'import socket; s=socket.socket();
29-
port = 10012
30-
while True:
31-
try:
32-
s.bind(("", port))
33-
break
34-
except OSError:
35-
port += 1
36-
print(port); s.close()')
37-
export TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR="tcp://127.0.0.1:${free_port}"
27+
# Generate unique IPC address without importing tensorrt_llm to avoid MPI initialization conflicts
28+
local free_port=$(python3 -c "import uuid, tempfile, os; print(f'ipc://{os.path.join(tempfile.gettempdir(), \"rpc_test_\" + str(uuid.uuid4()))}')")
29+
export TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR=$free_port
3830
log_stderr "TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR: $TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR"
3931

4032
export TLLM_SPAWN_PROXY_PROCESS_IPC_HMAC_KEY=$(openssl rand -hex 32)
@@ -44,9 +36,12 @@ print(port); s.close()')
4436
export tllm_mpi_size=$(mpi_world_size)
4537
log_stderr "tllm_mpi_size: $tllm_mpi_size"
4638

47-
export_free_tcp_addr_for_spawn_proxy_process
4839

4940
if [ -z "$mpi_rank" ] || [ "$mpi_rank" -eq 0 ]; then
41+
42+
# IPC only works on localhost, so set it up only in the MPI rank 0 process
43+
export_free_tcp_addr_for_spawn_proxy_process
44+
5045
log_stderr "Rank${mpi_rank} run ${task_with_command[@]} in background"
5146

5247
# MPI doesn't allow spawning a process that shares the MPI environment in an MPI

0 commit comments

Comments
 (0)