@@ -99,6 +99,8 @@ MODEL_CACHE_DIR="/scratch.trt_llm_data/llm-models"
99
99
ENABLE_NGC_DEVEL_IMAGE_TEST = params. enableNgcDevelImageTest ?: false
100
100
ENABLE_NGC_RELEASE_IMAGE_TEST = params. enableNgcReleaseImageTest ?: false
101
101
102
+ COMMON_SSH_OPTIONS = " -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
103
+
102
104
def uploadResults (def pipeline , SlurmCluster cluster , String nodeName , String stageName ){
103
105
withCredentials([usernamePassword(credentialsId : ' svc_tensorrt' , usernameVariable : ' USERNAME' , passwordVariable : ' PASSWORD' )]) {
104
106
def remote = [
@@ -113,7 +115,7 @@ def uploadResults(def pipeline, SlurmCluster cluster, String nodeName, String st
113
115
pipeline. stage(' Submit Test Results' ) {
114
116
sh " mkdir -p ${ stageName} "
115
117
def resultsFilePath = " /home/svc_tensorrt/bloom/scripts/${ nodeName} /results/results.xml"
116
- def downloadResultCmd = " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ remote.user} @${ remote.host} :${ resultsFilePath} ${ stageName} /"
118
+ def downloadResultCmd = " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ remote.user} @${ remote.host} :${ resultsFilePath} ${ stageName} /"
117
119
def downloadSucceed = sh(script : downloadResultCmd, returnStatus : true ) == 0
118
120
if (downloadSucceed) {
119
121
sh " ls ${ stageName} "
@@ -239,7 +241,7 @@ def runLLMTestlistOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
239
241
240
242
Utils . exec(pipeline, script : " chmod +x ${ jenkinsSetupPath} " , returnStdout : true )
241
243
242
- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ jenkinsSetupPath} ${ remote.user} @${ remote.host} :~/bloom/scripts/${ nodeName} -slurm_jenkins_agent_setup.sh" ,)
244
+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ jenkinsSetupPath} ${ remote.user} @${ remote.host} :~/bloom/scripts/${ nodeName} -slurm_jenkins_agent_setup.sh" ,)
243
245
244
246
Utils . exec(
245
247
pipeline,
@@ -327,7 +329,7 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
327
329
328
330
stage(' Prepare Testing' ) {
329
331
// Create Job Workspace folder in Frontend Node
330
- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' ssh -oStrictHostKeyChecking=no ${ remote.user} @${ remote.host} 'mkdir ${ jobWorkspace} '" ,)
332
+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' ssh ${ COMMON_SSH_OPTIONS } ${ remote.user} @${ remote.host} 'mkdir -p ${ jobWorkspace} '" ,)
331
333
332
334
// Download and Unzip Tar File
333
335
trtllm_utils. llmExecStepWithRetry(pipeline, script : " cd ${ llmPath} && wget -nv ${ llmTarfile} " )
@@ -336,11 +338,11 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
336
338
// Upload slurm_run_sh to Frontend node
337
339
def scriptRunLocalPath = " ${ llmSrcLocal} /jenkins/scripts/slurm_run.sh"
338
340
Utils . exec(pipeline, script : " chmod +x ${ scriptRunLocalPath} " , returnStdout : true )
339
- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ scriptRunLocalPath} ${ remote.user} @${ remote.host} :${ scriptRunNode} " ,)
341
+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ scriptRunLocalPath} ${ remote.user} @${ remote.host} :${ scriptRunNode} " ,)
340
342
341
343
// Upload waives.txt to Frontend node
342
344
def waivesListLocalPath = " ${ llmSrcLocal} /tests/integration/test_lists/waives.txt"
343
- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ waivesListLocalPath} ${ remote.user} @${ remote.host} :${ waivesListPathNode} " ,)
345
+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ waivesListLocalPath} ${ remote.user} @${ remote.host} :${ waivesListPathNode} " ,)
344
346
345
347
// Generate Test List and Upload to Frontend Node
346
348
def makoArgs = getMakoArgsFromStageName(stageName, true )
@@ -349,7 +351,7 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
349
351
// if the line cannot be split by "=", just ignore that line.
350
352
def makoOptsJson = transformMakoArgsToJson([" Mako options:" ] + makoArgs)
351
353
def testListPath = renderTestDB(testList, llmSrcLocal, stageName, makoOptsJson)
352
- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ testListPath} ${ remote.user} @${ remote.host} :${ testListPathNode} " ,)
354
+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ testListPath} ${ remote.user} @${ remote.host} :${ testListPathNode} " ,)
353
355
354
356
// Generate Multi Node Job Launch Script
355
357
def container = LLM_DOCKER_IMAGE . replace(" urm.nvidia.com/" , " urm.nvidia.com#" )
@@ -393,7 +395,7 @@ def runLLMTestlistOnSlurm_MultiNodes(pipeline, platform, testList, config=VANILL
393
395
""" . stripIndent()
394
396
pipeline. writeFile(file : scriptLaunchDestPath, text : scriptContent)
395
397
Utils . exec(pipeline, script : " chmod +x ${ scriptLaunchDestPath} " , returnStdout : true )
396
- Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p -oStrictHostKeyChecking=no ${ scriptLaunchDestPath} ${ remote.user} @${ remote.host} :${ scriptLaunch} " ,)
398
+ Utils . exec(pipeline, script : " sshpass -p '${ remote.passwd} ' scp -r -p ${ COMMON_SSH_OPTIONS } ${ scriptLaunchDestPath} ${ remote.user} @${ remote.host} :${ scriptLaunch} " ,)
397
399
}
398
400
stage(' Run Test' ) {
399
401
def scriptLaunch = " ${ jobWorkspace} /slurm_launch.sh"
@@ -1089,7 +1091,7 @@ def getSSHConnectionPorts(portConfigFile, stageName)
1089
1091
usernamePassword(credentialsId : ' tensorrt_llm_infra_debug_vm_01_credentials' , usernameVariable : ' USERNAME' , passwordVariable : ' PASSWORD' ),
1090
1092
string(credentialsId : ' DEBUG_HOST_NAME' , variable : ' HOST_NAME' )
1091
1093
]) {
1092
- portUsage = sh(script : " ssh -v ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'netstat -tuln'" ,returnStdout : true )
1094
+ portUsage = sh(script : " ssh -v ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'netstat -tuln'" , returnStdout : true )
1093
1095
}
1094
1096
echo " Port Usage: ${ portUsage} "
1095
1097
@@ -1248,7 +1250,7 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
1248
1250
def llmRootConfig = " ${ LLM_ROOT}${ config} "
1249
1251
sh " mkdir ${ llmRootConfig} "
1250
1252
1251
- def llmPath = sh (script : " realpath ${ llmRootConfig} " ,returnStdout : true ). trim()
1253
+ def llmPath = sh (script : " realpath ${ llmRootConfig} " , returnStdout : true ). trim()
1252
1254
def llmSrc = " ${ llmPath} /TensorRT-LLM/src"
1253
1255
echoNodeAndGpuInfo(pipeline, stageName)
1254
1256
@@ -1362,9 +1364,9 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
1362
1364
usernamePassword(credentialsId : ' tensorrt_llm_infra_debug_vm_01_credentials' , usernameVariable : ' USERNAME' , passwordVariable : ' PASSWORD' ),
1363
1365
string(credentialsId : ' DEBUG_HOST_NAME' , variable : ' HOST_NAME' )
1364
1366
]) {
1365
- sh " sshpass -p ${ PASSWORD} -v ssh ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'cat >> ~/.ssh/authorized_keys' < ~/.ssh/id_rsa.pub"
1366
- sh " ssh -v ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'echo \"\" > ~/.ssh/known_hosts && cat ~/.ssh/id_rsa.pub' >> ~/.ssh/authorized_keys"
1367
- sh " ssh -v ${ USERNAME} @${ HOST_NAME} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null 'cat ~/.ssh/ports_config.txt' >> ${ portConfigFilePath} "
1367
+ sh " sshpass -p ${ PASSWORD} -v ssh ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'cat >> ~/.ssh/authorized_keys' < ~/.ssh/id_rsa.pub"
1368
+ sh " ssh -v ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'echo \"\" > ~/.ssh/known_hosts && cat ~/.ssh/id_rsa.pub' >> ~/.ssh/authorized_keys"
1369
+ sh " ssh -v ${ USERNAME} @${ HOST_NAME} ${ COMMON_SSH_OPTIONS } 'cat ~/.ssh/ports_config.txt' >> ${ portConfigFilePath} "
1368
1370
1369
1371
def (int userPort, int monitorPort) = getSSHConnectionPorts(portConfigFilePath, stageName)
1370
1372
if (userPort == 0 ) {
@@ -1373,7 +1375,7 @@ def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CO
1373
1375
return
1374
1376
}
1375
1377
1376
- sh " ssh -f -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -L 1111:127.0.0.1:${ monitorPort} -R ${ monitorPort} :127.0.0.1:1112 -NR ${ userPort} :localhost:22 ${ USERNAME} @${ HOST_NAME} "
1378
+ sh " ssh -f ${ COMMON_SSH_OPTIONS } -L 1111:127.0.0.1:${ monitorPort} -R ${ monitorPort} :127.0.0.1:1112 -NR ${ userPort} :localhost:22 ${ USERNAME} @${ HOST_NAME} "
1377
1379
sh " autossh -fNR ${ userPort} :localhost:22 ${ USERNAME} @${ HOST_NAME} "
1378
1380
sh " ps aux | grep ssh"
1379
1381
try {
0 commit comments