@@ -17,39 +17,27 @@ limitations under the License.
1717package odh
1818
1919import (
20+ "fmt"
21+ "os"
22+ "strings"
2023 "testing"
2124
2225 . "github.com/onsi/gomega"
2326 . "github.com/project-codeflare/codeflare-common/support"
2427 rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
2528)
2629
27- func TestRayFinetuneDemo (t * testing.T ) {
28- mnistRayLlmFinetune (t , 1 )
// TestRayFinetuneLlmDeepspeedDemo is the test entry point for the Ray LLM
// DeepSpeed fine-tuning demo; it delegates to rayFinetuneLlmDeepspeed with
// numGpus set to 1.
30+ func TestRayFinetuneLlmDeepspeedDemo (t * testing.T ) {
31+ rayFinetuneLlmDeepspeed (t , 1 )
2932}
3033
31- func mnistRayLlmFinetune (t * testing.T , numGpus int ) {
34+ func rayFinetuneLlmDeepspeed (t * testing.T , numGpus int ) {
3235 test := With (t )
3336
3437 // Create a namespace
3538 namespace := test .NewTestNamespace ()
36-
37- // Test configuration
38- jupyterNotebookConfigMapFileName := "ray_finetune_llm_deepspeed.ipynb"
39-
40- // Test configuration
41- configMap := map [string ][]byte {
42- // MNIST Ray Notebook
43- jupyterNotebookConfigMapFileName : ReadFile (test , "resources/ray_finetune_demo/ray_finetune_llm_deepspeed.ipynb" ),
44- "ray_finetune_llm_deepspeed.py" : ReadFile (test , "resources/ray_finetune_demo/ray_finetune_llm_deepspeed.py" ),
45- "ray_finetune_requirements.txt" : ReadRayFinetuneRequirementsTxt (test ),
46- "create_dataset.py" : ReadFile (test , "resources/ray_finetune_demo/create_dataset.py" ),
47- "lora.json" : ReadFile (test , "resources/ray_finetune_demo/lora.json" ),
48- "zero_3_llama_2_7b.json" : ReadFile (test , "resources/ray_finetune_demo/zero_3_llama_2_7b.json" ),
49- "utils.py" : ReadFile (test , "resources/ray_finetune_demo/utils.py" ),
50- }
51-
52- config := CreateConfigMap (test , namespace .Name , configMap )
39+ var workingDirectory , err = os .Getwd ()
40+ test .Expect (err ).ToNot (HaveOccurred ())
5341
5442 // Define the regular(non-admin) user
5543 userName := GetNotebookUserName (test )
@@ -58,6 +46,53 @@ func mnistRayLlmFinetune(t *testing.T, numGpus int) {
5846 // Create role binding with Namespace specific admin cluster role
5947 CreateUserRoleBindingWithClusterRole (test , userName , namespace .Name , "admin" )
6048
49+ // Text substitutions (old text -> new text) applied to the ray_finetune_llm_deepspeed.ipynb notebook before it is added to the ConfigMap
50+ requiredChangesInNotebook := map [string ]string {
51+ "import os" : "import os,time,sys" ,
52+ "import sys" : "!cp /opt/app-root/notebooks/* ./" ,
53+ "from codeflare_sdk.cluster.auth import TokenAuthentication" : "from codeflare_sdk.cluster.auth import TokenAuthentication\\ n\" ,\n \t \" from codeflare_sdk.job import RayJobClient" ,
54+ "token = ''" : fmt .Sprintf ("token = '%s'" , userToken ),
55+ "server = ''" : fmt .Sprintf ("server = '%s'" , GetOpenShiftApiUrl (test )),
56+ "namespace='ray-finetune-llm-deepspeed'" : fmt .Sprintf ("namespace='%s'" , namespace .Name ),
57+ "head_cpus=16" : "head_cpus=2" ,
58+ "head_gpus=1" : "head_gpus=0" ,
59+ "num_workers=7" : "num_workers=1" ,
60+ "min_cpus=16" : "min_cpus=4" ,
61+ "max_cpus=16" : "max_cpus=4" ,
62+ "min_memory=128" : "min_memory=48" ,
63+ "max_memory=256" : "max_memory=48" ,
64+ "head_memory=128" : "head_memory=48" ,
65+ "num_gpus=1" : fmt .Sprintf ("worker_extended_resource_requests={'nvidia.com/gpu': %d},\\ n\" ,\n \t \" write_to_file=True,\\ n\" ,\n \t \" verify_tls=False" , numGpus ),
66+ "image='quay.io/rhoai/ray:2.23.0-py39-cu121'" : fmt .Sprintf ("image='%s'" , GetRayImage ()),
67+ "client = cluster.job_client" : "ray_dashboard = cluster.cluster_dashboard_uri()\\ n\" ,\n \t \" header = {\\ \" Authorization\\ \" : \\ \" Bearer " + userToken + "\\ \" }\\ n\" ,\n \t \" client = RayJobClient(address=ray_dashboard, headers=header, verify=False)\\ n" ,
68+ "--num-devices=8" : fmt .Sprintf ("--num-devices=%d" , numGpus ),
69+ "--num-epochs=3" : fmt .Sprintf ("--num-epochs=%d" , 1 ),
70+ "--ds-config=./deepspeed_configs/zero_3_llama_2_7b.json" : "--ds-config=./zero_3_llama_2_7b.json \\ \" \\ n\" ,\n \t \" \\ \" --lora-config=./lora.json \\ \" \\ n\" ,\n \t \" \\ \" --as-test" ,
71+ "'pip': 'requirements.txt'" : "'pip': '/opt/app-root/src/requirements.txt'" ,
72+ "'working_dir': './'" : "'working_dir': '/opt/app-root/src'" ,
73+ "client.stop_job(submission_id)" : "finished = False\\ n\" ,\n \t \" while not finished:\\ n\" ,\n \t \" time.sleep(1)\\ n\" ,\n \t \" status = client.get_job_status(submission_id)\\ n\" ,\n \t \" finished = (status == \\ \" SUCCEEDED\\ \" )\\ n\" ,\n \t \" if finished:\\ n\" ,\n \t \" print(\\ \" Job completed Successfully !\\ \" )\\ n\" ,\n \t \" else:\\ n\" ,\n \t \" print(\\ \" Job failed !\\ \" )\\ n\" ,\n \t \" time.sleep(10)\\ n" ,
74+ }
75+
76+ updatedNotebookContent := string (ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/ray_finetune_llm_deepspeed.ipynb" ))
77+ for oldValue , newValue := range requiredChangesInNotebook {
78+ updatedNotebookContent = strings .Replace (updatedNotebookContent , oldValue , newValue , - 1 )
79+ }
80+ updatedNotebook := []byte (updatedNotebookContent )
81+
82+ // Test configuration
83+ jupyterNotebookConfigMapFileName := "ray_finetune_llm_deepspeed.ipynb"
84+ configMap := map [string ][]byte {
85+ jupyterNotebookConfigMapFileName : updatedNotebook ,
86+ "ray_finetune_llm_deepspeed.py" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/ray_finetune_llm_deepspeed.py" ),
87+ "requirements.txt" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/requirements.txt" ),
88+ "create_dataset.py" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/create_dataset.py" ),
89+ "lora.json" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/lora_configs/lora.json" ),
90+ "zero_3_llama_2_7b.json" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/deepspeed_configs/zero_3_llama_2_7b.json" ),
91+ "utils.py" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/utils.py" ),
92+ }
93+
94+ config := CreateConfigMap (test , namespace .Name , configMap )
95+
6196 // Create Notebook CR
6297 createNotebook (test , namespace , userToken , config .Name , jupyterNotebookConfigMapFileName , numGpus )
6398
@@ -77,26 +112,6 @@ func mnistRayLlmFinetune(t *testing.T, numGpus int) {
77112 )
78113
79114 // Make sure the RayCluster finishes and is deleted
80- test .Eventually (RayClusters (test , namespace .Name ), TestTimeoutGpuProvisioning ).
115+ test .Eventually (RayClusters (test , namespace .Name ), TestTimeoutMedium ).
81116 Should (HaveLen (0 ))
82117}
83-
// ReadRayFinetuneRequirementsTxt reads the ray_finetune_requirements.txt
// template from the embedded resources and returns it rendered through
// ParseTemplate. The PipIndexUrl property is always set from GetPipIndexURL;
// PipTrustedHost is set only when GetPipTrustedHost returns a non-empty value.
// A failure to read the template is reported through test.Expect.
84- func ReadRayFinetuneRequirementsTxt (test Test ) []byte {
85- // Read the requirements.txt from resources and perform replacements for custom values using go template
86- props := struct {
87- PipIndexUrl string
88- PipTrustedHost string
89- }{
90- PipIndexUrl : "--index " + string (GetPipIndexURL ()),
91- }
92-
93- // Provide trusted host only if defined
94- if len (GetPipTrustedHost ()) > 0 {
95- props .PipTrustedHost = "--trusted-host " + GetPipTrustedHost ()
96- }
97-
98- template , err := files .ReadFile ("resources/ray_finetune_demo/ray_finetune_requirements.txt" )
99- test .Expect (err ).NotTo (HaveOccurred ())
100-
101- return ParseTemplate (test , template , props )
102- }
0 commit comments