@@ -31,12 +31,17 @@ The CLI client uses the instructlab SDG library and provides it a run configurat
3131# cli_driver.py
3232
3333from sdg import SDG
34+ from run_config import SynthDataFlow
35+ from pipeline import Pipeline
3436import yaml
3537
3638client = openai_client(endpoint)
37- with open (' run_config.yaml' , ' r' ) as file :
38- run_config = yaml.safe_load(file )
39- cli_sdg = SDG(run_config, client) # run config has all the variables like num_samples, pipelinesteps etc
39+ model = " model-version"
40+
41+ synth_skills_flow = SynthDataFlow(client, model).get_flow()
42+ skills_pipe = Pipeline(synth_skills_flow)
43+
44+ cli_sdg = SDG([synth_skills_flow]) # run config has all the variables, like num_samples, pipeline steps, etc.
4045generated_samples = cli_sdg.generate()
4146```
4247
@@ -62,20 +67,35 @@ The run configuration includes the necessary parameters for executing the SDG co
6267 )
6368 ```
6469
65- ``` yaml
66- # run_config.yaml
67-
68- num_samples : 30
69- max_retry : 5
70- pipeline_steps :
71- gen_q :
72- prompt_template : " configs/gen_q.yaml"
73- filter_q :
74- prompt_template : " configs/filter_q.yaml"
75- max_new_tokens : 10000
76- # model parameters for generation
77- model_name : mixtral-model
78- model_prompt : ' <s> [INST] {prompt} [/INST]'
79- client : client
80- num_procs : 8
81- ```
70+ ``` python
71+ # run_config.py
72+ class Flow (ABC ):
73+ def __init__ (self , client , model_id ) -> None :
74+ self .client = client
75+ self .model_id = model_id
76+
77+ @abstractmethod
78+ def get_flow (self ) -> list :
79+ pass
80+
81+
82+ class SynthDataFlow (Flow ):
83+ def get_flow (self ) -> list :
84+ return [
85+ {
86+ ' block_type' : LLMBlock,
87+ ' block_config' : {
88+ ' block_name' : " gen_q" ,
89+ ' config_path' : " configs/gen_q.yaml" ,
90+ ' client' : self .client,
91+ ' model_id' : self .model_id,
92+ ' model_prompt' : ' <s> [INST] {prompt} [/INST]' ,
93+ ' output_cols' : [' question' ],
94+ ' batch_kwargs' : {
95+ ' num_procs' : 8 ,
96+ ' num_samples' : 30 ,
97+ ' batched' : True ,
98+ },
99+ ' max_retry' : 5 ,
100+ ' max_new_tokens' : 10000
101+ ```
0 commit comments