@@ -318,7 +318,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     fin.close();

     std::vector<uint8_t> tmp;
-
+
     for (int i = 0; i < n_parts; ++i) {
         const int part_id = i;
         // const int part_id = n_parts - i - 1;
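(This first hunk is cosmetic: the removed and re-added blank line differ only in whitespace, presumably a stripped trailing space.)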
@@ -797,14 +797,6 @@ int main(int argc, char ** argv) {

     gpt_params params;

-    params.temp = 0.1f;
-    params.top_p = 0.95f;
-    params.n_ctx = 2048;
-    params.interactive = true;
-    params.interactive_start = true;
-    params.use_color = true;
-    params.model = "ggml-alpaca-7b-q4.bin";
-
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
     }
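This hunk drops the Alpaca-specific defaults that had been hardcoded over the stock llama.cpp settings: sampling parameters (temp 0.1, top_p 0.95), a 2048-token context, always-on interactive/color mode, and ggml-alpaca-7b-q4.bin as the model path. With them gone, gpt_params_parse is the single source of configuration. Assuming the flag names the option parser accepts in this tree (an assumption; its usage output is authoritative), a roughly equivalent invocation would be:

    ./chat -m ggml-alpaca-7b-q4.bin -c 2048 --temp 0.1 --top_p 0.95 -i --color

Whether interactive_start and use_color have exact flag counterparts here is not shown in this diff.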
@@ -856,13 +848,26 @@ int main(int argc, char ** argv) {
     // Add a space in front of the first character to match OG llama tokenizer behavior
     // params.prompt.insert(0, 1, ' ');
     // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp;// = ::llama_tokenize(vocab, params.prompt, true);
+    std::vector<gpt_vocab::id> embd_inp;

     // params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());

     // // tokenize the reverse prompt
     // std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);

+
+    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
+    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
+    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
+    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
+
+    if (!params.prompt.empty()) {
+        std::vector<gpt_vocab::id> param_inp = ::llama_tokenize(vocab, params.prompt, true);
+        embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
+        embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
+        embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
+    }
+
     // fprintf(stderr, "\n");
     // fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     // fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
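The added block is the Alpaca prompt-template assembly, moved up so embd_inp is populated before the (commented-out) logging; the next hunk deletes its old copy. The preamble tokens are always prepended, while the Instruction/Response wrapper is appended only when the user actually supplied a prompt. Decoded back to text, the model input for a hypothetical prompt reads:

    Below is an instruction that describes a task. Write a response that appropriately completes the request.

    ### Instruction:

    <your prompt>### Response:

Two details to note: no newline separates the user prompt from the response header unless the prompt itself ends with one, and the true passed as llama_tokenize's third argument presumably requests a leading BOS token, so instruct_inp, prompt_inp, and param_inp each carry one while response_inp does not.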
@@ -871,13 +876,6 @@ int main(int argc, char ** argv) {
     // }
     // fprintf(stderr, "\n");

-    std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
-    std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
-    std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
-
-    embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
-
-
     if (params.interactive) {
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
         struct sigaction sigint_action;
@@ -1076,9 +1074,14 @@ int main(int argc, char ** argv) {

         // end of text token
         if (embd.back() == 2) {
-            // fprintf(stderr, " [end of text]\n");
-            is_interacting = true;
-            continue;
+            if (params.interactive) {
+                is_interacting = true;
+                continue;
+            } else {
+                printf("\n");
+                fprintf(stderr, " [end of text]\n");
+                break;
+            }
         }
     }

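The literal 2 is LLaMA's SentencePiece end-of-sequence token id. The old code flipped back to interactive mode on every EOS, which only made sense while interactive mode was forced on by the defaults removed above; now a non-interactive run prints the end-of-text marker and breaks out of the generation loop instead. A minimal sketch of the same logic with the magic number named (EOS_TOKEN_ID is illustrative, not a constant defined in this tree):

    // Hypothetical named constant for LLaMA's SentencePiece EOS id.
    static const gpt_vocab::id EOS_TOKEN_ID = 2;

    if (embd.back() == EOS_TOKEN_ID) {
        if (params.interactive) {
            is_interacting = true;  // hand control back to the user
            continue;
        }
        printf("\n");
        fprintf(stderr, " [end of text]\n");
        break;  // one-shot run: stop generating
    }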