Skip to content

Commit cceba88

Browse files
Allow using existing Llama Server instead of running locally (#345)
* Add setting to use existing Llama server * minor UI improvements * support infill template configuration --------- Co-authored-by: Carl-Robert Linnupuu <[email protected]>
1 parent 7387cf4 commit cceba88

18 files changed

+910
-498
lines changed

src/main/java/ee/carlrobert/codegpt/codecompletions/CodeCompletionRequestProvider.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package ee.carlrobert.codegpt.codecompletions;
22

3+
import ee.carlrobert.codegpt.completions.llama.LlamaModel;
34
import ee.carlrobert.codegpt.settings.state.LlamaSettingsState;
45
import ee.carlrobert.llm.client.llama.completion.LlamaCompletionRequest;
56
import ee.carlrobert.llm.client.openai.completion.request.OpenAITextCompletionRequest;
@@ -26,13 +27,25 @@ public OpenAITextCompletionRequest buildOpenAIRequest() {
2627
}
2728

2829
/**
 * Builds the llama completion request used for code infill.
 *
 * <p>The prompt is produced by the infill prompt template from the prefix and suffix held in
 * {@code details}. The request enables streaming, passes {@code MAX_TOKENS} as
 * {@code n_predict}, uses a temperature of 0.1 and takes its stop sequences from the
 * template's stop tokens.
 *
 * @return the request produced by {@code LlamaCompletionRequest.Builder}
 */
public LlamaCompletionRequest buildLlamaRequest() {
29-
var promptTemplate = LlamaSettingsState.getInstance().getInfillPromptTemplate();
30-
var prompt = promptTemplate.buildPrompt(details.getPrefix(), details.getSuffix());
30+
// NOTE(review): the template resolved here can be null when the selected LlamaModel was
// declared through the constructor overload that passes a null infill template; buildPrompt
// below would then throw a NullPointerException - confirm and guard if needed.
InfillPromptTemplate promptTemplate = getLlamaInfillPromptTemplate();
31+
String prompt = promptTemplate.buildPrompt(details.getPrefix(), details.getSuffix());
3132
return new LlamaCompletionRequest.Builder(prompt)
3233
.setN_predict(MAX_TOKENS)
3334
.setStream(true)
3435
.setTemperature(0.1)
3536
.setStop(promptTemplate.getStopTokens())
3637
.build();
3738
}
39+
40+
/**
 * Selects the infill prompt template that matches the current llama settings.
 *
 * <p>Resolution order: the remote-model template when no local server is run, the local-model
 * template when a custom model is used, otherwise the template of the {@code LlamaModel} that
 * {@code LlamaModel.findByHuggingFaceModel} returns for the configured Hugging Face model.
 *
 * @return the selected template. NOTE(review): the last branch returns whatever the model's
 *     {@code getInfillPromptTemplate()} holds, which is null for models declared without an
 *     infill template - confirm that callers can cope with that.
 */
private InfillPromptTemplate getLlamaInfillPromptTemplate() {
41+
var settings = LlamaSettingsState.getInstance();
42+
// An existing (non-local) server is used: take the template configured for the remote model.
if (!settings.isRunLocalServer()) {
43+
return settings.getRemoteModelInfillPromptTemplate();
44+
}
45+
// Local server with a custom model: take the template configured for the local model.
if (settings.isUseCustomModel()) {
46+
return settings.getLocalModelInfillPromptTemplate();
47+
}
48+
// Local server with a predefined model: use the template declared on the LlamaModel constant.
return LlamaModel.findByHuggingFaceModel(settings.getHuggingFaceModel())
49+
.getInfillPromptTemplate();
50+
}
3851
}

src/main/java/ee/carlrobert/codegpt/completions/CompletionClientProvider.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import ee.carlrobert.codegpt.CodeGPTPlugin;
66
import ee.carlrobert.codegpt.completions.you.YouUserManager;
77
import ee.carlrobert.codegpt.credentials.AzureCredentialsManager;
8+
import ee.carlrobert.codegpt.credentials.LlamaCredentialsManager;
89
import ee.carlrobert.codegpt.credentials.OpenAICredentialsManager;
910
import ee.carlrobert.codegpt.settings.advanced.AdvancedSettingsState;
1011
import ee.carlrobert.codegpt.settings.state.AzureSettingsState;
@@ -13,6 +14,7 @@
1314
import ee.carlrobert.llm.client.azure.AzureClient;
1415
import ee.carlrobert.llm.client.azure.AzureCompletionRequestParams;
1516
import ee.carlrobert.llm.client.llama.LlamaClient;
17+
import ee.carlrobert.llm.client.llama.LlamaClient.Builder;
1618
import ee.carlrobert.llm.client.openai.OpenAIClient;
1719
import ee.carlrobert.llm.client.you.UTMParameters;
1820
import ee.carlrobert.llm.client.you.YouClient;
@@ -72,9 +74,17 @@ public static YouClient getYouClient() {
7274
}
7375

7476
/**
 * Creates a {@code LlamaClient} from the current llama settings.
 *
 * <p>The configured server port is always applied. When the settings say that no local server
 * is run, the configured base host is applied as well, together with the API key held by
 * {@code LlamaCredentialsManager} if that key is neither null nor blank.
 *
 * @return the client built on top of {@code getDefaultClientBuilder()}
 */
public static LlamaClient getLlamaClient() {
75-
return new LlamaClient.Builder()
76-
.setPort(LlamaSettingsState.getInstance().getServerPort())
77-
.build(getDefaultClientBuilder());
77+
LlamaSettingsState llamaSettingsState = LlamaSettingsState.getInstance();
78+
Builder builder = new Builder()
79+
.setPort(llamaSettingsState.getServerPort());
80+
// Host and API key are only set when an existing server is used instead of a local one.
if (!llamaSettingsState.isRunLocalServer()) {
81+
builder.setHost(llamaSettingsState.getBaseHost());
82+
String apiKey = LlamaCredentialsManager.getInstance().getApiKey();
83+
// A missing or whitespace-only key is not passed to the builder.
if (apiKey != null && !apiKey.isBlank()) {
84+
builder.setApiKey(apiKey);
85+
}
86+
}
87+
return builder.build(getDefaultClientBuilder());
7888
}
7989

8090
private static OkHttpClient.Builder getDefaultClientBuilder() {

src/main/java/ee/carlrobert/codegpt/completions/CompletionRequestProvider.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,14 @@ public LlamaCompletionRequest buildLlamaCompletionRequest(
105105
Message message,
106106
ConversationType conversationType) {
107107
var settings = LlamaSettingsState.getInstance();
108-
var promptTemplate = settings.isUseCustomModel()
109-
? settings.getPromptTemplate()
110-
: LlamaModel.findByHuggingFaceModel(settings.getHuggingFaceModel()).getPromptTemplate();
108+
PromptTemplate promptTemplate;
109+
if (settings.isRunLocalServer()) {
110+
promptTemplate = settings.isUseCustomModel()
111+
? settings.getLocalModelPromptTemplate()
112+
: LlamaModel.findByHuggingFaceModel(settings.getHuggingFaceModel()).getPromptTemplate();
113+
} else {
114+
promptTemplate = settings.getRemoteModelPromptTemplate();
115+
}
111116

112117
var systemPrompt = COMPLETION_SYSTEM_PROMPT;
113118
if (conversationType == ConversationType.FIX_COMPILE_ERRORS) {

src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import static java.util.stream.Collectors.toList;
55
import static java.util.stream.Collectors.toSet;
66

7+
import ee.carlrobert.codegpt.codecompletions.InfillPromptTemplate;
78
import ee.carlrobert.codegpt.completions.HuggingFaceModel;
89
import java.util.Collections;
910
import java.util.List;
@@ -17,6 +18,7 @@ public enum LlamaModel {
1718
+ "support for large input contexts, and zero-shot instruction following ability for "
1819
+ "programming tasks.",
1920
PromptTemplate.LLAMA,
21+
InfillPromptTemplate.LLAMA,
2022
List.of(
2123
HuggingFaceModel.CODE_LLAMA_7B_Q3,
2224
HuggingFaceModel.CODE_LLAMA_7B_Q4,
@@ -45,6 +47,7 @@ public enum LlamaModel {
4547
+ "in both English and Chinese. It achieves state-of-the-art performance among "
4648
+ "open-source code models on multiple programming languages and various benchmarks.",
4749
PromptTemplate.ALPACA,
50+
InfillPromptTemplate.DEEPSEEK_CODER,
4851
List.of(
4952
HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q3,
5053
HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q4,
@@ -85,16 +88,27 @@ public enum LlamaModel {
8588
private final String label;
8689
private final String description;
8790
private final PromptTemplate promptTemplate;
91+
private final InfillPromptTemplate infillPromptTemplate;
8892
private final List<HuggingFaceModel> huggingFaceModels;
8993

9094
/**
 * Declares a model without an infill prompt template.
 *
 * <p>Delegates to the five-argument constructor with a null infill template, so
 * {@code getInfillPromptTemplate()} returns null for constants declared this way.
 * NOTE(review): confirm that code requesting the infill template handles that null.
 */
LlamaModel(
9195
String label,
9296
String description,
9397
PromptTemplate promptTemplate,
9498
List<HuggingFaceModel> huggingFaceModels) {
99+
this(label, description, promptTemplate, null, huggingFaceModels);
100+
}
101+
102+
LlamaModel(
103+
String label,
104+
String description,
105+
PromptTemplate promptTemplate,
106+
InfillPromptTemplate infillPromptTemplate,
107+
List<HuggingFaceModel> huggingFaceModels) {
95108
this.label = label;
96109
this.description = description;
97110
this.promptTemplate = promptTemplate;
111+
this.infillPromptTemplate = infillPromptTemplate;
98112
this.huggingFaceModels = huggingFaceModels;
99113
}
100114

@@ -125,6 +139,10 @@ public PromptTemplate getPromptTemplate() {
125139
return promptTemplate;
126140
}
127141

142+
/**
 * Returns the infill prompt template declared for this model.
 *
 * @return the template, or null when the constant was declared through the constructor
 *     overload that takes no infill template
 */
public InfillPromptTemplate getInfillPromptTemplate() {
143+
return infillPromptTemplate;
144+
}
145+
128146
public List<HuggingFaceModel> getHuggingFaceModels() {
129147
return huggingFaceModels;
130148
}

src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaServerAgent.java

Lines changed: 24 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,15 @@ public void startAgent(
3939
Runnable onSuccess,
4040
Runnable onServerTerminated) {
4141
ApplicationManager.getApplication().invokeLater(() -> {
42-
if (!params.isUseCustomServer()) {
43-
try {
44-
serverProgressPanel.updateText(
45-
CodeGPTBundle.get("llamaServerAgent.buildingProject.description"));
46-
makeProcessHandler = new OSProcessHandler(getMakeCommandLinde());
47-
makeProcessHandler.addProcessListener(
48-
getMakeProcessListener(params, serverProgressPanel, onSuccess, onServerTerminated));
49-
makeProcessHandler.startNotify();
50-
} catch (ExecutionException e) {
51-
throw new RuntimeException(e);
52-
}
53-
} else {
54-
startServer(params, serverProgressPanel, onSuccess, onServerTerminated);
42+
try {
43+
serverProgressPanel.updateText(
44+
CodeGPTBundle.get("llamaServerAgent.buildingProject.description"));
45+
makeProcessHandler = new OSProcessHandler(getMakeCommandLinde());
46+
makeProcessHandler.addProcessListener(
47+
getMakeProcessListener(params, serverProgressPanel, onSuccess, onServerTerminated));
48+
makeProcessHandler.startNotify();
49+
} catch (ExecutionException e) {
50+
throw new RuntimeException(e);
5551
}
5652
});
5753
}
@@ -83,31 +79,23 @@ public void onTextAvailable(@NotNull ProcessEvent event, @NotNull Key outputType
8379

8480
/**
 * Boots the llama server once the observed process has terminated.
 *
 * <p>Updates the progress panel text, creates a silent process handler for the server command
 * line, attaches the server process listener and calls {@code startNotify()} on the handler.
 * An {@code ExecutionException} is logged and rethrown wrapped in a {@code RuntimeException}.
 *
 * <p>NOTE(review): {@code event} is never inspected, so the server start is attempted whatever
 * the exit code of the terminated process was - confirm that a failed build should not stop
 * the boot here.
 *
 * @param event the termination event (not read by this method)
 */
@Override
8581
public void processTerminated(@NotNull ProcessEvent event) {
86-
startServer(params, serverProgressPanel, onSuccess, onServerTerminated);
82+
try {
83+
LOG.info("Booting up llama server");
84+
85+
serverProgressPanel.updateText(
86+
CodeGPTBundle.get("llamaServerAgent.serverBootup.description"));
87+
startServerProcessHandler = new OSProcessHandler.Silent(getServerCommandLine(params));
88+
startServerProcessHandler.addProcessListener(
89+
getProcessListener(params.getPort(), onSuccess, onServerTerminated));
90+
startServerProcessHandler.startNotify();
91+
} catch (ExecutionException ex) {
92+
LOG.error("Unable to start llama server", ex);
93+
throw new RuntimeException(ex);
94+
}
8795
}
8896
};
8997
}
9098

91-
private void startServer(
92-
LlamaServerStartupParams params,
93-
ServerProgressPanel serverProgressPanel,
94-
Runnable onSuccess,
95-
Runnable onServerTerminated) {
96-
try {
97-
LOG.info("Booting up llama server");
98-
99-
serverProgressPanel.updateText(
100-
CodeGPTBundle.get("llamaServerAgent.serverBootup.description"));
101-
startServerProcessHandler = new OSProcessHandler.Silent(getServerCommandLine(params));
102-
startServerProcessHandler.addProcessListener(
103-
getProcessListener(params.getPort(), onSuccess, onServerTerminated));
104-
startServerProcessHandler.startNotify();
105-
} catch (ExecutionException ex) {
106-
LOG.error("Unable to start llama server", ex);
107-
throw new RuntimeException(ex);
108-
}
109-
}
110-
11199
private ProcessListener getProcessListener(
112100
int port,
113101
Runnable onSuccess,
@@ -164,8 +152,8 @@ private static GeneralCommandLine getMakeCommandLinde() {
164152

165153
private GeneralCommandLine getServerCommandLine(LlamaServerStartupParams params) {
166154
GeneralCommandLine commandLine = new GeneralCommandLine().withCharset(StandardCharsets.UTF_8);
167-
commandLine.setExePath("./" + params.getServerFileName());
168-
commandLine.withWorkDirectory(params.getServerDirectory());
155+
commandLine.setExePath("./server");
156+
commandLine.withWorkDirectory(CodeGPTPlugin.getLlamaSourcePath());
169157
commandLine.addParameters(
170158
"-m", params.getModelPath(),
171159
"-c", String.valueOf(params.getContextLength()),

src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaServerStartupParams.java

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,28 @@
11
package ee.carlrobert.codegpt.completions.llama;
22

3-
import java.io.File;
43
import java.util.List;
54

65
public class LlamaServerStartupParams {
76

8-
private final String serverPath;
9-
private final boolean useCustomServer;
107
private final String modelPath;
118
private final int contextLength;
129
private final int threads;
1310
private final int port;
1411
private final List<String> additionalParameters;
1512

1613
/**
 * Creates the startup parameters for the llama server process.
 *
 * <p>Every argument is stored as given; the additional-parameter list is kept by reference,
 * not copied.
 *
 * @param modelPath path of the model file
 * @param contextLength context length to start the server with
 * @param threads number of threads to start the server with
 * @param port port the server is started on
 * @param additionalParameters extra command-line parameters
 */
public LlamaServerStartupParams(
17-
String serverPath,
18-
boolean useCustomServer, String modelPath,
14+
String modelPath,
1915
int contextLength,
2016
int threads,
2117
int port,
2218
List<String> additionalParameters) {
23-
this.serverPath = serverPath;
24-
this.useCustomServer = useCustomServer;
2519
this.modelPath = modelPath;
2620
this.contextLength = contextLength;
2721
this.threads = threads;
2822
this.port = port;
2923
this.additionalParameters = additionalParameters;
3024
}
3125

32-
public String getServerPath() {
33-
return serverPath;
34-
}
35-
36-
public String getServerFileName() {
37-
return serverPath.substring(serverPath.lastIndexOf(File.separator) + 1);
38-
}
39-
40-
public String getServerDirectory() {
41-
return serverPath.substring(0, serverPath.lastIndexOf(File.separator) + 1);
42-
}
43-
44-
public boolean isUseCustomServer() {
45-
return useCustomServer;
46-
}
47-
4826
public String getModelPath() {
4927
return modelPath;
5028
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package ee.carlrobert.codegpt.credentials;
2+
3+
import com.intellij.credentialStore.CredentialAttributes;
4+
import com.intellij.openapi.application.ApplicationManager;
5+
import com.intellij.openapi.components.Service;
6+
import org.jetbrains.annotations.Nullable;
7+
8+
/**
 * Service that holds the API key used for the llama server.
 *
 * <p>The key is read once through {@code CredentialsUtil} when the service is constructed and
 * is cached in a field; {@link #setApiKey(String)} updates both the cached value and the
 * stored credential.
 */
@Service
9+
public final class LlamaCredentialsManager {
10+
11+
// Credential attributes under which the key is stored, created for the "LLAMA_API_KEY" name.
private static final CredentialAttributes llamaApiKeyCredentialAttributes =
12+
CredentialsUtil.createCredentialAttributes("LLAMA_API_KEY");
13+
14+
// Cached copy of the stored key; may be null.
private String llamaApiKey;
15+
16+
/** Loads the stored key into the cache. Instances are obtained via {@link #getInstance()}. */
private LlamaCredentialsManager() {
17+
llamaApiKey = CredentialsUtil.getPassword(llamaApiKeyCredentialAttributes);
18+
}
19+
20+
/**
 * Returns the instance registered with the application.
 *
 * @return the service obtained from {@code ApplicationManager.getApplication()}
 */
public static LlamaCredentialsManager getInstance() {
21+
return ApplicationManager.getApplication().getService(LlamaCredentialsManager.class);
22+
}
23+
24+
/**
 * Tells whether a key is present.
 *
 * <p>NOTE(review): this check uses {@code isEmpty()}, so a whitespace-only key counts as set,
 * whereas {@code CompletionClientProvider.getLlamaClient()} skips blank keys via
 * {@code isBlank()} - confirm which behavior is intended.
 *
 * @return true when the cached key is non-null and not empty
 */
public boolean isApiKeySet() {
25+
return llamaApiKey != null && !llamaApiKey.isEmpty();
26+
}
27+
28+
/**
 * Returns the cached key.
 *
 * @return the key, or null when none is cached
 */
public @Nullable String getApiKey() {
29+
return llamaApiKey;
30+
}
31+
32+
/**
 * Replaces the cached key and writes it through {@code CredentialsUtil.setPassword}.
 *
 * @param llamaApiKey the new key value
 */
public void setApiKey(String llamaApiKey) {
33+
this.llamaApiKey = llamaApiKey;
34+
CredentialsUtil.setPassword(llamaApiKeyCredentialAttributes, llamaApiKey);
35+
}
36+
}

src/main/java/ee/carlrobert/codegpt/settings/SettingsConfigurable.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import ee.carlrobert.codegpt.CodeGPTBundle;
77
import ee.carlrobert.codegpt.conversations.ConversationsState;
88
import ee.carlrobert.codegpt.credentials.AzureCredentialsManager;
9+
import ee.carlrobert.codegpt.credentials.LlamaCredentialsManager;
910
import ee.carlrobert.codegpt.credentials.OpenAICredentialsManager;
1011
import ee.carlrobert.codegpt.settings.state.AzureSettingsState;
1112
import ee.carlrobert.codegpt.settings.state.LlamaSettingsState;
@@ -75,6 +76,8 @@ public void apply() {
7576
AzureCredentialsManager.getInstance().setApiKey(serviceSelectionForm.getAzureOpenAIApiKey());
7677
AzureCredentialsManager.getInstance()
7778
.setAzureActiveDirectoryToken(serviceSelectionForm.getAzureActiveDirectoryToken());
79+
LlamaCredentialsManager.getInstance()
80+
.setApiKey(serviceSelectionForm.getLlamaServerPreferencesForm().getApiKey());
7881

7982
var settings = SettingsState.getInstance();
8083
settings.setDisplayName(settingsComponent.getDisplayName());

0 commit comments

Comments
 (0)