Commit c8dbf91

Merge pull request #11 from meta-llama/release/0.0.54
Release/0.0.54
2 parents 806f072 + 74526a2 commit c8dbf91

File tree

12 files changed: +607 additions, -117 deletions

README.md

Lines changed: 151 additions & 115 deletions
Large diffs are not rendered by default.

build-libs.sh

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,7 @@ export SKIP_MOCK_TESTS=true
 ./gradlew :llama-stack-client-kotlin-core:spotlessApply
 ./gradlew :llama-stack-client-kotlin-client-okhttp:spotlessApply
 ./gradlew :llama-stack-client-kotlin:spotlessApply
+./gradlew :llama-stack-client-kotlin-client-local:spotlessApply

 ./gradlew build

@@ -17,3 +18,4 @@ echo $BUILD_JARS_DIR
 cp -a llama-stack-client-kotlin/build/libs/. $BUILD_JARS_DIR
 cp -a llama-stack-client-kotlin-client-okhttp/build/libs/. $BUILD_JARS_DIR
 cp -a llama-stack-client-kotlin-core/build/libs/. $BUILD_JARS_DIR
+cp -a llama-stack-client-kotlin-client-local/build/libs/. $BUILD_JARS_DIR

build.gradle.kts

Lines changed: 1 addition & 1 deletion
@@ -4,5 +4,5 @@ plugins {

 allprojects {
     group = "com.llama.llamastack"
-    version = "0.0.1-alpha.2"
+    version = "0.0.54"
 }
(New image file, 55.8 KB; preview not rendered.)
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+plugins {
+    id("llama-stack-client.kotlin")
+    id("llama-stack-client.publish")
+}
+
+dependencies {
+    api(project(":llama-stack-client-kotlin-core"))
+    testImplementation(kotlin("test"))
+    implementation(fileTree("libs") { include("*.jar") })
+    implementation(files("jni/**/*.so"))
+}
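The new module's build script above depends on the core project and bundles any JARs dropped into its libs/ directory. For the module to build at all it presumably also has to be registered in the root settings.gradle.kts; that file is not part of this diff, so the snippet below is only a sketch, with the module name taken from build-libs.sh.

// settings.gradle.kts (sketch, not part of this diff)
// Registers the new local-inference module alongside the existing ones.
include(":llama-stack-client-kotlin")
include(":llama-stack-client-kotlin-core")
include(":llama-stack-client-kotlin-client-okhttp")
include(":llama-stack-client-kotlin-client-local")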
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Purpose: The goal of this script is to get the ExecuTorch AAR file for local inferencing.
+# How to run:
+# - `sh download-prebuild-et-lib.sh` : download ExecuTorch AAR into app/libs (meant to be run in an Android app environment)
+# - `sh download-prebuild-et-lib.sh --unzip` : download and unzip ExecuTorch AAR in the working directory, then clean up.
+# Based on ExecuTorch Commit ID: 0a12e33d22a3d44d1aa2af5f0d0673d45b962553
+
+set -eu
+
+AAR_URL="https://ossci-android.s3.amazonaws.com/executorch/release/executorch-20241202/executorch.aar"
+AAR_SHASUM_URL="https://ossci-android.s3.amazonaws.com/executorch/release/executorch-20241202/executorch.aar.sha256sums"
+LIBS_PATH="$(dirname "$0")/app/libs"
+UNZIP=false
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --unzip)
+      UNZIP=true
+      LIBS_PATH="$(dirname "$0")"
+      shift
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+done
+
+if [ "$UNZIP" = false ]; then
+  mkdir -p "${LIBS_PATH}"
+fi
+
+curl -O "${AAR_SHASUM_URL}"
+shasum --check --status executorch.aar.sha256sums || curl "${AAR_URL}" -o "${LIBS_PATH}/executorch.aar"
+
+if [ "$UNZIP" = true ]; then
+  unzip -q executorch.aar
+  rm "executorch.aar" "executorch.aar.sha256sums"
+fi
Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
+// File generated from our OpenAPI spec by Stainless.
+
+package com.llama.llamastack.client.local
+
+import com.llama.llamastack.client.local.util.PromptFormatLocal
+import com.llama.llamastack.core.JsonValue
+import com.llama.llamastack.core.RequestOptions
+import com.llama.llamastack.models.CompletionMessage
+import com.llama.llamastack.models.EmbeddingsResponse
+import com.llama.llamastack.models.InferenceChatCompletionParams
+import com.llama.llamastack.models.InferenceChatCompletionResponse
+import com.llama.llamastack.models.InferenceCompletionParams
+import com.llama.llamastack.models.InferenceCompletionResponse
+import com.llama.llamastack.models.InferenceEmbeddingsParams
+import com.llama.llamastack.services.blocking.InferenceService
+import org.pytorch.executorch.LlamaCallback
+
+class InferenceServiceLocalImpl
+constructor(
+    private val clientOptions: LocalClientOptions,
+) : InferenceService, LlamaCallback {
+
+    private var resultMessage: String = ""
+    private var onResultComplete: Boolean = false
+    private var statsMetric: Float = 0.0f
+    private var onStatsComplete: Boolean = false
+    private var modelName: String = ""
+
+    private var sequenceLengthKey: String = "seq_len"
+
+    override fun onResult(p0: String?) {
+        if (PromptFormatLocal.getStopTokens(modelName).any { it == p0 }) {
+            onResultComplete = true
+            return
+        }
+
+        if (p0.equals("\n\n") || p0.equals("\n")) {
+            if (resultMessage.isNotEmpty()) {
+                resultMessage += p0
+            }
+        } else {
+            resultMessage += p0
+        }
+    }
+
+    override fun onStats(p0: Float) {
+        // Required since in some cases where seq_len is met, EOT is not appended by the
+        // ExecuTorch logic.
+        onResultComplete = true
+        statsMetric = p0
+        onStatsComplete = true
+    }
+
+    override fun chatCompletion(
+        params: InferenceChatCompletionParams,
+        requestOptions: RequestOptions
+    ): InferenceChatCompletionResponse {
+        resultMessage = ""
+        val mModule = clientOptions.llamaModule
+        modelName = params.modelId()
+        val formattedPrompt =
+            PromptFormatLocal.getTotalFormattedPrompt(params.messages(), modelName)
+
+        // Developers can pass in their own sequence length; if not, it defaults to a dynamic
+        // value so that enough tokens are available for a reasonably complete response:
+        // 0.75 is the approximate words-per-token ratio, and 64 is a buffer of tokens for the
+        // generated response.
+        val seqLength =
+            params._additionalQueryParams().values(sequenceLengthKey).lastOrNull()?.toInt()
+                ?: ((formattedPrompt.length * 0.75) + 64).toInt()
+
+        println("Chat Completion Prompt is: $formattedPrompt with seqLength of $seqLength")
+        onResultComplete = false
+        mModule.generate(formattedPrompt, seqLength, this, false)
+
+        while (!onResultComplete && !onStatsComplete) {
+            Thread.sleep(100)
+        }
+        onResultComplete = false
+        onStatsComplete = false
+
+        return InferenceChatCompletionResponse.ofChatCompletionResponse(
+            InferenceChatCompletionResponse.ChatCompletionResponse.builder()
+                .completionMessage(
+                    CompletionMessage.builder()
+                        .content(CompletionMessage.Content.ofString(resultMessage))
+                        .build()
+                )
+                .putAdditionalProperty("tps", JsonValue.from(statsMetric))
+                .build()
+        )
+    }
+
+    override fun completion(
+        params: InferenceCompletionParams,
+        requestOptions: RequestOptions
+    ): InferenceCompletionResponse {
+        TODO("Not yet implemented")
+    }
+
+    override fun embeddings(
+        params: InferenceEmbeddingsParams,
+        requestOptions: RequestOptions
+    ): EmbeddingsResponse {
+        TODO("Not yet implemented")
+    }
+}
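To make the default sequence-length heuristic in chatCompletion concrete, here is a small worked example; the 400-character prompt length is made up purely for illustration.

// Worked example of the seq_len default above (illustrative numbers only):
// a 400-character formatted prompt and no explicit "seq_len" query parameter.
val promptLength = 400
val seqLength = ((promptLength * 0.75) + 64).toInt() // 300 + 64 = 364 tokens requested from generate()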
Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
+// File generated from our OpenAPI spec by Stainless.
+
+package com.llama.llamastack.client.local
+
+import com.llama.llamastack.client.LlamaStackClientClient
+import com.llama.llamastack.client.LlamaStackClientClientAsync
+import com.llama.llamastack.models.*
+import com.llama.llamastack.services.blocking.*
+
+class LlamaStackClientClientLocalImpl
+constructor(
+    private val clientOptions: LocalClientOptions,
+) : LlamaStackClientClient {
+
+    private val inference: InferenceService by lazy { InferenceServiceLocalImpl(clientOptions) }
+
+    override fun inference(): InferenceService = inference
+
+    override fun async(): LlamaStackClientClientAsync {
+        TODO("Not yet implemented")
+    }
+
+    override fun telemetry(): TelemetryService {
+        TODO("Not yet implemented")
+    }
+
+    override fun datasetio(): DatasetioService {
+        TODO("Not yet implemented")
+    }
+
+    override fun scoring(): ScoringService {
+        TODO("Not yet implemented")
+    }
+
+    override fun scoringFunctions(): ScoringFunctionService {
+        TODO("Not yet implemented")
+    }
+
+    override fun evalTasks(): EvalTaskService {
+        TODO("Not yet implemented")
+    }
+
+    override fun agents(): AgentService {
+        TODO("Not yet implemented")
+    }
+
+    override fun batchInference(): BatchInferenceService {
+        TODO("Not yet implemented")
+    }
+
+    override fun datasets(): DatasetService {
+        TODO("Not yet implemented")
+    }
+
+    override fun eval(): EvalService {
+        TODO("Not yet implemented")
+    }
+
+    override fun inspect(): InspectService {
+        TODO("Not yet implemented")
+    }
+
+    override fun safety(): SafetyService {
+        TODO("Not yet implemented")
+    }
+
+    override fun memory(): MemoryService {
+        TODO("Not yet implemented")
+    }
+
+    override fun postTraining(): PostTrainingService {
+        TODO("Not yet implemented")
+    }
+
+    override fun providers(): ProviderService {
+        TODO("Not yet implemented")
+    }
+
+    override fun routes(): RouteService {
+        TODO("Not yet implemented")
+    }
+
+    override fun syntheticDataGeneration(): SyntheticDataGenerationService {
+        TODO("Not yet implemented")
+    }
+
+    override fun models(): ModelService {
+        TODO("Not yet implemented")
+    }
+
+    override fun memoryBanks(): MemoryBankService {
+        TODO("Not yet implemented")
+    }
+
+    override fun shields(): ShieldService {
+        TODO("Not yet implemented")
+    }
+}
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+package com.llama.llamastack.client.local
+
+import com.llama.llamastack.client.LlamaStackClientClient
+
+class LlamaStackClientLocalClient private constructor() {
+
+    companion object {
+        fun builder() = Builder()
+    }
+
+    class Builder {
+
+        private var clientOptions: LocalClientOptions.Builder = LocalClientOptions.builder()
+        private var modelPath: String? = null
+        private var tokenizerPath: String? = null
+        private var temperature: Float = 0.0F
+
+        fun modelPath(modelPath: String) = apply { this.modelPath = modelPath }
+
+        fun tokenizerPath(tokenizerPath: String) = apply { this.tokenizerPath = tokenizerPath }
+
+        fun temperature(temperature: Float) = apply { this.temperature = temperature }
+
+        fun fromEnv() = apply { clientOptions.fromEnv() }
+
+        fun build(): LlamaStackClientClient {
+
+            return LlamaStackClientClientLocalImpl(
+                clientOptions
+                    .modelPath(modelPath!!)
+                    .tokenizerPath(tokenizerPath!!)
+                    .temperature(temperature)
+                    .build()
+            )
+        }
+    }
+}
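This builder, together with InferenceServiceLocalImpl above, is how an app obtains a locally backed LlamaStackClientClient. A hedged usage sketch follows: the on-device file paths and model ID are hypothetical, and the InferenceChatCompletionParams builder calls are assumed from the modelId()/messages() accessors used in the service implementation rather than shown in this diff.

// Sketch only: paths, model ID, and the params builder API are assumptions, not taken from this diff.
val client = LlamaStackClientLocalClient.builder()
    .modelPath("/data/local/tmp/llama/llama3_2_1b.pte")     // hypothetical ExecuTorch .pte model
    .tokenizerPath("/data/local/tmp/llama/tokenizer.model") // hypothetical tokenizer file
    .temperature(0.0F)
    .build()

val response = client.inference().chatCompletion(
    InferenceChatCompletionParams.builder()
        .modelId("meta-llama/Llama-3.2-1B-Instruct")
        .messages(messages) // hypothetical list of messages built with the models package; requestOptions assumed to default
        .build()
)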
Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
+// File generated from our OpenAPI spec by Stainless.
+
+package com.llama.llamastack.client.local
+
+import com.llama.llamastack.errors.LlamaStackClientException
+import org.pytorch.executorch.LlamaModule
+
+class LocalClientOptions
+private constructor(
+    val modelPath: String,
+    val tokenizerPath: String,
+    val temperature: Float,
+    val llamaModule: LlamaModule
+) {
+
+    companion object {
+        fun builder() = Builder()
+    }
+
+    class Builder {
+        private var modelPath: String? = null
+        private var tokenizerPath: String? = null
+        private var temperature: Float = 0.0F
+        private var llamaModule: LlamaModule? = null
+
+        fun modelPath(modelPath: String) = apply { this.modelPath = modelPath }
+
+        fun tokenizerPath(tokenizerPath: String) = apply { this.tokenizerPath = tokenizerPath }
+
+        fun temperature(temperature: Float) = apply { this.temperature = temperature }
+
+        fun fromEnv() = apply {}
+
+        fun build(): LocalClientOptions {
+            checkNotNull(modelPath) { "`modelPath` is required but not set" }
+            checkNotNull(tokenizerPath) { "`tokenizerPath` is required but not set" }
+
+            try {
+                this.llamaModule = LlamaModule(1, modelPath, tokenizerPath, temperature)
+                checkNotNull(llamaModule) { "`llamaModule` failed to initialize" }
+                llamaModule!!.load()
+                println(
+                    "llamaModule loading with modelPath: $modelPath | " +
+                        "tokenizerPath: $tokenizerPath | temperature: $temperature"
+                )
+                return LocalClientOptions(modelPath!!, tokenizerPath!!, temperature, llamaModule!!)
+            } catch (e: NoClassDefFoundError) {
+                throw LlamaStackClientException(
+                    "ExecuTorch AAR file needs to be included in the libs/ for your app. " +
+                        "Please see the README for more details: " +
+                        "https://github.com/meta-llama/llama-stack-client-kotlin/tree/main",
+                    e
+                )
+            }
+        }
+    }
+}
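Because build() eagerly constructs and loads the ExecuTorch LlamaModule, a missing AAR surfaces as a LlamaStackClientException when the local client is constructed. A sketch of how an app might guard against that follows; the fallback-to-null behaviour and the path variables are illustrative assumptions, not part of this commit.

// Illustrative only: fall back to null (e.g. to a remote client) if the ExecuTorch AAR is absent.
val localClient: LlamaStackClientClient? =
    try {
        LlamaStackClientLocalClient.builder()
            .modelPath(modelPath)         // hypothetical variable holding the on-device model path
            .tokenizerPath(tokenizerPath) // hypothetical variable holding the tokenizer path
            .build()
    } catch (e: LlamaStackClientException) {
        println("Local inference unavailable, ExecuTorch AAR missing: ${e.message}")
        null
    }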
