Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e363f3
don't build dependencies when installing from repo
kondratyevd Mar 13, 2025
d9a4c4b
bump release
kondratyevd Mar 13, 2025
9d892a5
first steps towards dynamic loading: configure model-specific routing…
kondratyevd May 23, 2025
4c33ce6
remove unnecessary diffs
kondratyevd May 30, 2025
704084d
restore original Lua filter
kondratyevd May 30, 2025
f76a0db
values parameter to enable dynamic routing in envoy
kondratyevd May 30, 2025
6523046
Update JSON schema
actions-user May 30, 2025
e6ab635
Update helm docs
actions-user May 30, 2025
ac86125
remove extra service
kondratyevd May 30, 2025
ef4b247
Merge branch 'dynamic-loading' of github.com:fastmachinelearning/Supe…
kondratyevd May 30, 2025
db5e913
make path to lua script more configurable
kondratyevd May 30, 2025
205077b
Update JSON schema
actions-user May 30, 2025
aa6129f
Update helm docs
actions-user May 30, 2025
03fda78
rename header
kondratyevd May 30, 2025
338e947
Merge branch 'dynamic-loading' of github.com:fastmachinelearning/Supe…
kondratyevd May 30, 2025
8cbb808
clean up dynamic lua filter
kondratyevd May 30, 2025
62a4fea
add some comments
kondratyevd May 30, 2025
29d2203
correctly mount lua config
kondratyevd May 30, 2025
987549b
change log level for testing
kondratyevd May 30, 2025
a946f30
undo
kondratyevd May 30, 2025
4510881
extract model version from gRPC body
kondratyevd May 30, 2025
3261947
imrprove lua script
kondratyevd May 30, 2025
7bcba16
Merge remote-tracking branch 'origin/main' into dynamic-loading
kondratyevd Jul 29, 2025
2cfa4e4
Update JSON schema
actions-user Jul 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/.values-table.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,12 @@
| envoy.rate_limiter.listener_level.max_tokens | int | `5` | Maximum number of simultaneous connections to the Envoy Proxy. Each new connection takes a "token" from the "bucket" which initially contains ``max_tokens`` tokens. |
| envoy.rate_limiter.listener_level.tokens_per_fill | int | `1` | ``tokens_per_fill`` tokens are added to the "bucket" every ``fill_interval``, allowing new connections to be established. |
| envoy.rate_limiter.listener_level.fill_interval | string | `"12s"` | For example, adding a new token every 12 seconds allows 5 new connections every minute. |
| envoy.rate_limiter.prometheus_based | object | `{"enabled":false,"luaConfig":"cfg/envoy-filter.lua"}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. |
| envoy.rate_limiter.prometheus_based | object | `{"enabled":false}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. |
| envoy.rate_limiter.prometheus_based.enabled | bool | `false` | Enable rate limiter |
| envoy.loadBalancerPolicy | string | `"LEAST_REQUEST"` | Envoy load balancer policy. Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV |
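| envoy.lua_filter.enabled | bool | `false` | Enable the Envoy Lua HTTP filter. When enabled, the script given by ``envoy.lua_filter.lua_config`` is rendered into a ConfigMap and mounted into the Envoy pod. |
| envoy.lua_filter.lua_config | string | `"cfg/envoy-filter.lua"` | Path (relative to the chart root) of the Lua script loaded by the Lua filter. |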
| envoy.dynamic_routing | object | `{"enabled":false}` | Enable dynamic routing in Envoy proxy. |
| envoy.auth.enabled | bool | `false` | Enable authentication in Envoy proxy |
| envoy.auth.jwt_issuer | string | `""` | |
| envoy.auth.jwt_remote_jwks_uri | string | `""` | |
Expand Down
64 changes: 64 additions & 0 deletions helm/supersonic/cfg/envoy-filter-dynamic.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
--- Envoy Lua filter entry point, invoked for each incoming request.
--
-- For gRPC requests this sets the "route-to" header, which the route
-- configuration uses as the host rewrite header for the dynamic forward proxy:
--   * ModelInfer calls are routed to a per-model, per-version service whose
--     name is derived from the model name and version in the request body;
--   * every other gRPC call is routed to the default Triton service.
-- Non-gRPC requests are left untouched.
--
-- The upper-case placeholder tokens inside the string literals below are
-- substituted by the Helm chart when the script is rendered into a ConfigMap.
--
-- @param request_handle Envoy Lua stream handle for the request.
function envoy_on_request(request_handle)
  local headers = request_handle:headers()
  local path = headers:get(":path")
  local content_type = headers:get("content-type")

  -- gRPC content types may carry a "+<format>" suffix (e.g. "+proto"),
  -- so accept both the bare type and the suffixed form.
  local grpc_type = "application/grpc"
  local is_grpc = content_type ~= nil
    and (content_type == grpc_type
      or content_type:sub(1, #grpc_type + 1) == grpc_type .. "+")
  if not is_grpc then
    return
  end

  if path == "/inference.GRPCInferenceService/ModelInfer" then
    ---- Extract model name and version from the ModelInferRequest ----
    local model_name, model_version = extract_model_name_and_version(request_handle)

    -- The extractor reports failure with empty strings, and empty strings are
    -- truthy in Lua, so they have to be compared against "" explicitly.
    local have_name = model_name ~= nil and model_name ~= ""
    local have_version = model_version ~= nil and model_version ~= ""

    if have_name and have_version then
      local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version
      local header_value = svc_name .. ".NAMESPACE.svc.cluster.local:8001"
      request_handle:logInfo("route-to = " .. header_value)
      -- replace (not add) so that a client-supplied header cannot pick the upstream
      headers:replace("route-to", header_value)
    else
      request_handle:logWarn("ModelInfer request without a usable model name/version; route-to header not set")
      -- never forward a routing header that this filter did not produce
      headers:remove("route-to")
    end
  else
    --- for non-inference calls, for now just forward to default service
    headers:replace("route-to", "RELEASE-triton.NAMESPACE.svc.cluster.local:8001")
  end
end

--- Decode a protobuf base-128 varint.
-- @param buf string holding the encoded message
-- @param pos 1-based index of the first varint byte
-- @return decoded value and the index just past the varint,
--         or nil when the varint is truncated or longer than 10 bytes
local function read_varint(buf, pos)
  local value, scale = 0, 1
  for _ = 1, 10 do
    local b = buf:byte(pos)
    if not b then
      return nil
    end
    pos = pos + 1
    -- low 7 bits carry data; the high bit flags a continuation byte
    value = value + (b % 128) * scale
    if b < 128 then
      return value, pos
    end
    scale = scale * 128
  end
  return nil
end

--- Read a length-delimited (wire type 2) payload.
-- @param buf string holding the encoded message
-- @param pos 1-based index of the length varint (the byte after the tag)
-- @return payload string and the index just past it,
--         or nil when the length or the payload runs past the end of buf
local function read_length_delimited(buf, pos)
  local len, first = read_varint(buf, pos)
  if not len then
    return nil
  end
  local last = first + len - 1
  if last > #buf then
    return nil
  end
  return buf:sub(first, last), last + 1
end

--- Extract the model name and version from a gRPC ModelInferRequest body.
--
-- Only the leading part of the body is inspected: the 5-byte gRPC message
-- header followed by protobuf field 1 (model name, tag 0x0A) and, directly
-- after it, field 2 (model version, tag 0x12).
--
-- @param request_handle Envoy Lua stream handle for the request.
-- @return model_name, model_version as strings; a value that could not be
--         extracted is returned as the empty string.
function extract_model_name_and_version(request_handle)
  local model_name = ""
  local model_version = ""

  -- body() yields nothing for requests without a body
  local buffer = request_handle:body()
  if not buffer then
    return model_name, model_version
  end

  local total = buffer:length()
  if total <= 5 then
    return model_name, model_version
  end

  -- Both fields sit at the very start of the message, so copy only a bounded
  -- prefix into Lua instead of the whole (potentially very large) payload.
  local max_prefix_bytes = 1024
  local body = buffer:getBytes(0, math.min(total, max_prefix_bytes))
  if not body or #body <= 5 then
    return model_name, model_version
  end

  -- 5-byte gRPC header: 1-byte compressed flag + 4-byte message length
  if body:byte(1) ~= 0 then
    request_handle:logWarn("Compressed gRPC message; cannot extract model name and version")
    return model_name, model_version
  end
  local msg = body:sub(6)

  -- protobuf wire format for field 1, wire type 2 (length-delimited): tag = 0x0A
  if msg:byte(1) ~= 0x0A then
    request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
    return model_name, model_version
  end

  local name, offset = read_length_delimited(msg, 2)
  if not name then
    request_handle:logErr("Truncated or malformed model_name field")
    return model_name, model_version
  end
  model_name = name

  -- Extract model version (field 2, wire type 2, tag 0x12)
  local tag = msg:byte(offset)
  if tag == 0x12 then
    local version = read_length_delimited(msg, offset + 1)
    if version then
      model_version = version
    else
      request_handle:logWarn("Truncated or malformed model_version field")
    end
  elseif tag then
    request_handle:logWarn(string.format("No model_version field (expected tag 0x12 at offset %d, got 0x%02X)",
      offset, tag))
  else
    -- message ends right after the model name; there is no byte to report
    request_handle:logWarn(string.format("No model_version field (message ends before offset %d)", offset))
  end

  return model_name, model_version
end
45 changes: 42 additions & 3 deletions helm/supersonic/templates/envoy/configmaps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,22 @@ static_resources:
routes:
- match:
prefix: "/"
{{- if .envoy.dynamic_routing.enabled }}
typed_per_filter_config:
envoy.filters.http.dynamic_forward_proxy:
"@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig
host_rewrite_header: "route-to"
route:
cluster: dynamic_forward_proxy_cluster
timeout: {{ .envoy.grpc_route_timeout }}
{{- else }}
route:
cluster: triton_grpc_service
timeout: {{ .envoy.grpc_route_timeout }}
{{- end }}

http_filters:
{{- with .envoy.rate_limiter.prometheus_based }}
{{- with .envoy.lua_filter }}
{{- if .enabled }}
- name: envoy.filters.http.lua
typed_config:
Expand Down Expand Up @@ -94,6 +105,16 @@ static_resources:
provider_name: provider_icecube
{{- end }}
{{- end }}
{{- if .envoy.dynamic_routing.enabled }}
- name: envoy.filters.http.dynamic_forward_proxy
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.FilterConfig
dns_cache_config:
name: dynamic_cache
dns_lookup_family: ALL
dns_cache_circuit_breaker:
max_pending_requests: 1024
{{- end }}
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
Expand Down Expand Up @@ -176,6 +197,22 @@ static_resources:
socket_address:
address: {{ .tritonName }}
port_value: {{ .tritonGrpcPort }}
{{- if .envoy.dynamic_routing.enabled }}
- name: dynamic_forward_proxy_cluster
connect_timeout: 2s
lb_policy: CLUSTER_PROVIDED
http2_protocol_options:
max_concurrent_streams: 1000
cluster_type:
name: envoy.clusters.dynamic_forward_proxy
typed_config:
"@type": type.googleapis.com/envoy.extensions.clusters.dynamic_forward_proxy.v3.ClusterConfig
dns_cache_config:
name: dynamic_cache
dns_lookup_family: ALL
dns_cache_circuit_breaker:
max_pending_requests: 1024
{{- end }}
{{- end }}
{{- end }}

Expand Down Expand Up @@ -226,7 +263,7 @@ data:
{{ include "envoy.configuration.yaml" $envoyContext | indent 4 }}
---

{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
{{- if .Values.envoy.lua_filter.enabled }}
{{- /* Create a ConfigMap for the Lua filter */}}
apiVersion: v1
kind: ConfigMap
Expand All @@ -239,12 +276,14 @@ metadata:
data:
envoy-filter.lua: |-
{{- /* Read and process the Lua configuration file */}}
{{- $luaConfig := $.Files.Get .Values.envoy.rate_limiter.prometheus_based.luaConfig | nindent 4 }}
{{- $luaConfig := $.Files.Get .Values.envoy.lua_filter.lua_config | nindent 4 }}
{{- $luaConfig = $luaConfig | replace "SERVER_LOAD_METRIC" (include "supersonic.defaultMetric" . | quote) }}
{{- $luaConfig = $luaConfig | replace "SERVER_LOAD_THRESHOLD" (quote .Values.serverLoadThreshold) }}
{{- $luaConfig = $luaConfig | replace "PROMETHEUS_SCHEME" (include "supersonic.prometheusScheme" .) }}
{{- $luaConfig = $luaConfig | replace "PROMETHEUS_HOST" (include "supersonic.prometheusHost" .) }}
{{- $luaConfig = $luaConfig | replace "PROMETHEUS_PORT" (include "supersonic.prometheusPort" .) }}
{{- $luaConfig = $luaConfig | replace "RELEASE" .Release.Name }}
{{- $luaConfig = $luaConfig | replace "NAMESPACE" .Release.Namespace }}
{{ $luaConfig | indent 4 }}

---
Expand Down
4 changes: 2 additions & 2 deletions helm/supersonic/templates/envoy/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ spec:
volumeMounts:
- name: {{ include "supersonic.name" . }}-envoy-config
mountPath: /etc/envoy
{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
{{- if .Values.envoy.lua_filter.enabled }}
- name: {{ include "supersonic.name" . }}-lua-volume
mountPath: /etc/envoy/lua
readOnly: true
Expand All @@ -62,7 +62,7 @@ spec:
- name: {{ include "supersonic.name" . }}-envoy-config
configMap:
name: {{ include "supersonic.name" . }}-envoy-config
{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
{{- if .Values.envoy.lua_filter.enabled }}
- name: {{ include "supersonic.name" . }}-lua-volume
configMap:
name: {{ include "supersonic.name" . }}-lua-config
Expand Down
34 changes: 29 additions & 5 deletions helm/supersonic/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -391,14 +391,10 @@
"properties": {
"enabled": {
"type": "boolean"
},
"luaConfig": {
"type": "string"
}
},
"required": [
"enabled",
"luaConfig"
"enabled"
]
}
},
Expand All @@ -410,6 +406,32 @@
"loadBalancerPolicy": {
"type": "string"
},
"lua_filter": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
},
"lua_config": {
"type": "string"
}
},
"required": [
"enabled",
"lua_config"
]
},
"dynamic_routing": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
}
},
"required": [
"enabled"
]
},
"auth": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -449,11 +471,13 @@
"annotations",
"args",
"auth",
"dynamic_routing",
"enabled",
"grpc_route_timeout",
"image",
"ingress",
"loadBalancerPolicy",
"lua_filter",
"nodeSelector",
"rate_limiter",
"replicas",
Expand Down
9 changes: 8 additions & 1 deletion helm/supersonic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,19 @@ envoy:
prometheus_based:
# -- Enable rate limiter
enabled: false
luaConfig: "cfg/envoy-filter.lua"

# -- Envoy load balancer policy.
# Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV
loadBalancerPolicy: "LEAST_REQUEST"

lua_filter:
enabled: false
lua_config: "cfg/envoy-filter.lua"

# -- Enable dynamic routing in Envoy proxy.
dynamic_routing:
enabled: false

auth:
# -- Enable authentication in Envoy proxy
enabled: false
Expand Down
9 changes: 9 additions & 0 deletions values/values-geddes-cms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ triton:
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
--model-control-mode=explicit \
--allow-gpu-metrics=true \
--log-verbose=0 \
--strict-model-config=false \
Expand Down Expand Up @@ -56,6 +57,14 @@ envoy:
enabled: true
hostName: sonic-cms.geddes.rcac.purdue.edu
ingressClassName: public
rate_limiter:
prometheus_based:
enabled: false
dynamic_routing:
enabled: true
lua_filter:
enabled: true
lua_config: "cfg/envoy-filter-dynamic.lua"

autoscaler:
enabled: true
Expand Down