Skip to content

Separate nova + manila deployments & associated refactoring #215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions .github/renovate.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,6 @@
"automerge": true,
"groupName": "github.com/sapcc"
},
{
"enabled": false,
"matchPackageNames": [
"postgresql"
]
},
{
"enabled": false,
"matchFileNames": [
"helm/cortex-postgres/**"
]
},
{
"matchPackageNames": [
"!/^github\\.com\\/sapcc\\/.*/",
Expand Down
32 changes: 32 additions & 0 deletions .github/workflows/check-alerts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Lints Prometheus rule and alert files touched by a pull request using promtool.
name: Check Alerts using Promtool

on:
  pull_request:
    # Only run when a rule or alert file is part of the change set.
    paths:
      - '**/*.rules.yaml'
      - '**/*.alerts.yaml'

jobs:
  lint:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR
        uses: actions/checkout@v4

      # Collect the rule/alert files changed by this PR so that only those
      # files are handed to promtool in the next step.
      - name: Get changed rule and alert files
        id: changed
        uses: tj-actions/changed-files@v46
        with:
          files: |
            **/*.rules.yaml
            **/*.alerts.yaml

      - name: Check changed rule and alert files via promtool
        # Skip the check when none of the matching files changed.
        if: steps.changed.outputs.any_changed == 'true'
        # NOTE(review): the action reference was mangled into "[email protected]"
        # by e-mail obfuscation. The repository name is restored here; the
        # original ref could not be recovered, so `master` is a placeholder.
        # Confirm the intended tag or commit before relying on it.
        uses: peimanja/promtool-github-actions@master
        with:
          promtool_actions_subcommand: 'rules'
          promtool_actions_files: ${{ steps.changed.outputs.all_changed_files }}
          promtool_actions_version: 'latest'
          promtool_actions_comment: 'false'
32 changes: 25 additions & 7 deletions .github/workflows/push-charts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,39 @@ jobs:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Get all changed Chart.yaml files
id: changed-chart-yaml-files
- name: Get all changed library Chart.yaml files
id: changed-chart-yaml-files-library
uses: tj-actions/changed-files@v46
with:
files: |
**/Chart.yaml
- name: Push chart to registry
if: steps.changed-chart-yaml-files.outputs.all_changed_files != ''
helm/library/**/Chart.yaml
- name: Push library charts to registry
if: steps.changed-chart-yaml-files-library.outputs.all_changed_files != ''
shell: bash
env:
ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files.outputs.all_changed_files }}
ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-library.outputs.all_changed_files }}
run: |
for CHART_FILE in ${ALL_CHANGED_FILES}; do
CHART_DIR=$(dirname $CHART_FILE)
helm package $CHART_DIR --dependency-update --destination $CHART_DIR
CHART_PACKAGE=$(ls $CHART_DIR/*.tgz)
helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
done
done
- name: Get all changed bundle Chart.yaml files
id: changed-chart-yaml-files-bundle
uses: tj-actions/changed-files@v46
with:
files: |
helm/bundles/**/Chart.yaml
- name: Push bundle charts to registry
if: steps.changed-chart-yaml-files-bundle.outputs.all_changed_files != ''
shell: bash
env:
ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-bundle.outputs.all_changed_files }}
run: |
for CHART_FILE in ${ALL_CHANGED_FILES}; do
CHART_DIR=$(dirname $CHART_FILE)
helm package $CHART_DIR --dependency-update --destination $CHART_DIR
CHART_PACKAGE=$(ls $CHART_DIR/*.tgz)
helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
done
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Update Helm Image Tag
name: Update Helm AppVersion

on:
workflow_run:
Expand All @@ -7,7 +7,7 @@ on:
- completed

jobs:
update-helm-tag:
update-appversion:
if: >-
${{ github.event.workflow_run.conclusion == 'success' && !contains(github.event.workflow_run.head_commit.message, '[skip ci]') }}
runs-on: ubuntu-latest
Expand All @@ -23,14 +23,14 @@ jobs:
id: vars
run: echo "sha=sha-$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT

- name: Update image.tag in values.yaml
- name: Update appVersion in Chart.yaml
run: |
sed -i 's/^\([ ]*tag:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' helm/cortex/values.yaml
sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' helm/library/cortex-core/Chart.yaml

- name: Commit and push changes
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add helm/cortex/values.yaml
git commit -m "Bump cortex image tag in chart to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
git add helm/library/cortex-core/Chart.yaml
git commit -m "Bump helm/library/cortex-core appVersion to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
git push origin HEAD:main
212 changes: 119 additions & 93 deletions Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
# Don't track us.
analytics_settings(False)

# The upgrade job may take a long time to run, so it is disabled by default.
enable_postgres_upgrade = False

if not os.getenv('TILT_VALUES_PATH'):
fail("TILT_VALUES_PATH is not set.")
if not os.path.exists(os.getenv('TILT_VALUES_PATH')):
fail("TILT_VALUES_PATH "+ os.getenv('TILT_VALUES_PATH') + " does not exist.")

# The upgrade job may take a long time to run, so it is disabled by default.
enable_postgres_upgrade = False

load('ext://helm_resource', 'helm_resource', 'helm_repo')
helm_repo(
'Bitnami Helm Repo',
Expand All @@ -27,93 +27,16 @@ helm_repo(
labels=['Repositories'],
)

########### Cortex Core Services
tilt_values = os.getenv('TILT_VALUES_PATH')
docker_build('ghcr.io/cobaltcore-dev/cortex', '.', only=[
'internal/', 'commands/', 'main.go', 'go.mod', 'go.sum', 'Makefile',
])
local('sh helm/sync.sh helm/cortex')
k8s_yaml(helm('./helm/cortex', name='cortex', values=[tilt_values]))
k8s_resource('cortex-syncer', port_forwards=[
port_forward(8001, 2112),
], links=[
link('localhost:8001/metrics', '/metrics'),
], labels=['Core-Services'])
k8s_resource('cortex-extractor', port_forwards=[
port_forward(8002, 2112),
], links=[
link('localhost:8002/metrics', '/metrics'),
], labels=['Core-Services'])
k8s_resource('cortex-scheduler-nova', port_forwards=[
port_forward(8003, 8080),
port_forward(8004, 2112),
], links=[
link('localhost:8004/metrics', '/metrics'),
], labels=['Core-Services'])
k8s_resource('cortex-scheduler-manila', port_forwards=[
port_forward(8005, 8080),
port_forward(8006, 2112),
], links=[
link('localhost:8006/metrics', '/metrics'),
], labels=['Core-Services'])
k8s_resource('cortex-kpis', port_forwards=[
port_forward(8007, 2112),
], links=[
link('localhost:8007/metrics', '/metrics'),
], labels=['Core-Services'])
k8s_resource('cortex-descheduler-nova', port_forwards=[
port_forward(8008, 2112),
], links=[
link('localhost:8008/metrics', '/metrics'),
], labels=['Core-Services'])

########### Cortex Commands
k8s_resource('cortex-cli', labels=['Commands'])
local_resource(
'Run E2E Tests',
'kubectl exec -it deploy/cortex-cli -- /usr/bin/cortex checks',
deps=['./internal/checks'],
labels=['Commands'],
trigger_mode=TRIGGER_MODE_MANUAL,
auto_init=False,
)
k8s_resource('cortex-migrations', labels=['Commands'])

########### RabbitMQ MQTT for Cortex Core Service
local('sh helm/sync.sh helm/cortex-mqtt')
k8s_yaml(helm('./helm/cortex-mqtt', name='cortex-mqtt'))
k8s_resource('cortex-mqtt', port_forwards=[
port_forward(1883, 1883), # Direct TCP connection
port_forward(9000, 15675), # Websocket connection
], labels=['Core-Services'])

########### Postgres DB for Cortex Core Service
local('sh helm/sync.sh helm/cortex-postgres')
job_flag = 'upgradeJob.enabled=' + str(enable_postgres_upgrade).lower()
k8s_yaml(helm('./helm/cortex-postgres', name='cortex-postgres', set=job_flag))
k8s_resource('cortex-postgresql', port_forwards=[
port_forward(5432, 5432),
], labels=['Database'])
if enable_postgres_upgrade:
# Get the version from the chart.
cmd = "helm show chart ./helm/cortex-postgres | grep -E '^version:' | awk '{print $2}'"
chart_version = str(local(cmd)).strip()
# Use the chart version to name the pre-upgrade job.
k8s_resource('cortex-postgresql-pre-upgrade-'+chart_version, labels=['Database'])
k8s_resource('cortex-postgresql-post-upgrade-'+chart_version, labels=['Database'])

########### Monitoring
local('sh helm/sync.sh helm/cortex-prometheus-operator')
k8s_yaml(helm('./helm/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator
local('sh helm/sync.sh helm/cortex-prometheus')
k8s_yaml(helm('./helm/cortex-prometheus', name='cortex-prometheus')) # Alerts + ServiceMonitor
########### Dev Dependencies
local('sh helm/sync.sh helm/dev/cortex-prometheus-operator')
k8s_yaml(helm('./helm/dev/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator
k8s_resource('cortex-prometheus-operator', labels=['Monitoring'])
k8s_resource(
new_name='cortex-prometheus',
port_forwards=[port_forward(9090, 9090)],
port_forwards=[port_forward(3000, 9090)],
links=[
link('http://localhost:9090', 'metrics'),
link('http://localhost:9090/alerts', 'alerts'),
link('http://localhost:3000', 'metrics'),
link('http://localhost:3000/alerts', 'alerts'),
],
objects=['cortex-prometheus:Prometheus:default'],
labels=['Monitoring'],
Expand All @@ -126,17 +49,120 @@ k8s_resource(
docker_build('cortex-visualizer', 'visualizer')
k8s_yaml('./visualizer/app.yaml')
k8s_resource('cortex-visualizer', port_forwards=[
port_forward(8009, 80),
port_forward(4000, 80),
], links=[
link('localhost:8009/nova.html', 'nova visualizer'),
link('localhost:8009/manila.html', 'manila visualizer'),
link('localhost:4000/nova.html', 'nova visualizer'),
link('localhost:4000/manila.html', 'manila visualizer'),
], labels=['Monitoring'])

########### Plutono (Grafana Fork)
docker_build('cortex-plutono', 'plutono')
k8s_yaml('./plutono/app.yaml')
k8s_resource('cortex-plutono', port_forwards=[
port_forward(3000, 3000, name='plutono'),
port_forward(5000, 3000, name='plutono'),
], links=[
link('http://localhost:3000/d/cortex/cortex?orgId=1', 'cortex dashboard'),
link('http://localhost:5000/d/cortex/cortex?orgId=1', 'cortex dashboard'),
], labels=['Monitoring'])

########### Cortex Bundles
tilt_values = os.getenv('TILT_VALUES_PATH')
docker_build(
    'ghcr.io/cobaltcore-dev/cortex',
    '.',
    only=['internal/', 'commands/', 'main.go', 'go.mod', 'go.sum', 'Makefile'],
)

# Library charts are packaged locally and copied into each bundle's charts/
# directory, so a bumped library chart can be tried out before it is pushed
# to the OCI registry.
lib_charts = ['cortex-core', 'cortex-postgres', 'cortex-mqtt']
bundle_charts = ['cortex-nova', 'cortex-manila']
for lib_chart in lib_charts:
    # Re-evaluate the Tiltfile whenever the library chart changes.
    watch_file('helm/library/' + lib_chart)
    local('sh helm/sync.sh helm/library/' + lib_chart)
    for bundle_chart in bundle_charts:
        bundle_charts_dir = 'helm/bundles/' + bundle_chart + '/charts/'
        # Package once per bundle: the generated archive is either moved or
        # deleted at the end of every iteration.
        local('helm package helm/library/' + lib_chart)
        packaged_chart = str(local('ls ' + lib_chart + '-*.tgz')).strip()
        compare_cmd = 'sh helm/cmp.sh ' + packaged_chart + ' ' + bundle_charts_dir + packaged_chart
        compare_output = str(local(compare_cmd)).strip()
        if compare_output != 'true':
            # The bundle has no identical copy yet: install the fresh archive.
            local('mv -f ' + packaged_chart + ' ' + bundle_charts_dir)
        else:
            print('Skipping ' + lib_chart + ' as it is already up to date in ' + bundle_chart)
            # Drop the freshly generated archive from the working directory.
            local('rm -f ' + packaged_chart)

# Bring the bundle charts themselves up to date.
for bundle_chart in bundle_charts:
    local('sh helm/sync.sh helm/bundles/' + bundle_chart)

# Deploy both Cortex bundles with the developer-supplied values file.
k8s_yaml(helm('./helm/bundles/cortex-nova', name='cortex-nova', values=[tilt_values]))
k8s_yaml(helm('./helm/bundles/cortex-manila', name='cortex-manila', values=[tilt_values]))

# Each entry is (Tilt label, component names, [(service port, port name), ...]).
# Local ports are handed out sequentially starting at 8000, so keep entries
# whose local port should stay stable near the top: anything inserted above an
# entry shifts the local ports of everything below it.
resources = [
    ('MQTT', ['cortex-nova-mqtt', 'cortex-manila-mqtt'], [(1883, 'tcp')]),
    ('Database', ['cortex-nova-postgresql', 'cortex-manila-postgresql'], [(5432, 'psql')]),
    (
        'Cortex-Nova',
        [
            'cortex-nova-migrations',
            'cortex-nova-cli',
            'cortex-nova-syncer',
            'cortex-nova-extractor',
            'cortex-nova-kpis',
            'cortex-nova-scheduler',
            'cortex-nova-descheduler',
        ],
        [(2112, 'metrics'), (8080, 'api')],
    ),
    (
        'Cortex-Manila',
        [
            'cortex-manila-migrations',
            'cortex-manila-cli',
            'cortex-manila-syncer',
            'cortex-manila-extractor',
            'cortex-manila-kpis',
            'cortex-manila-scheduler',
        ],
        [(2112, 'metrics'), (8080, 'api')],
    ),
]
local_port = 8000
for group_label, group_components, group_ports in resources:
    for component_name in group_components:
        forwards = []
        resource_links = []
        # One forward and one link per service port, on consecutive local ports.
        for offset, (container_port, port_name) in enumerate(group_ports):
            host_port = local_port + offset
            forwards.append(port_forward(host_port, container_port))
            resource_links.append(
                link('http://localhost:' + str(host_port) + '/' + port_name, '/' + port_name)
            )
        k8s_resource(
            component_name,
            port_forwards=forwards,
            links=resource_links,
            labels=[group_label],
        )
        # Reserve the ports just assigned before moving on to the next component.
        local_port += len(group_ports)

########### E2E Tests
# Manually triggered resource that runs `cortex checks` inside the nova CLI
# deployment and, only if that succeeds, inside the manila CLI deployment.
e2e_check_commands = [
    '/bin/sh -c "kubectl exec deploy/cortex-nova-cli -- /usr/bin/cortex checks"',
    '/bin/sh -c "kubectl exec deploy/cortex-manila-cli -- /usr/bin/cortex checks"',
]
local_resource(
    'Run E2E Tests',
    ' && '.join(e2e_check_commands),
    deps=['./internal/checks'],
    labels=['Commands'],
    trigger_mode=TRIGGER_MODE_MANUAL,
    auto_init=False,
)
2 changes: 1 addition & 1 deletion commands/checks/manila/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func checkManilaSchedulerReturnsValidHosts(ctx context.Context, config conf.Conf
Weights: weights,
}
port := strconv.Itoa(config.GetAPIConfig().Port)
apiURL := "http://cortex-scheduler-manila:" + port + "/scheduler/manila/external"
apiURL := "http://cortex-manila-scheduler:" + port + "/scheduler/manila/external"
slog.Info("sending request to external scheduler", "apiURL", apiURL)

requestBody := must.Return(json.Marshal(request))
Expand Down
2 changes: 1 addition & 1 deletion commands/checks/nova/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func checkNovaSchedulerReturnsValidHosts(ctx context.Context, config conf.Config
Weights: weights,
}
port := strconv.Itoa(config.GetAPIConfig().Port)
apiURL := "http://cortex-scheduler-nova:" + port + "/scheduler/nova/external"
apiURL := "http://cortex-nova-scheduler:" + port + "/scheduler/nova/external"
slog.Info("sending request to external scheduler", "apiURL", apiURL)

requestBody := must.Return(json.Marshal(request))
Expand Down
Loading
Loading