Skip to content

Commit 2c2d94c

Browse files
authored
Merge pull request #740 from basecamp/remove-healthcheck-step
Remove the healthcheck step
2 parents c62bd1d + 706b82b commit 2c2d94c

26 files changed

+300
-345
lines changed

lib/kamal/cli/app.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@ def boot
1414
end
1515
end
1616

17+
#  Primary hosts and roles are returned first, so they can open the barrier
18+
barrier = Kamal::Cli::Healthcheck::Barrier.new if KAMAL.roles.many?
19+
1720
on(KAMAL.hosts, **KAMAL.boot_strategy) do |host|
1821
KAMAL.roles_on(host).each do |role|
19-
Kamal::Cli::App::Boot.new(host, role, version, self).run
22+
Kamal::Cli::App::Boot.new(host, role, self, version, barrier).run
2023
end
2124
end
2225

26+
#  Tag once the app booted on all hosts
2327
on(KAMAL.hosts) do |host|
2428
execute *KAMAL.auditor.record("Tagging #{KAMAL.config.absolute_image} as the latest image"), verbosity: :debug
2529
execute *KAMAL.app.tag_latest_image

lib/kamal/cli/app/boot.rb

Lines changed: 68 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,37 @@
11
class Kamal::Cli::App::Boot
2-
attr_reader :host, :role, :version, :sshkit
3-
delegate :execute, :capture_with_info, :info, to: :sshkit
4-
delegate :uses_cord?, :assets?, to: :role
2+
attr_reader :host, :role, :version, :barrier, :sshkit
3+
delegate :execute, :capture_with_info, :capture_with_pretty_json, :info, :error, to: :sshkit
4+
delegate :uses_cord?, :assets?, :running_traefik?, to: :role
55

6-
def initialize(host, role, version, sshkit)
6+
def initialize(host, role, sshkit, version, barrier)
77
@host = host
88
@role = role
99
@version = version
10+
@barrier = barrier
1011
@sshkit = sshkit
1112
end
1213

1314
def run
1415
old_version = old_version_renamed_if_clashing
1516

16-
start_new_version
17+
wait_at_barrier if queuer?
18+
19+
begin
20+
start_new_version
21+
rescue => e
22+
close_barrier if gatekeeper?
23+
stop_new_version
24+
raise
25+
end
26+
27+
release_barrier if gatekeeper?
1728

1829
if old_version
1930
stop_old_version(old_version)
2031
end
2132
end
2233

2334
private
24-
def app
25-
@app ||= KAMAL.app(role: role, host: host)
26-
end
27-
28-
def auditor
29-
@auditor = KAMAL.auditor(role: role)
30-
end
31-
32-
def audit(message)
33-
execute *auditor.record(message), verbosity: :debug
34-
end
35-
3635
def old_version_renamed_if_clashing
3736
if capture_with_info(*app.container_id_for_version(version), raise_on_non_zero_exit: false).present?
3837
renamed_version = "#{version}_replaced_#{SecureRandom.hex(8)}"
@@ -46,12 +45,17 @@ def old_version_renamed_if_clashing
4645

4746
def start_new_version
4847
audit "Booted app version #{version}"
48+
4949
execute *app.tie_cord(role.cord_host_file) if uses_cord?
5050
hostname = "#{host.to_s[0...51].gsub(/\.+$/, '')}-#{SecureRandom.hex(6)}"
5151
execute *app.run(hostname: hostname)
5252
Kamal::Cli::Healthcheck::Poller.wait_for_healthy(pause_after_ready: true) { capture_with_info(*app.status(version: version)) }
5353
end
5454

55+
# Stops the container for the version being booted.
# Called from the rescue path of #run after start_new_version failed.
# raise_on_non_zero_exit: false makes this best-effort, so a failing stop
# command does not raise on top of the error that is about to be re-raised.
def stop_new_version
56+
execute *app.stop(version: version), raise_on_non_zero_exit: false
57+
end
58+
5559
def stop_old_version(version)
5660
if uses_cord?
5761
cord = capture_with_info(*app.cord(version: version), raise_on_non_zero_exit: false).strip
@@ -65,4 +69,51 @@ def stop_old_version(version)
6569

6670
execute *app.clean_up_assets if assets?
6771
end
72+
73+
# Opens the barrier once the primary role's new version has started successfully.
# barrier.open reports true only for the call that actually sets the barrier,
# so the info message is logged once even if several primary hosts get here.
def release_barrier
74+
if barrier.open
75+
info "First #{KAMAL.primary_role} container is healthy on #{host}, booting other roles"
76+
end
77+
end
78+
79+
# Holds a non-primary role at the barrier until the primary role's outcome is known.
# barrier.wait returns normally when the barrier was opened and raises
# Kamal::Cli::Healthcheck::Error when it was closed; in that case the failure is
# logged and the error is re-raised so this role is not booted.
# NOTE(review): the wait presumably blocks with no timeout (it reads a Concurrent::IVar value) — confirm.
def wait_at_barrier
80+
info "Waiting for the first healthy #{KAMAL.primary_role} container before booting #{role} on #{host}..."
81+
barrier.wait
82+
info "First #{KAMAL.primary_role} container is healthy, booting #{role} on #{host}..."
83+
rescue Kamal::Cli::Healthcheck::Error
84+
info "First #{KAMAL.primary_role} container is unhealthy, not booting #{role} on #{host}"
85+
raise
86+
end
87+
88+
def close_barrier
89+
if barrier.close
90+
info "First #{KAMAL.primary_role} container is unhealthy on #{host}, not booting other roles"
91+
error capture_with_info(*app.logs(version: version))
92+
error capture_with_info(*app.container_health_log(version: version))
93+
end
94+
end
95+
96+
# True when this boot is for the primary role, i.e. the role whose result decides
# whether the barrier is opened or closed.
def barrier_role?
97+
role == KAMAL.primary_role
98+
end
99+
100+
# Memoized app command builder scoped to this role and host.
def app
101+
@app ||= KAMAL.app(role: role, host: host)
102+
end
103+
104+
def auditor
105+
@auditor = KAMAL.auditor(role: role)
106+
end
107+
108+
# Runs the auditor's record command for +message+ on the host, at debug verbosity.
def audit(message)
109+
execute *auditor.record(message), verbosity: :debug
110+
end
111+
112+
# Truthy when a barrier is in use and this boot is for the primary role:
# this boot is then responsible for opening or closing the barrier.
# Returns nil (falsy) when no barrier was passed in.
def gatekeeper?
113+
barrier && barrier_role?
114+
end
115+
116+
# Truthy when a barrier is in use and this boot is for a non-primary role:
# this boot must then wait at the barrier before starting.
# Returns nil (falsy) when no barrier was passed in.
def queuer?
117+
barrier && !barrier_role?
118+
end
68119
end

lib/kamal/cli/healthcheck.rb

Lines changed: 0 additions & 21 deletions
This file was deleted.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# One-shot gate shared between boot tasks, backed by a Concurrent::IVar.
# The first call to #open or #close decides the outcome; later calls have no effect.
# #wait lets through callers when the barrier was opened and raises when it was closed.
class Kamal::Cli::Healthcheck::Barrier
2+
def initialize
3+
@ivar = Concurrent::IVar.new
4+
end
5+
6+
# Marks the barrier as closed. Returns true if this call set the outcome,
# false if the outcome had already been set.
def close
7+
set(false)
8+
end
9+
10+
# Marks the barrier as opened. Returns true if this call set the outcome,
# false if the outcome had already been set.
def open
11+
set(true)
12+
end
13+
14+
# Returns normally when the barrier was opened; raises
# Kamal::Cli::Healthcheck::Error when it was closed.
# NOTE(review): Concurrent::IVar#value is expected to block until a value is set,
# with no timeout given here — confirm against concurrent-ruby.
def wait
15+
unless opened?
16+
raise Kamal::Cli::Healthcheck::Error.new("Halted at barrier")
17+
end
18+
end
19+
20+
private
21+
# The value the barrier was set to: true once opened, false once closed.
def opened?
22+
@ivar.value
23+
end
24+
25+
# Sets the outcome once. A second assignment raises
# Concurrent::MultipleAssignmentError, which is turned into a false return
# so callers can tell whether they were the first to decide.
def set(value)
26+
@ivar.set(value)
27+
true
28+
rescue Concurrent::MultipleAssignmentError
29+
false
30+
end
31+
end

lib/kamal/cli/healthcheck/error.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Error raised when a container does not reach the expected health state
# (by the Poller) or when a boot is halted at a closed Barrier.
class Kamal::Cli::Healthcheck::Error < StandardError
2+
end

lib/kamal/cli/healthcheck/poller.rb

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ module Kamal::Cli::Healthcheck::Poller
33

44
TRAEFIK_UPDATE_DELAY = 5
55

6-
class HealthcheckError < StandardError; end
76

87
def wait_for_healthy(pause_after_ready: false, &block)
98
attempt = 1
@@ -16,9 +15,9 @@ def wait_for_healthy(pause_after_ready: false, &block)
1615
when "running" # No health check configured
1716
sleep KAMAL.config.readiness_delay if pause_after_ready
1817
else
19-
raise HealthcheckError, "container not ready (#{status})"
18+
raise Kamal::Cli::Healthcheck::Error, "container not ready (#{status})"
2019
end
21-
rescue HealthcheckError => e
20+
rescue Kamal::Cli::Healthcheck::Error => e
2221
if attempt <= max_attempts
2322
info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
2423
sleep attempt
@@ -41,9 +40,9 @@ def wait_for_unhealthy(pause_after_ready: false, &block)
4140
when "unhealthy"
4241
sleep TRAEFIK_UPDATE_DELAY if pause_after_ready
4342
else
44-
raise HealthcheckError, "container not unhealthy (#{status})"
43+
raise Kamal::Cli::Healthcheck::Error, "container not unhealthy (#{status})"
4544
end
46-
rescue HealthcheckError => e
45+
rescue Kamal::Cli::Healthcheck::Error => e
4746
if attempt <= max_attempts
4847
info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..."
4948
sleep attempt

lib/kamal/cli/main.rb

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,6 @@ def deploy
4141
say "Ensure Traefik is running...", :magenta
4242
invoke "kamal:cli:traefik:boot", [], invoke_options
4343

44-
if KAMAL.config.role(KAMAL.config.primary_role).running_traefik?
45-
say "Ensure app can pass healthcheck...", :magenta
46-
invoke "kamal:cli:healthcheck:perform", [], invoke_options
47-
end
48-
4944
say "Detect stale containers...", :magenta
5045
invoke "kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true)
5146

@@ -76,9 +71,6 @@ def redeploy
7671

7772
run_hook "pre-deploy"
7873

79-
say "Ensure app can pass healthcheck...", :magenta
80-
invoke "kamal:cli:healthcheck:perform", [], invoke_options
81-
8274
say "Detect stale containers...", :magenta
8375
invoke "kamal:cli:app:stale_containers", [], invoke_options.merge(stop: true)
8476

@@ -227,9 +219,6 @@ def version
227219
desc "env", "Manage environment files"
228220
subcommand "env", Kamal::Cli::Env
229221

230-
desc "healthcheck", "Healthcheck application"
231-
subcommand "healthcheck", Kamal::Cli::Healthcheck
232-
233222
desc "lock", "Manage the deploy lock"
234223
subcommand "lock", Kamal::Cli::Lock
235224

@@ -254,7 +243,7 @@ def container_available?(version)
254243
raise "Container not found" unless container_id.present?
255244
end
256245
end
257-
rescue SSHKit::Runner::ExecuteError => e
246+
rescue SSHKit::Runner::ExecuteError, SSHKit::Runner::MultipleExecuteError => e
258247
if e.message =~ /Container not found/
259248
say "Error looking for container version #{version}: #{e.message}"
260249
return false

lib/kamal/commands/app/containers.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
module Kamal::Commands::App::Containers
2+
DOCKER_HEALTH_LOG_FORMAT = "'{{json .State.Health}}'"
3+
24
def list_containers
35
docker :container, :ls, "--all", *filter_args
46
end
@@ -20,4 +22,10 @@ def rename_container(version:, new_version:)
2022
def remove_containers
2123
docker :container, :prune, "--force", *filter_args
2224
end
25+
26+
# Builds the command that prints the Docker health state of the container for +version+:
# the container id lookup is piped through xargs into `docker inspect`, formatted with
# DOCKER_HEALTH_LOG_FORMAT (the JSON of .State.Health).
def container_health_log(version:)
27+
pipe \
28+
container_id_for(container_name: container_name(version)),
29+
xargs(docker(:inspect, "--format", DOCKER_HEALTH_LOG_FORMAT))
30+
end
2331
end

lib/kamal/commands/app/logging.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Kamal::Commands::App::Logging
2-
def logs(since: nil, lines: nil, grep: nil)
2+
def logs(version: nil, since: nil, lines: nil, grep: nil)
33
pipe \
4-
current_running_container_id,
4+
version ? container_id_for_version(version) : current_running_container_id,
55
"xargs docker logs#{" --since #{since}" if since}#{" --tail #{lines}" if lines} 2>&1",
66
("grep '#{grep}'" if grep)
77
end

lib/kamal/commands/base.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ class Base
33
delegate :sensitive, :argumentize, to: Kamal::Utils
44

55
DOCKER_HEALTH_STATUS_FORMAT = "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'"
6-
DOCKER_HEALTH_LOG_FORMAT = "'{{json .State.Health}}'"
76

87
attr_accessor :config
98

0 commit comments

Comments
 (0)