Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion certbot/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ fn load_config(config: &PathBuf) -> Result<CertBotConfig> {
let renew_timeout = Duration::from_secs(config.renew_timeout);
let bot_config = CertBotConfig::builder()
.acme_url(config.acme_url)
.cert_dir(workdir.backup_dir())
.cert_dir(workdir.cert_backup_dir())
.cert_file(workdir.cert_path())
.key_file(workdir.key_path())
.auto_create_account(true)
Expand Down
12 changes: 6 additions & 6 deletions certbot/src/workdir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,24 @@ impl WorkDir {
self.workdir.join("credentials.json")
}

pub fn backup_dir(&self) -> PathBuf {
pub fn cert_backup_dir(&self) -> PathBuf {
self.workdir.join("backup")
}

pub fn live_dir(&self) -> PathBuf {
pub fn cert_live_dir(&self) -> PathBuf {
self.workdir.join("live")
}

pub fn cert_path(&self) -> PathBuf {
self.live_dir().join("cert.pem")
self.cert_live_dir().join("cert.pem")
}

pub fn key_path(&self) -> PathBuf {
self.live_dir().join("key.pem")
self.cert_live_dir().join("key.pem")
}

pub fn list_certs(&self) -> Result<Vec<PathBuf>> {
crate::bot::list_certs(self.backup_dir())
crate::bot::list_certs(self.cert_backup_dir())
}

pub fn acme_account_uri(&self) -> Result<String> {
Expand All @@ -58,6 +58,6 @@ impl WorkDir {
}

pub fn list_cert_public_keys(&self) -> Result<BTreeSet<Vec<u8>>> {
crate::bot::list_cert_public_keys(self.backup_dir())
crate::bot::list_cert_public_keys(self.cert_backup_dir())
}
}
2 changes: 1 addition & 1 deletion gateway/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ impl CertbotConfig {
let workdir = certbot::WorkDir::new(&self.workdir);
certbot::CertBotConfig::builder()
.auto_create_account(true)
.cert_dir(workdir.backup_dir())
.cert_dir(workdir.cert_backup_dir())
.cert_file(workdir.cert_path())
.key_file(workdir.key_path())
.credentials_file(workdir.account_credentials_path())
Expand Down
2 changes: 1 addition & 1 deletion guest-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fs-err.workspace = true
rcgen.workspace = true
sha2.workspace = true
clap.workspace = true
tokio.workspace = true
tokio = { workspace = true, features = ["full"] }
hex.workspace = true
serde_json.workspace = true
bollard.workspace = true
Expand Down
52 changes: 51 additions & 1 deletion guest-agent/src/guest_api_service.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::fmt::Debug;

use anyhow::{Context, Result};
use anyhow::{bail, Context, Result};
use bollard::{container::ListContainersOptions, Docker};
use cmd_lib::{run_cmd as cmd, run_fun};
use dstack_guest_agent_rpc::worker_server::WorkerRpc as _;
Expand All @@ -18,6 +18,8 @@ use tracing::error;

use crate::{rpc_service::ExternalRpcHandler, AppState};

const BACKUP_LOCK_FILE: &str = "/run/dstack-backup.lock";

pub struct GuestApiHandler {
state: AppState,
}
Expand All @@ -43,6 +45,7 @@ impl GuestApiRpc for GuestApiHandler {
device_id: info.device_id,
app_cert: info.app_cert,
tcb_info: info.tcb_info,
backup_in_progress: fs::metadata(BACKUP_LOCK_FILE).is_ok(),
})
}

Expand Down Expand Up @@ -112,6 +115,53 @@ impl GuestApiRpc for GuestApiHandler {
async fn list_containers(self) -> Result<ListContainersResponse> {
list_containers().await
}

async fn pre_backup(self) -> Result<()> {
fs::OpenOptions::new()
.create_new(true)
.write(true)
.open(BACKUP_LOCK_FILE)
.context("Failed to create backup lock file, there is another backup in progress")?;
// Run /dstack/hooks/pre-backup if it exists
let pre_backup_hook = "/dstack/hooks/pre-backup";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can we set up hooks for pre-backup and post-backup?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just write some shell commands into the files pre-backup/post-backup. Usually they don't need to do anything. Some apps may want to flush their app data to disk before a backup.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are they set up only by node operators? I'm wondering who can actually create these hook scripts.

Copy link
Collaborator Author

@kvinwang kvinwang Jul 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are they set up only by node operators? I'm wondering who can actually create these hook scripts.

It's up to the app to define the logic in the hooks, for example flushing its MySQL database in the CVM. The hooks are put there for future use. I think we don't need to care about this for now, until a use case appears.

if is_exe(pre_backup_hook) {
let status = tokio::process::Command::new(pre_backup_hook)
.spawn()
.context("Failed to run pre-backup hook")?
.wait()
.await
.context("Failed to run pre-backup hook")?;
if !status.success() {
bail!("Failed to run pre-backup hook");
}
}
Ok(())
}

/// Ends a backup: deletes the backup lock file, then runs the optional
/// post-backup hook.
///
/// The hook at `/dstack/hooks/post-backup` is only executed when `is_exe`
/// reports it as an executable regular file; otherwise the call succeeds
/// right after the lock file has been removed.
///
/// # Errors
///
/// Returns an error when the lock file cannot be removed (the hook is not
/// run in that case), when the hook process cannot be spawned or waited on,
/// or when the hook exits with a non-success status.
async fn post_backup(self) -> Result<()> {
    // The lock is released before the hook runs, so a hook failure below
    // does not leave the lock file behind.
    fs::remove_file(BACKUP_LOCK_FILE).context("Failed to remove backup lock file")?;

    let hook_path = "/dstack/hooks/post-backup";
    // The hook is optional: nothing more to do unless it is executable.
    if !is_exe(hook_path) {
        return Ok(());
    }

    let mut hook_process = tokio::process::Command::new(hook_path)
        .spawn()
        .context("Failed to run post-backup hook")?;
    let exit_status = hook_process
        .wait()
        .await
        .context("Failed to run post-backup hook")?;
    if !exit_status.success() {
        bail!("Failed to run post-backup hook");
    }
    Ok(())
}
}

/// Reports whether `path` is a regular file with at least one execute
/// permission bit set.
///
/// Returns `false` when the metadata for `path` cannot be read (for example
/// because nothing exists at that path).
fn is_exe(path: &str) -> bool {
    use std::os::unix::fs::PermissionsExt;

    // Owner, group and other execute bits; any one of them is enough.
    const ANY_EXEC_BIT: u32 = 0o111;

    match fs::metadata(path) {
        Ok(info) => info.is_file() && (info.permissions().mode() & ANY_EXEC_BIT) != 0,
        Err(_) => false,
    }
}

pub(crate) async fn list_containers() -> Result<ListContainersResponse> {
Expand Down
6 changes: 6 additions & 0 deletions guest-api/proto/guest_api.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ message GuestInfo {
string tcb_info = 5;
// Device ID
bytes device_id = 6;
// true if backup is in progress
bool backup_in_progress = 7;
}

message IpAddress {
Expand Down Expand Up @@ -123,6 +125,8 @@ service GuestApi {
rpc NetworkInfo(google.protobuf.Empty) returns (NetworkInformation);
rpc ListContainers(google.protobuf.Empty) returns (ListContainersResponse);
rpc Shutdown(google.protobuf.Empty) returns (google.protobuf.Empty);
rpc PreBackup(google.protobuf.Empty) returns (google.protobuf.Empty);
rpc PostBackup(google.protobuf.Empty) returns (google.protobuf.Empty);
}

service ProxiedGuestApi {
Expand All @@ -131,4 +135,6 @@ service ProxiedGuestApi {
rpc NetworkInfo(Id) returns (NetworkInformation);
rpc ListContainers(Id) returns (ListContainersResponse);
rpc Shutdown(Id) returns (google.protobuf.Empty);
rpc PreBackup(Id) returns (google.protobuf.Empty);
rpc PostBackup(Id) returns (google.protobuf.Empty);
}
Loading