Skip to content

Commit d457992

Browse files
committed
Signed-off-by: Rishikpulhani <[email protected]>
feat(dhcp): Send DHCPRELEASE on container teardown When a container using DHCP is torn down, its lease is left active on the DHCP server until it expires. This can be problematic in environments with small IP pools or long lease times. In setups using Dynamic DNS (DDNS), it can also lead to stale DNS records. This change introduces the capability for netavark to send a DHCPRELEASE message to the DHCP server when a container's network is torn down. This is implemented by: - Adding a `release_lease` method to the `DhcpV4Service` in `dhcp_service.rs`, which wraps the `release` function from the underlying mozim client. - Wrapped DhcpV4Service inside Arc<> share that to the process_client_stream function and then also store it in the task map - used it in process_client_stream task and also in the teardown function to send the DHCPRELEASE message
1 parent cc646bc commit d457992

File tree

2 files changed

+92
-48
lines changed

2 files changed

+92
-48
lines changed

src/commands/dhcp_proxy.rs

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ use tonic::{
3838
transport::Server, Code, Code::Internal, Code::InvalidArgument, Request, Response, Status,
3939
};
4040

41+
type TaskData = (Arc<tokio::sync::Mutex<DhcpV4Service>>, AbortHandle);
42+
4143
#[derive(Debug)]
4244
/// This is the tonic netavark proxy service that is required to impl the Netavark Proxy trait which
4345
/// includes the gRPC methods defined in proto/proxy.proto. We can store a atomically referenced counted
@@ -59,7 +61,7 @@ struct NetavarkProxyService<W: Write + Clear> {
5961
timeout_sender: Option<Arc<Mutex<Sender<i32>>>>,
6062
// All dhcp poll will be spawned on a new task, keep track of it so
6163
// we can remove it on teardown. The key is the container mac.
62-
task_map: Arc<Mutex<HashMap<String, AbortHandle>>>,
64+
task_map: Arc<Mutex<HashMap<String, TaskData>>>,
6365
}
6466

6567
impl<W: Write + Clear> NetavarkProxyService<W> {
@@ -136,8 +138,8 @@ impl<W: Write + Clear + Send + 'static> NetavarkProxy for NetavarkProxyService<W
136138
};
137139
}
138140

139-
/// When a container is shut down this method should be called. It will clear the lease information
140-
/// from the caching system.
141+
/// When a container is shut down this method should be called. It will release the
142+
/// DHCP lease and clear the lease information from the caching system.
141143
async fn teardown(
142144
&self,
143145
request: Request<NetworkConfig>,
@@ -149,12 +151,25 @@ impl<W: Write + Clear + Send + 'static> NetavarkProxy for NetavarkProxyService<W
149151
let cache = self.cache.clone();
150152
let tasks = self.task_map.clone();
151153

152-
let task = tasks
153-
.lock()
154-
.expect("lock tasks")
155-
.remove(&nc.container_mac_addr);
156-
if let Some(handle) = task {
157-
handle.abort();
154+
let maybe_service_arc = {
155+
// Scope for the std::sync::MutexGuard
156+
let mut tasks_guard = tasks.lock().expect("lock tasks");
157+
158+
if let Some((service_arc, handle)) = tasks_guard.remove(&nc.container_mac_addr) {
159+
handle.abort();
160+
Some(service_arc)
161+
} else {
162+
None
163+
}
164+
};
165+
if let Some(service_arc) = maybe_service_arc {
166+
let mut service = service_arc.lock().await;
167+
if let Err(e) = service.release_lease() {
168+
warn!(
169+
"Failed to send DHCPRELEASE for {}: {}",
170+
&nc.container_mac_addr, e
171+
);
172+
}
158173
}
159174

160175
// Remove the client from the cache dir
@@ -406,7 +421,7 @@ async fn process_setup<W: Write + Clear>(
406421
network_config: NetworkConfig,
407422
timeout: u32,
408423
cache: Arc<Mutex<LeaseCache<W>>>,
409-
tasks: Arc<Mutex<HashMap<String, AbortHandle>>>,
424+
tasks: Arc<Mutex<HashMap<String, TaskData>>>,
410425
) -> Result<NetavarkLease, Status> {
411426
let container_network_interface = network_config.container_iface.clone();
412427
let ns_path = network_config.ns_path.clone();
@@ -422,11 +437,13 @@ async fn process_setup<W: Write + Clear>(
422437
let mut service = DhcpV4Service::new(network_config, timeout)?;
423438

424439
let lease = service.get_lease().await?;
425-
let task = tokio::spawn(process_client_stream(service));
440+
let service_arc = Arc::new(tokio::sync::Mutex::new(service));
441+
let service_arc_clone = service_arc.clone();
442+
let task_handle = tokio::spawn(process_client_stream(service_arc_clone));
426443
tasks
427444
.lock()
428445
.expect("lock tasks")
429-
.insert(mac.to_string(), task.abort_handle());
446+
.insert(mac.to_string(), (service_arc, task_handle.abort_handle()));
430447
lease
431448
}
432449
//V6 TODO implement DHCPv6

src/dhcp_proxy/dhcp_service.rs

Lines changed: 63 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ use crate::network::netlink::Route;
1111
use crate::wrap;
1212
use log::debug;
1313
use mozim::{DhcpV4ClientAsync, DhcpV4Config, DhcpV4Lease as MozimV4Lease};
14+
use std::sync::Arc;
15+
use tokio::sync::Mutex;
1416
use tokio_stream::StreamExt;
15-
1617
use tonic::{Code, Status};
1718

1819
/// The kind of DhcpServiceError that can be caused when finding a dhcp lease
@@ -39,6 +40,7 @@ impl DhcpServiceError {
3940
}
4041

4142
/// DHCP service is responsible for creating, handling, and managing the dhcp lease process.
43+
#[derive(Debug)]
4244
pub struct DhcpV4Service {
4345
client: DhcpV4ClientAsync,
4446
network_config: NetworkConfig,
@@ -129,6 +131,28 @@ impl DhcpV4Service {
129131
"Could not find a lease within the timeout limit".to_string(),
130132
))
131133
}
134+
135+
/// Sends a DHCPRELEASE message for the given lease.
136+
/// This is a "best effort" operation and should not block teardown.
137+
pub fn release_lease(&mut self) -> Result<(), DhcpServiceError> {
138+
if let Some(lease) = &self.previous_lease {
139+
debug!(
140+
"Attempting to release lease for MAC: {}",
141+
&self.network_config.container_mac_addr
142+
);
143+
// Directly call the release function on the underlying mozim client.
144+
self.client
145+
.release(lease)
146+
.map_err(|e| DhcpServiceError::new(Bug, e.to_string()))
147+
} else {
148+
// No previous lease recorded; nothing to release. Best-effort -> succeed silently.
149+
debug!(
150+
"No previous lease to release for MAC: {}",
151+
&self.network_config.container_mac_addr
152+
);
153+
Ok(())
154+
}
155+
}
132156
}
133157

134158
impl std::fmt::Display for DhcpServiceError {
@@ -149,50 +173,53 @@ impl From<DhcpServiceError> for Status {
149173
}
150174
}
151175

152-
pub async fn process_client_stream(mut client: DhcpV4Service) {
153-
while let Some(lease) = client.client.next().await {
154-
match lease {
155-
Ok(lease) => {
156-
log::info!(
157-
"got new lease for mac {}: {:?}",
158-
&client.network_config.container_mac_addr,
159-
&lease
160-
);
161-
// get previous lease and check if ip addr changed, if not we do not have to do anything
162-
if let Some(old_lease) = &client.previous_lease {
163-
if old_lease.yiaddr != lease.yiaddr
164-
|| old_lease.subnet_mask != lease.subnet_mask
165-
|| old_lease.gateways != lease.gateways
166-
{
167-
// ips do not match, remove old ones and assign new ones.
168-
log::info!(
169-
"ip or gateway for mac {} changed, update address",
170-
&client.network_config.container_mac_addr
171-
);
172-
match update_lease_ip(
173-
&client.network_config.ns_path,
174-
&client.network_config.container_iface,
175-
old_lease,
176-
&lease,
177-
) {
178-
Ok(_) => {}
179-
Err(err) => {
176+
pub async fn process_client_stream(service_arc: Arc<Mutex<DhcpV4Service>>) {
177+
loop {
178+
let lease_result = service_arc.lock().await.client.next().await;
179+
180+
if let Some(lease_result) = lease_result {
181+
let mut client = service_arc.lock().await;
182+
match lease_result {
183+
Ok(lease) => {
184+
log::info!(
185+
"got new lease for mac {}: {:?}",
186+
&client.network_config.container_mac_addr,
187+
&lease
188+
);
189+
190+
if let Some(old_lease) = &client.previous_lease {
191+
if old_lease.yiaddr != lease.yiaddr
192+
|| old_lease.subnet_mask != lease.subnet_mask
193+
|| old_lease.gateways != lease.gateways
194+
{
195+
log::info!(
196+
"ip or gateway for mac {} changed, update address",
197+
&client.network_config.container_mac_addr
198+
);
199+
if let Err(err) = update_lease_ip(
200+
&client.network_config.ns_path,
201+
&client.network_config.container_iface,
202+
old_lease,
203+
&lease,
204+
) {
180205
log::error!("{err}");
181-
continue;
182206
}
183207
}
184208
}
209+
client.previous_lease = Some(lease);
210+
}
211+
Err(err) => {
212+
log::error!(
213+
"Failed to renew lease for {}: {err}",
214+
&client.network_config.container_mac_addr
215+
);
185216
}
186-
client.previous_lease = Some(lease)
187217
}
188-
Err(err) => log::error!(
189-
"Failed to renew lease for {}: {err}",
190-
&client.network_config.container_mac_addr
191-
),
218+
} else {
219+
break;
192220
}
193221
}
194222
}
195-
196223
fn update_lease_ip(
197224
netns: &str,
198225
interface: &str,

0 commit comments

Comments
 (0)