Skip to content

Commit 9e1773c

Browse files
Brian-Perkins, erfrimod, sunilmut, Brian Perkins
authored
netvsp: handle RNDIS packet filter OID for stopping Rx (#1967)
Linux netvsc sends an OID to stop receiving packets on vmbus channel close. Example scenarios: hibernation and MTU change. Prior to opening a new channel and processing the packets, netvsc checks that there are no pending packets. If there are, netvsc logs and error and is unable to recover. We observe the error: `hv_netvsc eth0: Ring buffer not empty after closing rndis` in the guest syslog. Modifying netvsp to handle the OID and drop incoming RX traffic. This will allow for netvsc to successfully close and re-open the vmbus channel, even under heavy incoming traffic. Cherry-pick of #1873 and #1949 --------- Co-authored-by: erfrimod <[email protected]> Co-authored-by: Sunil Muthuswamy <[email protected]> Co-authored-by: Brian Perkins <[email protected]>
1 parent 65ed15e commit 9e1773c

File tree

4 files changed

+474
-26
lines changed

4 files changed

+474
-26
lines changed

vm/devices/net/netvsp/src/lib.rs

Lines changed: 143 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,19 @@ const LINK_DELAY_DURATION: Duration = Duration::from_secs(5);
147147
#[cfg(test)]
148148
const LINK_DELAY_DURATION: Duration = Duration::from_millis(333);
149149

150-
#[derive(PartialEq)]
151-
enum CoordinatorMessage {
150+
#[derive(Default, PartialEq)]
151+
struct CoordinatorMessageUpdateType {
152152
/// Update guest VF state based on current availability and the guest VF state tracked by the primary channel.
153153
/// This includes adding the guest VF device and switching the data path.
154-
UpdateGuestVfState,
154+
guest_vf_state: bool,
155+
/// Update the receive filter for all channels.
156+
filter_state: bool,
157+
}
158+
159+
#[derive(PartialEq)]
160+
enum CoordinatorMessage {
161+
/// Update network state.
162+
Update(CoordinatorMessageUpdateType),
155163
/// Restart endpoints and resume processing. This will also attempt to set VF and data path state to match current
156164
/// expectations.
157165
Restart,
@@ -382,6 +390,7 @@ struct NetChannel<T: RingMem> {
382390
pending_send_size: usize,
383391
restart: Option<CoordinatorMessage>,
384392
can_use_ring_size_opt: bool,
393+
packet_filter: u32,
385394
}
386395

387396
/// Buffers used during packet processing.
@@ -432,6 +441,7 @@ struct ActiveState {
432441
pending_tx_packets: Vec<PendingTxPacket>,
433442
free_tx_packets: Vec<TxId>,
434443
pending_tx_completions: VecDeque<PendingTxCompletion>,
444+
pending_rx_packets: VecDeque<RxId>,
435445

436446
rx_bufs: RxBuffers,
437447

@@ -442,6 +452,7 @@ struct ActiveState {
442452
struct QueueStats {
443453
tx_stalled: Counter,
444454
rx_dropped_ring_full: Counter,
455+
rx_dropped_filtered: Counter,
445456
spurious_wakes: Counter,
446457
rx_packets: Counter,
447458
tx_packets: Counter,
@@ -908,6 +919,7 @@ impl ActiveState {
908919
pending_tx_packets: vec![Default::default(); TX_PACKET_QUOTA],
909920
free_tx_packets: (0..TX_PACKET_QUOTA as u32).rev().map(TxId).collect(),
910921
pending_tx_completions: VecDeque::new(),
922+
pending_rx_packets: VecDeque::new(),
911923
rx_bufs: RxBuffers::new(recv_buffer_count),
912924
stats: Default::default(),
913925
}
@@ -1364,6 +1376,7 @@ impl Nic {
13641376
pending_send_size: 0,
13651377
restart: None,
13661378
can_use_ring_size_opt,
1379+
packet_filter: rndisprot::NDIS_PACKET_TYPE_NONE,
13671380
},
13681381
state,
13691382
coordinator_send: self.coordinator_send.clone().unwrap(),
@@ -1453,6 +1466,7 @@ impl Nic {
14531466
mut control: RestoreControl<'_>,
14541467
state: saved_state::SavedState,
14551468
) -> Result<(), NetRestoreError> {
1469+
let mut saved_packet_filter = 0u32;
14561470
if let Some(state) = state.open {
14571471
let open = match &state.primary {
14581472
saved_state::Primary::Version => vec![true],
@@ -1537,8 +1551,12 @@ impl Nic {
15371551
tx_spread_sent,
15381552
guest_link_down,
15391553
pending_link_action,
1554+
packet_filter,
15401555
} = ready;
15411556

1557+
// If saved state does not have a packet filter set, default to directed, multicast, and broadcast.
1558+
saved_packet_filter = packet_filter.unwrap_or(rndisprot::NPROTO_PACKET_FILTER);
1559+
15421560
let version = check_version(version)
15431561
.ok_or(NetRestoreError::UnsupportedVersion(version))?;
15441562

@@ -1621,6 +1639,11 @@ impl Nic {
16211639
self.insert_worker(channel_idx as u16, &request.unwrap(), state, false)?;
16221640
}
16231641
}
1642+
for worker in self.coordinator.state_mut().unwrap().workers.iter_mut() {
1643+
if let Some(worker_state) = worker.state_mut() {
1644+
worker_state.channel.packet_filter = saved_packet_filter;
1645+
}
1646+
}
16241647
} else {
16251648
control
16261649
.restore(&[false])
@@ -1782,6 +1805,11 @@ impl Nic {
17821805
PrimaryChannelGuestVfState::Restoring(saved_state) => saved_state,
17831806
};
17841807

1808+
let worker_0_packet_filter = coordinator.workers[0]
1809+
.state()
1810+
.unwrap()
1811+
.channel
1812+
.packet_filter;
17851813
saved_state::Primary::Ready(saved_state::ReadyPrimary {
17861814
version: ready.buffers.version as u32,
17871815
receive_buffer: ready.buffers.recv_buffer.saved_state(),
@@ -1811,6 +1839,7 @@ impl Nic {
18111839
tx_spread_sent: primary.tx_spread_sent,
18121840
guest_link_down: !primary.guest_link_up,
18131841
pending_link_action,
1842+
packet_filter: Some(worker_0_packet_filter),
18141843
})
18151844
}
18161845
};
@@ -2594,7 +2623,12 @@ impl<T: RingMem> NetChannel<T> {
25942623
if primary.rndis_state == RndisState::Operational {
25952624
if self.guest_vf_is_available(Some(vfid), buffers.version, buffers.ndis_config)? {
25962625
primary.guest_vf_state = PrimaryChannelGuestVfState::AvailableAdvertised;
2597-
return Ok(Some(CoordinatorMessage::UpdateGuestVfState));
2626+
return Ok(Some(CoordinatorMessage::Update(
2627+
CoordinatorMessageUpdateType {
2628+
guest_vf_state: true,
2629+
..Default::default()
2630+
},
2631+
)));
25982632
} else if let Some(true) = primary.is_data_path_switched {
25992633
tracing::error!(
26002634
"Data path switched, but current guest negotiation does not support VTL0 VF"
@@ -2734,10 +2768,7 @@ impl<T: RingMem> NetChannel<T> {
27342768
// flag on inband packets and won't send a completion
27352769
// packet.
27362770
primary.guest_vf_state = PrimaryChannelGuestVfState::AvailableAdvertised;
2737-
// restart will also add the VF based on the guest_vf_state
2738-
if self.restart.is_none() {
2739-
self.restart = Some(CoordinatorMessage::UpdateGuestVfState);
2740-
}
2771+
self.send_coordinator_update_vf();
27412772
} else if let Some(true) = primary.is_data_path_switched {
27422773
tracing::error!(
27432774
"Data path switched, but current guest negotiation does not support VTL0 VF"
@@ -2785,12 +2816,18 @@ impl<T: RingMem> NetChannel<T> {
27852816
tracing::trace!(?request, "handling control message MESSAGE_TYPE_SET_MSG");
27862817

27872818
let status = match self.adapter.handle_oid_set(primary, request.oid, reader) {
2788-
Ok(restart_endpoint) => {
2819+
Ok((restart_endpoint, packet_filter)) => {
27892820
// Restart the endpoint if the OID changed some critical
27902821
// endpoint property.
27912822
if restart_endpoint {
27922823
self.restart = Some(CoordinatorMessage::Restart);
27932824
}
2825+
if let Some(filter) = packet_filter {
2826+
if self.packet_filter != filter {
2827+
self.packet_filter = filter;
2828+
self.send_coordinator_update_filter();
2829+
}
2830+
}
27942831
rndisprot::STATUS_SUCCESS
27952832
}
27962833
Err(err) => {
@@ -2974,6 +3011,31 @@ impl<T: RingMem> NetChannel<T> {
29743011
}
29753012
Ok(())
29763013
}
3014+
3015+
fn send_coordinator_update_message(&mut self, guest_vf: bool, packet_filter: bool) {
3016+
if self.restart.is_none() {
3017+
self.restart = Some(CoordinatorMessage::Update(CoordinatorMessageUpdateType {
3018+
guest_vf_state: guest_vf,
3019+
filter_state: packet_filter,
3020+
}));
3021+
} else if let Some(CoordinatorMessage::Restart) = self.restart {
3022+
// If a restart message is pending, do nothing.
3023+
// A restart will try to switch the data path based on primary.guest_vf_state.
3024+
// A restart will apply packet filter changes.
3025+
} else if let Some(CoordinatorMessage::Update(ref mut update)) = self.restart {
3026+
// Add the new update to the existing message.
3027+
update.guest_vf_state |= guest_vf;
3028+
update.filter_state |= packet_filter;
3029+
}
3030+
}
3031+
3032+
fn send_coordinator_update_vf(&mut self) {
3033+
self.send_coordinator_update_message(true, false);
3034+
}
3035+
3036+
fn send_coordinator_update_filter(&mut self) {
3037+
self.send_coordinator_update_message(false, true);
3038+
}
29773039
}
29783040

29793041
/// Writes an RNDIS message to `writer`.
@@ -3291,13 +3353,14 @@ impl Adapter {
32913353
primary: &mut PrimaryChannelState,
32923354
oid: rndisprot::Oid,
32933355
reader: impl MemoryRead + Clone,
3294-
) -> Result<bool, OidError> {
3356+
) -> Result<(bool, Option<u32>), OidError> {
32953357
tracing::debug!(?oid, "oid set");
32963358

32973359
let mut restart_endpoint = false;
3360+
let mut packet_filter = None;
32983361
match oid {
32993362
rndisprot::Oid::OID_GEN_CURRENT_PACKET_FILTER => {
3300-
// TODO
3363+
packet_filter = self.oid_set_packet_filter(reader)?;
33013364
}
33023365
rndisprot::Oid::OID_TCP_OFFLOAD_PARAMETERS => {
33033366
self.oid_set_offload_parameters(reader, primary)?;
@@ -3324,7 +3387,7 @@ impl Adapter {
33243387
return Err(OidError::UnknownOid);
33253388
}
33263389
}
3327-
Ok(restart_endpoint)
3390+
Ok((restart_endpoint, packet_filter))
33283391
}
33293392

33303393
fn oid_set_rss_parameters(
@@ -3382,6 +3445,15 @@ impl Adapter {
33823445
Ok(())
33833446
}
33843447

3448+
fn oid_set_packet_filter(
3449+
&self,
3450+
reader: impl MemoryRead + Clone,
3451+
) -> Result<Option<u32>, OidError> {
3452+
let filter: rndisprot::RndisPacketFilterOidValue = reader.clone().read_plain()?;
3453+
tracing::debug!(filter, "set packet filter");
3454+
Ok(Some(filter))
3455+
}
3456+
33853457
fn oid_set_offload_parameters(
33863458
&self,
33873459
reader: impl MemoryRead + Clone,
@@ -3872,8 +3944,26 @@ impl Coordinator {
38723944
}
38733945
sleep_duration = None;
38743946
}
3875-
Message::Internal(CoordinatorMessage::UpdateGuestVfState) => {
3876-
self.update_guest_vf_state(state).await;
3947+
Message::Internal(CoordinatorMessage::Update(update_type)) => {
3948+
if update_type.filter_state {
3949+
self.stop_workers().await;
3950+
let worker_0_packet_filter =
3951+
self.workers[0].state().unwrap().channel.packet_filter;
3952+
self.workers.iter_mut().skip(1).for_each(|worker| {
3953+
if let Some(state) = worker.state_mut() {
3954+
state.channel.packet_filter = worker_0_packet_filter;
3955+
tracing::debug!(
3956+
packet_filter = ?worker_0_packet_filter,
3957+
channel_idx = state.channel_idx,
3958+
"update packet filter"
3959+
);
3960+
}
3961+
});
3962+
}
3963+
3964+
if update_type.guest_vf_state {
3965+
self.update_guest_vf_state(state).await;
3966+
}
38773967
}
38783968
Message::UpdateFromEndpoint(EndpointAction::RestartRequired) => self.restart = true,
38793969
Message::UpdateFromEndpoint(EndpointAction::LinkStatusNotify(connect)) => {
@@ -4306,13 +4396,22 @@ impl Coordinator {
43064396
self.num_queues = num_queues;
43074397
}
43084398

4399+
let worker_0_packet_filter = self.workers[0].state().unwrap().channel.packet_filter;
43094400
// Provide the queue and receive buffer ranges for each worker.
43104401
for ((worker, queue), rx_buffer) in self.workers.iter_mut().zip(queues).zip(rx_buffers) {
43114402
worker.task_mut().queue_state = Some(QueueState {
43124403
queue,
43134404
target_vp_set: false,
43144405
rx_buffer_range: rx_buffer,
43154406
});
4407+
// Update the receive packet filter for the subchannel worker.
4408+
if let Some(worker) = worker.state_mut() {
4409+
worker.channel.packet_filter = worker_0_packet_filter;
4410+
// Clear any pending RxIds as buffers were redistributed.
4411+
if let Some(ready_state) = worker.state.ready_mut() {
4412+
ready_state.state.pending_rx_packets.clear();
4413+
}
4414+
}
43164415
}
43174416

43184417
Ok(())
@@ -4726,6 +4825,17 @@ impl<T: 'static + RingMem> NetChannel<T> {
47264825
send.capacity() - limit
47274826
};
47284827

4828+
// If the packet filter has changed to allow rx packets, add any pended RxIds.
4829+
if !state.pending_rx_packets.is_empty()
4830+
&& self.packet_filter != rndisprot::NDIS_PACKET_TYPE_NONE
4831+
{
4832+
let epqueue = queue_state.queue.as_mut();
4833+
let (front, back) = state.pending_rx_packets.as_slices();
4834+
epqueue.rx_avail(front);
4835+
epqueue.rx_avail(back);
4836+
state.pending_rx_packets.clear();
4837+
}
4838+
47294839
// Handle any guest state changes since last run.
47304840
if let Some(primary) = state.primary.as_mut() {
47314841
if primary.requested_num_queues > 1 && !primary.tx_spread_sent {
@@ -4927,6 +5037,22 @@ impl<T: 'static + RingMem> NetChannel<T> {
49275037
return Ok(false);
49285038
}
49295039

5040+
state.stats.rx_packets_per_wake.add_sample(n as u64);
5041+
5042+
if self.packet_filter == rndisprot::NDIS_PACKET_TYPE_NONE {
5043+
tracing::trace!(
5044+
packet_filter = self.packet_filter,
5045+
"rx packets dropped due to packet filter"
5046+
);
5047+
// Pend the newly available RxIds until the packet filter is updated.
5048+
// Under high load this will eventually lead to no available RxIds,
5049+
// which will cause the backend to drop the packets instead of
5050+
// processing them here.
5051+
state.pending_rx_packets.extend(&data.rx_ready[..n]);
5052+
state.stats.rx_dropped_filtered.add(n as u64);
5053+
return Ok(false);
5054+
}
5055+
49305056
let transaction_id = data.rx_ready[0].0.into();
49315057
let ready_ids = data.rx_ready[..n].iter().map(|&RxId(id)| id);
49325058

@@ -4951,15 +5077,15 @@ impl<T: 'static + RingMem> NetChannel<T> {
49515077
}
49525078
Some(_) => {
49535079
// Ring buffer is full. Drop the packets and free the rx
4954-
// buffers.
5080+
// buffers. When the ring has limited space, the main loop will
5081+
// stop polling for receive packets.
49555082
state.stats.rx_dropped_ring_full.add(n as u64);
49565083

49575084
state.rx_bufs.free(data.rx_ready[0].0);
49585085
epqueue.rx_avail(&data.rx_ready[..n]);
49595086
}
49605087
}
49615088

4962-
state.stats.rx_packets_per_wake.add_sample(n as u64);
49635089
Ok(true)
49645090
}
49655091

@@ -5062,10 +5188,7 @@ impl<T: 'static + RingMem> NetChannel<T> {
50625188
_ => (),
50635189
};
50645190
if queue_switch_operation {
5065-
// A restart will also try to switch the data path based on primary.guest_vf_state.
5066-
if self.restart.is_none() {
5067-
self.restart = Some(CoordinatorMessage::UpdateGuestVfState)
5068-
};
5191+
self.send_coordinator_update_vf();
50695192
} else {
50705193
self.send_completion(transaction_id, &[])?;
50715194
}

vm/devices/net/netvsp/src/rndisprot.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,7 @@ open_enum! {
751751
DEFAULT = 0x80,
752752
RSS_CAPABILITIES = 0x88,
753753
RSS_PARAMETERS = 0x89,
754+
OID_REQUEST = 0x96,
754755
OFFLOAD = 0xA7,
755756
OFFLOAD_ENCAPSULATION = 0xA8,
756757
}
@@ -1082,3 +1083,14 @@ open_enum! {
10821083
BINARY = 4,
10831084
}
10841085
}
1086+
1087+
pub type RndisPacketFilterOidValue = u32;
1088+
1089+
// Rndis Packet Filter Flags (OID_GEN_CURRENT_PACKET_FILTER)
1090+
pub const NDIS_PACKET_TYPE_NONE: u32 = 0x00000000;
1091+
pub const NDIS_PACKET_TYPE_DIRECTED: u32 = 0x00000001;
1092+
pub const NDIS_PACKET_TYPE_MULTICAST: u32 = 0x00000002;
1093+
pub const NDIS_PACKET_TYPE_ALL_MULTICAST: u32 = 0x00000004;
1094+
pub const NDIS_PACKET_TYPE_BROADCAST: u32 = 0x00000008;
1095+
pub const NPROTO_PACKET_FILTER: u32 =
1096+
NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_BROADCAST;

vm/devices/net/netvsp/src/saved_state.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ pub struct ReadyPrimary {
120120
pub guest_link_down: bool,
121121
#[mesh(15)]
122122
pub pending_link_action: Option<bool>,
123+
#[mesh(16)]
124+
pub packet_filter: Option<u32>,
123125
}
124126

125127
#[derive(Debug, Protobuf)]

0 commit comments

Comments
 (0)