Skip to content

Commit 4574702

Browse files
authored
petri: unblock commands and use com3 for hyper-v kmsg logs when possible (#1176)
Using COM3 instead of diag_client allows us to get OpenHCL logs if it crashes early in boot. Currently this only works on our TDX and SNP runners, since our nested runners have a build of windows that is too old and additional com ports are not yet supported on ARM. To make this work, the powershell and hvc commands needed to be run in a separate process (using the `unblock` function) to avoid blocking the process doing the logging. Also fixes guest serial logs that fail to reconnect after os error 231 (ubuntu 22.04 tests).
1 parent ee9ff7e commit 4574702

File tree

15 files changed

+383
-193
lines changed

15 files changed

+383
-193
lines changed

Cargo.lock

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5443,6 +5443,7 @@ version = "0.0.0"
54435443
dependencies = [
54445444
"anyhow",
54455445
"async-trait",
5446+
"blocking",
54465447
"chipset_resources",
54475448
"clap",
54485449
"diag_client",
@@ -5514,6 +5515,7 @@ dependencies = [
55145515
"vmotherboard",
55155516
"vmsocket",
55165517
"vtl2_settings_proto",
5518+
"windows-version",
55175519
]
55185520

55195521
[[package]]
@@ -9642,6 +9644,15 @@ dependencies = [
96429644
"windows_x86_64_msvc 0.53.0",
96439645
]
96449646

9647+
[[package]]
9648+
name = "windows-version"
9649+
version = "0.1.4"
9650+
source = "registry+https://github.com/rust-lang/crates.io-index"
9651+
checksum = "e04a5c6627e310a23ad2358483286c7df260c964eb2d003d8efd6d0f4e79265c"
9652+
dependencies = [
9653+
"windows-link",
9654+
]
9655+
96459656
[[package]]
96469657
name = "windows_aarch64_gnullvm"
96479658
version = "0.48.5"

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ winapi = "0.3"
537537
windows = "0.59"
538538
windows-service = "0.8"
539539
windows-sys = "0.59"
540+
windows-version = "0.1.4"
540541
xshell = "=0.2.2" # pin to 0.2.2 to work around https://github.com/matklad/xshell/issues/63
541542
xshell-macros = "0.2"
542543
# We add the derive feature here since the vast majority of our crates use it.

flowey/flowey_lib_hvlite/src/install_vmm_tests_deps.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,10 @@ Otherwise, press `ctrl-c` to cancel the run.
155155
// Select required reg keys
156156
let mut reg_keys_to_set = BTreeSet::new();
157157
if hyperv {
158+
// Allow loading IGVM from file (to run custom OpenHCL firmware)
158159
reg_keys_to_set.insert("AllowFirmwareLoadFromFile");
160+
// Enable COM3 and COM4 for Hyper-V VMs so we can get the OpenHCL KMSG logs over serial
161+
reg_keys_to_set.insert("EnableAdditionalComPorts");
159162

160163
if hardware_isolation {
161164
reg_keys_to_set.insert("EnableHardwareIsolation");

petri/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ sparse_mmap.workspace = true
6363

6464
anyhow.workspace = true
6565
async-trait.workspace = true
66+
blocking.workspace = true
6667
clap.workspace = true
6768
fatfs = { workspace = true, features = ["std", "alloc"] }
6869
fs-err.workspace = true
@@ -84,5 +85,8 @@ thiserror.workspace = true
8485
tracing.workspace = true
8586
tracing-subscriber.workspace = true
8687

88+
[target.'cfg(windows)'.dependencies]
89+
windows-version.workspace = true
90+
8791
[lints]
8892
workspace = true

petri/src/tracing.rs

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// Copyright (c) Microsoft Corporation.
22
// Licensed under the MIT License.
33

4-
use diag_client::kmsg_stream::KmsgStream;
54
use fs_err::File;
65
use fs_err::PathExt;
76
use futures::AsyncBufReadExt;
@@ -305,24 +304,41 @@ impl<'a> MakeWriter<'a> for PetriWriter {
305304
}
306305

307306
/// Logs lines from `reader` into `log_file`.
308-
pub async fn log_stream(
307+
///
308+
/// Attempts to parse lines as `SyslogParsedEntry`, extracting the log level.
309+
/// Passes through any non-conforming logs.
310+
pub async fn log_task(
309311
log_file: PetriLogFile,
310312
reader: impl AsyncRead + Unpin + Send + 'static,
313+
name: &str,
311314
) -> anyhow::Result<()> {
315+
tracing::info!("connected to {name}");
312316
let mut buf = Vec::new();
313317
let mut reader = BufReader::new(reader);
314318
loop {
315319
buf.clear();
316-
let n = (&mut reader).take(256).read_until(b'\n', &mut buf).await?;
317-
if n == 0 {
318-
break;
320+
match (&mut reader).take(256).read_until(b'\n', &mut buf).await {
321+
Ok(0) => {
322+
tracing::info!("disconnected from {name}: EOF");
323+
return Ok(());
324+
}
325+
Err(e) => {
326+
tracing::info!("disconnected from {name}: error: {e:#}");
327+
return Err(e.into());
328+
}
329+
_ => {}
319330
}
320331

321332
let string_buf = String::from_utf8_lossy(&buf);
322333
let string_buf_trimmed = string_buf.trim_end();
323-
log_file.write_entry(string_buf_trimmed);
334+
335+
if let Some(message) = kmsg::SyslogParsedEntry::new(string_buf_trimmed) {
336+
let level = kernel_level_to_tracing_level(message.level);
337+
log_file.write_entry_fmt(None, level, format_args!("{}", message.display(false)));
338+
} else {
339+
log_file.write_entry(string_buf_trimmed);
340+
}
324341
}
325-
Ok(())
326342
}
327343

328344
/// Maps kernel log levels to tracing levels.
@@ -339,23 +355,30 @@ fn kernel_level_to_tracing_level(kernel_level: u8) -> Level {
339355
/// read from the kmsg stream and write entries to the log
340356
pub async fn kmsg_log_task(
341357
log_file: PetriLogFile,
342-
mut file_stream: KmsgStream,
358+
diag_client: diag_client::DiagClient,
343359
) -> anyhow::Result<()> {
344-
while let Some(data) = file_stream.next().await {
345-
match data {
346-
Ok(data) => {
347-
let message = KmsgParsedEntry::new(&data).unwrap();
348-
let level = kernel_level_to_tracing_level(message.level);
349-
log_file.write_entry_fmt(None, level, format_args!("{}", message.display(false)));
350-
}
351-
Err(err) => {
352-
tracing::info!("kmsg disconnected: {err:?}");
353-
break;
360+
loop {
361+
diag_client.wait_for_server().await?;
362+
let mut kmsg = diag_client.kmsg(true).await?;
363+
tracing::info!("kmsg connected");
364+
while let Some(data) = kmsg.next().await {
365+
match data {
366+
Ok(data) => {
367+
let message = KmsgParsedEntry::new(&data).unwrap();
368+
let level = kernel_level_to_tracing_level(message.level);
369+
log_file.write_entry_fmt(
370+
None,
371+
level,
372+
format_args!("{}", message.display(false)),
373+
);
374+
}
375+
Err(err) => {
376+
tracing::info!("kmsg disconnected: {err:#}");
377+
break;
378+
}
354379
}
355380
}
356381
}
357-
358-
Ok(())
359382
}
360383

361384
#[cfg(test)]

petri/src/vm/hyperv/hvc.rs

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,34 @@ use super::vm::CommandError;
77
use anyhow::Context;
88
use guid::Guid;
99

10-
pub fn hvc_start(vmid: &Guid) -> Result<(), CommandError> {
11-
hvc_output(|cmd| cmd.arg("start").arg(vmid.to_string())).map(|_| ())
10+
pub async fn hvc_start(vmid: &Guid) -> Result<(), CommandError> {
11+
hvc_output(|cmd| cmd.arg("start").arg(vmid.to_string()))
12+
.await
13+
.map(|_| ())
1214
}
1315

14-
pub fn hvc_stop(vmid: &Guid) -> Result<(), CommandError> {
15-
hvc_output(|cmd| cmd.arg("stop").arg(vmid.to_string())).map(|_| ())
16+
pub async fn hvc_stop(vmid: &Guid) -> Result<(), CommandError> {
17+
hvc_output(|cmd| cmd.arg("stop").arg(vmid.to_string()))
18+
.await
19+
.map(|_| ())
1620
}
1721

18-
pub fn hvc_kill(vmid: &Guid) -> Result<(), CommandError> {
19-
hvc_output(|cmd| cmd.arg("kill").arg(vmid.to_string())).map(|_| ())
22+
pub async fn hvc_kill(vmid: &Guid) -> Result<(), CommandError> {
23+
hvc_output(|cmd| cmd.arg("kill").arg(vmid.to_string()))
24+
.await
25+
.map(|_| ())
2026
}
2127

22-
pub fn hvc_restart(vmid: &Guid) -> Result<(), CommandError> {
23-
hvc_output(|cmd| cmd.arg("restart").arg(vmid.to_string())).map(|_| ())
28+
pub async fn hvc_restart(vmid: &Guid) -> Result<(), CommandError> {
29+
hvc_output(|cmd| cmd.arg("restart").arg(vmid.to_string()))
30+
.await
31+
.map(|_| ())
2432
}
2533

26-
pub fn hvc_reset(vmid: &Guid) -> Result<(), CommandError> {
27-
hvc_output(|cmd| cmd.arg("reset").arg(vmid.to_string())).map(|_| ())
34+
pub async fn hvc_reset(vmid: &Guid) -> Result<(), CommandError> {
35+
hvc_output(|cmd| cmd.arg("reset").arg(vmid.to_string()))
36+
.await
37+
.map(|_| ())
2838
}
2939

3040
/// HyperV VM state as reported by hvc
@@ -52,9 +62,10 @@ pub enum VmState {
5262
Resuming,
5363
}
5464

55-
pub fn hvc_state(vmid: &Guid) -> anyhow::Result<VmState> {
65+
pub async fn hvc_state(vmid: &Guid) -> anyhow::Result<VmState> {
5666
Ok(
5767
match hvc_output(|cmd| cmd.arg("state").arg(vmid.to_string()))
68+
.await
5869
.context("hvc_state")?
5970
.as_str()
6071
{
@@ -73,12 +84,12 @@ pub fn hvc_state(vmid: &Guid) -> anyhow::Result<VmState> {
7384
)
7485
}
7586

76-
pub fn hvc_ensure_off(vmid: &Guid) -> anyhow::Result<()> {
87+
pub async fn hvc_ensure_off(vmid: &Guid) -> anyhow::Result<()> {
7788
for _ in 0..5 {
78-
if matches!(hvc_state(vmid)?, VmState::Off) {
89+
if matches!(hvc_state(vmid).await?, VmState::Off) {
7990
return Ok(());
8091
}
81-
if let Err(e) = hvc_kill(vmid) {
92+
if let Err(e) = hvc_kill(vmid).await {
8293
tracing::warn!("hvc_kill attempt failed: {e}")
8394
}
8495
std::thread::sleep(std::time::Duration::from_secs(1));
@@ -88,11 +99,11 @@ pub fn hvc_ensure_off(vmid: &Guid) -> anyhow::Result<()> {
8899
}
89100

90101
/// Runs hvc with the given arguments and returns the output.
91-
fn hvc_output(
102+
async fn hvc_output(
92103
f: impl FnOnce(&mut std::process::Command) -> &mut std::process::Command,
93104
) -> Result<String, CommandError> {
94105
let mut cmd = std::process::Command::new("hvc.exe");
95106
f(&mut cmd);
96107

97-
super::vm::run_cmd(cmd)
108+
super::vm::run_cmd(cmd).await
98109
}

0 commit comments

Comments
 (0)