Skip to content

Commit cd2d5d2

Browse files
Tracking/limiting memory allocator (#1192)
1 parent 8cfbee7 commit cd2d5d2

File tree

19 files changed

+569
-58
lines changed

19 files changed

+569
-58
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

polkadot/node/core/pvf/common/src/error.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,40 +23,56 @@ use std::fmt;
2323
pub type PrepareResult = Result<PrepareStats, PrepareError>;
2424

2525
/// An error that occurred during the prepare part of the PVF pipeline.
26+
// Codec indexes are intended to stabilize pre-encoded payloads (see `OOM_PAYLOAD` below)
2627
#[derive(Debug, Clone, Encode, Decode)]
2728
pub enum PrepareError {
2829
/// During the prevalidation stage of preparation an issue was found with the PVF.
30+
#[codec(index = 0)]
2931
Prevalidation(String),
3032
/// Compilation failed for the given PVF.
33+
#[codec(index = 1)]
3134
Preparation(String),
3235
/// Instantiation of the WASM module instance failed.
36+
#[codec(index = 2)]
3337
RuntimeConstruction(String),
3438
/// An unexpected panic has occurred in the preparation worker.
39+
#[codec(index = 3)]
3540
Panic(String),
3641
/// Failed to prepare the PVF due to the time limit.
42+
#[codec(index = 4)]
3743
TimedOut,
3844
/// An IO error occurred. This state is reported by either the validation host or by the
3945
/// worker.
46+
#[codec(index = 5)]
4047
IoErr(String),
4148
/// The temporary file for the artifact could not be created at the given cache path. This
4249
/// state is reported by the validation host (not by the worker).
50+
#[codec(index = 6)]
4351
CreateTmpFileErr(String),
4452
/// The response from the worker is received, but the file cannot be renamed (moved) to the
4553
/// final destination location. This state is reported by the validation host (not by the
4654
/// worker).
55+
#[codec(index = 7)]
4756
RenameTmpFileErr {
4857
err: String,
4958
// Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible
5059
// conversion to `Option<String>`.
5160
src: Option<String>,
5261
dest: Option<String>,
5362
},
63+
/// The memory limit was reached during preparation.
64+
#[codec(index = 8)]
65+
OutOfMemory,
5466
/// The response from the worker is received, but the worker cache could not be cleared. The
5567
/// worker has to be killed to avoid jobs having access to data from other jobs. This state is
5668
/// reported by the validation host (not by the worker).
69+
#[codec(index = 9)]
5770
ClearWorkerDir(String),
5871
}
5972

73+
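/// Pre-encoded, length-prefixed `PrepareResult::Err(PrepareError::OutOfMemory)`: an 8-byte little-endian length (2), then the `Err` tag (`0x01`) and the `OutOfMemory` codec index (`0x08`).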
74+
pub const OOM_PAYLOAD: &[u8] = b"\x02\x00\x00\x00\x00\x00\x00\x00\x01\x08";
75+
6076
impl PrepareError {
6177
/// Returns whether this is a deterministic error, i.e. one that should trigger reliably. Those
6278
/// errors depend on the PVF itself and the sc-executor/wasmtime logic.
@@ -67,7 +83,7 @@ impl PrepareError {
6783
pub fn is_deterministic(&self) -> bool {
6884
use PrepareError::*;
6985
match self {
70-
Prevalidation(_) | Preparation(_) | Panic(_) => true,
86+
Prevalidation(_) | Preparation(_) | Panic(_) | OutOfMemory => true,
7187
TimedOut |
7288
IoErr(_) |
7389
CreateTmpFileErr(_) |
@@ -92,6 +108,7 @@ impl fmt::Display for PrepareError {
92108
CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err),
93109
RenameTmpFileErr { err, src, dest } =>
94110
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
111+
OutOfMemory => write!(f, "prepare: out of memory"),
95112
ClearWorkerDir(err) => write!(f, "prepare: error clearing worker cache: {}", err),
96113
}
97114
}
@@ -147,3 +164,11 @@ impl fmt::Display for InternalValidationError {
147164
}
148165
}
149166
}
167+
168+
#[test]
169+
fn pre_encoded_payloads() {
170+
let oom_enc = PrepareResult::Err(PrepareError::OutOfMemory).encode();
171+
let mut oom_payload = oom_enc.len().to_le_bytes().to_vec();
172+
oom_payload.extend(oom_enc);
173+
assert_eq!(oom_payload, OOM_PAYLOAD);
174+
}

polkadot/node/core/pvf/common/src/executor_intf.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,15 +166,36 @@ pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, S
166166
ExecutorParam::StackLogicalMax(slm) => stack_limit.logical_max = *slm,
167167
ExecutorParam::StackNativeMax(snm) => stack_limit.native_stack_max = *snm,
168168
ExecutorParam::WasmExtBulkMemory => sem.wasm_bulk_memory = true,
169-
// TODO: Not implemented yet; <https://github.com/paritytech/polkadot/issues/6472>.
170-
ExecutorParam::PrecheckingMaxMemory(_) => (),
171-
ExecutorParam::PvfPrepTimeout(_, _) | ExecutorParam::PvfExecTimeout(_, _) => (), /* Not used here */
169+
ExecutorParam::PrecheckingMaxMemory(_) |
170+
ExecutorParam::PvfPrepTimeout(_, _) |
171+
ExecutorParam::PvfExecTimeout(_, _) => (), /* Not used here */
172172
}
173173
}
174174
sem.deterministic_stack_limit = Some(stack_limit);
175175
Ok(sem)
176176
}
177177

178+
/// Runs the prevalidation on the given code. Returns a [`RuntimeBlob`] if it succeeds.
179+
pub fn prevalidate(code: &[u8]) -> Result<RuntimeBlob, sc_executor_common::error::WasmError> {
180+
let blob = RuntimeBlob::new(code)?;
181+
// It's assumed this function will take care of any prevalidation logic
182+
// that needs to be done.
183+
//
184+
// Do nothing for now.
185+
Ok(blob)
186+
}
187+
188+
/// Runs preparation on the given runtime blob. If successful, it returns a serialized compiled
189+
/// artifact which can then be used to pass into `Executor::execute` after writing it to the disk.
190+
pub fn prepare(
191+
blob: RuntimeBlob,
192+
executor_params: &ExecutorParams,
193+
) -> Result<Vec<u8>, sc_executor_common::error::WasmError> {
194+
let semantics = params_to_wasmtime_semantics(executor_params)
195+
.map_err(|e| sc_executor_common::error::WasmError::Other(e))?;
196+
sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics)
197+
}
198+
178199
/// Available host functions. We leave out:
179200
///
180201
/// 1. storage related stuff (PVF doesn't have a notion of a persistent storage/trie)

polkadot/node/core/pvf/common/src/prepare.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@ pub struct PrepareStats {
2929
/// supported by the OS, `ru_maxrss`.
3030
#[derive(Clone, Debug, Default, Encode, Decode)]
3131
pub struct MemoryStats {
32-
/// Memory stats from `tikv_jemalloc_ctl`.
32+
/// Memory stats from `tikv_jemalloc_ctl`, polling-based and not very precise.
3333
#[cfg(any(target_os = "linux", feature = "jemalloc-allocator"))]
3434
pub memory_tracker_stats: Option<MemoryAllocationStats>,
3535
/// `ru_maxrss` from `getrusage`. `None` if an error occurred.
3636
#[cfg(target_os = "linux")]
3737
pub max_rss: Option<i64>,
38+
/// Peak allocation, in bytes, as measured by the tracking allocator.
39+
pub peak_tracked_alloc: u64,
3840
}
3941

4042
/// Statistics of collected memory metrics.

polkadot/node/core/pvf/prepare-worker/Cargo.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ cfg-if = "1.0"
1111
gum = { package = "tracing-gum", path = "../../../gum" }
1212
libc = "0.2.139"
1313
rayon = "1.5.1"
14+
tracking-allocator = { path = "../../../tracking-allocator" }
1415
tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
16+
tikv-jemallocator = { version = "0.5.0", optional = true }
1517

1618
parity-scale-codec = { version = "3.6.1", default-features = false, features = ["derive"] }
1719

@@ -22,11 +24,22 @@ sc-executor-common = { path = "../../../../../substrate/client/executor/common"
2224
sc-executor-wasmtime = { path = "../../../../../substrate/client/executor/wasmtime" }
2325

2426
[target.'cfg(target_os = "linux")'.dependencies]
27+
tikv-jemallocator = "0.5.0"
2528
tikv-jemalloc-ctl = "0.5.0"
2629

2730
[features]
2831
builder = []
2932
jemalloc-allocator = [
3033
"dep:tikv-jemalloc-ctl",
34+
"dep:tikv-jemallocator",
3135
"polkadot-node-core-pvf-common/jemalloc-allocator",
3236
]
37+
38+
[dev-dependencies]
39+
criterion = { version = "0.4.0", default-features = false, features = ["cargo_bench_support"] }
40+
rococo-runtime = { path = "../../../../runtime/rococo" }
41+
sp-maybe-compressed-blob = { path = "../../../../../substrate/primitives/maybe-compressed-blob" }
42+
43+
[[bench]]
44+
name = "prepare_rococo_runtime"
45+
harness = false
polkadot/node/core/pvf/prepare-worker/benches/prepare_rococo_runtime.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright (C) Parity Technologies (UK) Ltd.
2+
// This file is part of Polkadot.
3+
4+
// Polkadot is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
9+
// Polkadot is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU General Public License for more details.
13+
14+
// You should have received a copy of the GNU General Public License
15+
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
16+
17+
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
18+
use polkadot_node_core_pvf_common::{
19+
executor_intf::{prepare, prevalidate},
20+
prepare::PrepareJobKind,
21+
pvf::PvfPrepData,
22+
};
23+
use polkadot_primitives::ExecutorParams;
24+
use std::time::Duration;
25+
26+
fn do_prepare_runtime(pvf: PvfPrepData) {
27+
let blob = match prevalidate(&pvf.code()) {
28+
Err(err) => panic!("{:?}", err),
29+
Ok(b) => b,
30+
};
31+
32+
match prepare(blob, &pvf.executor_params()) {
33+
Ok(_) => (),
34+
Err(err) => panic!("{:?}", err),
35+
}
36+
}
37+
38+
fn prepare_rococo_runtime(c: &mut Criterion) {
39+
let blob = rococo_runtime::WASM_BINARY.unwrap();
40+
let pvf = match sp_maybe_compressed_blob::decompress(&blob, 64 * 1024 * 1024) {
41+
Ok(code) => PvfPrepData::from_code(
42+
code.into_owned(),
43+
ExecutorParams::default(),
44+
Duration::from_secs(360),
45+
PrepareJobKind::Compilation,
46+
),
47+
Err(e) => {
48+
panic!("Cannot decompress blob: {:?}", e);
49+
},
50+
};
51+
52+
let mut group = c.benchmark_group("rococo");
53+
group.sampling_mode(SamplingMode::Flat);
54+
group.sample_size(20);
55+
group.measurement_time(Duration::from_secs(240));
56+
group.bench_function("prepare Rococo runtime", |b| {
57+
// `PvfPrepData` is designed to be cheap to clone, so cloning shouldn't affect the
58+
// benchmark accuracy
59+
b.iter(|| do_prepare_runtime(pvf.clone()))
60+
});
61+
group.finish();
62+
}
63+
64+
criterion_group!(preparation, prepare_rococo_runtime);
65+
criterion_main!(preparation);

polkadot/node/core/pvf/prepare-worker/src/executor_intf.rs

Lines changed: 0 additions & 42 deletions
This file was deleted.

0 commit comments

Comments
 (0)