From b5a0341d3976ce8cd4812158fb758d3ae5912db5 Mon Sep 17 00:00:00 2001
From: Jun Wu <quark@lihdd.net>
Date: Fri, 24 Feb 2023 20:13:13 -0800
Subject: [PATCH 1/2] Add SetupOptions to customize FailScenario::setup
 behavior

For now it makes the environment variable configurable, which
might be useful for different applications to use different
env var names to avoid conflict.

I plan to add more features in the next change.

Signed-off-by: Jun Wu <quark@lihdd.net>
---
 src/lib.rs | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index f23cc44..7cb7539 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -534,6 +534,22 @@ pub struct FailScenario<'a> {
     scenario_guard: MutexGuard<'a, &'static FailPointRegistry>,
 }
 
+/// Customize behaviors setting up [`FailScenario`].
+#[non_exhaustive]
+#[derive(Debug)]
+pub struct SetupOptions {
+    /// Environment variable to use. Default: `"FAILPOINTS"`.
+    pub env_var_name: &'static str,
+}
+
+impl Default for SetupOptions {
+    fn default() -> Self {
+        Self {
+            env_var_name: "FAILPOINTS",
+        }
+    }
+}
+
 impl<'a> FailScenario<'a> {
     /// Set up the system for a fail points scenario.
     ///
@@ -555,12 +571,18 @@ impl<'a> FailScenario<'a> {
     ///
     /// Panics if an action is not formatted correctly.
     pub fn setup() -> Self {
+        Self::setup_with_options(Default::default())
+    }
+
+    /// Similar to [`FailScenario::setup`] but takes an extra [`SetupOptions`]
+    /// for customization.
+    pub fn setup_with_options(options: SetupOptions) -> Self {
         // Cleanup first, in case of previous failed/panic'ed test scenarios.
         let scenario_guard = SCENARIO.lock().unwrap_or_else(|e| e.into_inner());
         let mut registry = scenario_guard.registry.write().unwrap();
         Self::cleanup(&mut registry);
 
-        let failpoints = match env::var("FAILPOINTS") {
+        let failpoints = match env::var(options.env_var_name) {
             Ok(s) => s,
             Err(VarError::NotPresent) => return Self { scenario_guard },
             Err(e) => panic!("invalid failpoints: {:?}", e),
@@ -1062,4 +1084,20 @@ mod tests {
         assert_eq!(rx.recv_timeout(Duration::from_millis(500)).unwrap(), 0);
         assert_eq!(f1(), 0);
     }
+
+    #[test]
+    #[cfg_attr(not(feature = "failpoints"), ignore)]
+    fn test_setup_with_customized_env_name() {
+        let f1 = || {
+            fail_point!("setup_with_customized_env_name", |_| 1);
+            0
+        };
+        env::set_var("FOO_FAILPOINTS", "setup_with_customized_env_name=return");
+        let scenario = FailScenario::setup_with_options(SetupOptions {
+            env_var_name: "FOO_FAILPOINTS",
+            ..Default::default()
+        });
+        assert_eq!(f1(), 1);
+        scenario.teardown();
+    }
 }

From f66d15255e2458d654dd7f57e0a41b8bac54ae5a Mon Sep 17 00:00:00 2001
From: Jun Wu <quark@lihdd.net>
Date: Fri, 24 Feb 2023 21:54:50 -0800
Subject: [PATCH 2/2] Support customized logic for FAILPOINTS tasks

Previously, the only way to provide customized (as a Rust function)
failpoint behavior is to use `cfg_callback` API. This change extends
the customization to the FAILPOINTS environment variable. Application
can provide a function to turn a `call(arg)` task in FAILPOINTS to
a "callback". Unlike `cfg_callback`, this integrates with the rest
of FAILPOINTS syntax, for example:

    3*call(foo)->2*return->panic

`cfg_callback` does not seem to be able to express `return`.

The use-case that motivates this change is to test race conditions
in multiple processes. I'd like to use 2 primitives:

    wait("foo")     // wait for "foo".
    unblock("foo")  // unblock a current or future wait of "foo".

instead of "sleep" to create interesting racy cases deterministically.

Signed-off-by: Jun Wu <quark@lihdd.net>
---
 src/lib.rs | 102 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 83 insertions(+), 19 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 7cb7539..de15be5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -357,21 +357,11 @@ impl Action {
         }
         Some(self.task.clone())
     }
-}
 
-fn partition(s: &str, pattern: char) -> (&str, Option<&str>) {
-    let mut splits = s.splitn(2, pattern);
-    (splits.next().unwrap(), splits.next())
-}
-
-impl FromStr for Action {
-    type Err = String;
-
-    /// Parse an action.
-    ///
-    /// `s` should be in the format `[p%][cnt*]task[(args)]`, `p%` is the frequency,
-    /// `cnt` is the max times the action can be triggered.
-    fn from_str(s: &str) -> Result<Action, String> {
+    fn from_str_with_resolve_call(
+        s: &str,
+        resolve_call: Option<&ResolveCallFunc>,
+    ) -> Result<Action, String> {
         let mut remain = s.trim();
         let mut args = None;
         // in case there is '%' in args, we need to parse it first.
@@ -421,6 +411,14 @@ impl FromStr for Action {
             "pause" => Task::Pause,
             "yield" => Task::Yield,
             "delay" => Task::Delay(parse_timeout()?),
+            "call" => {
+                if let (Some(resolve), Some(arg)) = (resolve_call, args) {
+                    let callback = SyncCallback(resolve(arg));
+                    Task::Callback(callback)
+                } else {
+                    return Err(format!("call is unavailable in this context"));
+                }
+            }
             _ => return Err(format!("unrecognized command {:?}", remain)),
         };
 
@@ -428,6 +426,23 @@ impl FromStr for Action {
     }
 }
 
+fn partition(s: &str, pattern: char) -> (&str, Option<&str>) {
+    let mut splits = s.splitn(2, pattern);
+    (splits.next().unwrap(), splits.next())
+}
+
+impl FromStr for Action {
+    type Err = String;
+
+    /// Parse an action.
+    ///
+    /// `s` should be in the format `[p%][cnt*]task[(args)]`, `p%` is the frequency,
+    /// `cnt` is the max times the action can be triggered.
+    fn from_str(s: &str) -> Result<Action, String> {
+        Self::from_str_with_resolve_call(s, None)
+    }
+}
+
 #[cfg_attr(feature = "cargo-clippy", allow(clippy::mutex_atomic))]
 #[derive(Debug)]
 struct FailPoint {
@@ -534,18 +549,27 @@ pub struct FailScenario<'a> {
     scenario_guard: MutexGuard<'a, &'static FailPointRegistry>,
 }
 
+type ResolveCallFunc = Box<dyn Fn(&str) -> Arc<dyn Fn() + Send + Sync + 'static>>;
+
 /// Customize behaviors setting up [`FailScenario`].
 #[non_exhaustive]
-#[derive(Debug)]
+#[allow(missing_debug_implementations)]
 pub struct SetupOptions {
     /// Environment variable to use. Default: `"FAILPOINTS"`.
     pub env_var_name: &'static str,
+
+    /// Defines how to resolve `call(arg)` as a `task` in the
+    /// `FAILPOINTS` environment variable. The provided function
+    /// taks the `arg` string and returns a function to execute
+    /// as the task.
+    pub resolve_call: Option<ResolveCallFunc>,
 }
 
 impl Default for SetupOptions {
     fn default() -> Self {
         Self {
             env_var_name: "FAILPOINTS",
+            resolve_call: None,
         }
     }
 }
@@ -596,7 +620,12 @@ impl<'a> FailScenario<'a> {
             match order {
                 None => panic!("invalid failpoint: {:?}", cfg),
                 Some(order) => {
-                    if let Err(e) = set(&mut registry, name.to_owned(), order) {
+                    if let Err(e) = set(
+                        &mut registry,
+                        name.to_owned(),
+                        order,
+                        options.resolve_call.as_ref(),
+                    ) {
                         panic!("unable to configure failpoint \"{}\": {}", name, e);
                     }
                 }
@@ -691,13 +720,14 @@ pub fn eval<R, F: FnOnce(Option<String>) -> R>(name: &str, f: F) -> Option<R> {
 /// times.
 ///
 /// The `FAILPOINTS` environment variable accepts this same syntax for its fail
-/// point actions.
+/// point actions. With [`SetupOptions::resolve_call`] and [`FailScenario::setup_with_options`],
+/// `task` can also be `call(arg)` for customized behavior.
 ///
 /// A call to `cfg` with a particular fail point name overwrites any existing actions for
 /// that fail point, including those set via the `FAILPOINTS` environment variable.
 pub fn cfg<S: Into<String>>(name: S, actions: &str) -> Result<(), String> {
     let mut registry = REGISTRY.registry.write().unwrap();
-    set(&mut registry, name.into(), actions)
+    set(&mut registry, name.into(), actions, None)
 }
 
 /// Configure the actions for a fail point at runtime.
@@ -768,12 +798,13 @@ fn set(
     registry: &mut HashMap<String, Arc<FailPoint>>,
     name: String,
     actions: &str,
+    resolve_call: Option<&ResolveCallFunc>,
 ) -> Result<(), String> {
     let actions_str = actions;
     // `actions` are in the format of `failpoint[->failpoint...]`.
     let actions = actions
         .split("->")
-        .map(Action::from_str)
+        .map(|a| Action::from_str_with_resolve_call(a, resolve_call))
         .collect::<Result<_, _>>()?;
     // Please note that we can't figure out whether there is a failpoint named `name`,
     // so we may insert a failpoint that doesn't exist at all.
@@ -1100,4 +1131,37 @@ mod tests {
         assert_eq!(f1(), 1);
         scenario.teardown();
     }
+
+    #[test]
+    #[cfg_attr(not(feature = "failpoints"), ignore)]
+    fn test_setup_with_customized_resolve_call() {
+        env::set_var("FAILPOINTS", "customized_resolve_call=3*call(count)");
+
+        let count = Arc::new(AtomicUsize::new(0));
+        let scenario = FailScenario::setup_with_options(SetupOptions {
+            resolve_call: Some({
+                let count = count.clone();
+                Box::new(move |arg| {
+                    if arg == "count" {
+                        let count = count.clone();
+                        return Arc::new(move || {
+                            count.fetch_add(1, Ordering::AcqRel);
+                        });
+                    }
+                    panic!("unsupported call(): {}", arg);
+                })
+            }),
+            ..Default::default()
+        });
+
+        let f = || {
+            fail_point!("customized_resolve_call");
+        };
+        for i in 0..5 {
+            assert_eq!(count.load(Ordering::Acquire), i.min(3));
+            f();
+        }
+
+        scenario.teardown();
+    }
 }