From 79d7c2b73c8662c452bdbd1251dd10122403e547 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 10:45:58 +0100
Subject: [PATCH 01/15] initial
---
R/ancova.R | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 115 insertions(+), 1 deletion(-)
diff --git a/R/ancova.R b/R/ancova.R
index 0306f5b0..ad36a4cc 100644
--- a/R/ancova.R
+++ b/R/ancova.R
@@ -60,6 +60,40 @@ ancova <- function(
vars,
visits = NULL,
weights = c("counterfactual", "equal", "proportional_em", "proportional")
+) {
+ ancova_core(
+ data = data,
+ vars = vars,
+ visits = visits,
+ weights = weights,
+ ancova_fun = ancova_single
+ )
+}
+
+
+ancova_m_groups <- function(
+ data,
+ vars,
+ visits = NULL,
+ weights = c("counterfactual", "equal", "proportional_em", "proportional")
+) {
+ ancova_core(
+ data = data,
+ vars = vars,
+ visits = visits,
+ weights = weights,
+ ancova_fun = ancova_single_m_group
+ )
+}
+
+
+
+ancova_core <- function(
+ data,
+ vars,
+ visits = NULL,
+ weights = c("counterfactual", "equal", "proportional_em", "proportional"),
+ ancova_fun
) {
outcome <- vars[["outcome"]]
group <- vars[["group"]]
@@ -130,7 +164,7 @@ ancova <- function(
visits,
function(x) {
data2 <- data[data[[visit]] == x, ]
- res <- ancova_single(data2, outcome, group, covariates, weights)
+ res <- ancova_fun(data2, outcome, group, covariates, weights)
names(res) <- paste0(names(res), "_", x)
return(res)
}
@@ -206,3 +240,83 @@ ancova_single <- function(
)
return(x)
}
+
+
+
+ancova_single_m_group <- function(
+ data,
+ outcome,
+ group,
+ covariates,
+ weights = c("counterfactual", "equal", "proportional_em", "proportional")
+) {
+ weights <- match.arg(weights)
+ assert_that(
+ is.factor(data[[group]]),
+ length(levels(data[[group]])) >= 2,
+ length(unique(data[[group]])) == length(levels(data[[group]])),
+ msg = "`data[[vars$group]]` must be a factor variable with 2 levels"
+ )
+
+ data2 <- data[, c(extract_covariates(covariates), outcome, group)]
+
+ # Standardise level names and group variable name to make extraction reliable
+ levels(data2[[group]]) <- paste0("L", seq_along(levels(data2[[group]])) - 1)
+ assert_that(
+ !"rbmiGroup" %in% names(data),
+ msg = c(
+ "`rbmiGroup` is a reserved variable name for internal use, please rename",
+ " your variable to avoid conflicts"
+ )
+ )
+ data2[["rbmiGroup"]] <- data2[[group]]
+ data2 <- data2[!names(data2) %in% group]
+
+ frm <- as_simple_formula(outcome, c(covariates, group))
+ frm_std <- frm_find_and_replace(frm, as.name(group), as.name("rbmiGroup"))
+
+ mod <- lm(formula = frm_std, data = data2)
+ args <- list(model = mod, .weights = weights)
+
+ lsm <- lapply(
+ levels(data2[["rbmiGroup"]]),
+ function(x) {
+ args[["rbmiGroup"]] <- x
+ do.call(lsmeans, args)
+ }
+ )
+ names(lsm) <- sprintf("lsm_%s", levels(data2[["rbmiGroup"]]))
+
+ trt <- lapply(
+ levels(data2[["rbmiGroup"]])[-1],
+ function(x) {
+ term <- sprintf("rbmiGroup%s", x)
+ list(
+ est = coef(mod)[[term]],
+ se = sqrt(vcov(mod)[term, term]),
+ df = df.residual(mod)
+ )
+ }
+ )
+ names(trt) <- sprintf("trt_%s_L0", levels(data2[["rbmiGroup"]])[-1])
+ append(trt, lsm)
+}
+
+
+frm_find_and_replace <- function(frm, find_sym, replace_sym) {
+ left <- frm[[2]]
+ right <- frm[[3]]
+ if (is.call(left)) {
+ left <- frm_find_and_replace(left, find_sym, replace_sym)
+ } else {
+ left <- ifelse(left == find_sym, replace_sym, left)
+ }
+ if (is.call(right)) {
+ right <- frm_find_and_replace(right, find_sym, replace_sym)
+ } else {
+ right <- ifelse(right == find_sym, replace_sym, right)
+ }
+ frm[[2]] <- left
+ frm[[3]] <- right
+ frm
+}
From acc41d1130fb3ce1dbcca3f12c3097d17dfbacc1 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 11:07:19 +0100
Subject: [PATCH 02/15] improve formula find and replace
---
R/ancova.R | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/R/ancova.R b/R/ancova.R
index ad36a4cc..5484f551 100644
--- a/R/ancova.R
+++ b/R/ancova.R
@@ -304,19 +304,12 @@ ancova_single_m_group <- function(
frm_find_and_replace <- function(frm, find_sym, replace_sym) {
- left <- frm[[2]]
- right <- frm[[3]]
- if (is.call(left)) {
- left <- frm_find_and_replace(left, find_sym, replace_sym)
- } else {
- left <- ifelse(left == find_sym, replace_sym, left)
- }
- if (is.call(right)) {
- right <- frm_find_and_replace(right, find_sym, replace_sym)
- } else {
- right <- ifelse(right == find_sym, replace_sym, right)
+ for (i in seq_along(frm)) {
+ if (is.call(frm[[i]])) {
+ frm[[i]] <- frm_find_and_replace(frm[[i]], find_sym, replace_sym)
+ } else if (is.name(frm[[i]])) {
+ frm[[i]] <- ifelse(frm[[i]] == find_sym, replace_sym, frm[[i]])
+ }
}
- frm[[2]] <- left
- frm[[3]] <- right
frm
}
From 316c6df1308b16f6c8510f0710b956e9b2c30797 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 11:18:47 +0100
Subject: [PATCH 03/15] added tests for frm_find_and_replace
---
R/ancova.R | 12 ------------
R/utilities.R | 29 +++++++++++++++++++++++++++++
tests/testthat/test-utilities.R | 25 +++++++++++++++++++++++++
3 files changed, 54 insertions(+), 12 deletions(-)
diff --git a/R/ancova.R b/R/ancova.R
index 5484f551..12ead3ae 100644
--- a/R/ancova.R
+++ b/R/ancova.R
@@ -301,15 +301,3 @@ ancova_single_m_group <- function(
names(trt) <- sprintf("trt_%s_L0", levels(data2[["rbmiGroup"]])[-1])
append(trt, lsm)
}
-
-
-frm_find_and_replace <- function(frm, find_sym, replace_sym) {
- for (i in seq_along(frm)) {
- if (is.call(frm[[i]])) {
- frm[[i]] <- frm_find_and_replace(frm[[i]], find_sym, replace_sym)
- } else if (is.name(frm[[i]])) {
- frm[[i]] <- ifelse(frm[[i]] == find_sym, replace_sym, frm[[i]])
- }
- }
- frm
-}
diff --git a/R/utilities.R b/R/utilities.R
index 36759285..df88c4ba 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -912,3 +912,32 @@ set_options <- function() {
}
}
}
+
+
+#' Recursively find and replace symbols in a language object
+#'
+#' This function traverses a language object (such as an expression or call)
+#' and recursively replaces all occurrences of a specified symbol with another symbol.
+#'
+#' @param frm A language object (e.g., call, expression, or list of calls) to search and modify.
+#' @param find_sym A symbol (as a name) to find within \code{frm}.
+#' @param replace_sym A symbol (as a name) to replace \code{find_sym} with.
+#'
+#' @return The modified language object with all instances of \code{find_sym} replaced by \code{replace_sym}.
+#'
+#' @examples
+#' expr <- quote(a + b * c)
+#' frm_find_and_replace(expr, as.name("b"), as.name("x"))
+#' # Returns: a + x * c
+#'
+#' @keywords internal
+frm_find_and_replace <- function(frm, find_sym, replace_sym) {
+ for (i in seq_along(frm)) {
+ if (is.call(frm[[i]])) {
+ frm[[i]] <- frm_find_and_replace(frm[[i]], find_sym, replace_sym)
+ } else if (is.name(frm[[i]])) {
+ frm[[i]] <- ifelse(frm[[i]] == find_sym, replace_sym, frm[[i]])
+ }
+ }
+ frm
+}
diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R
index b9a932fd..8aa10888 100644
--- a/tests/testthat/test-utilities.R
+++ b/tests/testthat/test-utilities.R
@@ -342,3 +342,28 @@ test_that("get_stan_model works as expected depending on covariance and prior on
model <- expect_silent(get_stan_model("us", "lkj"))
expect_snapshot(model)
})
+
+
+
+
+test_that("frm_find_and_replace works as expected", {
+ # Things being tested here include:
+ # - Can change values on both sides of the formula
+ # - Interactions e.g. `*` and `:`
+ # - No arg functions k()
+ # - 1-arg functions h(z)
+ # - 2-arg functions f(z, z)
+ # - Constants + 1
+ # - Data modification functions I(z^2)
+ # - Name subset e.g. zz doesn't get renamed (where we are searching for z)
+ frm <- x + z ~ 1 + z + a + b + zz + z:a + a * z + a * b + h(z) + f(z, z) + k() + I(z^2)
+ actual <- frm_find_and_replace(frm, as.name("z"), as.name("P"))
+ expected <- x + P ~ 1 + P + a + b + zz + P:a + a * P + a * b + h(P) + f(P, P) + k() + I(P^2)
+ environment(actual) <- globalenv()
+ environment(expected) <- globalenv()
+ expect_equal(actual, expected)
+
+
+
+})
+
From 5762a55bbfc4c79fe2b102f5890fe5676919cfcb Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 11:20:07 +0100
Subject: [PATCH 04/15] added rest of frm_find_and_replace tests
---
tests/testthat/test-utilities.R | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R
index 8aa10888..bd7d4a12 100644
--- a/tests/testthat/test-utilities.R
+++ b/tests/testthat/test-utilities.R
@@ -364,6 +364,12 @@ test_that("frm_find_and_replace works as expected", {
expect_equal(actual, expected)
-
+ # Special names / special characters
+ frm <- ~ ` .. !abc & `:x - 1 * x
+ actual <- frm_find_and_replace(frm, as.name(" .. !abc & "), as.name("bob"))
+ expected <- ~ bob:x - 1 * x
+ environment(actual) <- globalenv()
+ environment(expected) <- globalenv()
+ expect_equal(actual, expected)
})
From c520ece2955f9b2ed8e98726ca48e07c6e63c431 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 11:20:43 +0100
Subject: [PATCH 05/15] all hail lintr
---
tests/testthat/test-utilities.R | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R
index bd7d4a12..098e36ae 100644
--- a/tests/testthat/test-utilities.R
+++ b/tests/testthat/test-utilities.R
@@ -353,7 +353,7 @@ test_that("frm_find_and_replace works as expected", {
# - No arg functions k()
# - 1-arg functions h(z)
# - 2-arg functions f(z, z)
- # - Constants + 1
+ # - Constants e.g. + 1
# - Data modification functions I(z^2)
# - Name subset e.g. zz doesn't get renamed (where we are searching for z)
frm <- x + z ~ 1 + z + a + b + zz + z:a + a * z + a * b + h(z) + f(z, z) + k() + I(z^2)
@@ -372,4 +372,3 @@ test_that("frm_find_and_replace works as expected", {
environment(expected) <- globalenv()
expect_equal(actual, expected)
})
-
From 9bc47d4f36f3b9c1805573e176c66f34fa903154 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 11:21:19 +0100
Subject: [PATCH 06/15] run roxygen
---
man/frm_find_and_replace.Rd | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
create mode 100644 man/frm_find_and_replace.Rd
diff --git a/man/frm_find_and_replace.Rd b/man/frm_find_and_replace.Rd
new file mode 100644
index 00000000..6b4955a6
--- /dev/null
+++ b/man/frm_find_and_replace.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utilities.R
+\name{frm_find_and_replace}
+\alias{frm_find_and_replace}
+\title{Recursively find and replace symbols in a language object}
+\usage{
+frm_find_and_replace(frm, find_sym, replace_sym)
+}
+\arguments{
+\item{frm}{A language object (e.g., call, expression, or list of calls) to search and modify.}
+
+\item{find_sym}{A symbol (as a name) to find within \code{frm}.}
+
+\item{replace_sym}{A symbol (as a name) to replace \code{find_sym} with.}
+}
+\value{
+The modified language object with all instances of \code{find_sym} replaced by \code{replace_sym}.
+}
+\description{
+This function traverses a language object (such as an expression or call)
+and recursively replaces all occurrences of a specified symbol with another symbol.
+}
+\examples{
+expr <- quote(a + b * c)
+frm_find_and_replace(expr, as.name("b"), as.name("x"))
+# Returns: a + x * c
+
+}
+\keyword{internal}
From b4dae5ff5341c293bd2c0cb73a9cc3fb0ac109fe Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 11:58:57 +0100
Subject: [PATCH 07/15] added ancova unit tests
---
R/ancova.R | 6 +-
tests/testthat/test-ancova.R | 382 +++++++++++++++++++++++++++++++++++
2 files changed, 385 insertions(+), 3 deletions(-)
diff --git a/R/ancova.R b/R/ancova.R
index 12ead3ae..7dfc765c 100644
--- a/R/ancova.R
+++ b/R/ancova.R
@@ -255,7 +255,7 @@ ancova_single_m_group <- function(
is.factor(data[[group]]),
length(levels(data[[group]])) >= 2,
length(unique(data[[group]])) == length(levels(data[[group]])),
- msg = "`data[[vars$group]]` must be a factor variable with 2 levels"
+ msg = "`data[[vars$group]]` must be a factor variable with >=2 levels"
)
data2 <- data[, c(extract_covariates(covariates), outcome, group)]
@@ -264,9 +264,9 @@ ancova_single_m_group <- function(
levels(data2[[group]]) <- paste0("L", seq_along(levels(data2[[group]])) - 1)
assert_that(
!"rbmiGroup" %in% names(data),
- msg = c(
+ msg = paste(
"`rbmiGroup` is a reserved variable name for internal use, please rename",
- " your variable to avoid conflicts"
+ "your variable to avoid conflicts"
)
)
data2[["rbmiGroup"]] <- data2[[group]]
diff --git a/tests/testthat/test-ancova.R b/tests/testthat/test-ancova.R
index 5226131f..1f2cb714 100644
--- a/tests/testthat/test-ancova.R
+++ b/tests/testthat/test-ancova.R
@@ -236,3 +236,385 @@ test_that("ancova", {
regex = "`k`"
)
})
+
+
+test_that("ancova_m_groups - basic 3-group functionality", {
+ set.seed(201)
+
+ n <- 900
+ dat <- tibble(
+ visit = "vis1",
+ age = rnorm(n),
+ sex = factor(sample(c("M", "F"), size = n, replace = TRUE)),
+ grp = factor(sample(c("Control", "Trt1", "Trt2"), size = n, replace = TRUE)),
+ out = rnorm(n, mean = 50 + 2 * age + 3 * f2n(sex) +
+ 4 * (grp == "Trt1") + 6 * (grp == "Trt2"), sd = 10)
+ )
+
+ # Manual model for comparison
+ mod <- lm(out ~ age + sex + grp, data = dat)
+
+ # Test ancova_m_groups
+ result_actual <- ancova_m_groups(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = c("age", "sex"),
+ visit = "visit"
+ )
+ )
+
+ # Check output structure
+ expected_names <- c(
+ "trt_L1_L0_vis1", "trt_L2_L0_vis1", # Treatment effects vs reference
+ "lsm_L0_vis1", "lsm_L1_vis1", "lsm_L2_vis1" # LSMeans for all groups
+ )
+ expect_equal(sort(names(result_actual)), sort(expected_names))
+
+
+ expected_trt_L1_L0_vis1 <- list(
+ est = coef(mod)[["grpTrt1"]],
+ se = sqrt(vcov(mod)["grpTrt1", "grpTrt1"]),
+ df = df.residual(mod)
+ )
+ expect_equal(result_actual$trt_L1_L0_vis1, expected_trt_L1_L0_vis1)
+
+
+ expected_trt_L2_L0_vis1 <- list(
+ est = coef(mod)[["grpTrt2"]],
+ se = sqrt(vcov(mod)["grpTrt2", "grpTrt2"]),
+ df = df.residual(mod)
+ )
+ expect_equal(result_actual$trt_L2_L0_vis1, expected_trt_L2_L0_vis1)
+})
+
+
+test_that("ancova_m_groups - 4-group functionality", {
+ set.seed(301)
+
+ n <- 800
+ grp_levels <- c("Placebo", "Low", "Medium", "High")
+ dat <- tibble(
+ visit = "baseline",
+ baseline_score = rnorm(n, mean = 30, sd = 8),
+ grp = factor(sample(grp_levels, size = n, replace = TRUE), levels = grp_levels),
+ out = rnorm(n, mean = 20 + 0.7 * baseline_score +
+ 2 * (grp == "Low") +
+ 5 * (grp == "Medium") +
+ 8 * (grp == "High"), sd = 6)
+ )
+
+ # Manual model for comparison
+ mod <- lm(out ~ baseline_score + grp, data = dat)
+
+ result_actual <- ancova_m_groups(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = "baseline_score",
+ visit = "visit"
+ )
+ )
+
+ # Check we have all expected components
+ expected_names <- c(
+ "trt_L1_L0_baseline", "trt_L2_L0_baseline", "trt_L3_L0_baseline", # 3 treatment effects
+ "lsm_L0_baseline", "lsm_L1_baseline", "lsm_L2_baseline", "lsm_L3_baseline" # 4 LSMeans
+ )
+ expect_equal(sort(names(result_actual)), sort(expected_names))
+
+ # Verify treatment effects
+ expect_equal(result_actual$trt_L1_L0_baseline$est, coef(mod)[["grpLow"]], tolerance = 1e-10)
+ expect_equal(result_actual$trt_L2_L0_baseline$est, coef(mod)[["grpMedium"]], tolerance = 1e-10)
+ expect_equal(result_actual$trt_L3_L0_baseline$est, coef(mod)[["grpHigh"]], tolerance = 1e-10)
+})
+
+
+test_that("ancova_m_groups - LSMeans with group interactions", {
+ set.seed(401)
+
+ grp_levels <- c("Control", "Treatment1", "Treatment2")
+ sex_levels <- c("Male", "Female")
+ n <- 600
+ dat <- tibble(
+ visit = "week12",
+ age = rnorm(n, mean = 45, sd = 12),
+ sex = factor(sample(sex_levels, size = n, replace = TRUE), levels = sex_levels),
+ grp = factor(sample(grp_levels, size = n, replace = TRUE), levels = grp_levels),
+ # Create interaction effect between group and sex
+ out = rnorm(n, mean = 40 + 0.5 * age +
+ 3 * (sex == "Female") +
+ 5 * (grp == "Treatment1") +
+ 7 * (grp == "Treatment2") +
+ # Group-sex interactions
+ 2 * (grp == "Treatment1" & sex == "Female") +
+ -1 * (grp == "Treatment2" & sex == "Female"),
+ sd = 8)
+ )
+
+ # Test with group*sex interaction in covariates
+ result_actual <- ancova_m_groups(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = c("age", "sex", "grp*sex"), # Note: using rbmiGroup for interaction
+ visit = "visit"
+ )
+ )
+
+ mod <- lm(out ~ age + sex + grp + grp:sex, data = dat)
+
+
+ # Check structure
+ expected_names <- c(
+ "trt_L1_L0_week12", "trt_L2_L0_week12",
+ "lsm_L0_week12", "lsm_L1_week12", "lsm_L2_week12"
+ )
+ expect_equal(sort(names(result_actual)), sort(expected_names))
+
+
+ suppressMessages({
+ emmean_counter <- as.data.frame(
+ emmeans::emmeans(mod, "grp", counterfactual = "grp")
+ )
+ })
+
+ expected <- as.list(emmean_counter[emmean_counter$grp == "Control", c("emmean", "SE", "df")])
+ names(expected) <- c("est", "se", "df")
+ expect_equal(result_actual$lsm_L0_week12, expected)
+
+ expected <- as.list(emmean_counter[emmean_counter$grp == "Treatment1", c("emmean", "SE", "df")])
+ names(expected) <- c("est", "se", "df")
+ expect_equal(result_actual$lsm_L1_week12, expected)
+
+ expected <- as.list(emmean_counter[emmean_counter$grp == "Treatment2", c("emmean", "SE", "df")])
+ names(expected) <- c("est", "se", "df")
+ expect_equal(result_actual$lsm_L2_week12, expected)
+
+
+ # Same again but with a different weighting scheme
+ result_actual <- ancova_m_groups(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = c("age", "sex", "grp*sex"), # Note: using rbmiGroup for interaction
+ visit = "visit"
+ ),
+ weights = "equal"
+ )
+ suppressMessages({
+ emmean_counter <- as.data.frame(
+ emmeans::emmeans(mod, "grp", "grp")
+ )
+ })
+ expected <- as.list(emmean_counter[emmean_counter$grp == "Control", c("emmean", "SE", "df")])
+ names(expected) <- c("est", "se", "df")
+ expect_equal(result_actual$lsm_L0_week12, expected)
+
+ expected <- as.list(emmean_counter[emmean_counter$grp == "Treatment1", c("emmean", "SE", "df")])
+ names(expected) <- c("est", "se", "df")
+ expect_equal(result_actual$lsm_L1_week12, expected)
+
+ expected <- as.list(emmean_counter[emmean_counter$grp == "Treatment2", c("emmean", "SE", "df")])
+ names(expected) <- c("est", "se", "df")
+ expect_equal(result_actual$lsm_L2_week12, expected)
+
+})
+
+
+test_that("ancova_m_groups - multiple visits", {
+ set.seed(601)
+
+ n_per_visit <- 200
+ grp_levels <- c("A", "B", "C")
+ dat <- bind_rows(
+ tibble(
+ visit = "visit1",
+ age = rnorm(n_per_visit),
+ grp = factor(sample(grp_levels, size = n_per_visit, replace = TRUE), levels = grp_levels),
+ out = rnorm(n_per_visit, mean = 20 + 2 * age +
+ 3 * (grp == "B") + 5 * (grp == "C"), sd = 4)
+ ),
+ tibble(
+ visit = "visit2",
+ age = rnorm(n_per_visit),
+ grp = factor(sample(c("A", "B", "C"), size = n_per_visit, replace = TRUE)),
+ out = rnorm(n_per_visit, mean = 25 + 2 * age +
+ 4 * (grp == "B") + 7 * (grp == "C"), sd = 4)
+ )
+ )
+
+ result_actual <- ancova_m_groups(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = "age",
+ visit = "visit"
+ )
+ )
+
+ # Check we have results for both visits
+ visit1_names <- grep("_visit1$", names(result_actual), value = TRUE)
+ visit2_names <- grep("_visit2$", names(result_actual), value = TRUE)
+
+ expect_length(visit1_names, 5) # 2 treatment effects + 3 LSMeans
+ expect_length(visit2_names, 5) # 2 treatment effects + 3 LSMeans
+
+ expected_names <- c(
+ "trt_L1_L0_visit1", "trt_L2_L0_visit1", "lsm_L0_visit1", "lsm_L1_visit1", "lsm_L2_visit1",
+ "trt_L1_L0_visit2", "trt_L2_L0_visit2", "lsm_L0_visit2", "lsm_L1_visit2", "lsm_L2_visit2"
+ )
+ expect_true(all(expected_names %in% names(result_actual)))
+})
+
+
+test_that("ancova_m_groups - no covariates", {
+ set.seed(701)
+
+ n <- 300
+ grp_levels <- c("Control", "Low", "High")
+ dat <- tibble(
+ visit = "final",
+ grp = factor(sample(grp_levels, size = n, replace = TRUE), levels = grp_levels),
+ out = rnorm(n, mean = 40 + 3 * (grp == "Low") + 8 * (grp == "High"), sd = 6)
+ )
+
+ result_actual <- ancova_m_groups(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = NULL,
+ visit = "visit"
+ )
+ )
+
+ # Manual model for comparison
+ mod <- lm(out ~ grp, data = dat)
+
+ # Check treatment effects
+ expect_equal(
+ result_actual$trt_L1_L0_final$est,
+ coef(mod)[["grpLow"]],
+ tolerance = 1e-10
+ )
+ expect_equal(
+ result_actual$trt_L2_L0_final$est,
+ coef(mod)[["grpHigh"]],
+ tolerance = 1e-10
+ )
+
+ # Check we have all expected components
+ expected_names <- c(
+ "trt_L1_L0_final", "trt_L2_L0_final",
+ "lsm_L0_final", "lsm_L1_final", "lsm_L2_final"
+ )
+ expect_equal(sort(names(result_actual)), sort(expected_names))
+})
+
+
+test_that("ancova_m_groups - error handling", {
+ set.seed(801)
+
+ n <- 100
+ dat <- tibble(
+ visit = "v1",
+ age = rnorm(n),
+ grp = factor(sample(c("A", "B"), size = n, replace = TRUE)), # Only 2 groups
+ out = rnorm(n)
+ )
+
+ # Should work with 2 groups (minimum requirement)
+ expect_no_error({
+ result <- ancova_m_groups(
+ dat,
+ list(outcome = "out", group = "grp", covariates = "age", visit = "visit")
+ )
+ })
+
+ # Test with single group - should fail
+ dat_single_group <- dat
+ dat_single_group$grp <- factor(rep("A", n))
+
+ expect_error(
+ ancova_m_groups(
+ dat_single_group,
+ list(outcome = "out", group = "grp", covariates = "age", visit = "visit")
+ ),
+ regexp = "must be a factor variable with >=2 levels"
+ )
+
+ # Test reserved variable name conflict
+ dat_conflict <- dat
+ dat_conflict$rbmiGroup <- 1
+
+ expect_error(
+ ancova_m_groups(
+ dat_conflict,
+ list(outcome = "out", group = "grp", covariates = c("age", "rbmiGroup"), visit = "visit")
+ ),
+ regexp = "rbmiGroup.*reserved variable name"
+ )
+})
+
+
+test_that("ancova_m_groups - backward compatibility with 2-group case", {
+ set.seed(901)
+
+ n <- 400
+ grp_levels <- c("Control", "Treatment")
+ sex_levels <- c("M", "F")
+ dat <- tibble(
+ visit = "week8",
+ age = rnorm(n),
+ sex = factor(sample(sex_levels, size = n, replace = TRUE), levels = sex_levels),
+ grp = factor(sample(grp_levels, size = n, replace = TRUE), levels = grp_levels),
+ out = rnorm(n, mean = 30 + 2 * age + 3 * f2n(sex) + 5 * f2n(grp), sd = 7)
+ )
+
+ # Compare ancova_m_groups with traditional ancova for 2-group case
+ result_traditional <- ancova(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = c("age", "sex"),
+ visit = "visit"
+ )
+ )
+
+ result_multigroup <- ancova_m_groups(
+ dat,
+ list(
+ outcome = "out",
+ group = "grp",
+ covariates = c("age", "sex"),
+ visit = "visit"
+ )
+ )
+
+ expect_equal(
+ result_traditional$trt_week8,
+ result_multigroup$trt_L1_L0_week8,
+ tolerance = 1e-10
+ )
+
+ expect_equal(
+ result_traditional$lsm_ref_week8,
+ result_multigroup$lsm_L0_week8,
+ tolerance = 1e-10
+ )
+
+ expect_equal(
+ result_traditional$lsm_alt_week8,
+ result_multigroup$lsm_L1_week8,
+ tolerance = 1e-10
+ )
+
+ exptect_true(length(result_traditional) == length(result_multigroup))
+})
From cf47a1f4771eb995890f06a654cd816d9bc15a45 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 12:01:02 +0100
Subject: [PATCH 08/15] fix examples for non-exported function
---
R/utilities.R | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/R/utilities.R b/R/utilities.R
index df88c4ba..34217441 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -926,10 +926,11 @@ set_options <- function() {
#' @return The modified language object with all instances of \code{find_sym} replaced by \code{replace_sym}.
#'
#' @examples
+#' \dontrun{
#' expr <- quote(a + b * c)
#' frm_find_and_replace(expr, as.name("b"), as.name("x"))
#' # Returns: a + x * c
-#'
+#' }
#' @keywords internal
frm_find_and_replace <- function(frm, find_sym, replace_sym) {
for (i in seq_along(frm)) {
From 28b0a1e59e8e11eac0e936aacce0fe9566a618b0 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 12:03:49 +0100
Subject: [PATCH 09/15] updated news.md file
---
NEWS.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/NEWS.md b/NEWS.md
index 142d8c05..cb1814f4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,8 @@
* All covariance structures are now also supported for Bayesian multiple imputation: `method_bayes()` gained additional `covariance` and `prior_cov` arguments to allow users to specify the covariance structure and prior for the Bayesian imputation model. Please see the updated statistical specifications vignette for details. (#501, #518)
* New function `mcse()` to calculate the Monte Carlo standard error for pooled estimates from (approximate) Bayesian imputation. (#493)
+* Added support for multi-group ANCOVA analysis with `ancova_m_groups()` function. This enables analysis of covariance with more than two treatment groups, where each non-reference group is compared against the reference group. (#520)
+
# rbmi 1.4.1
From 417374958eef6b48d415c4e868fea9ee98a78c06 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 12:14:22 +0100
Subject: [PATCH 10/15] added docs
---
NAMESPACE | 1 +
R/ancova.R | 127 +++++++++++++++++++++++++++++
man/ancova_core.Rd | 40 +++++++++
man/ancova_m_groups.Rd | 152 +++++++++++++++++++++++++++++++++++
man/ancova_single.Rd | 1 +
man/ancova_single_m_group.Rd | 80 ++++++++++++++++++
man/frm_find_and_replace.Rd | 3 +-
7 files changed, 403 insertions(+), 1 deletion(-)
create mode 100644 man/ancova_core.Rd
create mode 100644 man/ancova_m_groups.Rd
create mode 100644 man/ancova_single_m_group.Rd
diff --git a/NAMESPACE b/NAMESPACE
index 38e6e6e1..41acd84e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -46,6 +46,7 @@ export(Stack)
export(add_class)
export(analyse)
export(ancova)
+export(ancova_m_groups)
export(as_class)
export(as_vcov)
export(control_bayes)
diff --git a/R/ancova.R b/R/ancova.R
index 7dfc765c..4f58c747 100644
--- a/R/ancova.R
+++ b/R/ancova.R
@@ -71,6 +71,73 @@ ancova <- function(
}
+#' Multi-Group Analysis of Covariance
+#'
+#' Performs an analysis of covariance with support for multiple treatment groups,
+#' returning treatment effects (contrasts between each non-reference group and the
+#' reference group) and least square means estimates for all groups.
+#'
+#' @inheritParams ancova
+#'
+#' @details
+#' This function extends [ancova()] to support more than two treatment groups.
+#' The function works as follows:
+#'
+#' 1. Select the first value from `visits`.
+#' 2. Subset the data to only the observations that occurred on this visit.
+#' 3. Fit a linear model as `vars$outcome ~ vars$group + vars$covariates`.
+#' 4. Extract treatment effects (each non-reference group vs reference) and
+#' least square means for all treatment groups.
+#' 5. Repeat for all other values in `visits`.
+#'
+#' The reference group is determined by the first factor level of `vars$group`.
+#' Only pairwise comparisons between each non-reference group and the reference
+#' group are computed (not all pairwise comparisons between groups).
+#'
+#' @section Output Structure:
+#' Results are returned as a named list with elements suffixed by visit name:
+#' ```
+#' list(
+#' trt_L1_L0_visit_1 = list(est = ...), # Group L1 vs L0 (reference)
+#' trt_L2_L0_visit_1 = list(est = ...), # Group L2 vs L0 (reference)
+#' lsm_L0_visit_1 = list(est = ...), # LSMean for reference group L0
+#' lsm_L1_visit_1 = list(est = ...), # LSMean for group L1
+#' lsm_L2_visit_1 = list(est = ...), # LSMean for group L2
+#' ...
+#' )
+#' ```
+#' Group levels are standardized to "L0", "L1", "L2", etc., where "L0" represents
+#' the reference group (first factor level).
+#'
+#' @inheritSection lsmeans Weighting
+#'
+#' @examples
+#' \dontrun{
+#' # 3-group analysis
+#' data$treatment <- factor(data$treatment, levels = c("Control", "Low", "High"))
+#' vars <- set_vars(
+#' outcome = "response",
+#' group = "treatment",
+#' visit = "visit",
+#' covariates = c("age", "sex")
+#' )
+#' result <- ancova_m_groups(data, vars)
+#'
+#' # With group interactions
+#' vars_int <- set_vars(
+#' outcome = "response",
+#' group = "treatment",
+#' visit = "visit",
+#' covariates = c("age", "sex", "treatment*age")
+#' )
+#' result_int <- ancova_m_groups(data, vars_int)
+#' }
+#'
+#' @seealso [ancova()] for two-group analysis
+#' @seealso [analyse()]
+#' @seealso [stats::lm()]
+#' @seealso [set_vars()]
+#' @export
ancova_m_groups <- function(
data,
vars,
@@ -88,6 +155,19 @@ ancova_m_groups <- function(
+#' Core ANCOVA Implementation
+#'
+#' Internal function that provides common functionality for both two-group and
+#' multi-group ANCOVA analyses. This function handles input validation, visit
+#' processing, and delegates the actual analysis to the specified ancova function.
+#'
+#' @inheritParams ancova
+#' @param ancova_fun Function to perform the single-visit ANCOVA analysis.
+#' Should be either [ancova_single()] for two-group analysis or
+#' [ancova_single_m_group()] for multi-group analysis.
+#'
+#'
+#' @keywords internal
ancova_core <- function(
data,
vars,
@@ -198,6 +278,7 @@ ancova_core <- function(
#' }
#' @seealso [ancova()]
#' @importFrom stats lm coef vcov df.residual
+#' @keywords internal
ancova_single <- function(
data,
outcome,
@@ -243,6 +324,52 @@ ancova_single <- function(
+#' Multi-Group ANCOVA Implementation for Single Visit
+#'
+#' Internal function that implements analysis of covariance for multiple treatment
+#' groups within a single visit. This function extends [ancova_single()] to handle
+#' more than two groups.
+#'
+#' @inheritParams ancova_single
+#'
+#' @details
+#' This function:
+#' - Accepts factor variables with 2 or more levels for the group parameter
+#' - Standardizes group level names to "L0", "L1", "L2", etc. for consistent output
+#' - Calculates treatment effects as contrasts between each non-reference group and the reference
+#' - Computes least square means for all groups using the specified weighting strategy
+#'
+#' The reference group is always the first factor level (standardized to "L0").
+#' Treatment effects compare each subsequent group ("L1", "L2", ...) against "L0".
+#'
+#' @section Reserved Variable Names:
+#' This function uses `"rbmiGroup"` as an internal variable name. If your dataset
+#' contains a variable with this name, the function will error.
+#'
+#' @return A named list containing:
+#' \itemize{
+#' \item `trt_L1_L0`, `trt_L2_L0`, etc.: Treatment effects (each group vs reference)
+#' \item `lsm_L0`, `lsm_L1`, `lsm_L2`, etc.: Least square means for all groups
+#' }
+#' Each element contains `est` (estimate), `se` (standard error), and `df` (degrees of freedom).
+#'
+#' @examples
+#' \dontrun{
+#' # Internal function - typically called via ancova_m_groups()
+#' data$grp <- factor(c("Control", "Treatment1", "Treatment2"))
+#' result <- ancova_single_m_group(
+#' data = data,
+#' outcome = "response",
+#' group = "grp",
+#' covariates = c("age", "sex"),
+#' weights = "counterfactual"
+#' )
+#' }
+#'
+#' @seealso [ancova_single()] for two-group analysis
+#' @seealso [ancova_m_groups()] for the user-facing multi-group function
+#' @importFrom stats lm coef vcov df.residual
+#' @keywords internal
ancova_single_m_group <- function(
data,
outcome,
diff --git a/man/ancova_core.Rd b/man/ancova_core.Rd
new file mode 100644
index 00000000..227d7b3a
--- /dev/null
+++ b/man/ancova_core.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ancova.R
+\name{ancova_core}
+\alias{ancova_core}
+\title{Core ANCOVA Implementation}
+\usage{
+ancova_core(
+ data,
+ vars,
+ visits = NULL,
+ weights = c("counterfactual", "equal", "proportional_em", "proportional"),
+ ancova_fun
+)
+}
+\arguments{
+\item{data}{A \code{data.frame} containing the data to be used in the model.}
+
+\item{vars}{A \code{vars} object as generated by \code{\link[=set_vars]{set_vars()}}. Only the \code{group},
+\code{visit}, \code{outcome} and \code{covariates} elements are required. See details.}
+
+\item{visits}{An optional character vector specifying which visits to
+fit the ancova model at. If \code{NULL}, a separate ancova model will be fit to the
+outcomes for each visit (as determined by \code{unique(data[[vars$visit]])}).
+See details.}
+
+\item{weights}{Character, either \code{"counterfactual"} (default), \code{"equal"},
+\code{"proportional_em"} or \code{"proportional"}.
+Specifies the weighting strategy to be used when calculating the lsmeans.
+See the weighting section for more details.}
+
+\item{ancova_fun}{Function to perform the single-visit ANCOVA analysis.
+Should be either \code{\link[=ancova_single]{ancova_single()}} for two-group analysis or
+\code{\link[=ancova_single_m_group]{ancova_single_m_group()}} for multi-group analysis.}
+}
+\description{
+Internal function that provides common functionality for both two-group and
+multi-group ANCOVA analyses. This function handles input validation, visit
+processing, and delegates the actual analysis to the specified ancova function.
+}
+\keyword{internal}
diff --git a/man/ancova_m_groups.Rd b/man/ancova_m_groups.Rd
new file mode 100644
index 00000000..d6006d94
--- /dev/null
+++ b/man/ancova_m_groups.Rd
@@ -0,0 +1,152 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ancova.R
+\name{ancova_m_groups}
+\alias{ancova_m_groups}
+\title{Multi-Group Analysis of Covariance}
+\usage{
+ancova_m_groups(
+ data,
+ vars,
+ visits = NULL,
+ weights = c("counterfactual", "equal", "proportional_em", "proportional")
+)
+}
+\arguments{
+\item{data}{A \code{data.frame} containing the data to be used in the model.}
+
+\item{vars}{A \code{vars} object as generated by \code{\link[=set_vars]{set_vars()}}. Only the \code{group},
+\code{visit}, \code{outcome} and \code{covariates} elements are required. See details.}
+
+\item{visits}{An optional character vector specifying which visits to
+fit the ancova model at. If \code{NULL}, a separate ancova model will be fit to the
+outcomes for each visit (as determined by \code{unique(data[[vars$visit]])}).
+See details.}
+
+\item{weights}{Character, either \code{"counterfactual"} (default), \code{"equal"},
+\code{"proportional_em"} or \code{"proportional"}.
+Specifies the weighting strategy to be used when calculating the lsmeans.
+See the weighting section for more details.}
+}
+\description{
+Performs an analysis of covariance with support for multiple treatment groups,
+returning treatment effects (contrasts between each non-reference group and the
+reference group) and least square means estimates for all groups.
+}
+\details{
+This function extends \code{\link[=ancova]{ancova()}} to support more than two treatment groups.
+The function works as follows:
+\enumerate{
+\item Select the first value from \code{visits}.
+\item Subset the data to only the observations that occurred on this visit.
+\item Fit a linear model as \code{vars$outcome ~ vars$group + vars$covariates}.
+\item Extract treatment effects (each non-reference group vs reference) and
+least square means for all treatment groups.
+\item Repeat for all other values in \code{visits}.
+}
+
+The reference group is determined by the first factor level of \code{vars$group}.
+Only pairwise comparisons between each non-reference group and the reference
+group are computed (not all pairwise comparisons between groups).
+}
+\section{Output Structure}{
+
+Results are returned as a named list with elements suffixed by visit name:
+
+\if{html}{\out{}}\preformatted{list(
+ trt_L1_L0_visit_1 = list(est = ...), # Group L1 vs L0 (reference)
+ trt_L2_L0_visit_1 = list(est = ...), # Group L2 vs L0 (reference)
+ lsm_L0_visit_1 = list(est = ...), # LSMean for reference group L0
+ lsm_L1_visit_1 = list(est = ...), # LSMean for group L1
+ lsm_L2_visit_1 = list(est = ...), # LSMean for group L2
+ ...
+)
+}\if{html}{\out{
}}
+
+Group levels are standardized to "L0", "L1", "L2", etc., where "L0" represents
+the reference group (first factor level).
+}
+
+\section{Weighting}{
+
+\subsection{Counterfactual}{
+
+For \code{weights = "counterfactual"} (the default) the lsmeans are obtained by
+taking the average of the predicted values for each patient after assigning all patients
+to each arm in turn.
+This approach is equivalent to standardization or g-computation.
+In comparison to \code{emmeans} this approach is equivalent to:
+
+\if{html}{\out{}}\preformatted{emmeans::emmeans(model, specs = "", counterfactual = "")
+}\if{html}{\out{
}}
+
+Note that to ensure backwards compatibility with previous versions of \code{rbmi}
+\code{weights = "proportional"} is an alias for \code{weights = "counterfactual"}.
+To get results consistent with \code{emmeans}'s \code{weights = "proportional"}
+please use \code{weights = "proportional_em"}.
+}
+
+\subsection{Equal}{
+
+For \code{weights = "equal"} the lsmeans are obtained by taking the model fitted
+value of a hypothetical patient whose covariates are defined as follows:
+\itemize{
+\item Continuous covariates are set to \code{mean(X)}
+\item Dummy categorical variables are set to \code{1/N} where \code{N} is the number of levels
+\item Continuous * continuous interactions are set to \code{mean(X) * mean(Y)}
+\item Continuous * categorical interactions are set to \code{mean(X) * 1/N}
+\item Dummy categorical * categorical interactions are set to \code{1/N * 1/M}
+}
+
+In comparison to \code{emmeans} this approach is equivalent to:
+
+\if{html}{\out{}}\preformatted{emmeans::emmeans(model, specs = "", weights = "equal")
+}\if{html}{\out{
}}
+}
+
+\subsection{Proportional}{
+
+For \code{weights = "proportional_em"} the lsmeans are obtained as per \code{weights = "equal"}
+except instead of weighting each observation equally they are weighted by the proportion
+in which the given combination of categorical values occurred in the data.
+In comparison to \code{emmeans} this approach is equivalent to:
+
+\if{html}{\out{}}\preformatted{emmeans::emmeans(model, specs = "", weights = "proportional")
+}\if{html}{\out{
}}
+
+Note that this is not to be confused with \code{weights = "proportional"} which is an alias
+for \code{weights = "counterfactual"}.
+}
+}
+
+\examples{
+\dontrun{
+# 3-group analysis
+data$treatment <- factor(data$treatment, levels = c("Control", "Low", "High"))
+vars <- set_vars(
+ outcome = "response",
+ group = "treatment",
+ visit = "visit",
+ covariates = c("age", "sex")
+)
+result <- ancova_m_groups(data, vars)
+
+# With group interactions
+vars_int <- set_vars(
+ outcome = "response",
+ group = "treatment",
+ visit = "visit",
+ covariates = c("age", "sex", "treatment*age")
+)
+result_int <- ancova_m_groups(data, vars_int)
+}
+
+}
+\seealso{
+\code{\link[=ancova]{ancova()}} for two-group analysis
+
+\code{\link[=analyse]{analyse()}}
+
+\code{\link[stats:lm]{stats::lm()}}
+
+\code{\link[=set_vars]{set_vars()}}
+}
diff --git a/man/ancova_single.Rd b/man/ancova_single.Rd
index 61cecb29..377b37d5 100644
--- a/man/ancova_single.Rd
+++ b/man/ancova_single.Rd
@@ -98,3 +98,4 @@ ancova_single(iris2, "Sepal.Length", "Species", c("Petal.Length * Petal.Width"))
\seealso{
\code{\link[=ancova]{ancova()}}
}
+\keyword{internal}
diff --git a/man/ancova_single_m_group.Rd b/man/ancova_single_m_group.Rd
new file mode 100644
index 00000000..98789b59
--- /dev/null
+++ b/man/ancova_single_m_group.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ancova.R
+\name{ancova_single_m_group}
+\alias{ancova_single_m_group}
+\title{Multi-Group ANCOVA Implementation for Single Visit}
+\usage{
+ancova_single_m_group(
+ data,
+ outcome,
+ group,
+ covariates,
+ weights = c("counterfactual", "equal", "proportional_em", "proportional")
+)
+}
+\arguments{
+\item{data}{A \code{data.frame} containing the data to be used in the model.}
+
+\item{outcome}{Character, the name of the outcome variable in \code{data}.}
+
+\item{group}{Character, the name of the group variable in \code{data}.}
+
+\item{covariates}{Character vector containing the name of any additional covariates
+to be included in the model as well as any interaction terms.}
+
+\item{weights}{Character, either \code{"counterfactual"} (default), \code{"equal"},
+\code{"proportional_em"} or \code{"proportional"}.
+Specifies the weighting strategy to be used when calculating the lsmeans.
+See the weighting section for more details.}
+}
+\value{
+A named list containing:
+\itemize{
+\item \code{trt_L1_L0}, \code{trt_L2_L0}, etc.: Treatment effects (each group vs reference)
+\item \code{lsm_L0}, \code{lsm_L1}, \code{lsm_L2}, etc.: Least square means for all groups
+}
+Each element contains \code{est} (estimate), \code{se} (standard error), and \code{df} (degrees of freedom).
+}
+\description{
+Internal function that implements analysis of covariance for multiple treatment
+groups within a single visit. This function extends \code{\link[=ancova_single]{ancova_single()}} to handle
+more than two groups.
+}
+\details{
+This function:
+\itemize{
+\item Accepts factor variables with 2 or more levels for the group parameter
+\item Standardizes group level names to "L0", "L1", "L2", etc. for consistent output
+\item Calculates treatment effects as contrasts between each non-reference group and the reference
+\item Computes least square means for all groups using the specified weighting strategy
+}
+
+The reference group is always the first factor level (standardized to "L0").
+Treatment effects compare each subsequent group ("L1", "L2", ...) against "L0".
+}
+\section{Reserved Variable Names}{
+
+This function uses \code{"rbmiGroup"} as an internal variable name. If your dataset
+contains a variable with this name, the function will error.
+}
+
+\examples{
+\dontrun{
+# Internal function - typically called via ancova_m_groups()
+data$grp <- factor(c("Control", "Treatment1", "Treatment2"))
+result <- ancova_single_m_group(
+ data = data,
+ outcome = "response",
+ group = "grp",
+ covariates = c("age", "sex"),
+ weights = "counterfactual"
+)
+}
+
+}
+\seealso{
+\code{\link[=ancova_single]{ancova_single()}} for two-group analysis
+
+\code{\link[=ancova_m_groups]{ancova_m_groups()}} for the user-facing multi-group function
+}
+\keyword{internal}
diff --git a/man/frm_find_and_replace.Rd b/man/frm_find_and_replace.Rd
index 6b4955a6..29df03c3 100644
--- a/man/frm_find_and_replace.Rd
+++ b/man/frm_find_and_replace.Rd
@@ -21,9 +21,10 @@ This function traverses a language object (such as an expression or call)
and recursively replaces all occurrences of a specified symbol with another symbol.
}
\examples{
+\dontrun{
expr <- quote(a + b * c)
frm_find_and_replace(expr, as.name("b"), as.name("x"))
# Returns: a + x * c
-
+}
}
\keyword{internal}
From 6507320ce4c46b13ede50ea6c138b19481737222 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 12:15:15 +0100
Subject: [PATCH 11/15] fix broken test
---
tests/testthat/test-ancova.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/testthat/test-ancova.R b/tests/testthat/test-ancova.R
index 1f2cb714..8e9340be 100644
--- a/tests/testthat/test-ancova.R
+++ b/tests/testthat/test-ancova.R
@@ -616,5 +616,5 @@ test_that("ancova_m_groups - backward compatibility with 2-group case", {
tolerance = 1e-10
)
- exptect_true(length(result_traditional) == length(result_multigroup))
+ expect_true(length(result_traditional) == length(result_multigroup))
})
From d3dddb74c668a1c13e2963a506767fb7c59815f7 Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 12:37:44 +0100
Subject: [PATCH 12/15] updated docs / specs
---
R/analyse.R | 5 +++
R/ancova.R | 1 +
man/analyse.Rd | 5 +++
man/ancova.Rd | 2 +
vignettes/quickstart.Rmd | 11 +++++
vignettes/quickstart.html | 90 ++++++++++++++++++++-------------------
6 files changed, 71 insertions(+), 43 deletions(-)
diff --git a/R/analyse.R b/R/analyse.R
index b765fb4d..49167082 100644
--- a/R/analyse.R
+++ b/R/analyse.R
@@ -58,6 +58,11 @@
#' a linear transformation of the outcomes.
#' Thus care is required when applying alternative analysis functions in this setting.
#'
+#' Note that as of verion 1.4.2 rbmi provides the `ancova_m_groups()` function; this is an extension to
+#' the `ancova()` function which supports the analysis of more than 2 treatment arms.
+#' The `ancova()` function is left as the default for backwards compatibility as `ancova_m_groups()`
+#' uses a different naming structure.
+#'
#' The `delta` argument can be used to specify offsets to be applied
#' to the outcome variable in the imputed datasets prior to the analysis.
#' This is typically used for sensitivity or tipping point analyses. The
diff --git a/R/ancova.R b/R/ancova.R
index 4f58c747..8a520138 100644
--- a/R/ancova.R
+++ b/R/ancova.R
@@ -51,6 +51,7 @@
#'
#' @inheritSection lsmeans Weighting
#'
+#' @seealso [ancova_m_groups()] for multi-group analysis
#' @seealso [analyse()]
#' @seealso [stats::lm()]
#' @seealso [set_vars()]
diff --git a/man/analyse.Rd b/man/analyse.Rd
index b69a5440..eecaf43c 100644
--- a/man/analyse.Rd
+++ b/man/analyse.Rd
@@ -89,6 +89,11 @@ method (\code{method = method_condmean()} in \code{\link[=draws]{draws()}}) reli
a linear transformation of the outcomes.
Thus care is required when applying alternative analysis functions in this setting.
+Note that as of verion 1.4.2 rbmi provides the \code{ancova_m_groups()} function; this is an extension to
+the \code{ancova()} function which supports the analysis of more than 2 treatment arms.
+The \code{ancova()} function is left as the default for backwards compatibility as \code{ancova_m_groups()}
+uses a different naming structure.
+
The \code{delta} argument can be used to specify offsets to be applied
to the outcome variable in the imputed datasets prior to the analysis.
This is typically used for sensitivity or tipping point analyses. The
diff --git a/man/ancova.Rd b/man/ancova.Rd
index dd5926e2..2ef6d4e9 100644
--- a/man/ancova.Rd
+++ b/man/ancova.Rd
@@ -120,6 +120,8 @@ for \code{weights = "counterfactual"}.
}
\seealso{
+\code{\link[=ancova_m_groups]{ancova_m_groups()}} for multi-group analysis
+
\code{\link[=analyse]{analyse()}}
\code{\link[stats:lm]{stats::lm()}}
diff --git a/vignettes/quickstart.Rmd b/vignettes/quickstart.Rmd
index 20e52a4e..75f2cc66 100644
--- a/vignettes/quickstart.Rmd
+++ b/vignettes/quickstart.Rmd
@@ -229,6 +229,17 @@ function which determines the names of the key variables within the data and the
Please also note that the names of the analysis estimates contain "ref" and "alt" to refer to the two treatment arms. In particular "ref" refers to the first factor level of `vars$group` which does not necessarily
coincide with the control arm. In this example, since `levels(dat[[vars$group]]) = c("DRUG", PLACEBO`), the results associated with "ref" correspond to the intervention arm, while those associated with "alt" correspond to the control arm.
+Note that as of verion 1.4.2 rbmi provides the `ancova_m_groups()` function; this is an extension to
+the `ancova()` function which supports the analysis of more than 2 treatment arms.
+The `ancova()` function is left as the default for backwards compatibility as `ancova_m_groups()`
+uses a different naming structure. That is, instead of `"ref"` and "`alt`" the values `"L0"`, `"L1"`, `"L2"`, ...
+are used to represent the different arms where `"L0"` is the reference level and `"L1"` is the first
+offset group (e.g. the second factor level). That is the resulting list will contain entries such as:
+`lsm_L1_day40` which contains the least square mean for the second factor level at the `"day40"`
+visit. For treatment effects it will contain entries such as `trt_L1_L0_day40` which contains
+the treatment effect for the second factor level compared to the reference level at the `"day40"`
+visit. See the documentation for `ancova_m_groups()` for more details.
+
Additionally, we can use the `delta` argument of `analyse()` to perform a delta adjustments of the imputed datasets prior to the analysis.
In brief, this is implemented by specifying a data.frame that contains the amount
of adjustment to be added to each longitudinal outcome for each subject and visit, i.e.
diff --git a/vignettes/quickstart.html b/vignettes/quickstart.html
index b9b89f9d..5aa2d52b 100644
--- a/vignettes/quickstart.html
+++ b/vignettes/quickstart.html
@@ -360,17 +360,9 @@ The Data
We use a publicly available example dataset from an antidepressant clinical trial of an active drug versus placebo. The relevant endpoint is the Hamilton 17-item depression rating scale (HAMD17) which was assessed at baseline and at weeks 1, 2, 4, and 6. Study drug discontinuation occurred in 24% of subjects from the active drug and 26% of subjects from placebo. All data after study drug discontinuation are missing and there is a single additional intermittent missing observation.
library(rbmi)
library(dplyr)
-#>
-#> Attaching package: 'dplyr'
-#> The following objects are masked from 'package:stats':
-#>
-#> filter, lag
-#> The following objects are masked from 'package:base':
-#>
-#> intersect, setdiff, setequal, union
-
-data("antidepressant_data")
-dat <- antidepressant_data
+
+data("antidepressant_data")
+dat <- antidepressant_data
We consider an imputation model with the mean change from baseline in the HAMD17 score as the outcome (variable CHANGE
in the dataset). The following covariates are included in the imputation model: the treatment group (THERAPY
), the (categorical) visit (VISIT
), treatment-by-visit interactions, the baseline HAMD17 score (BASVAL
), and baseline HAMD17 score-by-visit interactions. A common unstructured covariance matrix structure is assumed for both groups. The analysis model is an ANCOVA model with the treatment group as the primary factor and adjustment for the baseline HAMD17 score.
rbmi
expects its input dataset to be complete; that is, there must be one row per subject for each visit. Missing outcome values should be coded as NA
, while missing covariate values are not allowed. If the dataset is incomplete, then the expand_locf()
helper function can be used to add any missing rows, using LOCF imputation to carry forward the observed baseline covariate values to visits with missing outcomes. Rows corresponding to missing outcomes are not present in the antidepressant trial dataset. To address this we will therefore use the expand_locf()
function as follows:
@@ -469,14 +461,16 @@ Draws
#> Imputation Type: random
#> Method:
#> name: Bayes
-#> same_cov: TRUE
-#> n_samples: 150
-#> Controls:
-#> warmup: 200
-#> thin: 5
-#> chains: 1
-#> init: mmrm
-#> seed: 214647346
+#> covariance: us
+#> same_cov: TRUE
+#> n_samples: 150
+#> prior_cov: default
+#> Controls:
+#> warmup: 200
+#> thin: 5
+#> chains: 1
+#> init: mmrm
+#> seed: 1074096309
Note the use of set_vars()
which specifies the names of the key variables
within the dataset and the imputation model. Additionally, note that whilst vars$group
and vars$visit
are added as terms to the imputation model by default, their interaction is not,
@@ -603,6 +597,16 @@
Analyse
(in addition to the treatment group) for which the analysis model will be adjusted.
Please also note that the names of the analysis estimates contain “ref” and “alt” to refer to the two treatment arms. In particular “ref” refers to the first factor level of vars$group
which does not necessarily
coincide with the control arm. In this example, since levels(dat[[vars$group]]) = c("DRUG", PLACEBO
), the results associated with “ref” correspond to the intervention arm, while those associated with “alt” correspond to the control arm.
+Note that as of verion 1.4.2 rbmi provides the ancova_m_groups()
function; this is an extension to
+the ancova()
function which supports the analysis of more than 2 treatment arms.
+The ancova()
function is left as the default for backwards compatibility as ancova_m_groups()
+uses a different naming structure. That is, instead of "ref"
and “alt
” the values "L0"
, "L1"
, "L2"
, …
+are used to represent the different arms where "L0"
is the reference level and "L1"
is the first
+offset group (e.g. the second factor level). That is the resulting list will contain entries such as:
+lsm_L1_day40
which contains the least square mean for the second factor level at the "day40"
+visit. For treatment effects it will contain entries such as trt_L1_L0_day40
which contains
+the treatment effect for the second factor level compared to the reference level at the "day40"
+visit. See the documentation for ancova_m_groups()
for more details.
Additionally, we can use the delta
argument of analyse()
to perform a delta adjustments of the imputed datasets prior to the analysis.
In brief, this is implemented by specifying a data.frame that contains the amount
of adjustment to be added to each longitudinal outcome for each subject and visit, i.e.
@@ -701,34 +705,34 @@
Pool
#> trt_4 -0.092 0.683 -1.439 1.256 0.893
#> lsm_ref_4 -1.616 0.486 -2.576 -0.656 0.001
#> lsm_alt_4 -1.708 0.475 -2.645 -0.77 <0.001
-#> trt_5 1.335 0.923 -0.488 3.158 0.15
-#> lsm_ref_5 -4.154 0.659 -5.455 -2.853 <0.001
-#> lsm_alt_5 -2.819 0.646 -4.095 -1.542 <0.001
-#> trt_6 1.954 0.998 -0.016 3.925 0.052
-#> lsm_ref_6 -6.087 0.717 -7.503 -4.671 <0.001
-#> lsm_alt_6 -4.133 0.703 -5.521 -2.745 <0.001
-#> trt_7 2.178 1.129 -0.053 4.41 0.056
-#> lsm_ref_7 -6.974 0.821 -8.597 -5.351 <0.001
-#> lsm_alt_7 -4.796 0.784 -6.345 -3.246 <0.001
+#> trt_5 1.332 0.922 -0.489 3.154 0.151
+#> lsm_ref_5 -4.146 0.658 -5.446 -2.845 <0.001
+#> lsm_alt_5 -2.813 0.644 -4.085 -1.542 <0.001
+#> trt_6 1.946 0.995 -0.02 3.912 0.052
+#> lsm_ref_6 -6.097 0.713 -7.505 -4.689 <0.001
+#> lsm_alt_6 -4.151 0.7 -5.534 -2.768 <0.001
+#> trt_7 2.172 1.118 -0.038 4.382 0.054
+#> lsm_ref_7 -6.987 0.817 -8.603 -5.372 <0.001
+#> lsm_alt_7 -4.816 0.782 -6.362 -3.269 <0.001
#> --------------------------------------------------
The table of values shown in the print message for poolObj
can also be extracted using the as.data.frame()
function:
as.data.frame(poolObj)
-#> parameter est se lci uci pval
-#> 1 trt_4 -0.09180645 0.6826279 -1.4394968 1.2558839 8.931772e-01
-#> 2 lsm_ref_4 -1.61581996 0.4862316 -2.5757714 -0.6558685 1.093708e-03
-#> 3 lsm_alt_4 -1.70762640 0.4749573 -2.6453193 -0.7699335 4.262148e-04
-#> 4 trt_5 1.33531976 0.9230204 -0.4876244 3.1582639 1.499512e-01
-#> 5 lsm_ref_5 -4.15415338 0.6588023 -5.4553174 -2.8529894 2.730742e-09
-#> 6 lsm_alt_5 -2.81883362 0.6463331 -4.0954650 -1.5422022 2.331688e-05
-#> 7 trt_6 1.95436277 0.9976289 -0.0164307 3.9251562 5.191672e-02
-#> 8 lsm_ref_6 -6.08732723 0.7166468 -7.5032662 -4.6713882 1.747812e-14
-#> 9 lsm_alt_6 -4.13296446 0.7025598 -5.5211647 -2.7447642 2.534885e-08
-#> 10 trt_7 2.17814646 1.1288440 -0.0532519 4.4095448 5.564659e-02
-#> 11 lsm_ref_7 -6.97364970 0.8207107 -8.5966654 -5.3506340 3.049429e-14
-#> 12 lsm_alt_7 -4.79550324 0.7838188 -6.3448217 -3.2461848 8.537927e-09
+#> parameter est se lci uci pval
+#> 1 trt_4 -0.09180645 0.6826279 -1.43949684 1.2558839 8.931772e-01
+#> 2 lsm_ref_4 -1.61581996 0.4862316 -2.57577141 -0.6558685 1.093708e-03
+#> 3 lsm_alt_4 -1.70762640 0.4749573 -2.64531931 -0.7699335 4.262148e-04
+#> 4 trt_5 1.33244564 0.9223725 -0.48916744 3.1540587 1.505325e-01
+#> 5 lsm_ref_5 -4.14553184 0.6584403 -5.44594687 -2.8451168 2.851010e-09
+#> 6 lsm_alt_5 -2.81308620 0.6437591 -4.08452555 -1.5416468 2.238611e-05
+#> 7 trt_6 1.94579706 0.9951403 -0.02003552 3.9116296 5.235157e-02
+#> 8 lsm_ref_6 -6.09703116 0.7126425 -7.50494478 -4.6891175 1.158175e-14
+#> 9 lsm_alt_6 -4.15123411 0.6998827 -5.53407714 -2.7683911 1.976048e-08
+#> 10 trt_7 2.17178546 1.1181330 -0.03812541 4.3816963 5.403151e-02
+#> 11 lsm_ref_7 -6.98740689 0.8171118 -8.60323818 -5.3715756 2.188701e-14
+#> 12 lsm_alt_7 -4.81562143 0.7823681 -6.36209275 -3.2691501 7.119562e-09
These outputs gives an estimated difference of
-2.178 (95% CI -0.053 to 4.410)
-between the two groups at the last visit with an associated p-value of 0.056.
+2.172 (95% CI -0.038 to 4.382)
+between the two groups at the last visit with an associated p-value of 0.054.
@@ -447,6 +448,20 @@ How can I analyse the change-
vars = vars
)
+
+
+
Is ANCOVA on more than 2 treatment groups supported?
+
This can be implemented by using the ancova_m_groups()
function instead of the default ancova()
function e.g.
+
anaObj <- rbmi::analyse(
+ imputeObj,
+ ancova_m_groups,
+ vars = vars
+ )
+
Please note that the object created by this function uses a different naming convention to
+that of the ancova()
function which means any manual extraction code looking for specific
+names such as "lsm_alt"
will need to be updated. Please see the documentation for ancova_m_groups()
+for more details.
+
White, Ian R, and Simon G Thompson. 2005.
“Adjusting for Partially Missing Baseline Measurements in Randomized Trials.” Statistics in Medicine 24 (7): 993–1007.
From d291fb31cf536a9c413ea7ebd59af97d2c6cb9fe Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 12:44:36 +0100
Subject: [PATCH 14/15] update version number
---
R/analyse.R | 2 +-
vignettes/quickstart.Rmd | 2 +-
vignettes/quickstart.html | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/R/analyse.R b/R/analyse.R
index 49167082..c347cda7 100644
--- a/R/analyse.R
+++ b/R/analyse.R
@@ -58,7 +58,7 @@
#' a linear transformation of the outcomes.
#' Thus care is required when applying alternative analysis functions in this setting.
#'
-#' Note that as of verion 1.4.2 rbmi provides the `ancova_m_groups()` function; this is an extension to
+#' Note that as of verion 1.5.0 rbmi provides the `ancova_m_groups()` function; this is an extension to
#' the `ancova()` function which supports the analysis of more than 2 treatment arms.
#' The `ancova()` function is left as the default for backwards compatibility as `ancova_m_groups()`
#' uses a different naming structure.
diff --git a/vignettes/quickstart.Rmd b/vignettes/quickstart.Rmd
index 75f2cc66..8cf4f989 100644
--- a/vignettes/quickstart.Rmd
+++ b/vignettes/quickstart.Rmd
@@ -229,7 +229,7 @@ function which determines the names of the key variables within the data and the
Please also note that the names of the analysis estimates contain "ref" and "alt" to refer to the two treatment arms. In particular "ref" refers to the first factor level of `vars$group` which does not necessarily
coincide with the control arm. In this example, since `levels(dat[[vars$group]]) = c("DRUG", PLACEBO`), the results associated with "ref" correspond to the intervention arm, while those associated with "alt" correspond to the control arm.
-Note that as of verion 1.4.2 rbmi provides the `ancova_m_groups()` function; this is an extension to
+Note that as of verion 1.5.0 rbmi provides the `ancova_m_groups()` function; this is an extension to
the `ancova()` function which supports the analysis of more than 2 treatment arms.
The `ancova()` function is left as the default for backwards compatibility as `ancova_m_groups()`
uses a different naming structure. That is, instead of `"ref"` and "`alt`" the values `"L0"`, `"L1"`, `"L2"`, ...
diff --git a/vignettes/quickstart.html b/vignettes/quickstart.html
index 5aa2d52b..d1174679 100644
--- a/vignettes/quickstart.html
+++ b/vignettes/quickstart.html
@@ -597,7 +597,7 @@ Analyse
(in addition to the treatment group) for which the analysis model will be adjusted.
Please also note that the names of the analysis estimates contain “ref” and “alt” to refer to the two treatment arms. In particular “ref” refers to the first factor level of vars$group
which does not necessarily
coincide with the control arm. In this example, since levels(dat[[vars$group]]) = c("DRUG", PLACEBO
), the results associated with “ref” correspond to the intervention arm, while those associated with “alt” correspond to the control arm.
-Note that as of verion 1.4.2 rbmi provides the ancova_m_groups()
function; this is an extension to
+
Note that as of verion 1.5.0 rbmi provides the ancova_m_groups()
function; this is an extension to
the ancova()
function which supports the analysis of more than 2 treatment arms.
The ancova()
function is left as the default for backwards compatibility as ancova_m_groups()
uses a different naming structure. That is, instead of "ref"
and “alt
” the values "L0"
, "L1"
, "L2"
, …
From 65266fe050951453c89f8e9621ec4c0d0d77dfdd Mon Sep 17 00:00:00 2001
From: gowercr1
Date: Mon, 18 Aug 2025 12:48:49 +0100
Subject: [PATCH 15/15] roxygen
---
man/analyse.Rd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/man/analyse.Rd b/man/analyse.Rd
index eecaf43c..5eac6d21 100644
--- a/man/analyse.Rd
+++ b/man/analyse.Rd
@@ -89,7 +89,7 @@ method (\code{method = method_condmean()} in \code{\link[=draws]{draws()}}) reli
a linear transformation of the outcomes.
Thus care is required when applying alternative analysis functions in this setting.
-Note that as of verion 1.4.2 rbmi provides the \code{ancova_m_groups()} function; this is an extension to
+Note that as of verion 1.5.0 rbmi provides the \code{ancova_m_groups()} function; this is an extension to
the \code{ancova()} function which supports the analysis of more than 2 treatment arms.
The \code{ancova()} function is left as the default for backwards compatibility as \code{ancova_m_groups()}
uses a different naming structure.