From c85532204382ecaf25df44849b77e5199d0b99d8 Mon Sep 17 00:00:00 2001 From: Franziska Eidloth Date: Mon, 24 Mar 2025 15:08:25 +0100 Subject: [PATCH 1/7] changed median_cl_boot (added roundR); added mean_cl_boot function; expanded compare2numvars (additional option for CI, different display of result: singleline) --- .gitignore | 13 +-- NAMESPACE | 1 + R/descriptives.R | 39 +++++++- R/tests.R | 214 ++++++++++++++++++++++++++++++++--------- man/compare2numvars.Rd | 11 ++- man/mean_cl_boot.Rd | 30 ++++++ man/median_cl_boot.Rd | 4 +- wrappedtools.Rproj | 37 +++---- 8 files changed, 274 insertions(+), 75 deletions(-) create mode 100644 man/mean_cl_boot.Rd diff --git a/.gitignore b/.gitignore index 2eb7b45..ce41520 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata -inst/doc -wrappedtools.Rproj +.Rproj.user +.Rhistory +.RData +.Ruserdata +inst/doc +wrappedtools.Rproj +.DS_Store diff --git a/NAMESPACE b/NAMESPACE index 43efacc..46a620f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(logrange_12357) export(logrange_15) export(logrange_5) export(markSign) +export(mean_cl_boot) export(meansd) export(meanse) export(median_cl_boot) diff --git a/R/descriptives.R b/R/descriptives.R index 70b2ebf..84ff5a2 100644 --- a/R/descriptives.R +++ b/R/descriptives.R @@ -255,6 +255,7 @@ se_median <- function(x) { #' @param conf confidence interval with default 95%. #' @param type type for function boot.ci. #' @param nrepl number of bootstrap replications, defaults to 1000. +#' @param roundDig Number of relevant digits for functio [roundR]. #' #' @return A tibble with one row and three columns: Median, CIlow, CIhigh. 
#' @@ -262,7 +263,7 @@ se_median <- function(x) { #' # basic usage of median_cl_boot #' median_cl_boot(x = mtcars$wt) #' @export -median_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3) { +median_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, roundDig = 2) { x <- na.omit(x) lconf <- (1 - conf) / 2 uconf <- 1 - lconf @@ -271,8 +272,8 @@ median_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3) { bb <- boot::boot.ci(bt, type = type) tibble( Median = median(x, na.rm = TRUE), - CIlow = quantile(bt$t, lconf), - CIhigh = quantile(bt$t, uconf) + CIlow = roundR(quantile(bt$t, lconf), level = roundDig), + CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) ) } #' Rename output from \link{median_cl_boot} for use in ggplot. @@ -296,6 +297,38 @@ median_cl_boot_gg <- function(x){ rename(y="Median",ymin="CIlow",ymax="CIhigh") return(out) } + +#' Compute confidence interval of mean by bootstrapping. +#' +#' \code{mean_cl_boot} computes lower and upper confidence limits for the +#' estimated mean, based on bootstrapping. +#' +#' @param x Data for computation. +#' @param conf confidence interval with default 95%. +#' @param type type for function boot.ci. +#' @param nrepl number of bootstrap replications, defaults to 1000. +#' @param roundDig Number of relevant digits for functio [roundR]. +#' +#' @return A tibble with one row and three columns: Mean, CIlow, CIhigh. 
+#' +#' @examples +#' # basic usage of mean_cl_boot +#' mean_cl_boot(x = mtcars$wt) +#' @export +mean_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, + roundDig = 2) ## +{ + x <- na.omit(x) + lconf <- (1 - conf)/2 + uconf <- 1 - lconf + bmean <- function(x, ind) mean(x[ind], na.rm = TRUE) + bt <- boot::boot(x, bmean, nrepl) + bb <- boot::boot.ci(bt, type = type) + tibble(Mean = mean(x, na.rm = TRUE), + CIlow = roundR(quantile(bt$t, lconf), level = roundDig), + CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) + ) +} #' Compute absolute and relative frequencies. #' #' \code{cat_desc_stats} computes absolute and relative frequencies for diff --git a/R/tests.R b/R/tests.R index f151d0b..92bae7f 100644 --- a/R/tests.R +++ b/R/tests.R @@ -402,6 +402,9 @@ t_var_test <- function(data, formula, cutoff = .05) { #' @param mark for function [formatP]. #' @param n create columns for n per group? #' @param add_n add n to descriptive statistics? +#' @param singleline Put all group levels in a single line (default) or below each other. +#' @param indentor Optional text element to indent descriptivestats when using singleline = FALSE. Defaults to " ". +#' @param ci Computes lower and upper confidence limits for the estimated mean/median, based on bootstrapping. 
#' #' @return #' A tibble with variable names, descriptive statistics, and p-value, @@ -428,14 +431,21 @@ compare2numvars <- function(data, dep_vars, indep_var, range = FALSE, rangesep = " ", pretext = FALSE, mark = FALSE, - n = FALSE, add_n = FALSE) { + n = FALSE, add_n = FALSE, + singleline = TRUE, indentor = " ", ci = FALSE) { `.` <- Group <- Value <- Variable <- desc_groups <- NULL if (gaussian) { DESC <- meansd COMP <- t_var_test + DESC_CI <- mean_cl_boot + string <- "(\\d+ ± \\d+)\\s*(\\[\\d+ -> \\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+ ; \\d+\\])" + order <- "\\1 \\4 \\2 \\3" } else { DESC <- median_quart COMP <- wilcox.test + DESC_CI <- median_cl_boot + string <- "(\\d+)\\s*\\((\\d+/\\d+)\\)\\s*(\\[\\d+\\s*->\\s*\\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+\\s*;\\s*\\d+\\])" + order <- "\\1 (\\2) \\5 \\3 \\4" } # descnames <- names(formals(DESC)) # pnames <- names(formals(COMP)) @@ -458,54 +468,166 @@ compare2numvars <- function(data, dep_vars, indep_var, } data_l <- data_l |> filter(!is.na(Group)) - out <- data_l |> - group_by(Variable) |> - do(summarise( - .data = ., - n_groups = paste(table(.$Group[which(!is.na(.$Value))]), collapse = ":"), - desc_all = DESC(.$Value, - roundDig = round_desc, - range = range, rangesep = rangesep, - add_n = add_n - ), - desc_groups = paste(try( - DESC( - x = .$Value, groupvar = .$Group, - roundDig = round_desc, range = range, - rangesep = rangesep, add_n = add_n - ), - silent = TRUE - ), - collapse = ":" - ), - p = formatP(try( - suppressWarnings(COMP(formula = as.formula("Value~Group"), data = .)$p.value), - silent = TRUE - ), - ndigits = round_p, pretext = pretext, - mark = mark - ) |> as.character() - )) |> - ungroup() - out$desc_groups[!str_detect(out$desc_groups, ":")] <- " : " - out <- separate(out, - col = desc_groups, - into = glue::glue("{indep_var} {levels(data_l$Group)}"), - sep = ":" - ) - out <- separate(out, - col = n_groups, - into = glue::glue("n {indep_var} {levels(data_l$Group)}"), - sep = ":" - ) - out$n <- 
apply(out[, 2:3], 1, function(x) { - sum(as.numeric(x)) - }) - out <- out |> dplyr::select(1, n, starts_with("n "), everything()) - if (n == FALSE) { + out <- data_l |> + group_by(Variable) |> + summarise( + n_groups = paste(table(Group[!is.na(Value)]), collapse = ":"), + desc_all = DESC(Value, roundDig = round_desc, + range = range, + rangesep = rangesep, + add_n = add_n), + all_CI = DESC_CI(Value) |> + transmute(ci = paste0("[", CIlow, "; ", CIhigh, "]")) |> + pull(ci), + desc_groups = try(DESC(Value, groupvar = Group, + roundDig = round_desc, + range = range, rangesep = + rangesep, add_n = add_n), + silent = TRUE) |> + paste(collapse = ":"), + p = try(suppressWarnings(COMP(Value ~ Group, data = cur_data())$p.value), + silent = TRUE) |> + formatP(ndigits = round_p, + pretext = pretext, + mark = mark) |> as.character(), + .groups = "drop" + ) + + group_ci <- data_l |> + group_by(Variable, Group) |> + summarise(ci = DESC_CI(Value) |> + transmute(ci = paste0("[", CIlow, "; ", CIhigh, "]")) |> + pull(ci), + .groups = "drop") |> + pivot_wider(names_from = Group, values_from = ci, names_prefix = "CI_") + + out <- left_join(out, group_ci, by = "Variable") + out <- out |> + separate(desc_groups, into = c("g1", "g2"), + sep = ":", fill = "right") |> + separate(n_groups, into = glue::glue("n {indep_var} {levels(data_l$Group)}"), + ## into = c("n g1", "n g2") + sep = ":") |> + mutate(n = rowSums(across(starts_with("n "), as.numeric), + na.rm = TRUE)) |> + dplyr::select(1, n, starts_with("n "), everything()) + + if (ci){ + out <- out |> + mutate(desc_all = paste(desc_all, all_CI, sep = " ") |> + str_replace( + string, order), + g1 = paste(g1, out[[10]], sep = " ")|> + str_replace( + string, order), + g2 = paste(g2, out[[11]], sep = " ")|> + str_replace( + string, order)) |> + select(-contains("CI")) + } + else{ + out <- out |> + select(-contains("CI")) + } + + if (!n) { out <- dplyr::select(out, -n, -starts_with("n ")) } + + if (!singleline) { + indentor <- 
paste0(rep(spacer, 5), collapse = "") + + out_tmp <- + out |> + select(-starts_with("n")) |> + pivot_longer(cols = -c(Variable, p), + names_to = "group", + values_to = "stats") |> + mutate( + Mean = if (gaussian) { + str_extract(stats, "(\\d+)\\s*±\\s*(\\d+)") |> + str_extract("^\\d+") |> + as.character() + } else { + NA_character_}, + SD = if (gaussian) { + str_extract(stats, "(\\d+)\\s*±\\s*(\\d+)") |> + str_extract("\\d+$") |> + as.character() + } else { + NA_character_}, + Median = if (!gaussian) { + str_extract(stats, "(\\d+)\\s*\\((\\d+/\\d+)\\)") |> + str_extract("^\\d+") |> + as.character() + } else { + NA_character_}, + Quartiles = if (!gaussian) { + str_extract(stats, "(\\d+)\\s*\\((\\d+/\\d+)\\)") |> + str_extract("\\d+/\\d+") |> + as.character() + } else { + NA_character_}, + CI = as.character(str_extract(stats,"\\[\\d+; \\d+\\]")) |> + str_remove_all("[\\[\\]]"), + "min -> max" = str_extract(stats, "\\[\\d+\\s*->\\s*\\d+\\]") |> + str_remove_all("[\\[\\]]"), + n = str_extract(stats, "\\[n=\\d+\\]") |> + str_extract("\\d+") |> + as.character() + ) |> + select(-stats) |> + select_if(~ !any(is.na(.))) |> ## to remove empty columns (e.g. 
if range = FALSE) + pivot_longer(-c(Variable, group, p), + names_to = "stats", + values_to = "values") |> + pivot_wider(names_from = "group", + values_from = "values") + + for (var_i in dep_vars){ ## add additional row with empty fields for groups + row <- filter(out_tmp, Variable == var_i) |> + mutate(stats = "", + desc_all = "", + "g1" = "", + "g2" = "") |> + unique() + out_tmp <- add_row(out_tmp, row) + } + + out <- out_tmp |> + arrange(Variable) |> + group_by(Variable) |> + arrange(stats != "", .by_group = TRUE) |> + ungroup() |> + mutate(Variable = case_when( + stats == "Mean" ~ paste0(indentor, "Mean"), + stats == "Median" ~ paste0(indentor, "Median"), + stats == "CI" ~ paste0(indentor, "95% CI"), + stats == "Quartiles" ~ paste0(indentor, "Quartiles"), + stats == "SD" ~ paste0(indentor, "SD"), + stats == "min -> max" ~ paste0(indentor, "min -> max"), + stats == "n" ~ paste0(indentor, "n"), + TRUE ~ Variable), + p = case_when( + stats == "Mean" ~ "", + stats == "Median" ~ "", + stats == "CI" ~ "", + stats == "SD" ~ "", + stats == "Quartiles" ~ "", + stats == "min -> max" ~ "", + stats == "n" ~ "", + TRUE ~ p) + ) |> + select(Variable, desc_all, g1, g2, p) + } + + out <- out |> + rename( + !!glue::glue("{indep_var} {levels(data_l$Group)[1]}") := "g1", + !!glue::glue("{indep_var} {levels(data_l$Group)[2]}") := "g2" + ) + return(out) } diff --git a/man/compare2numvars.Rd b/man/compare2numvars.Rd index d7c969a..04026b4 100644 --- a/man/compare2numvars.Rd +++ b/man/compare2numvars.Rd @@ -16,7 +16,10 @@ compare2numvars( pretext = FALSE, mark = FALSE, n = FALSE, - add_n = FALSE + add_n = FALSE, + singleline = TRUE, + indentor = " ", + ci = FALSE ) } \arguments{ @@ -43,6 +46,12 @@ compare2numvars( \item{n}{create columns for n per group?} \item{add_n}{add n to descriptive statistics?} + +\item{singleline}{Put all group levels in a single line (default) or below each other.} + +\item{indentor}{Optional text element to indent descriptive stats when using singleline = 
FALSE. Defaults to " ".} + +\item{ci}{Computes lower and upper confidence limits for the estimated mean/median, based on bootstrapping.} } \value{ A tibble with variable names, descriptive statistics, and p-value, diff --git a/man/mean_cl_boot.Rd b/man/mean_cl_boot.Rd new file mode 100644 index 0000000..1e17db6 --- /dev/null +++ b/man/mean_cl_boot.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/descriptives.R +\name{mean_cl_boot} +\alias{mean_cl_boot} +\title{Compute confidence interval of mean by bootstrapping.} +\usage{ +mean_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3, roundDig = 2) +} +\arguments{ +\item{x}{Data for computation.} + +\item{conf}{confidence interval with default 95\%.} + +\item{type}{type for function boot.ci.} + +\item{nrepl}{number of bootstrap replications, defaults to 1000.} + +\item{roundDig}{Number of relevant digits for functio \link{roundR}.} +} +\value{ +A tibble with one row and three columns: Mean, CIlow, CIhigh. +} +\description{ +\code{mean_cl_boot} computes lower and upper confidence limits for the +estimated mean, based on bootstrapping. +} +\examples{ +# basic usage of mean_cl_boot +mean_cl_boot(x = mtcars$wt) +} diff --git a/man/median_cl_boot.Rd b/man/median_cl_boot.Rd index 5c5934c..6379ed5 100644 --- a/man/median_cl_boot.Rd +++ b/man/median_cl_boot.Rd @@ -4,7 +4,7 @@ \alias{median_cl_boot} \title{Compute confidence interval of median by bootstrapping.} \usage{ -median_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3) +median_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3, roundDig = 2) } \arguments{ \item{x}{Data for computation.} @@ -14,6 +14,8 @@ median_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3) \item{type}{type for function boot.ci.} \item{nrepl}{number of bootstrap replications, defaults to 1000.} + +\item{roundDig}{Number of relevant digits for functio \link{roundR}.} } \value{ A tibble with one row and three columns: Median, CIlow, CIhigh. 
diff --git a/wrappedtools.Rproj b/wrappedtools.Rproj index 8cc08ce..d064231 100644 --- a/wrappedtools.Rproj +++ b/wrappedtools.Rproj @@ -1,18 +1,19 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace,vignette +Version: 1.0 +ProjectId: c02b083e-d9d6-41ea-923e-1a11ba0d7278 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace,vignette From 5df0479e84b84a437c2a9826289a754ba2998d9a Mon Sep 17 00:00:00 2001 From: Andreas Busjahn Date: Thu, 27 Mar 2025 15:28:25 +0100 Subject: [PATCH 2/7] Delete wrappedtools.Rproj not relevant part, creates conflicts --- wrappedtools.Rproj | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 wrappedtools.Rproj diff --git a/wrappedtools.Rproj b/wrappedtools.Rproj deleted file mode 100644 index d064231..0000000 --- a/wrappedtools.Rproj +++ /dev/null @@ -1,19 +0,0 @@ -Version: 1.0 -ProjectId: c02b083e-d9d6-41ea-923e-1a11ba0d7278 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace,vignette From ec38a51335ce433b616e6af3160eaaaeca11dea9 Mon Sep 17 00:00:00 2001 From: Franziska Eidloth Date: Mon, 7 Apr 2025 16:31:39 +0200 Subject: [PATCH 
3/7] changed regex in compare2numvars for string, Median and Quartiles; added possible missing functions in pkgstart --- R/pkgstart.R | 2 +- R/tests.R | 44 ++++++++++++++++++++------------------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/R/pkgstart.R b/R/pkgstart.R index 79ecacb..c245213 100644 --- a/R/pkgstart.R +++ b/R/pkgstart.R @@ -40,7 +40,7 @@ NULL #' @importFrom rlist list.append NULL -#' @importFrom forcats fct_lump_n fct_drop +#' @importFrom forcats fct_lump_n fct_drop fct_inorder NULL #' @importFrom grDevices boxplot.stats diff --git a/R/tests.R b/R/tests.R index 92bae7f..f5441c3 100644 --- a/R/tests.R +++ b/R/tests.R @@ -438,7 +438,7 @@ compare2numvars <- function(data, dep_vars, indep_var, DESC <- meansd COMP <- t_var_test DESC_CI <- mean_cl_boot - string <- "(\\d+ ± \\d+)\\s*(\\[\\d+ -> \\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+ ; \\d+\\])" + string <- "(\\d+ ± \\d+)\\s*(\\[\\d+ -> \\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+; \\d+\\])" order <- "\\1 \\4 \\2 \\3" } else { DESC <- median_quart @@ -455,19 +455,19 @@ compare2numvars <- function(data, dep_vars, indep_var, Group = all_of(indep_var), all_of(dep_vars) ) |> - mutate(Group = factor(Group) |> fct_drop()) |> - pivot_longer(-Group,names_to = 'Variable',values_to = 'Value') |> - # gather(key = Variable, value = Value, -Group) |> + mutate(Group = fct_drop(factor(Group))) |> + pivot_longer(-Group,names_to = 'Variable',values_to = 'Value') |> mutate(Variable = forcats::fct_inorder(Variable)) |> # na.omit() |> as_tibble() + if(nlevels(data_l$Group)!=2){ stop(paste0('Other than 2 groups provided for ',indep_var,': ', paste(levels(data_l$Group),collapse='/'), ". 
Look into function compare_n_numvars.")) } data_l <- data_l |> - filter(!is.na(Group)) + dplyr::filter(!is.na(Group)) out <- data_l |> group_by(Variable) |> @@ -515,20 +515,19 @@ compare2numvars <- function(data, dep_vars, indep_var, if (ci){ out <- out |> - mutate(desc_all = paste(desc_all, all_CI, sep = " ") |> - str_replace( - string, order), - g1 = paste(g1, out[[10]], sep = " ")|> - str_replace( - string, order), - g2 = paste(g2, out[[11]], sep = " ")|> - str_replace( - string, order)) |> - select(-contains("CI")) + mutate( + desc_all = paste(desc_all, all_CI) |> + str_replace(string, order), + g1 = paste(g1, out[[10]]) |> + str_replace(string, order), + g2 = paste(g2, out[[11]]) |> + str_replace(string, order) + ) |> + dplyr::select(-contains("CI")) } else{ out <- out |> - select(-contains("CI")) + dplyr::select(-contains("CI")) } if (!n) { @@ -536,11 +535,9 @@ compare2numvars <- function(data, dep_vars, indep_var, } if (!singleline) { - indentor <- paste0(rep(spacer, 5), collapse = "") - out_tmp <- out |> - select(-starts_with("n")) |> + dplyr::select(-starts_with("n")) |> pivot_longer(cols = -c(Variable, p), names_to = "group", values_to = "stats") |> @@ -558,14 +555,13 @@ compare2numvars <- function(data, dep_vars, indep_var, } else { NA_character_}, Median = if (!gaussian) { - str_extract(stats, "(\\d+)\\s*\\((\\d+/\\d+)\\)") |> - str_extract("^\\d+") |> + str_extract(stats, "^\\d+") |> as.character() } else { NA_character_}, Quartiles = if (!gaussian) { - str_extract(stats, "(\\d+)\\s*\\((\\d+/\\d+)\\)") |> - str_extract("\\d+/\\d+") |> + str_extract(stats, "\\(\\d+/\\d+\\)") |> + str_remove_all("[\\(\\)]") |> as.character() } else { NA_character_}, @@ -592,7 +588,7 @@ compare2numvars <- function(data, dep_vars, indep_var, "g1" = "", "g2" = "") |> unique() - out_tmp <- add_row(out_tmp, row) + out_tmp <- dplyr::add_row(out_tmp, row) } out <- out_tmp |> From a0e17caf8b7ed5c63ce490c5e6652d49c8026406 Mon Sep 17 00:00:00 2001 From: Franziska Eidloth Date: 
Mon, 7 Apr 2025 16:58:18 +0200 Subject: [PATCH 4/7] added description of changes to NEWS.md for new release --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index e79b8ee..9ef0206 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +#wrappedtools 0.9.8 +- function compare2numvars can now additionally calculate confidence intervals +- function compare2numvars now has the additional option for a singleline or stacked display +- new function mean_cl_boot which calculates the mean and confidence intervals +- function median_cl_boot now has an additional round option + #wrappedtools 0.9.6 - function ksnormal now uses Lilliefors test by default - example for compare_n_numvars was corrected From 83ec853b996e4a11d469373a392624674dccefce Mon Sep 17 00:00:00 2001 From: Franziska Eidloth Date: Wed, 9 Apr 2025 09:42:09 +0200 Subject: [PATCH 5/7] added person in description file --- DESCRIPTION | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 60da9d4..afb75d1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,11 @@ Authors@R: c( family = "Asser", role = c("aut"), email = "billyasser@hotmail.co.uk", + comment = ""), + person(given = "Franziska", + family = "Eidloth", + role = c("aut"), + email = "franziska.eidloth@gmail.com", comment = "")) Maintainer: Andreas Busjahn License: GPL-3 From 9584b1093443691e1cfe5ea82342036ba12e02fb Mon Sep 17 00:00:00 2001 From: Franziska Eidloth Date: Wed, 9 Apr 2025 13:06:04 +0200 Subject: [PATCH 6/7] compare2numvars: placed n at the beginning for singleline = FALSE table, combined Mean/ Median with CI in output table for singleline = FALSE, changed add_n = TRUE, if singleline = FALSE and n = TRUE --- NEWS.md | 6 ++--- R/tests.R | 67 +++++++++++++++++++++++++++++-------------------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/NEWS.md b/NEWS.md index 452027f..8d8dbc9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,12 +1,10 @@ -#wrappedtools 0.9.8 
+#wrappedtools 0.9.7 +- function identical_cols to find and remove duplicated columns - function compare2numvars can now additionally calculate confidence intervals - function compare2numvars now has the additional option for a singleline or stacked display - new function mean_cl_boot which calculates the mean and confidence intervals - function median_cl_boot now has an additional round option -#wrappedtools 0.9.7 -- function identical_cols to find and remove duplicated columns - #wrappedtools 0.9.6 - function ksnormal now uses Lilliefors test by default - example for compare_n_numvars was corrected diff --git a/R/tests.R b/R/tests.R index 2fb845e..befc04c 100644 --- a/R/tests.R +++ b/R/tests.R @@ -401,7 +401,7 @@ t_var_test <- function(data, formula, cutoff = .05) { #' @param pretext for function [formatP]. #' @param mark for function [formatP]. #' @param n create columns for n per group? -#' @param add_n add n to descriptive statistics? +#' @param add_n add n to descriptive statistics. Will automatically be set to TRUE, if singleline = FALSE and n = TRUE to keep it for the long table format. #' @param singleline Put all group levels in a single line (default) or below each other. #' @param indentor Optional text element to indent descriptivestats when using singleline = FALSE. Defaults to " ". #' @param ci Computes lower and upper confidence limits for the estimated mean/median, based on bootstrapping. @@ -466,6 +466,12 @@ compare2numvars <- function(data, dep_vars, indep_var, paste(levels(data_l$Group),collapse='/'), ". 
Look into function compare_n_numvars.")) } + + if (!singleline && n && !add_n){ + add_n = TRUE + print(glue::glue("add_n will be set to TRUE to calculate n for long table format (singleline = FALSE)")) + } + data_l <- data_l |> dplyr::filter(!is.na(Group)) @@ -507,7 +513,6 @@ compare2numvars <- function(data, dep_vars, indep_var, separate(desc_groups, into = c("g1", "g2"), sep = ":", fill = "right") |> separate(n_groups, into = glue::glue("n {indep_var} {levels(data_l$Group)}"), - ## into = c("n g1", "n g2") sep = ":") |> mutate(n = rowSums(across(starts_with("n "), as.numeric), na.rm = TRUE)) |> @@ -531,7 +536,7 @@ compare2numvars <- function(data, dep_vars, indep_var, } if (!n) { - out <- dplyr::select(out, -n, -starts_with("n ")) + out <- dplyr::select(out, -starts_with("n")) } if (!singleline) { @@ -542,46 +547,46 @@ compare2numvars <- function(data, dep_vars, indep_var, names_to = "group", values_to = "stats") |> mutate( - Mean = if (gaussian) { - str_extract(stats, "(\\d+)\\s*±\\s*(\\d+)") |> - str_extract("^\\d+") |> - as.character() - } else { - NA_character_}, + n = str_extract(stats, "\\[n=\\d+\\]") |> + str_extract("\\d+") |> + as.character(), + "Mean (95% CI)" = if (gaussian) { + paste0(str_extract(stats, "^\\d+")," (", + str_extract(stats, "\\[\\d+; \\d+\\]") |> + str_remove_all("[\\[\\]]") |> + str_replace(";", "/"), + ")") + } else {NA_character_}, SD = if (gaussian) { str_extract(stats, "(\\d+)\\s*±\\s*(\\d+)") |> str_extract("\\d+$") |> - as.character() - } else { - NA_character_}, - Median = if (!gaussian) { - str_extract(stats, "^\\d+") |> - as.character() - } else { - NA_character_}, + as.character()} + else {NA_character_}, + "Median (95% CI)" = if (!gaussian) { + paste0(str_extract(stats, "^\\d+")," (", + str_extract(stats, "\\[\\d+; \\d+\\]") |> + str_remove_all("[\\[\\]]") |> + str_replace(";", "/"), + ")")} + else {NA_character_}, Quartiles = if (!gaussian) { str_extract(stats, "\\(\\d+/\\d+\\)") |> str_remove_all("[\\(\\)]") |> 
as.character() } else { NA_character_}, - CI = as.character(str_extract(stats,"\\[\\d+; \\d+\\]")) |> - str_remove_all("[\\[\\]]"), "min -> max" = str_extract(stats, "\\[\\d+\\s*->\\s*\\d+\\]") |> - str_remove_all("[\\[\\]]"), - n = str_extract(stats, "\\[n=\\d+\\]") |> - str_extract("\\d+") |> - as.character() + str_remove_all("[\\[\\]]") ) |> select(-stats) |> - select_if(~ !any(is.na(.))) |> ## to remove empty columns (e.g. if range = FALSE) + select_if(~ !any(is.na(.))) |> pivot_longer(-c(Variable, group, p), names_to = "stats", values_to = "values") |> pivot_wider(names_from = "group", values_from = "values") - for (var_i in dep_vars){ ## add additional row with empty fields for groups + for (var_i in dep_vars){ row <- filter(out_tmp, Variable == var_i) |> mutate(stats = "", desc_all = "", @@ -597,18 +602,16 @@ compare2numvars <- function(data, dep_vars, indep_var, arrange(stats != "", .by_group = TRUE) |> ungroup() |> mutate(Variable = case_when( - stats == "Mean" ~ paste0(indentor, "Mean"), - stats == "Median" ~ paste0(indentor, "Median"), - stats == "CI" ~ paste0(indentor, "95% CI"), + stats == "n" ~ paste0(indentor, "n"), + stats == "Mean (95% CI)" ~ paste0(indentor, "Mean (95% CI)"), + stats == "Median (95% CI)" ~ paste0(indentor, "Median (95% CI)"), stats == "Quartiles" ~ paste0(indentor, "Quartiles"), stats == "SD" ~ paste0(indentor, "SD"), stats == "min -> max" ~ paste0(indentor, "min -> max"), - stats == "n" ~ paste0(indentor, "n"), TRUE ~ Variable), p = case_when( - stats == "Mean" ~ "", - stats == "Median" ~ "", - stats == "CI" ~ "", + stats == "Mean (95% CI)" ~ "", + stats == "Median (95% CI)" ~ "", stats == "SD" ~ "", stats == "Quartiles" ~ "", stats == "min -> max" ~ "", From 7196adc2f2bfacb220e7070349c792ebfda9b86f Mon Sep 17 00:00:00 2001 From: Franziska Eidloth Date: Mon, 14 Apr 2025 13:05:50 +0200 Subject: [PATCH 7/7] changed mean_cl_boot and median_cl_boot: additional option for rounding; adjusted regex for string/order in compare2numvars 
to reorder CI in singleline --- R/descriptives.R | 42 ++++++++++++++++++++++++++++++------------ R/tests.R | 6 +++--- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/R/descriptives.R b/R/descriptives.R index 84ff5a2..3d88a37 100644 --- a/R/descriptives.R +++ b/R/descriptives.R @@ -255,7 +255,8 @@ se_median <- function(x) { #' @param conf confidence interval with default 95%. #' @param type type for function boot.ci. #' @param nrepl number of bootstrap replications, defaults to 1000. -#' @param roundDig Number of relevant digits for functio [roundR]. +#' @param round logical, applies [roundR] function to results. Output is character. +#' @param roundDig number of relevant digits for function [roundR]. #' #' @return A tibble with one row and three columns: Median, CIlow, CIhigh. #' @@ -263,18 +264,26 @@ se_median <- function(x) { #' # basic usage of median_cl_boot #' median_cl_boot(x = mtcars$wt) #' @export -median_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, roundDig = 2) { +median_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, round = FALSE, roundDig = 2) { x <- na.omit(x) lconf <- (1 - conf) / 2 uconf <- 1 - lconf bmedian <- function(x, ind) median(x[ind], na.rm = TRUE) bt <- boot::boot(x, bmedian, nrepl) bb <- boot::boot.ci(bt, type = type) - tibble( - Median = median(x, na.rm = TRUE), - CIlow = roundR(quantile(bt$t, lconf), level = roundDig), - CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) - ) + if (round) { + return(tibble( + Median = roundR(median(x, na.rm = TRUE), level = roundDig), + CIlow = roundR(quantile(bt$t, lconf), level = roundDig), + CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) + )) + } else { + return(tibble( + Median = median(x, na.rm = TRUE), + CIlow = quantile(bt$t, lconf), + CIhigh = quantile(bt$t, uconf) + )) + } } #' Rename output from \link{median_cl_boot} for use in ggplot. 
#' @@ -307,6 +316,7 @@ median_cl_boot_gg <- function(x){ #' @param conf confidence interval with default 95%. #' @param type type for function boot.ci. #' @param nrepl number of bootstrap replications, defaults to 1000. +#' @param round logical, applies [roundR] function to results. Output is character. #' @param roundDig Number of relevant digits for functio [roundR]. #' #' @return A tibble with one row and three columns: Mean, CIlow, CIhigh. @@ -316,7 +326,7 @@ median_cl_boot_gg <- function(x){ #' mean_cl_boot(x = mtcars$wt) #' @export mean_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, - roundDig = 2) ## + round = FALSE, roundDig = 2) ## { x <- na.omit(x) lconf <- (1 - conf)/2 @@ -324,10 +334,18 @@ mean_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, bmean <- function(x, ind) mean(x[ind], na.rm = TRUE) bt <- boot::boot(x, bmean, nrepl) bb <- boot::boot.ci(bt, type = type) - tibble(Mean = mean(x, na.rm = TRUE), - CIlow = roundR(quantile(bt$t, lconf), level = roundDig), - CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) - ) + + if(round){ + tibble(Mean = roundR(mean(x, na.rm = TRUE), level = roundDig), + CIlow = roundR(quantile(bt$t, lconf), level = roundDig), + CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) + ) + } else{ + tibble(Mean = mean(x, na.rm = TRUE), + CIlow = quantile(bt$t, lconf), + CIhigh = quantile(bt$t, uconf) + ) + } } #' Compute absolute and relative frequencies. 
#' diff --git a/R/tests.R b/R/tests.R index befc04c..ccf061c 100644 --- a/R/tests.R +++ b/R/tests.R @@ -438,7 +438,7 @@ compare2numvars <- function(data, dep_vars, indep_var, DESC <- meansd COMP <- t_var_test DESC_CI <- mean_cl_boot - string <- "(\\d+ ± \\d+)\\s*(\\[\\d+ -> \\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+; \\d+\\])" + string <- "(\\d+\\s*±\\s*\\d+)\\s*(\\[\\d+\\s*->\\s*\\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+\\s*;\\s*\\d+\\])" order <- "\\1 \\4 \\2 \\3" } else { DESC <- median_quart @@ -483,7 +483,7 @@ compare2numvars <- function(data, dep_vars, indep_var, range = range, rangesep = rangesep, add_n = add_n), - all_CI = DESC_CI(Value) |> + all_CI = DESC_CI(Value, round = TRUE) |> transmute(ci = paste0("[", CIlow, "; ", CIhigh, "]")) |> pull(ci), desc_groups = try(DESC(Value, groupvar = Group, @@ -502,7 +502,7 @@ compare2numvars <- function(data, dep_vars, indep_var, group_ci <- data_l |> group_by(Variable, Group) |> - summarise(ci = DESC_CI(Value) |> + summarise(ci = DESC_CI(Value, round = TRUE) |> transmute(ci = paste0("[", CIlow, "; ", CIhigh, "]")) |> pull(ci), .groups = "drop") |>