tidy-itc-workflow
Master tidy modelling patterns for ITC analyses following TMwR principles. Covers workflow structure, consistent interfaces, reproducibility best practices, and data validation. Use when setting up ITC analysis projects or building pipelines.
Install
$ git clone https://github.com/choxos/ITC-agents /tmp/ITC-agents && cp -r /tmp/ITC-agents/plugins/itc-modelling/skills/tidy-itc-workflow ~/.claude/skills/ITC-agents/
Tip: run this command in your terminal to install the skill.
SKILL.md
name: tidy-itc-workflow
description: Master tidy modelling patterns for ITC analyses following TMwR principles. Covers workflow structure, consistent interfaces, reproducibility best practices, and data validation. Use when setting up ITC analysis projects or building pipelines.
Tidy ITC Workflow
Apply tidy modelling principles from "Tidy Modeling with R" (TMwR) to indirect treatment comparison analyses for consistent, reproducible, and maintainable code.
When to Use This Skill
- Setting up a new ITC analysis project
- Building reproducible analysis pipelines
- Creating standardized interfaces across ITC methods
- Ensuring code quality and maintainability
- Reviewing code for tidy modelling compliance
Core Principles from TMwR
1. The "Pit of Success" Philosophy
- Software should facilitate proper usage by design
- Users should "fall into winning practices" naturally
- Interface must protect users from methodological errors
2. Workflow-Centric Architecture
Every ITC analysis follows this structure:
Data → Validation → Preparation → Analysis → Diagnostics → Reporting
3. Consistent Interfaces
All ITC functions should have predictable patterns:
# Standard function signature pattern
itc_function(
data, # Primary data input
outcome_var, # Outcome variable name
treatment_var, # Treatment variable name
covariates = NULL, # Optional covariates
method = "default", # Method specification
alpha = 0.05, # Significance level
seed = NULL, # For reproducibility
verbose = TRUE, # Progress messages
... # Additional method-specific args
)
# Standard return structure
list(
results = tibble(...), # Main results as tibble
diagnostics = list(...), # Model diagnostics
model = fitted_model, # Raw model object
data_summary = list(...), # Data summary
call = match.call(), # Original call
parameters = list(...) # Analysis parameters
)
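For example, a hypothetical wrapper that follows this interface (itc_mean_difference() is illustrative and not part of any package; it fits a covariate-adjusted mean-difference model on IPD and returns the standard structure):
itc_mean_difference <- function(data, outcome_var, treatment_var,
                                covariates = NULL, method = "default",
                                alpha = 0.05, seed = NULL,
                                verbose = TRUE, ...) {
  if (!is.null(seed)) set.seed(seed)
  # Treatment goes first so its coefficient sits in row 2 of the summary
  fit <- stats::lm(stats::reformulate(c(treatment_var, covariates), outcome_var),
                   data = data)
  est <- summary(fit)$coefficients[2, ]
  z   <- stats::qnorm(1 - alpha / 2)
  if (verbose) message("Fitted covariate-adjusted mean-difference model")
  list(
    results = tibble::tibble(
      estimate = unname(est["Estimate"]),
      ci_lower = unname(est["Estimate"] - z * est["Std. Error"]),
      ci_upper = unname(est["Estimate"] + z * est["Std. Error"]),
      se       = unname(est["Std. Error"]),
      p_value  = unname(est["Pr(>|t|)"])
    ),
    diagnostics  = list(r_squared = summary(fit)$r.squared),
    model        = fit,
    data_summary = list(n = nrow(data)),
    call         = match.call(),
    parameters   = list(alpha = alpha, method = method, seed = seed)
  )
}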
ITC Workflow Structure
Step 1: Project Setup
# Recommended project structure
project/
├── R/
│ ├── 01_data_prep.R
│ ├── 02_analysis.R
│ ├── 03_sensitivity.R
│ └── 04_reporting.R
├── data/
│ ├── raw/
│ └── processed/
├── output/
│ ├── figures/
│ └── tables/
├── renv.lock # Package versions
└── _targets.R # Pipeline definition (optional)
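If the optional _targets.R pipeline is used, a minimal sketch could look like this (file paths and helpers such as prepare_ipd(), fit_models(), and render_report() are illustrative and assumed to be defined in the R/ scripts):
library(targets)

tar_option_set(packages = c("tidyverse", "meta", "netmeta"))
tar_source("R")  # load helper functions from the R/ scripts above

list(
  tar_target(raw_ipd_file, "data/raw/ipd.csv", format = "file"),
  tar_target(raw_ipd, readr::read_csv(raw_ipd_file)),
  tar_target(ipd, prepare_ipd(raw_ipd)),       # from R/01_data_prep.R
  tar_target(models, fit_models(ipd)),         # from R/02_analysis.R
  tar_target(report, render_report(models))    # from R/04_reporting.R
)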
Step 2: Environment Setup
# Load packages with explicit namespacing preference
library(tidyverse)
library(meta) # Pairwise MA
library(netmeta) # NMA
library(maicplus) # MAIC
library(stc) # STC
library(multinma) # ML-NMR
# Set global options
options(
dplyr.summarise.inform = FALSE,
mc.cores = parallel::detectCores() - 1
)
# Set seed for reproducibility
set.seed(12345)
Step 3: Data Validation
# Validate IPD structure
validate_ipd <- function(data, outcome_var, treatment_var, covariates = NULL) {
errors <- character()
warnings <- character()
# Check required columns exist
required_cols <- c(outcome_var, treatment_var)
if (!is.null(covariates)) required_cols <- c(required_cols, covariates)
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
errors <- c(errors, paste("Missing columns:", paste(missing_cols, collapse = ", ")))
}
# Check outcome type
if (outcome_var %in% names(data)) {
outcome_vals <- unique(data[[outcome_var]])
if (all(outcome_vals %in% c(0, 1, NA))) {
message("Detected binary outcome")
} else if (is.numeric(data[[outcome_var]])) {
message("Detected continuous outcome")
}
}
# Check treatment levels
if (treatment_var %in% names(data)) {
n_trt <- length(unique(data[[treatment_var]]))
if (n_trt < 2) {
errors <- c(errors, "Treatment variable must have at least 2 levels")
}
message(sprintf("Found %d treatment levels", n_trt))
}
  # Check for missing values (skip when required columns are absent,
  # otherwise indexing data[required_cols] would itself error)
  if (length(missing_cols) == 0 && any(is.na(data[required_cols]))) {
    n_missing <- sum(!complete.cases(data[required_cols]))
    warnings <- c(warnings, sprintf("%d observations with missing values", n_missing))
  }
  list(
    valid = length(errors) == 0,
    errors = errors,
    warnings = warnings,
    n_obs = nrow(data),
    n_complete = if (length(missing_cols) == 0) sum(complete.cases(data[required_cols])) else NA_integer_
  )
}
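Illustrative use on a small made-up dataset (toy_ipd and its values are purely for demonstration):
toy_ipd <- data.frame(
  response = c(1, 0, 1, NA, 0, 1),
  trt      = c("A", "A", "A", "B", "B", "B"),
  age      = c(54, 61, 47, 58, 63, 50)
)

check <- validate_ipd(toy_ipd, outcome_var = "response",
                      treatment_var = "trt", covariates = "age")

if (!check$valid) stop(paste(check$errors, collapse = "; "))
if (length(check$warnings) > 0) warning(paste(check$warnings, collapse = "; "))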
Step 4: Data Preparation (Recipe Pattern)
# Create preparation recipe
create_itc_recipe <- function(data, outcome_var, treatment_var, covariates) {
recipe <- list(
# Step 1: Handle missing values
handle_missing = function(d) {
d[complete.cases(d[c(outcome_var, treatment_var, covariates)]), ]
},
# Step 2: Factor treatment
factor_treatment = function(d) {
d[[treatment_var]] <- factor(d[[treatment_var]])
d
},
# Step 3: Center covariates (for STC/MAIC)
center_covariates = function(d, centers = NULL) {
if (is.null(centers)) {
centers <- sapply(d[covariates], mean, na.rm = TRUE)
}
for (cov in covariates) {
d[[paste0(cov, "_centered")]] <- d[[cov]] - centers[[cov]]
}
attr(d, "covariate_centers") <- centers
d
}
)
class(recipe) <- c("itc_recipe", "list")
recipe
}
# Apply recipe
prep_itc_data <- function(data, recipe) {
result <- data
for (step_name in names(recipe)) {
result <- recipe[[step_name]](result)
}
result
}
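Illustrative use of the recipe pattern on a small made-up dataset:
toy <- data.frame(
  response = c(1, 0, 1, 0),
  trt      = c("A", "A", "B", "B"),
  age      = c(54, 61, 47, 58)
)

rec <- create_itc_recipe(toy,
                         outcome_var   = "response",
                         treatment_var = "trt",
                         covariates    = "age")

prepped <- prep_itc_data(toy, rec)
attr(prepped, "covariate_centers")  # centers used to build age_centered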
Step 5: Analysis Workflow
# Unified analysis interface
run_itc_analysis <- function(
method = c("pairwise_ma", "nma", "maic", "stc", "ml_nmr"),
...
) {
method <- match.arg(method)
# Dispatch to appropriate function
result <- switch(method,
pairwise_ma = run_pairwise_ma(...),
nma = run_nma(...),
maic = run_maic(...),
stc = run_stc(...),
ml_nmr = run_ml_nmr(...)
)
# Add common metadata
result$method <- method
result$timestamp <- Sys.time()
result$session_info <- sessionInfo()
class(result) <- c("itc_result", class(result))
result
}
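Because results carry the "itc_result" class, S3 methods can standardize downstream behaviour. A minimal print-method sketch (not part of any package):
print.itc_result <- function(x, ...) {
  cat("ITC analysis:", x$method, "\n")
  cat("Run at:      ", format(x$timestamp), "\n")
  if (!is.null(x$results)) {
    cat("Results:\n")
    print(x$results)
  }
  invisible(x)
}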
Step 6: Result Standardization
# Standard result tibble format
standardize_itc_results <- function(result) {
tibble::tibble(
comparison = result$comparison,
effect_measure = result$effect_measure,
estimate = result$estimate,
ci_lower = result$ci_lower,
ci_upper = result$ci_upper,
se = result$se,
p_value = result$p_value,
method = result$method,
n_studies = result$n_studies %||% NA_integer_,
n_patients = result$n_patients %||% NA_integer_,
heterogeneity_i2 = result$i2 %||% NA_real_,
heterogeneity_tau2 = result$tau2 %||% NA_real_
)
}
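Note that `%||%` comes from rlang (or base R >= 4.4.0). Because every method is mapped to the same columns, standardized results can be stacked with dplyr::bind_rows() for side-by-side reporting. A minimal sketch with a made-up result list:
library(rlang)  # provides `%||%` used above (or rely on base R >= 4.4.0)

toy_result <- list(
  comparison = "A vs B", effect_measure = "OR", estimate = 0.82,
  ci_lower = 0.61, ci_upper = 1.10, se = 0.15, p_value = 0.19,
  method = "maic", n_patients = 412  # illustrative numbers only
)

standardize_itc_results(toy_result)

# Combine several analyses into one reporting table:
# dplyr::bind_rows(lapply(list_of_results, standardize_itc_results))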
Reproducibility Best Practices
1. Seed Management
# Set and document seed
ANALYSIS_SEED <- 12345
# Use in all stochastic operations
set.seed(ANALYSIS_SEED)
bootstrap_result <- boot::boot(..., R = 1000)
# For parallel operations
library(doRNG)
registerDoRNG(ANALYSIS_SEED)
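A minimal sketch of a reproducible parallel bootstrap, assuming the foreach, doParallel, and doRNG packages are installed (the resampling statistic is illustrative):
library(foreach)
library(doParallel)
library(doRNG)

registerDoParallel(cores = 2)   # register a parallel backend first
registerDoRNG(ANALYSIS_SEED)    # make %dopar% RNG streams reproducible

boot_means <- foreach(i = 1:200, .combine = c) %dopar% {
  mean(sample(mtcars$mpg, replace = TRUE))
}

stopImplicitCluster()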
2. Package Version Control
# Use renv for package management
renv::init()
renv::snapshot()
# Document versions in output
cat("Package versions:\n")
packageVersion("meta")
packageVersion("netmeta")
packageVersion("maicplus")
3. Session Documentation
# At end of analysis
sink("session_info.txt")
sessionInfo()
sink()
# Or more detailed
writeLines(capture.output(devtools::session_info()), "session_info.txt")
Data Validation Patterns
Binary Outcomes
validate_binary_outcome <- function(data, outcome_var) {
vals <- data[[outcome_var]]
if (!all(vals %in% c(0, 1, NA))) {
stop("Binary outcome must contain only 0, 1, or NA")
}
if (all(vals == 0, na.rm = TRUE) || all(vals == 1, na.rm = TRUE)) {
warning("All outcomes are identical - check data")
}
invisible(TRUE)
}
Survival Outcomes
validate_survival_outcome <- function(data, time_var, event_var) {
if (any(data[[time_var]] < 0, na.rm = TRUE)) {
stop("Survival times must be non-negative")
}
if (!all(data[[event_var]] %in% c(0, 1, NA))) {
stop("Event indicator must be 0, 1, or NA")
}
invisible(TRUE)
}
Aggregate Data
validate_agd <- function(agd, required_fields) {
missing <- setdiff(required_fields, names(agd))
if (length(missing) > 0) {
stop(sprintf("Missing AgD fields: %s", paste(missing, collapse = ", ")))
}
  # Check that counts and dispersion fields are non-negative
  # (means may legitimately be negative, e.g. change from baseline)
  numeric_fields <- c("n_total", "n_events", "sd")
for (field in intersect(numeric_fields, names(agd))) {
if (any(agd[[field]] < 0, na.rm = TRUE)) {
stop(sprintf("Field '%s' contains negative values", field))
}
}
invisible(TRUE)
}
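Illustrative aggregate-data check (field names and values are made up):
agd <- data.frame(
  study    = "TRIAL-01",
  n_total  = 250,
  n_events = 68
)

validate_agd(agd, required_fields = c("study", "n_total", "n_events"))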
Result Tibble Standards
All ITC functions should return results as tibbles with consistent column naming:
| Column | Type | Description |
|---|---|---|
| comparison | character | "A vs B" format |
| effect_measure | character | "OR", "HR", "MD", etc. |
| estimate | numeric | Point estimate |
| ci_lower | numeric | Lower CI bound |
| ci_upper | numeric | Upper CI bound |
| se | numeric | Standard error |
| p_value | numeric | P-value |
| method | character | Analysis method |
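A small compliance check for the required columns above (a sketch; check_itc_columns() is not part of any package):
check_itc_columns <- function(results) {
  required <- c("comparison", "effect_measure", "estimate",
                "ci_lower", "ci_upper", "se", "p_value", "method")
  missing  <- setdiff(required, names(results))
  if (length(missing) > 0) {
    stop(sprintf("Result tibble is missing columns: %s",
                 paste(missing, collapse = ", ")))
  }
  invisible(TRUE)
}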
Common Anti-Patterns to Avoid
1. Hardcoded Values
# Bad
data <- data[data$age > 65, ]
# Good
AGE_THRESHOLD <- 65
data <- data[data$age > AGE_THRESHOLD, ]
2. Missing Validation
# Bad
result <- maic_anchored(weights, ipd, pseudo_ipd)
# Good
stopifnot(inherits(weights, "maicplus_estimate_weights"))
stopifnot(nrow(ipd) > 0)
result <- maic_anchored(weights, ipd, pseudo_ipd)
3. Unreproducible Operations
# Bad
bootstrap_ci <- boot::boot.ci(boot_result)
# Good
set.seed(12345)
boot_result <- boot::boot(data, statistic, R = 1000)
bootstrap_ci <- boot::boot.ci(boot_result)
Resources
- TMwR Book: https://www.tmwr.org/
- tidymodels: https://www.tidymodels.org/
- NICE DSU TSD 18: Population-adjusted indirect comparisons
Repository
choxos/ITC-agents/plugins/itc-modelling/skills/tidy-itc-workflow