Overview
This example demonstrates how to implement a custom penalty-based cost function for mlxModelFinder, inspired by the approach described in the following paper:
Richardson, S., Irurzun Arana, I., Nowojewski, A. et al. A machine learning approach to population pharmacokinetic modelling automation. Commun Med 5, 327 (2025). https://doi.org/10.1038/s43856-025-01054-8
This cost function combines the model’s statistical fit (AIC) with penalties on various quality metrics such as relative standard errors (RSEs), omega values, shrinkage, and the condition number. It allows model selection to account for both goodness-of-fit and model stability.
Penalty Structure
Each model metric is evaluated against a set of thresholds, with penalties increasing as the metric deviates from acceptable ranges.
The function applies piecewise linear penalties defined as follows:
|
Parameter |
Estimated value |
Threshold |
Penalty |
|---|---|---|---|
|
Fixed effects |
Relative error |
10% |
0 |
|
30% |
20 |
||
|
75% |
100 |
||
|
1000% |
100000 |
||
|
Random effects |
Relative error |
20% |
0 |
|
40% |
30 |
||
|
65% |
100 |
||
|
1000% |
100000 |
||
|
Random effects |
Value (CV%) |
0 (0 CV%) |
1000000 |
|
0.001 (3 CV%) |
1000 |
||
|
0.01 (10 CV%) |
0 |
||
|
1 (131 CV%) |
0 |
||
|
1.65 (205 CV%) |
100 |
||
|
2.3 (300 CV%) |
500 |
||
|
3 (437 CV%) |
10000 |
||
|
3.5 (567 CV%) |
1000000 |
||
|
Random effects |
Shrinkage |
40% |
0 |
|
100% |
200 |
||
|
Condition number |
Condition number |
500 |
0 |
|
1000 |
100 |
||
|
10000 |
1000 |
||
|
1000000 |
10000000 |
Implementation in R
Below is a sample implementation of the penalized cost function.
It reads Monolix output files, extracts relevant metrics, computes penalties via linear interpolation, and returns the penalized AIC value.
# --- Helpers ---------------------------------------------------------------
piecewise_linear_interp <- function(x, xp, yp, na_value = 0) {
stopifnot(length(xp) == length(yp), length(xp) >= 2)
if (is.na(x)) return(na_value)
if (x <= xp[1]) return(yp[1])
if (x >= xp[length(xp)]) return(yp[length(yp)])
i <- findInterval(x, xp)
x0 <- xp[i]; x1 <- xp[i + 1]
y0 <- yp[i]; y1 <- yp[i + 1]
y0 + (y1 - y0) * (x - x0) / (x1 - x0)
}
make_penalty_curve <- function(xp, yp, transform_x = identity) {
force(xp); force(yp); force(transform_x)
function(x) piecewise_linear_interp(transform_x(x), xp = xp, yp = yp)
}
sum_penalty <- function(x, f) {
if (is.null(x) || length(x) == 0) return(0)
sum(vapply(x, f, numeric(1)), na.rm = TRUE)
}
safe_aic <- function() {
ll <- getEstimatedLogLikelihood()
aic_is <- ll$importanceSampling[["AIC"]]
if (!is.null(aic_is) && !is.na(aic_is)) return(aic_is)
warning("AIC from Importance Sampling not found. Using AIC from Linearization.")
aic_lin <- ll$linearization[["AIC"]]
if (is.null(aic_lin) || is.na(aic_lin)) return(NA_real_)
aic_lin
}
read_condition_number <- function(proj_dir) {
fim_file <- file.path(proj_dir, "FisherInformation", "covarianceEstimatesSA.txt")
if (!file.exists(fim_file)) {
warning("FIM file not found. Skipping condition number penalty.")
return(NULL)
}
fim <- as.matrix(read.csv(fim_file, header = FALSE, row.names = 1))
if (any(!is.finite(fim))) return(NULL)
ev <- eigen(fim, only.values = TRUE)$values
max(ev) / min(ev)
}
# --- Main ------------------------------------------------------------------
calculate_penalized_aic <- function() {
# 1) Define penalty curves (data-first)
penalties <- list(
THETA_RSE = make_penalty_curve(
xp = c(10, 30, 75, 1000),
yp = c(0, 20, 100, 100000)
),
OMEGA_RSE = make_penalty_curve(
xp = c(20, 40, 65, 1000),
yp = c(0, 30, 100, 100000)
),
OMEGA_value = make_penalty_curve(
xp = c(0.0000, 0.0010, 0.0100, 1.0000, 1.6500, 2.3000, 3.0000, 3.5000),
yp = c(1e6, 1e3, 0, 0, 100, 500, 1e4, 1e6),
transform_x = function(omega_sd) omega_sd^2
),
OMEGA_shrinkage = make_penalty_curve(
xp = c(40, 100),
yp = c(0, 200)
),
Condition_number = make_penalty_curve(
xp = c(500, 1000, 10000, 1e6),
yp = c(0, 100, 1000, 1e7)
)
)
# 2) Extract metrics
proj_dir <- getProjectSettings()$directory
AIC <- safe_aic()
pop_params <- getEstimatedPopulationParameters()
std_errs <- getEstimatedStandardErrors()$stochasticApproximation
is_theta <- grepl("(_pop|beta_)", names(pop_params)) & !grepl("omega", names(pop_params))
is_omega <- grepl("^omega_", names(pop_params)) & !grepl("corr|cov", names(pop_params))
theta_rse <- std_errs[is_theta, "rse"]
omega_rse <- std_errs[is_omega, "rse"]
omega_values <- pop_params[is_omega]
shrinkage <- getEtaShrinkage()$conditionalMode$shrinkage
shrinkage[shrinkage == 100] <- 0
condition_number <- read_condition_number(proj_dir)
# 3) Compute penalties
breakdown <- c(
THETA_RSE = sum_penalty(theta_rse, penalties$THETA_RSE),
OMEGA_RSE = sum_penalty(omega_rse, penalties$OMEGA_RSE),
OMEGA_value = sum_penalty(omega_values,penalties$OMEGA_value),
OMEGA_shrinkage = sum_penalty(shrinkage, penalties$OMEGA_shrinkage),
Condition_number = if (is.null(condition_number)) 0 else penalties$Condition_number(condition_number)
)
total_penalty <- sum(breakdown, na.rm = TRUE)
penalized_AIC <- AIC + total_penalty
penalized_AIC
}
Usage
Provide this function as an argument to the findModel function:
res <- findModel(
project = "theophylline_project.mlxtran",
library = "pk",
requiredFilters = list(administration = "oral", parametrization = "clearance"),
# optional: filters / algorithm / settings ...
metric_func = calculate_penalized_aic
)