VPCs for every event for repeated time-to-event data

In case of repeated time-to-event data, Monolix VPC plot shows the Kaplan-Meier (KM) for the first event, as well as the mean number of events per subject (cumulative plot).

It is not possible to display in the GUI the KM plot for the second or third event for instance. However, this can be done in R.

The general workflow is the following:

use the lixoftConnectors to load the project
export the VPC simulations using computeChartsData(plot="vpc", exportVPCSimulations = T) [this step can be skipped if the VPC simulations are exported via the GUI using Export > Export VPC simulations]
read the original data as well as the simulated data
filter the original and simulate data to keep only the nth event
use survfit from the survival package to generate a KM curve for the original data and each replicate of the simulated data. A time grid must be specified in order to get the KM curve on the same times for all replicates.
calculate percentiles over replicates for the simulated data
plot the original KM curve and prediction interval using ggplot2

The Surv() function needs to be called in different ways depending if we are considering exact events (using the Kaplan-Meier estimator) or interval censored events (using the Turnbull estimator). The code below recovers the type (exact or interval censored) information from the Monolix project and adapt the Surv() call accordingly.

R

library(lixoftConnectors)
initializeLixoftConnectors(software="monolix", force=T)
library(survival)
library(dplyr)
library(ggplot2)

#========= input settings
# Monolix project
proj <- paste0(getDemoPath(),"/3.models_for_noncontinuous_outcomes/3.3.time_to_event_data_model/weibullRTTE.mlxtran")

# number of events to display
nmax <- 10

# time grid to calculate the curves
timeGrid <- seq(0,60,by=5)
#========================


# load project using lixoftConnectors
loadProject(proj)

# get observation name for the events
types <- getObservationInformation()$type
eventObsName <- names(which(types=="event"))
eventType <- getObservationInformation()$detailedType[eventObsName][[1]]

# export VPC simulations (can also be done via the GUI)
computeChartsData(plot="vpc", exportVPCSimulations = T)

# read file containing simulated data
sim_data <- read.csv(paste0(tools::file_path_sans_ext(proj),"/ChartsData/VisualPredictiveCheck/",eventObsName,"_simulations.txt"))
names(sim_data)[4] <- "events" # replacing name with eventObsName
names(sim_data)[2] <- "id"     # replacing name of ID column

# read file containing original data (dataset)
obs_data <- getObservationInformation()[eventObsName][[1]]
names(obs_data)[3] <- "events"

if(eventType=="intervalCensoredEvents"){
  # split time column into time1 (start of interval) and time2 (end of interval)
  sim_data <- sim_data %>% group_by(rep,id) %>% mutate(time1 = lag(time), time2 = time) %>% filter(row_number()!=1) %>% ungroup()
  obs_data <- obs_data %>% group_by(id)     %>% mutate(time1 = lag(time), time2 = time) %>% filter(row_number()!=1) %>% ungroup()
}

# max number of events
df_nmax <- sim_data %>% group_by(rep, id) %>% summarise(nbEvents=sum(events)) %>% ungroup() %>% summarise(nbEventsMax = max(nbEvents)) %>% ungroup()

# containers
predInterv_allEvents <- NULL
KMoriginal_allEvents <- NULL

# for each nth events
for(i in 1:nmax){
  print(paste0("Event number ",i))
  
  if(eventType=="exactEvents"){
  # for each id and rep, filter to keep first row (DV=0), and nth event (row n+1) 
  sim_nth <- sim_data %>% group_by(rep,id) %>% filter(row_number()==1 | row_number()==min(i+1,n())) %>% ungroup()
  obs_nth <- obs_data %>% group_by(id)     %>% filter(row_number()==1 | row_number()==min(i+1,n())) %>% ungroup()
  
  # create KM curve for each replicate (can be extended to do each replicate and each stratification group)
  # it is important to add the "times" argument to get all KM curves on the same time grid (in order to calculate percentiles later)
  KMcurves        <- summary(survfit(Surv(time, events) ~ rep, data = sim_nth), times = timeGrid)
  # KM curve for original data
  KMcurveOriginal <- summary(survfit(Surv(time, events) ~ 1,   data = obs_nth), times = timeGrid)
  
  }else if(eventType=="intervalCensoredEvents"){
    # for each id and rep, keep interval containing ith event 
    sim_nth <- sim_data %>% group_by(rep,id) %>% filter(row_number()==(which(cumsum(events)>=i)[1])) %>% ungroup() 
    obs_nth <- obs_data %>% group_by(id)     %>% filter(row_number()==(which(cumsum(events)>=i)[1])) %>% ungroup() 
    
    # create KM curve for each replicate (can be extended to do each replicate and each stratification group)
    # it is important to add the "times" argument to get all KM curves on the same time grid (in order to calculate percentiles later)
    # the interval in which teh event has happened in [time1, time2]. No need to have a "status" argument.
    KMcurves        <- summary(survfit(Surv(time=time1, time2=time2, type = "interval2") ~ rep, data = sim_nth), times = timeGrid)
    # same for original data
    KMcurveOriginal <- summary(survfit(Surv(time=time1, time2=time2, type = "interval2") ~ 1,   data = obs_nth), times = timeGrid)
    
  }
  
  # survfit summary object to data.frame object
  KMcurves_df <- do.call(data.frame, lapply(c("time","surv","strata") , function(x) KMcurves[x]))
  
  # calculate percentiles for each time point 
  predInter <- KMcurves_df %>% group_by(time) %>% summarise(p05 = quantile(surv,0.05),
                                                            p95 = quantile(surv,0.95)) %>% ungroup() %>% mutate(EventNumber=paste0(i,"th event"))
  
  # create KM curve for original data
  KMcurveOriginal_df <- do.call(data.frame, lapply(c("time","surv") , function(x) KMcurveOriginal[x])) %>% mutate(EventNumber=paste0(i,"th event"))
  
  # bind together
  predInterv_allEvents <- rbind(predInterv_allEvents,predInter)
  KMoriginal_allEvents <- rbind(KMoriginal_allEvents,KMcurveOriginal_df)
  
}

# EventNumber as factor
KMoriginal_allEvents$EventNumber <- as.factor(KMoriginal_allEvents$EventNumber)
predInterv_allEvents$EventNumber <- as.factor(predInterv_allEvents$EventNumber)

# plot prediction interval with overlay of actual KM curve, stratified by EventNumber
ggplot()+geom_line(data=KMoriginal_allEvents, aes(x=time,y=surv), size=0.5)+
  geom_ribbon(data=predInterv_allEvents, aes(x=time, ymin=p05, ymax=p95), alpha=0.5, fill="#00a4c6") +
  facet_wrap(~EventNumber) + theme_bw()+
  ylab("Survival") + xlab("Time")

Example with exact events

Using the tte3_project.mlxtran from the demos defined in the input settings section:

R

#========= input settings
proj <- paste0(getDemoPath(),"/3.models_for_noncontinuous_outcomes/3.3.time_to_event_data_model/tte3_project.mlxtran")
nmax <- 9
timeGrid <- seq(0,200,by=2)
#========================

We obtain the following plot. The plot for the 1st event is the same as the “survival function” plot in the Monolix GUI.

Example with interval-censored events

This example uses the demo project PKrtte_project.mlxtran. The survival curve using the Turnbull estimator is a step function by nature. In order to have a smooth display (as in the Monolix GUI) set the step of the timeGrid setting to the same value as the intervalLength defined in the structural model.

R

#========= input settings
proj <- paste0(getDemoPath(),"/4.joint_models/4.2.continuous_noncontinuous/PKrtte_project.mlxtran")
nmax <- 9  #number of events to display
timeGrid <- seq(0,60,by=5)  # same steps as intervalLength of structural model
#========================

The resulting plot is shown below. The plot for the 1st event is the same as the “survival function” plot in the Monolix GUI.