Here we present how to perform the simulation studies in Zhang et al. (2022) for the negative binomial regression, with and without interference. We warn the readers that the code presented here may take time to run on a personal computer. Indeed, for the paper, the computations were performed on the “Baobab” and “Yggdrasil” HPC clusters provided by the University of Geneva.

Setting

We use the following setting for the negative binomial regression, with and without interference.

# packages
library(IBpaper)
library(ib)
library(MASS)

# simulation specifics
MC <- 1000 # number of simulations
n <- 200 # sample size
p <- 40 # number of coefficients
# Non-zero coefficients:
# intercept is 2
beta_nz <- c(2.0,1.0,-1.0)
# Coefficients:
beta <- c(beta_nz,rep(0,p-2))
alpha <- 0.7 # overdispersion parameter

# seeds for random number generator
set.seed(895724)
seed <- vector("list",3)
seed$process <- sample.int(1e7,MC) # for the response
seed$design <- sample.int(1e7,MC) # for the design
seed$ib <- sample.int(1e7,MC) # for the iterative bootstrap

# IB specifics
H <- 200 # number of simulated estimators
bbc_control <- ibControl(H=H, maxit=1L) # control for bootstrap bias correction (BBC)
ib_control <- ibControl(H=H, maxit=50L) # control for iterative bootstrap (IB)

Negative binomial regression without interference

Here is the code for the simulation of the negative binomial regression computed on the actual responses without interference.

# For saving the results
res <- list(mle = matrix(ncol=p+2,nrow=MC),
            bbc = matrix(ncol=p+2,nrow=MC),
            jini = matrix(ncol=p+2,nrow=MC))

for(m in 1:MC){
  ##------- simulate the process ---------
  set.seed(seed$process[m]) # set the seed
  x <- matrix(rnorm(n*p, sd = 2 * sqrt(2) / sqrt(p)), nrow = n) # simulate the design
  negbin_object <- make_negbin(x, beta, 1 / alpha) # see ?make_negbin
  # simulate negative binomial responses
  y <- simulation(negbin_object, 
                  control = list(seed=seed$process[m])) 
  
  ##------ MLE estimation ----------------
  # Note: here the MLE is the initial estimator
  fit_mle <- NULL
  try(fit_mle <- glm.nb(y ~ x), silent=T)
  if(is.null(fit_mle)) next
  res$mle[m,1:(p+1)] <- coef(fit_mle)
  res$mle[m,p+2] <- 1 / fit_mle$theta
  
  ##------ Bootstrap bias correction -----
  bbc_control$seed <- seed$ib[m] # update the seed
  fit_bbc <- ib(fit_mle, control = bbc_control, 
                extra_param = TRUE) # compute BBC , see ? ib::ib for more details
  res$bbc[m,] <- getEst(fit_bbc) # retrieve estimator
  
  ##------ IB bias correction ------------
  ib_control$seed <- seed$ib[m] # update the seed
  fit_jini <- ib(fit_mle, control = ib_control, extra_param = TRUE) # compute IB
  res$jini[m,] <- getEst(fit_jini) # retrieve estimator
  # getIteration(fit_jini), if one wants to retrieve the number of iterations

  # print the iteration
  cat(m,"\n")
}

Negative binomial regression with interfered responses

We first need to specify the interference mechanism, i.e. how the responses are interfered.

# Function to simulate interfered responses
# see ?ib::simulation and ?ib::control for more details 
random_censoring <- function(object, control, extra=NULL){
  simulate_negbin <- getFromNamespace("simulate_negbin", ns = "ib")
  y <- simulate_negbin(object, control$H)
  u <- rpois(length(y), lambda = 3)
  y[y<=u] <- u[y<=u]
  matrix(y, ncol=control$H)
}
ib_control <- ibControl(H = H, maxit = 50L, sim = random_censoring) # control for iterative bootstrap (IB)

Here is the code for the simulation of the negative binomial regression computed on the interfered responses.

# For saving the results
res <- list(mle = matrix(ncol=p+2,nrow=MC),
            initial = matrix(ncol=p+2,nrow=MC),
            jini = matrix(ncol=p+2,nrow=MC))

for(m in 1:MC){
  ##------- simulate the process ---------
  set.seed(seed$process[m]) # set the seed
  x <- matrix(rnorm(n*p, sd = 2 * sqrt(2) / sqrt(p)), nr=n) # simulate the design
  negbin_object <- make_negbin(x, beta, 1 / alpha)# see ?make_negbin
  # simulate negative binomial with interfered responses
  y <- simulation(negbin_object, 
                  control = list(sim=random_censoring,
                                 seed=seed$process[m])) 
  
  ##------ MLE estimation ----------------
  fit_mle <- NULL
  try(fit_mle <- glm_negbin(y, x, lambda = 3), silent = TRUE) # see ? glm_negbin
  if(!is.null(fit_em)){
    res$mle[m,] <- fit_mle$par
  }
  
  ##------ Initial estimator (inconsistent) ----------------
  fit_initial <- NULL
  try(fit_initial <- glm.nb(y ~ x), silent=T)
  if(is.null(fit_initial)) next
  res$initial[m,1:(p+1)] <- coef(fit_initial)
  res$initial[m,p+2] <- 1 / fit_initial$theta
  
  ##------ IB bias correction ------------
  ib_control$seed <- seed$ib[m] # update the seed
  fit_jini <- ib(fit_initial, control = ib_control, extra_param = TRUE) # compute IB
  res$jini[m,] <- getEst(fit_jini) # retrieve estimator
  # getIteration(fit_jini), if one wants to retrieve the number of iterations

  # print the iteration
  cat(m,"\n")
}

References

Zhang, Yuming, Yanyuan Ma, Samuel Orso, Mucyo Karemera, Maria-Pia Victoria-Feser, and Stéphane Guerrier. 2022. “A Flexible Bias Correction Method Based on Inconsistent Estimators.” https://arxiv.org/pdf/2204.07907.pdf.