Here we present how to perform the simulation studies in Zhang et al. (2022) for the negative binomial regression, with and without interference. We warn the readers that the code presented here may take time to run on a personal computer. Indeed, for the paper, the computations were performed on the “Baobab” and “Yggdrasil” HPC clusters provided by the University of Geneva.
We use the following setting for the negative binomial regression, with and without interference.
# packages
library(IBpaper)
library(ib)
library(MASS)
# simulation specifics
MC <- 1000 # number of simulations
n <- 200 # sample size
p <- 40 # number of coefficients
# Non-zero coefficients:
# intercept is 2
beta_nz <- c(2.0,1.0,-1.0)
# Coefficients:
beta <- c(beta_nz,rep(0,p-2))
alpha <- 0.7 # overdispersion parameter
# seeds for random number generator
set.seed(895724)
seed <- vector("list",3)
seed$process <- sample.int(1e7,MC) # for the response
seed$design <- sample.int(1e7,MC) # for the design
seed$ib <- sample.int(1e7,MC) # for the iterative bootstrap
# IB specifics
H <- 200 # number of simulated estimators
bbc_control <- ibControl(H=H, maxit=1L) # control for bootstrap bias correction (BBC)
ib_control <- ibControl(H=H, maxit=50L) # control for iterative bootstrap (IB)
Here is the code for the simulation of the negative binomial regression computed on the actual responses without interference.
# For saving the results
res <- list(mle = matrix(ncol=p+2,nrow=MC),
bbc = matrix(ncol=p+2,nrow=MC),
jini = matrix(ncol=p+2,nrow=MC))
for(m in 1:MC){
##------- simulate the process ---------
set.seed(seed$process[m]) # set the seed
x <- matrix(rnorm(n*p, sd = 2 * sqrt(2) / sqrt(p)), nrow = n) # simulate the design
negbin_object <- make_negbin(x, beta, 1 / alpha) # see ?make_negbin
# simulate negative binomial responses
y <- simulation(negbin_object,
control = list(seed=seed$process[m]))
##------ MLE estimation ----------------
# Note: here the MLE is the initial estimator
fit_mle <- NULL
try(fit_mle <- glm.nb(y ~ x), silent=T)
if(is.null(fit_mle)) next
res$mle[m,1:(p+1)] <- coef(fit_mle)
res$mle[m,p+2] <- 1 / fit_mle$theta
##------ Bootstrap bias correction -----
bbc_control$seed <- seed$ib[m] # update the seed
fit_bbc <- ib(fit_mle, control = bbc_control,
extra_param = TRUE) # compute BBC , see ? ib::ib for more details
res$bbc[m,] <- getEst(fit_bbc) # retrieve estimator
##------ IB bias correction ------------
ib_control$seed <- seed$ib[m] # update the seed
fit_jini <- ib(fit_mle, control = ib_control, extra_param = TRUE) # compute IB
res$jini[m,] <- getEst(fit_jini) # retrieve estimator
# getIteration(fit_jini), if one wants to retrieve the number of iterations
# print the iteration
cat(m,"\n")
}
We first need to specify the interference mechanism, i.e. how the responses are interfered.
# Function to simulate interfered responses
# see ?ib::simulation and ?ib::control for more details
random_censoring <- function(object, control, extra=NULL){
simulate_negbin <- getFromNamespace("simulate_negbin", ns = "ib")
y <- simulate_negbin(object, control$H)
u <- rpois(length(y), lambda = 3)
y[y<=u] <- u[y<=u]
matrix(y, ncol=control$H)
}
ib_control <- ibControl(H = H, maxit = 50L, sim = random_censoring) # control for iterative bootstrap (IB)
Here is the code for the simulation of the negative binomial regression computed on the interfered responses.
# For saving the results
res <- list(mle = matrix(ncol=p+2,nrow=MC),
initial = matrix(ncol=p+2,nrow=MC),
jini = matrix(ncol=p+2,nrow=MC))
for(m in 1:MC){
##------- simulate the process ---------
set.seed(seed$process[m]) # set the seed
x <- matrix(rnorm(n*p, sd = 2 * sqrt(2) / sqrt(p)), nr=n) # simulate the design
negbin_object <- make_negbin(x, beta, 1 / alpha)# see ?make_negbin
# simulate negative binomial with interfered responses
y <- simulation(negbin_object,
control = list(sim=random_censoring,
seed=seed$process[m]))
##------ MLE estimation ----------------
fit_mle <- NULL
try(fit_mle <- glm_negbin(y, x, lambda = 3), silent = TRUE) # see ? glm_negbin
if(!is.null(fit_em)){
res$mle[m,] <- fit_mle$par
}
##------ Initial estimator (inconsistent) ----------------
fit_initial <- NULL
try(fit_initial <- glm.nb(y ~ x), silent=T)
if(is.null(fit_initial)) next
res$initial[m,1:(p+1)] <- coef(fit_initial)
res$initial[m,p+2] <- 1 / fit_initial$theta
##------ IB bias correction ------------
ib_control$seed <- seed$ib[m] # update the seed
fit_jini <- ib(fit_initial, control = ib_control, extra_param = TRUE) # compute IB
res$jini[m,] <- getEst(fit_jini) # retrieve estimator
# getIteration(fit_jini), if one wants to retrieve the number of iterations
# print the iteration
cat(m,"\n")
}
Zhang, Yuming, Yanyuan Ma, Samuel Orso, Mucyo Karemera, Maria-Pia Victoria-Feser, and Stéphane Guerrier. 2022. “A Flexible Bias Correction Method Based on Inconsistent Estimators.” https://arxiv.org/pdf/2204.07907.pdf.