simulation_study.Rmd
This vignette reproduce a design considered in Cucci et al. (2022) and implement a Monte Carlo simulation comparing the performance of the GMWMX-1, the GMWMX-2 and the MLE implemented in Hector. Note that this file is not intended to be run on a personal computer but should rather be executed on a high performance computing cluster.
check_if_theta_in_ci <- function(theta_vec, ci_mat) {
vec_emp_coverage <- vector(mode = "logical", length = length(theta_vec))
for (i in seq(length(theta_vec))) {
if (dplyr::between(theta_vec[i], ci_mat[i, 1], ci_mat[i, 2])) {
vec_emp_coverage[i] <- T
} else {
vec_emp_coverage[i] <- F
}
}
return(as.numeric(vec_emp_coverage))
}
# we consider example the model considered in model 3
phase <- 0.45
amplitude <- 2.5
sigma2_wn <- 15
sigma2_powerlaw <- 10
d <- 0.4
bias <- 0
trend <- 5 / 365.25
cosU <- amplitude * cos(phase)
sinU <- amplitude * sin(phase)
Let us consider 5 years of daily data
# consider n years of daily observations
year <- 20
n <- year * 365
We consider a gaussian White noise and a Power Law process as the stochastic model of the residuals .
Note that the functions that enable to generate stochastic models
that include Power Law process, Matèrn process or Fractional Gaussian
noise are (for now) only available from the development version of the
package simts
that can be easily installed with:
install.packages("devtools")
devtools::install_github("SMAC-Group/simts")
# define model for generating gaussian white noise + PLP
model_gaussian_wn_plp <- WN(sigma2 = sigma2_wn) + PLP(sigma2 = sigma2_powerlaw, d = d)
We consider three location shifts (jumps) at three different heights.
# generate data
# define time at which there are jumps
jump_vec <- c(200, 300, 500)
jump_height <- c(10, 15, 20)
We define a seed for reproducibility
# define seed
myseed <- 123
# add trend, gaps and sin
nbr_sin <- 1 # define number of sinusoidal process
and create the matrix
with function create_A_matrix()
.
# define A matrix
A <- create_A_matrix(1:n, jump_vec, n_seasonal = nbr_sin)
We set the vector of functional parameters .
# define beta
x_0 <- c(bias, trend, cosU, sinU, jump_height)
We define the number of simulations to perform
# define number of Monte Carlo simulation
n_simu <- 1000
# define number of parameter estimated (depends on the model)
# bias + trend + height * nbr of jump vec + 2* nbr of sin process + wn + powerlaw parameters
nbr_param_check_coverage <- 4
nbr_param <- 2 + length(jump_vec) + 2 * nbr_sin + 3
dim_mat_results <- (nbr_param + nbr_param_check_coverage) * 3 + 1 * 3
# define matrix of results
mat_results <- matrix(NA, ncol = dim_mat_results, nrow = n_simu)
We perform the simulation, saving for each Monte Carlo run, the estimated parameters of the functional and stochastic model and the empirical coverage on the functional parameters for the GMWMX-1, the GMWMX-2 as well as the MLE implemented in Hector.
for (simu_b in seq(n_simu)) {
# fix seed for reproducibility
set.seed(myseed + simu_b)
# generate residuals from a Gaussian White noise + Power law process
eps <- simts::gen_gts(model = model_gaussian_wn_plp, n = n)
# create time series
yy <- A %*% x_0 + eps
# create gnssts
gnssts_obj <- create.gnssts(t = 1:length(yy), y = yy, jumps = jump_vec)
# MLE
fit_simu_b_mle <- estimate_hector(
x = gnssts_obj,
model = "wn+powerlaw",
n_seasonal = 1
)
# gmwmx 1 step
fit_simu_b_gmwm_1_step <- estimate_gmwmx(
x = gnssts_obj,
model = "wn+powerlaw",
theta_0 = c(0.1, 0.1, 0.1),
n_seasonal = 1,
ci = T,
k_iter = 1
)
# gmwmx 2 steps
fit_simu_b_gmwm_2_step <- estimate_gmwmx(
x = gnssts_obj,
model = "wn+powerlaw",
theta_0 = c(0.1, 0.1, 0.1),
n_seasonal = 1,
ci = T,
k_iter = 2
)
# define vector of estimators
fit_mle <- c(fit_simu_b_mle$beta_hat, fit_simu_b_mle$theta_hat)
fit_gmwm_1_step <- c(fit_simu_b_gmwm_1_step$beta_hat, fit_simu_b_gmwm_1_step$theta_hat)
fit_gmwm_2_step <- c(fit_simu_b_gmwm_2_step$beta_hat, fit_simu_b_gmwm_2_step$theta_hat)
# compute coverage for each method
alpha <- .05
z_val <- qnorm(1 - alpha / 2)
mat_ci_mle_beta <- matrix(c(
fit_simu_b_mle$beta_hat - z_val * fit_simu_b_mle$beta_std,
fit_simu_b_mle$beta_hat + z_val * fit_simu_b_mle$beta_std
),
byrow = F, ncol = 2
)
mat_ci_gmwm_1_step_beta <- matrix(c(
fit_simu_b_gmwm_1_step$beta_hat - z_val * fit_simu_b_gmwm_1_step$beta_std,
fit_simu_b_gmwm_1_step$beta_hat + z_val * fit_simu_b_gmwm_1_step$beta_std
),
byrow = F, ncol = 2
)
mat_ci_gmwm_2_step_beta <- matrix(c(
fit_simu_b_gmwm_2_step$beta_hat - z_val * fit_simu_b_gmwm_2_step$beta_std,
fit_simu_b_gmwm_2_step$beta_hat + z_val * fit_simu_b_gmwm_2_step$beta_std
),
byrow = F, ncol = 2
)
# save empirical coverage
inside_ci_mle <- check_if_theta_in_ci(x_0, mat_ci_mle_beta)[1:4]
inside_ci_gmwm_1 <- check_if_theta_in_ci(x_0, mat_ci_gmwm_1_step_beta)[1:4]
inside_ci_gmwm_2 <- check_if_theta_in_ci(x_0, mat_ci_gmwm_2_step_beta)[1:4]
# define vector of estimated parameters as object
res <- c(
fit_mle, fit_gmwm_1_step,
fit_gmwm_2_step,
inside_ci_mle,
inside_ci_gmwm_1, inside_ci_gmwm_2,
fit_simu_b_mle$estimation_time[3], fit_simu_b_gmwm_1_step$estimation_time[3], fit_simu_b_gmwm_2_step$estimation_time[3]
)
# save in matrix of results
mat_results[simu_b, ] <- res
# print status
cat(paste("Completed simulation", simu_b, "\n",sep = " "))
}
# define names of mat results
name_param_functionnal <- c("bias", "trend", "cosU", "sinU")
colnames(mat_results) <- c(
paste("mle", names(fit_mle), sep = "_"),
paste("gmwm_1_step", names(fit_gmwm_1_step), sep = "_"),
paste("gmwm_2_step", names(fit_gmwm_2_step), sep = "_"),
paste0("mle_inside_ci_", name_param_functionnal),
paste0("gmwm_1_inside_ci_", name_param_functionnal),
paste0("gmwm_2_inside_ci_", name_param_functionnal),
c("time_mle", "time_gmwm_1", "time_gmwm_2")
)