Getting started with RSDC

library(RSDC)

This vignette walks through the package API: estimating the three model variants, using the standard S3 methods on a fitted object, comparing models with AIC/BIC, forecasting, and simulating. To keep the build fast we use a small simulated data set and a reduced optimizer budget via control; in real use, keep the defaults.

A fitted object is an "rsdc_fit"

set.seed(1)
n  <- 300L
X  <- cbind(intercept = 1, z = as.numeric(scale(seq_len(n))))   # include an intercept
beta <- rbind(c(1.5, -0.6),     # regime 1 (logistic stay-prob coefficients)
              c(1.0,  0.2))     # regime 2
rho  <- rbind(0.2, 0.8)         # K = 2 -> one correlation per regime
Sig  <- array(c(matrix(c(1, 0.2, 0.2, 1), 2),
                matrix(c(1, 0.8, 0.8, 1), 2)), dim = c(2, 2, 2))
sim  <- rsdc_simulate(n = n, X = X, beta = beta, mu = matrix(0, 2, 2),
                      sigma = Sig, N = 2, seed = 42)
y <- scale(sim$observations)    # treated as unit-variance standardized residuals

ctrl <- list(itermax = 30, NP = 40, seed = 1)   # small budget for the vignette
fit <- rsdc_estimate("tvtp", residuals = y, N = 2, X = X, control = ctrl)
class(fit)
#> [1] "rsdc_fit"
fit
#> RSDC fit: method = "tvtp", N = 2 regime(s), K = 2 series, T = 300
#> Call: rsdc_estimate(method = "tvtp", residuals = y, N = 2, X = X, control = ctrl)
#> logLik = -827.119   npar = 6   AIC = 1666.24   BIC = 1688.46
#> 
#> Regime correlations (rows = regimes, ascending mean):
#>        [,1]
#> [1,] 0.2793
#> [2,] 0.8696
#> 
#> Transition matrix (at mean covariate for tvtp):
#>        [,1]   [,2]
#> [1,] 1.0000 0.0000
#> [2,] 0.0011 0.9989
#> 
#> Optimiser convergence code: 0 (0 = converged)

Standard methods

AIC() / BIC() work directly because logLik() carries df and nobs:

logLik(fit)
#> 'log Lik.' -827.1 (df=6)
c(AIC = AIC(fit), BIC = BIC(fit), nobs = nobs(fit))
#>  AIC  BIC nobs 
#> 1666 1688  300
coef(fit)
#>   b[g1,x1]   b[g1,x2]   b[g2,x1]   b[g2,x2] rho[g1,12] rho[g2,12] 
#>    10.0000    -4.7441     6.8158     3.6467     0.2793     0.8696

summary() reports a coefficient table with standard errors (from the observed information at the MLE), and confint() gives Wald intervals:

summary(fit)
#> RSDC fit summary: method = "tvtp", N = 2, K = 2, T = 300
#> logLik = -827.119   npar = 6   AIC = 1666.24   BIC = 1688.46
#> 
#> Coefficients:
#>            Estimate Std. Error z value Pr(>|z|)    
#> b[g1,x1]   10.00000    5.82879   1.716   0.0862 .  
#> b[g1,x2]   -4.74415    4.36951  -1.086   0.2776    
#> b[g2,x1]    6.81582         NA      NA       NA    
#> b[g2,x2]    3.64675         NA      NA       NA    
#> rho[g1,12]  0.27932    0.05342   5.228 1.71e-07 ***
#> rho[g2,12]  0.86959    0.04710  18.461  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Transition probabilities (delta-method SE):
#>    entry  estimate        se
#> 1 p[1,1] 0.9999546 0.0002646
#> 2 p[1,2] 0.0000454 0.0002646
#> 3 p[2,1] 0.0010951        NA
#> 4 p[2,2] 0.9989049        NA
#> 
#> Regime diagnostics (stay prob., expected duration, ergodic prob.):
#>   regime stay_prob exp_duration ergodic_prob
#> 1      1    1.0000      22027.5      0.96019
#> 2      2    0.9989        913.2      0.03981
head(confint(fit))
#>                2.5%   97.5%
#> b[g1,x1]    -1.4242 21.4242
#> b[g1,x2]   -13.3082  3.8199
#> b[g2,x1]         NA      NA
#> b[g2,x2]         NA      NA
#> rho[g1,12]   0.1746  0.3840
#> rho[g2,12]   0.7773  0.9619

Model comparison

fit_const <- rsdc_estimate("const", residuals = y, control = ctrl)
fit_noX   <- rsdc_estimate("noX",   residuals = y, N = 2, control = ctrl)
data.frame(
  model = c("const", "noX(N=2)", "tvtp(N=2)"),
  AIC   = c(AIC(fit_const), AIC(fit_noX), AIC(fit)),
  BIC   = c(BIC(fit_const), BIC(fit_noX), BIC(fit))
)
#>       model  AIC  BIC
#> 1     const 1671 1675
#> 2  noX(N=2) 1667 1682
#> 3 tvtp(N=2) 1666 1688

Forecasting and simulation

predict() wraps rsdc_forecast(); supply the conditional volatilities:

sigma_mat <- matrix(1, n, 2, dimnames = list(NULL, c("a", "b")))   # unit vols here
fc <- predict(fit, residuals = y, sigma_matrix = sigma_mat,
              value_cols = c("a", "b"), X = X)
str(fc$predicted_correlations)
#>  num [1:300, 1] 0.675 0.557 0.397 0.281 0.279 ...

simulate() draws a new path from the fitted model (here, TVTP given X):

sim2 <- simulate(fit, X = X, seed = 7)
table(sim2$states)
#> 
#>   1   2 
#> 274  26

Genuine out-of-sample forecasts

For a look-ahead-free forecast, set out_of_sample = TRUE: the filter is run on the first 70% of the sample, its terminal state initializes the held-out run, and the out-of-sample correlations are weighted by filtered (not smoothed) probabilities so no future information leaks in.

fc_oos <- predict(fit, residuals = y, sigma_matrix = sigma_mat,
                  value_cols = c("a", "b"), X = X, out_of_sample = TRUE)
nrow(fc_oos$predicted_correlations)   # length of the 30% hold-out
#> [1] 90
fc_oos$BIC                            # out-of-sample predictive score
#> [1] 1686

Uncertainty, decoding, multi-step forecasts, and tidy output

The following tools were added in version 1.5-0.

Multi-start estimation. control$n_starts repeats the global + local search from several seeds and keeps the highest-likelihood fit, storing each start’s log-likelihood. Because DEoptim is a stochastic global search whose result depends on the seed, this is primarily a stability/reproducibility diagnostic (do the seeds agree?) rather than a replacement for the global search:

fit_ms <- rsdc_estimate("tvtp", residuals = y, N = 2, X = X,
                        control = c(ctrl, list(n_starts = 3)))
fit_ms$start_logliks            # one log-likelihood per start
#> [1] -827.1 -827.7 -828.9

Uncertainty bands on the predicted correlation. rsdc_corr_bands() draws parameter vectors from the fitted sampling distribution, re-runs the filter for each, and returns pointwise quantile bands (one matrix per asset pair, with columns fit, lower, upper):

bands <- rsdc_corr_bands(fit, B = 100, seed = 1)
head(bands[[1]])
#>         fit  lower  upper
#> [1,] 0.6748 0.3124 0.8497
#> [2,] 0.5569 0.2684 0.7913
#> [3,] 0.3969 0.2325 0.6569
#> [4,] 0.2807 0.1745 0.7832
#> [5,] 0.2793 0.1736 0.3751
#> [6,] 0.2793 0.1739 0.8335

Most likely regime path (Viterbi). rsdc_viterbi() returns the single most likely joint regime sequence, complementing the marginal smoothed probabilities:

table(rsdc_viterbi(fit))
#> 
#>   1   2 
#> 279  21

Multi-step-ahead forecasts. rsdc_forecast_ahead() propagates the terminal filtered state through the Markov chain to forecast the regime distribution and the implied correlations h steps ahead (pass X_future for the time-varying case; here the last covariate row is held constant):

fa <- rsdc_forecast_ahead(fit, horizon = 5)
fa$predicted_correlations
#>        [,1]
#> [1,] 0.8696
#> [2,] 0.8696
#> [3,] 0.8696
#> [4,] 0.8696
#> [5,] 0.8696

Tidy output and plotting. rsdc_fit objects work with the generics tidy() / glance() / augment() and a autoplot() method:

generics::tidy(fit)
#>         term estimate std.error statistic   p.value
#> 1   b[g1,x1]  10.0000   5.82879     1.716 8.623e-02
#> 2   b[g1,x2]  -4.7441   4.36951    -1.086 2.776e-01
#> 3   b[g2,x1]   6.8158        NA        NA        NA
#> 4   b[g2,x2]   3.6467        NA        NA        NA
#> 5 rho[g1,12]   0.2793   0.05342     5.228 1.710e-07
#> 6 rho[g2,12]   0.8696   0.04710    18.461 4.220e-76
generics::glance(fit)
#>   method n_regimes logLik  AIC  BIC df nobs
#> 1   tvtp         2 -827.1 1666 1688  6  300
ggplot2::autoplot(fit)          # stacked smoothed regime probabilities

Stacked smoothed regime probabilities over time

More than three regimes

N >= 4 is supported for "noX" and "tvtp":

fit4 <- rsdc_estimate("noX", residuals = y, N = 4,
                      control = list(itermax = 20, NP = 60, seed = 1))
dim(fit4$transition_matrix)
#> [1] 4 4

Validation: parameter recovery

Because the data above were simulated with known regime correlations (rho = c(0.2, 0.8)), we can check that the estimator recovers them — the fitted tvtp(N = 2) correlations should be close to the truth:

rbind(true = c(0.2, 0.8),
      estimated = sort(as.numeric(fit$correlations)))
#>             [,1]   [,2]
#> true      0.2000 0.8000
#> estimated 0.2793 0.8696

A full Monte-Carlo study (five model variants, \(M = 100\) replications, \(T \in \{500, 1000, 2000\}\), \(K \in \{2, 3, 4\}\), reporting bias/RMSE/MCSE) ships with the package in inst/simulation/monte_carlo_study.R and can be run from the package root with source(system.file("simulation", "monte_carlo_study.R", package = "RSDC")).