Getting started with RSDC

library(RSDC)

This vignette walks through the package API: estimating the three model variants, using the standard S3 methods on a fitted object, comparing models with AIC/BIC, forecasting, and simulating. To keep the build fast we use a small simulated data set and a reduced optimizer budget via control; in real use, keep the defaults.

A fitted object is an `"rsdc_fit"`

set.seed(1)
n  <- 300L
X  <- cbind(intercept = 1, z = as.numeric(scale(seq_len(n))))   # include an intercept
beta <- rbind(c(1.5, -0.6),     # regime 1 (logistic stay-prob coefficients)
              c(1.0,  0.2))     # regime 2
rho  <- rbind(0.2, 0.8)         # K = 2 -> one correlation per regime
Sig  <- array(c(matrix(c(1, 0.2, 0.2, 1), 2),
                matrix(c(1, 0.8, 0.8, 1), 2)), dim = c(2, 2, 2))
sim  <- rsdc_simulate(n = n, X = X, beta = beta, mu = matrix(0, 2, 2),
                      sigma = Sig, N = 2, seed = 42)
y <- scale(sim$observations)    # treated as unit-variance standardized residuals

ctrl <- list(itermax = 30, NP = 40, seed = 1)   # small budget for the vignette
fit <- rsdc_estimate("tvtp", residuals = y, N = 2, X = X, control = ctrl)
class(fit)
#> [1] "rsdc_fit"
fit
#> RSDC fit: method = "tvtp", N = 2 regime(s), K = 2 series, T = 300
#> Call: rsdc_estimate(method = "tvtp", residuals = y, N = 2, X = X, control = ctrl)
#> logLik = -827.126   npar = 6   AIC = 1666.25   BIC = 1688.47
#> 
#> Regime correlations (rows = regimes, ascending mean):
#>        [,1]
#> [1,] 0.2793
#> [2,] 0.8696
#> 
#> Transition matrix (evaluated at the in-sample covariate means):
#>       [,1]   [,2]
#> [1,] 1e+00 0.0000
#> [2,] 2e-04 0.9998
#> 
#> Optimiser convergence code: 0 (0 = converged)

Standard methods

AIC() / BIC() work directly because logLik() carries df and nobs:

logLik(fit)
#> 'log Lik.' -827.1 (df=6)
c(AIC = AIC(fit), BIC = BIC(fit), nobs = nobs(fit))
#>  AIC  BIC nobs 
#> 1666 1688  300
coef(fit)
#>   b[g1,x1]   b[g1,x2]   b[g2,x1]   b[g2,x2] rho[g1,12] rho[g2,12] 
#>    10.0000    -4.7439     8.6512     4.7218     0.2793     0.8696

summary() reports a coefficient table with standard errors (from the observed information at the MLE), and confint() gives Wald intervals:

summary(fit)
#> RSDC fit summary: method = "tvtp", N = 2, K = 2, T = 300
#> logLik = -827.126   npar = 6   AIC = 1666.25   BIC = 1688.47
#> 
#> Coefficients:
#>            Estimate Std. Error z value Pr(>|z|)    
#> b[g1,x1]   10.00000    5.82624   1.716   0.0861 .  
#> b[g1,x2]   -4.74391    4.36730  -1.086   0.2774    
#> b[g2,x1]    8.65120         NA      NA       NA    
#> b[g2,x2]    4.72180         NA      NA       NA    
#> rho[g1,12]  0.27931    0.05337   5.234 1.66e-07 ***
#> rho[g2,12]  0.86956    0.04686  18.556  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Transition probabilities at the in-sample covariate means (delta-method SE):
#>    entry  estimate        se
#> 1 p[1,1] 0.9999546 0.0002645
#> 2 p[1,2] 0.0000454 0.0002645
#> 3 p[2,1] 0.0001749        NA
#> 4 p[2,2] 0.9998251        NA
#> 
#> Regime diagnostics at the in-sample covariate means (stay prob., expected duration, ergodic prob.):
#>   regime stay_prob exp_duration ergodic_prob
#> 1      1    1.0000        22027       0.7939
#> 2      2    0.9998         5718       0.2061
head(confint(fit))
#>                2.5%   97.5%
#> b[g1,x1]    -1.4192 21.4192
#> b[g1,x2]   -13.3037  3.8158
#> b[g2,x1]         NA      NA
#> b[g2,x2]         NA      NA
#> rho[g1,12]   0.1747  0.3839
#> rho[g2,12]   0.7777  0.9614

Model comparison

fit_const <- rsdc_estimate("const", residuals = y, control = ctrl)
fit_noX   <- rsdc_estimate("noX",   residuals = y, N = 2, control = ctrl)
data.frame(
  model = c("const", "noX(N=2)", "tvtp(N=2)"),
  AIC   = c(AIC(fit_const), AIC(fit_noX), AIC(fit)),
  BIC   = c(BIC(fit_const), BIC(fit_noX), BIC(fit))
)
#>       model  AIC  BIC
#> 1     const 1671 1675
#> 2  noX(N=2) 1667 1682
#> 3 tvtp(N=2) 1666 1688

Forecasting and simulation

predict() wraps rsdc_forecast(); supply the conditional volatilities:

sigma_mat <- matrix(1, n, 2, dimnames = list(NULL, c("a", "b")))   # unit vols here
fc <- predict(fit, residuals = y, sigma_matrix = sigma_mat,
              value_cols = c("a", "b"), X = X)
str(fc$predicted_correlations)
#>  num [1:300, 1] 0.673 0.555 0.396 0.281 0.279 ...

simulate() draws a new path from the fitted model (here, TVTP given X):

sim2 <- simulate(fit, X = X, seed = 7)
table(sim2$states)
#> 
#>   1   2 
#> 274  26

Genuine out-of-sample forecasts

For a look-ahead-free forecast, set out_of_sample = TRUE: the filter is run on the first 70% of the sample, its terminal state initializes the held-out run, and the out-of-sample correlations are weighted by filtered (not smoothed) probabilities so no future information leaks in.

fc_oos <- predict(fit, residuals = y, sigma_matrix = sigma_mat,
                  value_cols = c("a", "b"), X = X, out_of_sample = TRUE)
nrow(fc_oos$predicted_correlations)   # length of the 30% hold-out
#> [1] 90
fc_oos$BIC                            # out-of-sample predictive score
#> [1] 1686

Uncertainty, decoding, multi-step forecasts, and tidy output

The following tools were added in version 1.5-0.

Multi-start estimation. control$n_starts repeats the global + local search from several seeds and keeps the highest-likelihood fit, storing each start’s log-likelihood. Because DEoptim is a stochastic global search whose result depends on the seed, this is primarily a stability/reproducibility diagnostic (do the seeds agree?) rather than a replacement for the global search:

fit_ms <- rsdc_estimate("tvtp", residuals = y, N = 2, X = X,
                        control = c(ctrl, list(n_starts = 3)))
fit_ms$start_logliks            # one log-likelihood per start
#> [1] -827.1 -827.3 -827.7

Uncertainty bands on the predicted correlation. rsdc_corr_bands() draws parameter vectors from the fitted sampling distribution, re-runs the filter for each, and returns pointwise quantile bands (one matrix per asset pair, with columns fit, lower, upper):

bands <- rsdc_corr_bands(fit, B = 100, seed = 1)
head(bands[[1]])
#>         fit  lower  upper
#> [1,] 0.6728 0.3116 0.8494
#> [2,] 0.5545 0.2683 0.7877
#> [3,] 0.3955 0.2320 0.6589
#> [4,] 0.2807 0.1745 0.7845
#> [5,] 0.2793 0.1736 0.3736
#> [6,] 0.2793 0.1738 0.8336

Most likely regime path (Viterbi). rsdc_viterbi() returns the single most likely joint regime sequence, complementing the marginal smoothed probabilities:

table(rsdc_viterbi(fit))
#> 
#>   1   2 
#> 279  21

Multi-step-ahead forecasts. rsdc_forecast_ahead() propagates the terminal filtered state through the Markov chain to forecast the regime distribution and the implied correlations h steps ahead (pass X_future for the time-varying case; here the last covariate row is held constant):

fa <- rsdc_forecast_ahead(fit, horizon = 5)
fa$predicted_correlations
#>        [,1]
#> [1,] 0.8695
#> [2,] 0.8695
#> [3,] 0.8695
#> [4,] 0.8695
#> [5,] 0.8695

Tidy output and plotting. rsdc_fit objects work with the generics tidy() / glance() / augment() and a autoplot() method:

generics::tidy(fit)
#>         term estimate std.error statistic   p.value
#> 1   b[g1,x1]  10.0000   5.82624     1.716 8.609e-02
#> 2   b[g1,x2]  -4.7439   4.36730    -1.086 2.774e-01
#> 3   b[g2,x1]   8.6512        NA        NA        NA
#> 4   b[g2,x2]   4.7218        NA        NA        NA
#> 5 rho[g1,12]   0.2793   0.05337     5.234 1.661e-07
#> 6 rho[g2,12]   0.8696   0.04686    18.556 7.267e-77
generics::glance(fit)
#>   method n_regimes logLik  AIC  BIC df nobs
#> 1   tvtp         2 -827.1 1666 1688  6  300

ggplot2::autoplot(fit)          # stacked smoothed regime probabilities

Stacked smoothed regime probabilities over time

More than three regimes

N >= 4 is supported for "noX" and "tvtp":

fit4 <- rsdc_estimate("noX", residuals = y, N = 4,
                      control = list(itermax = 20, NP = 60, seed = 1))
dim(fit4$transition_matrix)
#> [1] 4 4

Validation: parameter recovery

Because the data above were simulated with known regime correlations (rho = c(0.2, 0.8)), we can check that the estimator recovers them — the fitted tvtp(N = 2) correlations should be close to the truth:

rbind(true = c(0.2, 0.8),
      estimated = sort(as.numeric(fit$correlations)))
#>             [,1]   [,2]
#> true      0.2000 0.8000
#> estimated 0.2793 0.8696

A full Monte-Carlo study (five model variants, $M = 100$ replications, $T$ up to $2000$, $K$ up to $5$, reporting bias/RMSE and the value of the multi-start protocol) ships with the package in inst/simulation/run_mc_recovery_study.R and can be run from the package root with source(system.file("simulation", "run_mc_recovery_study.R", package = "RSDC")).