Python parity: rqmoms vs vilkovgr/qmoms • rqmoms

Python parity is disabled. Set RQMOMS_BUILD_PY=1 and either RETICULATE_PYTHON or RQMOMS_VENV. (Could not import numpy/qmoms.)

Goal

This vignette demonstrates numerical parity between the pure-R implementation in rqmoms and the original Python package vilkovgr/qmoms for the same inputs and parameters.

For a proper setup of python for testing refer to this script: “dev/01-setup-python.R”

Libraries & packaged data

library(rqmoms)
library(dplyr)
> 
> Attaching package: 'dplyr'
> The following objects are masked from 'package:stats':
> 
>     filter, lag
> The following objects are masked from 'package:base':
> 
>     intersect, setdiff, setequal, union

# Packaged datasets ship with the package
str(qmoms_surface)
> tibble [234 × 7] (S3: tbl_df/tbl/data.frame)
>  $ id             : int [1:234] 10078 10078 10078 10078 10078 10078 10078 10078 10078 10078 ...
>  $ date           : Date[1:234], format: "1996-01-04" "1996-01-04" ...
>  $ days           : int [1:234] 30 30 30 30 30 30 30 30 30 30 ...
>  $ mnes           : num [1:234] 0.843 0.853 0.879 0.886 0.912 ...
>  $ prem           : num [1:234] 0.191 0.0277 0.1657 0.0372 0.1442 ...
>  $ impl_volatility: num [1:234] 0.847 0.778 0.84 0.777 0.834 ...
>  $ delta          : num [1:234] 80 -20 75 -25 70 -30 65 -35 -40 60 ...
head(qmoms_zerocd)
>  [38;5;246m# A tibble: 6 × 3 [39m
>   date        days  rate
>    [3m [38;5;246m<date> [39m [23m      [3m [38;5;246m<int> [39m [23m  [3m [38;5;246m<dbl> [39m [23m
>  [38;5;250m1 [39m 1996-01-02     9  5.76
>  [38;5;250m2 [39m 1996-01-02    15  5.75
>  [38;5;250m3 [39m 1996-01-02    50  5.67
>  [38;5;250m4 [39m 1996-01-02    78  5.61
>  [38;5;250m5 [39m 1996-01-02   169  5.47
>  [38;5;250m6 [39m 1996-01-02   260  5.35

For convenience we define a small wrapper (only used when has_py = TRUE):

py_compute <- function(mnes, vol, days, rate, params) {
  m <- reticulate::import("qmoms")
  out <- m$qmoms_compute(
    mnes = as.numeric(mnes), vol = as.numeric(vol),
    days = as.integer(days), rate = as.numeric(rate),
    params = reticulate::r_to_py(params, convert = TRUE),
    output = "pandas"
  )
  r <- reticulate::py_to_r(out)
  if (is.atomic(r)) { nm <- names(r); r <- as.list(r); if (!is.null(nm)) names(r) <- nm
  } else if (is.data.frame(r)) { r <- as.list(r[1, , drop = TRUE]) }
  r
}

Single surface: side-by-side

params <- rq_default_params()
one <- subset(qmoms_surface,
              id == qmoms_surface$id[1] & days == qmoms_surface$days[1])
r30 <- get_rate_for_maturity(qmoms_zerocd, date = one$date[1], days = one$days[1])

r_out <- qmoms_compute(one$mnes, one$impl_volatility, one$days[1], r30,
                       params = params, output = "list")
str(r_out)
> List of 17
>  $ nopt       : int 234
>  $ smfiv      : num 6.39
>  $ mfiv_bkm   : num 3.1
>  $ mfiv_bjn   : num 3.92
>  $ smfivd     : num 0.0076
>  $ mfivd_bkm  : num 0.0115
>  $ mfivd_bjn  : num 0.01
>  $ mfis       : num -0.0476
>  $ mfik       : num 3.28
>  $ cvix_sigma2: num 1.94
>  $ cvix_mnes20: num 1.2
>  $ rix        : num 0.00151
>  $ rixnorm    : num 0.131
>  $ tlm_sigma2 : num NA
>  $ tlm_delta20: num NA
>  $ slopedn    : num NA
>  $ slopeup    : num -3.46e-16

py_out <- py_compute(one$mnes, one$impl_volatility, one$days[1], r30, params)

keys <- intersect(names(r_out), names(py_out))
tab <- tibble::tibble(
  metric = keys,
  R      = unlist(r_out[keys]),
  Python = unlist(py_out[keys]),
  abs_diff = abs(R - Python)
)
tab <- dplyr::arrange(tab, desc(abs_diff))
knitr::kable(head(tab, 20), digits = 6, caption = "Single surface: top absolute differences")

All groups: parity summary

We merge the zero curve into the surface and run both engines for every (id, date, days) group.

surf_r <- get_rate_for_maturity(qmoms_zerocd, df_surf = qmoms_surface)
groups <- split(surf_r, list(surf_r$id, surf_r$date, surf_r$days), drop = TRUE)
length(groups)
> [1] 9

key_metrics <- c("smfiv","mfiv_bkm","mfiv_bjn",
                 "smfivd","mfivd_bkm","mfivd_bjn",
                 "mfis","mfik","rix","rixnorm","slopedn","slopeup")

compare_one <- function(g) {
  rlist <- qmoms_compute(g$mnes, g$impl_volatility, g$days[1], g$rate[1],
                         params = params, output = "list")
  pylist <- py_compute(g$mnes, g$impl_volatility, g$days[1], g$rate[1], params)
  keys <- intersect(key_metrics, intersect(names(rlist), names(pylist)))
  if (!length(keys)) return(NULL)
  tibble::tibble(
    id   = g$id[1], date = g$date[1], days = g$days[1],
    metric = keys,
    R = unlist(rlist[keys]),
    Python = unlist(pylist[keys]),
    abs_diff = abs(R - Python)
  )
}

all_cmp <- vctrs::vec_rbind(!!!lapply(groups, compare_one))

summary_tab <- all_cmp |>
  dplyr::group_by(metric) |>
  dplyr::summarise(
    n        = dplyr::n(),
    mean_abs = mean(abs_diff, na.rm = TRUE),
    p95_abs  = quantile(abs_diff, 0.95, na.rm = TRUE),
    max_abs  = max(abs_diff, na.rm = TRUE),
    .groups = "drop"
  ) |>
  dplyr::arrange(desc(max_abs))

knitr::kable(summary_tab, digits = 6, caption = "All groups: absolute-difference summary by metric")

Visual check (example metric)

m <- "mfiv_bkm"
x <- all_cmp$abs_diff[all_cmp$metric == m]
hist(x, breaks = 30, main = sprintf("Absolute differences: %s", m), xlab = "abs diff")

Tolerances used in automated tests

For reference, the package’s tests use these tolerances when comparing R to Python:

Variance integrals (smfiv, mfiv_bkm, mfiv_bjn, semivariances): 5e-4
Higher moments (mfis, mfik): 5e-3 and 5e-2, respectively
Slopes (slopedn, slopeup): 5e-3
rix, rixnorm: 5e-4
CVIX/TLM windows: 5e-4 / 5e-2 (window-dependent)

You should see observed differences comfortably within these thresholds when RQMOMS_BUILD_PY=1 is enabled.

Notes

Interpolation uses PCHIP with clamping (via pracma::pchip) to mirror SciPy’s PchipInterpolator.
Grid, integration weights, slope sign convention, and window logic are implemented identically to the Python reference.
Packaged datasets qmoms_surface and qmoms_zerocd are derived from the Python examples; rates are converted to decimal if the CSV used percent.

Credits

This parity check compares against vilkovgr/qmoms, which remains the reference implementation. The R package aims for numerical identity given the same inputs and parameters.