adaptiveFTS/inst/02_smoothing_inst_rcpp.R at main · hmaissoro/adaptiveFTS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Current function
source("./R/02_smoothing.R")
Rcpp::sourceCpp("./src/02_smoothing_rcpp.cpp")

# The data
n <- 1000
Tn <- runif(n)
Yn <- sin(2 * pi * Tn) + rnorm(n = n, mean = 0, sd = 0.25)
plot(Tn, Yn)

# Function
estimate_nw_wrap <- function(y, t, tnew, h){
  if (! is.numeric(y) | ! is.numeric(t) | ! is.numeric(tnew) |! is.numeric(h))
    stop("The arguments 'y', 't', 'tnew', 'h' must be numeric.")
  if (length(t) != length(y) & length(t) < 2)
    stop("The arguments 'y' and 't' must have a length of at least 2 and must be of the same length.")
  if (! is.null(h) & length(h) > 1)
    stop("The bandwidth 'h' must be either NULL or saclar.")

  # Estimate Nadaraya-Watson estimator using estimate_nw_cpp function
  res <- estimate_nw_cpp(y = Yn, t = Tn, tnew = tnew, h = h)

  # convert
  res <- data.table::data.table("h" = h, "tnew" = tnew, "yhat" = c(res))
  return(res)
}

# Comparaison R vs Rcpp N-W estimator
res_nw <- estimate_nw(y = Yn, t = Tn, tnew = seq(0.1, 0.9, len = 100), h = 0.05)
res_nw_cpp <- estimate_nw_cpp(y = Yn, t = Tn, tnew = seq(0.1, 0.9, len = 100), h = 0.05)
res_nw_cpp_wrap <- estimate_nw_wrap(y = Yn, t = Tn, tnew = seq(0.1, 0.9, len = 100), h = 0.05)
all.equal(target = res_nw$yhat, current = res_nw_cpp[, 1])

res_benchmark <- microbenchmark::microbenchmark(
  res_nw <- estimate_nw(y = Yn, t = Tn, tnew = seq(0.1, 0.9, len = 100), h = 0.05),
  res_nw_cpp <- estimate_nw_cpp(y = Yn, t = Tn, tnew = seq(0.1, 0.9, len = 100), h = 0.05),
  res_nw_cpp_wrap <- estimate_nw_wrap(y = Yn, t = Tn, tnew = seq(0.1, 0.9, len = 100), h = 0.05),
  times = 50
)

print(res_benchmark, unit="relative", order="median")

# Comparaison R vs Rcpp of bandwidth selection
K <- 20
b0 <- 1 / length(Tn)
bK <- length(Tn) ** (- 1 / 3)
a <- exp((log(bK) - log(b0)) / K)
bw_grid <- b0 * a ** (seq_len(K))
rm(b0, bK, a, K) ; gc()

hbest <- estimate_nw_bw(y = Yn, t = Tn, bw_grid = bw_grid)
hbest_cpp <- estimate_nw_bw_cpp(y = Yn, t = Tn, bw_grid = bw_grid)
hbest == hbest_cpp

res_bm_bw_sel <- microbenchmark::microbenchmark(
  current_nw_bw_sel = hbest <- estimate_nw_bw(y = Yn, t = Tn, bw_grid = bw_grid),
  cpp_nw_bw_sel = hbest_cpp <- estimate_nw_bw_cpp(y = Yn, t = Tn, bw_grid = bw_grid),
  times = 50
)

print(res_bm_bw_sel, unit="relative", order="median")

# Estimate best bandwidth on a subset of curves
# Import the data
data("data_far")
get_nw_optimal_bw_cpp(data_far, bw_grid = NULL, nsubset = 30, kernel_name = "epanechnikov")