To build model in memory

cmf_krr_train_mem(y, kernels, alpha_grid_search = TRUE, gamma_grid_search = FALSE, conic_kernel_combination = FALSE, optimize_h = FALSE, mfields = c("q", "vdw", "logp", "abra", "abrb"), set_b_0 = FALSE, print_interm_icv = TRUE, plot_interm_icv = TRUE, print_final_icv = TRUE, plot_final_icv = TRUE, ...)

Arguments

y

kernels

alpha_grid_search

gamma_grid_search

conic_kernel_combination

optimize_h

mfields

set_b_0

print_interm_icv

plot_interm_icv

print_final_icv

plot_final_icv

Details

Value

References

Note

See also

Examples

##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (y, kernels, alpha_grid_search = TRUE, gamma_grid_search = FALSE, conic_kernel_combination = FALSE, optimize_h = FALSE, mfields = c("q", "vdw", "logp", "abra", "abrb"), set_b_0 = FALSE, print_interm_icv = TRUE, plot_interm_icv = TRUE, print_final_icv = TRUE, plot_final_icv = TRUE, ...) { var_y <- var(y) ncomp <- length(y) alphas <- kernels$alphas nalphas <- length(alphas) nfields <- length(mfields) Q2_best_of_best <- -1000 model <- list() fr <- function(par_list) { try_current_hyper_params <- function() { m <- build_krr_model(Km, y, gamma, set_b_0) if (is.null(m)) return() y_pred <- Km %*% m$a + m$b regr <- regr_param(y_pred, y) RMSE <- regr$RMSE R2 <- regr$R2 cv <- cv_krr(10, Km, y, gamma) RMSEcv <- cv$RMSE Q2 <- cv$R2 y_pred_cv <- cv$y_pred_cv minQ2R2 <- min(Q2, R2) if (minQ2R2 > minQ2R2_best) { minQ2R2_best <<- minQ2R2 RMSE_best <<- RMSE R2_best <<- R2 RMSEcv_best <<- RMSEcv Q2_best <<- Q2 if (alpha_grid_search) { ialpha_best <<- ialpha } alpha_best <<- alpha gamma_best <<- gamma a_best <<- m$a b_best <<- m$b y_pred_best <<- y_pred y_pred_cv_best <<- y_pred_cv } } R2_best <- -1000 RMSE_best <- -1 Q2_best <- -1000 RMSEcv_best <- -1 minQ2R2_best <- -1000 alpha_best <- -1 gamma_best <- -1 a_best <- NULL b_best <- NULL y_pred_best <- double() y_pred_cv_best <- double() h <- list() pos <- 1 if (optimize_h) { if (conic_kernel_combination) { for (f in 1:nfields) h[[mfields[f]]] <- abs(par_list[f]) } else { for (f in 1:nfields) h[[mfields[f]]] <- par_list[f] } pos <- pos + nfields if (!alpha_grid_search) { alpha <- par_list[pos] pos <- pos + 1 } if (!gamma_grid_search) gamma <- par_list[pos] } else { for (f in 1:nfields) h[[mfields[f]]] <- 1 if (!alpha_grid_search) { alpha <- par_list[pos] pos <- pos + 1 } if (!gamma_grid_search) gamma <- par_list[pos] } if (alpha_grid_search) { for (ialpha in 1:length(alphas)) { alpha <- alphas[[ialpha]] Km <<- matrix(0, nrow = ncomp, ncol = ncomp) for (f in 1:nfields) { Km <<- Km + h[[mfields[f]]] * kernels[[mfields[f]]][[ialpha]] } if (gamma_grid_search) { for (gamma in gamma_list) { try_current_hyper_params() } } else { try_current_hyper_params() } } alpha_best <- alphas[ialpha_best] } else { Km <<- cmf_calc_combined_kernels_1alpha(kernels, h, alpha, alphas) if (gamma_grid_search) { for (gamma in gamma_list) { try_current_hyper_params() } } else { try_current_hyper_params() } } if (Q2_best > Q2_best_of_best) { Q2_best_of_best <<- Q2_best if (print_interm_icv) { for (f in 1:nfields) cat(sprintf("h_%s=%g ", mfields[f], h[[mfields[f]]])) cat(sprintf("\n")) cat(sprintf("best: alpha=%g gamma=%g RMSE=%g R2=%g RMSEcv=%g Q2=%g \n", alpha_best, gamma_best, RMSE_best, R2_best, RMSEcv_best, Q2_best)) flush.console() } if (plot_interm_icv) { cinf_plotxy(y_pred_cv_best, y, xlab = "Predicted", ylab = "Experiment", main = "Scatter Plot for Cross-Validation (Internal)") abline(coef = c(0, 1)) } model$gamma <<- gamma_best for (f in 1:nfields) { model$h[[mfields[f]]] <<- h[[mfields[f]]] model$alpha[[mfields[f]]] <<- alpha_best if (alpha_best < alphas[1]) model$alpha[[mfields[f]]] <<- alphas[1] if (alpha_best > alphas[nalphas]) model$alpha[[mfields[f]]] <<- alphas[nalphas] } model$R2 <<- R2_best model$RMSE <<- RMSE_best model$y_pred <<- y_pred_best model$y_exp <<- y model$Q2 <<- Q2_best model$RMSEcv <<- RMSEcv_best model$y_pred_cv <<- y_pred_cv_best model$a <<- a_best model$b <<- b_best } RMSEcv_best } par_list <- list() if (optimize_h) par_list <- c(par_list, rep(1, nfields)) if (!alpha_grid_search) par_list <- c(par_list, 0.25) if (!gamma_grid_search) par_list <- c(par_list, 5) npars <- length(par_list) if (npars > 1) { res <- optim(par_list, fr) } else if (npars == 1) { res <- optimize(fr, c(0.01, 20)) } else { res <- fr() } model$set_b_0 <- set_b_0 if (print_final_icv) { for (f in 1:nfields) cat(sprintf("h_%s=%g ", mfields[f], model$h[[mfields[f]]])) cat(sprintf("\n")) cat(sprintf("final: alpha=%g gamma=%g RMSE=%g R2=%g RMSEcv=%g Q2=%g \n", model$alpha[1], model$gamma, model$RMSE, model$R2, model$RMSEcv, model$Q2)) flush.console() } if (plot_final_icv) { cinf_plotxy(model$y_pred_cv, y, xlab = "Predicted", ylab = "Experiment", main = "Scatter Plot for Cross-Validation (Internal)") abline(coef = c(0, 1)) } model }
#> function (y, kernels, alpha_grid_search = TRUE, gamma_grid_search = FALSE, #> conic_kernel_combination = FALSE, optimize_h = FALSE, mfields = c("q", #> "vdw", "logp", "abra", "abrb"), set_b_0 = FALSE, print_interm_icv = TRUE, #> plot_interm_icv = TRUE, print_final_icv = TRUE, plot_final_icv = TRUE, #> ...) #> { #> var_y <- var(y) #> ncomp <- length(y) #> alphas <- kernels$alphas #> nalphas <- length(alphas) #> nfields <- length(mfields) #> Q2_best_of_best <- -1000 #> model <- list() #> fr <- function(par_list) { #> try_current_hyper_params <- function() { #> m <- build_krr_model(Km, y, gamma, set_b_0) #> if (is.null(m)) #> return() #> y_pred <- Km %*% m$a + m$b #> regr <- regr_param(y_pred, y) #> RMSE <- regr$RMSE #> R2 <- regr$R2 #> cv <- cv_krr(10, Km, y, gamma) #> RMSEcv <- cv$RMSE #> Q2 <- cv$R2 #> y_pred_cv <- cv$y_pred_cv #> minQ2R2 <- min(Q2, R2) #> if (minQ2R2 > minQ2R2_best) { #> minQ2R2_best <<- minQ2R2 #> RMSE_best <<- RMSE #> R2_best <<- R2 #> RMSEcv_best <<- RMSEcv #> Q2_best <<- Q2 #> if (alpha_grid_search) { #> ialpha_best <<- ialpha #> } #> alpha_best <<- alpha #> gamma_best <<- gamma #> a_best <<- m$a #> b_best <<- m$b #> y_pred_best <<- y_pred #> y_pred_cv_best <<- y_pred_cv #> } #> } #> R2_best <- -1000 #> RMSE_best <- -1 #> Q2_best <- -1000 #> RMSEcv_best <- -1 #> minQ2R2_best <- -1000 #> alpha_best <- -1 #> gamma_best <- -1 #> a_best <- NULL #> b_best <- NULL #> y_pred_best <- double() #> y_pred_cv_best <- double() #> h <- list() #> pos <- 1 #> if (optimize_h) { #> if (conic_kernel_combination) { #> for (f in 1:nfields) h[[mfields[f]]] <- abs(par_list[f]) #> } #> else { #> for (f in 1:nfields) h[[mfields[f]]] <- par_list[f] #> } #> pos <- pos + nfields #> if (!alpha_grid_search) { #> alpha <- par_list[pos] #> pos <- pos + 1 #> } #> if (!gamma_grid_search) #> gamma <- par_list[pos] #> } #> else { #> for (f in 1:nfields) h[[mfields[f]]] <- 1 #> if (!alpha_grid_search) { #> alpha <- par_list[pos] #> pos <- pos + 1 #> } #> if (!gamma_grid_search) #> gamma <- par_list[pos] #> } #> if (alpha_grid_search) { #> for (ialpha in 1:length(alphas)) { #> alpha <- alphas[[ialpha]] #> Km <<- matrix(0, nrow = ncomp, ncol = ncomp) #> for (f in 1:nfields) { #> Km <<- Km + h[[mfields[f]]] * kernels[[mfields[f]]][[ialpha]] #> } #> if (gamma_grid_search) { #> for (gamma in gamma_list) { #> try_current_hyper_params() #> } #> } #> else { #> try_current_hyper_params() #> } #> } #> alpha_best <- alphas[ialpha_best] #> } #> else { #> Km <<- cmf_calc_combined_kernels_1alpha(kernels, #> h, alpha, alphas) #> if (gamma_grid_search) { #> for (gamma in gamma_list) { #> try_current_hyper_params() #> } #> } #> else { #> try_current_hyper_params() #> } #> } #> if (Q2_best > Q2_best_of_best) { #> Q2_best_of_best <<- Q2_best #> if (print_interm_icv) { #> for (f in 1:nfields) cat(sprintf("h_%s=%g ", #> mfields[f], h[[mfields[f]]])) #> cat(sprintf("\n")) #> cat(sprintf("best: alpha=%g gamma=%g RMSE=%g R2=%g RMSEcv=%g Q2=%g \n", #> alpha_best, gamma_best, RMSE_best, R2_best, #> RMSEcv_best, Q2_best)) #> flush.console() #> } #> if (plot_interm_icv) { #> cinf_plotxy(y_pred_cv_best, y, xlab = "Predicted", #> ylab = "Experiment", main = "Scatter Plot for Cross-Validation (Internal)") #> abline(coef = c(0, 1)) #> } #> model$gamma <<- gamma_best #> for (f in 1:nfields) { #> model$h[[mfields[f]]] <<- h[[mfields[f]]] #> model$alpha[[mfields[f]]] <<- alpha_best #> if (alpha_best < alphas[1]) #> model$alpha[[mfields[f]]] <<- alphas[1] #> if (alpha_best > alphas[nalphas]) #> model$alpha[[mfields[f]]] <<- alphas[nalphas] #> } #> model$R2 <<- R2_best #> model$RMSE <<- RMSE_best #> model$y_pred <<- y_pred_best #> model$y_exp <<- y #> model$Q2 <<- Q2_best #> model$RMSEcv <<- RMSEcv_best #> model$y_pred_cv <<- y_pred_cv_best #> model$a <<- a_best #> model$b <<- b_best #> } #> RMSEcv_best #> } #> par_list <- list() #> if (optimize_h) #> par_list <- c(par_list, rep(1, nfields)) #> if (!alpha_grid_search) #> par_list <- c(par_list, 0.25) #> if (!gamma_grid_search) #> par_list <- c(par_list, 5) #> npars <- length(par_list) #> if (npars > 1) { #> res <- optim(par_list, fr) #> } #> else if (npars == 1) { #> res <- optimize(fr, c(0.01, 20)) #> } #> else { #> res <- fr() #> } #> model$set_b_0 <- set_b_0 #> if (print_final_icv) { #> for (f in 1:nfields) cat(sprintf("h_%s=%g ", mfields[f], #> model$h[[mfields[f]]])) #> cat(sprintf("\n")) #> cat(sprintf("final: alpha=%g gamma=%g RMSE=%g R2=%g RMSEcv=%g Q2=%g \n", #> model$alpha[1], model$gamma, model$RMSE, model$R2, #> model$RMSEcv, model$Q2)) #> flush.console() #> } #> if (plot_final_icv) { #> cinf_plotxy(model$y_pred_cv, y, xlab = "Predicted", ylab = "Experiment", #> main = "Scatter Plot for Cross-Validation (Internal)") #> abline(coef = c(0, 1)) #> } #> model #> } #> <environment: 0x11119d430>