# Export the training predictors for a CmdStan sparse logistic GLM, then
# blend the Stan posterior-mean predictions into the existing pooled
# predictions and write the submission file.
#
# Objects used below but not created here (lin_model, train_ids, test_ids,
# y, p_pool, patient_ids) are presumably restored from checkpoint.RData --
# TODO confirm.

library(tidyverse)
library(glmnet)
library(Matrix)

load("checkpoint.RData")

# Triplet (COO) view of the training predictors. Matrix keeps 0-based
# indices in @i / @j, hence the + 1L to obtain 1-based indices.
# NOTE(review): mat, n, m, i, j, x and entries are not read again in this
# script; they are kept in case interactive or downstream code uses them.
# drop = FALSE keeps a matrix even when train_ids selects a single row.
mat <- as(lin_model[train_ids, , drop = FALSE], "TsparseMatrix")
n <- nrow(mat)
m <- ncol(mat)
i <- mat@i + 1L
j <- mat@j + 1L
x <- mat@x
entries <- length(i)

# Save predictor matrix in the sparse (CSR) format that Stan's sparse
# multiply uses. The column-compressed storage of t(X) has the same layout
# as the row-compressed storage of X: @i holds X's column indices, @p its
# row pointers and @x the non-zero values. Coercing to CsparseMatrix
# guarantees these slots exist whatever sparse class lin_model uses.
tmat <- as(t(lin_model[train_ids, , drop = FALSE]), "CsparseMatrix")

# One "name <- value" assignment per line; tmat is transposed, so its
# columns are the training rows (n) and its rows are the predictors (m).
cat(
  file = "staninput_csr.txt", sep = "",
  "n <- ", ncol(tmat), "\n",
  "m <- ", nrow(tmat), "\n",
  "entries <- ", length(tmat@x), "\n",
  "v <- ", deparse(tmat@i + 1L), "\n",
  "u <- ", deparse(tmat@p + 1L), "\n",
  "w <- ", deparse(tmat@x), "\n",
  "y <- ", deparse(y), "\n"
)

#============================================
# Now compile and run the Stan model.
# I could not get the R Stan package to work without crashing, so I used
# the command line version.
# On Mac OS with homebrew:
#   brew install homebrew/science/cmdstan
#   cd /usr/local/Cellar/cmdstan/2.15.0
#   make ~/.../sparse_glm_csr
#   ./sparse_glm_csr init=0.0001 sample num_warmup=100 num_samples=100 \
#     random seed=1234 data file=staninput_csr.txt \
#     output refresh=1 file=output_csr_0.0001.csv
#
# NOTE(review): the command above writes output_csr_0.0001.csv, while the
# code below reads output_csr.csv -- confirm which file is intended (the
# output may have been renamed or copied by hand).
#============================================

# Text after "#" in the CmdStan CSV is ignored by read_csv().
output <- read_csv("output_csr.csv", comment = "#")
nrow(output)

intercept <- output$intercept
beta <- output %>%
  select(starts_with("beta.")) %>%
  as.matrix()

# Linear predictor for every row of lin_model (not only the training rows):
# one row per draw, one column per observation. `intercept` has one entry
# per draw and is recycled down the columns, so each draw gets its own
# intercept.
ls <- as.matrix(beta %*% t(lin_model)) + intercept
p_stans <- plogis(ls)

# Average the per-draw probabilities to get one prediction per observation.
p_stan <- colMeans(p_stans)

# Blend in 10% with previous predictions
w <- 0.1
p_pool_2 <- (1 - w) * p_pool + w * p_stan

# AUC of the blended predictions on the training rows.
auc(y, p_pool_2[train_ids])

# tibble() replaces the deprecated data_frame(); column names are unchanged.
tibble(
  Patient_ID = patient_ids[test_ids],
  Diabetes = p_pool_2[test_ids]
) %>%
  write_csv("prediction.csv")