# Restore the saved workspace. The rest of this script uses lin_model,
# train_ids, test_ids, y, p_pool and patient_ids without defining them, so they
# are presumably provided by this checkpoint -- TODO confirm. auc() is not
# defined here either; it may come from the checkpoint or an attached package.
load("checkpoint.RData")

library(tidyverse)
library(glmnet)
library(Matrix)

# Save the predictor matrix in the sparse (CSR) format that Stan's sparse multiply uses

# Subset the training rows once. drop = FALSE keeps a matrix even when only a
# single row is selected.
train_mat <- lin_model[train_ids, , drop = FALSE]

# Triplet (COO) view of the training matrix with 1-based indices. None of
# these objects is used again in the visible part of this script; they are
# kept so the set of variables the script defines is unchanged.
mat <- as(train_mat, "TsparseMatrix")
n <- nrow(mat)
m <- ncol(mat)
i <- mat@i + 1L
j <- mat@j + 1L
x <- mat@x
entries <- length(i)

# Column-compressed storage of the transpose is row-compressed (CSR) storage
# of the training matrix: @i holds its column indices, @p its row offsets and
# @x its stored values. Coerce explicitly so these slots exist whatever class
# t() happens to return (a triplet matrix, for example, has no @p slot).
tmat <- as(t(train_mat), "CsparseMatrix")

# Write the data as R-style assignments. Indices and offsets are shifted from
# 0-based to 1-based. deparse() may return several strings for a long vector;
# sep = "" glues them back into one expression.
# The names v / u / w presumably match the data block of the Stan model
# (column indices / row starts / values) -- TODO confirm against the model.
cat(
  "n <- ", ncol(tmat), "\n",
  "m <- ", nrow(tmat), "\n",
  "entries <- ", length(tmat@x), "\n",
  "v <- ", deparse(tmat@i + 1L), "\n",
  "u <- ", deparse(tmat@p + 1L), "\n",
  "w <- ", deparse(tmat@x), "\n",
  "y <- ", deparse(y), "\n",
  file = "staninput_csr.txt", sep = ""
)


#============================================
# Now compile and run stan model

# I could not get the RStan package to work without crashing, so I used
# the command-line version (CmdStan) instead.

# On Mac OS with homebrew:
# brew install homebrew/science/cmdstan
# cd /usr/local/Cellar/cmdstan/2.15.0
# make ~/.../sparse_glm_csr

# ./sparse_glm_csr init=0.0001 sample num_warmup=100 num_samples=100 random seed=1234 data file=staninput_csr.txt output refresh=1 file=output_csr_0.0001.csv

#============================================


# Read the sampler output; lines starting with "#" are treated as comments
# and skipped.
# NOTE(review): the cmdstan command documented above writes
# output_csr_0.0001.csv, not output_csr.csv -- confirm which file is intended.
output <- read_csv("output_csr.csv", comment = "#")
nrow(output)

# One intercept value and one row of "beta.*" coefficients per row of output.
intercept <- output$intercept
beta <- as.matrix(select(output, starts_with("beta.")))

# Linear predictor: one row per row of output, one column per row of
# lin_model. intercept has one value per row of the product, so R's
# column-major recycling adds intercept[s] to every element of row s.
linear_pred <- as.matrix(beta %*% t(lin_model)) + intercept
p_stans <- plogis(linear_pred)

# Average the probabilities over the rows of output, giving one value per row
# of lin_model.
p_stan <- colMeans(p_stans)

# Blend in 10% with previous predictions
w <- 0.1
p_pool_2 <- (1 - w) * p_pool + w * p_stan
auc(y, p_pool_2[train_ids])

# tibble() replaces the deprecated data_frame().
tibble(
  Patient_ID = patient_ids[test_ids],
  Diabetes = p_pool_2[test_ids]
) %>%
  write_csv("prediction.csv")