Bayesian Model with RStan

# Installation

## Installing rstan package

> install.packages("rstan", repos = "https://cloud.r-project.org/", dependencies=TRUE)

1. Restart R after the installation and, before loading RStan, verify that no objects created by an older version of RStan are (perhaps auto-)loaded into R.

2. Verify by executing the code below in R and checking that it returns the value 10:

> fx <- inline::cxxfunction( signature(x = "integer", y = "numeric" ) , '
+   return ScalarReal( INTEGER(x)[0] * REAL(y)[0] ) ;
+ ' )
>
> fx( 2L, 5 ) # should be 10

Now you can load rstan package

> rstan_options(auto_write = TRUE)
> options(mc.cores = parallel::detectCores())

This last option will allow you to automatically save a bare version of a compiled Stan program to the hard disk so that it does not need to be recompiled and to execute multiple Markov chains in parallel.

# Running Bayesian models: Binomial models

Assume that we have observed a random binary vector generated from a Bernoulli distribution with an unknown probability $$\theta$$. We will write R code working with RStan to estimate $$\theta$$ with a bayesian method. + Here’s a basic stan code. It has be saver in a stan file.

> sink("bernoulli.stan")
> cat("
+ data { // data provided by the user
+ int<lower=1> N;
+ int<lower=0, upper=1> x[N];
+ }
+ parameters { // parameters to be inferred
+ real<lower=0, upper=1> theta;
+ }
+ model { // probability model
+ for (i in 1:N) {
+ x[i] ~ bernoulli(theta);
+ }
+ }
+ ",fill=T)
> sink()

Let us notice that no prior distribution is defined in the code above. We consider then by default a flat prior on the parameter $$\theta$$.

We will then run in the code below 4 markov chains in parallel using rstan command.

> x_dat <- list(N = 10,
+                     x = c(1,1,1,0,0,1,0,0,1,0))
>
> fit0 <- stan(file = 'bernoulli.stan', data = x_dat,
+             iter = 1000, chains = 4)

Let us now print the result:

> ## print(fit0)

Rhat and n.eff are respectively the paramters $$\widehat{R}$$ and $$n.eff$$ already defined in the Kernel: https://www.kaggle.com/dhafer/two-sample-mean-with-bayesian-approach

Let us introduce a Beta prior in the model. This prior has to be added in the model part of the stan code.

> sink("bernoulli_beta.stan")
> cat("
+     data { // data provided by the user
+     int<lower=1> N;
+     int<lower=0, upper=1> x[N];
+     }
+     parameters { // parameters to be inferred
+     real<lower=0, upper=1> theta;
+     }
+     model { // probability model
+     for (i in 1:N) {
+     x[i] ~ bernoulli(theta);
+     }
+     theta ~ beta(2,3);
+     }
+     ",fill=T)
> sink()

Let us then run the Markov chain.

> fit1 <- stan(file = 'bernoulli_beta.stan', data = x_dat,
+             iter = 1000, chains = 4)

Here’s then the result

> print(fit1)
## Inference for Stan model: bernoulli_beta.
## 4 chains, each with iter=1000; warmup=500; thin=1;
## post-warmup draws per chain=500, total post-warmup draws=2000.
##
##         mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
## theta   0.47    0.00 0.12   0.24   0.38   0.46   0.55   0.71   804 1.01
## lp__  -10.87    0.02 0.69 -12.90 -11.03 -10.59 -10.42 -10.36  1074 1.00
##
## Samples were drawn using NUTS(diag_e) at Tue Oct  9 21:24:20 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).

# Regression Model with Stan

We will see now how to perform a regression analysis in Stan using a simulated data.

We will first write the Stan code:

> sink("regression_model.stan")
> cat("
+ data {
+   int N; //the number of observations
+   int N2; //the size of the new_X matrix
+   int K; //the number of columns in the model matrix
+   real y[N]; //the response
+   matrix[N,K] X; //the model matrix
+   matrix[N2,K] new_X; //the matrix for the predicted values
+ }
+ parameters {
+   vector[K] beta; //the regression parameters
+   real sigma; //the standard deviation
+ }
+ transformed parameters {
+   vector[N] linpred;
+   linpred <- X*beta;
+ }
+ model {
+   beta[1] ~ cauchy(0,10); //prior for the intercept following Gelman 2008
+
+   for(i in 2:K)
+    beta[i] ~ cauchy(0,2.5);//prior for the slopes following Gelman 2008
+
+   y ~ normal(linpred,sigma);
+ }
+ generated quantities {
+   vector[N2] y_pred;
+   y_pred <- new_X*beta; //the y values predicted by the model
+ }
+ ",fill=T)
> sink()

Let us run then a code to have a simulated data.

> set.seed(45679)
> #the explanatory variables
> dat<-data.frame(x1=runif(100,-2,2),x2=runif(100,-2,2))
>
> #the model
> X<-model.matrix(~x1*x2,dat)
>
> #the regression slopes
> betas<-runif(4,-1,1)
>
> #the standard deviation for the simulated data
> sigma<-1
>
> #the simulated data
> y_norm<-rnorm(100,X%*%betas,sigma)
>
> #a matrix to get the predicted y values
> new_X<-model.matrix(~x1*x2,expand.grid(x1=seq(min(dat$x1),max(dat$x1),length=20),x2=c(min(dat$x2),mean(dat$x2),max(dat$x2)))) Here’s the simulated data > head(X) ## (Intercept) x1 x2 x1:x2 ## 1 1 -1.2348436 -1.5828201 1.9545353 ## 2 1 -1.0591624 -1.4874576 1.5754591 ## 3 1 -0.7649926 -0.7619479 0.5828845 ## 4 1 0.7396569 0.9321936 0.6895035 ## 5 1 1.6214369 -1.2332682 -1.9996667 ## 6 1 0.3058477 1.4795903 0.4525294 > y_norm[1:4] ## [1] -0.4805345 -0.1069732 1.4658954 -0.2114544 > head(new_X) ## (Intercept) x1 x2 x1:x2 ## 1 1 -1.9843290 -1.989904 3.948625 ## 2 1 -1.7771681 -1.989904 3.536394 ## 3 1 -1.5700072 -1.989904 3.124164 ## 4 1 -1.3628463 -1.989904 2.711934 ## 5 1 -1.1556854 -1.989904 2.299703 ## 6 1 -0.9485245 -1.989904 1.887473 We will run now the Markov Chain with rstan > m_norm<-stan(file="regression_model.stan",data = list(N=100,N2=60,K=4,y=y_norm,X=X,new_X=new_X),pars = c("beta","sigma","y_pred")) > ## print(m_norm) Let’s make a diagnostic of the convergence of the Markov Chain > ## library(coda) > ## post_beta<-As.mcmc.list(m_norm,pars="beta") > ## plot(post_beta) > ## autocorr.diag(post_beta,1:10) > ## autocorr.plot(post_beta,1:10) > ## geweke.diag(post_beta) > ## geweke.plot(post_beta) > ## gelman.diag(post_beta) > ## gelman.plot(post_beta) We will plot the pairwise correlation between the parameters, if each parameters is adding additional independent information, the points should form a shapeless cloud. If you have strong correlation between several parameters, then you may consider dropping some as they do not add extra information. > ## pairs(m_norm,pars="beta") We can also be interested to plot the credible intervals. They are another summary for the different parameters in the models, the red bands in this graph show that the parameters have a probability of 0.8 to be within the bands. > ## plot(m_norm,pars=c("beta")) We finally plot the regression lines with their credible intervals at different x2 values. > ## #getting regression curves plus 95% credible intervals > ## new_x<-data.frame(x1=new_X[,2],x2=rep(c("Min","Mean","Max"),each=20)) > ## new_y<-extract(m_norm,pars="y_pred") > ## pred<-apply(new_y[[1]],2,quantile,probs=c(0.025,0.5,0.975)) #the median line with 95% credible intervals > ## #plot > ## plot(dat$x1,y_norm,pch=16)
> ## lines(new_x$x1[1:20],pred[2,1:20],col="red",lwd=3) > ## lines(new_x$x1[1:20],pred[2,21:40],col="orange",lwd=3)
> ## lines(new_x$x1[1:20],pred[2,41:60],col="blue",lwd=3) > ## lines(new_x$x1[1:20],pred[1,1:20],col="red",lwd=1,lty=2)
> ## lines(new_x$x1[1:20],pred[1,21:40],col="orange",lwd=1,lty=2) > ## lines(new_x$x1[1:20],pred[1,41:60],col="blue",lwd=1,lty=2)
> ## lines(new_x$x1[1:20],pred[3,1:20],col="red",lwd=1,lty=2) > ## lines(new_x$x1[1:20],pred[3,21:40],col="orange",lwd=1,lty=2)
> ## lines(new_x\$x1[1:20],pred[3,41:60],col="blue",lwd=1,lty=2)
> ## legend("topright",legend=c("Min","Mean","Max"),lty=1,col=c("red","orange","blue"),bty = "n",title = "Effect of x2 value onnthe regression")