#####################################################################################################
# EBS R Code                                                                                        #
# Used for Screening P-values as described in                                                       #
# "Empirical Bayes Screening of Many P-values with Applications to Microarray Studies" by           #
# Susmita Datta and Somnath Datta                                                                   #
# Bioinformatics, Advance Access published February 4, 2005                                         #
# Disclaimer:  This software comes with no warrantee explicit or implicit. Use at your own risk.   #
# Any damages arising from use of this software are NOT the authors Susmita Datta or Somnath Datta  #
#####################################################################################################


#///////////////////////////////////////////////////////////////////////////////////////////////////#
# EBS returns the indices of p-values that are judged to be significant by the screening procedure  #
# pvo is a vector of the original p-values, alpha is the familywise error rate, B is the number of  #
# bootstrap replicates used, h is a user selectable bandwidth used in the construction of the EB    #
# estimates of location                                                                             #
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\#

  EBS <- function(pvo,alpha=0.05,B=500,h=1) {

  Kh <- function(x)  dnorm(x/h)/h
  ngene <- length(pvo)

#======================================================================================#
# This part calculates B batches of monotonited Empirical Bayes estimates of the       #
# location of the z-transformed p-values under the null hypothesis asuming tests       #
# are independent; h is a user selectable bandwidth; ngene denotes the total number    # 
# of p-values to be screened.                                                          #  
# Depending on the power of your computer and ngene = the length of pvo, and B, this   #
# part may # need several hours. However if you are going to screen multiple samples   #
# each of same length, this part needs to be computed only once and can be stored for  #
# future screening.                                                                    #                                         
#                                                                                      #
   u.n <- u.no <- matrix(0,B,ngene)                                                    #
   for (iii in 1:B)                                                                    #
     {                                                                                 #
      zo.n <- rnorm(ngene)                                                             #
      z.n <- rep(0,ngene)                                                              #
      for (i in 1:ngene) {                                                             #
               sum1 <- sum(Kh(zo.n[i]-zo.n))                                           #
               sum2 <- sum((zo.n[i]-zo.n)*Kh(zo.n[i]-zo.n))                            #
               z.n[i] <- zo.n[i] - (h^-2)*(sum2/sum1)                                  #
                         }                                                             #
      u.n[iii,] <- z.n                                                                 #
      for (ii in (ngene-1):1) u.n[iii,ii] <- min(u.n[iii,ii+1],u.n[iii,ii])            #
     }                                                                                 #
#======================================================================================#




 
# ====================================================================================#
#We Z-transform the p-values                                            

zo <- qnorm(pvo)
zo[zo==-Inf] <- 2*min(zo[(zo!=-Inf) | (zo != Inf)])
zo[zo==Inf] <- 2*max(zo[(zo!=-Inf) | (zo != Inf)])

#======================================================================================#
#This part calculates the EB estimates of location of the transfomed p-values

z <- rep(0,ngene)
for (i in 1:ngene) {
sum1 <- sum(Kh(zo[i]-zo))
sum2 <- sum((zo[i]-zo)*Kh(zo[i]-zo))
z[i] <- zo[i] - (h^-2)*(sum2/sum1)                 
                    }
#=====================================================================================#
#This part computes the step-down p-values for the EB estimates

rk <- order(pvo)
u <- z[rk]
ct <-  rep(0,ngene)
for (ii in 1:ngene) ct[ii] <- sum(u.n[,ii] <= u[ii])  
p.n <- ct/B
for (ii in 2:ngene) p.n[ii] <- max(p.n[ii],p.n[ii-1])

#=====================================================================================#
# Flag the genes based on the step-down p-values

pos <- rk[p.n<alpha]
return(pos)
                                     }


#///////////////////////////////////////////////////////////////////////////////////////////////////
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\