R code for Poisson Simulation

 

 

sim.nb.data <- function(m, l, n, ncom, P, prob.bin, odp){

 

  library(mgcv)

  library(SimSeq)

 

  # lists for results

  data.list <- list()

  score.list <- list()

 

  # RNA-Seq -data to compute mean read counts (to be sampled later)

  data(kidney)

  sample.data <- t(kidney$counts)

  means <- apply(sample.data, 2, mean)

 

  # latent variables from normal distribution

  md<-as.data.frame(matrix(rnorm(l*n),nrow=n,ncol=l))

  # boxplot(md)

 

  ci <- list()

  ci <- replicate(m, sample(1:l, rbinom(1,l,prob.bin),replace = FALSE))

 

  # design matrix to define the relationships between latent variables and new genes

  D <- matrix(0, nrow = m, ncol = l)

  for(i in 1:m){

    D[i, ci[[i]]] <- 1

  }

 

  A <- D%*%t(D)

 

  # connections

  d.up <- ifelse(upper.tri(A), A, 0)

  con <- which(d.up!=0,arr.ind = T)

  con.m <- con

 

  # true positives

  cond.pos <- matrix(0, ncol = m, nrow = m)

  cond.pos <- ifelse(d.up>0, 1, 0)

  pos.m <- cond.pos

 

  # true negatives

  cond.neg <- matrix(1, ncol = m, nrow = m)

  cond.neg <- ifelse(upper.tri(cond.neg), 1, 0)

  cond.neg <- cond.neg - cond.pos

  neg.m <- cond.neg

 

  for(p in 1:P){

   

    cat("sim = ", iter <- p, "\n")

   

    s <- matrix(0, m, m)

    s.w <- matrix(0, m, m)

   

    # empty matrix

    data.sim <- matrix(NA, nrow = n, ncol = m)

    x.new <- rep(NA, n)

    xbeta <- rep(0, l)

   

    # overdispersion

    epsilon <- rgamma(n, shape = odp, scale = 1/odp)

   

    for(k in 1:m){

 

      beta.lp <- rep(0, l)

      sign <- rep(1, length(ci[[k]]))

      beta.lp[sort(ci[[k]])] <- sign

     

      # sample expected values

      # low counts not accepted (problems when fitting nb model)

      mean.rc <- 0

      while(mean.rc < 10){

        mean.rc <- sample(means, 1)

      }

      mean.rc

     

      for(j in 1:n){ 

       

        # beta*x

        for(g in 1:l){

          xbeta[g] <- beta.lp[g]*md[j,g]

        }

        linpred <- sum(xbeta)

       

        lambda <- epsilon[j]*exp(log(mean.rc)+linpred)

        x.new[j] <- rpois(1,lambda)

       

      }

     

      data.sim[,k] <- x.new

    }

   

    data.list[[p]] <- data.sim

   

    # scale data

    data.sc <- scale(data.sim)

   

    # PLS-components

    standr <- function(x) x/as.numeric(sqrt(t(x)%*%x))

   

    tot.c <- as.vector(apply(data.sim, 1, sum))

   

    for(i in 1:m){

     

      X <- data.sc[,-i]

      y <- data.sc[,i]

     

      c <- matrix(0, m-1, ncom)

      TT <- matrix(0, nrow(data.sc), ncom)

     

      c.var <- rep(0, ncom)

      SS <- rep(0, ncom)

      var.expl <- rep(0, ncom)

     

      tX<- X

      for(k in 1:ncom){ # construct ncom PLS components

        c[,k] <- standr(t(tX)%*%y)

        TT[,k] <- tX%*%c[,k]

       

        c.var[k] <- (t(y)%*%TT[,k])/(t(TT[,k])%*%TT[,k])

        SS[k] <- (t(TT[,k])%*%TT[,k])%*%(t(c.var[k])%*%c.var[k])

        var.expl[k] <- SS[k] / sum(diag(t(y)%*%y))

       

        tX <- tX-TT[,k]%*%solve(t(TT[,k])%*%TT[,k])%*%t(TT[,k])%*%tX

      }

     

      w <- var.expl / sum(var.expl) # weights

     

      # glm with negative-binomial link

     

      temp.data <- as.data.frame(cbind(data.sim[,i], TT, tot.c))

      comp.names <- paste("C", 1:ncom, sep = '')

      colnames(temp.data) <- c('y', comp.names, 't.counts')

     

      names <- colnames(temp.data[2:(ncom+1)])

      osa1 <- paste("y~")

      osa2 <- paste(names, collapse = "+")

      form <- paste(osa1, osa2, sep = "")

      form2 <- as.formula(form)

     

      mod <- gam(form2, offset=log(t.counts), family=nb(),data=temp.data)

      summary(mod)

     

      b <- as.matrix(coef(mod)[-1])

     

      temp.beta <- c%*%b

     

      if (i == 1){

        s[i,] <- c(0,temp.beta)

      } else {

        if (i == m){

          s[i,] <- c(temp.beta,0)

        } else {

          s[i,] <- c(temp.beta[1:(i-1)],0,temp.beta[i:(m-1)])

        } 

      }

     

 

     

    }

   

   

    s <- 0.5*(s+t(s))

    s <- s-0.5*(max(s)+min(s))

    s <- s/(0.5*(max(s)-min(s)))

    diag(s) <- 1

 

    score.list[[p]] <- s

 

  }

  return(list(data.list = data.list, score.list = score.list,

              pos.m = pos.m, neg.m = neg.m, con.m = con ))

 

 

}

 

# CALL FUNCTION

 

data1 <- sim.nb.data(100, 10, 20, 3, 1000, 0.05, 10)

data2 <- sim.nb.data(1000, 10, 20, 3, 1000, 0.05, 10)

data3 <- sim.nb.data(100, 10, 100, 3, 1000, 0.05, 10)

data4 <- sim.nb.data(1000, 10, 100, 3, 1000, 0.05, 10)

 

 

###################################################

 

 

# FUNCTION COMPUTING FDR WITH DIFFERENT LEVELS OF QVALUE

 

 

fdr.fun <- function(qvals, score, pos, neg){

 

  # save the results

  tp.data <- list()

  sens.data <- list()

 

  # condition positive

  cond.pos <- pos

  # condition negative

  cond.neg <- neg

 

  # loop over all selected qvalues

 

  j <- 1

 

  for(qvalue in qvals){

    cat("qval = ", iter <- qvalue , "\n")

   

    i <- 1

   

    sens.q <- matrix(0, ncol = 5, nrow = length(score))

    tp.q <- matrix(0, ncol = 5, nrow = length(score))

    # loop over all generated data sets

    for(d in 1:length(score)){

 

    s <- score[[d]]

    s.up <- s[upper.tri(s)]

    m <- ncol(s)

   

      b <- matrix(0, m, m)

      out.density <- density(s.up, adjust = 1)

      b[upper.tri(b, diag=FALSE)] <- (dnorm(s.up,mean(s.up),sd(s.up))/approx(out.density$x, out.density$y,

                                                                      s.up, method = "linear")$y)

     

      apu <- matrix(0, m, m)

      apu[upper.tri(b, diag=FALSE)] <- 1

      apu[which(b > qvalue, arr.ind = TRUE)] <- 0

     

      fdr.m <- which(apu == 1, arr.ind=T)

     

      # test positive

      test.pos.fdr <- matrix(0, ncol = m, nrow = m)

      test.pos.fdr[fdr.m] <- 1

     

      # test negative

      test.neg.fdr <- matrix(1, ncol = m, nrow = m)

      test.neg.fdr <- ifelse(upper.tri(test.neg.fdr), 1, 0)

      test.neg.fdr[fdr.m] <- 0

     

      # true positives

      tp.m.fdr <- cond.pos+test.pos.fdr

      tp.fdr <- length(which(tp.m.fdr == 2))

     

      # false positives

      fp.m.fdr <- cond.neg + test.pos.fdr

      fp.fdr <- length(which(fp.m.fdr == 2))

     

      # true negatives

      tn.m.fdr <- cond.neg + test.neg.fdr

      tn.fdr <- length(which(tn.m.fdr == 2))

     

      # false negatives

      fn.m.fdr <- cond.pos + test.neg.fdr

      fn.fdr <- length(which(fn.m.fdr == 2))

     

      # sensitivity

      if(tp.fdr+fn.fdr == 0){

        sens.fdr <- 0

      } else {

        sens.fdr <- tp.fdr/(tp.fdr+fn.fdr)

      }

     

      # specificity

      if(fp.fdr+tn.fdr == 0){

        spec.fdr <- 0

      } else {

        spec.fdr <- tn.fdr/(fp.fdr+tn.fdr)

      }

     

      # true discovery rate

      if(tp.fdr+fp.fdr == 0){

        tdr.fdr <- 0

      } else {

        tdr.fdr <- tp.fdr/(tp.fdr+fp.fdr)

      }

     

      # true non-discovery rate

      if(tn.fdr+fn.fdr == 0){

        tnr.fdr <- 0

      } else {

        tnr.fdr <- tn.fdr/(tn.fdr+fn.fdr)

      }

     

 

     

      # save the results into ith row of a matrix

      tp.q[i,1] <- qvalue

      tp.q[i,2] <- tp.fdr

      tp.q[i,3] <- fp.fdr

      tp.q[i,4] <- tn.fdr

      tp.q[i,5] <- fn.fdr

     

      sens.q[i,1] <- qvalue

      sens.q[i,2] <- sens.fdr

      sens.q[i,3] <- spec.fdr

      sens.q[i,4] <- tdr.fdr

      sens.q[i,5] <- tnr.fdr

     

      i <- i+1

    }

   

    tp.data[[j]] <- tp.q

    sens.data[[j]] <- sens.q

   

    j <- j+1

   

  }

  return(list(tp.list = tp.data, sens.list = sens.data ))

}

 

 

## CALL FUNCTION

 

qvals <- seq(0, 1, by = 0.05)

 

FDR1 <- fdr.fun(qvals, data1$score.list, data1$pos.m, data1$neg.m)

FDR2 <- fdr.fun(qvals, data2$score.list, data2$pos.m, data2$neg.m)

FDR3 <- fdr.fun(qvals, data3$score.list, data3$pos.m, data3$neg.m)

FDR4 <- fdr.fun(qvals, data4$score.list, data4$pos.m, data4$neg.m)

 

means1 <- lapply(FDR1$sens.list, colMeans)

means2fdr <- lapply(FDR2$sens.list, colMeans)

means3 <- lapply(FDR3$sens.list, colMeans)

means4 <- lapply(FDR4$sens.list, colMeans)

 

 

# corrected SEs, qvalue = 0.2

# # list with qval = 0.2

# P <- 100

# temp1 <- wFDR4$sens.list[[4]]

# temp.mean <- mean(temp1[,2])

# temp.mean

# var.sens <- var(temp1[,2])

# se.sens <-  sqrt(1/P*(var.sens))

# round(se.sens, 3)

# var.spec <- var(temp1[,4])

# se.spec <- sqrt(1/P*(var.spec))

# round(se.spec, 3)

# var.tdr <- var(temp1[,6])

# se.tdr <- sqrt(1/P*(var.tdr))

# round(se.tdr, 3)

# var.tnr <- var(temp1[,8])

# se.tnr <- sqrt(1/P*(var.tnr))

# round(se.tnr, 3)

 

 

 

Made with Namu6