*********************************************************************************** R code for the SVPLS algortihm *************************************************** *********************************************************************************** # The function 'svpls()' returns the significant genes detected from their simultaneous testing of differential expression at a certain level of the false discovery rate, under the best ANCOVA model selected by the AIC. Arguments ----------- # k1 and k2 denote respectively, the number of subjects under the two different varieties. # Y is the log-transformed gene expression matrix # pmax denotes the maximum number of surrogate variables to be considered in the analysis (by default it is taken as 3) # fdr is the specified level of the false discovery rate (by default it is taken as 0.05) Values ------- # Index of the best ANCOVA model # Positive genes detected from the best ANCOVA model with the multiple testing by Benjamini-Hochberg correction at the preassigned FDR level. # AIC for the best ANCOVA model. ************************************************************************************ ------------------------------------------------------------------------------------ library(pls) svpls <- function(k1,k2,Y,pmax,fdr=0.05){ svpls_ancova <- function(k1,k2,Y,pmax,fdr){ G <- nrow(Y) k <- k1 + k2 n <- G*k S <- cv <- numeric() err <- matrix(0,G,k) S2 <- S22 <- C <- matrix(0,pmax,pmax) T <- matrix(0,pmax,n) a <- matrix(0,pmax,k) est_gv1 <- est_gz1 <- g_var1 <- U <- U2 <- numeric() q3 <- x1 <- x2 <- x3 <- x5 <- t <- pval <- numeric() c1 <- c2 <- c3 <- c4 <- c5 <- c6 <- c7 <- c8 <- numeric() m_lab1 <- matrix(rep(apply(Y[,1:k1],1,mean),k1),G,k1,byrow = F) m_lab2 <- matrix(rep(apply(Y[,(k1+1):k],1,mean),k2),G,k2,byrow = F) e <- Y - cbind(m_lab1,m_lab2) pls <- mvr(t(e) ~ t(Y),ncomp = pmax,method = "oscorespls") sc <- scores(pls) Z1 <- matrix(rep(sc[,1],G),G,k,byrow = TRUE) for (l in 1:pmax) S[l] <- sum(sc[1:k1,l]) - sum(sc[(k1+1):k,l]) m <- mean(sc[1:k1,1]) S3 <- sum(sc[1:k1,1]^2) - sum(sc[(k1+1):k,1]^2) W <- sum(sc[1:k1,1]^2) + sum(sc[(k1+1):k,1]^2) M2 <- mean(sc[1:k1,1]^2) M <- (k1 - k2)/k D <- (mean(sc[1:k1,1]) - mean(sc[,1]))/(1 - (S[1]/W)*mean(sc[1:k1,1])) D1 <- 1+S[1]*D D2 <- D3 <- D D5 <- M2 - mean(sc[1:k1,1])^2 D4 <- (S[1]/k - m)/D5 co <- cov(as.vector(t(Y[,1:k1])),as.vector(t(Z1[,1:k1])))*(1-1/(G*k1)) U1 <- (mean(Y[,1:k1]) - mean(Y) + co*D4)/(1-M) y <- as.matrix(as.vector(t(Y))) for (l in 1:pmax){ for (u in 1:pmax){ S22[l,u] <- sum(sc[1:k1,l]*sc[1:k1,u]) - sum(sc[(k1+1):k,l]*sc[(k1+1):k,u]) S2[l,u] <- sum(sc[1:k1,l]*sc[1:k1,u]) + sum(sc[(k1+1):k,l]*sc[(k1+1):k,u]) } } for (s in 1:pmax){ T2[s] <- cov(sc[1:k1,s],sc[1:k1,1])*((k1-1)/k1)/D5 U[s] <- S22[1,s]/D5 + D4*S[s]/(1-M) U2[s] <- (1/(1-M))*(mean(sc[1:k1,s]) + D4*cov(sc[1:k1,s],sc[1:k1,1])*(1-1/k1)) } for (s in 1:pmax){ for (u in 1:k){ if (u <= k1)a[s,u] <- (S[s]/(1-M))*(1/(G*k1) - 1/(G*k)) - (1/G)*sc[u,s] + (U[s]/(G*k1))*(sc[u,1] - m) if (u > k1) a[s,u] <- -(S[s]/(1-M))*(1/(G*k)) - (1/G)*sc[u,s] } } for (l in 1:pmax){ for (u in 1:pmax){ C[l,u] <- S[l]*U2[u] + (S22[1,l]/D5)*cov(sc[1:k1,u],sc[1:k1,1])*(k1-1)/k1 - S2[u,l] } } for (s in 1:pmax) T[s,] <- rep(a[s,],G) P <- solve(C)%*%T beta_hat <- P%*%y q1 <- 1 - S[1]*m/W - M q2 <- S[1]/k - m + S3*m/W q4 <- m/W x4 <- q1/(G*k*(1-M)) m <- mean(sc[1:k1,1]) for (l in 1:pmax) q3[l] <- S2[l,1]*m/W - mean(sc[1:k1,l]) for (p in 1:k1){ x1[p] <- 1/k1 - 1/k - q4*sc[p,1] x3[p] <- ((q2/D5 - q1*D4/(1-M))*(sc[p,1] - m) - (q1/(1-M))*(1-k1/k))/(G*k1) } for (p in 1:k2) x2[p] <- 1/k + q4*sc[k1+p,1] for (l in 1:pmax) x5[l] <- q1*U2[l] + q3[l] - (q2/D5)*cov(sc[1:k1,l],sc[1:k1,1])*(1-1/k1) est_v1 <- U1 - sum(beta_hat*U2) for (l in 1:pmax) cv[l] <- beta_hat[l]*cov(sc[1:k1,1],sc[1:k1,l])*(1-1/k1)/D5 est_vz1 <- co/D5 - sum(cv) for (i in 1:G){ est_gv1[i] <- (sum(Y[i,1:k1]*x1) - sum(Y[i,(k1+1):k]*x2) + sum(rep(x3,G)*as.vector(t(Y[,1:k1]))) + x4*sum(Y[,(k1+1):k]) + sum(x5*beta_hat))/q1 est_gz1[i] <- (sum(Y[i,]*Z1[i,]) - S[1]*est_v1 - sum(S2[,1]*beta_hat) - est_vz1*S3 - est_gv1[i]*S[1])/W } T <- P for (p in 1:k){ if (p <= k1){ cnt1 <- cnt11 <- cnt22 <- 0 for (l in 1:pmax){ cnt11 <- cnt11 + 2*T[l,p]*x1[p]*x5[l] cnt22 <- cnt22 + 2*x3[p]*x5[l]*G*T[l,p] } c1[p] <- x1[p]^2 + G*x3[p]^2 + G*sum(outer(T[,p]*x5,T[,p]*x5)) c3[p] <- cnt11 c7[p] <- 2*x1[p]*x3[p] c8[p] <- cnt22 } if (p > k1){ cnt2 <- cnt3 <- cnt4 <- 0 for (l in 1:pmax){ cnt3 <- cnt3 + (-2*T[l,p]*x2[p-k1]*x5[l]) cnt4 <- cnt4 + (2*x4*G*T[l,p]*x5[l]) } c2[p-k1] <- x2[p-k1]^2 + G*x4^2 + G*sum(outer(T[,p]*x5,T[,p]*x5)) c4[p-k1] <- cnt3 c5[p-k1] <- cnt4 c6[p-k1] <- -2*x2[p-k1]*x4 } } g_var1 <- (sum(c1) + sum(c2) + sum(c3) + sum(c4) + sum(c5) + sum(c6) + sum(c7) + sum(c8))/q1^2 A1 <- Y[,1:k1] - apply(Y,1,mean) A2 <- Y[,(k1+1):k] - apply(Y,1,mean) m1 <- matrix(rep(est_v1 + est_gv1,k1),G,k1,byrow = F)*(M-1) m2 <- matrix(rep(est_v1 + est_gv1,k2),G,k2,byrow = F)*(M+1) b1 <- matrix(rep(est_vz1*(S[1]/k - sc[1:k1,1]),G),G,k1,byrow = TRUE) b2 <- matrix(rep(est_vz1*(S[1]/k + sc[(k1+1):k,1]),G),G,k2,byrow = TRUE) fn <- function(u) sum(beta_hat*sc[u,]) mat1 <- matrix(rep(apply(as.matrix(c(1:k1)),1,fn),G),G,k1,byrow = TRUE) mat2 <- matrix(rep(apply(as.matrix(c((k1+1):k)),1,fn),G),G,k2,byrow = TRUE) err <- cbind(A1+m1+b1-mat1-est_gz1*Z1[,1:k1],A2+m2+b2-mat2-est_gz1*Z1[,(k1+1):k]) sse <- sum(err^2) mse <- sse/(n-3*G-pmax) s2_mle <- sse/n aic <- -2*((-n/2)*(log(2*pi)+log(s2_mle)) - (0.5/s2_mle)*sse - 3*G - pmax - 1) for (i in 1:G){ t[i] <- est_gv1[i]/sqrt(g_var1*mse) pval[i] <- 2*(1 - pt(abs(t[i]),n-3*G-pmax)) } p <- sort(pval) sig <- 0 gene <- index <- rep(0,G) for (i in 1:G){ if (p[i] <= i*fdr/G){ sig <- sig + 1 gene[i] <- order(pval)[i] index[i] <- i } } index <- index[index != 0] return(list(gene[index],pval,aic)) } val <- function(u) return(svpls_ancova(k1,k2,Y,u,fdr)) model_res <- vector("list",length = pmax) model_res <- apply(as.matrix(c(1:pmax)),1,val) model_aic <- numeric() for (i in 1:pmax) model_aic[i] <- model_res[[i]][[3]] opt_model <- which(model_aic == min(model_aic)) sig_genes.opt <- model_res[[opt_model]][[1]] aic_opt <- model_res[[opt_model]][[3]] return(list(opt_model,sig_genes.opt,aic_opt)) } *************************************************************************************** ***************************************************************************************