ADAPTS (Automated Deconvolution Augmentation of Profiles for Tissue Specific cells) Vignette #1: Matrix Augmentation

Part I - A: Deconvolve purified CD138+ tumor samples using LM22 Note: ADAPTSdata and ADAPTSdata2 come from GitHub devtools::install_github(“sdanzige/ADAPTSdata”) devtools::install_github(“sdanzige/ADAPTSdata2”)

library(ADAPTS)
library(ADAPTSdata)
library(ADAPTSdata2)
library(preprocessCore)
library(pheatmap)
library(mclust)
#> Package 'mclust' version 5.4.2
#> Type 'citation("mclust")' for citing this R package in publications.
doParallel::registerDoParallel(cores = parallel::detectCores())

set.seed(42)

LM22 <- ADAPTS::LM22
cd138p <- ADAPTSdata::cd138p
wbm <- ADAPTSdata2::wbm
tumorPer <- ADAPTSdata2::tumorPer
LM22.source <- ADAPTSdata2::fullLM22
addMGSM27 <- ADAPTSdata::addMGSM27

cellCounts <- estCellPercent.DCQ(LM22, cd138p)
tumorPer$LM22.CD138p.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory'),
    match(make.names(tumorPer$celfileRnas),colnames(cellCounts))])

incBool <- !is.na(tumorPer$CD138_Post_Sort) 
curData <- tumorPer[incBool,]
rmse.lm22.cd138p <- sqrt(mean((curData$CD138_Post_Sort - curData$LM22.CD138p.tumor)^2))
ct.lm22.cd138p <- cor.test(curData$CD138_Post_Sort, curData$LM22.CD138p.tumor)
ct.lm22.cd138p.spear <- suppressWarnings(cor.test(curData$CD138_Post_Sort, curData$LM22.CD138p.tumor, method = 'spearman'))
titleStr <- paste0('LM22 Deconvolution of CD138+ Samples:\nRMSE = ', round(rmse.lm22.cd138p), '; Rho = ', round(ct.lm22.cd138p$estimate,2), '; Spear =', round(ct.lm22.cd138p.spear$estimate,2))
plot(curData$CD138_Post_Sort, curData$LM22.CD138p.tumor, main=titleStr, xlab='% Purity', 
     ylab='Deconvolved Tumor %')

cellCounts <- estCellPercent.DCQ(LM22, wbm)
tumorPer$LM22.WBM.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory'),
    match(make.names(tumorPer$celfileRnbx),colnames(cellCounts))])

tumorPer$PC_Mean <- rowMeans(tumorPer[,c('PC_AspNum','PC_BmbxNum')], na.rm=TRUE)
incBool <- !is.nan(tumorPer$PC_Mean) & !is.na(tumorPer$LM22.WBM.tumor)
curData <- tumorPer[incBool,] 
rmse.lm22.WBM <- sqrt(mean((curData$PC_Mean - curData$LM22.WBM.tumor)^2))
ct.lm22.WBM <- cor.test(curData$PC_Mean, curData$LM22.WBM.tumor)
ct.lm22.WBM.spear <- suppressWarnings(cor.test(curData$PC_Mean, curData$LM22.WBM.tumor, method = 'spearman'))
titleStr <- paste0('LM22 Deconvolution of WBM Samples:\nRMSE = ', round(rmse.lm22.WBM), '; Rho = ', round(ct.lm22.WBM$estimate,2), '; Spear =', round(ct.lm22.cd138p.spear$estimate,2))
plot(curData$PC_Mean, curData$LM22.WBM.tumor, main=titleStr, xlab='% WBM Tumor', ylab='Deconvolved Tumor %')

LM22p <- LM22
augData <- addMGSM27[match(rownames(LM22), rownames(addMGSM27)), ]#grepl('PlasmaMemory',colnames(addMGSM27)) | grepl('MM.plasma.cell',colnames(addMGSM27))]
LM22p$PlasmaMemory <- rowSums(augData[,grepl('PlasmaMemory',colnames(augData))], na.rm=TRUE)
LM22p$MM.plasma.cell <- rowSums(augData[,grepl('MM.plasma.cell',colnames(augData))], na.rm=TRUE)
LM22p$osteoclast <- rowSums(augData[,grepl('osteoclast',colnames(augData))], na.rm=TRUE)
LM22p$osteoblast <- rowSums(augData[,grepl('osteoblast',colnames(augData))], na.rm=TRUE)
LM22p$adipocyte <- rowSums(augData[,grepl('adipocyte',colnames(augData))], na.rm=TRUE)

Part II - A: Deconvolve purified CD138+ tumor samples using LM22 + ‘MM.plasma.cell’,‘PlasmaMemory’

cellCounts <- estCellPercent.DCQ(LM22p, cd138p)
tumorPer$LM22p.CD138p.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory','MM.plasma.cell','PlasmaMemory'),
    match(make.names(tumorPer$celfileRnas),colnames(cellCounts))])

incBool <- !is.na(tumorPer$CD138_Post_Sort)
curData <- tumorPer[incBool,]
rmse.lm22p.cd138p <- sqrt(mean((curData$CD138_Post_Sort - curData$LM22p.CD138p.tumor)^2))
ct.lm22p.cd138p <- cor.test(curData$CD138_Post_Sort, curData$LM22p.CD138p.tumor)
ct.lm22p.cd138p.spear <- suppressWarnings(cor.test(curData$CD138_Post_Sort, curData$LM22p.CD138p.tumor, method='spearman'))
titleStr <- paste0('LM22 + 5 Deconvolution of CD138+ Samples:\nRMSE = ', round(rmse.lm22p.cd138p), '; Rho = ', round(ct.lm22p.cd138p$estimate,2), '; Spear = ', round(ct.lm22p.cd138p.spear$estimate,2))
plot(curData$CD138_Post_Sort, curData$LM22p.CD138p.tumor, main=titleStr, xlab='% Purity', ylab='Deconvolved Tumor %')

cellCounts <- estCellPercent.DCQ(LM22p, wbm)
tumorPer$LM22p.WBM.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory','MM.plasma.cell','PlasmaMemory'),
    match(make.names(tumorPer$celfileRnbx),colnames(cellCounts))])

tumorPer$PC_Mean <- rowMeans(tumorPer[,c('PC_AspNum','PC_BmbxNum')], na.rm=TRUE)
incBool <- !is.nan(tumorPer$PC_Mean) & !is.na(tumorPer$LM22p.WBM.tumor)
curData <- tumorPer[incBool,] 
rmse.lm22p.WBM <- sqrt(mean((curData$PC_Mean - curData$LM22p.WBM.tumor)^2))
ct.lm22p.WBM <- cor.test(curData$PC_Mean, curData$LM22p.WBM.tumor)
ct.lm22p.WBM.spear <- suppressWarnings(cor.test(curData$PC_Mean, curData$LM22p.WBM.tumor, method='spearman'))
titleStr <- paste0('LM22 + 5 Deconvolution of WBM Samples:\nRMSE = ', round(rmse.lm22p.WBM), '; Rho = ', round(ct.lm22p.WBM$estimate,2), '; Spear = ', round(ct.lm22p.WBM.spear$estimate,2) )
plot(curData$PC_Mean, curData$LM22p.WBM.tumor, main=titleStr, xlab='% WBM Tumor', ylab='Deconvolved Tumor %')

olGenes <- rownames(LM22.source)[rownames(LM22.source) %in% rownames(addMGSM27)]
allData <- cbind(LM22.source[olGenes,], addMGSM27[olGenes,])
allData.nq <- as.data.frame(normalize.quantiles(as.matrix(allData)))
rownames(allData.nq) <- rownames(allData)
colnames(allData.nq) <- colnames(allData)
allData <- allData.nq

#Remove low variance genes to make the example run faster
allData.noNA <- allData[!apply(allData, 1, function(x){any(is.na(x))}),]
vars.noNA <- apply(allData.noNA, 1, var)

hist(vars.noNA)

#This will take a while.  To make it faster, filter genes by variance
subData <- allData[vars.noNA>2,]
gList <- rankByT(subData)

olgenes <- rownames(subData)

fullData <- allData[olgenes,colnames(LM22.source)]
colnames(fullData) <- sub('\\.[0-9]+$', '', colnames(fullData)) #Strip any trailing numbers added by make.names()

newData <- allData[olgenes,colnames(addMGSM27)]
colnames(newData) <- sub('\\.[0-9]+$', '', colnames(newData)) #Strip any trailing numbers added by make.names()

#MGSM27 <- AugmentSigMatrix(origMatrix=LM22, fullData=fullData, newData=newData, gList=gList, nGenes=1:100, plotToPDF=FALSE, imputeMissing=TRUE, condTol=1.01, postNorm=TRUE, minSumToRem=NA, addTitle=NULL, autoDetectMin=TRUE, calcSpillOver=FALSE)
#Note:  For the Identifying a High-risk Cellular Signature in the Multiple Myeloma Bone Marrow Microenvironment, we used the call commented out above, which resulted in 601 genes.  It looks like the package used to find the minimima has changed, results in a smaller number of genes (~570).  We recommend trying this option and looking at the condition number curves to see if it makes sense.  Future version will probably upgrade the 'autoDetectMin' option to be smarter. 

MGSM27 <- AugmentSigMatrix(origMatrix=LM22, fullData=fullData, newData=newData, gList=gList, nGenes=1:100, plotToPDF=FALSE, imputeMissing=TRUE, condTol=1.01, postNorm=TRUE, minSumToRem=NA, addTitle=NULL, autoDetectMin=FALSE, calcSpillOver=FALSE)
#>   missForest iteration 1 in progress...
#> randomForest 4.6-14
#> Type rfNews() to see new features/changes/bug fixes.
#> done!
#>   missForest iteration 2 in progress...done!
#>   missForest iteration 3 in progress...done!
#>   missForest iteration 4 in progress...done!
#>   missForest iteration 5 in progress...done!
#>   missForest iteration 6 in progress...done!
#>   missForest iteration 1 in progress...done!
#>   missForest iteration 2 in progress...done!
#>   missForest iteration 3 in progress...done!
#>   missForest iteration 4 in progress...done!
#>   missForest iteration 5 in progress...done!

cellCounts <- estCellPercent.DCQ(MGSM27, cd138p)
tumorPer$MGSM27.CD138p.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory','MM.plasma.cell','PlasmaMemory'),
    match(make.names(tumorPer$celfileRnas),colnames(cellCounts))])

incBool <- !is.na(tumorPer$CD138_Post_Sort)
curData <- tumorPer[incBool,]
rmse.mgsm27.cd138p <- sqrt(mean((curData$CD138_Post_Sort - curData$MGSM27.CD138p.tumor)^2))
ct.mgsm27.cd138p <- cor.test(curData$CD138_Post_Sort, curData$MGSM27.CD138p.tumor)
ct.mgsm27.cd138p.spear <- suppressWarnings(cor.test(curData$CD138_Post_Sort, curData$MGSM27.CD138p.tumor, method='spearman'))
titleStr <- paste0('MGSM27 Deconvolution of CD138+ Samples:\nRMSE = ', round(rmse.mgsm27.cd138p), '; Rho = ', round(ct.mgsm27.cd138p$estimate,2), '; Spear = ', round(ct.mgsm27.cd138p.spear$estimate,2) )
plot(curData$CD138_Post_Sort, curData$MGSM27.CD138p.tumor, main=titleStr, xlab='% Purity', ylab='Deconvolved Tumor %')

cellCounts <- estCellPercent.DCQ(MGSM27, wbm)
tumorPer$MGSM27.WBM.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory','MM.plasma.cell','PlasmaMemory'),
    match(make.names(tumorPer$celfileRnbx),colnames(cellCounts))])

tumorPer$PC_Mean <- rowMeans(tumorPer[,c('PC_AspNum','PC_BmbxNum')], na.rm=TRUE)
incBool <- !is.nan(tumorPer$PC_Mean) & !is.na(tumorPer$LM22.WBM.tumor)
curData <- tumorPer[incBool,] 
rmse.mgsm27.WBM <- sqrt(mean((as.numeric(curData$PC_Mean) - curData$MGSM27.WBM.tumor)^2))
ct.mgsm27.WBM <- cor.test(as.numeric(curData$PC_Mean), curData$MGSM27.WBM.tumor)
ct.mgsm27.WBM.spear <- suppressWarnings(cor.test(as.numeric(curData$PC_Mean), curData$MGSM27.WBM.tumor, method='spearman'))
titleStr <- paste0('MGSM27 Deconvolution of WBM Samples:\nRMSE = ', round(rmse.mgsm27.WBM), '; Rho = ', round(ct.mgsm27.WBM$estimate,2), '; Spear = ', round(ct.mgsm27.WBM.spear$estimate,2) )
plot(curData$PC_Mean, curData$MGSM27.WBM.tumor, main=titleStr, xlab='% WBM Tumor', ylab='Deconvolved Tumor %')

olGenes <- rownames(LM22.source)[rownames(LM22.source) %in% rownames(addMGSM27)]
allData <- cbind(LM22.source[olGenes,], addMGSM27[olGenes,])
allData.nq <- as.data.frame(normalize.quantiles(as.matrix(allData)))
rownames(allData.nq) <- rownames(allData)
colnames(allData.nq) <- colnames(allData)
allData <- allData.nq

nGenes <- 100

seedMat <- allData.noNA[names(tail(sort(vars.noNA), nGenes)),]
colnames(seedMat) <- sub('\\.[0-9]+$', '', colnames(seedMat))
seedMat.sig <- lapply(unique(colnames(seedMat)), function(x){ rowMeans(seedMat[,colnames(seedMat)==x], na.rm=TRUE)})
seedMat.sig <- do.call(cbind, seedMat.sig)
colnames(seedMat.sig) <- unique(colnames(seedMat))
pheatmap(seedMat.sig)

subData.noNA <- allData.noNA[vars.noNA>2,]
colnames(allData.noNA) <- sub('\\.[0-9]+$', '', colnames(allData.noNA))
gList.noNA <- rankByT(subData.noNA)

topAug <- AugmentSigMatrix(origMatrix=seedMat.sig, fullData=allData.noNA, newData=allData.noNA, gList=gList.noNA, nGenes=1:100, plotToPDF=FALSE, imputeMissing=TRUE, condTol=1.01, postNorm=TRUE, minSumToRem=NA, addTitle=NULL, autoDetectMin=FALSE, calcSpillOver=FALSE)

topAug.remSeed <- topAug[!(rownames(topAug) %in% rownames(seedMat.sig)),]

topAug2 <- AugmentSigMatrix(origMatrix=topAug.remSeed, fullData=allData.noNA, newData=allData.noNA, gList=gList.noNA, nGenes=1:100, plotToPDF=FALSE, imputeMissing=TRUE, condTol=1.01, postNorm=TRUE, minSumToRem=NA, addTitle=NULL, autoDetectMin=FALSE, calcSpillOver=FALSE)

Part VI - A: Deconvolve purified CD138+ tumor samples using de-novo deconvolution matrix

cellCounts <- estCellPercent.DCQ(topAug2, cd138p)
tumorPer$DeNovo.CD138p.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory','MM.plasma.cell','PlasmaMemory'),
    match(make.names(tumorPer$celfileRnas),colnames(cellCounts))])

incBool <- !is.na(tumorPer$CD138_Post_Sort)
curData <- tumorPer[incBool,]
rmse.DeNovo.cd138p <- sqrt(mean((curData$CD138_Post_Sort - curData$DeNovo.CD138p.tumor)^2))
ct.DeNovo.cd138p <- cor.test(curData$CD138_Post_Sort, curData$DeNovo.CD138p.tumor)
ct.DeNovo.cd138p.spear <- suppressWarnings(cor.test(curData$CD138_Post_Sort, curData$DeNovo.CD138p.tumor, method='spearman'))
titleStr <- paste0('DeNovo Deconvolution of CD138+ Samples:\nRMSE = ', round(rmse.DeNovo.cd138p), '; Rho = ', round(ct.DeNovo.cd138p$estimate,2), '; Spear = ', round(ct.DeNovo.cd138p.spear$estimate,2) )
plot(curData$CD138_Post_Sort, curData$DeNovo.CD138p.tumor, main=titleStr, xlab='% Purity', ylab='Deconvolved Tumor %')

cellCounts <- estCellPercent.DCQ(topAug2, wbm)
tumorPer$DeNovo.WBM.tumor <- colSums(cellCounts[c('Plasma.cells','B.cells.memory','MM.plasma.cell','PlasmaMemory'),
    match(make.names(tumorPer$celfileRnbx),colnames(cellCounts))])

tumorPer$PC_Mean <- rowMeans(tumorPer[,c('PC_AspNum','PC_BmbxNum')], na.rm=TRUE)
incBool <- !is.nan(tumorPer$PC_Mean) & !is.na(tumorPer$DeNovo.WBM.tumor)
curData <- tumorPer[incBool,] 
rmse.DeNovo.WBM <- sqrt(mean((curData$PC_Mean - curData$DeNovo.WBM.tumor)^2))
ct.DeNovo.WBM <- cor.test(curData$PC_Mean, curData$DeNovo.WBM.tumor)
ct.DeNovo.WBM.spear <- suppressWarnings(cor.test(curData$PC_Mean, curData$DeNovo.WBM.tumor, method='spearman'))
titleStr <- paste0('DeNovo Deconvolution of WBM Samples:\nRMSE = ', round(rmse.DeNovo.WBM), '; Rho = ', round(ct.DeNovo.WBM$estimate,2), '; Spear = ', round(ct.DeNovo.WBM.spear$estimate,2) )
plot(curData$PC_Mean, curData$DeNovo.WBM.tumor, main=titleStr, xlab='% WBM Tumor', ylab='Deconvolved Tumor %')

outMatrix <- matrix(0, nrow=4, ncol=4, dimnames=list(c('LM22','LM22 + 5','MGSM27','DeNovo MGSM27'),c('CD138+.RMSE','CD138+.Rho','WBM.RMSE','WBM.Rho')))
outMatrix[1,1] <- rmse.lm22.cd138p
outMatrix[1,2] <- ct.lm22.cd138p$estimate
outMatrix[1,3] <- rmse.lm22.WBM
outMatrix[1,4] <- ct.lm22.WBM$estimate

outMatrix[2,1] <- rmse.lm22p.cd138p
outMatrix[2,2] <- ct.lm22p.cd138p$estimate
outMatrix[2,3] <- rmse.lm22p.WBM
outMatrix[2,4] <- ct.lm22p.WBM$estimate
  
outMatrix[3,1] <- rmse.mgsm27.cd138p
outMatrix[3,2] <- ct.mgsm27.cd138p$estimate
outMatrix[3,3] <- rmse.mgsm27.WBM
outMatrix[3,4] <- ct.mgsm27.WBM$estimate

outMatrix[4,1] <- rmse.DeNovo.cd138p
outMatrix[4,2] <- ct.DeNovo.cd138p$estimate
outMatrix[4,3] <- rmse.DeNovo.WBM
outMatrix[4,4] <- ct.DeNovo.WBM$estimate

ADAPTS (Automated Deconvolution Augmentation of Profiles for Tissue Specific cells) Vignette #1: Matrix Augmentation

Samuel Danziger, PhD

2019-09-07