Entering edit mode
Hi,
I found different DEGs for the legacy and the harmonized datasets. I would like to know whether my scripts are correct:
# Differential expression analysis (tumor vs. normal) run twice with
# TCGAbiolinks: once on the harmonized GDC data (HTSeq counts) and once on
# the legacy archive, so that the two resulting DEG tables can be compared.
#
# Objects expected to exist before this script runs (not defined here):
#   CancerProject, samplesTN, samplesTP,
#   listSamples_tumor_modelo_1, listSamples_normal_modelo_1,
#   UseRaw_afterFilter()

# Harmonized dataset ----

queryDown <- GDCquery(
  project = CancerProject,
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  barcode = c(samplesTN$Sample.ID, samplesTP$V1)
)
GDCdownload(query = queryDown)
dataPrep <- GDCprepare(query = queryDown)

dataProcessing <- TCGAanalyze_Preprocessing(
  object = dataPrep,
  cor.cut = 0.6,
  datatype = "HTSeq - Counts"
)
# The harmonized pipeline normalizes against geneInfoHT, the legacy pipeline
# below against geneInfo.
dataNorm <- TCGAanalyze_Normalization(
  tabDF = dataProcessing,
  geneInfo = geneInfoHT,
  method = "geneLength"
)
dataFilt <- TCGAanalyze_Filtering(
  tabDF = dataNorm,
  method = "quantile",
  qnt.cut = 0.25
)

# NOTE(review): UseRaw_afterFilter() is defined elsewhere; it is presumably
# restricting dataPrep to what survived filtering -- confirm.
dataPrep_raw <- UseRaw_afterFilter(dataPrep, dataFilt)

# Barcodes and sample-type codes are both read from dataPrep_raw. Taking
# positions from dataPrep_raw and applying them to dataPrep$barcode selects
# the wrong samples whenever the two objects differ in their columns.
# %in% matches the code exactly instead of as a regex substring.
datasmTP <- dataPrep_raw$barcode[dataPrep_raw$shortLetterCode %in% "TP"]
datasmTN <- dataPrep_raw$barcode[dataPrep_raw$shortLetterCode %in% "NT"]

datadownDEGs <- TCGAanalyze_DEA(
  mat1 = dataFilt[, datasmTN],
  mat2 = dataFilt[, datasmTP],
  Cond1type = "Normal",
  Cond2type = "Tumor",
  fdr.cut = 0.01,
  logFC.cut = 1,
  method = "glmLRT"
)

# Legacy dataset ----

query_modelo_1 <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Gene expression",
  data.type = "Gene expression quantification",
  experimental.strategy = "RNA-Seq",
  # sample.type = c("Primary Tumor", "Solid Tissue Normal"),
  platform = "Illumina HiSeq",
  file.type = "results",
  barcode = c(
    listSamples_tumor_modelo_1$Sample.ID,
    listSamples_normal_modelo_1$V1
  ),
  legacy = TRUE
)
GDCdownload(query_modelo_1)
BRCARnaseqSE_modelo_1 <- GDCprepare(query_modelo_1)

BRCARnaseq_CorOutliers_modelo_1 <- TCGAanalyze_Preprocessing(
  BRCARnaseqSE_modelo_1,
  cor.cut = 0.6
)
dataNorm_modelo_1 <- TCGAanalyze_Normalization(
  tabDF = BRCARnaseq_CorOutliers_modelo_1,
  method = "geneLength",
  geneInfo = geneInfo
)
dataFilt_modelo_1 <- TCGAanalyze_Filtering(
  tabDF = dataNorm_modelo_1,
  method = "quantile",
  qnt.cut = 0.25
)

samplesNT_modelo_1 <- BRCARnaseqSE_modelo_1$barcode[
  BRCARnaseqSE_modelo_1$shortLetterCode %in% "NT"
]
samplesTP_modelo_1 <- BRCARnaseqSE_modelo_1$barcode[
  BRCARnaseqSE_modelo_1$shortLetterCode %in% "TP"
]

# Diff.expr.analysis (DEA)
dataDEGs_modelo_1 <- TCGAanalyze_DEA(
  mat1 = dataFilt_modelo_1[, samplesNT_modelo_1],
  mat2 = dataFilt_modelo_1[, samplesTP_modelo_1],
  Cond1type = "Normal",
  Cond2type = "Tumor",
  fdr.cut = 0.01,
  logFC.cut = 1,
  method = "glmLRT"
)

# The two expression tables must be passed in the same order as the two
# condition labels: "Tumor" goes with the TP samples, "Normal" with the NT
# samples. Passing NT first would attach each label to the other group.
dataDEGsFiltLevel <- TCGAanalyze_LevelTab(
  dataDEGs_modelo_1,
  "Tumor",
  "Normal",
  dataFilt_modelo_1[, samplesTP_modelo_1],
  dataFilt_modelo_1[, samplesNT_modelo_1]
)

sessionInfo()
Thanks