轉錄組分析處理流程

1.fastqc

2.STAR

##build_index
STAR --runThreadN 9 --runMode genomeGenerate \
--genomeDir /data/XXXXX/bio/task_LE-miR/03-2miRSeq/index \
--genomeFastaFiles /data/XXXXXbio/task_LE-miR/03-2miRSeq/chrom.37.fa \
--sjdbGTFfile /data/XXXXX/bio/task_LE-miR/03-2miRSeq/miRNA37note.gtf \
--sjdbOverhang 149

###STAR_ALIGN
ls -d LE*|while read LE; do echo $LE; STAR --runThreadN 5 --genomeDir /data/XXXXX/bio/task_LE-miR/03HumanSequence/index \
--readFilesCommand zcat \
--readFilesIn /data/XXXXX/bio/task_LE-miR/01trim_out/${LE}/*_1.fq.gz \
/data/XXXXX/bio/task_LE-miR/01trim_out/${LE}/*_2.fq.gz \
--outFileNamePrefix /data/XXXXX/bio/task_LE-miR/04align_out/${LE}_ \
--outSAMtype BAM SortedByCoordinate \
--outBAMsortingThreadN 5 \
--quantMode TranscriptomeSAM GeneCounts>>${LE}.log; done

3.RSEM

###RSEM-prepare
rsem-prepare-reference --gtf /data/XXXXX/bio/task_LE-miR/03-2miRSeq/miRNA37note.gtf \
/data/XXXXX/bio/task_LE-miR/03-2miRSeq/chrom.37.fa \
/data/XXXXX/bio/task_LE-miR/05-2RSEM/RSEM_prepare

##RSEM calculate expression
ls -d LE*|while read LE; do echo $LE; rsem-calculate-expression --paired-end --no-bam-output \
--alignments -p 10 \
-q ${LE}/*_Aligned.toTranscriptome.out.bam \
/data/XXXXX/bio/task_LE-miR/05-2RSEM/RSEM_prepare \
/data/XXXXX/bio/task_LE-miR/05-2RSEM/rsem_out/${LE}_ >>${LE}.log; done

4.DeSeq2

####将表達定量結果轉換為矩陣
rsem-generate-data-matrix LE*_.isoforms.results >output.matrix

###DESeq2.R
setwd("/data/XXXXX/bio/task_LE-miR/06DESeq2")


##讀取檔案
input_data <- read.table("deseq2_input-2.txt", header=TRUE, row.names=1)

##取整
input_data <-round(input_data, digits = 0)

##1-I型 2-II型 3-正常+良性 4-補充良性
1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 1, 1, 1, 1, 1, 2, 
3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 2, 3, 2, 2, 1, 1, 3, 3, 
1, 4, 4, 4, 4, 4, 4, 4


##準備工作
input_data <- as.matrix(input_data)
condition <- factor(c(1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2,
 2, 3, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 
 2, 3, 2, 2, 1, 1, 3, 3, 1, 4, 4, 4, 4, 4, 4, 4))
coldata <- data.frame(row.names=colnames(input_data), condition)


##加載包
library(DESeq2)


##建構dds矩陣
dds <- DESeqDataSetFromMatrix(countData=input_data, colData=coldata, design=~condition)


##差異分析
dds <- DESeq(dds)


##提取結果
res <- results(dds)


##看結果
summary(res)


##按p值排序
res <- res[order(res$padj), ]
resdata <-merge(as.data.frame(res), as.data.frame(counts(dds,normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] <- "isoform"


##輸出結果檔案
write.table(resdata, file="diffexpr-results.txt", sep="\t", quote=F, row.names=F)


##可視化展示
plotMA(res)

##提取差異結果
awk '{if($3>0.5 && $7 < 0.05) print $0}' diffexpr-I,II.txt > upgene.txt
awk '{if($3<-0.5 && $ 7< 0.05) print $0}' diffexpr-I,II.txt > downgene.txt

5.differ gene

轉錄組分析處理流程

繼續閱讀

WGS資料分析處理流程

新一代測序技術資料分析第一講（綜述）

<二代測序> 下載下傳 NCBI sra 檔案

NGS 分析流程（二）一、Call Somatic