## This script reads all RNA-seq data and performs filtering;
## for the RNA-seq analysis script version 8;
## for v8.1
## Load the two input tables. Both files are read as tab-separated text with
## a header row, and the first column of each file supplies the row names.
##   data.all.count.FPKM : the ".TMM_normalized.FPKM" table
##   data.count          : the matching count table
## `header = TRUE` is spelled out because `T` is an ordinary variable that can
## be reassigned, whereas `TRUE` is a reserved constant.
data.all.count.FPKM <- read.table(
  file = "InputData/NIATTr2.4.A1.count.txt.TMM_normalized.FPKM",
  sep = "\t",
  header = TRUE,
  row.names = 1
)
data.count <- read.table(
  file = "InputData/NIATTr2.4.A1.count.txt",
  sep = "\t",
  header = TRUE,
  row.names = 1
)

## Report the dimensions of both tables as loaded.
dim(data.count)
dim(data.all.count.FPKM)
#head(data.all.count.FPKM)
## Remove lowly expressed genes. The intent is to keep genes whose FPKM is
## greater than 5 in at least 3 samples; the filtering itself is done by
## FILTER_EXPRESS, which is defined outside this section. Every column is
## passed as its own group label.
data.all.count.FPKM <- FILTER_EXPRESS(
  data = data.all.count.FPKM,
  group = colnames(data.all.count.FPKM),
  cutoff = 5,
  number = 3
)

## Restrict the count table to the genes that survived the FPKM filter,
## then show its first rows and its new dimensions.
data.count <- data.count[rownames(data.all.count.FPKM), ]
head(data.count)
dim(data.count)

## Put the FPKM values on a log10 scale; adding 1 first maps an FPKM of 0 to 0.
data.all.count.FPKM <- log10(data.all.count.FPKM + 1)

## Select the WW and FAC induced samples, i.e. the columns whose names contain
## "FAC" or "WW". `drop = FALSE` keeps the result two-dimensional even when
## only a single column matches, so colnames()/dim() below keep working.
data.all.count.FPKM.WWFAC <- data.all.count.FPKM[
  , grep("FAC|WW", colnames(data.all.count.FPKM)), drop = FALSE
]

## Filter out lowly expressed genes within the WW/FAC subset: FPKM greater
## than 5 in at least three samples. The cutoff is given as log10(5 + 1)
## because data.all.count.FPKM was log10(x + 1)-transformed earlier in the
## script. FILTER_EXPRESS is defined outside this section.
data.count.FPKM.WWFAC <- FILTER_EXPRESS(
  group = colnames(data.all.count.FPKM.WWFAC),
  data = data.all.count.FPKM.WWFAC,
  number = 3,
  cutoff = log10(5 + 1)
)

## Keep only the genes present in both the filtered FPKM subset and the count
## table, so the two WW/FAC tables share the same rows in the same order.
gene.consensus <- intersect(
  row.names(data.count.FPKM.WWFAC),
  row.names(data.count)
)
data.count.FPKM.WWFAC <- data.count.FPKM.WWFAC[gene.consensus, , drop = FALSE]
data.count.WWFAC <- data.count[
  gene.consensus, grep("FAC|WW", colnames(data.count)), drop = FALSE
]

## Report the dimensions of the two WW/FAC tables.
dim(data.count.FPKM.WWFAC)
dim(data.count.WWFAC)
