#v8.1
##########################################
## Identify differentially expressed genes;
##########################################
# Per-species gene sets returned by SigGeneFinder() for the "WW|FAC" pair with
# a 1.5-fold change cutoff (logFC = log2(1.5)) and p = 0.05.
# NOTE(review): SigGeneFinder() is defined elsewhere; its result is only
# counted with length() here, so it is presumably a vector of gene IDs.

# Up-regulated sets (reg = "up").
Natt.up <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Natt",
                         logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "up")
length(Natt.up)
#write.table(Natt.up,file="Nat.upregulated.txt",quote=F,row.names=F)

Nacu.up <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nacu",
                         logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "up")
Nlin.up <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nlin",
                         logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "up")
Nob.up <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nob",
                        logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "up")
Nmi.up <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nmi",
                        logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "up")
Npau.up <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Npau",
                         logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "up")

# Down-regulated sets (reg = "down").
Natt.down <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Natt",
                           logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "down")
Nacu.down <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nacu",
                           logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "down")
Nlin.down <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nlin",
                           logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "down")
Nob.down <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nob",
                          logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "down")
Nmi.down <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nmi",
                          logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "down")
Npau.down <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Npau",
                           logFC = log2(1.5), p = 0.05, pair = "WW|FAC", reg = "down")

# Summary table: one row per species and direction holding the set size.
# The columns are created with their final names and types directly, so no
# character-to-numeric round trip is needed afterwards.
df <- data.frame(
  Species = rep(c("Natt", "Nacu", "Npau", "Nlin", "Nob", "Nmi"), times = 2),
  Induction = rep(c("up", "down"), each = 6),
  Count = as.numeric(c(
    length(Natt.up), length(Nacu.up), length(Npau.up),
    length(Nlin.up), length(Nob.up), length(Nmi.up),
    length(Natt.down), length(Nacu.down), length(Npau.down),
    length(Nlin.down), length(Nob.down), length(Nmi.down)
  )),
  stringsAsFactors = FALSE
)

# Mean and standard deviation of the set sizes across species, per direction.
mean(df$Count[df$Induction == "up"])
sd(df$Count[df$Induction == "up"])

mean(df$Count[df$Induction == "down"])
sd(df$Count[df$Induction == "down"])

library(plotly)
library(plyr)
# library() stops immediately if ggplot2 is missing; require() would only
# return FALSE and let the ggplot() call below fail with a less clear error.
library(ggplot2)

# Fix the order in which species appear along the x axis.
df$Species <- factor(df$Species,
                     levels = c("Nob", "Nlin", "Natt", "Nmi", "Nacu", "Npau"),
                     ordered = TRUE)

# Mirrored bar chart: "up" counts are drawn above zero, "down" counts are
# negated so that they are drawn below zero.
p <- ggplot(df, aes(Species)) +
  geom_bar(data = df[df$Induction == "up", ],
           aes(y = Count, fill = Species), stat = "identity") +
  geom_bar(data = df[df$Induction == "down", ],
           aes(y = -Count, fill = Species), stat = "identity")

p
# Name both arguments explicitly instead of relying on `file` partially
# matching `filename` and the plot falling into the next free position.
ggsave(filename = "OutputData/data.EdgeR.FACinduced.count.pdf", plot = p)
max(df$Count[df$Induction == "down"])
max(df$Count[df$Induction == "up"])

##############################################
## Plot Venn diagrams for up- and down-
## regulated genes among different species.
##############################################
## export raw data to a table;
nrow(data.count.WWFAC)

# Select the significantly regulated genes from one species' result table.
#
# stat:      data frame with numeric `logFC` and `FDR` columns and gene IDs as
#            row names.
# species:   label written into the `Species` column of the result.
# direction: "up" keeps rows with logFC > lfc; "down" keeps rows with
#            logFC <= -lfc.
# fdr:       rows are kept only when FDR < fdr.
# lfc:       absolute log fold-change cutoff.
#
# Returns the selected rows of `stat` with `Species` and `GeneID` columns
# appended (in that order) and the row names reset. A table without any
# significant row yields a zero-row data frame with the same columns.
SelectSigGenes <- function(stat, species, direction = c("up", "down"),
                           fdr = 0.05, lfc = log2(1.5)) {
  direction <- match.arg(direction)
  # The "up" bound is exclusive and the "down" bound inclusive.
  if (direction == "up") {
    pass.fc <- stat$logFC > lfc
  } else {
    pass.fc <- stat$logFC <= -lfc
  }
  # which() skips NA comparisons instead of turning them into all-NA rows.
  sig <- stat[which(stat$FDR < fdr & pass.fc), , drop = FALSE]
  # rep() keeps the assignment valid when no row was selected.
  sig$Species <- rep(species, nrow(sig))
  sig$GeneID <- row.names(sig)
  row.names(sig) <- NULL
  sig
}

# Full result table per species for the "WW|FAC" pair (allresults = TRUE).
# NOTE(review): SigGeneFinder() is defined elsewhere; the code below relies on
# these tables having logFC and FDR columns and gene IDs as row names.
Natt.stat <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Natt", pair = "WW|FAC", allresults = TRUE)
Nacu.stat <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nacu", pair = "WW|FAC", allresults = TRUE)
Nlin.stat <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nlin", pair = "WW|FAC", allresults = TRUE)
Nob.stat <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nob", pair = "WW|FAC", allresults = TRUE)
Nmi.stat <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Nmi", pair = "WW|FAC", allresults = TRUE)
Npau.stat <- SigGeneFinder(data = data.count.WWFAC, subset.s = "Npau", pair = "WW|FAC", allresults = TRUE)

# Spot checks on individual genes.
Natt.stat["NIATv7_g38915", ]
data.count.WWFAC["NIATv7_g30972", ]

# Every species goes through the same cutoffs so that the per-species gene
# sets intersected further down are comparable.
Natt.stat.sig.up <- SelectSigGenes(Natt.stat, "Natt", "up")
Natt.stat.sig.down <- SelectSigGenes(Natt.stat, "Natt", "down")
head(Natt.stat)
Natt.stat["NIATv7_g15285", ]
max(Natt.stat$logFC)
data.count.WWFAC["NIATv7_g32174", grep("Nob", colnames(data.count.WWFAC))]
write.table(Natt.stat, file = "Natt.stat.txt", col.names = TRUE, quote = FALSE, row.names = TRUE, sep = "\t")

Nacu.stat.sig.up <- SelectSigGenes(Nacu.stat, "Nacu", "up")
Nacu.stat.sig.down <- SelectSigGenes(Nacu.stat, "Nacu", "down")

Nlin.stat.sig.up <- SelectSigGenes(Nlin.stat, "Nlin", "up")
Nlin.stat.sig.down <- SelectSigGenes(Nlin.stat, "Nlin", "down")

Nob.stat.sig.up <- SelectSigGenes(Nob.stat, "Nob", "up")
Nob.stat.sig.down <- SelectSigGenes(Nob.stat, "Nob", "down")
head(Nob.stat.sig.up[Nob.stat.sig.up$GeneID == "NIATv7_g14832", ])
data.count.WWFAC["NIATv7_g14832", grep("Nob", colnames(data.count.WWFAC))]

Nmi.stat.sig.up <- SelectSigGenes(Nmi.stat, "Nmi", "up")
Nmi.stat.sig.down <- SelectSigGenes(Nmi.stat, "Nmi", "down")

Npau.stat.sig.up <- SelectSigGenes(Npau.stat, "Npau", "up")
Npau.stat.sig.down <- SelectSigGenes(Npau.stat, "Npau", "down")

head(Natt.stat.sig.down)
#intersect(row.names(Natt.stat.sig.up),row.names(Nacu.stat.sig.up));

# Stack all significant rows (both directions, all species) into one table.
data.induced <- rbind(Natt.stat.sig.up, Natt.stat.sig.down,
                      Nacu.stat.sig.up, Nacu.stat.sig.down,
                      Nlin.stat.sig.up, Nlin.stat.sig.down,
                      Nob.stat.sig.up, Nob.stat.sig.down,
                      Nmi.stat.sig.up, Nmi.stat.sig.down,
                      Npau.stat.sig.up, Npau.stat.sig.down)
head(data.induced)
nrow(data.induced)
write.table(data.induced, file = "OutputData/data.induced.EdgeR.all.txt", sep = "\t", quote = FALSE)
#nrow(data.induced[data.induced$Species=="Natt" & data.induced$logFC>=log2(1.5),])
nrow(Natt.stat.sig.up)

# Gene IDs up-regulated in Natt, Nlin and Nacu alike.
data.Natt.Nacu.Nlin.sig <- Reduce(intersect, list(as.character(Natt.stat.sig.up$GeneID),
                                                  as.character(Nlin.stat.sig.up$GeneID),
                                                  as.character(Nacu.stat.sig.up$GeneID)))

data.converved <- data.Natt.Nacu.Nlin.sig
length(data.converved)
# Expression rows of those shared genes, looked up by gene ID.
data.converved.expression <- data.all.count.FPKM[data.converved, ]
#data.converved.expression["NIATv7_g38915",];

write.table(data.converved.expression, file = "OutputData/data.gene.conserved.expression.txt",
            sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
head(data.converved.expression)

#head(M4.gene)
#M4.gene.conserved<-intersect(data.Natt.Nacu.Nlin.sig,M4.gene);
#grep("NIATv7_g12923",M4.gene.conserved)
#grep("NIATv7_g21011",as.character(Nacu.stat.sig.up$GeneID));

#length(data.Natt.Nacu.Nlin.sig);

#length(intersect(data.Natt.Nacu.Nlin.sig,M4.gene))/length(M4.gene)

##################################################
## Identify genes that were induced in all species.
##################################################
# Gene IDs that occur in the up-regulated table of each of the six species:
# pull the GeneID column out of every table, then intersect them in turn.
data.EdgeR.all.sig <- Reduce(
  intersect,
  lapply(
    list(Nmi.stat.sig.up, Natt.stat.sig.up, Nlin.stat.sig.up,
         Nacu.stat.sig.up, Npau.stat.sig.up, Nob.stat.sig.up),
    function(tab) as.character(tab$GeneID)
  )
)

# Annotation rows of the shared genes, looked up by gene ID.
data.EdgeR.all.annotation <- data.annotation[data.EdgeR.all.sig, ]
head(data.annotation)

# All rows of the combined table (every species and direction) whose gene
# belongs to the shared set.
data.EdgeR.sig.allspe <- data.induced[is.element(data.induced$GeneID, data.EdgeR.all.sig), ]
head(data.EdgeR.all.annotation)

write.table(data.EdgeR.sig.allspe,
            file = "OutputData/data.EdgeR.sig.allspe.txt",
            sep = "\t", col.names = TRUE, quote = FALSE)
write.table(data.EdgeR.all.annotation,
            file = "OutputData/data.EdgeR.sig.allspe.annotation.txt",
            sep = "\t", col.names = TRUE, quote = FALSE)
length(data.EdgeR.all.sig)
##################################################################
## plot out the overview on FAC-induced transcriptomic responses.
##################################################################
# library() stops with a clear error when a package is missing; require()
# would only return FALSE and let a later call fail instead.
library(gplots)
library(proxy)

# Working copy of the WW/FAC expression table used for the overview plots.
data.fpkm.flt <- data.all.count.FPKM.WWFAC
dim(data.fpkm.flt)
head(data.fpkm.flt)

# Column-by-column correlation matrix and its long-format (pairwise) table.
data.fpkm.cor <- cor(data.fpkm.flt)
data.fpkm.cor.tab <- as.data.frame.table(data.fpkm.cor)
head(data.fpkm.flt)

# First three characters of every column name, one entry per column.
# NOTE(review): presumably these prefixes identify the species of each
# sample - confirm against the column naming scheme.
species <- substr(colnames(data.fpkm.flt), 1, 3)

# Set the main diagonal of a similarity table to 1.
#
# data: a matrix, or any object as.matrix() can convert to one.
#
# Returns the converted matrix with every element [i, i] replaced by 1; all
# other elements and the dimnames are left untouched. A matrix without rows
# is returned unchanged.
FillSim <- function(data = NULL) {
  data <- as.matrix(data)
  # `diag<-` covers the empty case, where looping over 1:nrow would walk
  # through c(1, 0) and index out of bounds.
  diag(data) <- 1
  return(data)
}
# Drop the rows whose interquartile range does not exceed a cutoff.
#
# data:   data frame or matrix with one row per feature; the values of each
#         row must be convertible with as.numeric().
# cutoff: rows with IQR <= cutoff are removed.
#
# Returns `data` restricted to the rows with IQR > cutoff, indexed as
# data[keep, ], so the usual dimension dropping of `[` applies.
IQRFilter <- function(data = NULL, cutoff = 0.5) {
  # nrow() also works for matrices that carry no row names.
  row.iqr <- vapply(
    seq_len(nrow(data)),
    function(i) IQR(as.numeric(data[i, ])),
    numeric(1)
  )
  # Select the rows to keep rather than negating a list of rows to remove:
  # a negated empty index would select no rows at all.
  keep <- row.iqr > cutoff
  return(data[keep, ])
}

data.fpkm.cor.tab.sub <- matrix(ncol = 4, nrow = 0)
#spe<-species[5]
head(data.fpkm.flt)
# library() stops with a clear error when a package is missing.
library(proxy)
library(gplots)

# Colour breaks shared by all heat maps: 100 break points between 0.85 and 1,
# matching the 99 colours of bluered(99). Loop-invariant, so computed once.
breaks <- seq(0.85, 1, length.out = 100)

# One sample-similarity heat map per column-name prefix. `species` holds one
# entry per column, so iterate over the distinct values; otherwise the same
# PDF would be recomputed and overwritten once per sample column.
for (spe in unique(species)) {
  # Columns whose name matches the current prefix.
  data.tmp <- data.fpkm.flt[, grep(spe, colnames(data.fpkm.flt))]
  # NOTE(review): FILTER_EXPRESS() is defined elsewhere; presumably it drops
  # lowly expressed rows - confirm the meaning of `number` and `cutoff`.
  data.tmp <- FILTER_EXPRESS(data.tmp, group = colnames(data.tmp), number = 3, cutoff = log10(5 + 1))
  #data.tmp<-IQRFilter(data.tmp, cutoff=0.5);
  # Pairwise similarity between the columns of data.tmp (rows after t()).
  data.tmp.sim <- simil(t(data.tmp), pairwise = TRUE, upper = TRUE)
  # Convert to a full matrix with the diagonal set to 1.
  data.tmp.sim <- FillSim(data.tmp.sim)
  pdf(file = paste0("OutputData/HeatMap", spe, "TMM.FPKM.pdf"))
  # Close the PDF device even when plotting fails, so a failed species does
  # not leave an open device behind.
  tryCatch(
    heatmap.2(data.tmp.sim, col = bluered(99), vline = FALSE, hline = FALSE,
              density.info = "none", trace = "none",
              dendrogram = "none", scale = "none", breaks = breaks,
              Rowv = TRUE, Colv = TRUE),
    finally = dev.off()
  )
}



