## This script analyzes and characterises the M4 module;
#v8.1
############################################################
### Check the expression time course of the M4 module genes;
############################################################
#source("SubFunction_all.R");
#source("DE_analysis.R");
#source("LoadRNAseqData.R");
require(sjPlot);

## Load the FPKM table of the M4 module; its row names are used as the
## M4 gene IDs for the rest of the script.
M4.gene.expression <- read.table(
  file = "OutputData/data.all.count.FPKM.M4.txt",
  sep = "\t", header = TRUE, row.names = 1
)
M4.gene <- rownames(M4.gene.expression)
head(M4.gene)

## Conserved M4 module: M4 genes that are also rows of the conserved
## expression table.
data.converved.expression <- read.table(
  file = "OutputData/data.gene.conserved.expression.txt",
  sep = "\t", header = TRUE, row.names = 1
)
head(data.converved.expression)
#grep("NIATv7_g38915",M4.gene);
M4.gene.conserved <- intersect(M4.gene, rownames(data.converved.expression))
length(M4.gene.conserved)

## Count the conserved M4 genes present in the TF and PK annotation tables.
data.TF <- read.table(
  file = "InputData/NIATTr2_TFannotation.txt", sep = "\t", row.names = 1
)
data.PK <- read.table(
  file = "InputData/NIATTr2_PKannotation.txt", sep = "\t", row.names = 1
)
length(intersect(M4.gene.conserved, rownames(data.TF)))
length(intersect(M4.gene.conserved, rownames(data.PK)))


length(M4.gene)
#M4.gene.expression["NIATv7_g27381",]
#data.all.count.FPKM["NIATv7_g29431",]
## Keep only the M4 genes that were induced in N. attenuata
## (Natt.up is not defined in this section; it must already exist).
M4.Nat.up <- intersect(M4.gene, Natt.up)
#length(M4.hub.sub);
length(M4.Nat.up)

## Map the induced M4 genes to array probes: probe IDs are the row names
## of the mapping table, the first remaining column is matched against genes.
probe2gene.list <- read.table(
  file = "InputData/Probe44k2Gene.tab",
  row.names = 1, header = FALSE, sep = "\t"
)
probe.list <- rownames(probe2gene.list)[probe2gene.list[[1]] %in% M4.Nat.up]

## Mean array expression per condition, reduced to the probe ID plus the
## control and OS time-course columns.
Expression.array <- read.table(
  file = "InputData/All_expression_mean_condition.txt",
  sep = "\t", header = TRUE
)
Expression.array <- Expression.array[, c(
  "ID", "CTL_1h_TL", "OS_1h_TL", "OS_5h_TL", "OS_9h_TL",
  "OS_13h_TL", "OS_17h_TL", "OS_21h_TL"
)]
head(Expression.array)
## Min-max scale a numeric vector to the [0, 1] interval.
##
## x:     numeric vector (e.g. one expression profile).
## na.rm: if TRUE, missing values are ignored when computing the range and
##        stay NA in the result. The default FALSE keeps the previous
##        behaviour (any NA makes the whole result NA).
##
## Returns a numeric vector of the same length as x. A vector without any
## spread (all values equal) is returned as zeros instead of NaN from 0/0.
fun.scale <- function(x, na.rm = FALSE) {
  #x<-as.numeric(as.character(x));
  ## Empty or all-missing input has no range; return it without the
  ## warnings that min()/range() would raise.
  if (length(x) == 0L) {
    return(numeric(0))
  }
  if (all(is.na(x))) {
    return(x * NA_real_)
  }
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  ## Flat profile: avoid dividing by zero.
  if (!is.na(span) && span == 0) {
    return(x - rng[1])
  }
  (x - rng[1]) / span
}
## Array expression of the M4 probes. grep("h", ...) keeps only the
## time-point columns (their names contain "h"; "ID" does not), so the
## result holds the control column plus the six OS time points.
Expression.array.sub <- Expression.array[
  Expression.array$ID %in% probe.list,
  grep("h", colnames(Expression.array))
]

nrow(Expression.array.sub)
## Scale every probe (row) to [0, 1].
## - apply() over rows returns one column per probe, so transpose back to
##   probes x time points.
## - The ID column is already gone, so no column is dropped here; the
##   previous "[,-1]" removed the control column CTL_1h_TL instead.
## - The previous data.frame(..., ncol=7) added a constant column named
##   "ncol" rather than setting a shape.
Expression.array.scaled <- t(apply(Expression.array.sub, 1, fun.scale))
dim(Expression.array.scaled)
Expression.array.scaled <- data.frame(Expression.array.scaled)

head(Expression.array.scaled)

## Line plot and heat map of the unscaled time course of the M4 probes.
pdf(file = "M4.hub.Expression_time_line.pdf")
Expression2Lineplot(Expression.array.sub, ylim = c(0, 20))
dev.off()
## File name corrected from "M1..." to "M4..." to match the module being
## plotted and the line-plot file above.
pdf(file = "M4.hub.Expression_time_heatmap.pdf")
Expression2Heatmap(Expression.array.sub, fixcolor = TRUE)
dev.off()

############################################################
## Check whether the M4 module is regulated by JA or not,
## using microarray data;
############################################################
## Load the pre-analyzed microarray data.
load("InputData/MicroarrayOutPut.v8")


## Check how many of these genes were induced in irAOC;
## find the common FAC-induced genes identified by RNA-seq and microarray;
## export the gene module;
M4.gene.up <- intersect(rownames(data.all.flt.sub.tur), Natt.up)
length(M4.gene.up)

## Significant probes in the WT contrast: logFC <= -log2(1.5) and
## adjusted p <= 0.05, then translated to gene IDs through probe2gene.
output.WT_WWvsFAC30m.sig <- output.WT_WWvsFAC30m[
  output.WT_WWvsFAC30m$logFC <= -log2(1.5) &
    output.WT_WWvsFAC30m$adj.P.Val <= 0.05,
]
output.WT_WWvsFAC30m.sig$Gene <- probe2gene[
  as.character(output.WT_WWvsFAC30m.sig$ProbeName), "V2"
]
## Unique gene IDs, probes without a gene assignment removed.
WT.FACinduced <- unique(as.character(
  output.WT_WWvsFAC30m.sig$Gene[!is.na(output.WT_WWvsFAC30m.sig$Gene)]
))
length(WT.FACinduced)
length(Natt.up)

## Only consider the genes that have at least one probe on the array;
output.WT_WWvsFAC30m.GeneTested <- probe2gene[
  rownames(output.WT_WWvsFAC30m), "V2"
]
output.WT_WWvsFAC30m.GeneTested <- output.WT_WWvsFAC30m.GeneTested[
  !is.na(output.WT_WWvsFAC30m.GeneTested)
]
Natt.up.arrayset <- intersect(output.WT_WWvsFAC30m.GeneTested, Natt.up)
## 82.0% genes from microarray can be detected by RNA-seq;
length(Natt.up.arrayset)
length(intersect(WT.FACinduced, Natt.up.arrayset)) / length(WT.FACinduced)

Natt.up.shared <- intersect(Natt.up.arrayset, WT.FACinduced)

M4.gene.uparrayset <- intersect(M4.gene.up, output.WT_WWvsFAC30m.GeneTested)
M4.gene.up.arrayset.induced <- intersect(M4.gene.uparrayset, WT.FACinduced)

data.M4.natt.induced.sub <- intersect(output.WT_WWvsFAC30m.GeneTested, M4.gene)

## Find the overlap between WT and irAOC: same thresholds, irAOC contrast.
output.irAOC30m.sig <- output.irAOC30m[
  output.irAOC30m$logFC <= -log2(1.5) & output.irAOC30m$adj.P.Val <= 0.05,
]
output.irAOC30m.sig$Gene <- probe2gene[
  as.character(output.irAOC30m.sig$ProbeName), "V2"
]
irAOC.FACinduced <- unique(as.character(
  output.irAOC30m.sig$Gene[!is.na(output.irAOC30m.sig$Gene)]
))
head(irAOC.FACinduced)

## Overall 85.1% of the genes that were induced in WT were also induced
## in the irAOC line.
length(intersect(WT.FACinduced, irAOC.FACinduced)) / length(WT.FACinduced)

## 87.6% are independent of JA;
length(intersect(irAOC.FACinduced, Natt.up.shared)) / length(Natt.up.shared)
#data.M4.natt.induced.sub.ConfirmedByArray.JAindependent<-intersect(irAOC.FACinduced,data.M4.natt.induced.sub.ConfirmedByArray);

## Heat map of the array-confirmed induced M4 genes, restricted to the
## 30 min samples whose column names match NEWWT or irAOC.
gene.expression.all.30m <- gene.expression.all[
  , grep("30m", colnames(gene.expression.all))
]
#gene.expression.all.30m<-gene.expression.all.30m[,-grep("TEST",colnames(gene.expression.all.30m))];
head(gene.expression.all.30m)
pdf(file = "FAC-induced.Array.Heatmap.pdf")
Expression2Heatmap(
  ExtractGeneExpression(
    arraydata = gene.expression.all.30m[
      , grep("NEWWT|irAOC", colnames(gene.expression.all.30m))
    ],
    probe2gene = probe2gene,
    gene.list = M4.gene.up.arrayset.induced
  )
)
dev.off()



##################################################################
## Intra-species specificity of M4 genes;
##################################################################
source("LoadRNAseqData.R")
## ggplot() and ggsave() are used below; attach ggplot2 here so this
## section does not depend on it being attached somewhere else.
library(ggplot2)
## NOTE(review): this path has no "OutputData/" prefix, unlike the read of
## the same file name at the start of the script -- confirm which is right.
M4.gene.expression <- read.table(
  file = "data.all.count.FPKM.M4.txt",
  sep = "\t", header = TRUE, row.names = 1
)
nrow(M4.gene.expression)
head(data.count)
Treatment <- c("FAC", "SL", "MS")
Species <- c("Nob", "Natt", "Nmi", "Npau")
#Treat<-Treatment[1];

## For every treatment x species combination, find the genes induced versus
## the WW control and the fraction of M4 genes among them. Results are
## collected in pre-allocated lists and bound once after the loop.
## Previously each iteration was bound onto an always-empty matrix, so only
## the last combination survived in induced.gene.matrix.
n.combination <- length(Treatment) * length(Species)
induced.gene.list <- vector("list", n.combination)
induction.list <- vector("list", n.combination)
combination <- 0L
for (Treat in Treatment) {
  for (spe in Species) {
    combination <- combination + 1L
    compare <- paste("WW", Treat, sep = "|")
    induced.gene <- SigGeneFinder(
      data = data.count, p = 0.05, subset.s = spe,
      logFC = log2(1.5), pair = compare, reg = "up"
    )
    ## One row per induced gene, tagged with species and treatment;
    ## zero induced genes give a zero-row table.
    induced.gene.list[[combination]] <- data.frame(
      GeneID = as.character(induced.gene),
      Species = rep(spe, length(induced.gene)),
      Treatment = rep(Treat, length(induced.gene)),
      stringsAsFactors = FALSE
    )
    ## Proportion of M4 genes induced in this combination, in percent.
    induction.rate <- length(intersect(M4.gene, induced.gene)) / length(M4.gene)
    induction.list[[combination]] <- data.frame(
      Species = spe,
      Treatment = Treat,
      Proportion = 100 * induction.rate,
      stringsAsFactors = FALSE
    )
  }
}

## Induced genes of all combinations, as a data frame and as a
## character matrix.
induced.gene.matrix <- do.call(rbind, induced.gene.list)
data.HAE.species.gene <- as.matrix(induced.gene.matrix)
head(induced.gene.matrix)

## Percentage of induced M4 genes per species and treatment.
data.HAE.induction <- do.call(rbind, induction.list)
data.HAE.induction$Species <- factor(
  data.HAE.induction$Species,
  levels = c("Nob", "Natt", "Nmi", "Npau"), ordered = TRUE
)
p <- ggplot(data.HAE.induction, aes(x = Species, y = Proportion, fill = Treatment)) +
  geom_bar(position = position_dodge(width = 0.5), stat = "identity", width = 0.5) +
  scale_y_continuous(limits = c(0, 100)) +
  theme(axis.ticks.x = element_blank()) +
  theme(
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    panel.background = element_blank(), axis.line = element_line(colour = "black")
  )
p
## Name the arguments explicitly instead of relying on partial matching of
## "file" to "filename".
ggsave(filename = "HAE_Induction_M4_Percentage1.pdf", plot = p, width = 12, height = 8)


## Heat maps of the M4 genes, one PDF per species. The columns of
## data.all.count.FPKM are selected by matching the species tag in
## their names.
pdf(file = "Natt.M4.heatmap.pdf")
Expression2Heatmap(
  data.all.count.FPKM[M4.gene, grepl("Natt", colnames(data.all.count.FPKM))]
)
dev.off()

pdf(file = "Nob.M4.heatmap.pdf")
Expression2Heatmap(
  data.all.count.FPKM[M4.gene, grepl("Nob", colnames(data.all.count.FPKM))]
)
dev.off()

pdf(file = "Npau.M4.heatmap.pdf")
Expression2Heatmap(
  data.all.count.FPKM[M4.gene, grepl("Npau", colnames(data.all.count.FPKM))]
)
dev.off()

pdf(file = "Nmi.M4.heatmap.pdf")
Expression2Heatmap(
  data.all.count.FPKM[M4.gene, grepl("Nmi", colnames(data.all.count.FPKM))]
)
dev.off()



############################################################
## plot the correlation map for JA pathway genes;
############################################################
## Install ggplot2 only when it is missing. This replaces the previous
## biocLite("ggplot2") call, which ran before biocLite.R was sourced,
## reinstalled the package on every run, and executed remote code fetched
## over plain HTTP.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
## library() fails loudly when a package is missing; require() only
## returns FALSE.
library("sjPlot")
library("ggplot2")
#detach("package:ggplot2",unload=T)
#detach("package:viridis",unload=T)

Gene.JA.pathway <- c(
  "NIATv7_g25804", "NIATv7_g32174", "NIATv7_g14521", "NIATv7_g34489",
  "NIATv7_g36098", "NIATv7_g02261", "NIATv7_g35910", "NIATv7_g35263",
  "NIATv7_g03444", "NIATv7_g28100"
)

## Gene-by-gene correlation across all samples, reshaped with base R into
## long format (columns Var1, Var2, value) and drawn as tiles. No melt()
## from an unattached package is needed; axis levels follow the order of
## Gene.JA.pathway.
cor.JA.pathway <- cor(t(data.all.flt.comb[Gene.JA.pathway, ]))
cor.JA.pathway.long <- as.data.frame(
  as.table(cor.JA.pathway),
  responseName = "value"
)
ggplot(cor.JA.pathway.long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile()
head(data.all.flt.comb[Gene.JA.pathway, ])

## Samples (rows) x JA pathway genes (columns), restricted to the columns
## whose names match WW or FAC.
data.JA <- t(data.all.flt.comb[Gene.JA.pathway, grep("WW|FAC", colnames(data.all.flt.comb))])
#data.JA<-t(data.all.sub.flt.comb[Gene.JA.pathway,grep("Nob",colnames(data.all.sub.flt.comb))]);
data.JA <- as.data.frame(data.JA)
head(data.JA)

## Phenotype table; its 5th column supplies the row names that are looked
## up with the row names of data.JA below.
Phenotype.JA <- read.csv("InputData/RNA_seq_JA.csv", header = TRUE, row.names = 5)

## Append the natural log of the "JA" phenotype column as an extra column.
data.JA <- as.data.frame(cbind(data.JA, JA = log(Phenotype.JA[row.names(data.JA), "JA"])))

pdf(file = "JA_pathway_correlation.pdf")
#sjp.corr(data.JA,sortCorrelations=F,theme="none",circleSize=5,decimals=2,hideDiagCircle=T);
sjp.corr(data.JA)
dev.off()


##############################################################
## Export the M4 module to cytoscape and visualize the network
##############################################################


## export the network to cytoscape for visualization;
#head(data.all.count.FPKM.WWFAC[M4.gene,]);
length(M4.gene)
#grep("NIATv7_g10386",M4.gene)
Expression2Cytoscape(
  data = data.all.count.FPKM.WWFAC[M4.gene, grep("FAC", colnames(data.all.count.FPKM.WWFAC))],
  FileName = "M4.module.network.NIATv7",
  threshold = 0.25, power = 6, attribute_ID = NULL
)
#head(data.all.count.FPKM.WWFAC)
## export the gene expression table for the M4 module genes;
## Annotate the M4 genes;
data.TF <- read.table(file = "InputData/NIATTr2_TFannotation.txt", sep = "\t", row.names = 1)
data.PK <- read.table(file = "InputData/NIATTr2_PKannotation.txt", sep = "\t", row.names = 1)
data.mapman <- read.table(
  file = "InputData/NIATT_v7MapMan_slim.txt",
  sep = "\t", header = TRUE, row.names = 3
)
head(data.mapman)
M4.gene.conserved.expression <- read.table(
  file = "M4.gene.conserved.expression.txt",
  header = TRUE, row.names = 1, sep = "\t"
)
head(M4.gene.conserved.expression)

## Build the annotation table in one vectorised pass instead of growing a
## matrix with rbind() once per gene.
annotated.gene <- row.names(M4.gene.conserved.expression)

## Gene.Group: 0 = neither, 1 = transcription factor, 2 = protein kinase.
## A gene found in both tables is coded as protein kinase, because the
## kinase check came last in the original per-gene logic.
is.TF <- !is.na(data.TF[annotated.gene, 1])
is.PK <- !is.na(data.PK[annotated.gene, 1])
gene.group <- ifelse(is.PK, 2L, ifelse(is.TF, 1L, 0L))

## Pathway: code of the label in the first MapMan column; 0 when the gene
## is not in the MapMan table or carries none of these labels.
pathway.code <- c(ABA = 1L, ET = 2L, GA = 3L, JA = 4L, PA = 5L, SA = 6L)
pathway <- unname(pathway.code[as.character(data.mapman[annotated.gene, 1])])
pathway[is.na(pathway)] <- 0L

data.M4.annotation <- data.frame(
  GeneID = annotated.gene,
  Gene.Group = gene.group,
  Pathway = pathway,
  stringsAsFactors = FALSE
)
head(data.M4.annotation)
## Number of transcription factors and of protein kinases.
nrow(data.M4.annotation[data.M4.annotation$Gene.Group == 1, ])
nrow(data.M4.annotation[data.M4.annotation$Gene.Group == 2, ])

write.table(
  data.M4.annotation,
  file = "data.M4.annotation4Cytoscape.txt",
  quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE
)

nrow(M4.gene.expression)
write.table(
  data.all.flt.comb[M4.gene, ],
  file = "M4.module.expression.tab",
  sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE
)
head(data.all.flt.comb)

write.table(
  Natt.stat[M4.gene, ],
  file = "M4.module.ExpressionChange.tab",
  sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE
)
M4.gene.expression["NIATv7_g17823", ]


