## Identify network and perform network analysis;
## final version for eLife
## 2016.10.19
## load data
#source("Scripts/LoadRNAseqData.R");
#source("Scripts/SubFunction_all.R");
require(sva)
## Quick look at the full FPKM table (created outside this section).
head(data.all.count.FPKM)

## Normalize the expression data among species, i.e. remove the
## species-level variation by treating species as the ComBat batch.
sample.info <- matrix(nrow = 0, ncol = 2)

## The species label is the first three characters of each sample column name.
species <- substr(colnames(data.count.FPKM.WWFAC), start = 1, stop = 3)
dim(data.count.FPKM.WWFAC)
length(species)

## Drop genes whose expression does not vary at all across samples
## (row variance of exactly zero) before the batch adjustment.
var.data <- apply(data.count.FPKM.WWFAC, 1, var)
head(var.data)
data.count.FPKM.WWFAC <- data.count.FPKM.WWFAC[var.data != 0, ]
head(data.count.FPKM.WWFAC)
dim(data.count.FPKM.WWFAC)

## Batch adjustment with species as batch and no additional covariates.
data.all.flt.comb <- ComBat(
  dat = data.count.FPKM.WWFAC,
  batch = species,
  mod = NULL
)
dim(data.all.flt.comb)

## Keep the 5000 most highly connected genes for the network analysis.
head(data.all.flt.comb)

## Soft connectivity of every gene (genes must be in columns, hence t()).
k.all <- softConnectivity(datE = t(data.all.flt.comb), power = 6)

## Rank genes by decreasing connectivity; ties are broken by position so
## that exactly 5000 rows are retained. Row order of the input is preserved.
connectivity.rank <- rank(-k.all, ties.method = "first")
data.all.flt.sub <- data.all.flt.comb[connectivity.rank <= 5000, ]
dim(data.all.flt.sub)

## Determine the soft-threshold power used to construct the network.
## WGCNA.power() and WGCNA.plot() are not defined in this file
## (presumably project helpers from Scripts/SubFunction_all.R -- confirm).
head(data.all.flt.sub)
data.input.outlier <- WGCNA.power(data.all.flt.sub)
head(data.all.flt.sub)
colnames(data.all.flt.sub)

## Phenotype table; the fifth column of the CSV supplies the row names.
phenotype <- read.csv(
  file = "InputData/RNA_seq_JA.csv",
  header = TRUE,
  row.names = 5
)

## Build the modules with power 10 and deepsplit 1.
data.input.module.spe <- WGCNA.plot(
  data = data.all.flt.sub,
  power = 10,
  file = "FAC_allspeSplit1.TMM.FPKM",
  plotPDF = FALSE,
  TOM = FALSE,
  logTrait = TRUE,
  deepsplit = 1,
  JAmax = TRUE,
  phenotype = phenotype
)
#save(data.input.module.spe,file="InputData/data.input.module.spe.NIATTr2.4.Rdata");
#load("InputData/data.input.module.spe.NIATTr2.4.Rdata")

## Unpack the four elements of the result by position. Judging from how they
## are used below: [[1]] module colour per gene, [[2]] per-gene kME table
## (columns such as kMEturquoise), [[4]] gene significance; [[3]] is not used
## in this section -- meaning to be confirmed against WGCNA.plot().
data.input.module.spe.color <- data.input.module.spe[[1]]
data.input.module.spe.MEs <- data.input.module.spe[[2]]
data.input.module.spe.num <- data.input.module.spe[[3]]
data.input.module.spe.sig <- data.input.module.spe[[4]]
head(data.input.module.spe.MEs)

## Check which module the candidate genes were assigned to.
## LOX3
data.input.module.spe.color[row.names(data.all.flt.sub) == "NIATv7_g32174"]
## WIPK
data.input.module.spe.color[row.names(data.all.flt.sub) == "NIATv7_g15285"]

## Plot the (absolute) gene significance for each module, with the modules
## shown in a fixed order. Colours not listed in `module.order` become NA.
module.order <- c("yellow", "green", "brown", "turquoise", "blue")
data.input.module.spe.color.factor <- factor(
  data.input.module.spe.color,
  levels = module.order,
  ordered = TRUE
)
#setwd("C:/Projects/inter-species-variation/RNA-Seq/06052015")
pdf(file = "Output/module.significance.pdf")
plotModuleSignificance(
  abs(as.numeric(data.input.module.spe.sig)),
  data.input.module.spe.color.factor
)
dev.off()

## Show the intra-module connectivity of the turquoise hub genes at control
## (WW) and FAC-induced level.
## Hub genes: turquoise module membership (kME, rounded to 2 digits) >= 0.75.
data.all.flt.sub.tur <- data.all.flt.sub[round(data.input.module.spe.MEs$kMEturquoise, 2) >= 0.75, ]
nrow(data.all.flt.sub.tur)
head(data.all.flt.sub.tur)

## Separate adjacency matrices for the WW and the FAC samples; samples are
## picked by matching "WW" / "FAC" in the column names.
ADJ1.WW <- adjacency(t(data.all.flt.sub.tur[, grep("WW", colnames(data.all.flt.sub.tur))]),
                     type = "unsigned", power = 10, corFnc = "cor",
                     corOptions = "use = 'p'", distFnc = "dist",
                     distOptions = "method = 'euclidean'")

ADJ1.FAC <- adjacency(t(data.all.flt.sub.tur[, grep("FAC", colnames(data.all.flt.sub.tur))]),
                      type = "unsigned", power = 10, corFnc = "cor",
                      corOptions = "use = 'p'", distFnc = "dist",
                      distOptions = "method = 'euclidean'")

## All genes are labelled "turquoise", so connectivity is computed within
## this single module for each treatment.
Alldegrees1.WW <- intramodularConnectivity(ADJ1.WW, rep("turquoise", nrow(data.all.flt.sub.tur)))
Alldegrees1.WW$Treatment <- "WW"
Alldegrees1.WW$Color <- "turquoise"

Alldegrees1.FAC <- intramodularConnectivity(ADJ1.FAC, rep("turquoise", nrow(data.all.flt.sub.tur)))
Alldegrees1.FAC$Treatment <- "FAC"
Alldegrees1.FAC$Color <- "turquoise"

## Genes above the 95th percentile of total connectivity in the FAC network.
## The cutoff is taken from the computed quantile instead of a hand-copied
## constant, so it stays correct when the upstream data change.
kTotal.q95 <- quantile(Alldegrees1.FAC$kTotal, probs = 0.95)
kTotal.q95
#plot(hist(Alldegrees1.FAC$kTotal))
dim(Alldegrees1.FAC[Alldegrees1.FAC$kTotal > kTotal.q95, ])

pdf(file = "Connectivity.tur.pdf")
boxplot(Alldegrees1.WW$kWithin, Alldegrees1.FAC$kWithin, notch = TRUE, col = "turquoise")
dev.off()

## Test whether within-module connectivity differs between treatments.
## A data.frame is required here: cbind() of a numeric and a character vector
## produces a character matrix, in which case the connectivity values would be
## ranked as strings/factor levels rather than as numbers.
data.connectivity <- data.frame(
  Connectivity = c(Alldegrees1.WW$kWithin, Alldegrees1.FAC$kWithin),
  Treatment = factor(
    c(rep("WW", nrow(Alldegrees1.WW)), rep("FAC", nrow(Alldegrees1.FAC))),
    levels = c("WW", "FAC")
  )
)
kruskal.test(Connectivity ~ Treatment, data = data.connectivity)



## Calculate the proportion (in %) of significantly induced genes among the
## hub genes of each module, separately for each species.
library(ggplot2)

module.col <- levels(factor(data.input.module.spe.color))
species <- c("Nob", "Nlin", "Natt", "Nmi", "Nacu", "Npau")
head(data.input.module.spe.MEs)

data.induced <- read.table(file = "OutputData/data.induced.EdgeR.all.txt",
                           sep = "\t", header = TRUE, row.names = 1)
head(data.induced)

## One row per module x species combination, modules in the outer position
## and species varying fastest (same row order as a nested module/species loop).
induced.module <- data.frame(
  Module = rep(module.col, each = length(species)),
  Species = rep(species, times = length(module.col)),
  stringsAsFactors = FALSE
)

## For each combination: hub genes are those with kME<module> > 0.75; the
## value is the percentage of them listed as induced for that species.
## NOTE(review): the turquoise hub set elsewhere in this script uses
## round(kME, 2) >= 0.75, which is slightly more inclusive -- confirm which
## definition is intended.
induced.module$Induction <- vapply(
  seq_len(nrow(induced.module)),
  function(i) {
    module.name <- paste0("kME", induced.module$Module[i])
    gene.m <- row.names(data.input.module.spe.MEs)[data.input.module.spe.MEs[, module.name] > 0.75]
    data.tmp <- as.character(
      data.induced[data.induced$Species == induced.module$Species[i], "GeneID"]
    )
    100 * length(intersect(data.tmp, gene.m)) / length(gene.m)
  },
  numeric(1)
)

## Fixed display order; modules not listed in `levels` become NA.
induced.module$Module <- factor(induced.module$Module,
                                levels = c("yellow", "green", "brown", "turquoise", "blue"),
                                ordered = TRUE)
induced.module$Species <- factor(induced.module$Species,
                                 levels = c("Nob", "Nlin", "Natt", "Nmi", "Nacu", "Npau"),
                                 ordered = TRUE)

p <- ggplot(induced.module, aes(x = Species, y = Induction, fill = Module)) +
  geom_bar(position = position_dodge(width = 0.5), stat = "identity", width = 0.4) +
  scale_y_continuous(limits = c(0, 100))
p
## Name both arguments: the first positional argument of ggsave() is the
## file name, not the plot.
ggsave(filename = "induced.module1.pdf", plot = p)

## Bar plot: for each species, the fraction of turquoise hub genes (rows of
## data.all.flt.sub.tur) that are among that species' significantly
## up-regulated genes. The *.stat.sig.up tables are created outside this
## section.
hub.genes <- row.names(data.all.flt.sub.tur)
sig.up.ids <- list(
  Nob = Nob.stat.sig.up$GeneID,
  Nlin = Nlin.stat.sig.up$GeneID,
  Natt = Natt.stat.sig.up$GeneID,
  Nmi = Nmi.stat.sig.up$GeneID,
  Nacu = Nacu.stat.sig.up$GeneID,
  Npau = Npau.stat.sig.up$GeneID
)
## Computed before the PDF device is opened so that an error here cannot
## leave a graphics device open.
induced.hub.frac <- vapply(
  sig.up.ids,
  function(ids) length(intersect(as.character(ids), hub.genes)) / length(hub.genes),
  numeric(1)
)

pdf(file = "PercentageOfInducedGene.pdf")
## NOTE(review): the y axis label says "%" but the plotted values are
## fractions on a 0-1 scale -- confirm which is intended.
## unname() keeps barplot() from drawing its own bar labels; the species
## names are added by axis() below.
bp <- barplot(unname(induced.hub.frac), ylim = c(0, 1), col = "turquoise",
              xlab = "Species", ylab = "% of induced hub genes")
axis(1, at = bp,
     labels = c("N. obtusifolia", "N. linearis", "N. attenuata",
                "N. miersii", "N. acuminata", "N. pauciflora"),
     cex.axis = 0.5)

dev.off()

## Select the M4 (turquoise) module genes: module membership, rounded to two
## digits, of at least 0.75.
is.M4 <- round(data.input.module.spe.MEs[, "kMEturquoise"], 2) >= 0.75
M4.gene <- row.names(data.input.module.spe.MEs)[is.M4]
head(M4.gene)
length(M4.gene)

## M4 genes that are also in `data.converved` (defined outside this section).
data.converved.M4 <- intersect(M4.gene, data.converved)

## Module membership values of one gene of interest.
data.input.module.spe.MEs["NIATv7_g12923", ]
#grep("NIATv7_g40277",M4.gene);

## Write the gene ID lists (one ID per line, no header) for GO enrichment
## analysis.
write.table(M4.gene,
            file = "OutputData/data.count.module.M4geneID.txt",
            col.names = FALSE, row.names = FALSE, quote = FALSE)
write.table(data.converved.M4,
            file = "OutputData/data.count.module.M4Conserved.txt",
            col.names = FALSE, row.names = FALSE, quote = FALSE)

## Export the information of the M4 module.

## Gene ID -> gene name lookup table; gene IDs are the row names.
ID2Name <- read.table("InputData/KnownGene.ID2name_NIATTr2.txt",
                      sep = "\t", row.names = 1, header = TRUE)

## One row per M4 gene; GeneName is NA for IDs missing from the lookup table.
M4.module.out <- data.frame(
  GeneID = M4.gene,
  GeneName = ID2Name[M4.gene, "GeneName"]
)
head(M4.module.out)

## Subset restricted to genes that have a known name.
M4.module.out.annotated <- M4.module.out[!is.na(M4.module.out$GeneName), ]
head(M4.module.out.annotated)

## Expression values of the M4 genes.
write.table(data.all.count.FPKM[M4.gene, ],
            file = "OutputData/data.all.count.FPKM.M4.txt",
            col.names = TRUE, row.names = TRUE, sep = "\t", quote = FALSE)
## NOTE(review): the full table (including genes without a name) is written
## to the "annotated" file; M4.module.out.annotated is never written --
## confirm this is intended.
write.table(M4.module.out,
            file = "OutputData/data.count.module.M4.annotated.txt",
            col.names = TRUE, row.names = TRUE, sep = "\t", quote = FALSE)



