# Manhattan plot
# 982 samples: linear regression, model1 and model2
# or 596 samples: linear regression, model1 using sv or smart sv
# 2018-11-25
# SJH

# Remove every object from the global environment before the analysis runs.
# NOTE(review): when this file is source()d into an interactive session this
# also clears the caller's workspace.
rm(list=ls())
# Attach the car package. require() returns FALSE with a warning instead of
# stopping when the package is not installed.
require(car)

##manhattan plot
	# Draw a Manhattan plot (-log10(P) against genomic position) on the current
	# graphics device.
	#
	# Arguments:
	#   dataframe         data frame with columns CHR (chromosome code), BP
	#                     (position within the chromosome) and P (p-value). A SNP
	#                     column is only needed when `annotate` is given.
	#   pchsize           plotting symbol, passed as `pch` to points() when more
	#                     than one chromosome is drawn.
	#   colors            colours recycled over the chromosomes in plotting order.
	#   ymax              upper y limit; "max" means ceiling(max(-log10(P))).
	#                     Values below 8 are raised to 8.
	#   cex.x.axis        accepted for backward compatibility; not used.
	#   limitchromosomes  chromosome codes to keep. NULL, FALSE or 0 disables the
	#                     filter.
	#   suggestiveline    height of the red horizontal line. FALSE, 0 or NULL
	#                     omits it.
	#   annotate          optional vector of SNP identifiers to overplot in green.
	#   ...               forwarded to plot(), axis() and points().
	#
	# Rows containing any NA, or with P outside (0, 1], are dropped first.
	# Chromosomes are laid out left to right in sorted order; they do not have to
	# start at 1 or be contiguous.
	# Returns NULL invisibly; the function is called for its plotting side effect.
	manhattan <- function(dataframe, pchsize=19, colors=c("gray10", "gray100"), ymax="max",
	                      cex.x.axis=1, limitchromosomes=1:24, suggestiveline=-log10(1E-06),
	                      annotate=NULL, ...) {
	  d <- dataframe
	  if (!all(c("CHR", "BP", "P") %in% names(d)))
	    stop("Make sure your data frame contains columns CHR, BP, and P")
	  # `!= 0` is the same truthiness test as any(limitchromosomes) but does not
	  # rely on (and warn about) numeric-to-logical coercion.
	  if (any(limitchromosomes != 0)) d <- d[d$CHR %in% limitchromosomes, ]
	  # Sort by chromosome then position, drop rows with NA, keep only 0 < P <= 1.
	  d <- na.omit(d[order(d$CHR, d$BP), ])
	  d <- d[d$P > 0 & d$P <= 1, ]
	  if (nrow(d) == 0)
	    stop("No rows left to plot after removing NA values and P outside (0, 1]")
	  d$logp <- -log10(d$P)

	  chroms <- unique(d$CHR)
	  numchroms <- length(chroms)
	  # One colour per plotted chromosome, recycling the palette as needed.
	  colors <- rep_len(colors, numchroms)
	  if (identical(ymax, "max")) ymax <- ceiling(max(d$logp))
	  if (ymax < 8) ymax <- 8

	  if (numchroms == 1) {
	    # Single chromosome: plot against the raw position.
	    d$pos <- d$BP
	    plot(d$pos, d$logp, ylim=c(0, ymax), ylab=expression(-log[10](italic(p))),
	         xlab=paste("Chromosome", chroms, "position"), ...)
	  } else {
	    # Several chromosomes: shift each one by the summed last positions of the
	    # chromosomes drawn before it. The offset is carried along the chromosomes
	    # actually present, so a missing chromosome 1 or a gap in the codes is fine.
	    d$pos <- NA_real_
	    ticks <- numeric(numchroms)
	    offset <- 0
	    for (k in seq_len(numchroms)) {
	      rows <- d$CHR == chroms[k]
	      d$pos[rows] <- d$BP[rows] + offset
	      chrpos <- d$pos[rows]
	      # Axis label sits at the middle data point of the chromosome.
	      ticks[k] <- chrpos[floor(length(chrpos) / 2) + 1]
	      # Rows are sorted by BP, so the last one is the chromosome's extent.
	      offset <- offset + tail(d$BP[rows], 1)
	    }
	    plot(d$pos, d$logp, ylim=c(0, ymax), ylab=expression(-log[10](italic(p))),
	         xlab="Chromosome", xaxt="n", type="n", ...)
	    axis(1, at=ticks, labels=chroms, ...)
	    for (k in seq_len(numchroms)) {
	      rows <- d$CHR == chroms[k]
	      points(d$pos[rows], d$logp[rows], pch=pchsize, col=colors[k], ...)
	    }
	  }

	  if (!is.null(annotate)) {
	    hit <- d$SNP %in% annotate
	    points(d$pos[hit], d$logp[hit], col="green3", ...)
	  }
	  # A NULL, FALSE, 0 or NA threshold means "no line" instead of an error.
	  if (isTRUE(as.logical(suggestiveline))) abline(h=suggestiveline, col="red")
	  invisible(NULL)
	}

# 982 samples, linear regression model1: load the annotated per-site results.
# as.is = TRUE keeps character columns as character instead of factors.
result_anno <- read.csv("allssite_plusanno.csv", header = TRUE, as.is = TRUE, sep = ",")
dim(result_anno)

# Keep the rows whose FDR.OUT is below 0.1 and export them as CSV.
# which() drops rows where FDR.OUT is NA rather than returning all-NA rows.
result_anno_0_1 <- result_anno[which(result_anno$FDR.OUT < 0.1), ]
dim(result_anno_0_1)
# Argument names are spelled out in full (col.names / row.names) instead of
# relying on partial matching.
write.table(result_anno_0_1, "0403_OUT_fdr0_1.csv", quote = FALSE, sep = ",",
            col.names = TRUE, row.names = FALSE)


# Manhattan plot of the pOUT p-values.
# Build the SNP / CHR / BP / P table that manhattan() expects.
b <- data.frame(
  SNP = result_anno$probename,
  CHR = result_anno$CHR,
  BP = result_anno$MAPINFO,
  P = result_anno$pOUT,
  stringsAsFactors = FALSE
)

# Translate chromosome labels to numeric codes by label: "1".."22" keep their
# number, "X" becomes 23, "Y" becomes 24, anything else (including "") is NA.
# Looking the label up directly avoids decoding factor integer codes, which is
# only correct when CHR is a factor containing all 24 labels and therefore
# depends on data.frame()'s stringsAsFactors default (changed in R 4.0.0).
chr_codes <- c(setNames(1:22, as.character(1:22)), X = 23, Y = 24)
b$CHR <- unname(chr_codes[trimws(as.character(b$CHR))])

b <- b[!is.na(b$P), ]
dim(b)

# The output keeps its .jpg name, so it is written with the JPEG device to make
# the file contents match the extension. The device is closed even if plotting
# fails, so no graphics device is left open.
jpeg("OUT_Manhattan.jpg", width = 1200, height = 800, quality = 100)
invisible(tryCatch(
  manhattan(b, limitchromosomes = 1:24, colors = c("black", "gray", "orange"), pchsize = 19),
  finally = dev.off()
))