# Combine the per-chunk EWAS result files into one table. Later sections
# summarise the p-values, draw Q-Q plots, compute FDR and attach annotation.
getwd()
# NOTE(review): clearing the workspace and hard-coding the working directory
# make this script machine-specific; every relative path below depends on
# this setwd(), so it is kept as-is.
rm(list = ls())
setwd("C:/Users/sijia/Desktop/current working dictionary/methylation/code/20190403 CHD_EWAS")

# Chunk files are named 190403_982_CHDEWAS<k>.csv for k = 1..15.
# NOTE(review): the original header said "combine 20 results" but only
# chunks 1-15 are read -- confirm which number is correct.
chunk_files <- paste0(
  "task37-20190403_CHDEWAS.R/190403_982_CHDEWAS", 1:15, ".csv"
)

# Read every chunk first and bind once; growing `result` with rbind() inside
# a loop re-copies the whole table on each iteration.
result <- do.call(rbind, lapply(chunk_files, read.csv))
rm(chunk_files)

dim(result)
# should be 747726
colnames(result)

# Write the distribution summary of -log10(p) for every p-value column to
# summary_allsite.txt. The summaries are unlabelled in the file and appear in
# exactly the order listed here.
summary_cols <- c(
  "pOUT", "psmk", "pdrk", "ppa1", "ppa2", "pbmi", "pdiet",
  "pAGE", "pSEX", "pHOURS", "pEDU", "pMAR"
)

sink("summary_allsite.txt")
tryCatch(
  for (p_col in summary_cols) {
    # Explicit print(): top-level auto-printing does not happen when the
    # script is run through source(), which would leave the file empty.
    print(summary(-log10(result[[p_col]])))
  },
  # Always undo the diversion, otherwise an error above would keep
  # swallowing all later console output into the file.
  finally = sink()
)

## Genomic Control parameter (lambda)
# For each p-value column: convert the median p-value to a 1-df chi-square
# statistic and divide by 0.455 (approximately the median of a 1-df
# chi-square, qchisq(0.5, 1) = 0.4549). Values are written unlabelled to
# lambda_allsite.txt in the order listed here.
lambda_cols <- c(
  "pOUT", "psmk", "pdrk", "ppa1", "ppa2", "pbmi", "pdiet",
  "pAGE", "pSEX", "pHOURS", "pEDU", "pMAR"
)

sink("lambda_allsite.txt")
tryCatch(
  for (p_col in lambda_cols) {
    # Explicit print() so the file is also filled when run via source().
    print(
      qchisq(median(result[[p_col]], na.rm = TRUE), df = 1, lower.tail = FALSE) / 0.455
    )
  },
  # Close the diversion even if a column fails.
  finally = sink()
)

# Draw one Q-Q plot of observed vs. expected -log10(p) and save it as a
# 1200x1200 JPEG.
#   pvalues: numeric vector of p-values (NA values are dropped by sort()).
#   file:    output path of the JPEG.
#   main:    plot title.
# Returns `file` invisibly.
save_qq_plot <- function(pvalues, file, main) {
  jpeg(file, width = 1200, height = 1200)
  # Close the device even when plotting fails, so no device is left open.
  on.exit(dev.off(), add = TRUE)

  observed <- sort(-log10(pvalues))
  n_obs <- length(observed)
  # Expected quantiles under the null: ranks n..1 divided by (n + 1), so the
  # smallest observed value is paired with the smallest expected value.
  expected <- -log10(rev(seq_len(n_obs)) / (1 + n_obs))
  # Genomic control factor: chi-square (1 df) of the median p-value / 0.455.
  lambda <- qchisq(median(pvalues, na.rm = TRUE), df = 1, lower.tail = FALSE) / 0.455

  plot(
    expected, observed,
    main = main,
    xlab = "Expected -log10(Pvalue)",
    ylab = "Observed -log10(Pvalue)",
    cex.axis = 2, cex.lab = 1.5
  )
  # Identity line for reference.
  lines(c(0, 50), c(0, 50), col = "red", lwd = 2)
  legend("topleft", paste("Genomic Control Factor=", round(lambda, 3)))
  invisible(file)
}

# Column/file suffix -> label used in the plot title.
# NOTE(review): pa1 and pa2 share the same "Physical Activity" title in the
# original script -- confirm whether they should be distinguished.
qq_traits <- c(
  OUT  = "CHD",
  smk  = "Smoking",
  drk  = "Alcohol Consumption",
  pa1  = "Physical Activity",
  pa2  = "Physical Activity",
  bmi  = "Body Mass Index",
  diet = "Diet Score"
)

# jpeg() cannot create the output directory itself.
dir.create("qqplot", showWarnings = FALSE)

for (trait in names(qq_traits)) {
  save_qq_plot(
    pvalues = result[[paste0("p", trait)]],
    file = paste0("qqplot/QQ_plot_allssite_", trait, ".jpeg"),
    main = paste0(
      "Q-Q plot of log(", qq_traits[[trait]],
      ") and CpGs adjusting Age, Sex, Region, Hours before last ate, Education, Marital status"
    )
  )
}



dim(result)

# Benjamini-Hochberg adjusted p-values, one new FDR.* column per p-value
# column that also gets a Q-Q plot above.
result$FDR.OUT  <- p.adjust(p = result$pOUT,  method = "BH")
result$FDR.smk  <- p.adjust(p = result$psmk,  method = "BH")
result$FDR.drk  <- p.adjust(p = result$pdrk,  method = "BH")
result$FDR.pa1  <- p.adjust(p = result$ppa1,  method = "BH")
result$FDR.pa2  <- p.adjust(p = result$ppa2,  method = "BH")
result$FDR.bmi  <- p.adjust(p = result$pbmi,  method = "BH")
result$FDR.diet <- p.adjust(p = result$pdiet, method = "BH")

# Number of rows with FDR < 0.05 for each column (NA values are not counted).
sum(result$FDR.OUT  < 0.05, na.rm = TRUE)
sum(result$FDR.smk  < 0.05, na.rm = TRUE)
sum(result$FDR.drk  < 0.05, na.rm = TRUE)
sum(result$FDR.pa1  < 0.05, na.rm = TRUE)
sum(result$FDR.pa2  < 0.05, na.rm = TRUE)
sum(result$FDR.bmi  < 0.05, na.rm = TRUE)
sum(result$FDR.diet < 0.05, na.rm = TRUE)


# Read the annotation table; its first column is renamed to "probename" so
# it can serve as the join key against `result`.
anno <- read.csv(
  "C:/Users/sijia/Desktop/current working dictionary/methylation/infinium-methylationepic-v-1-0-b4-manifest-file-csv/anno.csv",
  header = TRUE,
  as.is = TRUE,
  sep = ","
)
dim(anno)
colnames(anno)[1] <- "probename"

# Keep every row of `result` (all.y); rows without a matching annotation
# entry get NA in the annotation columns. Output is sorted by probename.
com <- merge(
  x = anno,
  y = result,
  by = "probename",
  all.y = TRUE,
  sort = TRUE
)
dim(com)

# NOTE(review): fields are written unquoted, so any value containing a comma
# would shift columns in the output file -- confirm the annotation has none.
write.table(
  com,
  "allssite_plusanno.csv",
  quote = FALSE,
  sep = ",",
  col.names = TRUE,
  row.names = FALSE
)
