#!/public/software/R/3.4.3/bin/Rscript

# Relate module eigengenes (MEs) to phenotypes
# MEs = Module eigengene (the first principal component of a given module)
# 2019-04-05
# Si JH

# sample clustering outliers cut point=9
# sampleTree$labels[!keepSamples]
# [1] "202259340013_R02C01" "202410000076_R05C01"

# Display the current working directory
getwd()
# If necessary, change the path below to the directory where the data files are stored.
# "." means current directory. On Windows use a forward slash / instead of the usual \.
workingDir <- "."
setwd(workingDir)
# Load the WGCNA and RColorBrewer packages from the user-level library.
# `lib.loc` is spelled out in full rather than relying on partial matching of `lib`.
library(WGCNA, lib.loc = "~/Rlib")
library(RColorBrewer, lib.loc = "~/Rlib")
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# Load the input data saved in the first part.
# The variable lnames contains the names of loaded variables.
lnames <- load(file = "0404WGCNA-dataInput-top20k.RData")
lnames
# Load network data saved in the second part.
lnames <- load(file = "0404-top20knetwork-auto-power5.RData")
lnames

# Fail early, with a clear message, if the objects used by the rest of this
# script were not restored by the two load() calls above.
requiredObjects <- c("datTraits", "t_bn_top_20k", "moduleColors")
missingObjects <- setdiff(requiredObjects, ls())
if (length(missingObjects) > 0) {
  stop("Objects missing after loading the .RData files: ",
       paste(missingObjects, collapse = ", "), call. = FALSE)
}

# Delete the 2 sample-clustering outliers from the datTraits and t_bn_top_20k datasets.
# The IDs are defined once so both datasets are always filtered by the same list.
outlierSamples <- c("202259340013_R02C01", "202410000076_R05C01")

# An ID that is absent from the row names would make the filter a silent no-op,
# so report it instead of letting the row counts drift unnoticed.
notInTraits <- setdiff(outlierSamples, rownames(datTraits))
notInExpr <- setdiff(outlierSamples, rownames(t_bn_top_20k))
if (length(notInTraits) > 0) {
  warning("Outlier IDs not found in rownames(datTraits): ",
          paste(notInTraits, collapse = ", "), call. = FALSE)
}
if (length(notInExpr) > 0) {
  warning("Outlier IDs not found in rownames(t_bn_top_20k): ",
          paste(notInExpr, collapse = ", "), call. = FALSE)
}

# drop = FALSE keeps the two-dimensional structure whatever the column count.
datTraits <- datTraits[!rownames(datTraits) %in% outlierSamples, , drop = FALSE]
dim(datTraits)
t_bn_top_20k <- t_bn_top_20k[!rownames(t_bn_top_20k) %in% outlierSamples, , drop = FALSE]
dim(t_bn_top_20k)
# should be 980 20000

#=====================================================================================
#
#  Code chunk 2
#
#=====================================================================================


# Define numbers of genes and samples
nGenes <- ncol(t_bn_top_20k) # 20000
nSamples <- nrow(t_bn_top_20k) # 980
# Recalculate MEs with color labels
MEs0 <- moduleEigengenes(t_bn_top_20k, moduleColors)$eigengenes
MEs <- orderMEs(MEs0)
colnames(datTraits)
# is_female
# highest_education
# bmi_calc
# marital_status_g1
# age_at_study_date
# hours_last_ate_x10_g1
# case_control
# diet_6score
# smk_con
# drk_con
# pa1 pa2
# region12  region16  region26  region36  region46  region52  region58  region68  region78

pheno <- datTraits

# The correlation below pairs row i of MEs with row i of the trait matrix, so
# the trait rows must be the same samples, in the same order, as the expression rows.
if (nrow(pheno) != nSamples) {
  stop("datTraits has ", nrow(pheno), " rows but t_bn_top_20k has ", nSamples,
       call. = FALSE)
}
if (!identical(rownames(pheno), rownames(t_bn_top_20k))) {
  stop("Row names of datTraits and t_bn_top_20k differ; samples are not aligned",
       call. = FALSE)
}

# Display name -> source column in pheno, in the order used for the heatmap columns.
traitColumns <- c(
  CHD = "case_control",
  Age = "age_at_study_date",
  Gender = "is_female",
  Education = "highest_education",
  Marital_status = "marital_status_g1",
  SMK = "smk_con",
  DRK = "drk_con",
  PA1 = "pa1",
  PA2 = "pa2",
  Diet_score = "diet_6score",
  BMI = "bmi_calc",
  Fasting_time = "hours_last_ate_x10_g1",
  Region1 = "region12",
  Region2 = "region16",
  Region3 = "region26",
  Region4 = "region36",
  Region5 = "region46",
  Region6 = "region52",
  Region7 = "region58",
  Region8 = "region68",
  Region9 = "region78"
)
# A missing column would otherwise be dropped silently by cbind() and only
# surface later as a confusing colnames length mismatch.
missingTraits <- setdiff(traitColumns, colnames(pheno))
if (length(missingTraits) > 0) {
  stop("Trait columns missing from datTraits: ",
       paste(missingTraits, collapse = ", "), call. = FALSE)
}
# [[ ]] matches column names exactly (no partial matching as with $).
datTraits <- do.call(cbind, lapply(traitColumns, function(column) pheno[[column]]))
colnames(datTraits) <- names(traitColumns)

moduleTraitCor <- cor(MEs, datTraits, use = "p")
# use = "p" computes each correlation from the pairwise-complete observations,
# so the p-value for each module/trait pair must use that pair's own sample
# count. Without missing values every entry of nObs equals nSamples.
nObs <- crossprod((!is.na(as.matrix(MEs))) * 1, (!is.na(datTraits)) * 1)
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nObs)

#=====================================================================================
#
#  Code chunk 3
#
#=====================================================================================


# Directory that receives both the heatmap PDF and the saved workspace.
# Create it if needed so that pdf() and save.image() do not fail on a fresh machine.
outputDir <- "~/OUTPUT/0404WGCNA"
dir.create(outputDir, recursive = TRUE, showWarnings = FALSE)

# The file name uses an ASCII hyphen; the previous name contained a Unicode
# MINUS SIGN (U+2212), which is fragile in non-UTF-8 locales and shells.
pdf(file = file.path(outputDir, "0405Module-trait_relationships.pdf"),
    width = 40, height = 20)
# Will display correlations and their p-values
textMatrix <- paste0(signif(moduleTraitCor, 2), "\n(",
                     signif(moduleTraitPvalue, 1), ")")
# paste0() returns a flat vector; restore the module x trait layout.
dim(textMatrix) <- dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = colnames(datTraits),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = greenWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               zlim = c(-1, 1),
               main = "Module-trait relationships")
dev.off()


# Save the whole workspace.
save.image(file = file.path(outputDir, "0405_step3_GS_MM_info.RData"))