#!/public/software/R/3.4.3/bin/Rscript
#network construction
#using auto process
#Si JH
#2019-04-05

# sample clustering outliers cut point=9
# sampleTree$labels[!keepSamples]
# [1] "202259340013_R02C01" [2] "202410000076_R05C01"

# Report the directory the script is running from.
getwd()
# Directory that holds the input data file. "." is the current directory, so
# the setwd() call below changes nothing unless this path is edited.
# On Windows use a forward slash / instead of the usual \.
workingDir <- "."
setwd(workingDir)
# Load the WGCNA package from the user library. The argument name is spelled
# out in full (lib.loc) instead of relying on partial matching of `lib`.
library(WGCNA, lib.loc = "~/Rlib")
# The original script marks this setting as important; do not omit it.
options(stringsAsFactors = FALSE)
# Allow multi-threading within WGCNA. This helps speed up certain calculations.
# Any error here may be ignored, but you may want to update WGCNA if you see one.
# Caution: skip this line if you run RStudio or other third-party R environments.
enableWGCNAThreads()
# Load the data saved in the first part of the analysis.
lnames <- load(file = "0404WGCNA-dataInput-top20k.RData")
# lnames contains the names of the loaded variables; print them for the log.
lnames

# Fail early, with a clear message, if the objects used by the rest of this
# script are not available after loading.
requiredObjects <- c("datTraits", "t_bn_top_20k")
missingObjects <- requiredObjects[!vapply(requiredObjects, exists, logical(1))]
if (length(missingObjects) > 0) {
  stop("Required object(s) not available after loading the input data: ",
       paste(missingObjects, collapse = ", "), call. = FALSE)
}

# Remove the 2 outlier samples identified by sample clustering (listed in the
# file header) from both the datTraits and t_bn_top_20k datasets.
outlierSamples <- c("202259340013_R02C01", "202410000076_R05C01")

# Warn (without stopping) if an expected outlier ID is absent from the
# expression data, since a mistyped ID would otherwise be ignored silently.
outliersNotFound <- setdiff(outlierSamples, rownames(t_bn_top_20k))
if (length(outliersNotFound) > 0) {
  warning("Outlier sample(s) not found in t_bn_top_20k: ",
          paste(outliersNotFound, collapse = ", "), call. = FALSE)
}

# drop = FALSE keeps the result two-dimensional even when only a single
# row or column is left after subsetting.
datTraits <- datTraits[!rownames(datTraits) %in% outlierSamples, ,
                       drop = FALSE]
dim(datTraits)
t_bn_top_20k <- t_bn_top_20k[!rownames(t_bn_top_20k) %in% outlierSamples, ,
                             drop = FALSE]
dim(t_bn_top_20k)
#should be 980 20000


#=====================================================================================
#
#  Code chunk 3
#
#=====================================================================================

# Automatic network construction and module detection in a single
# blockwiseModules() call. Per the original author's note, power = 5 was
# chosen and maxBlockSize is raised from its default of 5000 to 20000.
net <- blockwiseModules(
  datExpr = t_bn_top_20k,
  power = 5,
  maxBlockSize = 20000,
  TOMType = "unsigned",
  minModuleSize = 30,
  mergeCutHeight = 0.25,
  reassignThreshold = 0,
  pamRespectsDendro = FALSE,
  # Modules are labelled with numbers here; they are converted to colour
  # names later with labels2colors().
  numericLabels = TRUE,
  # Keep the TOM on disk; "top20kTOM" is the base name for the saved files.
  saveTOMs = TRUE,
  saveTOMFileBase = "top20kTOM",
  verbose = 3
)
# Tabulate the module labels, then summarise the returned object.
table(net$colors)
summary(net)

#=====================================================================================
#
#  Code chunk 4
#
#=====================================================================================
# Write the cluster dendrogram of the first block, with module colours
# underneath, to a PDF file.
dendroPdf <- "~/OUTPUT/0404WGCNA/0405Cluster Dendrogram_top20k.pdf"
# Make sure the output directory exists: pdf() cannot open a file in a missing
# directory, and that error would abort the script before the network results
# are saved. Nothing happens if the directory is already present.
dir.create(dirname(dendroPdf), recursive = TRUE, showWarnings = FALSE)
# Convert labels to colors for plotting
mergedColors <- labels2colors(net$colors)
pdf(file = dendroPdf, width = 50, height = 50)
# Plot the dendrogram and the module colors underneath. The finally clause
# closes the PDF device even if plotting fails; any error is still raised.
tryCatch(
  plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                      "Module colors",
                      dendroLabels = FALSE, hang = 0.03,
                      addGuide = TRUE, guideHang = 0.05),
  finally = dev.off()
)

#=====================================================================================
#
#  Code chunk 5
#
#=====================================================================================
# Keep the module assignments, the module eigengenes and the gene dendrogram
# of the first block, and store them for the subsequent analysis steps.

geneTree <- net$dendrograms[[1]]
MEs <- net$MEs
moduleLabels <- net$colors
# Colour names corresponding to the numeric module labels.
moduleColors <- labels2colors(moduleLabels)

# Objects are written in the same order as before.
save(list = c("MEs", "moduleLabels", "moduleColors", "geneTree"),
     file = "0404-top20knetwork-auto-power5.RData")
