---
output:
  pdf_document:
    fig_crop: no
---
Complete analysis for Mikheyev and Linksvayer, Genes associated with ant 
social behavior show distinct transcriptional and evolutionary patterns 

========================================================

`r library(knitr) `
`r opts_chunk$set(cache=FALSE, warning=FALSE, error=FALSE)`

```{r load_libraries, message=FALSE, cache=FALSE}
library(ggplot2)
library(RMySQL)
library(edgeR)
library(DESeq2)
library(GOstats)
library(GSEABase)
library(gplots)
library(gmodels)
library(gtools)
library(pgirmess)
library(class)
library(gridExtra)
library(reshape)
library(WGCNA)
allowWGCNAThreads()
library(boot)
library(multcomp)
library(data.table)
library(doMC)
registerDoMC(6)
```

Read gene expression data from SQL database and filter any gene with more than half of the libraries with FPKM<1.

```{r sql_gene_expression, cache=FALSE}
if (exists("mydb")) dbDisconnect(mydb)
#mydb = dbConnect(MySQL(),  dbname='monomorium', host='ECOEVO.UNIT.OIST.JP')
mydb = dbConnect(MySQL(), user = "root",dbname = "monomorium1", host = "localhost")
contrasts <- dbGetQuery(mydb,"SELECT * FROM contrasts") # read a priori contrasts
# this makes the contrasts matrix compatible with design matrix, which is sorted by level
contrasts <- contrasts[,sort(names(contrasts))]  
factors <- dbGetQuery(mydb,"SELECT * FROM factors")  #read treatments

#Note: MP12 (age15), MP14 (age18), MP19 (prot) have relatively low mapping rates (<70%)

counts <- dbGetQuery(mydb,"SELECT gene_id,MP1,MP2,MP3,MP4,MP5,MP6,MP7,MP8,MP9,MP10,MP11,MP12,MP13,
  MP14,MP15,MP16,MP17,MP18,MP19,MP20,MP21,MP22,MP23,MP24 FROM expected_counts_genes")
row.names(counts) <- counts$gene_id # change first column to row names
counts <- subset(counts,select=-c(gene_id))  
fpkm <- dbGetQuery(mydb,"SELECT gene_id,MP1,MP2,MP3,MP4,MP5,MP6,MP7,MP8,MP9,MP10,MP11,MP12,MP13,
  MP14,MP15,MP16,MP17,MP18,MP19,MP20,MP21,MP22,MP23,MP24 FROM fpkm_genes")
row.names(fpkm) <- rownames(counts)
fpkm <- subset(fpkm,select=-c(gene_id))

#select isoforms where at least half of the libraries have FPKM >1
keep=rowSums(fpkm>= 1)>= ncol(counts)/2  
#create design matrix, and label rows and columns according to our contrasts
design <- model.matrix(~0+factors$factor,data=counts)
rownames(design) <- colnames(counts)
colnames(design) <- names(contrasts)
```

### Visualize cutoff distribution

```{r visualize_cutoff, fig.width=4, fig.height=4}
ggplot(melt(fpkm),aes(x=value,color=variable))+geom_density()+
  scale_x_log10()+theme_bw()+geom_vline(xintercept=1, color="red")+
  xlab('expected counts')+ylab('density')+ theme(legend.position="none")
```

### Conduct differential gene expression analysis

```{r dge_tests}
dge <- DGEList(counts=round(counts[keep,]),group=factors$factor)   #apply filtering!
dge <- calcNormFactors(dge)
dge <- estimateGLMCommonDisp(dge,design,verbose=TRUE)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)
fit <- glmFit(dge,design)
lrt <- glmLRT(fit)
```

### plot NMDS as a sanity check
```{r compute_mds, fig.keep='none'}
mdsplot <- plotMDS(dge,top=500)
```
```{r dge_nmds, fig.width=6, fig.height=6}
plot(mdsplot, main="",xlab="Dimension 1",ylab="Dimension 2")
text(mdsplot$cmdscale.out[,1],mdsplot$cmdscale.out[,2],factors$factor)
```

The ages and tasks cluster together, suggesting that the transcriptional data capture some sort of biological signal.

### Examine task-specific gene expression, and determine which behaviors we should focus on
```{r task_expression, fig.width=8, fig.height=8}
#logFC is computed as X - Y, so positive values are upregulated in focal contast
et_forager <- glmLRT(fit, contrast=makeContrasts((carb+prot)/2 - (nurse+groom+troph)/3, 
  levels=design))  

par(mfrow=c(2,2))
summary(de_forager <- decideTestsDGE(et_forager, p=0.05, adjust="BH"))
plotSmear(et_forager, de.tags=rownames(fit)[as.logical(de_forager)])
title("foragers vs. others")
et_nurse <- glmLRT(fit, contrast=makeContrasts(nurse - (carb+prot+groom+troph)/4, levels=design)) 
summary(de_nurse <- decideTestsDGE(et_nurse, p=0.05, adjust="BH"))
plotSmear(et_nurse, de.tags=rownames(fit)[as.logical(de_nurse)])
title("nurses vs. others")
et_groom <- glmLRT(fit, contrast=makeContrasts(groom - (carb+prot+nurse+troph)/4, levels=design)) 
summary(de_groom <- decideTestsDGE(et_groom, p=0.05, adjust="BH"))
plotSmear(et_groom, de.tags=rownames(fit)[as.logical(de_groom)])
title("grooming vs. others")
et_troph <- glmLRT(fit, contrast=makeContrasts(troph - (carb+prot+nurse+groom)/4, levels=design)) 
summary(de_troph <- decideTestsDGE(et_troph, p=0.05, adjust="BH"))
plotSmear(et_troph, de.tags=rownames(fit)[as.logical(de_troph)])
title("trophallaxis vs. others")

par(mfrow=c(1,1))

et_nurse_forager <- glmLRT(fit, contrast=makeContrasts(nurse - (carb + prot)/2, levels=design)) 
summary(de_nurse_forager <- decideTestsDGE(et_nurse_forager, p=0.05, adjust="BH"))
```

Nurses and foragers are the two most distinct behavioral categories, and we'll focus our analysis on the from now on.

## Comparison with fire ants

Test to see if evolutionary rates differ between forager-upregulated and nurse-upregulated genes.
```{r solenopsis, fig.width=4, fig.height=4}
dnds <- dbGetQuery(mydb,"SELECT * FROM dNdS") # read evolutionary rates vs S. invicta
nurse_names <- rownames(et_nurse_forager$table[de_nurse_forager == 1,])
forager_names <- rownames(et_nurse_forager$table[de_nurse_forager == -1,])
nde_names <- rownames(et_nurse_forager$table[de_nurse_forager == 0 ,])
rates <- data.frame(rate = c(dnds[na.omit(match(nurse_names,dnds$gene)),2],
	dnds[na.omit(match(forager_names,dnds$gene)),2],
	dnds[na.omit(match(nde_names,dnds$gene)),2]), 
	task = c(rep("nurse",length(na.omit(match(nurse_names,dnds$gene)))),
		rep("forager",length(na.omit(match(forager_names,dnds$gene)))),
		rep("NDE",length(na.omit(match(nde_names,dnds$gene))))))
rates$task <- factor(rates$task, levels=c("NDE" ,"nurse","forager"))
ggplot(data=rates, aes(x=factor(task),y=rate))+geom_boxplot(notch=TRUE)+scale_y_log10()+theme_bw()

kruskal.test(rate ~ task, data = rates)
kruskalmc(rate ~ task, data = rates)
#excluding 
kruskal.test(rate ~ task, data = rates[rates$task != "NDE",])
```

Looking at the data this way, it looks like nurse genes evolve significantly faster than NDE genes.

### Are more highly expressed genes evolving slower?
```{r fpkm_evolution_plot, fig.width=4, fig.height=4}
cor.test(rowMeans(fpkm[dnds$gene,]),dnds$rate, method="spearman")
ggplot(data.frame(fpkm=rowMeans(fpkm[dnds$gene,]),rate=dnds$rate),aes(fpkm,rate))+
  geom_point(alpha=0.2)+scale_x_log10()+scale_y_log10()+theme_bw()
```

Yes, it looks like more highly expressed genes are indeed evolving slower.

### Are forager genes or nurse, genes more highly expressed?
- *caveat*: more power to detect differential expression in highly expressed genes

```{r fpkm_task_plot, fig.width=4, fig.height=4, }
fpkm_task <- data.frame(fpkm = rowMeans(fpkm[rownames(lrt$table),]), task = de_nurse_forager )
fpkm_task[forager_names,"task"] <- "forager"
fpkm_task[nurse_names,"task"] <- "nurse"
fpkm_task[fpkm_task[,"task"] == 0,"task"] <- "nde"
kruskalmc(fpkm ~ task, fpkm_task)
ggplot(fpkm_task,aes(x=factor(task),y=log(fpkm)))+geom_boxplot()+theme_bw()+
  xlab("")+ylab("log fragments per kilobase mapped (FPKM)")
```

Comparing results with fire ant foraging study by Manfredini et al 2013
```{r solenopsis_task_comparison}
manfredini <- dbGetQuery(mydb, '
SELECT DISTINCT mp_id, microarray.gene_id, expression,rate FROM `mp vs sinv`
JOIN dNdS
ON `mp vs sinv`.mp_id = dNdS.gene_id
LEFT JOIN (SELECT DISTINCT gene_id, IF (logFC>0,"forager","nest") AS expression FROM  
manfredini_expression
JOIN manfredini_names 
ON manfredini_expression.manfredini_id = manfredini_names.manfredini_id) AS microarray
ON sinv_id = microarray.gene_id 
GROUP BY mp_id
') #a few probes target the same gene, so just choose one of them
rownames(manfredini) <- manfredini$mp_id
fisher.test(matrix(c(
	nrow(subset(manfredini, expression=="forager")), # number of sinv forager genes with mp homologs
	nrow(manfredini[is.na(manfredini$expression),]), 
	table(forager_names %in% subset(manfredini,expression=="forager")$mp_id)["TRUE"],
	table(forager_names %in% subset(manfredini,expression=="forager")$mp_id)["FALSE"]),
  ncol=2,byrow=T),alternative="less")

fisher.test(matrix(c(
	nrow(subset(manfredini, expression=="nest")), # number of sinv nest genes with mp homologs
	nrow(manfredini[!is.na(manfredini$expression),]), 
	table(nurse_names %in% subset(manfredini,expression=="nest")$mp_id)["TRUE"],
	table(nurse_names %in% subset(manfredini,expression=="nest")$mp_id)["FALSE"]),ncol=2,byrow=T),
  alternative="less")
```

There is a small, but significant overlap in the genes conserved in various contexts

Another way to look at this, is to look at the *direction* of genes, rather than their overall significance, or correlation between logFC

```{r}
manfredini2 <- dbGetQuery(mydb, '
SELECT DISTINCT mp_id, microarray.id, expression, logFC FROM `mp vs sinv`
JOIN dNdS ON `mp vs sinv`.mp_id = dNdS.gene_id
JOIN (SELECT manfredini_probe2gene.probe, manfredini_probe2gene.gene AS id, 
IF(AVG(manfredini_expt1.out)>AVG(manfredini_expt1.in),"forager","nurse") AS expression, 
log(2,POW(2,manfredini_expt1.in)/POW(2,manfredini_expt1.out)) as logFC 
FROM  manfredini_expt1
  JOIN manfredini_probe2gene 
	ON manfredini_expt1.probe = manfredini_probe2gene.probe GROUP BY id) AS microarray
ON sinv_id = microarray.id
')

rownames(manfredini2) <- manfredini2$mp_id
upreg <- et_nurse_forager$table
upreg$task <- "nurse"
upreg[upreg$logFC<0,"task"] <- "forager"
commonNames <- intersect(rownames(upreg),manfredini2$mp_id)
CrossTable(upreg[commonNames,"task"],manfredini2[commonNames,"expression"],fisher=T)
cor.test(upreg[commonNames,"logFC"],manfredini2[commonNames,"logFC"],method="spearman")

```

So, about 55.8% of the genes are expressed in concordant direction and the correlation in logFC
between the two datasets is rho=0.143

##Comparing patterns of gene expression with honeybees

```{r hbee_comparison}
hbee <- dbGetQuery(mydb, '
SELECT `blast amel vs mp`.amel, `blast amel vs mp`.mp,`alaux dge`.task FROM 
`blast amel vs mp` JOIN `blast mp vs amel`
ON `blast amel vs mp`.amel = `blast mp vs amel`.amel AND `blast amel vs mp`.mp = 
`blast mp vs amel`.mp
LEFT JOIN `alaux dge` 
ON `alaux dge`.gene = SUBSTR(`blast amel vs mp`.amel,1,7)')

# test to see whether there is an enrichment in mp forager genes that have bee homologs
fisher.test(matrix(c(
	nrow(subset(hbee, task=="forager")), # number of honeybee forager genes with mp homologs
	nrow(hbee[is.na(hbee$task),]), 
	table(forager_names %in% subset(hbee,task=="forager")$mp)["TRUE"],
	table(forager_names %in% subset(hbee,task=="forager")$mp)["FALSE"]),ncol=2,byrow=T),
  alternative="less")

fisher.test(matrix(c(
	nrow(subset(hbee, task=="nurse")),  # total hb nurse gene count
	nrow(hbee[is.na(hbee$task),]), 		# number of NDE  
	table(nurse_names %in% subset(hbee,task=="nurse")$mp)["TRUE"],  
  # nurse genes matching mp nurse genes
	table(nurse_names %in% subset(hbee,task=="nurse")$mp)["FALSE"]),ncol=2,byrow=T),
  alternative="less") # genes not matching mp nurse genes
```

By contrast with fire ants, there is no significant overlap with bees.

## Plot heatmap for foragers, nurse and ages
- This heatmap shows just the top genes, so that the patterns are easier to see by eye
```{r heatmap}
polyethism <- grepl("age|nurse|forager",gsub("carb|prot","forager",factors$factor))
forager_nurse_et_names <- rownames(et_nurse_forager$table[de_nurse_forager !=0,]) 
#plot the 20 genes most differentiated between nurses and workers
strong_dge <- order(et_nurse_forager$table[forager_nurse_et_names,"PValue"],decreasing=FALSE)[0:200]
strong_names <- rownames(et_nurse_forager$table[forager_nurse_et_names,][strong_dge,])
col_order <- mixedorder(gsub("nurse","0",factors$factor[polyethism]))
colData <- data.frame(task = factors$factor[polyethism][col_order], 
    lib = colnames(counts)[polyethism][col_order])
```
```{r}
mycols <- colorpanel(n=19,low="white",high="darkgreen")
color_codes <- data.frame(factor = c("age0", "age3", "age6", "age9", "age12", "age15", "age18", 
"groom", "nurse", "troph", "prot","carb"), color = c("#CCFFFF","#66CCFF","#3399FF",
"#0066FF","#0000FF","#0000CC","#000066","#d53e4f","#FFFF00","#fee08b","#660066","#660066"))
colors <-  as.vector(color_codes$color[match(factors$factor[polyethism][col_order], 
    color_codes$factor)])
heatmap.2(log(1+as.matrix(counts[strong_names,polyethism][1:20,col_order])),key=FALSE,
trace="none",col=mycols,labRow=FALSE,density.info="none",Colv=FALSE, 
ColSideColors=as.vector(colors),labCol=factors$factor[polyethism][col_order],
hclustfun=function(c){hclust(c, method="ward")},margins = c(10, 2))

```

```{r}
#plot the top 100 genes most differentiated, with an added dendrogram 
#showing the relationships among all samples based on these 100 genes
heatmap.2(log(1+as.matrix(counts[strong_names,polyethism][1:100,col_order])),
key=FALSE,trace="none",col=mycols,labRow=FALSE,density.info="none", 
ColSideColors=as.vector(colors),labCol=factors$factor[polyethism][col_order],
hclustfun=function(c){hclust(c, method="complete")},margins = c(10, 2))

```

# K-NN 
- supervised learning of task-specific genes. 
```{r knn}

forager_nurse <- grepl("nurse|carb|prot",factors$factor)
tasks <- ! grepl("age",factors$factor)
train <- as.data.frame(t(counts[forager_nurse_et_names,forager_nurse]))
test <- as.data.frame(t(counts[forager_nurse_et_names,! tasks]))
cl <- factor(gsub("carb|prot","forager",factors$factor[forager_nurse]))
model <- knn(train, test, cl, k = 3, prob=TRUE)
cbind(model,factors$factor[! tasks],attr(model,"prob"))[mixedorder(factors$factor[! tasks]),]

#visually see if the ages actually cluster together
temp<-t(counts[forager_nurse_et_names,! tasks])
rownames(temp) <- factors[!tasks,"factor"]
plot(hclust((dist(temp))))

#excluding age0
model2 <- knn(train, test, cl, k = 3, prob=TRUE)
test2 <- as.data.frame(t(counts[forager_nurse_et_names,! tasks]))
model <- knn(train, test, cl, k = 3, prob=TRUE)
cbind(model2,factors$factor[! tasks],attr(model,"prob"))[mixedorder(factors$factor[! tasks]),]
```

k==2 and k==3 give close to the same result.
See [here](phttp://saravananthirumuruganathan.wordpress.com/2010/05/17/a-detailed-introduction-to-k-nearest-neighbor-knn-algorithm/) for more details on knn and choosing k, and [here](http://www.stat.cmu.edu/~jiashun/Research/software/GenomicsData/papers/dudoit.pdf) for a justification of knn as an appropriate tool for classifying gene expression

# Go term enrichment for forager- and nurse-upregulated genes, output to table
```{r GO, cache=TRUE}
#load go terms, and append 
go <- dbGetQuery(mydb,'SELECT gene, GO FROM blast2go JOIN genes_isoforms ON 
genes_isoforms.isoform = blast2go.isoform WHERE go != ""')
go$evidence <- "ISS"
go <- go[,c("GO","evidence","gene")]
dbDisconnect(mydb)
universe <- unique(go$gene)
goFrame=GOFrame(go[go$gene %in% universe,],organism="Monomorium pharaonis")
goAllFrame=GOAllFrame(goFrame)
gsc <- GeneSetCollection(goAllFrame, setType = GOCollection())

# GO terms overrepresented in forager-upregulated genes
forager_upreg_go <- hyperGTest(GSEAGOHyperGParams(name = "worker upregulated",
	geneSetCollection=gsc,geneIds = intersect(forager_names,universe),
	universeGeneIds=universe,ontology = "BP",pvalueCutoff = 0.05,
  conditional = FALSE,testDirection = "over"))
go_forager_up<-cbind(summary(forager_upreg_go),"forager_upregulated_genes","overrepresented")
colnames(go_forager_up)<-c("GOBPID","Pvalue","OddsRatio","ExpCount",
  "Count","Size","GOTerm","GeneCategory","Direction")

# GO terms underrepresented in forager-upregulated genes
forager_downreg_go <- hyperGTest(GSEAGOHyperGParams(name = "worker upregulated",
geneSetCollection=gsc,geneIds = intersect(forager_names,universe),
universeGeneIds=universe,ontology = "BP",pvalueCutoff = 0.05,conditional = FALSE,
testDirection = "under"))
go_forager_down<-cbind(summary(forager_upreg_go),"forager_upregulated_genes",
    "underrepresented")
colnames(go_forager_down)<-c("GOBPID","Pvalue","OddsRatio","ExpCount","Count",
    "Size","GOTerm","GeneCategory","Direction")

# GO terms overrepresneted in nurse-upregulated genes
nurse_upreg_go <- hyperGTest(GSEAGOHyperGParams(name = "worker upregulated",
	geneSetCollection=gsc,geneIds = intersect(nurse_names,universe),
	universeGeneIds=universe,ontology = "BP",pvalueCutoff = 0.05,conditional = FALSE,
  testDirection = "over"))
go_nurse_up<-cbind(summary(nurse_upreg_go),"nurse_upregulated_genes","overrepresented")
colnames(go_nurse_up)<-c("GOBPID","Pvalue","OddsRatio","ExpCount","Count","Size",
  "GOTerm","GeneCategory","Direction")

# GO terms underrepresneted in nurse-upregulated genes
nurse_downreg_go <- hyperGTest(GSEAGOHyperGParams(name = "worker upregulated",
  geneSetCollection=gsc,geneIds = intersect(nurse_names,universe),
	universeGeneIds=universe,ontology = "BP",pvalueCutoff = 0.05,conditional = FALSE,
  testDirection = "under"))
go_nurse_down<-cbind(summary(nurse_upreg_go),"nurse_upregulated_genes","underrepresented")
colnames(go_nurse_down)<-c("GOBPID","Pvalue","OddsRatio","ExpCount","Count","Size",
  "GOTerm","GeneCategory","Direction")

goanalysis<-rbind(go_forager_up,go_forager_down,go_nurse_up,go_nurse_down)
write.csv(goanalysis,file="GOanalysis.csv")

write.table(go_forager_up,file="GOanalysis.csv",append=TRUE,sep=",")
colnames(go_forager_up)<-c("GOBPID","Pvalue","OddsRatio","ExpCount","Count","Size",
  "GOTerm","GeneCategory","Direction")


```

# WGCNA
<!--- knitr complains abour a warning produced by DESeq2, so we suppress errors here  -->
```{r wgcna_init, error=TRUE}
datExpr <- data.frame(t(log(fpkm[keep,]+1,2)))
names(datExpr) <- rownames(counts[keep,])
rownames(datExpr) <- colnames(counts[keep,])
```

```{r wgcna_init2}
#initial exploration, barplot of mean expression per sample and sample quick dendrogram
meanExpressionByArray=apply(datExpr,1,mean,na.rm=T)
NumberMissingByArray=apply(is.na(data.frame(datExpr)),1,sum)
barplot(meanExpressionByArray,xlab="Sample",ylab="Mean expression",
        main="mean expression across samples",cex.names=0.7)
plotClusterTreeSamples(datExpr=datExpr)

#following FemaleLiver-01-dataInput.pdf
gsg=goodSamplesGenes(datExpr,verbose=3);
gsg$allOK   # allOK

datExpr0=datExpr[gsg$goodSamples,gsg$goodGenes]
sampleTree=flashClust(dist(datExpr0),method="average");

#following FemaleLiver-02-networkConstr-auto.pdf
powers=c(c(1:10),seq(from=12,to=16,by=2))
allowWGCNAThreads()
sft=pickSoftThreshold(datExpr0,powerVector=powers,verbose=5)
```

```{r threshold_plot, fig.height=5, fig.width=9}
#to graph results

par(mfrow=c(1,2));
cex=0.9;
plot(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  xlab="Soft threshold(power)",ylab="Scale free topology model fit, signed R^2",type="n",
	main=paste("Scale independence"));
text(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],
	labels=powers,cex=1,col="red");
abline(h=0.80,col="red")
plot(sft$fitIndices[,1],sft$fitIndices[,5],
	xlab="Soft threshold(power)",ylab="Mean connectivity",type="n",
	main=paste("Mean connectivity"))
text(sft$fitIndices[,1],sft$fitIndices[,5],labels=powers,cex=1,col="red")
```

Power 9 is greater than .8, so we'll use it.

```{r eval=FALSE }

adjacency = adjacency(datExpr, power = 9,type="signed")
TOM = TOMsimilarity(adjacency,TOMType="signed")
```

```{r echo=FALSE }
load(file="~/monomorium_adjacency.rData")
load(file="~/monomorium_TOM.rData")
load(file="~/monomorium_connectivity.rData")
```
  
```{r wgcna_cluster, message=FALSE, cache=TRUE}
datt <- datExpr

# Call the hierarchical clustering function
geneTree = flashClust(as.dist(1-TOM), method = "average");

# set the minimum module size to something relatively large
minModuleSize = 30;

# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = 1-TOM, deepSplit = 2, 
  pamRespectsDendro = FALSE, minClusterSize = minModuleSize);
table(dynamicMods)
dynamicColors = labels2colors(dynamicMods)

# Calculate eigengenes
MEList = moduleEigengenes(datt, colors = dynamicColors)
MEs = MEList$eigengenes

# Calculate dissimilarity of module eigengenes
METree = flashClust(as.dist(1-cor(MEs)), method = "average");
plot(METree, main = "Clustering of module eigengenes",xlab = "", sub = "")
MEDissThres = 0.25

# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")

# Call an automatic merging function
merge <- mergeCloseModules(datt, dynamicColors, cutHeight = MEDissThres, verbose = 0)

# The merged module colors
mergedColors = merge$colors

# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs

# plotting the cluster dendrogram
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

# Rename to moduleColors
moduleColors = mergedColors

# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;

# Define numbers of genes and samples
nGenes = ncol(datt);
nSamples = nrow(datt);

# Recalculate MEs with color labels
invisible(MEs0 <- moduleEigengenes(datt, moduleColors)$eigengenes)
MEs = orderMEs(MEs0)
MEs<-MEs[,order(names(MEs))]

# correlations of genes with eigengenes
moduleGeneCor=cor(MEs,datt)
moduleGenePvalue = corPvalueStudent(moduleGeneCor, nSamples);
```

```{r}

# how many genes in each module?
table(moduleColors)
```

```{r eval=FALSE}
connectivity <- intramodularConnectivity(adjacency,merge$colors)
```


## go-term enrichment in modules, output in table

```{r eval=FALSE }

modules<-c("saddlebrown","salmon","black","paleturquoise","royalblue","violet",
"darkturquoise","darkorange","darkolivegreen","red","orange","purple","darkgrey","blue")

for (i in 1:length(modules)) {
  module_genes <- rownames(counts[keep,])[moduleColors == modules[i]]
  module_upreg <- hyperGTest(GSEAGOHyperGParams(name = paste(modules[i],"upregulated"),
  geneSetCollection=gsc,geneIds = intersect(module_genes,universe),
  universeGeneIds=universe,ontology = "BP",pvalueCutoff = 0.05,conditional = FALSE,
  testDirection = "over"))                             
module_upreg1<-cbind(summary(module_upreg),i,modules[i])
colnames(module_upreg1)<-c("GOBPID","Pvalue","OddsRatio","ExpCount","Count",
  "Size","GOTerm","ModuleNumber","ModuleColor")
  write.table(module_upreg1,file="GOmodule.csv",append=TRUE,sep=",",row.names=FALSE)
}
```


## module-trait correlations
```{r}
plot_order <-  c(8,1,5,6,7,2,3,4,9)
traits = cbind(subset(design, select = -c(troph,groom, carb,prot)),forager=design[,"prot"]+ 
  design[,"carb"])[,plot_order]

moduleTraitCor = cor(MEs, traits, use = "p")
moduleTraitPvalue = p.adjust(corPvalueStudent(moduleTraitCor, nSamples=nrow(MEs)),method="fdr")

textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));

# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = colnames(moduleTraitCor),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = .5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))

```

```{r}

mycols <- colorpanel(n=19,low="white",high="darkgreen")
heatmap.2(moduleTraitCor,key=FALSE,trace="none",col=mycols,Colv=NULL,labCol=colnames(traits),
density.info="none", hclustfun=function(c){hclust(c, method="complete")},margins = c(10, 4),
RowSideColors=gsub("ME","",names(MEs)),labRow=table(moduleColors))

```

### Plotting module-age correlation coefficient over time
````{r}
moduleTraitCor_stack <- melt(moduleTraitCor)
colnames(moduleTraitCor_stack) <- c("module","stage","corr")
moduleTraitCor_stack$stage <- factor(moduleTraitCor_stack$stage,unique(moduleTraitCor_stack[,2]))
ggplot(subset(moduleTraitCor_stack, stage !="nurse" & stage != "forager"),aes(x=stage,y=corr,
  color=module,group=module)) + geom_line()+theme_bw()

pvals <-  c()
for (i in 1:nrow(moduleTraitCor))
	pvals <- c(pvals , cor.test(t(moduleTraitCor)[-c(1,9),i],seq(0,18,3))$p.value)
unique(names(MEs))[p.adjust(pvals,method="fdr")<.05]

#the saddelbrown and blue modules are significantly correlated with age
```


# Connectivity and evolution
This section examines whether connectivity plays a role in the evolution of genes.

### connectivity vs selection

Set up a data frame with connectivities and rates, and see whther total connectivity affects rates of evolution
```{r }
genes_with_rates <- intersect(rownames(connectivity),manfredini$mp_id)
connectivity_rate <- cbind(manfredini[genes_with_rates,"rate"],
  connectivity[rownames(connectivity) %in% genes_with_rates,])
colnames(connectivity_rate) <- c("rate",names(connectivity))
#add task information
connectivity_rate$task <- "all others"
connectivity_rate[forager_names,"task"] <- "forager"
connectivity_rate[nurse_names,"task"] <- "nurse"
connectivity_rate$task <- factor(connectivity_rate$task, levels=c("all others","nurse","forager"))

#evolutionary rate and connectivity
with(connectivity_rate,cor.test(rate,kTotal,method="spearman"))
```

There is a negative correlation between total connectivity and evolutionary rate, which is typical

## expression and connectivity
### average fpkm vs connectivity
```{r}
cor.test(rowMeans(fpkm)[rownames(connectivity_rate)],connectivity_rate$kTotal,method="spearman")
```

Highly connected genes have higher rates of expression, which is also expected.

```{r}
kruskalmc(kTotal ~ task, data = connectivity_rate)
```
All tasks differ in connectivity.

## Model for joint effects of expression and network on evolution
```{r}
connectivity_rate$fpkm <- fpkm_task[rownames(connectivity_rate),"fpkm"]
connectivity_rate$task <- fpkm_task[rownames(connectivity_rate),"task"]
connectivity_rate$task <- factor(connectivity_rate$task, levels=c("nde","nurse","forager"))

connectivity_rate$logFC <- et_nurse_forager$table[rownames(connectivity_rate),"logFC"]

form1 <- log(rate) ~ factor(task)
form2 <- log(rate) ~ log(fpkm)*factor(task)+log(kTotal)*factor(task)

summary(rate1.lm <- lm(form1, data=subset(connectivity_rate,rate<10 )))
summary(rate2.lm <-lm(form2, data=subset(connectivity_rate,rate<10 )))

# verifying results using bootstrap
bs <- function(formula, data, indices) {
  d <- data[indices,] # allows boot to select sample 
  glm(formula, data=d)
  fit <- lm(formula, data=d)
  return(coef(fit)) 
} 
results <- boot(data=connectivity_rate, statistic=bs, R=1000, formula=form1)

for (i in 2:length(rate1.lm$coefficients)) {
  bci <- boot.ci(results, type="basic", index=i)
  print(sprintf("%s,%.4f,%.4f,%.4f", names(rate1.lm$coefficients)[i],
                results$t0[i], bci$basic[4], bci$basic[5]))
}

results <- boot(data=connectivity_rate, statistic=bs, R=1000, formula=form2)

for (i in 2:length(rate2.lm$coefficients)) {
  bci <- boot.ci(results, type="basic", index=i)
  print(sprintf("%s,%.4f,%.4f,%.4f", names(rate2.lm$coefficients)[i],results$t0[i], bci$basic[4], bci$basic[5]))
}

```

# connectivity and context of expression
Here, we are interested in the regulatory context of forager- and nurse-expressed genes. This incudes *all* genes, not just those with evolutionary rate estimates.
```{r }
connectivity$task <- "all others"
connectivity[forager_names,"task"] <- "forager"
connectivity[nurse_names,"task"] <- "nurse"
connectivity$task <- factor(connectivity$task, levels=c("all others","nurse","forager"))

kruskalmc(kTotal ~ task, data = connectivity)
ggplot(connectivity,aes(x=factor(task),y=kTotal))+geom_boxplot(notch=TRUE)+theme_bw()+
  ylab("connectivity")+xlab("")+scale_y_log10()
```
The picture is similar to what you find with genes that have rates.
                         
                         
### Effect of connectivity on presence of S. invicta or A. mellifera ortholog
```{r}
connectivity$homolog <- 0
connectivity[rownames(manfredini)[rownames(manfredini) %in% rownames(connectivity)],"homolog"]<-1

kruskal.test(kTotal ~ factor(homolog), data=connectivity)
ggplot(connectivity,aes(factor(homolog),kTotal))+geom_violin()+facet_grid(.~task)+theme_bw()+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar",width=0.1,colour = "red")

kruskalmc(homolog ~ task, data=connectivity)

summary(glm(log(kTotal)~factor(task)*factor(homolog),data=connectivity))
```
connectivity and expression is higher for forager-upregulated genes and for genes with S. invicta orthologs. The connectivity of forager- and nurse-upregulated genes is less affected by whether the gene has a S. invicta ortholog than non-differentially expressed genes

```{r }
connectivity$beehomolog <- 0
connectivity[hbee$mp[hbee$mp %in% rownames(connectivity)],"beehomolog"] <- 1

kruskal.test(kTotal ~ factor(beehomolog), data=connectivity)
ggplot(connectivity,aes(factor(beehomolog),kTotal))+geom_violin()+facet_grid(.~task)+theme_bw()+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar",width=0.1,colour = "red")


kruskalmc(beehomolog ~ task, data=connectivity)

summary(glm(log(kTotal)~factor(task)*factor(beehomolog),data=connectivity))
```
similarly, connectivity is higher for forager-upregulated genes and for genes with A. mellifera orthologs. The connectivity of forager- and nurse-upregulated genes is less affected by whether the gene has a A. mellifera ortholog than non-differentially expressed genes

```{r }

connectivity$fpkm <- rowMeans(fpkm[keep,])
summary(glm(homolog~fpkm*factor(task)+kTotal*factor(task),family=quasibinomial,data=connectivity))
```
The likelihood of a gene to have a S. invicta ortholog is most strongly determined by its connectivity. 

```{r }
summary(glm(beehomolog~fpkm*factor(task)+kTotal*factor(task),family=quasibinomial,
            data=connectivity))
```
similarly, the likelihood of a gene to have a A. mellifera ortholog is most strongly determined by its connectivity. 

```{r }
summary(beehomolog~task,data=connectivity)
with(connectivity,table(beehomolog,task))
chisq.test(with(connectivity,table(beehomolog,task)))
```
49% of forager-upregulated genes have honey bee orthologs, compared to 38% for nurse-upregulated and non-differentially expressed genes

```{r }
summary(homolog~task,data=connectivity)
with(connectivity,table(homolog,task))
chisq.test(with(connectivity,table(homolog,task)))
```
similarly, forager-upregulated genes are much more likely to have fire ant orthologs (54%) compared to nurse-upregulated and non-differentially expressed genes (43%)

```{r results="hide" }
con<-as.data.table(connectivity)
con$hom<-"none"
con[homolog=="1" & beehomolog=="1", hom :="both"]
con[homolog=="1" & beehomolog=="0", hom :="Sinv only"]
con[homolog=="0" & beehomolog=="1", hom :="Amel only"]
con[homolog=="0" & beehomolog=="0", hom :="neither"]
con$hom<-factor(con$hom,levels=c("both","neither","Amel only","Sinv only"))
```

```{r }
with(con,table(hom,task))
chisq.test(with(con,table(hom,task)))
```
forager-upregulated genes are more likely to have both fire ant and honey bee orthologs


Overall, highly connected genes are more likely to have a ortholog, and correspondingly forager-upregulated genes are more conserved.

# Summary plots
```{r fig.width=10, fig.height=6 }

#connectivity vs. honey bee ortholog
fmt <- function() function(x) formatC(signif(x,digits=3), digits=2,format="fg")
connectivity$task<-factor(connectivity$task,labels=c("NDE","Nurse","Forager"))
connectivity$beehomolog<-factor(connectivity$beehomolog,labels=c("No","Yes"))
beeortholog_plot<-ggplot(connectivity,aes(factor(beehomolog),kTotal,fill=task,alpha=0.2))+
  geom_violin()+facet_grid(.~task)+theme_bw()+
  ylab("Connectivity (kTotal)")+scale_y_log10()+
  xlab("Honey bee ortholog")+
  scale_fill_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0.5,0.5), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  ggtitle("")+
  theme(strip.text.x = element_text(size=16),
        strip.background = element_rect(fill="cornsilk1"))+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar",width=0.15,alpha=1,size=1,colour = "black")
beeortholog_plot

#connectivity vs. fire ant ortholog
connectivity$homolog<-factor(connectivity$homolog,labels=c("No","Yes"))
antortholog_plot<-ggplot(connectivity,aes(factor(homolog),kTotal,fill=task,alpha=0.2))+
  geom_violin()+facet_grid(.~task)+theme_bw()+
  ylab("Connectivity (kTotal)")+scale_y_log10()+
  xlab("Fire ant ortholog")+
  scale_fill_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0.5,0.5), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  ggtitle("")+
  theme(strip.text.x = element_text(size=16),
        strip.background = element_rect(fill="cornsilk1"))+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar",width=0.15,alpha=1,size=1,colour = "black")
antortholog_plot

#expression vs. fire ant ortholog
antorthologexpression_plot<-ggplot(connectivity,aes(factor(homolog),
  fpkm,fill=task,alpha=0.2))+
  geom_violin()+facet_grid(.~task)+theme_bw()+
  ylab("Expression (FPKM)")+
  scale_y_log10(labels=fmt(),limits=c(1, 1e3))+
  xlab("Fire ant ortholog")+
  scale_fill_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0.5,0.5), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  ggtitle("")+
  theme(strip.text.x = element_text(size=16),
        strip.background = element_rect(fill="cornsilk1"))+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar",width=0.15,alpha=1,size=1,colour = "black")
antorthologexpression_plot

#expression vs. honey bee ortholog
beeorthologexpression_plot<-ggplot(connectivity,aes(factor(beehomolog),
  fpkm,fill=task,alpha=0.2))+
  geom_violin()+facet_grid(.~task)+theme_bw()+
  ylab("Expression (FPKM)")+
  scale_y_log10(labels=fmt(),limits=c(1, 1e3))+
  xlab("Honey bee ortholog")+
  scale_fill_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0.5,0.5), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  ggtitle("")+
  theme(strip.text.x = element_text(size=16),
  strip.background = element_rect(fill="cornsilk1"))+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar",width=0.15,alpha=1,size=1,colour = "black")
beeorthologexpression_plot

#connectivity vs. behavioral category
connectivity_rate$task<-factor(connectivity_rate$task,labels=c("NDE","Nurse","Forager"))
connectivity_plot <- ggplot(connectivity_rate,aes(x=factor(task),y=kTotal,fill=task,alpha=0.2))+
  geom_violin()+theme_bw()+
  ylab("Connectivity (kTotal)")+scale_y_log10()+
  xlab("")+
  scale_fill_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0,0), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar",width=0.15,alpha=1,size=1,colour = "black")
connectivity_plot

#evolutionary rate vs. behavioral category
rate_plot<-ggplot(connectivity_rate,aes(x=factor(task),y=rate,fill=task,alpha=0.2))+
  geom_violin()+theme_bw()+
  ylab("Evolutionary rate (dN/dS)")+xlab("")+
  scale_y_log10(labels=fmt(),limits=c(5e-3, 3))+
  scale_fill_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0,0), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar"
  ,width=0.15,alpha=1,size=1,colour = "black")
rate_plot

#expression vs. behavioral category
expression_plot<-ggplot(connectivity_rate,aes(x=factor(task),y=fpkm,fill=task,alpha=0.2))+
  geom_violin()+theme_bw()+
  ylab("Expression (FPKM)")+xlab("")+
  scale_y_log10(labels=fmt(),limits=c(1, 1e4))+
  scale_fill_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0,0), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  stat_summary(fun.data="mean_cl_boot", geom="errorbar"
               ,width=0.15,alpha=1,size=1,colour = "black")
expression_plot

#fpkm vs connectivity
fpkm_connectivity_plot <- ggplot(connectivity_rate,aes(fpkm,kTotal,color=task))+
  geom_point(alpha=0.7)+scale_x_log10(limits=c(1, 1e4))+
  scale_y_log10(labels=fmt())+theme_bw()+
  ylab("Connectivity (kTotal)")+xlab("Expression (FPKM)")+
  stat_smooth(method="lm",se=FALSE,size=1.25,fullrange=TRUE,aes(group=task))+
  scale_color_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0,0), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  ggtitle("")
fpkm_connectivity_plot  

#rate and fpkm
rate_fpkm_plot <- ggplot(connectivity_rate,aes(fpkm,rate,color=task))+
  geom_point(alpha=0.7)+scale_x_log10(limits=c(1, 1e4))+
  scale_y_log10(labels=fmt(),limits=c(1e-3, 1))+theme_bw()+
  ylab("Evolutionary rate (dN/dS)")+xlab("Expression (FPKM)")+
  stat_smooth(method="lm",se=FALSE,size=1.25,fullrange=TRUE,aes(group=task))+
  scale_color_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0,0), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  ggtitle("")
rate_fpkm_plot  

# evolutionary rates and connectivity
connectivity_rate_plot <- ggplot(connectivity_rate,aes(kTotal,rate,color=task))+
  geom_point(alpha=0.7)+scale_x_log10()+
  scale_y_log10(labels=fmt(),limits=c(1e-3, 1))+theme_bw()+
  ylab("Evolutionary rate (dN/dS)")+xlab("Connectivity (kTotal)")+
  stat_smooth(method="lm",se=FALSE,size=1.25,fullrange=TRUE,aes(group=task))+
  scale_color_manual(values=c("grey","blue","red"))+
  theme(legend.position="none")+theme(plot.margin=unit(c(0,0.5,0,0), "cm"),
  axis.text.y = element_text(size=14),axis.text.x = element_text(size=14),
  axis.title.y = element_text(size=20),axis.title.x = element_text(size=20))+
  ggtitle("")
connectivity_rate_plot

```



```{r wgcna_init_no0, error=TRUE,eval=FALSE,echo=FALSE}
datExpr2 <- data.frame(t(log(fpkm[keep,factors$factor!="age0"]+1,2)))
names(datExpr2) <- rownames(counts[keep,])
rownames(datExpr2) <- colnames(counts[keep,factors$factor!="age0"])

#initial exploration, barplot of mean expression per sample and sample quick dendrogram
meanExpressionByArray=apply(datExpr2,1,mean,na.rm=T)
NumberMissingByArray=apply(is.na(data.frame(datExpr2)),1,sum)
barplot(meanExpressionByArray,xlab="Sample",ylab="Mean expression",main="mean expression across samples",cex.names=0.7)
plotClusterTreeSamples(datExpr=datExpr2)

gsg=goodSamplesGenes(datExpr2,verbose=3);
gsg$allOK   # allOK

datExpr0=datExpr[gsg$goodSamples,gsg$goodGenes]
sampleTree=flashClust(dist(datExpr0),method="average");

powers=c(c(1:10),seq(from=12,to=16,by=2))
allowWGCNAThreads()
sft=pickSoftThreshold(datExpr0,powerVector=powers,verbose=5)

#to graph results

par(mfrow=c(1,2));
cex=0.9;
plot(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],
  xlab="Soft threshold(power)",ylab="Scale free topology model fit, signed R^2",type="n",
  main=paste("Scale independence"));
text(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],
	labels=powers,cex=1,col="red");
abline(h=0.80,col="red")
plot(sft$fitIndices[,1],sft$fitIndices[,5],
	xlab="Soft threshold(power)",ylab="Mean connectivity",type="n",
	main=paste("Mean connectivity"))
text(sft$fitIndices[,1],sft$fitIndices[,5],labels=powers,cex=1,col="red")
```


```{r eval=FALSE, echo=FALSE }
## Day 0 individuals are somewhat outlying. What happens to connectivity if they are excluded?


datt <- datExpr2
adjacency = adjacency(datExpr2, power = 9,type="signed")
TOM = TOMsimilarity(adjacency2,TOMType="signed")

# Call the hierarchical clustering function
geneTree = flashClust(as.dist(1-TOM2), method = "average");

# set the minimum module size to something relatively large
minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = 1-TOM2, deepSplit = 2, 
      pamRespectsDendro = FALSE, minClusterSize = minModuleSize);
table(dynamicMods)
dynamicColors = labels2colors(dynamicMods)

# Calculate eigengenes
MEList = moduleEigengenes(datt, colors = dynamicColors)
MEs = MEList$eigengenes

# Calculate dissimilarity of module eigengenes
METree = flashClust(as.dist(1-cor(MEs)), method = "average");
plot(METree, main = "Clustering of module eigengenes",xlab = "", sub = "")
MEDissThres = 0.25

# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")

# Call an automatic merging function
merge <- mergeCloseModules(datt, dynamicColors, cutHeight = MEDissThres, verbose = 0)

# The merged module colors
mergedColors = merge$colors

# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs

# plotting the fabulous dendrogram
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

# Rename to moduleColors
moduleColors = mergedColors

# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;

# Define numbers of genes and samples
nGenes = ncol(datt);
nSamples = nrow(datt);
# Recalculate MEs with color labels
invisible(MEs0 <- moduleEigengenes(datt, moduleColors)$eigengenes)
MEs = orderMEs(MEs0)

# correlations of genes with eigengenes
moduleGeneCor=cor(MEs,datt)
moduleGenePvalue = corPvalueStudent(moduleGeneCor, nSamples);

# how many genes in each module?
table(moduleColors)

connectivityNo0 <- intramodularConnectivity(adjacency,merge$colors)
connectivityNo0$task <- "all others"
connectivityNo0[forager_names,"task"] <- "forager"
connectivityNo0[nurse_names,"task"] <- "nurse"

ggplot(connectivityNo0,aes(task,kTotal))+geom_violin()+theme_bw()
kruskalmc(kTotal~task,data=connectivityNo0)
```
