## This script contains functions to map v-eQTL, map the cis window for independent examples of epistasis with the v-eQTL and test epistatic interactions for confounding with dominance effects of the v-eQTL

#####Functions#####
## For the ith SNP, returns P values for v-eQTL for the real data and 5 permuted datasets
VqtlMapping <- function(i){
  ## Removes general SNP effect on phenotype and calculates squared residuals
  res <- residuals(lm(gene ~ snp.min[i, ] + snp.het[i, ]))^2

  ## rank of real squared residuals and 5 permutations of this rank in perm.rank
  perm.rank <- matrix(rank(res)[unlist(perm)], nrow(perm), 6)
  rank.dose <- rank(snp.dose[i, ])

  ## P value for spearman correlation with dosage, given rank of phenotype
  Pspearman <- function(z){
    r <- cor(z, rank.dose)
    if (!is.na(r)) {
      return(2 * pt(-abs(r / sqrt((1 - r^2) / (n.ind - 2))), df = n.ind - 2))
    } else {
      return(NA)
    }
  }

  return(signif(apply(perm.rank, 2, Pspearman), 5))
}

## Tests a vector x of snp dosage for epistasis with the v-eQTL (dosage in mat[, 2]) conditional on previously discovered epistasis (stored in mat)
EpistasisRegress <- function(x){
    m1 <- try(lmer(gene ~ -1 + mat + x + I(mat[,2] * x) + (1|FAM) + (1|ZYG), REML=F))
    m2 <- try(lmer(gene ~ -1 + mat + x  + (1|FAM) + (1|ZYG), REML=F))
    if (class(m1)=='try-error' | class(m2)=='try-error'){
        return(1)
    } else {
        return(anova(m1, m2)$P[2])
    }
}

#####End of functions######

#####Load library
library(lme4)

##### Objects in R workspace######
## cov is a matrix, first column is family ID, second column is subject ID, third column is 'MZ' for monozygotic twins, 'DZ' for dizygotic twins
## gene is a vector with expression of one gene
## perm is a set of permutations, matrix with 6 columns, first column is identity permutation, other columns are numbers 1-765 in random order
## snp.min is a matrix of the posterior probability of being a minor allele homozygote, column names are subjectb ID, row names location for every SNP in cis window
## snp.het is a matrix of the posterior probability of being a heterozygote, column names are subjectb ID, row names location for every SNP in cis window
## snp.dose is a matrix of snp dosage, column names are subjectb ID, row names location for every SNP in cis window
## v.dose is vector with dosage of v-eQTL
## v.het is vector with posterior probability that v-eQTL is a heterozygote
## v.min is vector with posterior probability that v-eQTL is a minor allele homozygote

## number of individuals to analyse
n.ind <- length(gene)
i <- 1
####Task 1: Test ith SNP for evidence that it acts as a v-eQTL. Produces a vector length 6, first entry is p-value for v-eQTL, next 5 are p-values for permuted data

VqtlMapping(i)

####Task 2: Map cis window for all indpendent examples of epistasis with v-eQTL

## Bonferroni significant threshold
THRESHOLD <- 0.05 / 2519215

## Create random effects factors, the FAM captures the correlations of twins, ZYG the increased correlation of MZ twins
FAM <- factor(cov[, 1])
ZYG <- factor(ifelse(cov[, 3]=='MZ',cov[, 1],cov[, 2]))

## The first iteration of the predictors of expression, a linear intercept and the v-eQTL
mat <- cbind(rep(1, length(v.dose)), v.dose)

## sig is a place to store significant results
sig <- data.frame(epiSNP=NULL, P=NULL)

repeat{
    ## Tests each SNP for significant interaction with v-eQTL, conditional on all previous iterations, if min p < THRESHOLD, add SNP and continue
    p <- apply(snp.dose, 1, EpistasisRegress)
    if (min(p) > THRESHOLD){
        break
    } else {
        best <- which(p==min(p))[1]
        sig <- rbind(sig, c(as.numeric(rownames(snp.dose))[best], min(p)))
        e.snp <- unlist(snp.dose[best, ])
        mat <- cbind(mat, e.snp, e.snp * mat[,2])
        snp.dose <- snp.dose[-best, ]
    }
}

print(sig)

####Task 3: test ith SNP for evidence that epistasis is explained by a dominant effect of v-eQTL

m1 <- lmer(gene ~ v.het + v.min + snp.dose[i, ] + I(v.dose * snp.dose[i, ]) + (1|FAM) + (1|ZYG), REML=F)
m2 <- lmer(gene ~ v.het + v.min + snp.dose[i, ] + (1|FAM) + (1|ZYG), REML=F)
print(anova(m1, m2)$P[2])
