# Filtering strategy and output generation for de novo variant analysis of
# trio-based whole exome sequencing (WES) datasets
# Retrieve data and create output folder
# Enter the data folder, collect the per-trio variant tables found there and
# make sure the "Output" sub-folder exists.
setwd("./DNM data")
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored at the end so that only file names ending in ".txt" are selected.
TrioData <- list.files(path = ".", pattern = "\\.txt$", full.names = TRUE)
# Stay quiet when the folder is left over from an earlier run.
dir.create("Output", recursive = TRUE, showWarnings = FALSE)
# Filtering strategy applied to each trio-based WES dataset
# Filter every trio table and write three per-trio files into "Output":
#   <ID>-IEI.csv    de novo calls (before filtering) that merge with GenePanel
#   <ID>-DNM.csv    candidate de novo variants that pass the filters
#   <ID>-Count.csv  number of candidate de novo variants
# Relies on filter(), relocate(), %>% and str_split() plus a `GenePanel` table
# with a "Gene.name" column; all of these are expected to be available before
# the loop runs.
# head() keeps the cap of 123 datasets without producing NA paths when fewer
# files are present.
for (trio_path in head(TrioData, 123)) {
  Trio <- read.delim(trio_path, header = TRUE, sep = "\t")

  # Add column with ID of corresponding trio: the file name up to "_hc",
  # without the leading "./" that list.files(full.names = TRUE) adds.
  path_prefix <- str_split(trio_path, "_hc")[[1]][[1]]
  trio_id <- str_split(path_prefix, "/")[[1]][[2]]
  # rep() keeps the assignment valid for a table without rows.
  Trio$ID <- rep(trio_id, nrow(Trio))
  Trios <- Trio %>% relocate(ID)

  # Filter steps
  # 1. Calls whose assessment mentions "NOVO".
  DeNovo <- Trios %>%
    filter(grepl("NOVO", De.novo.assessment))
  # 2. Exonic and splice-site gene components only.
  GenomicRegion <- DeNovo %>%
    filter(Gene.component %in% c(
      "EXON_REGION", "SA_SITE_CANONICAL",
      "SD_SITE_CANONICAL", "CODING_SPLICE_SITE_REGION"
    ))
  # 3. Frequency thresholds. Rows with NA in any of these columns are dropped
  #    by filter().
  # NOTE(review): this column was spelled with a single typographic ellipsis
  # character (U+2026), which is not a valid R name. Three dots is what
  # read.delim() produces for three consecutive non-syntactic characters in a
  # header - confirm against names(Trio).
  Rare <- GenomicRegion %>%
    filter(
      SNP.Frequency <= 0.1,
      NonCausative...Frequency <= 0.1,
      EXAC.AF <= 0.1,
      AF_GoNL <= 0.5,
      gnomAD.G.AF <= 0.1
    )
  # 4. Quality control: drop assessments mentioning "LOW" and require at least
  #    10 variation reads and a variation value of at least 20.
  QC <- Rare %>%
    filter(
      !grepl("LOW", De.novo.assessment),
      variation.reads >= 10,
      X..variation >= 20
    )
  # NOTE(review): NoSynonym is computed but not used below; the exported
  # candidates and counts come from QC. Confirm which set is intended.
  NoSynonym <- QC %>% filter(Synonymous == "FALSE")

  # Output files are addressed through file.path() instead of changing the
  # working directory, so an error in this iteration cannot leave the session
  # inside "Output".
  out_dir <- "Output"

  # All *de novo* variants (before filtering) in established IEI genes
  CheckIEI <- DeNovo %>% merge(GenePanel, by = "Gene.name")
  write.csv(
    CheckIEI,
    file = file.path(out_dir, paste0(trio_id, "-IEI.csv")),
    row.names = FALSE
  )

  # Candidate de novo variants
  write.csv(
    QC,
    file = file.path(out_dir, paste0(trio_id, "-DNM.csv")),
    row.names = FALSE
  )

  # Count candidate de novo variants
  trio_count <- data.frame(ID = trio_id, Count_Candidates = nrow(QC))
  write.csv(
    trio_count,
    file = file.path(out_dir, paste0(trio_id, "-Count.csv")),
    row.names = FALSE
  )
}
# Switch into the output folder: the list.files(path = ".") calls that follow
# look for the per-trio CSV files here, and the combined workbooks are written
# to this folder as well.
setwd("Output")
# Create one list of all de novo variants in known IEI genes across all 123
# trio-based WES datasets
# Combine the per-trio "-IEI.csv" files of the current folder into one table
# and save it as "IEI_DNM.xlsx". Every column is read as character.
# `pattern` is a regular expression; it is anchored so that only file names
# ending in "-IEI.csv" are read.
iei_files <- list.files(path = ".", pattern = "-IEI\\.csv$", full.names = TRUE)
IndividualIEI <- lapply(iei_files, read.csv, colClasses = c("character"))
# by = NULL joins on the columns the tables have in common; type = "full"
# keeps the rows of every table.
AllIEI <- join_all(IndividualIEI, by = NULL, type = "full", match = "all")
write.xlsx(AllIEI, file = "IEI_DNM.xlsx")
# Create one list of candidate de novo variants across all 123 trio-based
# WES datasets
# Combine the per-trio "-DNM.csv" files of the current folder into one table
# and save it as "DNM.xlsx". Every column is read as character.
# `pattern` is a regular expression; it is anchored so that only file names
# ending in "-DNM.csv" are read.
dnm_files <- list.files(path = ".", pattern = "-DNM\\.csv$", full.names = TRUE)
AllCandidates <- lapply(dnm_files, read.csv, colClasses = c("character"))
# by = NULL joins on the columns the tables have in common; type = "full"
# keeps the rows of every table.
AllC <- join_all(AllCandidates, by = NULL, type = "full", match = "all")
write.xlsx(AllC, file = "DNM.xlsx")
# Create one list of the number of candidate de novo variants in each of the
# 123 trio-based WES datasets
# Combine the per-trio "-Count.csv" files of the current folder into one table
# and save it as "Counts.xlsx".
# `pattern` is a regular expression; it is anchored so that only file names
# ending in "-Count.csv" are read.
count_files <- list.files(
  path = ".", pattern = "-Count\\.csv$", full.names = TRUE
)
# ID stays character so that numeric-looking IDs are not converted; the count
# is read as an integer so that it is stored as a number, not as text, in the
# workbook.
IndividualCounts <- lapply(
  count_files,
  read.csv,
  colClasses = c(ID = "character", Count_Candidates = "integer")
)
# by = NULL joins on the columns the tables have in common; type = "full"
# keeps the rows of every table.
AllCounts <- join_all(IndividualCounts, by = NULL, type = "full", match = "all")
write.xlsx(AllCounts, file = "Counts.xlsx")