#!/usr/bin/env Rscript

# Clear every object (including dot-prefixed ones) from the global environment
# before building the figure.
# NOTE(review): if this script is source()d from an interactive session this
# wipes the caller's workspace; the rest of the script assigns every variable
# before using it, so the call could probably be dropped -- confirm.
rm(list = ls(all.names = TRUE))

library(tidyverse)
library(cowplot)

# Script to produce figure2.
# Source data for the panels are in ./data/

#### Panel A: Prostate Spaghetti Plot ####

# One line per `sample`, coloured by `ctdna_bin`, restricted to fragment
# lengths between 80 and 400 (inclusive).
wgs_frag_lens_norm_long <- read_tsv("data/panelA_Spaghetti_data.tsv")

p.fig2a <- wgs_frag_lens_norm_long %>%
  filter(frag_len >= 80, frag_len <= 400) %>%
  ggplot(aes(
    x = frag_len,
    y = count,
    col = ctdna_bin,
    group = sample
  )) +
  geom_line(lwd = 0.2, alpha = 0.5) +
  # Dotted reference line at 167 bp. The intercept is passed as a constant
  # instead of through aes(): a constant inside aes() is evaluated against the
  # inherited plot data and draws one identical line per data row.
  geom_vline(xintercept = 167, lwd = 0.5, lty = 3) +
  scale_x_continuous(name = "Fragment length (bp)") +
  scale_y_continuous(name = "Relative frequency") +
  scale_color_brewer(name = "ctDNA%", palette = "Set2") +
  theme_bw() +
  # Legend sits inside the panel, framed by a thin black box.
  # NOTE(review): element_rect(size = ) is deprecated in favour of `linewidth`
  # from ggplot2 3.4.0; kept so the script still runs on older versions.
  theme(
    legend.position = c(0.8, 0.6),
    legend.background = element_rect(
      size = 0.2,
      linetype = "solid",
      color = "black"
    )
  ) +
  # Legend keys are drawn opaque and thicker than the plotted lines so the
  # colours stay readable.
  guides(colour = guide_legend(override.aes = list(alpha = 1, lwd = 1)))

#### Panel B: Prostate WGS Signatures Plot ####

# One line per signature over adjusted fragment lengths between 80 and 400
# (inclusive); the raw signature ids are relabelled for the legend.
wgs_comps_c2_norm_long <- read_tsv("data/panelB_WGS_Sigs_data.tsv")

p.fig2b <- wgs_comps_c2_norm_long %>%
  filter(frag_len_adjust >= 80, frag_len_adjust <= 400) %>%
  mutate(
    signature_id = fct_recode(
      signature_id,
      "Signature#1 (normal)" = "Signature1",
      "Signature#2 (cancer)" = "Signature2"
    )
  ) %>%
  ggplot(aes(
    x = frag_len_adjust,
    y = count,
    col = signature_id,
    group = signature_id
  )) +
  geom_line() +
  # Dotted reference line at 167 bp. The intercept is passed as a constant
  # instead of through aes(): a constant inside aes() is evaluated against the
  # inherited plot data and draws one identical line per data row.
  geom_vline(xintercept = 167, lwd = 0.5, lty = 3) +
  scale_x_continuous(name = "Fragment length (bp)") +
  scale_y_continuous(name = "Relative frequency") +
  scale_color_brewer(palette = "Set2") +
  theme_bw() +
  # Untitled legend inside the panel, framed by a thin black box.
  theme(
    legend.position = c(0.7, 0.6),
    legend.background = element_rect(
      size = 0.2,
      linetype = "solid",
      color = "black"
    ),
    legend.title = element_blank()
  ) +
  guides(colour = guide_legend(override.aes = list(alpha = 1, lwd = 1)))

#### Panel C: Prostate Targeted Mutation Signatures Plot ####

# Fragment-length profiles (80 to 400 inclusive) for the three series named in
# the factor levels below; rows labelled "Signature1" are dropped.
mut_data_combined <- read_tsv("data/panelC_TargetedSeq_MutNMF_Sigs_data.tsv")

p.fig2c <- mut_data_combined %>%
  filter(frag_len >= 80, frag_len <= 400) %>%
  mutate(
    signature_id = fct_recode(
      signature_id,
      "Signature#2 (cancer)" = "Signature2"
    )
  ) %>%
  filter(signature_id != "Signature1") %>%
  # Fix the level order, which controls legend order and colour assignment.
  mutate(
    signature_id = factor(
      signature_id,
      levels = c("Non-mutated", "Mutated", "Signature#2 (cancer)")
    )
  ) %>%
  ggplot(aes(x = frag_len, y = count, col = signature_id)) +
  geom_line() +
  scale_x_continuous(name = "Fragment length (bp)") +
  scale_y_continuous(name = "Relative frequency") +
  scale_color_brewer(palette = "Set2") +
  theme_bw() +
  # Untitled legend inside the panel, framed by a thin black box.
  theme(
    legend.position = c(0.7, 0.8),
    legend.background = element_rect(
      size = 0.2,
      linetype = "solid",
      color = "black"
    ),
    legend.title = element_blank()
  ) +
  guides(colour = guide_legend(override.aes = list(alpha = 1, lwd = 1)))

#### Panel D: Prostate WGS Signature Weight Scatter Plot ####

# Scatter of `sig2_weight` against `ctdna_frac`, annotated in the top-left
# corner with their correlation (cor() default, i.e. Pearson) rounded to two
# decimals.
wgs_scatter_dat <- read_tsv("data/panelD_WGS_Scatter_data.tsv")

p.fig2d <- wgs_scatter_dat %>%
  ggplot(aes(x = ctdna_frac, y = sig2_weight)) +
  geom_point() +
  annotate(
    "text",
    x = 0.05,
    y = 0.95,
    label = paste0(
      "r = ",
      round(
        cor(wgs_scatter_dat$ctdna_frac, wgs_scatter_dat$sig2_weight),
        digits = 2
      )
    )
  ) +
  # Both axes are fixed to [0, 1]. The correlation above is computed on the
  # full table, independent of these limits.
  scale_x_continuous(name = "ctDNA% (VAF-based)", limits = c(0, 1)) +
  scale_y_continuous(name = "sWGS NMF Signature#2 weight", limits = c(0, 1)) +
  theme_bw()

#### Panel E: Prostate Targeted Signature Weight Scatter Plot ####

# Scatter of `sig2_weight` against `ctdna_frac`, annotated in the top-left
# corner with their correlation (cor() default, i.e. Pearson) rounded to two
# decimals.
tgt_scatter_dat <- read_tsv("data/panelE_TargetSeq_Scatter_data.tsv")

p.fig2e <- tgt_scatter_dat %>%
  ggplot(aes(x = ctdna_frac, y = sig2_weight)) +
  geom_point() +
  annotate(
    "text",
    x = 0.05,
    y = 0.95,
    label = paste0(
      "r = ",
      round(
        cor(tgt_scatter_dat$ctdna_frac, tgt_scatter_dat$sig2_weight),
        digits = 2
      )
    )
  ) +
  # Both axes are fixed to [0, 1]. The correlation above is computed on the
  # full table, independent of these limits.
  scale_x_continuous(name = "ctDNA% (VAF-based)", limits = c(0, 1)) +
  scale_y_continuous(
    name = "Targeted-seq NMF Signature#2 weight",
    limits = c(0, 1)
  ) +
  theme_bw()

#### Panel F: Subsampling Plot ####

# Every column except `n_sampled` is stacked into name/value pairs and drawn
# as its own coloured series against `n_sampled` on a log10 x axis.
d <- read_tsv("data/panelF_TargetSeq_Scatter_data.tsv")

p.fig2f <- d %>%
  pivot_longer(cols = -n_sampled) %>%
  ggplot(aes(x = n_sampled, y = value, color = name)) +
  geom_point() +
  geom_line() +
  scale_x_log10(name = "Number of fragments") +
  scale_y_continuous(name = "Pearson correlation coefficient (r)") +
  theme_bw() +
  # Untitled legend inside the panel, framed by a thin black box.
  theme(
    legend.position = c(0.75, 0.2),
    legend.background = element_rect(
      size = 0.2,
      linetype = "solid",
      color = "black"
    ),
    legend.title = element_blank()
  )

# Arrange the six panels in two rows with automatic panel labels, then write
# the figure as both PDF and PNG at 12 x 7 (ggsave's default units).
p.fig2 <- plot_grid(
  p.fig2a, p.fig2b, p.fig2c,
  p.fig2d, p.fig2e, p.fig2f,
  labels = "auto",
  nrow = 2
)

# Pass the assembled grid explicitly: without `plot =`, ggsave() saves whatever
# last_plot() currently holds, which is not guaranteed to be p.fig2.
ggsave("figure2.pdf", plot = p.fig2, width = 12, height = 7)
ggsave("figure2.png", plot = p.fig2, width = 12, height = 7)

