library(dplyr)
library(ggplot2)
library(grid)
# =========================================================
# 1. User settings
# =========================================================
setwd("/Users/anirban/MyLab/Manuscript/Mofida_Polk-aging/Mar26_VOR/Plot_source.data/Figure6/Fig6C+6D/")
input_file <- "./Fig6C+6D_data.csv"
output_fig6c_pdf <- "Fig6C_NucPolkperArea2.pdf"
output_fig6c_jpg <- "Fig6C_NucPolkperArea2.jpg"
output_fig6d_pdf <- "Fig6D_CytoPolkperArea2.pdf"
output_fig6d_jpg <- "Fig6D_CytoPolkperArea2.jpg"
# Plot styling
box_width        <- 0.70
dodge_width      <- 0.80
box_line_width   <- 0.55
ctrl_fill_color  <- "#b9d1ad"
ka_fill_color    <- "#c4b7d0"
panel_fill_color <- "#ffffff"
strip_fill_color <- "#d2d2d2"
line_color       <- "#444444"
# =========================================================
# 2. Load and prepare data
# =========================================================
polk_data <- read.csv(input_file, header = TRUE)
polk_data <- polk_data %>%
mutate(
treatment = factor(treatment, levels = c("CTRL", "KA")),
age       = factor(age, levels = c("yng", "old")),
exposure  = factor(as.character(exposure), levels = c("80", "160")),
age_label = factor(age, levels = c("yng", "old"),
labels = c("young", "early old"))
)
# =========================================================
# 3. Statistical helper functions
# =========================================================
# Compute Cohen's d as (mean(KA) - mean(CTRL)).
# Positive values mean KA > CTRL; negative values mean KA < CTRL.
compute_cohens_d <- function(ctrl_values, ka_values) {
ctrl_values <- ctrl_values[is.finite(ctrl_values)]
ka_values   <- ka_values[is.finite(ka_values)]
n_ctrl <- length(ctrl_values)
n_ka   <- length(ka_values)
if (n_ctrl < 2 || n_ka < 2) {
return(NA_real_)
}
ctrl_sd <- sd(ctrl_values)
ka_sd   <- sd(ka_values)
if (!is.finite(ctrl_sd) || !is.finite(ka_sd) || (ctrl_sd == 0 && ka_sd == 0)) {
return(NA_real_)
}
pooled_sd <- sqrt(
((n_ctrl - 1) * ctrl_sd^2 + (n_ka - 1) * ka_sd^2) / (n_ctrl + n_ka - 2)
)
(mean(ka_values) - mean(ctrl_values)) / pooled_sd
}
# Format p-values for figure labels
format_p_label <- function(p_value) {
if (!is.finite(p_value)) {
return("p=NA")
}
if (p_value < 0.0001) {
return("p<0.0001")
}
sprintf("p=%.4f", p_value)
}
# =========================================================
# 4. Compute panel-wise statistics
#    One comparison per age x exposure panel:
#    CTRL vs KA using Wilcoxon rank-sum (MWU) + Cohen's d
# =========================================================
compute_panel_statistics <- function(data, response_column) {
age_levels <- levels(data$age_label)
exposure_levels <- levels(data$exposure)
stats_rows <- list()
row_index <- 1
for (current_age in age_levels) {
for (current_exposure in exposure_levels) {
panel_data <- data %>%
filter(age_label == current_age, exposure == current_exposure)
ctrl_values <- panel_data %>%
filter(treatment == "CTRL") %>%
pull(.data[[response_column]])
ka_values <- panel_data %>%
filter(treatment == "KA") %>%
pull(.data[[response_column]])
wilcoxon_p_value <- tryCatch(
wilcox.test(ctrl_values, ka_values, exact = FALSE)$p.value,
error = function(e) NA_real_
)
cohens_d_value <- compute_cohens_d(ctrl_values, ka_values)
stats_rows[[row_index]] <- data.frame(
age_label = current_age,
exposure  = current_exposure,
p_value   = wilcoxon_p_value,
d_value   = cohens_d_value,
stringsAsFactors = FALSE
)
row_index <- row_index + 1
}
}
stats_table <- bind_rows(stats_rows) %>%
mutate(
age_label = factor(age_label, levels = c("young", "early old")),
exposure  = factor(exposure, levels = c("80", "160")),
p_label   = vapply(p_value, format_p_label, character(1)),
d_label   = ifelse(
is.finite(d_value),
sprintf("Cohen's d = %.2f", round(d_value, 2)),
"Cohen's d = NA"
)
)
return(stats_table)
}
# =========================================================
# 5. Plotting function
# =========================================================
create_polk_boxplot <- function(data,
response_column,
y_axis_label,
y_limits,
y_breaks,
p_label_y,
d_label_y) {
stats_table <- compute_panel_statistics(data, response_column) %>%
mutate(
p_label_y = p_label_y,
d_label_y = d_label_y
)
ggplot(data, aes(x = exposure, y = .data[[response_column]], fill = treatment)) +
geom_boxplot(
notch = TRUE,
outlier.shape = NA,
width = box_width,
size = box_line_width,
position = position_dodge(width = dodge_width)
) +
facet_wrap(~ age_label, nrow = 1, strip.position = "right") +
geom_text(
data = stats_table,
aes(x = exposure, y = p_label_y, label = p_label),
inherit.aes = FALSE,
fontface = "bold",
size = 6.2
) +
geom_text(
data = stats_table,
aes(x = exposure, y = d_label_y, label = d_label),
inherit.aes = FALSE,
fontface = "bold",
size = 5.2
) +
scale_fill_manual(values = c("CTRL" = ctrl_fill_color, "KA" = ka_fill_color)) +
scale_x_discrete(drop = FALSE) +
scale_y_continuous(
limits = y_limits,
breaks = y_breaks,
expand = c(0.02, 0)
) +
labs(
x = "exposure (mins)",
y = y_axis_label
) +
theme_bw(base_size = 18) +
theme(
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = panel_fill_color, colour = "black", size = 0.8),
panel.border = element_rect(fill = NA, colour = "black", size = 0.8),
panel.spacing = unit(1.0, "lines"),
strip.placement = "outside",
strip.background = element_rect(fill = strip_fill_color, colour = "black", size = 0.8),
strip.text = element_text(size = 18, face = "plain", colour = "black"),
strip.text.y.right = element_text(angle = 270, size = 18),
axis.title.x = element_text(size = 22, face = "plain"),
axis.title.y = element_text(size = 22, face = "plain"),
axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.ticks = element_line(colour = line_color, size = 0.8),
axis.line = element_line(colour = line_color, size = 0.8),
plot.margin = margin(10, 12, 10, 10)
)
}
# =========================================================
# 6. Build figures
# =========================================================
fig6c_plot <- create_polk_boxplot(
data            = polk_data,
response_column = "NucPolkperArea",
y_axis_label    = "Nuclear POLK counts per area",
y_limits        = c(0, 52),
y_breaks        = seq(0, 50, by = 10),
p_label_y       = 50.0,
d_label_y       = 46.8
)
fig6d_plot <- create_polk_boxplot(
data            = polk_data,
response_column = "CytoPolkperArea",
y_axis_label    = "Cytoplasmic POLK counts per area",
y_limits        = c(0, 20),
y_breaks        = seq(0, 20, by = 5),
p_label_y       = 19.0,
d_label_y       = 17.2
)
# =========================================================
# 7. Save figures
# =========================================================
ggsave(output_fig6c_pdf, fig6c_plot, width = 9.29, height = 6.37, units = "in", dpi = 300)
ggsave(output_fig6c_jpg, fig6c_plot, width = 9.29, height = 6.37, units = "in", dpi = 300)
ggsave(output_fig6d_pdf, fig6d_plot, width = 9.29, height = 6.37, units = "in", dpi = 300)
ggsave(output_fig6d_jpg, fig6d_plot, width = 9.29, height = 6.37, units = "in", dpi = 300)
# Display plots
fig6c_plot
fig6d_plot
