# Remove every object from the current environment so the script starts from
# a clean workspace.
# NOTE(review): in an interactive session this also deletes unrelated objects;
# running the script in a fresh R session would make this line unnecessary.
rm(list = ls())

# Load necessary packages
library("ggplot2")
library("dplyr")
library("ggdist")
library("lme4")

# Switch to the experiment folder so the CSV can be read by its bare file name
setwd("C:/Users/guyze/OneDrive/Documents/moths/T_maze_experiment")

# Read the raw records and rebuild them as the analysis table `df`.
# `local()` keeps the raw table out of the workspace; only `df` is created.
df <- local({
  raw <- read.csv("PlantandSound_vs_Soil.csv")
  n_records <- nrow(raw)

  data.frame(
    individual = factor(raw$Moth.number),
    # Subtract 75 so that the distance is centred on 0
    distance = raw$Cluster.s.location - 75,
    day = factor(raw$Date),
    trial = factor(raw$Trial),
    # 1 where the speaker location equals "West", 0 for other non-missing values
    Speaker_side = as.integer(raw$Speaker.s.location == "West"),
    # Constant 1 for every row
    cluster_count = rep_len(1, n_records)
  )
})


#### plotting and stats ---------

# Kernel density of the centred distances for every row of `df`
ggplot(df, aes(x = distance)) +
  geom_density(colour = "black", size = 3) +
  # Optional reference line at the centre (currently disabled):
  #  geom_vline(xintercept = 0, linetype = "dashed") +
  xlab("Distance (Cm)") +
  ylab("Density") +
  theme_bw() +
  theme(
    # Large fonts, with even larger axis titles
    text = element_text(size = 45),
    axis.title = element_text(size = 55),
    # Draw plain axis lines in place of the panel frame and background
    axis.line = element_line(size = 1.5),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_blank(),
    # Increase top margin for more space
    plot.margin = margin(t = 20, r = 10, b = 10, l = 10)
  )

# Comparison to a random sample
# Null sample: every observed distance keeps its magnitude but is multiplied
# by a randomly drawn sign (+1 or -1).
set.seed(1)

# Replicate every row the same number of times so that all observations carry
# equal weight in the null sample. Passing a vector such as 1:nrow(df) as
# `times` would instead repeat row i exactly i times, weighting rows by their
# position in the file.
df_rep <- df[rep(seq_len(nrow(df)), times = nrow(df)), ]

# Draw one sign per replicated row and apply it to the distance
df_rep$random_sample_clusters <- sample(
  c(1, -1),
  size = nrow(df_rep),
  replace = TRUE
)
df_rep$distance <- df_rep$distance * df_rep$random_sample_clusters

# Two-sample Kolmogorov-Smirnov test: sign-randomised sample vs. observed data
ks_test_result_eggs <- ks.test(df_rep$distance, df$distance)
print(ks_test_result_eggs)



#### only first oviposition per individual ---------
# Keep the first row in which each individual appears (rows stay in file order)
df_first <- subset(df, !duplicated(individual))

# Kernel density of the centred distances for those first rows only
ggplot(df_first, aes(x = distance)) +
  geom_density(colour = "black", size = 3) +
  # Optional reference line at the centre (currently disabled):
  #  geom_vline(xintercept = 0, linetype = "dashed") +
  xlab("Distance (Cm)") +
  ylab("Density") +
  theme_bw() +
  theme(
    # Large fonts, with even larger axis titles
    text = element_text(size = 45),
    axis.title = element_text(size = 55),
    # Draw plain axis lines in place of the panel frame and background
    axis.line = element_line(size = 1.5),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_blank(),
    # Increase top margin for more space
    plot.margin = margin(t = 20, r = 10, b = 10, l = 10)
  )

# Null sample for the first-row-per-individual data: every distance keeps its
# magnitude but is multiplied by a randomly drawn sign (+1 or -1).
# Each row is replicated the same number of times so that all observations
# carry equal weight. Passing a vector such as 1:nrow(df_first) as `times`
# would instead repeat row i exactly i times, weighting rows by their position.
df_rep <- df_first[rep(seq_len(nrow(df_first)), times = nrow(df_first)), ]

# Draw one sign per replicated row and apply it to the distance
df_rep$random_sample_clusters <- sample(
  c(1, -1),
  size = nrow(df_rep),
  replace = TRUE
)
df_rep$distance <- df_rep$distance * df_rep$random_sample_clusters

# Two-sample Kolmogorov-Smirnov test: sign-randomised sample vs. observed data
ks_test_result_eggs <- ks.test(df_rep$distance, df_first$distance)
print(ks_test_result_eggs)


#### plotting ---------

# Histogram bin width, in the same units as `distance`
binwidth <- 10
# A histogram bar of density d holds d * n * binwidth observations, so this
# factor converts the density axis into counts for the rows of `df`. Deriving
# it from the data keeps the secondary axis correct for any sample size
# (it equals the previous fixed value of 500 when nrow(df) is 50).
scale_factor <- nrow(df) * binwidth

# NOTE(review): the dashed curve uses whichever `df_rep` was created last. In a
# top-to-bottom run that is the sample built from `df_first`, while the
# histogram and the solid curve show `df` -- confirm this is intended.
ggplot() +
  # histogram of df, density-scaled
  geom_histogram(
    data     = df,
    aes(x = distance, y = after_stat(density)),
    binwidth = binwidth,
    fill     = "grey80",
    alpha    = 0.7
  ) +
  # density curves: observed data (solid) and sign-randomised sample (dashed)
  geom_density(
    data  = df,
    aes(x = distance),
    color = "black",
    size  = 1.2
  ) +
  geom_density(
    data     = df_rep,
    aes(x = distance),
    color    = "black",
    size     = 1.2,
    linetype = "dashed"
  ) +
  # primary axis: density; secondary axis: the same values rescaled to counts
  scale_y_continuous(
    name     = "Density",
    sec.axis = sec_axis(~ . * scale_factor, name = "Count")
  ) +
  labs(x = "Distance (cm)") +
  scale_x_continuous(breaks = c(-75, 0, 75)) +
  # Zoom on both axes with coord_cartesian so that no data are dropped: limits
  # set on the scale itself would discard bars or curve segments above 0.02.
  coord_cartesian(xlim = c(-80, 80), ylim = c(0, 0.02)) +
  theme_bw() +
  theme(
    text             = element_text(size = 55),
    panel.background = element_blank(),
    panel.border     = element_blank(),
    axis.line        = element_line(size = 1.5),
    axis.title       = element_text(size = 35),
    plot.margin      = margin(20, 10, 10, 10)
  )
