# Load packages 
library(tidyr)
library(dplyr)
library(forcats)
library(broom)
library(stringr)

# population level models ====

# Read the saved ssf data and recode the categorical columns as factors.
# `sexo` and `lepto.inf` are stored as 0/1 codes; `period` labels are
# zero-padded (e.g. "5-9" becomes "05-09").
ssf_data <- read.csv("99_output/full_ssf-data.csv") %>%
  mutate(
    area = as.factor(area),
    participant.id = as.factor(participant.id),
    sexo = factor(sexo, levels = c(0, 1), labels = c("Male", "Female")),
    lepto.inf = factor(lepto.inf, levels = c(0, 1), labels = c("Neg", "Pos")),
    period = factor(
      period,
      levels = c("5-21", "5-9", "9-13", "13-17", "17-21"),
      labels = c("05-21", "05-09", "09-13", "13-17", "17-21")
    )
  )

# Convert a vector of standard errors into inverse variances (1 / se^2).
#
# se_col: numeric vector of standard errors.
# Returns a numeric vector of the same length. Infinite results (a standard
# error of zero) are replaced with NA; NA inputs stay NA.

calculate_inv_var <- function(se_col) {
  inverse_variance <- 1 / (se_col^2)
  replace(inverse_variance, is.infinite(inverse_variance), NA)
}

# For every column whose name ends in "_se", add a sibling column holding its
# inverse variance. The new column keeps the same stem with the "_se" suffix
# swapped for "_inv.var".

ssf_data <- mutate(
  ssf_data,
  across(
    ends_with("_se"),
    calculate_inv_var,
    .names = "{str_replace(.col, '_se$', '_inv.var')}"
  )
)

# Number of missing values in each "_inv.var" column, as a named numeric
# vector. Uses base is.na()/colSums() so no package beyond those loaded at the
# top of the script is required.
ssf_data %>%
  select(ends_with("_inv.var")) %>%
  is.na() %>%
  colSums()

# Same missing-value count, broken down by area and period.
ssf_data %>%
  group_by(area, period) %>%
  summarise(
    across(ends_with("inv.var"), function(column) sum(is.na(column))),
    .groups = "drop"
  )

# Run population models ----
#
# For every variable x period combination, two weighted linear models are
# fitted to the `<var>_pe.logOR` column of the rows belonging to that period,
# using `<var>_inv.var` as weights:
#   model_1: sexo + calcidadin + area
#   model_2: lepto.inf + sexo + calcidadin + area
# From model_1 all non-area terms are kept; from model_2 only the lepto term.
# Estimates and confidence limits are exponentiated by tidy().
# The stacked results end up in `df.est`.

vars <- c("river", "open.sewer", "resid.dom", "hh")

periods <- c("05-21", "05-09", "09-13", "13-17", "17-21")

# One row per (variable, period) pair to model. Kept as character so the
# values can be pasted into column names and compared against `period`.
combinations <- expand.grid(
  var = vars,
  period = periods,
  stringsAsFactors = FALSE
)

# Empty template: fixes the column layout of the final table and keeps the
# result well-formed even when there is nothing to bind.
df.est <- data.frame(
  term      = character(),
  estimate  = numeric(),
  conf.low  = numeric(),
  conf.high = numeric(),
  p.value   = numeric(),
  variable  = character(),
  period    = character()
)

# Per-combination results are stored here and bound once after the loop,
# instead of re-copying `df.est` on every iteration.
est_list <- vector("list", nrow(combinations))

for (i in seq_len(nrow(combinations))) {

  var_i <- combinations$var[i]
  period_i <- combinations$period[i]
  outcome_col <- paste0(var_i, "_pe.logOR")
  weight_col <- paste0(var_i, "_inv.var")

  data_filtered <- ssf_data %>% filter(period == period_i)

  # lm() evaluates `weights` with `data_filtered` as the data, so
  # get(weight_col) resolves to that column of the filtered data.
  model_1 <- lm(
    as.formula(paste0(outcome_col, " ~ sexo + calcidadin + area")),
    data = data_filtered,
    weights = get(weight_col)
  )

  result_1 <- tidy(model_1, conf.int = TRUE, exponentiate = TRUE) %>%
    select(term, estimate, conf.low, conf.high, p.value) %>%
    filter(!str_detect(term, "area")) %>%
    mutate(variable = var_i, period = period_i)

  model_2 <- lm(
    as.formula(
      paste0(outcome_col, " ~ lepto.inf + sexo + calcidadin + area")
    ),
    data = data_filtered,
    weights = get(weight_col)
  )

  result_2 <- tidy(model_2, conf.int = TRUE, exponentiate = TRUE) %>%
    select(term, estimate, conf.low, conf.high, p.value) %>%
    filter(str_detect(term, "lepto")) %>%
    mutate(variable = var_i, period = period_i)

  df.1 <- bind_rows(result_1, result_2)

  est_list[[i]] <- df.1

}

# Stack everything onto the empty template in a single call.
df.est <- bind_rows(df.est, est_list)
