# Load packages 

library(lubridate)
library(raster)
library(sf)
library(dplyr)
library(amt)
library(survival)
library(broom)
library(ggplot2)
library(tidyr)
library(progress)

# NVS ====

# Read the three NVS buffer rasters from disk. Each one is later passed to
# extract_covariates() so its value can be attached to every step.

NVS_resid.dom.buffer <-
  raster::raster("04_environmental-data/NVS_resid-dom-buff.tif")
NVS_open.sewer.buffer <-
  raster::raster("04_environmental-data/NVS_open-sewer-buff.tif")
NVS_river.buffer <-
  raster::raster("04_environmental-data/NVS_river-buff.tif")

# Time-of-day windows to analyse. Each entry is c(start hour, end hour) on a
# 24-hour clock; a GPS fix belongs to a window when start <= hour < end.
periods <- list(
  "5-9"   = c(5, 9),   # 05:00 - 09:00
  "9-13"  = c(9, 13),  # 09:00 - 13:00
  "13-17" = c(13, 17), # 13:00 - 17:00
  "17-21" = c(17, 21), # 17:00 - 21:00
  "5-21"  = c(5, 21)   # 05:00 - 21:00 (whole day)
)

# Labels used to look the windows up by name, in the order defined above.
period_labels <- names(periods)

# Results table: one row for every participant x period combination.
# The loop further down fills it in one row at a time.

ssf_data <- expand.grid(
  participant.id = unique(gps.tibble$participant.id),
  period = period_labels,
  stringsAsFactors = FALSE
)

# Household identifier = the first seven characters of the participant id.

ssf_data[["household"]] <- as.character(
  substring(ssf_data[["participant.id"]], first = 1, last = 7)
)

# Pre-create every output column, filled with NA, so the loop can assign
# single cells. Naming scheme for the model outputs:
#   <covariate>_pe.*     point estimate
#   <covariate>_c.low.*  lower confidence limit
#   <covariate>_c.high.* upper confidence limit
#   *.OR / *.logOR       odds-ratio scale / log-odds-ratio scale
#   <covariate>_se       standard error (log-odds-ratio scale)
ssf_data[c(
  # bookkeeping
  "reloc.below.50", "total.time", "num.locations",
  # river buffer
  "buff_river_pe.OR", "buff_river_c.low.OR", "buff_river_c.high.OR",
  "buff_river_pe.logOR", "buff_river_c.low.logOR", "buff_river_c.high.logOR",
  "buff_river_se",
  # open sewer buffer
  "buff_open.sewer_pe.OR", "buff_open.sewer_c.low.OR",
  "buff_open.sewer_c.high.OR",
  "buff_open.sewer_pe.logOR", "buff_open.sewer_c.low.logOR",
  "buff_open.sewer_c.high.logOR",
  "buff_open.sewer_se",
  # resid.dom buffer
  "buff_resid.dom_pe.OR", "buff_resid.dom_c.low.OR",
  "buff_resid.dom_c.high.OR",
  "buff_resid.dom_pe.logOR", "buff_resid.dom_c.low.logOR",
  "buff_resid.dom_c.high.logOR",
  "buff_resid.dom_se",
  # household raster
  "hh_pe.OR", "hh_c.low.OR", "hh_c.high.OR",
  "hh_pe.logOR", "hh_c.low.logOR", "hh_c.high.logOR",
  "hh_se",
  # flag: did any used step have hh == 1?
  "hh_check"
)] <- NA

# Loop preparation: a progress bar with one tick per row of ssf_data, and a
# fixed RNG seed so the random draws made inside the loop are reproducible.

pb <- progress_bar$new(
  format = "[:bar] :percent in :elapsed full time: :eta",
  total = nrow(ssf_data),
  width = 60,
  clear = FALSE
)

set.seed(123)


# Fit a step-selection model for every participant x period row of ssf_data.
#
# For each row the loop:
#   1. pools the participant's GPS fixes and keeps those inside the study-area
#      bounding box and inside the period's hour window;
#   2. records the number of fixes and skips the row (reloc.below.50 = TRUE)
#      when fewer than 50 remain;
#   3. fits a first model with available steps drawn from the default
#      step-length / turning-angle distributions, updates those distributions
#      from the fit, redraws the available steps and refits with clogit();
#   4. stores odds ratios, log odds ratios, confidence limits and standard
#      errors for the three buffer rasters and the household raster.
# An error while modelling a row is printed and the loop moves to the next row.

# Attach the raster covariates and derived movement terms to a set of
# used/available steps, then drop steps without a usable turning angle or
# step length.
add_step_covariates <- function(steps, hh_raster) {
  steps %>%
    extract_covariates(NVS_open.sewer.buffer) %>%
    extract_covariates(NVS_resid.dom.buffer) %>%
    extract_covariates(NVS_river.buffer) %>%
    extract_covariates(hh_raster) %>%
    mutate(cos_ta = cos(ta_), log_sl = log(sl_), hour = hour(t1_)) %>%
    # assumes the household raster's layer is named "layer" -- TODO confirm
    rename(hh = layer) %>%
    filter(!is.na(ta_) & !is.infinite(cos_ta) & !is.infinite(log_sl))
}

# Output-column prefixes, in the same order as the first four terms of the
# model formula (river, open sewer, resid.dom, hh); results are read from the
# tidy() output by row position.
covariate_prefixes <- c("buff_river", "buff_open.sewer", "buff_resid.dom", "hh")

for (i in seq_len(nrow(ssf_data))) {
  participant_id <- ssf_data$participant.id[i]
  period_label <- ssf_data$period[i]
  period_range <- periods[[period_label]]

  hh <- raster_list[[ssf_data$household[i]]]

  # A participant can occupy several rows of gps.tibble; pool all their fixes.
  participant_rows <- which(gps.tibble$participant.id == participant_id)

  if (length(participant_rows) > 1) {
    gps.1 <- do.call(rbind, gps.tibble$gps.df[participant_rows])
  } else {
    gps.1 <- gps.tibble$gps.df[participant_rows][[1]]
  }

  gps.1 <- gps.1[, c("utm.x", "utm.y", "time", "time.hms")]

  # only keep points within the study area boundaries and the period's hours

  gps.1 <- gps.1 %>%
    filter(utm.x > 560925 & utm.x < 561189 &
             utm.y > 8569855 & utm.y < 8570260 &
             !is.na(utm.x) & !is.na(utm.y) & !is.na(time)) %>%
    filter(hour(time) >= period_range[1], hour(time) < period_range[2])

  ssf_data$num.locations[i] <- nrow(gps.1)

  # skip individuals with less than 50 relocations in the study area

  ssf_data$reloc.below.50[i] <- nrow(gps.1) < 50

  if (ssf_data$reloc.below.50[i]) {
    pb$tick()
    next
  }

  tryCatch({
    # Create a track object of gps data for one individual
    track.1 <- make_track(gps.1, .x = utm.x, .y = utm.y, .t = time,
                          all_cols = TRUE, crs = 31984)

    # Observed steps at the chosen sampling rate and tolerance. Computed once
    # and reused for both random draws below.
    observed_steps <- track.1 %>%
      track_resample(rate = seconds(35), tolerance = seconds(5)) %>%
      steps_by_burst()

    # First pass: 100 available steps per used step, default distributions.
    ssf_dat <- observed_steps %>%
      random_steps(n_control = 100) %>%
      add_step_covariates(hh)

    if (nrow(ssf_dat) > 0) {

      # fit step selection model to estimate selection coefficients (betas)
      # NOTE(review): `hour` is derived from t1_, which looks constant within a
      # stratum (step_id_), so its coefficient is probably not estimable --
      # confirm whether it should stay in the formula.

      m1 <- fit_issf(case_ ~ NVS_river.buff + NVS_open.sewer.buff +
                       NVS_resid.dom.buff + hh +
                       sl_ + log_sl + cos_ta + hour + strata(step_id_),
                     data = ssf_dat, method = "efron", model = TRUE)

      # update step-length and turning angle distributions and re-run models

      updated_sl <- update_sl_distr(m1)
      updated_ta <- update_ta_distr(m1)

      # Second pass: redraw available steps from the updated distributions.
      # The argument is spelled `ta_distr` in full rather than relying on
      # partial matching of `ta_dist`.
      ssf_dat_2 <- observed_steps %>%
        random_steps(n_control = 100,
                     sl_distr = updated_sl, ta_distr = updated_ta) %>%
        add_step_covariates(hh)

      # Total duration of the used steps. The mask must come from ssf_dat_2
      # itself: ssf_dat is a different random draw and, after filtering, need
      # not have the same rows.
      ssf_data$total.time[i] <- as.numeric(
        sum(ssf_dat_2$dt_[ssf_dat_2$case_ == TRUE], na.rm = TRUE)
      )

      # Did any used step have a household raster value of 1?
      ssf_data$hh_check[i] <- any(ssf_dat_2$case_ == TRUE & ssf_dat_2$hh == 1)

      m2 <- clogit(case_ ~ NVS_river.buff + NVS_open.sewer.buff +
                     NVS_resid.dom.buff + hh +
                     sl_ + log_sl + cos_ta + hour + strata(step_id_),
                   data = ssf_dat_2, method = "efron")

      tidy_results.OR <- tidy(m2, exponentiate = TRUE, conf.int = TRUE)
      tidy_results.logOR <- tidy(m2, exponentiate = FALSE, conf.int = TRUE)

      # Assigning results to the corresponding fields in ssf_data; row k of
      # the tidy output is taken to be the k-th term of the formula.
      for (k in seq_along(covariate_prefixes)) {
        prefix <- covariate_prefixes[k]

        ssf_data[i, paste0(prefix, c("_pe.OR", "_c.low.OR", "_c.high.OR"))] <-
          as.numeric(tidy_results.OR[k, c("estimate", "conf.low", "conf.high")])

        ssf_data[i, paste0(prefix, c("_pe.logOR", "_c.low.logOR",
                                     "_c.high.logOR", "_se"))] <-
          as.numeric(tidy_results.logOR[k, c("estimate", "conf.low",
                                             "conf.high", "std.error")])
      }
    }

  }, error = function(e) {

    error.m <- paste("Error processing participant",
                     participant_id, "for period", period_label, ":",
                     conditionMessage(e))

    print(error.m)

    # NOTE: assigning to ssf_data from inside this handler would only change a
    # copy local to the handler function, so the message is printed, not stored.

  }, finally = {
    # exactly one tick per modelled row, whether it succeeded or failed
    pb$tick()
  })

}

