
library(dplyr)
library(stringr)

# Snapshot online data ----------------------------------------------------

# Locate every snapshot keyword-extract file below ./Data/WebData
extractFiles <- list.files("./Data/WebData", pattern = "KEYWORD_EXTRACT",
                           recursive = TRUE, full.names = TRUE)

# Read each file, discard tables that have a single column (or none), and
# stack the remaining tables row-wise into one data frame.
# NOTE(review): single-column files are presumably placeholders for searches
# that returned nothing -- confirm against the scraping step.
extractData <- do.call(
  rbind,
  Filter(function(tbl) ncol(tbl) > 1,
         lapply(extractFiles, read.csv, stringsAsFactors = FALSE))
)

# Number of distinct species and distinct keywords in the snapshot data
extractData %>%
  summarise(amphiSpecies = n_distinct(sp),
            keyWords = n_distinct(keyw))

# Save the combined snapshot table
write.csv(extractData, file = "./Data/Snapshot Online Data.csv",
          row.names = FALSE)

# Temporal online data ----------------------------------------------------

# Locate every temporal keyword-extract file below ./Data/TemporalData
extractFilesTemp <- list.files("./Data/TemporalData",
                               pattern = "KEYWORD_EXTRACT",
                               recursive = TRUE, full.names = TRUE)

# Read each file, discard tables that have a single column (or none), and
# stack the remaining tables row-wise into one data frame.
extractDataTemp <- do.call(
  rbind,
  Filter(function(tbl) ncol(tbl) > 1,
         lapply(extractFilesTemp, read.csv, stringsAsFactors = FALSE))
)

# Number of distinct species and distinct keywords in the temporal data
extractDataTemp %>%
  summarise(amphiSpecies = n_distinct(sp),
            keyWords = n_distinct(keyw))

# Add online results to master amphi data ---------------------------------

# Load the AmphibiaWeb species table and flag, per row, whether its binomial
# appears in the snapshot and/or temporal online extracts. The IUCN status is
# stripped of its " - Provisional" suffix and stored as a factor with the
# levels listed below (values outside this list become NA).
amphiData <- read.csv("./Data/AmphibiaWeb 2020-08-29.csv",
                      stringsAsFactors = FALSE) %>%
  mutate(
    amphiName = paste(genus, species),
    onlineTrade = amphiName %in% extractData$sp,
    onlineTradeTemp = amphiName %in% extractDataTemp$sp,
    onlineTradeEither = onlineTrade | onlineTradeTemp,
    iucn = factor(
      sub(" - Provisional", "", iucn),
      levels = c("Least Concern (LC)",
                 "Near Threatened (NT)",
                 "Vulnerable (VU)",
                 "Endangered (EN)",
                 "Critically Endangered (CR)",
                 "Extinct in the Wild (EW)",
                 "Extinct (EX)",
                 "Data Deficient (DD)")
    )
  )

# Row counts flagged by each online source, and by either of them
amphiData %>%
  summarise(nSppOnlineTrade = sum(onlineTrade),
            nSppOnlineTradeTemp = sum(onlineTradeTemp),
            nSppOnlineEither = sum(onlineTradeEither))

# Add LEMIS data ----------------------------------------------------------

lemisData <- read.csv(file = "./Data/LEMISData/LEMIS_amphibian_data.csv",
                      stringsAsFactors = FALSE)

# Build a sentence-case binomial for every LEMIS record.
# Records with neither genus nor species are removed *before* the name is
# built: paste() would turn them into "NA NA", which str_to_sentence() then
# rewrites as "Na na", so a later comparison against "NA NA" never matches.
# Names ending in "sp." are dropped as well.
lemisData <- lemisData %>%
  filter(!(is.na(genus) & is.na(species))) %>%
  mutate(lemisName = str_to_sentence(paste(genus, species))) %>%
  filter(!str_detect(lemisName, "sp\\.$"))

# Number of distinct LEMIS names with no exact match among AmphibiaWeb names.
# (324 was recorded in the original run; that figure may have included the
# "Na na" placeholder that is now removed above.)
sum(!unique(lemisData$lemisName) %in% amphiData$amphiName)

# Flag each AmphibiaWeb row whose accepted name or synonymies entry appears in
# LEMIS. Empty strings are never treated as a name. Columns are compared
# directly rather than via apply(), which would coerce the whole data frame to
# a character matrix first.
# NOTE(review): synonymies is compared as one whole string; if a cell can hold
# several synonyms it will only match when the entire cell equals a LEMIS
# name -- confirm the column format.
amphiData$lemisTrade <- local({
  lemisNames <- setdiff(lemisData$lemisName, "")
  amphiData$amphiName %in% lemisNames | amphiData$synonymies %in% lemisNames
})

# raw count of LEMIS names
length(unique(lemisData$lemisName))
# AmphibiaWeb rows matched to LEMIS, synonyms included. The column is
# `lemisTrade`; `lemisTraded` does not exist and would always sum to 0.
sum(amphiData$lemisTrade)


# CITES trade database ----------------------------------------------------

citesData <- read.csv(
  file = "./Data/CITESData/gross_imports_2020-09-20 15_25_comma_separated.csv",
  stringsAsFactors = FALSE
)

# Distinct taxa recorded only to genus level (Taxon ends in "spp.")
unique(str_subset(citesData$Taxon, "spp\\.$"))

# Genera for which every record is a genus-level ("spp.") listing, i.e. no
# record of that genus names a species. `genOnlyMissed` is TRUE when the genus
# has at least one species-level record, so the rows kept are those where it
# is FALSE.
citesData %>%
  mutate(genus = word(Taxon, 1),
         speciesLevel = !str_detect(Taxon, "spp\\.$")) %>%
  group_by(genus) %>%
  summarise(genOnlyMissed = any(speciesLevel)) %>%
  filter(!genOnlyMissed)

# Distinct species-level taxa in the CITES trade data
citesSpp <- unique(str_subset(citesData$Taxon, "spp\\.$", negate = TRUE))

# Number of CITES taxa with no exact match among AmphibiaWeb names
# (5 recorded in the original run)
length(setdiff(citesSpp, amphiData$amphiName))

# Flag each AmphibiaWeb row whose accepted name or synonymies entry appears in
# the CITES trade data. Empty strings are never treated as a name.
amphiData$citesTrade <- local({
  tradedNames <- setdiff(citesSpp, "")
  amphiData$amphiName %in% tradedNames | amphiData$synonymies %in% tradedNames
})

# Final column for traded in any source -----------------------------------

# Flag a species as traded when any source reports it. Grouping by amphiName
# makes any() evaluate per species name instead of over the whole table; the
# grouping is removed again afterwards so that amphiData is not left grouped
# for the rest of the script (or in any later export).
amphiData <- amphiData %>%
  group_by(amphiName) %>%
  mutate(anyTraded = any(onlineTrade, onlineTradeTemp, onlineTradeEither,
                         lemisTrade, citesTrade)) %>%
  ungroup()

# final count of species traded
sum(amphiData$anyTraded)
# percentage of total amphibian species
sum(amphiData$anyTraded) / n_distinct(amphiData$amphiName) * 100


# Add CITES appendices ----------------------------------------------------

citesApp <- read.csv(
  file = "./Data/CITESData/Index_of_CITES_Species_[CUSTOM]_2020-09-20 15_51.csv",
  stringsAsFactors = FALSE
)

# Quick look at the available columns and the listing values
names(citesApp)
citesApp$CurrentListing

# Keep rows that name a species and build their binomial
citesAppSpp <- citesApp %>%
  filter(Species != "") %>%
  mutate(citesName = paste(Genus, Species))

# 203 raw count in CITES
length(unique(citesAppSpp$citesName))

# Flag each AmphibiaWeb row whose accepted name or synonymies entry appears in
# the CITES appendix index. Empty strings are never treated as a name.
amphiData$citesListed <- local({
  listedNames <- setdiff(citesAppSpp$citesName, "")
  amphiData$amphiName %in% listedNames | amphiData$synonymies %in% listedNames
})

# 200 linked to amphiaweb species
sum(amphiData$citesListed)

# Record the listing each row falls under. When several index rows match,
# the first one (in index order) is used; rows with no match get FALSE, which
# ends up as the string "FALSE" once combined with the character listings.
amphiData$citesApp <- mapply(function(acceptedName, synonym) {
  candidates <- c(acceptedName, synonym)
  candidates <- candidates[!candidates == ""]
  listings <- citesAppSpp$CurrentListing[citesAppSpp$citesName %in% candidates]
  if (length(listings) > 0) listings[1] else FALSE
}, amphiData$amphiName, amphiData$synonymies, USE.NAMES = FALSE)

# Rows with a listing, and the breakdown by listing value
sum(amphiData$citesApp != "FALSE")
table(amphiData$citesApp)

# number of traded species covered by CITES app
sum(amphiData$citesListed & amphiData$anyTraded)

# write.csv(x = amphiData, file = "./Data/AmphibiaWeb Data Trade Augmented.csv",
#           row.names = FALSE)
