
library(dplyr)

# The LEMIS data were downloaded directly as a csv from the Zenodo repository
# (DOI 10.5281/zenodo.3565869). The lemis package could not be used because
# fstplyr, one of its dependencies, did not work with dplyr v1.1 (missing
# function).

# Show which files are present in the LEMIS data folder.
list.files(path = "./Data/LEMISData/")

# Read the full cleaned LEMIS table, keeping text columns as character vectors
# rather than converting them to factors.
lemisData <- read.csv(
  "./Data/LEMISData/lemis_2000_2014_cleaned.csv",
  stringsAsFactors = FALSE
)

# List the available columns.
colnames(lemisData)

# Keep only the rows flagged as amphibians. %in% never yields NA, so rows with
# a missing taxa value are left out, exactly as with which(taxa == ...).
amphiData <- lemisData[lemisData$taxa %in% "amphibian", ]

# Save the amphibian subset to its own csv, without a row-name column.
write.csv(amphiData,
          "./Data/LEMISData/LEMIS_amphibian_data.csv",
          row.names = FALSE)

# From here on lemisData holds the amphibian subset, re-read from the csv that
# was just written (this replaces the full table loaded earlier).
lemisData <- read.csv(
  "./Data/LEMISData/LEMIS_amphibian_data.csv",
  stringsAsFactors = FALSE
)

# Check whether any genus is recorded (almost) exclusively at genus level,
# i.e. with a species entry ending in "sp.", so that we know whether every
# genus is also covered by at least some species-level listings.
#
# perGenLevel is the percentage of a genus's rows that are genus-level
# listings. It is computed as the mean of the logical flag so that genera with
# only one kind of listing still yield exactly one row each: the earlier
# count()-based ratio n[genLevel] / (n[!genLevel] + n[genLevel]) returned a
# zero-length value for such genera, which silently removed them from the
# result, including the 100% genus-level genera this check is looking for.
#
# grepl() is used because only dplyr is attached in this script; it returns
# FALSE for a missing species, so such rows count as not genus-level here.
lemisData %>%
  mutate(genLevel = grepl("sp\\.$", species)) %>%
  group_by(genus) %>%
  summarise(perGenLevel = mean(genLevel) * 100) %>%
  filter(perGenLevel > 99)
# Any genus printed above is represented (almost) only by "sp." listings; an
# empty result means no genus is represented only by genus-level listings.

# Raw species count from LEMIS: the number of distinct "genus species" names,
# excluding genus-only listings (names ending in "sp."). Synonyms are not
# resolved here, so the same species may be counted under several names.
#
# grepl() replaces str_detect(), which belongs to stringr and is not attached
# in this script. paste() never returns NA, so the two behave identically on
# lemisName (a missing species is pasted as the text "NA" and is counted).
lemisData %>%
  mutate(lemisName = paste(genus, species)) %>%
  filter(!grepl("sp\\.$", lemisName)) %>% # drop genus-only listings
  pull(lemisName) %>%
  n_distinct()
