setwd("O:/Direct_copy_G_Drive_as_of_11-29-2023/Project_files/1.Submitted&Published_Projects/Paleocene_mammals_REVISING_17JUN25/CB_submission&revision/Paleocene_DTA_ms/Paleocene_mammal_revision_CB/Cast_vs_specimen_models")

#####################
# libraries for DTA #
#####################
library(interp)
library(molaR)
library(Rvcg)

#############################################
# DTA test between original and cast models #
#############################################
#read folder of ply files into R# 
files.teeth <- list.files(path="O:/Direct_copy_G_Drive_as_of_11-29-2023/Project_files/1.Submitted&Published_Projects/Paleocene_mammals_REVISING_17JUN25/CB_submission&revision/Paleocene_DTA_ms/Paleocene_mammal_revision_CB/Cast_vs_specimen_models", pattern=".ply")
files <- list()
l.files <- length(files.teeth)

#this loop will save cleaned meshes in the current working directory; ensure that the setwd directory above is the correct one to deposit new cleaned meshes if overwriting is not desired
for(i in 1:l.files) {
  file <- paste("O:/Direct_copy_G_Drive_as_of_11-29-2023/Project_files/1.Submitted&Published_Projects/Paleocene_mammals_REVISING_17JUN25/CB_submission&revision/Paleocene_DTA_ms/Paleocene_mammal_revision_CB/Cast_vs_specimen_models/", files.teeth[i], sep='')
  temp <- vcgPlyRead(file)
  files[[i]] <- molaR_Clean(temp)
  names(files)[i] <- files.teeth[i]
  vcgPlyWrite(files[[i]],files.teeth[i])
}

#recalculate all four DTA indices#
molaR_Batch(DNE=TRUE, RFI=TRUE, OPCr=TRUE, Slope=TRUE)
##outputs used to calculate difference in DTA indices between cast and original specimen CT data.
##percentrages derived from the sample test differences, and calculated and put into updated data matrix.
##additionally, volume-corrected FEA outputs also included in new data matrix.
##new data matrix imported below for re-analysis of DTA/FEA data

#################################################################
# Correlation plots of sampled DTA data and adjusted FEA data   #
#################################################################
#library for plotting correlation tables#
library(corrplot)

#import new data matrix#
#file is O:\Direct_copy_G_Drive_as_of_11-29-2023\Project_files\1.Submitted&Published_Projects\Paleocene_mammals_REVISING_17JUN25\CB_submission&revision\Paleocene_DTA_ms\Paleocene_mammal_revision_CB\DTA&FEA_outputs_FEA_adjusted.csv
#as of 6-23-25#
all_data <- read.csv(file=file.choose(), header=T)

data <- all_data

#data_Pantodont#
data <- subset(data,(Clade %in% "Pantodonta"))
#data non-pantodont#
'%!in%' <- function(x,y)!('%in%'(x,y))
data <- subset(data, subset = (Clade %!in% "Pantodonta"))

##################
# all molar data #
##################
data <- subset(data, subset = Tooth %in% c("M_1","M_2","M_3","M1_","M2_","M3_"))

#####################
# all premolar data #
#####################
data <- subset(data, subset = Tooth %in% c("P_1","P_2","P_3","P_4","P1_","P2_","P3_","P4_"))


#resampling DTA from value range estimated using sample cast v original CT analysis#
sampled_data <- data
for (i in 1:nrow(data)) {
sampled_data[i,48] <- runif(1, min=as.numeric(sampled_data[i,11]), max=as.numeric(sampled_data[i,10])) #DNE sample
sampled_data[i,50] <- runif(1, as.numeric(sampled_data[i,18]), as.numeric(sampled_data[i,17])) #Slope sample
sampled_data[i,49] <- runif(1, as.numeric(sampled_data[i,21]), as.numeric(sampled_data[i,20])) #OPCR sample
sampled_data[i,51] <- runif(1, as.numeric(sampled_data[i,32]), as.numeric(sampled_data[i,31])) #RFI sample
sampled_data[i,52] <- runif(1, as.numeric(sampled_data[i,44]), as.numeric(sampled_data[i,43])) #CompSE sample
sampled_data[i,53] <- runif(1, as.numeric(sampled_data[i,47]), as.numeric(sampled_data[i,46])) #ShearSE sample
}

#then, corrplot from one re-sampled dataset for trial test#
#all data#
all_corr_data <- cor(na.omit(sampled_data[,48:53]),method='kendall')
#E Paleocene data#
EP_data <- cor(na.omit(sampled_data[sampled_data$Interval == 'A',][,48:53]),method='kendall')
#M Paleocene data#
MP_data <- cor(na.omit(sampled_data[sampled_data$Interval == 'B',][,48:53]),method='kendall')
#L Paleocene data#
LP_data <- cor(na.omit(sampled_data[sampled_data$Interval == 'C',][,48:53]),method='kendall')
#combined plots#
#corrplot(EP_data, type="upper", tl.pos='d') #to extract scale from
#stasis plot for figure consideration
corrplot(EP_data*0.2, type="upper", tl.pos='d', cl.pos='n', insig='blank')
corrplot(EP_data, type="lower", tl.pos='d', cl.pos='n', insig='blank',add=TRUE)
#random shuffling to create corrplot visual
corrplot(EP_data*0.2, type="upper", tl.pos='d', cl.pos='n', insig='blank')
rand <- sample(nrow(EP_data))
corrplot(EP_data[rand,], type="lower", tl.pos='d', cl.pos='n', insig='blank',add=TRUE)


rownames(EP_data)<-c("DNE","OPCR","Slope","RFI","Compress","Shear")
colnames(EP_data)<-c("DNE","OPCR","Slope","RFI","Compress","Shear")
rownames(MP_data)<-c("DNE","OPCR","Slope","RFI","Compress","Shear")
colnames(MP_data)<-c("DNE","OPCR","Slope","RFI","Compress","Shear")
rownames(LP_data)<-c("DNE","OPCR","Slope","RFI","Compress","Shear")
colnames(LP_data)<-c("DNE","OPCR","Slope","RFI","Compress","Shear")
windows()
corrplot(EP_data, type="upper", tl.pos='d', cl.pos='n', insig='blank', addCoef.col ='black')
corrplot(MP_data, type="lower", tl.pos='d', cl.pos='n', insig='blank', addCoef.col ='black',add=TRUE)
windows()
corrplot(MP_data, type="upper", tl.pos='d', cl.pos='n', insig='blank', addCoef.col ='black')
corrplot(LP_data, type="lower", tl.pos='d', cl.pos='n', insig='blank', addCoef.col ='black', add=TRUE)
##in general, these correlation plots show that compress and shear SE values correlate with DNE and OPCR
##thi is consistent with the 2B-PLS analyses and linear regression model analyses below.
##also, the correlation increases in strength across time intervals from early to late Paleocene.
##These visuals are helpful for replacing regression figures in original submission.

########################################
## LBRARIES TO REDO SAMPLE STATISTICS ##
########################################

# libraries #
library(ggplot2)
library(ggpubr)
library(viridis)
library(scales) #for the comma call in scale_y_continuous
library(forcats) #for the fct_Rev function
library(dplyr)
library(ggfortify) #for Ggplot PCA
library(geomorph) #for morphological disparity
'%!in%' <- function(x,y)!('%in%'(x,y)) #function to subset non-pantodont data

#############################################################
# SECTION TO SET PARTITION FOR REPEATED STATISTICAL TESTS   #
#############################################################

## STEP 1 ## RELOAD MAIN DATA ##
#import new data matrix#
#file is O:\Direct_copy_G_Drive_as_of_11-29-2023\Project_files\1.Submitted&Published_Projects\Paleocene_mammals_REVISING_17JUN25\CB_submission&revision\Paleocene_DTA_ms\Paleocene_mammal_revision_CB\DTA&FEA_outputs_FEA_adjusted.csv
#as of 6-23-25
data <- read.csv(file=file.choose(), header=T)
#test output template
results_table <- read.csv(file=file.choose(), header=T)

## STEP 2 ## PARTITION TAXON ##
#for all data, do not subset taxon at this step#
#
#data_Pantodont#
data <- subset(data,(Clade %in% "Pantodonta"))
#data non-pantodont#
'%!in%' <- function(x,y)!('%in%'(x,y))
data <- subset(data, subset = (Clade %!in% "Pantodonta"))

## STEP 3 ## PARTITION TOOTH REGION ##
##########
#all data#
##########
#no further partition of data necessary#

##################
# all molar data #
##################
data <- subset(data, subset = Tooth %in% c("M_1","M_2","M_3","M1_","M2_","M3_"))

#####################
# all premolar data #
#####################
data <- subset(data, subset = Tooth %in% c("P_1","P_2","P_3","P_4","P1_","P2_","P3_","P4_"))


#########################
# individual tooth data #
#########################
data %>%
  count(Tooth) 
#1    M_1 20
#2    M_2 22
#3    M_3 19
#4    M1_ 22
#5    M2_ 23
#6    M3_ 16
#8    P_2  5
#9    P_3 13
#10   P_4 17
#12   P2_  4
#13   P3_ 15
#14   P4_ 19

subset(data, subset = Interval %in% "A") %>%
	count(Tooth)
#1    M_1 10
#2    M_2 11
#3    M_3  8
#4    M1_ 10
#5    M2_ 10
#6    M3_  8
#7    P_2  2
#8    P_3  6
#9    P_4  7
#11   P2_  2
#12   P3_  8
#13   P4_  7

subset(data, subset = Interval %in% "B") %>%
	count(Tooth)
#1    M_1 5
#2    M_2 6
#3    M_3 6
#4    M1_ 4
#5    M2_ 5
#6    M3_ 4
#7    P_2 0
#8    P_3 2
#9    P_4 4
#10   P2_ 1
#11   P3_ 4
#12   P4_ 4

subset(data, subset = Interval %in% "C") %>%
	count(Tooth)
   Tooth n
#1    M_1 5
#2    M_2 5
#3    M_3 5
#4    M1_ 8
#5    M2_ 8
#6    M3_ 4
#8    P_2 3
#9    P_3 5
#10   P_4 6
#11   P2_ 1
#12   P3_ 3
#13   P4_ 8

data <- subset(data, subset = Tooth %in% "M2_") #replace the last part in the call to subset other teeth
#############################################################

#############################################################

#############################################################
# SECTION TO RUN STATISTICAL TESTS FOR DTA AND FEA DATA     #
#############################################################
## STEP 4 ## CALCULATE BOOTSTRAP SAMPLE FOR VARIANCE ##
#variance by bin with resamples#
#set empty vector for 1000 resamples
dne_var_a_sample <- vector(mode="numeric", length=1000)
dne_var_b_sample <- vector(mode="numeric", length=1000)
dne_var_c_sample <- vector(mode="numeric", length=1000)
opcr_var_a_sample <- vector(mode="numeric", length=1000)
opcr_var_b_sample <- vector(mode="numeric", length=1000)
opcr_var_c_sample <- vector(mode="numeric", length=1000)
slope_var_a_sample <- vector(mode="numeric", length=1000)
slope_var_b_sample <- vector(mode="numeric", length=1000)
slope_var_c_sample <- vector(mode="numeric", length=1000)
rfi_var_a_sample <- vector(mode="numeric", length=1000)
rfi_var_b_sample <- vector(mode="numeric", length=1000)
rfi_var_c_sample <- vector(mode="numeric", length=1000)
compse_var_a_sample <- vector(mode="numeric", length=1000)
compse_var_b_sample <- vector(mode="numeric", length=1000)
compse_var_c_sample <- vector(mode="numeric", length=1000)
shearse_var_a_sample <- vector(mode="numeric", length=1000)
shearse_var_b_sample <- vector(mode="numeric", length=1000)
shearse_var_c_sample <- vector(mode="numeric", length=1000)
#nested for loops to generate resampled dataset and then calculate variance from it
for (j in 1:1000) {
sampled_data <- data
for (i in 1:nrow(data)) {
sampled_data[i,48] <- runif(1, min=as.numeric(sampled_data[i,11]), max=as.numeric(sampled_data[i,10])) #DNE sample
sampled_data[i,50] <- runif(1, as.numeric(sampled_data[i,18]), as.numeric(sampled_data[i,17])) #Slope sample
sampled_data[i,49] <- runif(1, as.numeric(sampled_data[i,21]), as.numeric(sampled_data[i,20])) #OPCR sample
sampled_data[i,51] <- runif(1, as.numeric(sampled_data[i,32]), as.numeric(sampled_data[i,31])) #RFI sample
sampled_data[i,52] <- runif(1, as.numeric(sampled_data[i,44]), as.numeric(sampled_data[i,43])) #CompSE sample
sampled_data[i,53] <- runif(1, as.numeric(sampled_data[i,47]), as.numeric(sampled_data[i,46])) #ShearSE sample
}
dne_var_a_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "A")$DNE_sample))/4.4 #variance per m.y.
dne_var_b_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "B")$DNE_sample))/2.4 #variance per m.y.
dne_var_c_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "C")$DNE_sample))/3.2 #variance per m.y.
opcr_var_a_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "A")$OPCR_sample))/4.4 #variance per m.y.
opcr_var_b_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "B")$OPCR_sample))/2.4 #variance per m.y.
opcr_var_c_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "C")$OPCR_sample))/3.2 #variance per m.y.
slope_var_a_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "A")$Slope_sample))/4.4 #variance per m.y.
slope_var_b_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "B")$Slope_sample))/2.4 #variance per m.y.
slope_var_c_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "C")$Slope_sample))/3.2 #variance per m.y.
rfi_var_a_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "A")$RFI_sample))/4.4 #variance per m.y.
rfi_var_b_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "B")$RFI_sample))/2.4 #variance per m.y.
rfi_var_c_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "C")$RFI_sample))/3.2 #variance per m.y.
compse_var_a_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "A")$CompSE_sample))/4.4 #variance per m.y.
compse_var_b_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "B")$CompSE_sample))/2.4 #variance per m.y.
compse_var_c_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "C")$CompSE_sample))/3.2 #variance per m.y.
shearse_var_a_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "A")$ShearSE_sample))/4.4 #variance per m.y.
shearse_var_b_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "B")$ShearSE_sample))/2.4 #variance per m.y.
shearse_var_c_sample[j] <- var(na.omit(subset(sampled_data, subset = Interval %in% "C")$ShearSE_sample))/3.2 #variance per m.y.
}


## STEP 5 ## TEST TIME BIN VARIANCE DIFFERENCES BY DTA/FEA INDEX ##
######
# DNE#
######
#variance by bin
dne_var_a <- var(na.omit(subset(data, subset = Interval %in% "A")$DNE))
dne_var_b <- var(na.omit(subset(data, subset = Interval %in% "B")$DNE))
dne_var_c <- var(na.omit(subset(data, subset = Interval %in% "C")$DNE))
dne_var <- data.frame(c(mean(dne_var_a_sample), mean(dne_var_b_sample), mean(dne_var_c_sample)), c("A","B","C"))
colnames(dne_var) <- c("variance","interval")
#one-sample t-test of sampled vs original dataset
dne_var_a_test <- t.test(dne_var_a_sample, mu = dne_var_a/4.4) #variance per m.y.
dne_var_b_test <- t.test(dne_var_b_sample, mu = dne_var_b/2.4) #variance per m.y.
dne_var_c_test <- t.test(dne_var_c_sample, mu = dne_var_c/3.2) #variance per m.y.
#variance test dne
dne_ab <- var.test(dne_var_a_sample,dne_var_b_sample) #per m.y.
dne_bc <- var.test(dne_var_b_sample,dne_var_c_sample) #per m.y.
#6-24-25: time-corrected var test of sampled data suggests sig increase in A-B and B-C for pantodonts

#populate dne part of table
results_table[1,3] <- dne_var_a
results_table[2,3] <- dne_var_b
results_table[3,3] <- dne_var_c
results_table[1,4] <- dne_var_a_test[[5]]
results_table[2,4] <- dne_var_b_test[[5]]
results_table[3,4] <- dne_var_c_test[[5]]
results_table[2,5] <- dne_ab[[1]]
results_table[3,5] <- dne_bc[[1]]
results_table[2,6] <- dne_ab[[4]][1]
results_table[3,6] <- dne_bc[[4]][1]
results_table[2,7] <- dne_ab[[4]][2]
results_table[3,7] <- dne_bc[[4]][2]
results_table[2,8] <- dne_ab[[3]]
results_table[3,8] <- dne_bc[[3]]
results_table[1,9] <- dne_var_a_test[[1]]
results_table[2,9] <- dne_var_b_test[[1]]
results_table[3,9] <- dne_var_c_test[[1]]
results_table[1,10] <- dne_var_a_test[[4]][1]
results_table[2,10] <- dne_var_b_test[[4]][1]
results_table[3,10] <- dne_var_c_test[[4]][1]
results_table[1,11] <- dne_var_a_test[[4]][2]
results_table[2,11] <- dne_var_b_test[[4]][2]
results_table[3,11] <- dne_var_c_test[[4]][2]
results_table[1,12] <- dne_var_a_test[[3]]
results_table[2,12] <- dne_var_b_test[[3]]
results_table[3,12] <- dne_var_c_test[[3]]

#######
#Slope#
#######
#variance by bin
slope_var_a <- var(subset(data, subset = Interval %in% "A")$Slope)
slope_var_b <- var(subset(data, subset = Interval %in% "B")$Slope)
slope_var_c <- var(subset(data, subset = Interval %in% "C")$Slope)
slope_var <- data.frame(c(mean(slope_var_a_sample), mean(slope_var_b_sample), mean(slope_var_c_sample)), c("A","B","C"))
colnames(slope_var) <- c("variance","interval")
#one-sample t-test of sampled vs original dataset
slope_var_a_test <- t.test(slope_var_a_sample, mu = slope_var_a/4.4) #variance per m.y.
slope_var_b_test <- t.test(slope_var_b_sample, mu = slope_var_b/2.4) #variance per m.y.
slope_var_c_test <- t.test(slope_var_c_sample, mu = slope_var_c/3.2) #variance per m.y.
#variance test slope
slope_ab <- var.test(slope_var_a_sample,slope_var_b_sample)
slope_bc <- var.test(slope_var_b_sample,slope_var_c_sample)

#populate slope part of table
results_table[4,3] <- slope_var_a
results_table[5,3] <- slope_var_b
results_table[6,3] <- slope_var_c
results_table[4,4] <- slope_var_a_test[[5]]
results_table[5,4] <- slope_var_b_test[[5]]
results_table[6,4] <- slope_var_c_test[[5]]
results_table[5,5] <- slope_ab[[1]]
results_table[6,5] <- slope_bc[[1]]
results_table[5,6] <- slope_ab[[4]][1]
results_table[6,6] <- slope_bc[[4]][1]
results_table[5,7] <- slope_ab[[4]][2]
results_table[6,7] <- slope_bc[[4]][2]
results_table[5,8] <- slope_ab[[3]]
results_table[6,8] <- slope_bc[[3]]
results_table[4,9] <- slope_var_a_test[[1]]
results_table[5,9] <- slope_var_b_test[[1]]
results_table[6,9] <- slope_var_c_test[[1]]
results_table[4,10] <- slope_var_a_test[[4]][1]
results_table[5,10] <- slope_var_b_test[[4]][1]
results_table[6,10] <- slope_var_c_test[[4]][1]
results_table[4,11] <- slope_var_a_test[[4]][2]
results_table[5,11] <- slope_var_b_test[[4]][2]
results_table[6,11] <- slope_var_c_test[[4]][2]
results_table[4,12] <- slope_var_a_test[[3]]
results_table[5,12] <- slope_var_b_test[[3]]
results_table[6,12] <- slope_var_c_test[[3]]

######
#OPCR#
######
#variance by bin
opcr_var_a <- var(subset(data, subset = Interval %in% "A")$OPCR)
opcr_var_b <- var(subset(data, subset = Interval %in% "B")$OPCR)
opcr_var_c <- var(subset(data, subset = Interval %in% "C")$OPCR)
opcr_var <- data.frame(c(mean(opcr_var_a_sample), mean(opcr_var_b_sample), mean(opcr_var_c_sample)), c("A","B","C"))
colnames(opcr_var) <- c("variance","interval")
#one-sample t-test of sampled vs original dataset
opcr_var_a_test <- t.test(opcr_var_a_sample, mu = opcr_var_a/4.4) #variance per m.y.
opcr_var_b_test <- t.test(opcr_var_b_sample, mu = opcr_var_b/2.4) #variance per m.y.
opcr_var_c_test <- t.test(opcr_var_c_sample, mu = opcr_var_c/3.2) #variance per m.y.
#variance test opcr
opcr_ab <- var.test(opcr_var_a_sample,opcr_var_b_sample)
opcr_bc <- var.test(opcr_var_b_sample,opcr_var_c_sample)

#populate opcr part of table
results_table[7,3] <- opcr_var_a
results_table[8,3] <- opcr_var_b
results_table[9,3] <- opcr_var_c
results_table[7,4] <- opcr_var_a_test[[5]]
results_table[8,4] <- opcr_var_b_test[[5]]
results_table[9,4] <- opcr_var_c_test[[5]]
results_table[8,5] <- opcr_ab[[1]]
results_table[9,5] <- opcr_bc[[1]]
results_table[8,6] <- opcr_ab[[4]][1]
results_table[9,6] <- opcr_bc[[4]][1]
results_table[8,7] <- opcr_ab[[4]][2]
results_table[9,7] <- opcr_bc[[4]][2]
results_table[8,8] <- opcr_ab[[3]]
results_table[9,8] <- opcr_bc[[3]]
results_table[7,9] <- opcr_var_a_test[[1]]
results_table[8,9] <- opcr_var_b_test[[1]]
results_table[9,9] <- opcr_var_c_test[[1]]
results_table[7,10] <- opcr_var_a_test[[4]][1]
results_table[8,10] <- opcr_var_b_test[[4]][1]
results_table[9,10] <- opcr_var_c_test[[4]][1]
results_table[7,11] <- opcr_var_a_test[[4]][2]
results_table[8,11] <- opcr_var_b_test[[4]][2]
results_table[9,11] <- opcr_var_c_test[[4]][2]
results_table[7,12] <- opcr_var_a_test[[3]]
results_table[8,12] <- opcr_var_b_test[[3]]
results_table[9,12] <- opcr_var_c_test[[3]]

#####
#RFI#
#####
#variance by bin
rfi_var_a <- var(na.omit(subset(data, subset = Interval %in% "A")$RFI))
rfi_var_b <- var(na.omit(subset(data, subset = Interval %in% "B")$RFI))
rfi_var_c <- var(na.omit(subset(data, subset = Interval %in% "C")$RFI))
rfi_var <- data.frame(c(mean(rfi_var_a_sample), mean(rfi_var_b_sample), mean(rfi_var_c_sample)), c("A","B","C"))
colnames(rfi_var) <- c("variance","interval")
#one-sample t-test of sampled vs original dataset
rfi_var_a_test <- t.test(rfi_var_a_sample, mu = rfi_var_a/4.4) #variance per m.y.
rfi_var_b_test <- t.test(rfi_var_b_sample, mu = rfi_var_b/2.4) #variance per m.y.
rfi_var_c_test <- t.test(rfi_var_c_sample, mu = rfi_var_c/3.2) #variance per m.y.
#variance test rfi
rfi_ab <- var.test(rfi_var_a_sample,rfi_var_b_sample)
rfi_bc <- var.test(rfi_var_b_sample,rfi_var_c_sample)

#populate rfi part of table
results_table[10,3] <- rfi_var_a
results_table[11,3] <- rfi_var_b
results_table[12,3] <- rfi_var_c
results_table[10,4] <- rfi_var_a_test[[5]]
results_table[11,4] <- rfi_var_b_test[[5]]
results_table[12,4] <- rfi_var_c_test[[5]]
results_table[11,5] <- rfi_ab[[1]]
results_table[12,5] <- rfi_bc[[1]]
results_table[11,6] <- rfi_ab[[4]][1]
results_table[12,6] <- rfi_bc[[4]][1]
results_table[11,7] <- rfi_ab[[4]][2]
results_table[12,7] <- rfi_bc[[4]][2]
results_table[11,8] <- rfi_ab[[3]]
results_table[12,8] <- rfi_bc[[3]]
results_table[10,9] <- rfi_var_a_test[[1]]
results_table[11,9] <- rfi_var_b_test[[1]]
results_table[12,9] <- rfi_var_c_test[[1]]
results_table[10,10] <- rfi_var_a_test[[4]][1]
results_table[11,10] <- rfi_var_b_test[[4]][1]
results_table[12,10] <- rfi_var_c_test[[4]][1]
results_table[10,11] <- rfi_var_a_test[[4]][2]
results_table[11,11] <- rfi_var_b_test[[4]][2]
results_table[12,11] <- rfi_var_c_test[[4]][2]
results_table[10,12] <- rfi_var_a_test[[3]]
results_table[11,12] <- rfi_var_b_test[[3]]
results_table[12,12] <- rfi_var_c_test[[3]]

##########
#COMPRESS#
##########
#variance by bin
compse_var_a <- var(na.omit(subset(data, subset = Interval %in% "A")$adj_comp_se))
compse_var_b <- var(na.omit(subset(data, subset = Interval %in% "B")$adj_comp_se))
compse_var_c <- var(na.omit(subset(data, subset = Interval %in% "C")$adj_comp_se))
compse_var <- data.frame(c(mean(compse_var_a_sample), mean(compse_var_b_sample), mean(compse_var_c_sample)), c("A","B","C"))
colnames(compse_var) <- c("variance","interval")
#one-sample t-test of sampled vs original dataset
compse_var_a_test <- t.test(compse_var_a_sample, mu = compse_var_a/4.4) #variance per m.y.
compse_var_b_test <- t.test(compse_var_b_sample, mu = compse_var_b/2.4) #variance per m.y.
compse_var_c_test <- t.test(compse_var_c_sample, mu = compse_var_c/3.2) #variance per m.y.
#variance test compse
compse_ab <- var.test(compse_var_a_sample,compse_var_b_sample)
compse_bc <- var.test(compse_var_b_sample,compse_var_c_sample)

#populate compse part of table
results_table[13,3] <- compse_var_a
results_table[14,3] <- compse_var_b
results_table[15,3] <- compse_var_c
results_table[13,4] <- compse_var_a_test[[5]]
results_table[14,4] <- compse_var_b_test[[5]]
results_table[15,4] <- compse_var_c_test[[5]]
results_table[14,5] <- compse_ab[[1]]
results_table[15,5] <- compse_bc[[1]]
results_table[14,6] <- compse_ab[[4]][1]
results_table[15,6] <- compse_bc[[4]][1]
results_table[14,7] <- compse_ab[[4]][2]
results_table[15,7] <- compse_bc[[4]][2]
results_table[14,8] <- compse_ab[[3]]
results_table[15,8] <- compse_bc[[3]]
results_table[13,9] <- compse_var_a_test[[1]]
results_table[14,9] <- compse_var_b_test[[1]]
results_table[15,9] <- compse_var_c_test[[1]]
results_table[13,10] <- compse_var_a_test[[4]][1]
results_table[14,10] <- compse_var_b_test[[4]][1]
results_table[15,10] <- compse_var_c_test[[4]][1]
results_table[13,11] <- compse_var_a_test[[4]][2]
results_table[14,11] <- compse_var_b_test[[4]][2]
results_table[15,11] <- compse_var_c_test[[4]][2]
results_table[13,12] <- compse_var_a_test[[3]]
results_table[14,12] <- compse_var_b_test[[3]]
results_table[15,12] <- compse_var_c_test[[3]]

##########
# SHEAR  #
##########
#variance by bin
shearse_var_a <- var(na.omit(subset(data, subset = Interval %in% "A")$adj_shear_se))
shearse_var_b <- var(na.omit(subset(data, subset = Interval %in% "B")$adj_shear_se))
shearse_var_c <- var(na.omit(subset(data, subset = Interval %in% "C")$adj_shear_se))
shearse_var <- data.frame(c(mean(shearse_var_a_sample), mean(shearse_var_b_sample), mean(shearse_var_c_sample)), c("A","B","C"))
colnames(shearse_var) <- c("variance","interval")
#one-sample t-test of sampled vs original dataset
shearse_var_a_test <- t.test(shearse_var_a_sample, mu = shearse_var_a/4.4) #variance per m.y.
shearse_var_b_test <- t.test(shearse_var_b_sample, mu = shearse_var_b/2.4) #variance per m.y.
shearse_var_c_test <- t.test(shearse_var_c_sample, mu = shearse_var_c/3.2) #variance per m.y.
#variance test shearse
shearse_ab <- var.test(shearse_var_a_sample,shearse_var_b_sample)
shearse_bc <- var.test(shearse_var_b_sample,shearse_var_c_sample)

#populate shearse part of table
results_table[16,3] <- shearse_var_a
results_table[17,3] <- shearse_var_b
results_table[18,3] <- shearse_var_c
results_table[16,4] <- shearse_var_a_test[[5]]
results_table[17,4] <- shearse_var_b_test[[5]]
results_table[18,4] <- shearse_var_c_test[[5]]
results_table[17,5] <- shearse_ab[[1]]
results_table[18,5] <- shearse_bc[[1]]
results_table[17,6] <- shearse_ab[[4]][1]
results_table[18,6] <- shearse_bc[[4]][1]
results_table[17,7] <- shearse_ab[[4]][2]
results_table[18,7] <- shearse_bc[[4]][2]
results_table[17,8] <- shearse_ab[[3]]
results_table[18,8] <- shearse_bc[[3]]
results_table[16,9] <- shearse_var_a_test[[1]]
results_table[17,9] <- shearse_var_b_test[[1]]
results_table[18,9] <- shearse_var_c_test[[1]]
results_table[16,10] <- shearse_var_a_test[[4]][1]
results_table[17,10] <- shearse_var_b_test[[4]][1]
results_table[18,10] <- shearse_var_c_test[[4]][1]
results_table[16,11] <- shearse_var_a_test[[4]][2]
results_table[17,11] <- shearse_var_b_test[[4]][2]
results_table[18,11] <- shearse_var_c_test[[4]][2]
results_table[16,12] <- shearse_var_a_test[[3]]
results_table[17,12] <- shearse_var_b_test[[3]]
results_table[18,12] <- shearse_var_c_test[[3]]

## STEP 6 ## ANOVA TEST OF MEAN TRAIT VALUES USING BOOTSTRAP SAMPLE ##
############
#ANOVA loop#
############
#anova with resamples#
#set empty vector for 1000 resamples
dne_aov_sample <- vector(mode="numeric", length=1000)
opcr_aov_sample <- vector(mode="numeric", length=1000)
slope_aov_sample <- vector(mode="numeric", length=1000)
rfi_aov_sample <- vector(mode="numeric", length=1000)
compse_aov_sample <- vector(mode="numeric", length=1000)
shearse_aov_sample <- vector(mode="numeric", length=1000)
dne_ab_sample <- vector(mode="numeric", length=1000)
opcr_ab_sample <- vector(mode="numeric", length=1000)
slope_ab_sample <- vector(mode="numeric", length=1000)
rfi_ab_sample <- vector(mode="numeric", length=1000)
compse_ab_sample <- vector(mode="numeric", length=1000)
shearse_ab_sample <- vector(mode="numeric", length=1000)
dne_bc_sample <- vector(mode="numeric", length=1000)
opcr_bc_sample <- vector(mode="numeric", length=1000)
slope_bc_sample <- vector(mode="numeric", length=1000)
rfi_bc_sample <- vector(mode="numeric", length=1000)
compse_bc_sample <- vector(mode="numeric", length=1000)
shearse_bc_sample <- vector(mode="numeric", length=1000)

#nested for loops to generate resampled dataset and then calculate variance from it
for (j in 1:1000) {
sampled_data <- data
for (i in 1:nrow(data)) {
sampled_data[i,48] <- runif(1, min=as.numeric(sampled_data[i,11]), max=as.numeric(sampled_data[i,10])) #DNE sample
sampled_data[i,50] <- runif(1, as.numeric(sampled_data[i,18]), as.numeric(sampled_data[i,17])) #Slope sample
sampled_data[i,49] <- runif(1, as.numeric(sampled_data[i,21]), as.numeric(sampled_data[i,20])) #OPCR sample
sampled_data[i,51] <- runif(1, as.numeric(sampled_data[i,32]), as.numeric(sampled_data[i,31])) #RFI sample
sampled_data[i,52] <- runif(1, as.numeric(sampled_data[i,44]), as.numeric(sampled_data[i,43])) #CompSE sample
sampled_data[i,53] <- runif(1, as.numeric(sampled_data[i,47]), as.numeric(sampled_data[i,46])) #ShearSE sample
}
dne_aov_sample[j] <- summary(aov(DNE_sample ~ Interval, data=sampled_data, weights=Weight))[[1]][["Pr(>F)"]][1]
slope_aov_sample[j] <- summary(aov(Slope_sample ~ Interval, data=sampled_data, weights=Weight))[[1]][["Pr(>F)"]][1]
opcr_aov_sample[j] <- summary(aov(OPCR_sample ~ Interval, data=sampled_data, weights=Weight))[[1]][["Pr(>F)"]][1]
rfi_aov_sample[j] <- summary(aov(RFI_sample ~ Interval, data=sampled_data, weights=Weight))[[1]][["Pr(>F)"]][1]
compse_aov_sample[j] <- summary(aov(CompSE_sample ~ Interval, data=sampled_data, weights=Weight))[[1]][["Pr(>F)"]][1]
shearse_aov_sample[j] <- summary(aov(ShearSE_sample ~ Interval, data=sampled_data, weights=Weight))[[1]][["Pr(>F)"]][1]
dne_ab_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "A")$DNE_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$DNE_sample))[[3]]
opcr_ab_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "A")$OPCR_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$OPCR_sample))[[3]]
slope_ab_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "A")$Slope_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$Slope_sample))[[3]]
rfi_ab_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "A")$RFI_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$RFI_sample))[[3]]
compse_ab_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "A")$CompSE_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$CompSE_sample))[[3]]
shearse_ab_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "A")$ShearSE_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$ShearSE_sample))[[3]]
dne_bc_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "C")$DNE_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$DNE_sample))[[3]]
opcr_bc_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "C")$OPCR_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$OPCR_sample))[[3]]
slope_bc_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "C")$Slope_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$Slope_sample))[[3]]
rfi_bc_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "C")$RFI_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$RFI_sample))[[3]]
compse_bc_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "C")$CompSE_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$CompSE_sample))[[3]]
shearse_bc_sample[j] <- t.test(na.omit(subset(sampled_data, subset = Interval %in% "C")$ShearSE_sample),na.omit(subset(sampled_data, subset = Interval %in% "B")$ShearSE_sample))[[3]]
}

#histogram of ANOVA p values from bootstrapped sample
hist(dne_aov_sample)
abline(v = mean(dne_aov_sample), col = "blue", lwd = 2)
abline(v = 0.05, col = "red", lwd = 2)
#
hist(opcr_aov_sample)
abline(v = mean(opcr_aov_sample), col = "blue", lwd = 2)
abline(v = 0.05, col = "red", lwd = 2)
#
hist(slope_aov_sample)
abline(v = mean(slope_aov_sample), col = "blue", lwd = 2)
abline(v = 0.05, col = "red", lwd = 2)
#
hist(rfi_aov_sample)
abline(v = mean(rfi_aov_sample), col = "blue", lwd = 2)
abline(v = 0.05, col = "red", lwd = 2)
#
hist(compse_aov_sample)
abline(v = mean(compse_aov_sample), col = "blue", lwd = 2)
abline(v = 0.05, col = "red", lwd = 2)
#
hist(shearse_aov_sample)
abline(v = mean(shearse_aov_sample), col = "blue", lwd = 2)
abline(v = 0.05, col = "red", lwd = 2)
#

#t test of ANOVa sample p values against 0.05
dne_aov_test <- t.test(dne_aov_sample, mu = 0.05, alternative="less")
dne_aov_test
opcr_aov_test <- t.test(opcr_aov_sample, mu = 0.05, alternative="less")
opcr_aov_test
slope_aov_test <- t.test(slope_aov_sample, mu = 0.05, alternative="less")
slope_aov_test
rfi_aov_test <- t.test(rfi_aov_sample, mu = 0.05, alternative="less")
rfi_aov_test
compse_aov_test <- t.test(compse_aov_sample, mu = 0.05, alternative="less")
compse_aov_test
shearse_aov_test <- t.test(shearse_aov_sample, mu = 0.05, alternative="less")
shearse_aov_test
dne_ab_test <- t.test(dne_ab_sample, mu = 0.05, alternative="less")
dne_ab_test
opcr_ab_test <- t.test(opcr_ab_sample, mu = 0.05, alternative="less")
opcr_ab_test
slope_ab_test <- t.test(slope_ab_sample, mu = 0.05, alternative="less")
slope_ab_test
rfi_ab_test <- t.test(rfi_ab_sample, mu = 0.05, alternative="less")
rfi_ab_test
compse_ab_test <- t.test(compse_ab_sample, mu = 0.05, alternative="less")
compse_ab_test
shearse_ab_test <- t.test(shearse_ab_sample, mu = 0.05, alternative="less")
shearse_ab_test
dne_bc_test <- t.test(dne_bc_sample, mu = 0.05, alternative="less")
dne_bc_test
opcr_bc_test <- t.test(opcr_bc_sample, mu = 0.05, alternative="less")
opcr_bc_test
slope_bc_test <- t.test(slope_bc_sample, mu = 0.05, alternative="less")
slope_bc_test
rfi_bc_test <- t.test(rfi_bc_sample, mu = 0.05, alternative="less")
rfi_bc_test
compse_bc_test <- t.test(compse_bc_sample, mu = 0.05, alternative="less")
compse_bc_test
shearse_bc_test <- t.test(shearse_bc_sample, mu = 0.05, alternative="less")
shearse_bc_test

#map results to results table
results_table[1,13] <- dne_aov_test[[5]]
results_table[1,14] <- dne_aov_test[[1]]
results_table[1,15] <- dne_aov_test[[4]][1]
results_table[1,16] <- dne_aov_test[[4]][2]
results_table[1,17] <- dne_aov_test[[3]]
results_table[4,13] <- slope_aov_test[[5]]
results_table[4,14] <- slope_aov_test[[1]]
results_table[4,15] <- slope_aov_test[[4]][1]
results_table[4,16] <- slope_aov_test[[4]][2]
results_table[4,17] <- slope_aov_test[[3]]
results_table[7,13] <- opcr_aov_test[[5]]
results_table[7,14] <- opcr_aov_test[[1]]
results_table[7,15] <- opcr_aov_test[[4]][1]
results_table[7,16] <- opcr_aov_test[[4]][2]
results_table[7,17] <- opcr_aov_test[[3]]
results_table[10,13] <- rfi_aov_test[[5]]
results_table[10,14] <- rfi_aov_test[[1]]
results_table[10,15] <- rfi_aov_test[[4]][1]
results_table[10,16] <- rfi_aov_test[[4]][2]
results_table[10,17] <- rfi_aov_test[[3]]
results_table[13,13] <- compse_aov_test[[5]]
results_table[13,14] <- compse_aov_test[[1]]
results_table[13,15] <- compse_aov_test[[4]][1]
results_table[13,16] <- compse_aov_test[[4]][2]
results_table[13,17] <- compse_aov_test[[3]]
results_table[16,13] <- shearse_aov_test[[5]]
results_table[16,14] <- shearse_aov_test[[1]]
results_table[16,15] <- shearse_aov_test[[4]][1]
results_table[16,16] <- shearse_aov_test[[4]][2]
results_table[16,17] <- shearse_aov_test[[3]]

results_table[2,13] <- dne_ab_test[[5]]
results_table[2,14] <- dne_ab_test[[1]]
results_table[2,15] <- dne_ab_test[[4]][1]
results_table[2,16] <- dne_ab_test[[4]][2]
results_table[2,17] <- dne_ab_test[[3]]
results_table[5,13] <- slope_ab_test[[5]]
results_table[5,14] <- slope_ab_test[[1]]
results_table[5,15] <- slope_ab_test[[4]][1]
results_table[5,16] <- slope_ab_test[[4]][2]
results_table[5,17] <- slope_ab_test[[3]]
results_table[8,13] <- opcr_ab_test[[5]]
results_table[8,14] <- opcr_ab_test[[1]]
results_table[8,15] <- opcr_ab_test[[4]][1]
results_table[8,16] <- opcr_ab_test[[4]][2]
results_table[8,17] <- opcr_ab_test[[3]]
results_table[11,13] <- rfi_ab_test[[5]]
results_table[11,14] <- rfi_ab_test[[1]]
results_table[11,15] <- rfi_ab_test[[4]][1]
results_table[11,16] <- rfi_ab_test[[4]][2]
results_table[11,17] <- rfi_ab_test[[3]]
results_table[14,13] <- compse_ab_test[[5]]
results_table[14,14] <- compse_ab_test[[1]]
results_table[14,15] <- compse_ab_test[[4]][1]
results_table[14,16] <- compse_ab_test[[4]][2]
results_table[14,17] <- compse_ab_test[[3]]
results_table[17,13] <- shearse_ab_test[[5]]
results_table[17,14] <- shearse_ab_test[[1]]
results_table[17,15] <- shearse_ab_test[[4]][1]
results_table[17,16] <- shearse_ab_test[[4]][2]
results_table[17,17] <- shearse_ab_test[[3]]

results_table[3,13] <- dne_bc_test[[5]]
results_table[3,14] <- dne_bc_test[[1]]
results_table[3,15] <- dne_bc_test[[4]][1]
results_table[3,16] <- dne_bc_test[[4]][2]
results_table[3,17] <- dne_bc_test[[3]]
results_table[6,13] <- slope_bc_test[[5]]
results_table[6,14] <- slope_bc_test[[1]]
results_table[6,15] <- slope_bc_test[[4]][1]
results_table[6,16] <- slope_bc_test[[4]][2]
results_table[6,17] <- slope_bc_test[[3]]
results_table[9,13] <- opcr_bc_test[[5]]
results_table[9,14] <- opcr_bc_test[[1]]
results_table[9,15] <- opcr_bc_test[[4]][1]
results_table[9,16] <- opcr_bc_test[[4]][2]
results_table[9,17] <- opcr_bc_test[[3]]
results_table[12,13] <- rfi_bc_test[[5]]
results_table[12,14] <- rfi_bc_test[[1]]
results_table[12,15] <- rfi_bc_test[[4]][1]
results_table[12,16] <- rfi_bc_test[[4]][2]
results_table[12,17] <- rfi_bc_test[[3]]
results_table[15,13] <- compse_bc_test[[5]]
results_table[15,14] <- compse_bc_test[[1]]
results_table[15,15] <- compse_bc_test[[4]][1]
results_table[15,16] <- compse_bc_test[[4]][2]
results_table[15,17] <- compse_bc_test[[3]]
results_table[18,13] <- shearse_bc_test[[5]]
results_table[18,14] <- shearse_bc_test[[1]]
results_table[18,15] <- shearse_bc_test[[4]][1]
results_table[18,16] <- shearse_bc_test[[4]][2]
results_table[18,17] <- shearse_bc_test[[3]]


## STEP 7 ##
#plotting#
#could use x=fct_rev(as.factor(Interval)) to swap time interval order for plotting on strat column
dne_ratio <- max(na.omit(data$DNE)) / max(dne_var$variance)/2
DNE_all <- ggplot() +
	geom_violin(data = data, aes(x=as.factor(Interval), y=DNE, fill=as.factor(Interval))) + 
      geom_line(data = dne_var, aes(x = interval, y = variance*dne_ratio, group = 1), size=2) + 
      scale_y_continuous(labels=comma,sec.axis = sec_axis(~./dne_ratio, name = 'variance')) +
	xlab("Age bin") + 
      scale_x_discrete(labels=c("A" = "E.Paleocene", "B" = "M.Paleocene", "C" = "L.Paleocene"))
DNE_all

slope_ratio <- max(data$Slope) / max(slope_var$variance)*.75
Slope_all <- ggplot() +
	geom_violin(data = data, aes(x=as.factor(Interval), y=Slope, fill=as.factor(Interval))) + 
      geom_line(data = slope_var, aes(x = interval, y = variance*slope_ratio, group = 1), size=2) + 
      scale_y_continuous(labels=comma,sec.axis = sec_axis(~./slope_ratio, name = 'variance')) +
	xlab("Age bin") + 
      scale_x_discrete(labels=c("A" = "E.Paleocene", "B" = "M.Paleocene", "C" = "L.Paleocene"))
Slope_all

opcr_ratio <- max(data$OPCR) / max(opcr_var$variance)*.7
OPCR_all <- ggplot() +
	geom_violin(data = data, aes(x=as.factor(Interval), y=OPCR, fill=as.factor(Interval))) + 
      geom_line(data = opcr_var, aes(x = interval, y = variance*opcr_ratio, group = 1), size=2) + 
      scale_y_continuous(labels=comma,sec.axis = sec_axis(~./opcr_ratio, name = 'variance')) +
	xlab("Age bin") + 
      scale_x_discrete(labels=c("A" = "E.Paleocene", "B" = "M.Paleocene", "C" = "L.Paleocene"))
OPCR_all

rfi_ratio <- max(na.omit(data$RFI)) / max(rfi_var$variance)*.75
RFI_all <- ggplot() +
	geom_violin(data = data, aes(x=as.factor(Interval), y=RFI, fill=as.factor(Interval))) + 
      geom_line(data = rfi_var, aes(x = interval, y = variance*rfi_ratio, group = 1), size=2) + 
      scale_y_continuous(labels=comma,sec.axis = sec_axis(~./rfi_ratio, name = 'variance')) +
	xlab("Age bin") + 
      scale_x_discrete(labels=c("A" = "E.Paleocene", "B" = "M.Paleocene", "C" = "L.Paleocene"))
RFI_all


#could use x=fct_rev(as.factor(Interval)) to swap time interval order for plotting on strat column
compress_ratio <- max(data$adj_comp_se) / max(compse_var$variance)
compress_all <- ggplot() +
	geom_violin(data = data, aes(x=as.factor(Interval), y=adj_comp_se, fill=as.factor(Interval))) + 
      geom_line(data = compse_var, aes(x = interval, y = variance*compress_ratio, group = 1), size=2) + 
      scale_y_continuous(labels=comma,sec.axis = sec_axis(~./compress_ratio, name = 'variance')) +
	xlab("Age bin") + 
      scale_x_discrete(labels=c("A" = "E.Paleocene", "B" = "M.Paleocene", "C" = "L.Paleocene"))
compress_all

shear_ratio <- max(data$adj_shear_se) / max(shearse_var$variance)*.3
shear_all <- ggplot() +
	geom_violin(data = data, aes(x=as.factor(Interval), y=adj_shear_se, fill=as.factor(Interval))) + 
      geom_line(data = shearse_var, aes(x = interval, y = variance*shear_ratio, group = 1), size=2) + 
      scale_y_continuous(labels=comma,sec.axis = sec_axis(~./shear_ratio, name = 'variance')) +
	xlab("Age bin") + 
      scale_x_discrete(labels=c("A" = "E.Paleocene", "B" = "M.Paleocene", "C" = "L.Paleocene"))
shear_all


#arranged panels
collated_plots <- ggarrange(DNE_all, Slope_all, OPCR_all, RFI_all, compress_all, shear_all, ncol=2, nrow=3, legend="none")

## STEP 8 ##
##then export results table here## change file name as appropriate ##
write.csv(results_table, file="All-taxon_M2_DTA&FEA_statistics.csv")
#plot with main title (change title content as necessary)
annotate_figure(collated_plots, top = text_grob("All-taxon M2 data with bootstrapped sample variance", 
               color = "black", face = "bold", size = 14))
## Then save as PDF ##
#############################################################




###################################
# NEW SECTION FOR REVISION        #
# TWO-BLOCK PARTIAL LEAST SQUARES #
###################################
#use geomorph for two block PLS function
library(geomorph)

## STEP 1 ## RELOAD MAIN DATA ##
#import new data matrix#
#file is O:\Direct_copy_G_Drive_as_of_11-29-2023\Project_files\1.Submitted&Published_Projects\Paleocene_mammals_REVISING_17JUN25\CB_submission&revision\Paleocene_DTA_ms\Paleocene_mammal_revision_CB\DTA&FEA_outputs_FEA_adjusted.csv
#as of 6-23-25
data <- read.csv(file=file.choose(), header=T)


## STEP 2 ## PARTITION TAXON ##
#for all data, do not subset taxon at this step#
#
#data_Pantodont#
data <- subset(data,(Clade %in% "Pantodonta"))
#data non-pantodont#
'%!in%' <- function(x,y)!('%in%'(x,y))
data <- subset(data, subset = (Clade %!in% "Pantodonta"))


##################
# all molar data #
##################
data <- subset(data, subset = Tooth %in% c("M_1","M_2","M_3","M1_","M2_","M3_"))

#####################
# all premolar data #
#####################
data <- subset(data, subset = Tooth %in% c("P_1","P_2","P_3","P_4","P1_","P2_","P3_","P4_"))

#individual tooth data#
data <- subset(data, subset = Tooth %in% "M2_") #replace the last part in the call to subset other teeth

###################################################
# LOOP TO COLLECT R-PLS FOR 1000 BOOSTRAP SAMPLES #
###################################################
#set empty vector for 1000 resamples
pls_all_sample <- vector(mode="numeric", length=1000)
pls_A_sample <- vector(mode="numeric", length=1000)
pls_B_sample <- vector(mode="numeric", length=1000)
pls_C_sample <- vector(mode="numeric", length=1000)

#nested for loops to generate resampled dataset and then calculate variance from it
for (j in 1:1000) {
sampled_data <- data
for (i in 1:nrow(data)) {
sampled_data[i,48] <- runif(1, min=as.numeric(sampled_data[i,11]), max=as.numeric(sampled_data[i,10])) #DNE sample
sampled_data[i,50] <- runif(1, as.numeric(sampled_data[i,18]), as.numeric(sampled_data[i,17])) #Slope sample
sampled_data[i,49] <- runif(1, as.numeric(sampled_data[i,21]), as.numeric(sampled_data[i,20])) #OPCR sample
sampled_data[i,51] <- runif(1, as.numeric(sampled_data[i,32]), as.numeric(sampled_data[i,31])) #RFI sample
sampled_data[i,52] <- runif(1, as.numeric(sampled_data[i,44]), as.numeric(sampled_data[i,43])) #CompSE sample
sampled_data[i,53] <- runif(1, as.numeric(sampled_data[i,47]), as.numeric(sampled_data[i,46])) #ShearSE sample
}
dta_fea_data <- na.omit(sampled_data[,c(2,6,7,48:53)]) #use sampled data from each replicate
dta_fea_data_A <- subset(dta_fea_data, subset = Interval %in% "A")
dta_fea_data_B <- subset(dta_fea_data, subset = Interval %in% "B")
dta_fea_data_C <- subset(dta_fea_data, subset = Interval %in% "C")
pls_all_sample[j] <- two.b.pls(dta_fea_data[,4:7],dta_fea_data[,8:9],print.progress=F)[[1]]
pls_A_sample[j] <- two.b.pls(dta_fea_data_A[,4:7],dta_fea_data_A[,8:9],print.progress=F)[[1]]
pls_B_sample[j] <- two.b.pls(dta_fea_data_B[,4:7],dta_fea_data_B[,8:9],print.progress=F)[[1]]
pls_C_sample[j] <- two.b.pls(dta_fea_data_C[,4:7],dta_fea_data_C[,8:9],print.progress=F)[[1]]
}

#t test to see whether DTA-FEA associations change over time.
t.test(pls_A_sample,pls_B_sample)
t.test(pls_B_sample,pls_C_sample)

#report t test outcomes here:
#all-taxon all data: significant increase in r-pls across time; 0.40 to 0.48 to 0.52.
#all-taxon M2 data: r-pls increased then dropped from 0.79 to 0.89 to 0.81.
#pantodont all data: r-pls decreased slowly from 0.299 to 0.291 then increased to 0.4287.
#pantodont premolar data: r-pls decreased from 0.6889 to 0.5717 to 0.5082.
#pantodont molar data: increase from 0.1464 to 0.7005 then decrease to 0.6471.
#nonpantodont all data: increased from 0.4834 to 0.5318, then decreased to 0.4886.
#nonpantodont premolar data: increased across time from 0.4996 to 0.5217 to 0.5536.
#nonpantodont molar data: decreased from 0.4485 to 0.3970 then increased to 0.4631.

#sample histogram showing distributions of time interval r-pls values.
hist(pls_A_sample, xlim=c(0.2,0.7), breaks=20, ylim=c(0,300),col="blue")
hist(pls_B_sample, add=TRUE, xlim=c(0.2,0.7), breaks=10, ylim=c(0,300),col="green")
hist(pls_C_sample, add=TRUE, xlim=c(0.2,0.7), breaks=10, ylim=c(0,300),col="red")
hist(pls_A_sample, xlim=c(0.2,0.7), breaks=20, ylim=c(0,300),col="blue", add=TRUE)

##################### ##########################################
# variance barplots # # THIS SECTION IS UNMODIFIED IN REVISION #
##################### ##########################################

####### ##########################################
# PCA # # THIS SECTION IS UNMODIFIED IN REVISION #
####### ##########################################

######################################################## ##########################################
## 2D tooth area and sqrt area boxplots as size proxy ## # THIS SECTION IS UNMODIFIED IN REVISION #
######################################################## ##########################################


########################
# DTA-FEA regresssions #
########################
all_data <- read.csv(file=file.choose(), header=T)

#all data
data <- all_data

#data partitions (upper vs lower molar vs premolar)
data <- subset(all_data, subset = Tooth %in% c("M_1","M_2","M_3","M1_","M2_","M3_"))
data <- subset(all_data, subset = Tooth %in% c("P_1","P_2","P_3","P_4","P1_","P2_","P3_","P4_"))

#data_Pantodont
data <- subset(data, subset = Clade %in% "Pantodonta")
#data_Arcto&Anagalid
data <- subset(data, subset = Clade %!in% "Pantodonta")

##BOOTSTRAP RESAMPLING OF ADJUSTED R2 AND P VALUE FOR DTA-FEA REGRESSION##
#set empty vector for 1000 resamples
DNE_compress_p_sample <- vector(mode="numeric", length=1000)
DNE_compress_r2_sample <- vector(mode="numeric", length=1000)
DNE_shear_p_sample <- vector(mode="numeric", length=1000)
DNE_shear_r2_sample <- vector(mode="numeric", length=1000)
Slope_compress_p_sample <- vector(mode="numeric", length=1000)
Slope_compress_r2_sample <- vector(mode="numeric", length=1000)
Slope_shear_p_sample <- vector(mode="numeric", length=1000)
Slope_shear_r2_sample <- vector(mode="numeric", length=1000)
OPCR_compress_p_sample <- vector(mode="numeric", length=1000)
OPCR_compress_r2_sample <- vector(mode="numeric", length=1000)
OPCR_shear_p_sample <- vector(mode="numeric", length=1000)
OPCR_shear_r2_sample <- vector(mode="numeric", length=1000)
RFI_compress_p_sample <- vector(mode="numeric", length=1000)
RFI_compress_r2_sample <- vector(mode="numeric", length=1000)
RFI_shear_p_sample <- vector(mode="numeric", length=1000)
RFI_shear_r2_sample <- vector(mode="numeric", length=1000)

#nested for loops to generate resampled dataset and then calculate variance from it
for (j in 1:1000) {
sampled_data <- data
for (i in 1:nrow(data)) {
sampled_data[i,48] <- runif(1, min=as.numeric(sampled_data[i,11]), max=as.numeric(sampled_data[i,10])) #DNE sample
sampled_data[i,50] <- runif(1, as.numeric(sampled_data[i,18]), as.numeric(sampled_data[i,17])) #Slope sample
sampled_data[i,49] <- runif(1, as.numeric(sampled_data[i,21]), as.numeric(sampled_data[i,20])) #OPCR sample
sampled_data[i,51] <- runif(1, as.numeric(sampled_data[i,32]), as.numeric(sampled_data[i,31])) #RFI sample
sampled_data[i,52] <- runif(1, as.numeric(sampled_data[i,44]), as.numeric(sampled_data[i,43])) #CompSE sample
sampled_data[i,53] <- runif(1, as.numeric(sampled_data[i,47]), as.numeric(sampled_data[i,46])) #ShearSE sample
}
#compiled linear regression calls
DNE_compress <- lm(log10(CompSE_sample) ~ log10(DNE_sample), data=sampled_data) 
DNE_shear <- lm(log10(ShearSE_sample) ~ log10(DNE_sample), data=sampled_data) 
Slope_compress <- lm(log10(CompSE_sample) ~ log10(Slope_sample), data=sampled_data) 
Slope_shear <- lm(log10(ShearSE_sample) ~ log10(Slope_sample), data=sampled_data) 
OPCR_compress <- lm(log10(CompSE_sample) ~ log10(OPCR_sample), data=sampled_data) 
OPCR_shear <- lm(log10(ShearSE_sample) ~ log10(OPCR_sample), data=sampled_data) 
RFI_compress <- lm(log10(CompSE_sample) ~ log10(RFI_sample), data=sampled_data) 
RFI_shear <- lm(log10(ShearSE_sample) ~ log10(RFI_sample), data=sampled_data) 
#collect p and r2 values from each regression replicate
DNE_compress_p_sample[j] <- coef(summary(DNE_compress))["log10(DNE_sample)","Pr(>|t|)"]
DNE_compress_r2_sample[j] <- summary(DNE_compress)$adj.r.squared
DNE_shear_p_sample[j] <- coef(summary(DNE_shear))["log10(DNE_sample)","Pr(>|t|)"]
DNE_shear_r2_sample[j] <- summary(DNE_shear)$adj.r.squared
Slope_compress_p_sample[j] <- coef(summary(Slope_compress))["log10(Slope_sample)","Pr(>|t|)"]
Slope_compress_r2_sample[j] <- summary(Slope_compress)$adj.r.squared
Slope_shear_p_sample[j] <- coef(summary(Slope_shear))["log10(Slope_sample)","Pr(>|t|)"]
Slope_shear_r2_sample[j] <- summary(Slope_shear)$adj.r.squared
OPCR_compress_p_sample[j] <- coef(summary(OPCR_compress))["log10(OPCR_sample)","Pr(>|t|)"]
OPCR_compress_r2_sample[j] <- summary(OPCR_compress)$adj.r.squared
OPCR_shear_p_sample[j] <- coef(summary(OPCR_shear))["log10(OPCR_sample)","Pr(>|t|)"]
OPCR_shear_r2_sample[j] <- summary(OPCR_shear)$adj.r.squared
RFI_compress_p_sample[j] <- coef(summary(RFI_compress))["log10(RFI_sample)","Pr(>|t|)"]
RFI_compress_r2_sample[j] <- summary(RFI_compress)$adj.r.squared
RFI_shear_p_sample[j] <- coef(summary(RFI_shear))["log10(RFI_sample)","Pr(>|t|)"]
RFI_shear_r2_sample[j] <- summary(RFI_shear)$adj.r.squared
}

#t test for signifiance of lm model
t.test(DNE_compress_p_sample , mu = 0.05, alternative="less") #P<0.001
t.test(DNE_shear_p_sample  , mu = 0.05, alternative="less") #p<0.001
t.test(Slope_compress_p_sample  , mu = 0.05, alternative="less") #p=0.08
t.test(Slope_shear_p_sample  , mu = 0.05, alternative="less") #p=0.15
t.test(OPCR_compress_p_sample  , mu = 0.05, alternative="less") #p<0.001
t.test(OPCR_shear_p_sample  , mu = 0.05, alternative="less") #p<0.001
t.test(RFI_compress_p_sample  , mu = 0.05, alternative="less") #p=0.62
t.test(RFI_shear_p_sample  , mu = 0.05, alternative="less") #p=0.48
#pantodont: <0.001/<0.001/0.006/0.055/0.001/0.001/0.42/0.41
#pantodont molar: 0.01/0.006/0.07/0.19/0.003/0.0004/0.53/0.76
#pantodont premolar: 0.0006/0.0001/0.07/0.31/<0.0001/<0.0001/0.07/0.26
#nonpantodont: <0.0001/<0.0001/0.10/0.26/<0.0001/<0.0001/0.23/0.52
#nonpantodont molar: 0.003/<0.0001/0.09/0.012/0.0001/<0.0001/0.20/0.08
#nonpantodont premolar: <0.0001/<0.0001/0.12/0.49/<0.0001/<0.0001/0.34/0.65
#build histograms for r2 of lm model
p1 <- ggplot(as.data.frame(DNE_compress_r2_sample),aes(DNE_compress_r2_sample)) + geom_histogram()# + xlim(0,0.5)
p2 <- ggplot(as.data.frame(DNE_shear_r2_sample),aes(DNE_shear_r2_sample)) + geom_histogram()# + xlim(0,0.5)
p3 <- ggplot(as.data.frame(Slope_compress_r2_sample ),aes(Slope_compress_r2_sample )) + geom_histogram()#+ xlim(0,0.5)
p4 <- ggplot(as.data.frame(Slope_shear_r2_sample ),aes(Slope_shear_r2_sample )) + geom_histogram()#+ xlim(0,0.5)
p5 <- ggplot(as.data.frame(OPCR_compress_r2_sample ),aes(OPCR_compress_r2_sample )) + geom_histogram()#+ xlim(0,0.5)
p6 <- ggplot(as.data.frame(OPCR_shear_r2_sample ),aes(OPCR_shear_r2_sample )) + geom_histogram()#+ xlim(0,0.5)
p7 <- ggplot(as.data.frame(RFI_compress_r2_sample ),aes(RFI_compress_r2_sample )) + geom_histogram()#+ xlim(0,0.5)
p8 <- ggplot(as.data.frame(RFI_shear_r2_sample ),aes(RFI_shear_r2_sample )) + geom_histogram()#+ xlim(0,0.5)

#arranged panels
collated_plots <- ggarrange(p1,p2,p5,p6,p3,p4,p7,p8, ncol=2, nrow=4, legend="none")

#plot with main title (change title content as necessary)
windows()
annotate_figure(collated_plots, top = text_grob("DTA to FEA regression r2 NonPantodont premolar data", 
               color = "black", face = "bold", size = 14))

 
#individual DTA-FEA regression plots
#DNE-compress
DNE_compress_plot <- ggplot(data,aes(log10(DNE), log10(adj_comp_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("DNE") +
  ylab("SE compressive (J)")
DNE_compress_plot

#DNE-shear
DNE_shear_plot <- ggplot(data,aes(log10(DNE), log10(adj_shear_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("DNE") +
  ylab("SE shear (J)")
DNE_shear_plot

#Slope-compress
Slope_compress_plot <- ggplot(data,aes(log10(Slope), log10(adj_comp_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("Slope") +
  ylab("SE compressive (J)")
Slope_compress_plot

#Slope-shear
Slope_shear_plot <- ggplot(data,aes(log10(Slope), log10(adj_shear_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("Slope") +
  ylab("SE shear (J)")
Slope_shear_plot

#OPCR-compress
OPCR_compress_plot <- ggplot(data,aes(log10(OPCR), log10(adj_comp_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("OPCR") +
  ylab("SE compressive (J)")
OPCR_compress_plot

#OPCR-shear
OPCR_shear_plot <- ggplot(data,aes(log10(OPCR), log10(adj_shear_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("OPCR") +
  ylab("SE shear (J)")
OPCR_shear_plot

#RFI-compress
RFI_compress_plot <- ggplot(data,aes(log10(RFI), log10(adj_comp_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("RFI") +
  ylab("Se compressive (J)")
RFI_compress_plot

#RFI-shear
RFI_shear_plot <- ggplot(data,aes(log10(RFI), log10(adj_shear_se), color=Interval)) +
  geom_point() +
  geom_smooth(method='lm') +
  theme_bw() +
  xlab("RFI") +
  ylab("SE shear (J)")
RFI_shear_plot

#arranged panels
collated_plots <- ggarrange(DNE_compress_plot, DNE_shear_plot, OPCR_compress_plot, OPCR_shear_plot, Slope_compress_plot, Slope_shear_plot,
	RFI_compress_plot, RFI_shear_plot, ncol=2, nrow=4, legend="none")

#plot with main title (change title content as necessary)
windows()
annotate_figure(collated_plots, top = text_grob("DTA to FEA regression All-taxon all data", 
               color = "black", face = "bold", size = 14))



##################################### ######################################################################
# DTA-FEA multivariate regresssions # # THIS SECTION REMOVED AND REPLACES WITH 2B-PLS ANALYSES IN REVISION #
##################################### ######################################################################


######################### ##################################################
# FEA variance barplots # # update this section to use adjusted FEA values #
######################### ##################################################
all_data <- read.csv(file=file.choose(), header=T)

#make bins for time interval and data partition
time_bins <- c("A","B","C","A","B","C","A","B","C","A","B","C","A","B","C")
partition_bins <-c("all","all","all","all molar","all molar","all molar","all premolar","all premolar","all premolar","all lower molar","all lower molar","all lower molar","all upper molar","all upper molar","all upper molar")
#partition bin order corresponse to data 5, 4, 3, 2, 1 in that order

#data partitions (upper vs lower molar vs premolar)
data1 <- subset(all_data, subset = Tooth %in% c("M1_","M2_","M3_"))
data2 <- subset(all_data, subset = Tooth %in% c("M_1","M_2","M_3"))
data3 <- subset(all_data, subset = Tooth %in% c("P_1","P_2","P_3","P_4","P1_","P2_","P3_","P4_"))
data4 <- subset(all_data, subset = Tooth %in% c("M_1","M_2","M_3","M1_","M2_","M3_"))
data5 <- all_data
#data <- subset(all_data, subset = Tooth %in% c("P1_","P2_","P3_","P4_"))
#data <- subset(all_data, subset = Tooth %in% c("P_1","P_2","P_3","P_4"))

#variance by bin
compress_var_a <- var(subset(data1, subset = Interval %in% "A")$adj_comp_se)
compress_var_b <- var(subset(data1, subset = Interval %in% "B")$adj_comp_se)
compress_var_c <- var(subset(data1, subset = Interval %in% "C")$adj_comp_se)
compress_var1 <- data.frame(c(compress_var_a, compress_var_b, compress_var_c), c("A","B","C"))
colnames(compress_var1) <- c("variance","interval")

compress_var_a <- var(subset(data2, subset = Interval %in% "A")$adj_comp_se)
compress_var_b <- var(subset(data2, subset = Interval %in% "B")$adj_comp_se)
compress_var_c <- var(subset(data2, subset = Interval %in% "C")$adj_comp_se)
compress_var2 <- data.frame(c(compress_var_a, compress_var_b, compress_var_c), c("A","B","C"))
colnames(compress_var2) <- c("variance","interval")

compress_var_a <- var(subset(data3, subset = Interval %in% "A")$adj_comp_se)
compress_var_b <- var(subset(data3, subset = Interval %in% "B")$adj_comp_se)
compress_var_c <- var(subset(data3, subset = Interval %in% "C")$adj_comp_se)
compress_var3 <- data.frame(c(compress_var_a, compress_var_b, compress_var_c), c("A","B","C"))
colnames(compress_var3) <- c("variance","interval")

compress_var_a <- var(subset(data4, subset = Interval %in% "A")$adj_comp_se)
compress_var_b <- var(subset(data4, subset = Interval %in% "B")$adj_comp_se)
compress_var_c <- var(subset(data4, subset = Interval %in% "C")$adj_comp_se)
compress_var4 <- data.frame(c(compress_var_a, compress_var_b, compress_var_c), c("A","B","C"))
colnames(compress_var4) <- c("variance","interval")

compress_var_a <- var(subset(data5, subset = Interval %in% "A")$adj_comp_se)
compress_var_b <- var(subset(data5, subset = Interval %in% "B")$adj_comp_se)
compress_var_c <- var(subset(data5, subset = Interval %in% "C")$adj_comp_se)
compress_var5 <- data.frame(c(compress_var_a, compress_var_b, compress_var_c), c("A","B","C"))
colnames(compress_var5) <- c("variance","interval")

shear_var_a <- var(subset(data1, subset = Interval %in% "A")$adj_shear_se)
shear_var_b <- var(subset(data1, subset = Interval %in% "B")$adj_shear_se)
shear_var_c <- var(subset(data1, subset = Interval %in% "C")$adj_shear_se)
shear_var1 <- data.frame(c(shear_var_a, shear_var_b, shear_var_c), c("A","B","C"))
colnames(shear_var1) <- c("variance","interval")

shear_var_a <- var(subset(data2, subset = Interval %in% "A")$adj_shear_se)
shear_var_b <- var(subset(data2, subset = Interval %in% "B")$adj_shear_se)
shear_var_c <- var(subset(data2, subset = Interval %in% "C")$adj_shear_se)
shear_var2 <- data.frame(c(shear_var_a, shear_var_b, shear_var_c), c("A","B","C"))
colnames(shear_var2) <- c("variance","interval")

shear_var_a <- var(subset(data3, subset = Interval %in% "A")$adj_shear_se)
shear_var_b <- var(subset(data3, subset = Interval %in% "B")$adj_shear_se)
shear_var_c <- var(subset(data3, subset = Interval %in% "C")$adj_shear_se)
shear_var3 <- data.frame(c(shear_var_a, shear_var_b, shear_var_c), c("A","B","C"))
colnames(shear_var3) <- c("variance","interval")

shear_var_a <- var(subset(data4, subset = Interval %in% "A")$adj_shear_se)
shear_var_b <- var(subset(data4, subset = Interval %in% "B")$adj_shear_se)
shear_var_c <- var(subset(data4, subset = Interval %in% "C")$adj_shear_se)
shear_var4 <- data.frame(c(shear_var_a, shear_var_b, shear_var_c), c("A","B","C"))
colnames(shear_var4) <- c("variance","interval")

shear_var_a <- var(subset(data5, subset = Interval %in% "A")$adj_shear_se)
shear_var_b <- var(subset(data5, subset = Interval %in% "B")$adj_shear_se)
shear_var_c <- var(subset(data5, subset = Interval %in% "C")$adj_shear_se)
shear_var5 <- data.frame(c(shear_var_a, shear_var_b, shear_var_c), c("A","B","C"))
colnames(shear_var5) <- c("variance","interval")


#combine sample variance estimates per FEA metric
compress_var_all <- c(compress_var5$variance,compress_var4$variance,compress_var3$variance,compress_var2$variance,compress_var1$variance)
shear_var_all <- c(shear_var5$variance,shear_var4$variance,shear_var3$variance,shear_var2$variance,shear_var1$variance)


#make bar plots across time bines for each FEA metric with partitions
compress_bar_data <- data.frame(time_bins,partition_bins,compress_var_all)
compress_bar <- ggplot(compress_bar_data, aes(time_bins, compress_var_all, fill = partition_bins)) +
  geom_bar(stat="identity", position = "dodge") + 
  labs(title="Sample Variance SE compress")
compress_bar

shear_bar_data <- data.frame(time_bins,partition_bins,shear_var_all)
shear_bar <- ggplot(shear_bar_data, aes(time_bins, shear_var_all, fill = partition_bins)) +
  geom_bar(stat="identity", position = "dodge") + 
  labs(title="Sample Variance SE shear")
shear_bar

#arranged panels
collated_plots <- ggarrange(compress_bar, shear_bar, ncol=2, nrow=1, legend="none")

#plot with main title (change title content as necessary)
windows()
annotate_figure(collated_plots, top = text_grob("Sample variance for FEA data", 
               color = "black", face = "bold", size = 14))

