library(readxl)
library(ggplot2)
library(ggExtra)
library(gridExtra)
# Load the "Articles" sheet of the workbook; every later step reads from it.
mydata <- read_excel(path = "data.xlsx", sheet = "Articles")

# Extract information
# The two count columns are coerced with as.numeric(), so any entry that
# cannot be parsed as a number becomes NA.
sampleSize <-
  as.numeric(mydata$`Number of subjects participated (in total) (#)`)
femaleParticipants <-
  as.numeric(mydata$`Number of female participants partcipated (in total) (#)`)
years <- mydata$Year

# Remove non-existent information
# Keep only articles for which BOTH counts are known. Filtering on the female
# count alone would let a row with a missing sample size through, and
# binom.test() stops with an error when its `n` argument is NA.
# The mask is computed once so all three vectors stay aligned.
hasCounts <- !is.na(femaleParticipants) & !is.na(sampleSize)
years <- years[hasCounts]
sampleSize <- sampleSize[hasCounts]
femaleParticipants <- femaleParticipants[hasCounts]

# Run one binomial test per article against a null proportion of 0.5 and keep
# its p-value. vapply() over seq_along() returns numeric(0) for empty input
# (1:length(x) would iterate over c(1, 0)) and leaves no loop variables behind.
pValues <- vapply(
  seq_along(years),
  function(k) {
    binom.test(femaleParticipants[k], sampleSize[k], p = 0.5)$p.value
  },
  numeric(1)
)

# TRUE where the Benjamini-Hochberg adjusted p-value is below 0.05, i.e. where
# the null proportion of 0.5 is rejected for that article.
testIfFair <- p.adjust(pValues, method = "BH") < 0.05

# Observed share of female participants per article
proportion <- femaleParticipants / sampleSize

# One row per retained article
alldata <-
  data.frame(proportion, sampleSize, years, pValues, testIfFair)

# Number of articles per source
# Copy of the publication year under the column name `x`.
alldata$x <- alldata$years

# Columns of `mydata` that mark, with "Yes", the source an article came from.
# NOTE(review): "Brown et al" is used when the source matrix is built further
# down but is not counted here - confirm whether that omission is intentional.
sourceColumns <- c(
  "Initial Search",
  "Lok et al",
  "Souman et al",
  "Pachito et al",
  "Forbes et al",
  "Tuunainen et al",
  "Slanger et al",
  "Dennis et al"
)

# Row indices of the "Yes" entries for each source, kept in one named list.
# Separate single-letter variables are avoided on purpose: assigning to `T` or
# `F` would mask R's TRUE/FALSE shorthands for everything that runs afterwards.
# which() ignores NA comparisons, so blank cells are not counted.
sourceRows <- lapply(
  setNames(sourceColumns, sourceColumns),
  function(column) which(mydata[[column]] == "Yes")
)

# Print the number of articles per source, labelled by column name.
lengths(sourceRows)

# Trying to calculate overlap
# Source-flag columns, selected by name rather than by position so that a
# reordered or extended sheet cannot silently feed the wrong columns into the
# matrix; a missing column raises an error at the selection instead.
# NOTE(review): assumes these nine names are exactly the columns previously
# taken by position (6 to 14) - confirm against the workbook.
sourceNames <- c(
  "Initial Search",
  "Brown et al",
  "Lok et al",
  "Souman et al",
  "Pachito et al",
  "Forbes et al",
  "Tuunainen et al",
  "Slanger et al",
  "Dennis et al"
)
source <- mydata[, sourceNames]
# MD <- as.matrix(source)

# One 0/1 indicator vector per source: 1 where the cell equals "Yes", 0 for any
# other value, NA where the cell is missing.
indicators <- lapply(
  sourceNames,
  function(column) as.integer(as.character(source[[column]]) == "Yes")
)

# Columns are labelled a..i in the order of `sourceNames`. Building the matrix
# from a list avoids creating globals named `c` and `i`, which would shadow
# base::c and collide with common loop indices.
names(indicators) <- letters[seq_along(sourceNames)]
M <- do.call(cbind, indicators)


# Scatter plot: publication year against proportion of female participants,
# with points coloured by `testIfFair`. The legend is hidden.
pointColours <- c("#999999", "#E69F00")

# Theme tweaks: no legend, no grid lines, square plotting area.
plotTheme <- theme(
  legend.position = "none",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  aspect.ratio = 1
)

b <-
  ggplot(
    alldata,
    aes(x = years, y = proportion, colour = factor(testIfFair))
  ) +
  geom_point() +
  theme_linedraw() +
  plotTheme +
  ggtitle("Proportion of female \nparticipants") +
  xlab("Year of publication") +
  ylab("Proportion of female participants") +
  scale_color_manual(values = pointColours)

# Add marginal histograms, coloured and filled per group.
b1 <- ggMarginal(
  b,
  type = "histogram",
  size = 10,
  groupColour = TRUE,
  groupFill = TRUE
)

# Write the combined figure to disk as a 4 x 4 PDF.
ggsave(filename = "figure1.pdf", plot = b1, width = 4, height = 4)

# Mean proportion of female participants across the retained articles.
# (The vector is named `proportion`; `proportions` does not exist and made
# this line fail with "object not found".)
mean(proportion)

# Share of articles whose BH-adjusted binomial test is significant at 0.05.
# Despite its name, `testIfFair` is TRUE when the observed proportion differs
# significantly from 0.5, so this is the share of articles flagged as
# departing from an even split.
mean(testIfFair)

# Spearman rank correlations of the female proportion with sample size and
# with publication year. exact = FALSE requests the approximate p-value
# instead of the exact one.
cor.test(sampleSize, proportion, method = "spearman", exact = FALSE)
cor.test(years, proportion, method = "spearman", exact = FALSE)

