# Match peptide IDs from orthology determination to gene IDs used for ohnolog dataset

# Load library dplyr
library(dplyr)
# biomaRt is also required, but it is called via biomaRt:: instead of being
# attached so that it does not mask dplyr functions

# Read in Ensembl sequence IDs processed from step 1
all_ensembl_ids <- read.table("SequenceIDs_ens.txt")

# Human: map versioned peptide IDs (those containing "ENSP0") to gene IDs and
# write the three-column table without header or quoting.
# The ID table is flattened to a plain character vector before grep():
# read.table() returns a data frame, and grep() would otherwise coerce each
# whole column into a single deparsed string instead of matching ID by ID.
ensembl <- biomaRt::useMart("ensembl", dataset = "hsapiens_gene_ensembl")
ensembl_pep_gen_hsap <- biomaRt::getBM(
  attributes = c(
    "ensembl_peptide_id",
    "ensembl_peptide_id_version",
    "ensembl_gene_id"
  ),
  filters = "ensembl_peptide_id_version",
  values = grep(
    "ENSP0",
    as.character(unlist(all_ensembl_ids, use.names = FALSE)),
    value = TRUE
  ),
  mart = ensembl
)
write.table(
  ensembl_pep_gen_hsap,
  "ens_hsap_prot_genes.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)

# Anole (acarolinensis dataset): map versioned peptide IDs containing
# "ENSACAP" to gene IDs and write the table without header or quoting.
# The ID table is flattened to a plain character vector before grep():
# all_ensembl_ids is a data frame, and grep() would otherwise coerce each
# whole column into a single deparsed string instead of matching ID by ID.
ensembl <- biomaRt::useMart("ensembl", dataset = "acarolinensis_gene_ensembl")
ensembl_pep_gen_acar <- biomaRt::getBM(
  attributes = c(
    "ensembl_peptide_id",
    "ensembl_peptide_id_version",
    "ensembl_gene_id"
  ),
  filters = "ensembl_peptide_id_version",
  values = grep(
    "ENSACAP",
    as.character(unlist(all_ensembl_ids, use.names = FALSE)),
    value = TRUE
  ),
  mart = ensembl
)
write.table(
  ensembl_pep_gen_acar,
  "ens_acar_prot_genes.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)

# Spotted gar (loculatus dataset): map versioned peptide IDs containing
# "ENSLOCP" to gene IDs and write the table without header or quoting.
# The ID table is flattened to a plain character vector before grep():
# all_ensembl_ids is a data frame, and grep() would otherwise coerce each
# whole column into a single deparsed string instead of matching ID by ID.
ensembl <- biomaRt::useMart("ensembl", dataset = "loculatus_gene_ensembl")
ensembl_pep_gen_locu <- biomaRt::getBM(
  attributes = c(
    "ensembl_peptide_id",
    "ensembl_peptide_id_version",
    "ensembl_gene_id"
  ),
  filters = "ensembl_peptide_id_version",
  values = grep(
    "ENSLOCP",
    as.character(unlist(all_ensembl_ids, use.names = FALSE)),
    value = TRUE
  ),
  mart = ensembl
)
write.table(
  ensembl_pep_gen_locu,
  "ens_locu_prot_genes.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)

# Zebrafish (drerio dataset): map versioned peptide IDs containing "ENSDARP"
# to gene IDs and write the table without header or quoting.
# The ID table is flattened to a plain character vector before grep():
# all_ensembl_ids is a data frame, and grep() would otherwise coerce each
# whole column into a single deparsed string instead of matching ID by ID.
ensembl <- biomaRt::useMart("ensembl", dataset = "drerio_gene_ensembl")
ensembl_pep_gen_drer <- biomaRt::getBM(
  attributes = c(
    "ensembl_peptide_id",
    "ensembl_peptide_id_version",
    "ensembl_gene_id"
  ),
  filters = "ensembl_peptide_id_version",
  values = grep(
    "ENSDARP",
    as.character(unlist(all_ensembl_ids, use.names = FALSE)),
    value = TRUE
  ),
  mart = ensembl
)
write.table(
  ensembl_pep_gen_drer,
  "ens_drer_prot_genes.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)

# Opossum (mdomestica dataset): map versioned peptide IDs containing
# "ENSMODP" to gene IDs and write the table without header or quoting.
# The ID table is flattened to a plain character vector before grep():
# all_ensembl_ids is a data frame, and grep() would otherwise coerce each
# whole column into a single deparsed string instead of matching ID by ID.
ensembl <- biomaRt::useMart("ensembl", dataset = "mdomestica_gene_ensembl")
ensembl_pep_gen_mdom <- biomaRt::getBM(
  attributes = c(
    "ensembl_peptide_id",
    "ensembl_peptide_id_version",
    "ensembl_gene_id"
  ),
  filters = "ensembl_peptide_id_version",
  values = grep(
    "ENSMODP",
    as.character(unlist(all_ensembl_ids, use.names = FALSE)),
    value = TRUE
  ),
  mart = ensembl
)
write.table(
  ensembl_pep_gen_mdom,
  "ens_mdom_prot_genes.txt",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)