# CAFE bash processing

# Commands here were used to process CAFE output for analysis in R
# results_1rate_vert.cafe corresponds to a Supplementary File (file number missing here -- TODO: fill in)

# The format of results_1rate_vert.cafe looks like this (the tree is hard-wrapped below, so some labels are split across lines):
# IDs of nodes:(pmar<0>,((cmil<2>,((cpun<4>,rtyp<6>)<5>,(ccar<8>,stor<10>)<9>)<7>)<3>,((locu<12>,(sfor<14>,(drer<16>,(eluc<18>,(gmor<20>,((olat
# <22>,onil<24>)<23>,(gacu<26>,(mmol<28>,(dnig<30>,trub<32>)<31>)<29>)<27>)<25>)<21>)<19>)<17>)<15>)<13>,(lcha<34>,(xtro<36>,((acar<38>,(amis<40>
# ,ggal<42>)<41>)<39>,(oana<44>,(mdom<46>,((lafr<48>,pcap<50>)<49>,(dnov<52>,((hsap<54>,mmus<56>)<55>,(clup<58>,(sscr<60>,(btau<62>,(ttru<64>,(ba
# cu<66>,bmys<68>)<67>)<65>)<63>)<61>)<59>)<57>)<53>)<51>)<47>)<45>)<43>)<37>)<35>)<33>)<11>)<1>
# Output format for: ' Average Expansion', 'Expansions', 'No Change', 'Contractions', and 'Branch-specific P-values' = (node ID, node ID): 
# (continuation of the node-pair list; the numbers above each row give the running count of node IDs, two per pair)
#							10									   20										30
# (0,11) (2,7) (4,6) (5,9) (8,10) (3,33) (12,15) (14,17) (16,19) (18,21) (20,25) (22,24) (23,27) (26,29) (28,31)
#									40										50
# (30,32) (13,35) (34,37) (36,43) (38,41) (40,42) (39,45) (44,47) (46,51) (48,50) (49,53) (52,57) (54,56) (55,59) 
#	60								68
# (58,61) (60,63) (62,65) (64,67) (66,68) 

# ---------------------------------------------------------------------------
# Flatten the CAFE report into three tab-delimited tables for R.
#
# Input : results_1rate_vert.cafe, read from the current directory.
#         Lines starting with "OG" are the per-gene-family rows; the line
#         starting with a single quote is kept as well (presumably the
#         column-header row of the report -- confirm against the file).
# Output: vert_cafe_1r_table_og-familypval.txt  (fields 1 and 3)
#         vert_cafe_1r_table_viterbipval.txt    (field 1 + flattened field 4)
#         vert_cafe_1r_table_og-trees.txt       (fields 1 and 2, ';' appended)
#
# Portability: alternation is written with `grep -E` and tabs are passed to
# sed as a literal character held in $tab, because "\|" in a basic regex and
# "\t" inside a sed expression are GNU extensions (BSD/macOS sed treats "\t"
# as the letter t). The output is unchanged on GNU tools.
# ---------------------------------------------------------------------------
cafe_report=results_1rate_vert.cafe
tab=$(printf '\t')

# Extract gene family-wide p-values for gene families
grep -E "^'|^OG" "$cafe_report" |
    cut -f1,3 > vert_cafe_1r_table_og-familypval.txt

# Extract Viterbi p-values for branches for gene families
# Steps, in order:
#   1. keep the "# Output format for:" line (it carries the node-pair list)
#      and the OG rows;
#   2. keep fields 1 and 4 (a line without tabs passes through cut whole);
#   3. replace everything from '#' up to the last ": " with "Nodes<TAB>",
#      turning the node-pair line into the table's header row;
#   4. turn every ',' and ')' into a tab and drop every '(' so each value
#      of a pair becomes its own column;
#   5. collapse the doubled tabs this produces, then strip a trailing tab
#      (OG rows) or trailing tab+space (Nodes row).
# Note: the single spaces that separate pairs on the Nodes row are left in
# place (cells after the first pair start with a space), exactly as before.
grep -E '^# Output format for:|^OG' "$cafe_report" |
    cut -f1,4 |
    sed "s/#.*: /Nodes${tab}/" |
    tr ',' '\t' |
    tr -d '(' |
    tr ')' '\t' |
    sed -e "s/${tab}${tab}/${tab}/g" \
        -e "s/${tab}\$//" \
        -e "s/${tab} \$//" > vert_cafe_1r_table_viterbipval.txt

# Extract trees with ancestral state sizes for gene families
# A ';' is appended to every emitted line (including the quote-prefixed
# line) so that each tree string is terminated.
grep -E "^'|^OG" "$cafe_report" |
    cut -f1,2 |
    sed 's/$/;/' > vert_cafe_1r_table_og-trees.txt

# Output files were then analyzed in R using commands in cafe_output_R_process.txt