#! /bin/sh
# Count, for every sequence position, how often RNAfold predicts a dot, an
# opening bracket or a closing bracket at that position.
#
# Usage: <this script> SEQUENCE_FILE
#
# SEQUENCE_FILE is fed to RNAfold on stdin. Two files are written to the
# current directory:
#   RNAfold.out     raw RNAfold output (folding temperature 37C)
#   ssfreq_out.txt  one line per position:
#                   position, count of '.', count of '(', count of ')'
#
# Requires RNAfold and gawk on PATH.

# Print a message to stderr and abort with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ "$#" -lt 1 ]; then
  printf 'Usage: %s SEQUENCE_FILE\n' "${0##*/}" >&2
  exit 2
fi

input=$1
[ -r "$input" ] || die "cannot read input file: $input"

# Fail early with a clear message instead of producing empty results later.
for tool in RNAfold gawk; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
done

echo "calculating secondary structure predictions at 37C (stored in RNAfold.out)"
RNAfold --noPS -T 37 < "$input" > RNAfold.out \
  || die "RNAfold failed on $input"

echo "preparing output for ss-frequency calculation..."
echo "extracting ss-frequencies"
echo "creating outputfile - column order is '.' '(' ')'"

# Single pass over the RNAfold output, no scratch files needed.
# Only lines whose first field is made purely of '.', '(' and ')' are
# structure lines; headers, sequence lines (including ones consisting only
# of U or other letters) and blank lines are ignored. The trailing energy
# field of a structure line is a separate whitespace-delimited field and is
# therefore not part of $1.
gawk '
  $1 ~ /^[.()]+$/ {
    len = length($1)
    if (len > max_len) max_len = len
    for (i = 1; i <= len; i++) {
      ch = substr($1, i, 1)
      if (ch == ".")      n_dot[i]++
      else if (ch == "(") n_open[i]++
      else                n_close[i]++
    }
  }
  END {
    # Unset counters format as 0 with %d. The separators reproduce the
    # historical layout: "pos \t dots<TAB>open<TAB>close".
    for (i = 1; i <= max_len; i++)
      printf "%d \t %d\t%d\t%d\n", i, n_dot[i], n_open[i], n_close[i]
  }
' RNAfold.out > ssfreq_out.txt || die "failed to compute ss-frequencies"

echo "finished :)"
