#!/usr/local/bin/perl
use strict;

##### DISCLAIMER
#
# motifsearchTreeDecomposed.pl: look for motifs in 3'UTR DNA sequences to 
# predict the group (mode, timing) of translational regulation by CPE
#
# authors: Sylvain Foissac (implementation), Pique et al. (everything else)
# reference: "A Combinatorial Code for CPE-Mediated Translational Control"
#            Pique et al., Cell 132;3 (pp 434-48), 2008
# (especially fig S7: www.cell.com/cgi/content/full/132/3/434/DC1/mmc1.pdf)
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License or the Artistic License.
# This program is distributed in the big hope that it will be useful, but
# WITHOUT ANY WARRANTY. Use at your own risk, blame only yourself.
#
# That said, any question, comment, request, proposition, idea, improvement,
# criticism, postcard or bottle of good wine is more than welcome. Thanks!
#
# contact: sylvainfoissac@gmail.com

# Usage text, printed (via die) when an input line does not consist of exactly
# two whitespace-separated fields. The heredoc is single-quoted, so the gawk
# one-liner below is kept verbatim (no interpolation of $1, $0 or \n).
my $USAGE =<<'+++EOH+++';
USAGE:

  FastaToTbl your_input_(multi)fastafile | ./motifsearchTreeDecomposed.pl
OR
  cat your_input_(multi)fastafile | gawk '{if (substr($1,1,1)==">"){if (NR>1){printf "\n%s ", substr($1,2,length($1)-1)}else{printf "%s ", substr($1,2,length($1)-1)}}else{printf "%s", $0}}END{printf "\n"}' | ./motifsearchTreeDecomposed.pl
OR
  ./motifsearchTreeDecomposed.pl < TblFile

(TblFile format: one sequence per line, first field is the ID, second field is the sequence, fields are space separated)
+++EOH+++

# Negative-control switch: when set to 1, every motif occurrence except the
# polyA hexanucleotide (HEXA) is recorded at a random position of the sequence
# instead of its real position.
my $RANDOM_SHUFFLING_MODE=0;

# When true, every matching configuration is printed together with the
# coordinates of the motifs involved, in addition to the final groups.
my $VERBOSE=0;

##### DEFINING THE MOTIFS:
# Motif name => regular expression (stored as a string). Each pattern is wrapped
# in one outer capture group because the search loop measures the length of an
# occurrence with length($1).
# NOTE: only the motifs listed here are searched. CPENC and PBE are currently
# disabled (see the commented definitions below), so the configurations of the
# main loop that require them can never be reported with this motif set.
my %REGEXP = (
    # polyA hexanucleotide signal
    "HEXA" => '(A[AT]TAAA)',
    # Custom CPEC by Raul on Feb 2018
    "CPEC" => '(TTTTGT)',
);
# The rest is commented
#$REGEXP{"CPEC"}='(TTTTAA?T)';
#$REGEXP{"CPENC"}='((TTTTACT)|(TTTTAA[AG]T)|(TTTTCAT))';
# these mixed cases are now decomposed
##$REGEXP{"CPECNC"}='((TTTTAA?T)|(TTTTACT)|(TTTTAA[AG]T)|(TTTTCAT))';
##$REGEXP{"CPEHEXA"}='((TTTTAAATT?AAA)|(TTTTAATT?AAA)|(TTTTATTAAA)|(TTTTCATTAAA))';
##$REGEXP{"CPECHEXA"}='((TTTTAATT?AAA)|(TTTTATTAAA))';
##$REGEXP{"CPENCHEXA"}='((TTTTAAATT?AAA)|(TTTTCATTAAA))';
#$REGEXP{"PBE"}='(TGTA[ACGT]ATA)';

# Names of the active motifs.
my @MOTIF = keys(%REGEXP);

##### DEFINING THE DISTANCES:
# Maximum number of nucleotides allowed between the end of a HEXA occurrence
# and the end of the sequence for it to be kept as a polyA signal.
my $polyAdist = 30;

# Allowed gaps between two motifs. Each ($minA_B, $maxA_B) pair is an inclusive
# range compared against (start of downstream motif) - (end of upstream motif) - 1,
# i.e. the number of nucleotides separating them; a negative gap means overlap.
my ($min0_50,   $max0_50)   = (0,  50);
my ($min3_45,   $max3_45)   = (3,  45);
my ($min15_30,  $max15_30)  = (15, 30);
my ($min0_30,   $max0_30)   = (0,  30);
my ($min0_2,    $max0_2)    = (0,  2);
my ($min46_100, $max46_100) = (46, 100);
my ($min0_36,   $max0_36)   = (0,  36);
my ($min0_45,   $max0_45)   = (0,  45);
my ($minminus1_minus4, $maxminus1_minus4) = (-4, -1);

# motifs with fixed length:
my ($hexalength, $pbelength) = (6, 8);

# Counter of input lines; incremented once per line by the main loop.
my $n=0;

# Seed for the rand() calls of the random shuffling mode. Perl picks a suitable
# seed by itself, so there is no need to spawn an external "ps | gzip" pipeline
# (slow, and unavailable on some systems) just to gather entropy.
srand();

# Read the sequences provided in input, one "ID sequence" record per line.
# For every record:
#   1. locate all occurrences of the motifs defined in %REGEXP;
#   2. test every group configuration against the relative positions of these
#      occurrences (all combinations of occurrences are tried);
#   3. apply the priority rules and print one "<ID> <group>" line per group.
# With $VERBOSE set, every matching configuration is also printed with the
# coordinates (0-based offsets in the sequence) of the motifs involved.
# A progress dot per record and the warnings go to STDERR.
while (<>) {
    $n++;
    chomp();
    my @L = split();
    # exactly two fields are expected: the ID and the sequence
    ($#L == 1) || die "$USAGE";

    my ($ID, $seq) = ($L[0], $L[1]);
    my $seqlen = length($seq);

    # motif name => list of start offsets / list of end offsets (same order)
    my %BEG;
    my %END;
    print STDERR ".";

    ## FIRST PART, LOOKING FOR THE MOTIFS
    # localization of the binding sites in the sequences
    for my $motif (sort keys %REGEXP) {
        my $regexp = $REGEXP{$motif};

        # the look-ahead keeps the match zero-width, so overlapping occurrences
        # are all found and pos() is the start offset of the occurrence
        while ($seq =~ m/(?=$regexp)/gi) {
            my $start    = pos($seq);
            my $motiflen = length($1);

            # special case for the polyA hexanucleotide signal: only kept if
            # close to the end of the sequence
            next if $motif eq "HEXA" && $seqlen - $start >= $polyAdist + $motiflen + 1;

            # negative control: the motif (never the HEXA) is moved to a random
            # position of the sequence
            if ($RANDOM_SHUFFLING_MODE == 1 && $motif ne "HEXA") {
                $start = int(rand($seqlen - $motiflen - 1));
            }

            # remember the position of this occurrence of the motif
            push(@{ $BEG{$motif} }, $start);
            push(@{ $END{$motif} }, $start + $motiflen - 1);
        }
    }

    # Internal test, sequences should be already filtered for the presence of
    # the polyA signal hexanucleotide
    my $hexa_begs = $BEG{"HEXA"} || [];
    if (!@$hexa_begs) {
        print STDERR "WARNING! in seq $ID , polyA hexanucleotide signal was not found!!\n";
        next;
    }

    ## SECOND PART, LOOKING FOR THE GROUPS
    ## Modelled as a constraint satisfaction problem: for each group, the set
    ## of constraints defined by the relative position of the motifs is tested
    ## on all occurrences through a search tree.

    # GROUPS: one flag per configuration, set as soon as one combination of
    # occurrences satisfies it
    my ($repression1, $repression2, $repression3, $repression4) = (0) x 4;
    my ($activation_biphasic1, $activation_biphasic2, $activation_biphasic3,
        $activation_biphasic4, $activation_biphasic5, $activation_biphasic6) = (0) x 6;
    my ($activation_early_strong1, $activation_early_strong2, $activation_early_strong3) = (0) x 3;
    my ($activation_early_weak1, $activation_early_weak2, $activation_early_weak3,
        $activation_early_weak4, $activation_early_weak5, $activation_early_weak6,
        $activation_early_weak7) = (0) x 7;
    my ($activation_late_strong1, $activation_late_strong2, $activation_late_strong3,
        $activation_late_strong4, $activation_late_strong5, $activation_late_strong6) = (0) x 6;
    my ($activation_late_weak1, $activation_late_weak2, $activation_late_weak3,
        $activation_late_weak4, $activation_late_weak5, $activation_late_weak6,
        $activation_late_weak7, $activation_late_weak8, $activation_late_weak9) = (0) x 9;

    # same flags once the priority rules have been applied
    my ($repression1_final, $repression2_final, $repression3_final, $repression4_final) = (0) x 4;
    my ($activation_biphasic1_final, $activation_biphasic2_final, $activation_biphasic3_final,
        $activation_biphasic4_final, $activation_biphasic5_final, $activation_biphasic6_final) = (0) x 6;
    my ($activation_early_strong1_final, $activation_early_strong2_final,
        $activation_early_strong3_final) = (0) x 3;
    my ($activation_early_weak1_final, $activation_early_weak2_final, $activation_early_weak3_final,
        $activation_early_weak4_final, $activation_early_weak5_final, $activation_early_weak6_final,
        $activation_early_weak7_final) = (0) x 7;
    my ($activation_late_strong1_final, $activation_late_strong2_final, $activation_late_strong3_final,
        $activation_late_strong4_final, $activation_late_strong5_final, $activation_late_strong6_final) = (0) x 6;
    my ($activation_late_weak1_final, $activation_late_weak2_final, $activation_late_weak3_final,
        $activation_late_weak4_final, $activation_late_weak5_final, $activation_late_weak6_final,
        $activation_late_weak7_final, $activation_late_weak8_final, $activation_late_weak9_final) = (0) x 9;

    # class-level summaries
    my $activation_late_strong_FINAL  = 0;
    my $activation_late_weak_FINAL    = 0;
    my $activation_late_FINAL         = 0;
    my $activation_biphasic_FINAL     = 0;
    my $activation_early_strong_FINAL = 0;
    my $activation_early_weak_FINAL   = 0;
    my $activation_early_FINAL        = 0;
    my $activation_FINAL              = 0;
    my $repression_FINAL              = 0;
    # total means regulated (activation or repression or both)
    my $total_FINAL                   = 0;

    # start from the polyA signal: the last HEXA occurrence kept
    my $hexa    = $hexa_begs->[-1];
    my $hexaend = $hexa + $hexalength - 1;

    # occurrences as [begin, end] pairs; a motif that is not searched (absent
    # from %REGEXP) simply gives an empty list
    my @cpec  = _motif_spans(\%BEG, \%END, "CPEC");
    my @cpenc = _motif_spans(\%BEG, \%END, "CPENC");
    my @pbe   = map { [ $_, $_ + $pbelength - 1 ] } @{ $BEG{"PBE"} || [] };

    # REPRESSION
    # repression1, 2 and 3 need a CPEC
    for my $cpec (@cpec) {
        my ($cpecbeg, $cpecend) = @$cpec;

        # repression1: need for another CPEC before
        for my $cpec2 (@cpec) {
            my ($cpec2beg, $cpec2end) = @$cpec2;
            if (_within($cpecbeg - $cpec2end - 1, $min0_50, $max0_50)) {
                $repression1 = 1;
                print "$ID repression1 CPEC $cpec2beg-$cpec2end CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
            }
        }
        # repression2: need for a CPENC before
        for my $cpenc2 (@cpenc) {
            my ($cpenc2beg, $cpenc2end) = @$cpenc2;
            if (_within($cpecbeg - $cpenc2end - 1, $min0_50, $max0_50)) {
                $repression2 = 1;
                print "$ID repression2 CPENC $cpenc2beg-$cpenc2end CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
            }
        }
        # repression3: need for a CPENC after
        for my $cpenc2 (@cpenc) {
            my ($cpenc2beg, $cpenc2end) = @$cpenc2;
            if (_within($cpenc2beg - $cpecend - 1, $min0_50, $max0_50)) {
                $repression3 = 1;
                print "$ID repression3 CPEC $cpecbeg-$cpecend CPENC $cpenc2beg-$cpenc2end HEXA $hexa\n" if $VERBOSE;
            }
        }
    }

    # repression4: need for 2 CPENC and a PBE before
    for my $cpenc (@cpenc) {
        my ($cpencbeg, $cpencend) = @$cpenc;
        for my $cpenc2 (@cpenc) {
            my ($cpenc2beg, $cpenc2end) = @$cpenc2;
            next unless _within($cpencbeg - $cpenc2end - 1, $min0_50, $max0_50);
            for my $pbe (@pbe) {
                my ($pbebeg, $pbeend) = @$pbe;
                if (_within($cpenc2beg - $pbeend - 1, $min15_30, $max15_30)) {
                    $repression4 = 1;
                    print "$ID repression4 PBE $pbebeg-$pbeend CPENC $cpenc2beg-$cpenc2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
                }
            }
        }
    }

    # ACTIVATIONDECOMPOSED EARLY STRONG
    # strong1: need a CPEC close to the HEXA
    for my $cpec (@cpec) {
        my ($cpecbeg, $cpecend) = @$cpec;
        if (_within($hexa - $cpecend - 1, $min3_45, $max3_45)) {
            $activation_early_strong1 = 1;
            print "$ID activation_early_strong1 CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
        }
    }
    # strong2 and strong3: need a CPENC close to the HEXA
    for my $cpenc (@cpenc) {
        my ($cpencbeg, $cpencend) = @$cpenc;
        next unless _within($hexa - $cpencend - 1, $min3_45, $max3_45);

        # strong2 needs also a PBE
        for my $pbe (@pbe) {
            my ($pbebeg, $pbeend) = @$pbe;
            if (_within($cpencbeg - $pbeend - 1, $min15_30, $max15_30)) {
                $activation_early_strong2 = 1;
                print "$ID activation_early_strong2 PBE $pbebeg-$pbeend CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
        }
        # and for strong3 we want a CPEC
        for my $cpec (@cpec) {
            my ($cpecbeg, $cpecend) = @$cpec;
            if (_within($cpencbeg - $cpecend - 1, $min0_50, $max0_50)) {
                $activation_early_strong3 = 1;
                print "$ID activation_early_strong3 CPEC $cpecbeg-$cpecend CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
        }
    }

    # ACTIVATIONDECOMPOSED EARLY WEAK
    # weak1: need a CPEC very close to the HEXA
    for my $cpec (@cpec) {
        my ($cpecbeg, $cpecend) = @$cpec;
        if (_within($hexa - $cpecend - 1, $min0_2, $max0_2)) {
            $activation_early_weak1 = 1;
            print "$ID activation_early_weak1 CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
        }
    }
    # weak2: need a CPENC very close to the HEXA and a PBE
    for my $cpenc (@cpenc) {
        my ($cpencbeg, $cpencend) = @$cpenc;
        next unless _within($hexa - $cpencend - 1, $min0_2, $max0_2);
        for my $pbe (@pbe) {
            my ($pbebeg, $pbeend) = @$pbe;
            if (_within($cpencbeg - $pbeend - 1, $min15_30, $max15_30)) {
                $activation_early_weak2 = 1;
                print "$ID activation_early_weak2 PBE $pbebeg-$pbeend CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
        }
    }

    # weak3-5: same elements but further from the polyA signal
    # weak3: need a CPEC far from the HEXA
    for my $cpec (@cpec) {
        my ($cpecbeg, $cpecend) = @$cpec;
        if (_within($hexa - $cpecend - 1, $min46_100, $max46_100)) {
            $activation_early_weak3 = 1;
            print "$ID activation_early_weak3 CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
        }
    }
    # weak4 and weak5: need a CPENC far from the HEXA
    for my $cpenc (@cpenc) {
        my ($cpencbeg, $cpencend) = @$cpenc;
        next unless _within($hexa - $cpencend - 1, $min46_100, $max46_100);

        # weak4 needs also a PBE
        for my $pbe (@pbe) {
            my ($pbebeg, $pbeend) = @$pbe;
            if (_within($cpencbeg - $pbeend - 1, $min15_30, $max15_30)) {
                $activation_early_weak4 = 1;
                print "$ID activation_early_weak4 PBE $pbebeg-$pbeend CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
        }
        # and for weak5 we want a CPEC
        for my $cpec (@cpec) {
            my ($cpecbeg, $cpecend) = @$cpec;
            if (_within($cpencbeg - $cpecend - 1, $min0_50, $max0_50)) {
                $activation_early_weak5 = 1;
                print "$ID activation_early_weak5 CPEC $cpecbeg-$cpecend CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
        }
    }
    # weak6: need a CPEC after the HEXA
    for my $cpec (@cpec) {
        my ($cpecbeg, $cpecend) = @$cpec;
        if (_within($cpecbeg - $hexaend - 1, $min0_30, $max0_30)) {
            $activation_early_weak6 = 1;
            print "$ID activation_early_weak6 HEXA $hexa CPEC $cpecbeg-$cpecend\n" if $VERBOSE;
        }
    }
    # weak7: need a CPENC after the HEXA and a PBE before the HEXA
    for my $cpenc (@cpenc) {
        my ($cpencbeg, $cpencend) = @$cpenc;
        next unless _within($cpencbeg - $hexaend - 1, $min0_30, $max0_30);
        for my $pbe (@pbe) {
            my ($pbebeg, $pbeend) = @$pbe;
            if (_within($cpencbeg - $pbeend - 1, $min15_30, $max15_30) && $pbebeg < $hexa) {
                $activation_early_weak7 = 1;
                print "$ID activation_early_weak7 PBE $pbebeg-$pbeend HEXA $hexa CPENC $cpencbeg-$cpencend\n" if $VERBOSE;
            }
        }
    }

    # ACTIVATIONDECOMPOSED BIPHASIC
    # biphasic1-3: a CPEC very close to the HEXA plus an upstream element
    for my $cpec (@cpec) {
        my ($cpecbeg, $cpecend) = @$cpec;
        next unless _within($hexa - $cpecend - 1, $min0_2, $max0_2);

        # biphasic1: need another CPEC
        for my $cpec2 (@cpec) {
            my ($cpec2beg, $cpec2end) = @$cpec2;
            if (_within($cpecbeg - $cpec2end - 1, $min0_36, $max0_36)) {
                $activation_biphasic1 = 1;
                print "$ID activation_biphasic1 CPEC $cpec2beg-$cpec2end CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
            }
        }

        # biphasic2 and biphasic3: need a CPENC
        for my $cpenc (@cpenc) {
            my ($cpencbeg, $cpencend) = @$cpenc;
            next unless _within($cpecbeg - $cpencend - 1, $min0_36, $max0_36);

            # biphasic2 needs also a PBE
            for my $pbe (@pbe) {
                my ($pbebeg, $pbeend) = @$pbe;
                if (_within($cpencbeg - $pbeend - 1, $min15_30, $max15_30)) {
                    $activation_biphasic2 = 1;
                    print "$ID activation_biphasic2 PBE $pbebeg-$pbeend CPENC $cpencbeg-$cpencend CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
                }
            }
            # and for biphasic3 we want a CPEC
            for my $cpec2 (@cpec) {
                my ($cpec2beg, $cpec2end) = @$cpec2;
                if (_within($cpencbeg - $cpec2end - 1, $min0_50, $max0_50)) {
                    $activation_biphasic3 = 1;
                    print "$ID activation_biphasic3 CPEC $cpec2beg-$cpec2end CPENC $cpencbeg-$cpencend CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
                }
            }
        }
    }
    # biphasic4-6: same with a CPENC very close to the HEXA
    for my $cpenc (@cpenc) {
        my ($cpencbeg, $cpencend) = @$cpenc;
        next unless _within($hexa - $cpencend - 1, $min0_2, $max0_2);

        # biphasic4: need a CPEC
        for my $cpec2 (@cpec) {
            my ($cpec2beg, $cpec2end) = @$cpec2;
            if (_within($cpencbeg - $cpec2end - 1, $min0_36, $max0_36)) {
                $activation_biphasic4 = 1;
                print "$ID activation_biphasic4 CPEC $cpec2beg-$cpec2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
        }

        # biphasic5 and biphasic6: need another CPENC
        for my $cpenc2 (@cpenc) {
            my ($cpenc2beg, $cpenc2end) = @$cpenc2;
            next unless _within($cpencbeg - $cpenc2end - 1, $min0_36, $max0_36);

            # biphasic5 needs also a PBE
            for my $pbe (@pbe) {
                my ($pbebeg, $pbeend) = @$pbe;
                if (_within($cpenc2beg - $pbeend - 1, $min15_30, $max15_30)) {
                    $activation_biphasic5 = 1;
                    print "$ID activation_biphasic5 PBE $pbebeg-$pbeend CPENC $cpenc2beg-$cpenc2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
                }
            }
            # and for biphasic6 we want a CPEC
            for my $cpec2 (@cpec) {
                my ($cpec2beg, $cpec2end) = @$cpec2;
                if (_within($cpenc2beg - $cpec2end - 1, $min0_50, $max0_50)) {
                    $activation_biphasic6 = 1;
                    print "$ID activation_biphasic6 CPEC $cpec2beg-$cpec2end CPENC $cpenc2beg-$cpenc2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
                }
            }
        }
    }

    # ACTIVATIONDECOMPOSED LATE strong 1-3 and weak 1-5: a CPEC is overlapping
    # the HEXA (negative gap). The overlap is detected from the coordinates of
    # the two motifs rather than with a dedicated combined CPE+HEXA pattern.
    for my $cpec (@cpec) {
        my ($cpecbeg, $cpecend) = @$cpec;
        next unless _within($hexa - $cpecend - 1, $minminus1_minus4, $maxminus1_minus4);

        for my $cpec2 (@cpec) {
            my ($cpec2beg, $cpec2end) = @$cpec2;
            my $gap = $cpecbeg - $cpec2end - 1;

            # late strong1: upstream CPEC
            if (_within($gap, $min0_45, $max0_45)) {
                $activation_late_strong1 = 1;
                print "$ID activation_late_strong1 CPEC $cpec2beg-$cpec2end CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
            }
            # late weak1: same but further
            if (_within($gap, $min46_100, $max46_100)) {
                $activation_late_weak1 = 1;
                print "$ID activation_late_weak1 CPEC $cpec2beg-$cpec2end CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
            }
        }

        # late strong2-3 and late weak2-3: need an upstream CPENC
        for my $cpenc (@cpenc) {
            my ($cpencbeg, $cpencend) = @$cpenc;
            my $gap = $cpecbeg - $cpencend - 1;

            # late strong2 and 3
            if (_within($gap, $min0_45, $max0_45)) {
                # late_strong2 needs also a PBE
                for my $pbe (@pbe) {
                    my ($pbebeg, $pbeend) = @$pbe;
                    if (_within($cpencbeg - $pbeend - 1, $min15_30, $max15_30)) {
                        $activation_late_strong2 = 1;
                        print "$ID activation_late_strong2 PBE $pbebeg-$pbeend CPENC $cpencbeg-$cpencend CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
                    }
                }
                # and for late_strong3 we want a CPEC
                for my $cpec2 (@cpec) {
                    my ($cpec2beg, $cpec2end) = @$cpec2;
                    if (_within($cpencbeg - $cpec2end - 1, $min0_50, $max0_50)) {
                        $activation_late_strong3 = 1;
                        print "$ID activation_late_strong3 CPEC $cpec2beg-$cpec2end CPENC $cpencbeg-$cpencend CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
                    }
                }
            }
            # late weak2 and 3: same elements but further
            if (_within($gap, $min46_100, $max46_100)) {
                # late_weak2 needs also a PBE
                for my $pbe (@pbe) {
                    my ($pbebeg, $pbeend) = @$pbe;
                    if (_within($cpencbeg - $pbeend - 1, $min15_30, $max15_30)) {
                        $activation_late_weak2 = 1;
                        print "$ID activation_late_weak2 PBE $pbebeg-$pbeend CPENC $cpencbeg-$cpencend CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
                    }
                }
                # and for late_weak3 we want a CPEC
                for my $cpec2 (@cpec) {
                    my ($cpec2beg, $cpec2end) = @$cpec2;
                    if (_within($cpencbeg - $cpec2end - 1, $min0_50, $max0_50)) {
                        $activation_late_weak3 = 1;
                        print "$ID activation_late_weak3 CPEC $cpec2beg-$cpec2end CPENC $cpencbeg-$cpencend CPEC $cpecbeg-$cpecend HEXA $hexa\n" if $VERBOSE;
                    }
                }
            }
        }

        # for late weak4 and late weak5, we want a CPEC or CPENC after the HEXA
        # late weak4: downstream CPEC
        for my $cpec2 (@cpec) {
            my ($cpec2beg, $cpec2end) = @$cpec2;
            if (_within($cpec2beg - $hexaend - 1, $min0_30, $max0_30)) {
                $activation_late_weak4 = 1;
                print "$ID activation_late_weak4 CPEC $cpecbeg-$cpecend HEXA $hexa CPEC $cpec2beg-$cpec2end\n" if $VERBOSE;
            }
        }
        # late weak5: downstream CPENC
        for my $cpenc (@cpenc) {
            my ($cpencbeg, $cpencend) = @$cpenc;
            if (_within($cpencbeg - $hexaend - 1, $min0_30, $max0_30)) {
                $activation_late_weak5 = 1;
                print "$ID activation_late_weak5 CPEC $cpecbeg-$cpecend HEXA $hexa CPENC $cpencbeg-$cpencend\n" if $VERBOSE;
            }
        }
    } # end of looking for overlapping CPEC

    # ACTIVATIONDECOMPOSED LATE strong 4-6 and weak 6-9: a CPENC is overlapping
    # the HEXA
    for my $cpenc (@cpenc) {
        my ($cpencbeg, $cpencend) = @$cpenc;
        next unless _within($hexa - $cpencend - 1, $minminus1_minus4, $maxminus1_minus4);

        for my $cpec (@cpec) {
            my ($cpecbeg, $cpecend) = @$cpec;
            my $gap = $cpencbeg - $cpecend - 1;

            # late strong4: upstream CPEC
            if (_within($gap, $min0_45, $max0_45)) {
                $activation_late_strong4 = 1;
                print "$ID activation_late_strong4 CPEC $cpecbeg-$cpecend CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
            # late weak6: same but further
            if (_within($gap, $min46_100, $max46_100)) {
                $activation_late_weak6 = 1;
                print "$ID activation_late_weak6 CPEC $cpecbeg-$cpecend CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
            }
        }

        # late strong5-6 and late weak7-8: need another, upstream CPENC
        for my $cpenc2 (@cpenc) {
            my ($cpenc2beg, $cpenc2end) = @$cpenc2;
            my $gap = $cpencbeg - $cpenc2end - 1;

            # late strong5 and 6
            if (_within($gap, $min0_45, $max0_45)) {
                # late_strong5 needs also a PBE
                for my $pbe (@pbe) {
                    my ($pbebeg, $pbeend) = @$pbe;
                    if (_within($cpenc2beg - $pbeend - 1, $min15_30, $max15_30)) {
                        $activation_late_strong5 = 1;
                        print "$ID activation_late_strong5 PBE $pbebeg-$pbeend CPENC $cpenc2beg-$cpenc2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
                    }
                }
                # and for late_strong6 we want a CPEC
                for my $cpec2 (@cpec) {
                    my ($cpec2beg, $cpec2end) = @$cpec2;
                    if (_within($cpenc2beg - $cpec2end - 1, $min0_50, $max0_50)) {
                        $activation_late_strong6 = 1;
                        # report the upstream CPENC first, then the one overlapping the HEXA
                        print "$ID activation_late_strong6 CPEC $cpec2beg-$cpec2end CPENC $cpenc2beg-$cpenc2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
                    }
                }
            }
            # late weak7 and 8: same elements but further
            if (_within($gap, $min46_100, $max46_100)) {
                # late_weak7 needs also a PBE
                for my $pbe (@pbe) {
                    my ($pbebeg, $pbeend) = @$pbe;
                    if (_within($cpenc2beg - $pbeend - 1, $min15_30, $max15_30)) {
                        $activation_late_weak7 = 1;
                        print "$ID activation_late_weak7 PBE $pbebeg-$pbeend CPENC $cpenc2beg-$cpenc2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
                    }
                }
                # and for late_weak8 we want a CPEC
                for my $cpec2 (@cpec) {
                    my ($cpec2beg, $cpec2end) = @$cpec2;
                    if (_within($cpenc2beg - $cpec2end - 1, $min0_50, $max0_50)) {
                        $activation_late_weak8 = 1;
                        print "$ID activation_late_weak8 CPEC $cpec2beg-$cpec2end CPENC $cpenc2beg-$cpenc2end CPENC $cpencbeg-$cpencend HEXA $hexa\n" if $VERBOSE;
                    }
                }
            }
        }

        # late weak9: downstream CPEC
        for my $cpec (@cpec) {
            my ($cpecbeg, $cpecend) = @$cpec;
            if (_within($cpecbeg - $hexaend - 1, $min0_30, $max0_30)) {
                $activation_late_weak9 = 1;
                print "$ID activation_late_weak9 CPENC $cpencbeg-$cpencend HEXA $hexa CPEC $cpecbeg-$cpecend\n" if $VERBOSE;
            }
        }
    }

    # FINAL groups
    # applying the priority rules defined experimentally

    # repression is independent
    $repression_FINAL  = (($repression1) || ($repression2) || ($repression3) || ($repression4));
    $repression1_final = $repression1;
    $repression2_final = $repression2;
    $repression3_final = $repression3;
    $repression4_final = $repression4;

    # activation groups are exclusive; only the first class with a hit is
    # reported: late strong > late weak > biphasic > early strong > early weak
    if ($activation_late_strong1 || $activation_late_strong2 || $activation_late_strong3
        || $activation_late_strong4 || $activation_late_strong5 || $activation_late_strong6) {
        $activation_late_strong1_final = $activation_late_strong1;
        $activation_late_strong2_final = $activation_late_strong2;
        $activation_late_strong3_final = $activation_late_strong3;
        $activation_late_strong4_final = $activation_late_strong4;
        $activation_late_strong5_final = $activation_late_strong5;
        $activation_late_strong6_final = $activation_late_strong6;
        $activation_late_strong_FINAL  = 1;
        $activation_late_FINAL         = 1;
        $activation_FINAL              = 1;
    }
    elsif ($activation_late_weak1 || $activation_late_weak2 || $activation_late_weak3
        || $activation_late_weak4 || $activation_late_weak5 || $activation_late_weak6
        || $activation_late_weak7 || $activation_late_weak8 || $activation_late_weak9) {
        $activation_late_weak1_final = $activation_late_weak1;
        $activation_late_weak2_final = $activation_late_weak2;
        $activation_late_weak3_final = $activation_late_weak3;
        $activation_late_weak4_final = $activation_late_weak4;
        $activation_late_weak5_final = $activation_late_weak5;
        $activation_late_weak6_final = $activation_late_weak6;
        $activation_late_weak7_final = $activation_late_weak7;
        $activation_late_weak8_final = $activation_late_weak8;
        $activation_late_weak9_final = $activation_late_weak9;
        $activation_late_weak_FINAL  = 1;
        $activation_late_FINAL       = 1;
        $activation_FINAL            = 1;
    }
    elsif ($activation_biphasic1 || $activation_biphasic2 || $activation_biphasic3
        || $activation_biphasic4 || $activation_biphasic5 || $activation_biphasic6) {
        $activation_biphasic1_final = $activation_biphasic1;
        $activation_biphasic2_final = $activation_biphasic2;
        $activation_biphasic3_final = $activation_biphasic3;
        $activation_biphasic4_final = $activation_biphasic4;
        $activation_biphasic5_final = $activation_biphasic5;
        $activation_biphasic6_final = $activation_biphasic6;
        $activation_biphasic_FINAL  = 1;
        $activation_FINAL           = 1;
    }
    elsif ($activation_early_strong1 || $activation_early_strong2 || $activation_early_strong3) {
        $activation_early_strong1_final = $activation_early_strong1;
        $activation_early_strong2_final = $activation_early_strong2;
        $activation_early_strong3_final = $activation_early_strong3;
        $activation_early_strong_FINAL  = 1;
        $activation_early_FINAL         = 1;
        $activation_FINAL               = 1;
    }
    elsif ($activation_early_weak1 || $activation_early_weak2 || $activation_early_weak3
        || $activation_early_weak4 || $activation_early_weak5 || $activation_early_weak6
        || $activation_early_weak7) {
        $activation_early_weak1_final = $activation_early_weak1;
        $activation_early_weak2_final = $activation_early_weak2;
        $activation_early_weak3_final = $activation_early_weak3;
        $activation_early_weak4_final = $activation_early_weak4;
        $activation_early_weak5_final = $activation_early_weak5;
        $activation_early_weak6_final = $activation_early_weak6;
        $activation_early_weak7_final = $activation_early_weak7;
        $activation_early_weak_FINAL  = 1;
        $activation_early_FINAL       = 1;
        $activation_FINAL             = 1;
    }
    $total_FINAL = (($activation_FINAL) || ($repression_FINAL));

    # report: detailed configurations first, then the class summaries
    print "$ID repression1_final\n" if $repression1_final;
    print "$ID repression2_final\n" if $repression2_final;
    print "$ID repression3_final\n" if $repression3_final;
    print "$ID repression4_final\n" if $repression4_final;
    print "$ID activation_biphasic1_final\n" if $activation_biphasic1_final;
    print "$ID activation_biphasic2_final\n" if $activation_biphasic2_final;
    print "$ID activation_biphasic3_final\n" if $activation_biphasic3_final;
    print "$ID activation_biphasic4_final\n" if $activation_biphasic4_final;
    print "$ID activation_biphasic5_final\n" if $activation_biphasic5_final;
    print "$ID activation_biphasic6_final\n" if $activation_biphasic6_final;
    print "$ID activation_early_strong1_final\n" if $activation_early_strong1_final;
    print "$ID activation_early_strong2_final\n" if $activation_early_strong2_final;
    print "$ID activation_early_strong3_final\n" if $activation_early_strong3_final;
    print "$ID activation_early_weak1_final\n" if $activation_early_weak1_final;
    print "$ID activation_early_weak2_final\n" if $activation_early_weak2_final;
    print "$ID activation_early_weak3_final\n" if $activation_early_weak3_final;
    print "$ID activation_early_weak4_final\n" if $activation_early_weak4_final;
    print "$ID activation_early_weak5_final\n" if $activation_early_weak5_final;
    print "$ID activation_early_weak6_final\n" if $activation_early_weak6_final;
    print "$ID activation_early_weak7_final\n" if $activation_early_weak7_final;
    print "$ID activation_late_strong1_final\n" if $activation_late_strong1_final;
    print "$ID activation_late_strong2_final\n" if $activation_late_strong2_final;
    print "$ID activation_late_strong3_final\n" if $activation_late_strong3_final;
    print "$ID activation_late_strong4_final\n" if $activation_late_strong4_final;
    print "$ID activation_late_strong5_final\n" if $activation_late_strong5_final;
    print "$ID activation_late_strong6_final\n" if $activation_late_strong6_final;
    print "$ID activation_late_weak1_final\n" if $activation_late_weak1_final;
    print "$ID activation_late_weak2_final\n" if $activation_late_weak2_final;
    print "$ID activation_late_weak3_final\n" if $activation_late_weak3_final;
    print "$ID activation_late_weak4_final\n" if $activation_late_weak4_final;
    print "$ID activation_late_weak5_final\n" if $activation_late_weak5_final;
    print "$ID activation_late_weak6_final\n" if $activation_late_weak6_final;
    print "$ID activation_late_weak7_final\n" if $activation_late_weak7_final;
    print "$ID activation_late_weak8_final\n" if $activation_late_weak8_final;
    print "$ID activation_late_weak9_final\n" if $activation_late_weak9_final;
    print "$ID repression_FINAL\n" if $repression_FINAL;
    print "$ID activation_late_strong_FINAL\n" if $activation_late_strong_FINAL;
    print "$ID activation_late_weak_FINAL\n" if $activation_late_weak_FINAL;
    print "$ID activation_late_FINAL\n" if $activation_late_FINAL;
    print "$ID activation_biphasic_FINAL\n" if $activation_biphasic_FINAL;
    print "$ID activation_early_strong_FINAL\n" if $activation_early_strong_FINAL;
    print "$ID activation_early_weak_FINAL\n" if $activation_early_weak_FINAL;
    print "$ID activation_early_FINAL\n" if $activation_early_FINAL;
    print "$ID activation_FINAL\n" if $activation_FINAL;
    print "$ID total_FINAL\n" if $total_FINAL;

#    if ($RANDOM_SHUFFLING_MODE==1) {
#	(($repression_FINAL) || ($activation_FINAL)) && print "$ID regulated\n";
#    }
}

# _within($value, $min, $max): true when $min <= $value <= $max (inclusive).
sub _within {
    my ($value, $min, $max) = @_;
    return ($value >= $min && $value <= $max);
}

# _motif_spans(\%BEG, \%END, $motif): occurrences of $motif as a list of
# [begin, end] pairs, in the order they were recorded; empty list when the
# motif has no recorded occurrence.
sub _motif_spans {
    my ($beg_of, $end_of, $motif) = @_;
    my $begs = $beg_of->{$motif} || [];
    my $ends = $end_of->{$motif} || [];
    return map { [ $begs->[$_], $ends->[$_] ] } 0 .. $#{$begs};
}
