%% Summary

% This work is licensed under the Creative Commons Attribution 3.0 CC-BY License. To view a copy of this license, visit http://creativecommons.org/licenses/by/3.0/
% Written by Ronja Woloszczuk, 20th October 2014.
% Submitted as part of "Transcription mediated insulation and interference
% direct gene cluster expression switches", Nguyen et al., eLife 

% This script allows the simulation of gene orientations and gene expression levels 
% and comparison to observed values. It is assumed that genes that are
% located more than 1000 bp apart do not exert a direct influence on
% each other and are therefore excluded from the analysis. Gene names, 
% location and expression levels are as shown in Table 1 and were ordered 
% according to their chromosome and start sites.

%% Setting up and loading relevant matrices.
load RB; % names of genes in the reductive building (RB) phase from Tu et al., 2005
load RC; % names of genes in the reductive charging (RC) phase from Tu et al., 2005
load OX; % names of genes in the oxidative (OX) phase from Tu et al., 2005

% Create matrices according to the specifications below

% create matrix glugalnetseq_sorted which contains an ordered list of genes 
% from Table 1 with the following values in the specified column

%   (1) chromosome number
%   (2) strand: 1 - + strand, 0 - - strand
%   (3) gene type: 1 - ORF-T, 2 - SUTs, 3 - CUTs, 4 - other
%   (4) YMC phase: 1 - NA (not cycling), 2 - OX, 3 - RB, 4 - RC; NA - not
%   an ORF gene
%   (5) start, i.e. first base of gene relative to the start of the
%   chromosome
%   (6) stop, i.e. last base of gene relative to the start of the
%   chromosome
%   (7) gene length
%   (8) expression rate in glucose in reads/nt - sense strand
%   (9) expression rate in galactose in reads/nt - sense strand
%   (10) distance between genes -1 and 0
%   (11) distance between genes 0 and +1

load glugalnetseq_sorted; 
load glugalnetseq_sorted_names; % cell with corresponding gene names in columns 
% (1) systematic (2) trivial names

    
%% calculate the number of genes and corresponding expression of genes 
% with the following specifications

% Accumulators filled by the per-chromosome loop below. Index j is the row
% of descripts (the category), index i is the chromosome.

max_dis=1000;                % maximum distance between genes

orientation=[];              % orientation(j,i): number of genes in category j on chromosome i
glusnocpb=[];                % row j: glucose expression (reads/nt, sense strand) of the genes in category j
galsnocpb=[];                % row j: galactose expression (reads/nt, sense strand) of the genes in category j
netglunetgal=[];             % row j: glucose/galactose expression ratio of the genes in category j
names_orientation=cell(0,0); % column j: names of the genes in category j

n=ones(1,33);                % n(j): next free column in row j of the expression matrices
m=n;                         % m(j): next free row in column j of names_orientation
l=0;                         % offset added to name indices; not modified anywhere in the visible code

% Matrix describing each category (one row per category). How the six
% columns are read depends on the row group and is fixed by the masks in the
% loops below:
%   rows 1-4   : columns 1-4 are strand codes (1 = +, 0 = -) for the two
%                alternative neighbour pairs; columns 5-6 unused (NaN)
%   rows 5-8   : columns 1-3 and 4-6 are two alternative strand triples for
%                genes -1, 0, +1
%   rows 9-16  : columns 1-4 strands as in rows 1-2, column 5 = YMC phase of
%                the neighbour, column 6 = YMC phase of gene 0
%   rows 20-27 : as rows 9-16 but columns 5-6 hold gene types
%                (1 = ORF, 2 = SUT, 3 = CUT)
% Rows 20-23 previously repeated the YMC codes of rows 9-12 (2/4); they now
% hold the gene-type codes named in their comments, matching rows 24-27.
descripts = [                   % matrix used to describe the condition of the simulation
    
    1 1 0 0 NaN NaN;            % 1- genes in a tandem array
    0 1 0 1 NaN NaN;            % 2- genes in a divergent array
    1 1 0 0 NaN NaN;            % 3- genes in a tandem array
    1 0 1 0 NaN NaN;            % 4- genes in a convergent array
    
    1 1 1 0 0 0;                % 5- 5' tandem, 3' tandem
    0 1 1 0 0 1;                % 6- 5' divergent, 3' tandem
    1 1 0 1 0 0;                % 7- 5' tandem, 3' convergent
    0 1 0 0 1 0;                % 8- 5' divergent, 3' convergent

    1 1 0 0 2 2;                % 9- 5' tandem, OX.OX (2nd gene considered)
    1 1 0 0 2 4;                % 10- 5' tandem, OX.RC (2nd gene considered)
    1 1 0 0 4 2;                % 11- 5' tandem, RC.OX (2nd gene considered)
    1 1 0 0 4 4;                % 12- 5' tandem, RC.RC (2nd gene considered)
    
    0 1 0 1 2 2;                % 13- 5' divergent, OX.OX (2nd gene considered)
    0 1 0 1 2 4;                % 14- 5' divergent, OX.RC (2nd gene considered)
    0 1 0 1 4 2;                % 15- 5' divergent, RC.OX (2nd gene considered)
    0 1 0 1 4 4;                % 16- 5' divergent, RC.RC (2nd gene considered)
    
    % NOTE(review): the label says OX.OX but the phase codes are 4 4 (RC per
    % the column (4) coding); confirm which one is intended.
    1 0 1 0 4 4;                % 17- convergent, OX.OX (1st gene considered)
    
    % NOTE(review): the loops for rows 18-19 and 28-33 compare columns 3-6 of
    % these rows against strand columns; confirm the intended layout.
    1 0 2 2 1 NaN;              % 18- OX.OX.NA
    1 0 2 2 2 NaN;              % 19- OX.OX.OX
    
    1 1 0 0 3 1;                % 20- 5' tandem, CUT.ORF (2nd gene considered)
    1 1 0 0 1 3;                % 21- 5' tandem, ORF.CUT (2nd gene considered)
    1 1 0 0 1 2;                % 22- 5' tandem, ORF.SUT (2nd gene considered)
    1 1 0 0 2 1;                % 23- 5' tandem, SUT.ORF (2nd gene considered)
    
    0 1 0 1 3 1;                % 24- 5' divergent, CUT.ORF (2nd gene considered)
    0 1 0 1 1 3;                % 25- 5' divergent, ORF.CUT (2nd gene considered)
    0 1 0 1 1 2;                % 26- 5' divergent, ORF.SUT (2nd gene considered)
    0 1 0 1 2 1;                % 27- 5' divergent, SUT.ORF (2nd gene considered)
    
    1 0 3 1 3 NaN;              % 28- CUT.ORF.CUT
    1 0 3 1 1 NaN;              % 29- CUT.ORF.ORF
    1 0 3 1 2 NaN;              % 30- CUT.ORF.SUT
    1 0 2 1 3 NaN;              % 31- SUT.ORF.CUT
    1 0 2 1 1 NaN;              % 32- SUT.ORF.ORF
    1 0 2 1 2 NaN;              % 33- SUT.ORF.SUT

    ];


for i=1:16  
    
    % create current matrix; this matrix is different for each chromosome
    % as each chromosome is considered separately; only ORF genes are considered;
    
    clear current counts
    
    % Build the neighbourhood table for the ORF genes (gene type 1) of
    % chromosome i. Row r describes ORF number r-1 as "gene 0"; the two NaN
    % padding rows shift the same column so that gene -1 and gene +1 line up
    % on the same row.
    
    orf_sel=glugalnetseq_sorted(:,1)==i&glugalnetseq_sorted(:,3)==1;
    orf_tab=glugalnetseq_sorted(orf_sel,:);
    
    strand_col=orf_tab(:,2);
    ymc_col=orf_tab(:,4);
    mid=@(k)[NaN;orf_tab(:,k);NaN];     % column k aligned with gene position 0
    
    current=[[NaN;NaN;strand_col], ...  % (1)  strand, gene position -1
             [NaN;strand_col;NaN], ...  % (2)  strand, gene position 0
             [strand_col;NaN;NaN], ...  % (3)  strand, gene position +1
             [NaN;NaN;ymc_col], ...     % (4)  YMC phase, gene position -1
             [NaN;ymc_col;NaN], ...     % (5)  YMC phase, gene position 0
             [ymc_col;NaN;NaN], ...     % (6)  YMC phase, gene position +1
             mid(5), ...                % (7)  start, i.e. first base of gene relative to the start of the chromosome
             mid(6), ...                % (8)  stop, i.e. last base of gene relative to the start of the chromosome
             mid(8), ...                % (9)  expression rate in glucose in reads/nt - sense strand
             mid(9), ...                % (10) expression rate in galactose in reads/nt - sense strand
             mid(10), ...               % (11) distance between genes -1 and 0
             mid(11)];                  % (12) distance between genes 0 and +1
    
    % calculate the number of ORF genes in tandem and divergent
    % orientations in 5'
    
    row_pos=1:2; 
    
    % Rows of glugalnetseq_sorted holding the ORF genes of chromosome i, used
    % to map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i&glugalnetseq_sorted(:,3)==1);
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Gene 0 is selected when either neighbour pair matches the strand
    % pattern of descriptor row j and the paired distance is <= max_dis.
    % Element-wise & is required here: && only accepts scalar operands.
    % NOTE(review): the (-1,0) strand pair is tested with column 12
    % (documented as the 0/+1 distance) and the (0,+1) pair with column 11;
    % confirm the distance columns are not swapped.
    sel=(current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,12)<=max_dis)|...
        (current(:,2)==descripts(j,3)&current(:,3)==descripts(j,4)&current(:,11)<=max_dis);
    
    orientation(j,i)=sum(sel);
    
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);
    
    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds ORF number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);
    
    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),2};
        m(j)=m(j)+1;
    end
    
    
    end
    
    % calculate the number/expression of ORF genes in tandem and convergent
    % orientations in 3'
    
    row_pos=3:4;
    
    % Rows of glugalnetseq_sorted holding the ORF genes of chromosome i, used
    % to map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i&glugalnetseq_sorted(:,3)==1);
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Gene 0 is selected when the (0,+1) or the (-1,0) strand pair matches
    % descriptor row j and the paired distance is <= max_dis. Element-wise &
    % is required: && only accepts scalar operands.
    % NOTE(review): the (0,+1) pair is tested with column 11 (documented as
    % the -1/0 distance) and the (-1,0) pair with column 12; confirm.
    sel=(current(:,3)==descripts(j,2)&current(:,2)==descripts(j,1)&current(:,11)<=max_dis)|...
        (current(:,2)==descripts(j,4)&current(:,1)==descripts(j,3)&current(:,12)<=max_dis);
    
    orientation(j,i)=sum(sel);
    
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);
      
    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds ORF number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);
    
    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),2};
        m(j)=m(j)+1;
    end
    
    
    end
    
    % calculate the number/expression of ORF genes in the context of both adjacent ORF
    % genes
    
    row_pos=5:8;

    % Rows of glugalnetseq_sorted holding the ORF genes of chromosome i, used
    % to map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i&glugalnetseq_sorted(:,3)==1);
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Both neighbours must lie within max_dis; the strand triple of genes
    % -1, 0, +1 must equal descriptor columns 1-3 or columns 4-6.
    % Element-wise & is required: && only accepts scalar operands.
    near_both=current(:,12)<=max_dis&current(:,11)<=max_dis;
    sel=(current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,3)==descripts(j,3)&near_both)|...
        (current(:,1)==descripts(j,4)&current(:,2)==descripts(j,5)&current(:,3)==descripts(j,6)&near_both);
    
    orientation(j,i)=sum(sel);
                 
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);
       
    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds ORF number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);
    
    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),2};
        m(j)=m(j)+1;
    end
    
    
    end
    
    % calculate the number/expression of OX and RC genes in the context
    % adjacent OX and RC genes in tandem and divergent orientations
    
    row_pos=9:16;
    
    % Rows of glugalnetseq_sorted holding the ORF genes of chromosome i, used
    % to map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i&glugalnetseq_sorted(:,3)==1);
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Gene 0 is selected when a neighbour pair matches the strand pattern,
    % the neighbour has YMC phase descripts(j,5), gene 0 has YMC phase
    % descripts(j,6) and the paired distance is <= max_dis. The same mask is
    % used for the count, the expression values and the names so the three
    % outputs always describe the same genes. Element-wise & is required:
    % && only accepts scalar operands.
    sel=(current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,4)==descripts(j,5)&current(:,5)==descripts(j,6)&current(:,12)<=max_dis)|...
        (current(:,2)==descripts(j,3)&current(:,3)==descripts(j,4)&current(:,6)==descripts(j,5)&current(:,5)==descripts(j,6)&current(:,11)<=max_dis);
    
    orientation(j,i)=sum(sel);
                 
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);   

    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds ORF number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);

    % NOTE(review): this block stores column 1 of the name list while the
    % blocks for rows 1-8 store column 2; confirm which is wanted.
    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),1};
        m(j)=m(j)+1;
    end
    
end
    
    % calculate the number/expression of OX.OX in a convergent orientation
    
    row_pos=17;
    
    % Rows of glugalnetseq_sorted holding the ORF genes of chromosome i, used
    % to map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i&glugalnetseq_sorted(:,3)==1);
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Gene 0 is selected when its 3' neighbour pair matches the strand
    % pattern, gene 0 has YMC phase descripts(j,5), the neighbour has YMC
    % phase descripts(j,6) and the paired distance is <= max_dis. One mask is
    % shared by the count, both expression vectors and the names; the
    % previous per-statement masks differed, which could make the assigned
    % vectors disagree in length. Element-wise & is required: && only
    % accepts scalar operands.
    sel=(current(:,3)==descripts(j,2)&current(:,2)==descripts(j,1)&current(:,6)==descripts(j,6)&current(:,5)==descripts(j,5)&current(:,11)<=max_dis)|...
        (current(:,1)==descripts(j,3)&current(:,2)==descripts(j,4)&current(:,4)==descripts(j,6)&current(:,5)==descripts(j,5)&current(:,12)<=max_dis);
    
    orientation(j,i)=sum(sel);
    
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);   

    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds ORF number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);
    
    % NOTE(review): this block stores column 1 of the name list while the
    % blocks for rows 1-8 store column 2; confirm which is wanted.
    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),1};
        m(j)=m(j)+1;
    end
    
end
    
    % calculate the number/expression of OX.OX.NA and OX.OX.OX 
    
    row_pos=18:19;
    
    % current must stay defined here: it was previously cleared right before
    % being read, which raised an undefined-variable error.
    
    % Rows of glugalnetseq_sorted holding the ORF genes of chromosome i, used
    % to map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i&glugalnetseq_sorted(:,3)==1);
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Element-wise & is required: && only accepts scalar operands.
    % NOTE(review): this mask is the strand-triple test of rows 5-8. For
    % descriptor rows 18-19 it compares the strand columns 1-3 of current
    % with values 2 and NaN, so it can never be true; the YMC phase columns
    % 4-6 are probably what should be tested - confirm intended layout.
    near_both=current(:,12)<=max_dis&current(:,11)<=max_dis;
    sel=(current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,3)==descripts(j,3)&near_both)|...
        (current(:,1)==descripts(j,4)&current(:,2)==descripts(j,5)&current(:,3)==descripts(j,6)&near_both);
    
    orientation(j,i)=sum(sel);
                 
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);

    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds ORF number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);
    
    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),2};
        m(j)=m(j)+1;
    end
    
    
end
    
    % all genes 
    % Neighbourhood table for every gene on chromosome i (all gene types).
    % Row r describes gene number r-1 as "gene 0"; NaN rows pad the shifted
    % copies used for gene -1 and gene +1.
    chr_sel=glugalnetseq_sorted(:,1)==i;
    chr_tab=glugalnetseq_sorted(chr_sel,:);
    
    strand_col=chr_tab(:,2);
    type_col=chr_tab(:,3);
    mid=@(k)[NaN;chr_tab(:,k);NaN];     % column k aligned with gene position 0
    
    current=[[NaN;NaN;strand_col], ...  % (1)  strand, gene position -1
             [NaN;strand_col;NaN], ...  % (2)  strand, gene position 0
             [strand_col;NaN;NaN], ...  % (3)  strand, gene position +1
             [NaN;NaN;type_col], ...    % (4)  gene type, gene position -1
             [NaN;type_col;NaN], ...    % (5)  gene type, gene position 0
             [type_col;NaN;NaN], ...    % (6)  gene type, gene position +1
             mid(12), ...               % (7)  column 12 of glugalnetseq_sorted (not described in the header)
             mid(13), ...               % (8)  column 13 of glugalnetseq_sorted (not described in the header)
             mid(8), ...                % (9)  expression rate in glucose in reads/nt - sense strand
             mid(9), ...                % (10) expression rate in galactose in reads/nt - sense strand
             mid(10), ...               % (11) distance between genes -1 and 0
             mid(11)];                  % (12) distance between genes 0 and +1
    
    % counts(r) is the gene number described by row r of current (0 and
    % N+1 for the two padding rows)
    counts=0:sum(chr_sel)+1;
    
    % calculate the number/expression of ORF, SUT and CUT genes in the context
    % adjacent ORF, SUT and CUT genes in tandem and divergent orientations
    
    row_pos=20:27;
    
    % Rows of glugalnetseq_sorted holding the genes of chromosome i, used to
    % map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i);
    
    % This loop used to appear twice in identical form; the second copy
    % appended every expression value and name a second time. It now runs
    % once.
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Gene 0 is selected when a neighbour pair matches the strand pattern,
    % the neighbour has gene type descripts(j,5), gene 0 has gene type
    % descripts(j,6) and the paired distance is <= max_dis. The same mask is
    % used for the count, the expression values and the names. Element-wise
    % & is required: && only accepts scalar operands.
    sel=(current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,4)==descripts(j,5)&current(:,5)==descripts(j,6)&current(:,12)<=max_dis)|...
        (current(:,2)==descripts(j,3)&current(:,3)==descripts(j,4)&current(:,6)==descripts(j,5)&current(:,5)==descripts(j,6)&current(:,11)<=max_dis);
    
    orientation(j,i)=sum(sel);
                 
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);

    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds gene number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);

    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),1};
        m(j)=m(j)+1;
    end
    
end
    
    % calculate the number/expression of ORF genes flanked by SUT, CUT and
    % ORF
    
    row_pos=28:33;
    
    % Rows of glugalnetseq_sorted holding the genes of chromosome i, used to
    % map a row of current back to its gene name.
    % assumes glugalnetseq_sorted_names is row-aligned with
    % glugalnetseq_sorted - TODO confirm
    gene_rows=find(glugalnetseq_sorted(:,1)==i);
    
   for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % One mask is shared by the count, both expression vectors and the
    % names; the previous per-statement masks differed, which could make the
    % assigned vectors disagree in length. Element-wise & is required: &&
    % only accepts scalar operands.
    % NOTE(review): this is the pair test used for row 17. With descriptor
    % rows 28-33 it compares against descripts(j,6)=NaN and a strand column
    % against type codes 2/3, so it can never be true; a three-gene type
    % test on columns 4-6 of current is probably intended - confirm.
    sel=(current(:,3)==descripts(j,2)&current(:,2)==descripts(j,1)&current(:,6)==descripts(j,6)&current(:,5)==descripts(j,5)&current(:,11)<=max_dis)|...
        (current(:,1)==descripts(j,3)&current(:,2)==descripts(j,4)&current(:,4)==descripts(j,6)&current(:,5)==descripts(j,5)&current(:,12)<=max_dis);
    
    orientation(j,i)=sum(sel);
    
    slot=n(j):n(j)+orientation(j,i)-1;   % columns of row j filled by this chromosome
    glusnocpb(j,slot)=current(sel,9);
    galsnocpb(j,slot)=current(sel,10);
    netglunetgal(j,slot)=glusnocpb(j,slot)./galsnocpb(j,slot);   

    n(j)=n(j)+orientation(j,i);
    
    % Row r of current holds gene number r-1 (one leading NaN padding row).
    index=gene_rows(find(sel)-1);
    
    for h=1:length(index)
        names_orientation{m(j),j}= glugalnetseq_sorted_names{index(h),1};
        m(j)=m(j)+1;
    end
        end
    
end
%% simulate number of genes 

for s=1:1000
    
    % Reset the per-simulation containers. Each variable is initialised
    % once; none of them is written to again in the visible code.
    rand_orientation=[];
    
    rand_glusnocpb=[];
    rand_galsnocpb=[];
    rand_netglunetgal=[];
    
    rand_glu_as=[];
    rand_gal_as=[];
    rand_glusnocpb_o_glu_as=[];
    rand_galsnocpb_o_gal_as=[];
    rand_glusnocpb_o_glu_as_o_galsnocpb_o_gal_as=[];
    
    rand_miglu=[];
    rand_migal=[];
    rand_miglumigal=[];
    rand_netglumiglu=[];
    rand_netgalmigal=[];
    rand_netmicro=[];
    
    max_dis=1000;      % maximum distance between genes
    
    n=ones(1,33);      % one counter per row of descripts
    m=n;
    l=0;

for i=1:16  
    
    clear current
    clear counts
    clear a
    clear b
    clear c
    
    % Genes of chromosome i that carry a YMC phase code (1-4).
    % NOTE(review): presumably the same set as the ORF genes (gene type 1)
    % used in the observed analysis - confirm.
    ymc_col=glugalnetseq_sorted(:,4);
    orf_sel=glugalnetseq_sorted(:,1)==i&(ymc_col==1|ymc_col==2|ymc_col==3|ymc_col==4);
    n_genes=sum(orf_sel);
    
    % Randomly assign a strand to each gene; the number of genes on the +
    % strand is kept constant for each chromosome. The strand condition now
    % applies to every phase (it used to bind to the phase==1 term only, so
    % all phase 2-4 genes were counted as + strand).
    a=randperm(n_genes);
    b=zeros(n_genes,1);
    b(a(1:sum(orf_sel&glugalnetseq_sorted(:,2)==1)))=1; 
    
    % Randomly assign a YMC type to each gene; the number of genes of each
    % YMC type is kept constant for each chromosome. Consecutive stretches of
    % the permutation receive phase 1, 2, 3 and 4.
    a=randperm(n_genes);
    c=zeros(n_genes,1);
    used=0;
    for p=1:4
        n_p=sum(orf_sel&ymc_col==p);
        c(a(used+1:used+n_p))=p;
        used=used+n_p;
    end
    
    % Distances are kept constant. They are taken from the same rows as the
    % genes so that every column of current has n_genes+2 rows (they used
    % to be taken from all genes of the chromosome).
    d=glugalnetseq_sorted(orf_sel,10);
    e=glugalnetseq_sorted(orf_sel,11);
    
    % genes can be in position -1, 0 (position of interest) and +1
    current=[NaN;NaN;b];        % strand, gene position -1
    current(:,2)=[NaN;b;NaN];   % strand, gene position 0
    current(:,3)=[b;NaN;NaN];   % strand, gene position +1
    
    current(:,4)=[NaN;NaN;c];   % YMC phase, gene position -1
    current(:,5)=[NaN;c;NaN];   % YMC phase, gene position 0
    current(:,6)=[c;NaN;NaN];   % YMC phase, gene position +1
    
    current(:,7)=[NaN;d;NaN];   % column 10 of glugalnetseq_sorted
    current(:,8)=[NaN;e;NaN];   % column 11 of glugalnetseq_sorted

    
    % current is deliberately NOT cleared here: the category counts below
    % read it (it used to be cleared right after being built).
    
    % set up matrix as before
    % genes can be in position -1, 0 (position of interest) and +1
    
    % ORF genes only
   
    % calculate the number of ORF genes in tandem and divergent
    % orientations in 5'
    
    row_pos=1:2; 
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Count genes whose (-1,0) or (0,+1) strand pair matches descriptor row
    % j within max_dis. Element-wise & is required: && only accepts scalar
    % operands.
    curr_rand_orientation(j,i)=sum((current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,8)<=max_dis)|...
                     (current(:,2)==descripts(j,3)&current(:,3)==descripts(j,4)&current(:,7)<=max_dis));         
    end
    
    % calculate the number/expression of ORF genes in tandem and convergent
    % orientations in 3'
    
    row_pos=3:4;
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Count genes whose (0,+1) or (-1,0) strand pair matches descriptor row
    % j within max_dis. Element-wise & is required: && only accepts scalar
    % operands.
    curr_rand_orientation(j,i)=sum((current(:,3)==descripts(j,2)&current(:,2)==descripts(j,1)&current(:,7)<=max_dis)|...
                     (current(:,2)==descripts(j,4)&current(:,1)==descripts(j,3)&current(:,8)<=max_dis));
                 

    end
    
    % calculate the number/expression of ORF genes in the context of both adjacent ORF
    % genes
    
    row_pos=5:8;

    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Both neighbours must lie within max_dis and the strand triple of
    % genes -1, 0, +1 must equal descriptor columns 1-3 or columns 4-6.
    % Element-wise & is required: && only accepts scalar operands.
    near_both=current(:,8)<=max_dis&current(:,7)<=max_dis;
    curr_rand_orientation(j,i)=sum((current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,3)==descripts(j,3)&near_both)|...         
        (current(:,1)==descripts(j,4)&current(:,2)==descripts(j,5)&current(:,3)==descripts(j,6)&near_both));
    
    
    end
    
    % calculate the number/expression of OX and RC genes in the context
    % adjacent OX and RC genes in tandem and divergent orientations
    
    row_pos=9:16;
    
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Strand pair plus YMC phases: neighbour phase = descripts(j,5), gene 0
    % phase = descripts(j,6). Element-wise & is required: && only accepts
    % scalar operands.
    curr_rand_orientation(j,i)=sum((current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,4)==descripts(j,5)&current(:,5)==descripts(j,6)&current(:,8)<=max_dis)|...
                     (current(:,2)==descripts(j,3)&current(:,3)==descripts(j,4)&current(:,6)==descripts(j,5)&current(:,5)==descripts(j,6)&current(:,7)<=max_dis));
                 
  
    
end
    
    % calculate the number/expression of OX.OX in a convergent orientation
    
    row_pos=17;
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Convergent pair with gene 0 phase = descripts(j,5) and neighbour
    % phase = descripts(j,6). Element-wise & is required: && only accepts
    % scalar operands.
    curr_rand_orientation(j,i)=sum((current(:,3)==descripts(j,2)&current(:,2)==descripts(j,1)&current(:,6)==descripts(j,6)&current(:,5)==descripts(j,5)&current(:,7)<=max_dis)|...
                     (current(:,1)==descripts(j,3)&current(:,2)==descripts(j,4)&current(:,4)==descripts(j,6)&current(:,5)==descripts(j,5)&current(:,8)<=max_dis));
   
end
    
    % calculate the number/expression of OX.OX.NA and OX.OX.OX 
    
    row_pos=18:19;
    
    % current must stay defined here: it was previously cleared right before
    % being read, which raised an undefined-variable error.
    
    for g=1:length(row_pos)
    
    j=row_pos(g);
    
    % Element-wise & is required: && only accepts scalar operands.
    % NOTE(review): this is the strand-triple test of rows 5-8. For
    % descriptor rows 18-19 it compares strand columns with values 2 and
    % NaN, so it can never be true; the YMC phase columns 4-6 are probably
    % what should be tested - confirm intended layout.
    near_both=current(:,8)<=max_dis&current(:,7)<=max_dis;
    curr_rand_orientation(j,i)=sum((current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&current(:,3)==descripts(j,3)&near_both)|...         
        (current(:,1)==descripts(j,4)&current(:,2)==descripts(j,5)&current(:,3)==descripts(j,6)&near_both));
                 
    
end
    
    % all genes 
    clear current counts a b c
    
    % All genes on chromosome i, whatever their type.
    chr_sel=glugalnetseq_sorted(:,1)==i;
    n_genes=sum(chr_sel);
    
    % Shuffle strands: as many genes as observed are put on the + strand (1),
    % the rest stay 0.
    a=randperm(n_genes);
    b=zeros(n_genes,1);
    n_plus=sum(chr_sel&glugalnetseq_sorted(:,2)==1);
    b(a(1:n_plus))=1; 
    
    % Shuffle gene types: consecutive stretches of a second permutation get
    % type 1, 2, 3 and 4, each as often as observed on this chromosome.
    a=randperm(n_genes);
    c=zeros(n_genes,1);
    for t=1:4
        filled=sum(c~=0);
        n_t=sum(chr_sel&glugalnetseq_sorted(:,3)==t);
        c(a(filled+1:filled+n_t))=t;
    end
    
    % distances are kept constant
    d=glugalnetseq_sorted(chr_sel,10);
    e=glugalnetseq_sorted(chr_sel,11);
    
    % Columns: strand of gene -1/0/+1, gene type of gene -1/0/+1, then the
    % two distance columns aligned with gene 0.
    current=[[NaN;NaN;b],[NaN;b;NaN],[b;NaN;NaN], ...
             [NaN;NaN;c],[NaN;c;NaN],[c;NaN;NaN], ...
             [NaN;d;NaN],[NaN;e;NaN]];
    
    % calculate the number/expression of ORF, SUT and CUT genes in the context
    % of adjacent ORF, SUT and CUT genes in tandem and divergent orientations
    
    row_pos=20:27;
    
    % The original script ran this loop twice with identical bounds and
    % identical right-hand sides; the second pass only recomputed the same
    % values, so a single pass is kept.
    for g=1:length(row_pos)
    
        j=row_pos(g);
    
        % Element-wise & is required because every comparison yields a column
        % vector; the short-circuit && accepts only scalar operands and binds
        % more loosely than |.
        % NOTE(review): the pattern on columns 1-2 is paired with the distance
        % in column 8 and the pattern on columns 2-3 with column 7 - confirm
        % that each pattern is checked against the intended distance.
        matchFirst=current(:,1)==descripts(j,1)&current(:,2)==descripts(j,2)&...
            current(:,4)==descripts(j,5)&current(:,5)==descripts(j,6)&...
            current(:,8)<=max_dis;
        matchSecond=current(:,2)==descripts(j,3)&current(:,3)==descripts(j,4)&...
            current(:,6)==descripts(j,5)&current(:,5)==descripts(j,6)&...
            current(:,7)<=max_dis;
    
        curr_rand_orientation(j,i)=sum(matchFirst|matchSecond);
    
    end
    
    % calculate the number/expression of ORF genes flanked by SUT, CUT and
    % ORF
    
    row_pos=28:33;
    
    % The loop over row_pos was missing here: the statements used the stale
    % value of g left by the previous loop, which indexes past the end of the
    % six-element row_pos. Each of rows 28-33 is now processed in turn.
    for g=1:length(row_pos)
    
        j=row_pos(g);
    
        % Element-wise & is required because every comparison yields a column
        % vector; the short-circuit && accepts only scalar operands.
        matchFirst=current(:,3)==descripts(j,2)&current(:,2)==descripts(j,1)&...
            current(:,6)==descripts(j,6)&current(:,5)==descripts(j,5)&...
            current(:,7)<=max_dis;
        matchSecond=current(:,1)==descripts(j,3)&current(:,2)==descripts(j,4)&...
            current(:,4)==descripts(j,6)&current(:,5)==descripts(j,5)&...
            current(:,8)<=max_dis;
    
        curr_rand_orientation(j,i)=sum(matchFirst|matchSecond);
    
    end
    
end % closes a loop opened earlier in the script (presumably the loop over i)

% NOTE(review): j was last assigned from row_pos above, so it equals 33 here.
% If the enclosing simulation loop (header earlier in the script) also uses j
% as its counter, every simulation is written to column 33 - confirm the
% intended column index.
summary_rand_orientation(1:33,j)=sum(curr_rand_orientation(:,1:16),2); % sum of genes in each orientation
end

%% simulate expression levels
% For each of the 33 descriptor rows, repeatedly draw orientation(i,17) ORF
% genes at random (without replacement) and record the median glucose
% expression, galactose expression and glucose/galactose ratio of the draw.
% Results are stored as (row i, simulation s).

nSim=10000; % number of simulations

% ORF gene expression: column 8 = glucose, column 9 = galactose (column 10 is
% a distance, not an expression value). Selected once because it does not
% change between simulations.
c=glugalnetseq_sorted(glugalnetseq_sorted(:,3)==1,8:9);
nOrf=size(c,1);

for s=1:nSim
    % i indexes the descriptor rows (1-33, as in the p-value section below);
    % looping to 10000 would index past the end of orientation.
    for i=1:33

        a=randperm(nOrf); % number of ORF genes - randomized
        b=c(a(1:orientation(i,17)),:); % genes selected (same number as observed)

        summary_rand_glusnocpb.glu_gal_median(i,s)=nanmedian(b(:,1)); % median expression in glucose in each simulation
        summary_rand_galsnocpb.glu_gal_median(i,s)=nanmedian(b(:,2)); % median expression in galactose in each simulation
        summary_rand_netglunetgal.glu_gal_median(i,s)=nanmedian(b(:,1)./b(:,2)); % median expression in glucose/galactose in each simulation
    end
end
%% calculate p values
% Layout of p (one row per descriptor row, 33 rows). Each group of four
% columns holds:
%   fraction of simulations above the observed value,
%   fraction of simulations below the observed value,
%   the observed value,
%   the mean of the simulated values.
% Columns 1-4: orientation counts, 5-8: glucose, 9-12: galactose,
% 13-16: glucose/galactose.

nRows=33;

% orientation
nSimOrientation=size(summary_rand_orientation,2);
for i=1:nRows
    observed=orientation(i,17);
    simulated=summary_rand_orientation(i,:);
    p(i,1)=sum(observed<simulated)./nSimOrientation;
    p(i,2)=sum(observed>simulated)./nSimOrientation;
end
p(:,3)=orientation(:,17);
p(:,4)=mean(summary_rand_orientation,2);

% expression: glucose, galactose, glucose/galactose (in this order)
observedSets={glusnocpb,galsnocpb,netglunetgal};
simulatedSets={summary_rand_glusnocpb.glu_gal_median,...
               summary_rand_galsnocpb.glu_gal_median,...
               summary_rand_netglunetgal.glu_gal_median};

for k=1:numel(observedSets)
    observedMatrix=observedSets{k};
    simulatedMatrix=simulatedSets{k};
    nSimExpression=size(simulatedMatrix,2);
    firstCol=4*k+1; % 5, 9, 13

    for i=1:nRows
        rowValues=observedMatrix(i,:);
        observed=nanmedian(rowValues(rowValues~=0)); % observed median, zero entries excluded
        simulated=simulatedMatrix(i,:);

        p(i,firstCol)=sum(observed<simulated)./nSimExpression;
        p(i,firstCol+1)=sum(observed>simulated)./nSimExpression;
        p(i,firstCol+2)=observed;
    end

    p(:,firstCol+3)=mean(simulatedMatrix,2);
end


