

%% tRNA data, compiled from Dong et al., 1996.
% get_tRNA_data_v3 returns a cell array with one entry per tRNA; each entry
% is read below through its .abundance field.
tRNA = get_tRNA_data_v3();

% Gather the abundance of every tRNA entry into a single numeric vector,
% in the same order as the entries of tRNA.
tRNA_abundance = [];
for i = 1:length(tRNA)
    tRNA_entry = tRNA{i};
    tRNA_abundance(i) = tRNA_entry.abundance;
end


%% coli (Gene's data), MOPS complete
% Read the ribosome profiling data through compute_codon_usage and turn the
% per-codon read counts into fractions that sum to 1.

% Settings passed straight through to compute_codon_usage.
codon_size = 3;
buffer = 5;
winsor_cut = 0.02;
density_thresh = 0;

% data_dir has to be filled in by the user. An explicit placeholder plus a
% guard keeps the script parseable and fails with a clear message instead
% of an incomplete-statement syntax error.
data_dir = '';  % TODO: path to ribosome profiling data
if isempty(data_dir)
    error('codon_usage:missingDataDir', ...
        'Set data_dir to the path of the ribosome profiling data before running this script.');
end
file_name = 'GSE53767_fp_rdm_pooled*.wig';
species = 'ecoli';

[codons_list,reads_codons_coli,~,~] = compute_codon_usage(data_dir,...
    file_name,species,codon_size,buffer,winsor_cut,density_thresh);

% Normalise the read counts so that codon_usage sums to 1.
codon_usage = reads_codons_coli./sum(reads_codons_coli);


%% Attach codon usage to every amino acid
% For each amino acid, look up the usage of each of its codons in
% codon_usage (indexed like codons_list) and store both the per-codon
% values and their sum on the amino_acid structure.
[~, amino_acid, synonymous_codons] = get_codon_dictionary();

for i = 1:length(amino_acid.codons)

    codons_of_aa = amino_acid.codons{i};

    for j = 1:length(codons_of_aa)
        % position of this codon within codons_list
        ind_codon = find(strcmp(codons_of_aa{j},codons_list));
        amino_acid.codon_usage{i}(j) = codon_usage(ind_codon);
    end

    % total usage of the amino acid over all of its codons
    amino_acid.summed_usage(i) = sum(amino_acid.codon_usage{i});

end


% Build aa: a copy of amino_acid in which every field is permuted so that
% the amino acids run from highest to lowest summed usage.
[~,ind_sort] = sort(amino_acid.summed_usage,'descend');
str_prop = fieldnames(amino_acid);
aa = [];
for i = 1:length(str_prop)
    field_name = str_prop{i};
    field_value = amino_acid.(field_name);
    aa.(field_name) = field_value(ind_sort);
end


% Within each amino acid, order the codons from most to least used and
% apply the same permutation to the codon names.
for i = 1:length(aa.codons)
    aa_codon_usage = aa.codon_usage{i};

    % sort returns the sorted usages together with the permutation used
    [aa.codon_usage{i},ind_sort] = sort(aa_codon_usage,'descend');
    aa.codons{i} = aa.codons{i}(ind_sort);

end


% Flatten the per-amino-acid data into codon-indexed lists (notation of the
% coarse-grained estimation):
%   f                  - usage of each codon
%   ordered_codon_list - codon names, in the same order as f
%   aa_ind_codons      - index into aa of the amino acid owning each codon
f = [];
ordered_codon_list = [];
aa_ind_codons = [];
for i = 1:length(aa.codon_usage)
    n_syn = length(aa.codon_usage{i});

    % append this amino acid's codons in one go, keeping row orientation
    f = [f reshape(aa.codon_usage{i}(1:n_syn),1,[])];
    ordered_codon_list = [ordered_codon_list reshape(aa.codons{i}(1:n_syn),1,[])];
    aa_ind_codons = [aa_ind_codons repmat(i,1,n_syn)];
end

%% reordering the tRNA data structure
% Walk through the codons in their sorted order and, for each codon, pick
% the first not-yet-used tRNA that recognises it. The resulting permutation
% (ind_sort_tRNA) lines the tRNAs up with the codon ordering.

aa_ind_tRNA = [];

ind_sort_tRNA = [];
all_tRNA_ind = 1:length(tRNA);   % tRNAs that have not been placed yet

counter = 1;
for i = 1:length(ordered_codon_list)
    for j = 1:length(all_tRNA_ind)
        candidate = all_tRNA_ind(j);
        if any(strcmp(tRNA{candidate}.codons_recognized,ordered_codon_list{i}))
            ind_sort_tRNA(end+1) = candidate;
            all_tRNA_ind(j) = [];   % each tRNA is placed at most once
            counter = counter+1;
            break
        end
    end
end

tRNA_sort = tRNA(ind_sort_tRNA);

% Name of every sorted tRNA and the index (into aa) of its amino acid.
tRNA_names = [];
for i = 1:length(tRNA_sort)
    sorted_entry = tRNA_sort{i};
    tRNA_names{i} = sorted_entry.name;
    aa_ind_tRNA(i) = find(strcmp(aa.name,sorted_entry.aa_name));
end

% Abundances in sorted order, relative to the total over all tRNAs.
rel_tRNA_abundace_sort = tRNA_abundance(ind_sort_tRNA)/sum(tRNA_abundance);

%% generate specificity matrix

load('Glasbey_colormap_20190725.mat');

%%

% S(i,j): i corresponds to codons, j corresponds to tRNAs.
% S(i,j) = 1 when tRNA_sort{j} lists ordered_codon_list{i} among its
% codons_recognized, 0 otherwise. The matrix has length(tRNA) columns;
% columns beyond length(tRNA_sort), if any, stay zero.
n_codons = length(ordered_codon_list);
S = zeros(n_codons,length(tRNA));

for i = 1:n_codons
    for j = 1:length(tRNA_sort)
        if any(strcmp(tRNA_sort{j}.codons_recognized,ordered_codon_list{i}))
            S(i,j) = 1;
        end
    end
end

% S_prod(i,j): relative tRNA abundance times codon usage for every
% recognised codon/tRNA pair. S already encodes which pairs match, so the
% string comparison is not repeated here.
S_prod = zeros(size(S));
[i_hit,j_hit] = find(S);
for k = 1:numel(i_hit)
    S_prod(i_hit(k),j_hit(k)) = rel_tRNA_abundace_sort(j_hit(k))*f(i_hit(k));
end

% S_prod_aa_norm: S_prod rescaled so that the entries belonging to each
% amino acid sum to 1.
% NOTE(review): this block previously wrote its result into S_prod again,
% which left S_prod_aa_norm all zeros. Normalising per amino acid is the
% presumed intent suggested by the variable name - confirm.
S_prod_aa_norm = zeros(size(S));
aa_present = unique(aa_ind_codons);
for k = 1:numel(aa_present)
    rows_of_aa = (aa_ind_codons == aa_present(k));
    aa_total = sum(sum(S_prod(rows_of_aa,:)));
    if aa_total > 0   % leave amino acids without any matched pair at zero
        S_prod_aa_norm(rows_of_aa,:) = S_prod(rows_of_aa,:)/aa_total;
    end
end


%% display raw specificity matrix
% Draw S as a top-down grayscale surface, outline the codon-by-tRNA block
% of every amino acid, and label the tRNA columns and codon rows.
lw = 2;

% Duplicate the last column and the last row of S before plotting.
% NOTE(review): presumably needed so that surf draws a face for the final
% tRNA and the final codon - confirm.
n_tRNA_cols = length(tRNA);
n_codon_rows = length(ordered_codon_list);
S(:,n_tRNA_cols+1) = S(:,n_tRNA_cols);
S(n_codon_rows+1,:) = S(n_codon_rows,:);


% Shadows subplot with a subtightplot wrapper using fixed gaps and margins.
subplot = @(m,n,p) subtightplot (m, n, p, [0.05 0.05], [0.01 0.01], [0.01 0.01]);


figure
hold on
surf(S);%,shading flat

% One rectangle and one name label per amino acid, drawn at z = 1.1.
for i = 1:length(aa.name)
    ind_tRNAs = find(aa_ind_tRNA==i);
    x = ind_tRNAs;
    ind_codons = find(aa_ind_codons==i);
    y = ind_codons;

    outline_color = pale_RGB(Glasbey_colormap(i,:),0.5);
    x_lo = min(x);
    x_hi = max(x)+1;
    y_lo = min(y);
    y_hi = max(y)+1;

    plot3([x_lo x_hi x_hi x_lo x_lo],...
        [y_lo y_lo y_hi y_hi y_lo],...
        1.1*ones(1,5),...
        '-','Color',outline_color,'LineWidth',lw)

    text(x_hi+0.5,mean(y),1.1,aa.name{i},...
        'Color',outline_color)

end

% tRNA names, rotated, placed at y = -2 and coloured by amino acid.
for i = 1:length(tRNA_names)
    tRNA_color = Glasbey_colormap(aa_ind_tRNA(i),:);
    text(i+0.5,-2,tRNA_names{i},...
        'Color',tRNA_color,'FontSize',8,...
        'Rotation',90);
end

% Codon names placed at x = -1 and coloured by amino acid.
for i = 1:length(ordered_codon_list)
    codon_color = Glasbey_colormap(aa_ind_codons(i),:);
    text(-1,i+0.5,ordered_codon_list{i},...
        'Color',codon_color,'FontSize',8);
end

colormap(gray)
view(2)
axis equal
axis off
set(gcf,'Position',[703 63 1328 1038])



%% display product matrix
% Same layout as the raw specificity figure, but showing S_prod (relative
% tRNA abundance times codon usage for each recognised pair).
lw = 2;

% Duplicate the last column and the last row of S_prod before plotting.
% NOTE(review): presumably needed so that surf draws a face for the final
% tRNA and the final codon - confirm.
last_tRNA_col = length(tRNA);
last_codon_row = length(ordered_codon_list);
S_prod(:,last_tRNA_col+1) = S_prod(:,last_tRNA_col);
S_prod(last_codon_row+1,:) = S_prod(last_codon_row,:);


% Shadows subplot with a subtightplot wrapper using fixed gaps and margins.
subplot = @(m,n,p) subtightplot (m, n, p, [0.05 0.05], [0.01 0.01], [0.01 0.01]);


figure
hold on
surf(S_prod);%,shading flat

% Outline and name the block of each amino acid at z = 1.1.
for i = 1:length(aa.name)
    ind_tRNAs = find(aa_ind_tRNA==i);
    x = ind_tRNAs;
    ind_codons = find(aa_ind_codons==i);
    y = ind_codons;

    frame_color = pale_RGB(Glasbey_colormap(i,:),0.5);
    frame_x = [min(x) max(x)+1 max(x)+1 min(x) min(x)];
    frame_y = [min(y) min(y) max(y)+1 max(y)+1 min(y)];
    frame_z = 1.1*[1 1 1 1 1];

    plot3(frame_x,frame_y,frame_z,...
        '-','Color',frame_color,'LineWidth',lw)

    text(max(x)+1.5,mean(y),1.1,aa.name{i},...
        'Color',frame_color)

end

% Column labels: tRNA names, rotated and coloured by amino acid.
for i = 1:length(tRNA_names)
    text(i+0.5,-2,tRNA_names{i},...
        'Color',Glasbey_colormap(aa_ind_tRNA(i),:),...
        'FontSize',8,'Rotation',90);
end

% Row labels: codon names coloured by amino acid.
for i = 1:length(ordered_codon_list)
    text(-1,i+0.5,ordered_codon_list{i},...
        'Color',Glasbey_colormap(aa_ind_codons(i),:),...
        'FontSize',8);
end

colormap(gray)
view(2)
axis equal
axis off
set(gcf,'Position',[703 63 1328 1038])

%% tRNA abundances
% Show the sorted relative tRNA abundances as a one-row grayscale strip
% with the tRNA names written along it.
figure

% Repeat the last value and stack the row twice to form the surf grid.
abundance_row = [rel_tRNA_abundace_sort rel_tRNA_abundace_sort(end)];
surf([abundance_row; abundance_row])

for i = 1:length(tRNA_names)
    name_color = Glasbey_colormap(aa_ind_tRNA(i),:);
    text(i+0.5,2,tRNA_names{i},...
        'Color',name_color,'FontSize',8,...
        'Rotation',90);
end

colormap(gray)
view(2)
axis equal
axis off

%% codon usage
% Show the codon usage f as a one-column grayscale strip with the codon
% names written next to it.
figure

% Repeat the last value, stack the row twice and transpose so that the
% codons run along the vertical axis.
usage_row = [f f(end)];
surf([usage_row; usage_row]')

for i = 1:length(ordered_codon_list)
    codon_color = Glasbey_colormap(aa_ind_codons(i),:);
    text(2,i+0.5,ordered_codon_list{i},...
        'Color',codon_color,'FontSize',8);
end

colormap(gray)
view(2)
axis equal
axis off


%% Computing the correction to the TC on rate.
% Accumulate, over every codon/tRNA pair in which the tRNA recognises the
% codon exactly once, the product (codon usage) x (tRNA abundance).
% Each pair is also logged as a row of pairs_found:
%   [codon index, tRNA index, position in codons_recognized, product]

reads_codons = reads_codons_coli;

codon_usage = reads_codons/sum(reads_codons);
summand = 0;
tic
pairs_found = [];
for i = 1:length(codons_list)
    for j = 1:length(tRNA)
        ind = strcmp(tRNA{j}.codons_recognized,codons_list{i});
        if sum(ind)==1
            pair_product = codon_usage(i)*tRNA{j}.abundance;

            summand = summand + pair_product;
            pairs_found(end+1,:) = [i j find(ind) pair_product];
        end
    end
end

%% Print the codon/tRNA pairs, largest product first
% Columns of pairs_found: codon index, tRNA index, position of the codon in
% that tRNA's codons_recognized, and the usage-times-abundance product.

if isempty(pairs_found)
    % Nothing to sort or print; avoids indexing an empty matrix with (:,end).
    ind_sort = [];
    fprintf('No codon/tRNA pairs were found.\n');
else
    [~,ind_sort] = sort(pairs_found(:,end),'descend');
end

for i = 1:length(ind_sort)
    pair_row = pairs_found(ind_sort(i),:);
    i_codon = pair_row(1);
    i_tRNA = pair_row(2);
    i_recognized = pair_row(3);

    % Hand the format straight to fprintf. Formatting with sprintf first and
    % then reusing the result as a format string would misinterpret any '%'
    % or '\' contained in a codon or tRNA name.
    fprintf('%3s\t%3s\t%8s\tcodon usage = %.4f\ttRNA_i = %.4f\tproduct = %.5f\n',...
        codons_list{i_codon}, ...
        tRNA{i_tRNA}.codons_recognized{i_recognized},...
        tRNA{i_tRNA}.name,codon_usage(i_codon),...
        tRNA{i_tRNA}.abundance, pair_row(4));

end



