function [genes_by_stop, n_type_stop, stop_tetra] = translation_properties_stop_tetra(data_dir,file_name,...
    species,codon_size,buffer_start,buffer_stop,winsor_cut,density_thresh,length_thresh)
% TRANSLATION_PROPERTIES_STOP_TETRA  Group genes by stop codon + following nucleotide.
%
%   For every annotated gene that passes the length and read-density
%   filters, the 4-nt "stop tetranucleotide" (stop codon plus the next
%   nucleotide, read on the coding strand) is looked up, and the gene's
%   name, mean winsorized read density, length and translation efficiency
%   (TE) are stored under that tetranucleotide.
%
%   Inputs
%     data_dir, file_name : forwarded to extract_RNA_seq_data(file_name,
%                           data_dir, genome_size); row 1 of its result is
%                           used as the forward strand, row 2 as reverse.
%     species             : 'bsub', 'vnat1', 'vnat2', 'caulo' or 'ecoli'.
%     codon_size          : codon length in nt. The lookup table holds 4-nt
%                           entries, so only codon_size == 3 can match.
%     buffer_start        : codons trimmed at the gene start before averaging.
%     buffer_stop         : codons trimmed at the gene stop before averaging.
%     winsor_cut          : second argument passed to winsorize().
%     density_thresh      : a gene is kept only if its mean winsorized
%                           density is strictly greater than this value.
%     length_thresh       : a gene is kept only if (stop - start) is
%                           strictly greater than this value.
%
%   Outputs
%     genes_by_stop : 1x12 struct array (one element per entry of
%                     stop_tetra) with fields
%                       name        - cell array of gene names
%                       density     - mean winsorized read density
%                       gene_length - stop - start (after adjustment)
%                       TE          - TE per gene, same length as name;
%                                     NaN when no unique TE entry is found
%     n_type_stop   : 12x1 number of accepted genes per tetranucleotide.
%     stop_tetra    : 1x12 cell array of tetranucleotides, ordered
%                     TAA[A T C G], TGA[A T C G], TAG[A T C G].
%
%   Side effects: prints progress/timing and the number of genes rejected
%   for low read density.

%% get gene data and ribosome profiling data
% All TE tables live in the same directory; only the file name varies.
te_dir = '/Users/jbl/Documents/MIT/research/D1/protein_level_ratio';

switch species
    case 'bsub'
        [start_forward, stop_forward, genes_f, ...
            start_reverse, stop_reverse, genes_r] = get_bsub_genes();
        genome = get_bsub_genome();
        % Shift the stop-side coordinate by 3 nt on both strands.
        % NOTE(review): presumably this excludes the stop codon from the
        % gene body so the tetranucleotide starts right after it - confirm.
        stop_forward = stop_forward-3;
        start_reverse = start_reverse+3;
        te_file = 'TE_bsub_20171202.txt';

    case 'ecoli'
        % NOTE(review): no 3-nt shift is applied here, unlike the other
        % species; presumably get_ecoli_genes already returns shifted
        % coordinates - confirm.
        [start_forward, stop_forward, genes_f, ...
            start_reverse, stop_reverse, genes_r] = get_ecoli_genes();
        genome = get_ecoli_genome();
        te_file = 'TE_ecoli_20171202.txt';

    case {'vnat1','vnat2','caulo'}
        % These species share the same annotation reader; only the
        % annotation location and TE table differ.
        switch species
            case 'vnat1'
                dir_name = '/Users/jbl/Documents/MIT/research/D1/gene_annotations/vibrio_natriegens/chr1_CP009977.1';
                file_name2 = 'CP009977.1.faa';
                te_file = 'TE_vnat_20171202.txt';
            case 'vnat2'
                dir_name = '/Users/jbl/Documents/MIT/research/D1/gene_annotations/vibrio_natriegens/chr2_CP009978.1';
                file_name2 = 'CP009978.1.faa';
                te_file = 'TE_vnat_20171202.txt';
            case 'caulo'
                dir_name = '/Users/jbl/Documents/MIT/research/D1/gene_annotations/caulobacter_crescentus';
                file_name2 = 'CP001340.1.faa';
                te_file = 'TE_caulo_20171202.txt';
        end
        [gene_names, strand, start, stop] = read_gene_annotation_v2(file_name2,dir_name);
        is_fwd = (strand==1);
        is_rev = (strand==0);
        genes_f = gene_names(is_fwd)';
        genes_r = gene_names(is_rev)';
        start_forward = start(is_fwd);
        start_reverse = start(is_rev)+3;
        stop_forward = stop(is_fwd)-3;
        stop_reverse = stop(is_rev);

        genome = get_genomes(species);

    otherwise
        error('translation_properties_stop_tetra:unknownSpecies', ...
            'Unknown species "%s" (expected bsub, vnat1, vnat2, caulo or ecoli).', species);
end

genome_size = length(genome);
data = zeros(genome_size,2);
% index key:
% 1: forward
% 2: reverse
counts = extract_RNA_seq_data(file_name,data_dir,genome_size);
data(:,1) = counts(1,:);
data(:,2) = counts(2,:);

disp('Done extracting data.')

%% build the stop-tetranucleotide lookup table
nucs = {'A','T','C','G'};
stop_codons = {'TAA','TGA','TAG'};
stop_tetra = cell(1,length(stop_codons)*length(nucs));
for i = 1:length(stop_codons)
    for j = 1:length(nucs)
        stop_tetra{(i-1)*length(nucs)+j} = [stop_codons{i} nucs{j}];
    end
end

n_type_stop = zeros(length(stop_tetra),1);

% name is a cell from the start so that name{end+1} never relies on an
% implicit char-to-cell conversion.
genes_by_stop = repmat(struct('name',{{}},'density',[],'gene_length',[]), ...
    1,length(stop_tetra));

%% assign genes to their stop tetranucleotide
tic
n_low_density = 0;          % genes passing the length filter but not the density filter
tetra_len = codon_size+1;   % stop codon plus one downstream nucleotide

% Forward strand. The last forward gene is not visited.
% NOTE(review): presumably skipped to avoid reading past the genome end - confirm.
for i = 1:length(start_forward)-1

    gene_length = stop_forward(i)-start_forward(i);
    if ~(gene_length>length_thresh)
        continue
    end

    % Gene body with the requested number of codons trimmed at each end.
    body_idx = (start_forward(i)+codon_size*buffer_start):(stop_forward(i)-codon_size*buffer_stop);
    mean_gene_reads = mean(winsorize(data(body_idx,1),winsor_cut));

    tetra_first = stop_forward(i)+1;
    tetra_last = stop_forward(i)+tetra_len;
    if tetra_first<1 || tetra_last>genome_size
        error('translation_properties_stop_tetra:tetraOutOfRange', ...
            'Stop tetranucleotide of forward gene %d (%d:%d) lies outside the genome (1:%d).', ...
            i,tetra_first,tetra_last,genome_size);
    end
    tetra_end = genome(tetra_first:tetra_last);
    ind_tetra = find(strcmp(stop_tetra,tetra_end));

    if ~isempty(ind_tetra) && (mean_gene_reads>density_thresh)

        n_type_stop(ind_tetra) = n_type_stop(ind_tetra)+1;

        genes_by_stop(ind_tetra).name{end+1} = genes_f{i};
        genes_by_stop(ind_tetra).density(end+1) = mean_gene_reads;
        genes_by_stop(ind_tetra).gene_length(end+1) = gene_length;

        if mod(i,300)==0
            toc
        end
    elseif ~(mean_gene_reads>density_thresh)
        n_low_density = n_low_density+1;
    end
end

% Reverse strand. The first reverse gene is not visited.
% NOTE(review): presumably skipped to avoid reading before position 1 - confirm.
% The length filter is applied first, as on the forward strand, so the
% low-density count means the same thing on both strands.
for i = 2:length(start_reverse)

    gene_length = stop_reverse(i)-start_reverse(i);
    if ~(gene_length>length_thresh)
        continue
    end

    % On this strand the stop side is at start_reverse, so the buffers swap ends.
    body_idx = (start_reverse(i)+codon_size*buffer_stop):(stop_reverse(i)-codon_size*buffer_start);
    mean_gene_reads = mean(winsorize(data(body_idx,2),winsor_cut));

    tetra_first = start_reverse(i)-tetra_len;
    tetra_last = start_reverse(i)-1;
    if tetra_first<1 || tetra_last>genome_size
        error('translation_properties_stop_tetra:tetraOutOfRange', ...
            'Stop tetranucleotide of reverse gene %d (%d:%d) lies outside the genome (1:%d).', ...
            i,tetra_first,tetra_last,genome_size);
    end
    % Reverse-complement so the tetranucleotide reads in the coding direction.
    tetra_end = RCsequence(genome(tetra_first:tetra_last));
    ind_tetra = find(strcmp(stop_tetra,tetra_end));

    if ~isempty(ind_tetra) && (mean_gene_reads>density_thresh)

        n_type_stop(ind_tetra) = n_type_stop(ind_tetra)+1;

        genes_by_stop(ind_tetra).name{end+1} = genes_r{i};
        genes_by_stop(ind_tetra).density(end+1) = mean_gene_reads;
        genes_by_stop(ind_tetra).gene_length(end+1) = gene_length;

        if mod(i,300)==0
            toc
        end
    elseif ~(mean_gene_reads>density_thresh)
        n_low_density = n_low_density+1;
    end
end

fprintf('Genes rejected for low read density: %d\n',n_low_density);


%% assigning TE value:

% Read the TE table (gene name <tab> TE, one header line). Open it by full
% path; fall back to the bare name so a copy on the MATLAB path still works.
te_path = fullfile(te_dir,te_file);
fid = fopen(te_path);
if fid<0
    fid = fopen(te_file);
end
if fid<0
    error('translation_properties_stop_tetra:teFileNotFound', ...
        'Could not open TE file "%s" (also tried "%s" on the MATLAB path).',te_path,te_file);
end
close_te_file = onCleanup(@() fclose(fid));   % close even if textscan throws
content = textscan(fid,'%s %f','Delimiter','\t','HeaderLines',1);
clear close_te_file                            % closes the file now
genes_TE = content{1};
TE = content{2};

% Synonym tables exist for ecoli and bsub only; for the other species the
% TE gene names themselves are passed as the synonym list.
[ecoli_synonyms,bsub_synonyms] = get_gene_synonyms();
if strcmp(species,'ecoli')
    syn = ecoli_synonyms;
elseif strcmp(species,'bsub')
    syn = bsub_synonyms;
else
    syn = genes_TE;
end

% assigning TE to stop tetranucleotide
for i = 1:length(stop_tetra)
    n_genes = length(genes_by_stop(i).name);
    % Start from NaN so TE always lines up with name; genes with no match
    % or an ambiguous match keep NaN instead of a zero-filled gap or a
    % truncated vector.
    genes_by_stop(i).TE = NaN(1,n_genes);
    for j = 1:n_genes
        id = strcmp_synonyms(genes_TE,genes_by_stop(i).name{j},syn,1);
        if isscalar(id) && ~isnan(id)
            genes_by_stop(i).TE(j) = TE(id);
        end
    end
end