%
%   explore  oOcyte data 
% 

% ---- Plot settings ------------------------------------------------------
% Fields are assigned one by one (rather than rebuilding the struct) so that
% any fields already present on "plt" are left untouched.
% Two numeric flags; what they control is decided by code outside this section.
plt.MS_clusters = 1;
plt.dump        = 0;

% Named colors as RGB triplets with components in [0, 1].
plt.Gray      = [0.40 0.40 0.40];
plt.GrayLight = [0.60 0.60 0.60];
plt.Cyan      = [0.00 1.00 1.00];
plt.Blue      = [0.04 0.14 0.98];   % Mathematica blue
plt.Green     = [0.16 1.00 0.18];   % Mathematica green
plt.GreenDark = [0.26 0.58 0.17];   % dark green
plt.Magenta   = [1.00 0.00 1.00];   % magenta
plt.Khaki     = [0.52 0.38 0.12];   % khaki

    % ---- Load the MS table ----------------------------------------------
    % The code below reads both MS2.data (numeric block) and MS2.textdata
    % (text block), so importdata must return a struct with those fields.
    % The trailing semicolon keeps the whole struct from being echoed.
    % NOTE(review): the original remark says "this is 16 fractions" while the
    % file name says "24Fr" and 10 columns are used below -- confirm which is right.
    MS2 = importdata('Marian/norm24Fr.Sym.csv');   % this is 16 fractions
    if size(MS2.data, 2) < 10
        error('exploreOocyte:tooFewColumns', ...
            'Expected at least 10 numeric columns in the MS table, found %d.', ...
            size(MS2.data, 2));
    end
    % Only the first 10 numeric columns are analysed; the original comment
    % says the 11-th column is "skin" aka follicle material and is excluded.
    MS2.raw_data = MS2.data(:, 1:10);
    % Channel count is taken from raw_data itself so that it always matches
    % the width of the matrices derived from raw_data later in the script
    % (identical to the former size(MS2.data,2)-1 for an 11-column file).
    MS2.numChannels = size(MS2.raw_data, 2);
% One stage label per channel (10 values: 0, then 2 through 10).
MS2.stages = [0 2:10];
% Per-row annotations; row 1 of textdata is skipped (presumably a header row).
MS2.IDs = MS2.textdata(2:end, 2);
MS2.Sym = MS2.textdata(2:end, 1);
MS2.SymGenome = MS2.textdata(2:end, 3);
% Disabled exploratory plots. NOTE(review): they reference a variable "oO"
% that is not defined in this section.
% figure;plot(oO.data(:,1), oO.data(:,2), 'ro'); xlabel('time Zero'); ylabel('time 10 h');gname(oO.Sym)
%  figure;for i=1:length(oO.data),  plot(oO.data(i,:));title(oO.Sym(i));ylabel(oO.IDs(i));pause;end
% ---- Choose the per-channel detrending profile ---------------------------
% "detrend" must end up as a 1-by-(number of raw_data columns) row vector;
% every column of MS2.raw_data is later divided by the matching entry.
% NOTE(review): the pre-computed profiles below (cases 1, 11, 5, 55, 56, 57)
% have 6 entries while MS2.raw_data is built from 10 columns, so selecting
% them is rejected by the size check after the switch until they are
% recomputed for the current data.
imprt.MS_detrend_method = 4;
switch imprt.MS_detrend_method
    case 1     %  pre-computed median for most populated flat cluster
        %  HSP90B1, RPL21 (most ribosomal, heat shock) are in this cluster
        detrend = [1.12 1.10 0.88 1.06 0.90 0.94];
    case 11  %  cluster raw MS into 16 clusters, two largest are flat
        %  one 1212 and 1147 proteins (total 2359/5960 or 40%)
        detrend = [1.09 1.06 0.86 1.06 0.89 1.04];
    case 2
        % no detrending: one entry per channel (was hard-coded to 6 entries,
        % which could never match the 10-column raw_data)
        detrend = ones(1, size(MS2.raw_data, 2));
    case 3        % by single protein HSP90B1
        % "match" is not defined in this section; find() is kept because its
        % return type (logical mask vs. numeric) cannot be confirmed here.
        detrend = MS2.raw_data(find(match(MS2.IDs, 'XeXenL6RMv10029501m')),:);
    case 4
        detrend = sum(MS2.raw_data, 1);  %  so total protein is flat
    case 5
        detrend = [1.09 1.06 0.85 1.05 0.92 1.03]; % 1000 flat ribosomal "U" peptides
    case 55
        detrend = [1.10 1.05 0.86 1.03 0.94 1.02]; % 4000 flat ribosomal "U+R" peptides
    case 56
        detrend = [1.12 1.06 0.86 1.04 0.92 1.01]; % sum over 5000 Ribo U+R peptides
    case 57
        detrend = [1.12 1.07 0.87 1.03 0.92 0.99]; % sum over peptides of Ribo+Ptsm+Glyco
    otherwise    %  by median
        % explicit dimension so a single-row table still yields one value per channel
        detrend = median(MS2.raw_data, 1);
end  %
% Fail early with a readable message instead of a dimension-mismatch error
% further down (wrong-length constant, or zero/multiple ID matches in case 3).
if ~isequal(size(detrend), [1, size(MS2.raw_data, 2)])
    error('exploreOocyte:detrendSize', ...
        'Detrend method %d produced a %d-by-%d profile; expected 1-by-%d to match MS2.raw_data.', ...
        imprt.MS_detrend_method, size(detrend, 1), size(detrend, 2), size(MS2.raw_data, 2));
end
% ---- Detrend and standardize the profiles --------------------------------
MS2.numGenes = size(MS2.raw_data, 1);

% Rescale the detrending profile to unit mean, i.e. its entries sum to the
% number of entries it has.
detrend = detrend / mean(detrend);

% Divide every row of the raw table, channel by channel, by that profile.
MS2.detrend = bsxfun(@rdivide, MS2.raw_data, detrend);

% Scale each row so that it sums to MS2.numChannels.
MS2.stand_data = bsxfun(@rdivide, MS2.numChannels * MS2.detrend, sum(MS2.detrend, 2));

% "Dynamism" of a row: pdist 'cos' (cosine) distance between its standardized
% profile and a flat all-ones profile of the same length.
MS2.dynamism = zeros(MS2.numGenes, 1);
for i = 1:MS2.numGenes
    MS2.dynamism(i) = pdist([MS2.stand_data(i,:); ones(1, MS2.numChannels)], 'cos');
end

% Display attributes stored alongside this data set.
MS2.linestyle  = '-';
MS2.linewidth  = 2;
MS2.linemarker = 'o';
MS2.lbl   = 'Protein-C';   % "C" for Complete
MS2.col   = '#00FF00';
MS2.color = plt.Green;     % [0. 1. 0.];  % Green
% ---- Cluster the standardized profiles and plot the clusters --------------
data = MS2.stand_data;
MS2.cln = 96;          % number of k-means clusters
cln = MS2.cln;

% k-means with cosine distance; option names are spelled out in full rather
% than relying on abbreviation matching ('dist', 'cosi', ...).
[MS2.ind, c] = kmeans(data, cln, 'Replicates', 1, 'MaxIter', 10000, ...
    'Distance', 'cosine', 'Start', 'cluster', 'EmptyAction', 'singleton', ...
    'Display', 'iter');

% Count members per cluster, using the labels as returned by kmeans.
clstProt.size = zeros(1, cln);
for i = 1:cln
    clstProt.size(i) = sum(MS2.ind == i);
end

% Relabel so that cluster 1 is the largest, cluster 2 the next largest, ...
% A copy of the labels is compared against, so already-relabelled rows are
% not touched a second time.
[~, ord] = sort(clstProt.size, 'descend');
buf = MS2.ind;
for i = 1:cln
    MS2.ind(buf == ord(i)) = i;
end
clear buf;
% NOTE(review): clstProt.size is still indexed by the ORIGINAL kmeans labels;
% the size of relabelled cluster i is clstProt.size(ord(i)).

plot_clusters(data, MS2.stages, MS2.ind, cln, 1,  0,1,[], [])

% Take the colors from the "lines" colormap (canonical lower-case name) of
% the current figure and duplicate the table once, as before.
colormap lines
gg = colormap;
gg = [gg; gg];

% Start from a clean cell array so a leftover "lineProps" from an earlier
% run (possibly longer, or of a different type) cannot leak into the plot.
lineProps = cell(1, cln);
for i = 1:cln
    % wrap around the color table so any cln works; same rows as before
    % whenever i does not exceed the table length
    lineProps{i}.Color = gg(mod(i - 1, size(gg, 1)) + 1, :);
    lineProps{i}.LineStyle = '-';
    % line width grows with the share of rows that fall in cluster i
    lineProps{i}.LineWidth = ceil(30*sum(MS2.ind==i)/size(MS2.ind,1));
end
plot_clusters(data, MS2.stages, MS2.ind, cln, 0, 0,1,lineProps,[])
1;   % no-op statement kept from the original script


