function explorePhos01()
% explorePhos01  Exploratory clustering of proteome and phospho MS time courses.
%
%   1. Loads three proteome tables through loadMSdata, pools their
%      standardized profiles and clusters them with k-means.
%   2. Loads the phospho table, detrends and standardizes it inline, and
%      clusters it the same way.
%   3. Shows a few ad-hoc views: members of one cluster and the profile(s)
%      of one gene symbol.
%
%   Takes no arguments and returns nothing; the results are figures and
%   Command Window output.  Uses plot_clusters and match, which are not
%   defined in the visible part of this file.

% ---- plotting constants -------------------------------------------------
plt.MS_clusters = 1;
plt.dump = 0;
plt.Gray = [.4 .4 .4];
plt.GrayLight = [.6 .6 .6];
plt.Cyan = [.0 1. 1.];
plt.Blue = [.04 .14 .98];        % MATHEMATICA blue
plt.Green = [.16 1. .18];        % MATHEMATICA green
plt.GreenDark = [.26 .58 0.17];  % dark green
plt.Magenta = [1. 0. 1.];        % magenta
plt.Khaki = [.52 .38 .12];       % khaki

pth.data = "/Users/peshaG4/Research/oO/data/";

% ---- ID -> gene symbol table (column 1 = ID, column 3 = symbol) ----------
Defs = importdata('/Users/peshaG4/Research/GeneSymbol/data/XlaHuman.27150636.proteins.genesym.defs.uniq.txt');
Defs.Sym = Defs.textdata(:,3);
Defs.IDs = Defs.textdata(:,1);

% ---- proteome data sets, detrend method 4, data columns 1..9 ------------
Haas17   = loadMSdata(pth.data, 'oocytesTimecourseDP1.Sym.csv', Defs, 2, 4, 1:9);
Marian17 = loadMSdata(pth.data, 'norm24Fr.Sym.csv', Defs, 2, 4, 1:9);
Haas19   = loadMSdata(pth.data, 'Haas2019/Leon_proteome_newDatabase_20190115.csv', Defs, 1, 4, 1:9);  % proteome JGI 9.1.2+mito reference

MS2.stages = [0 2:9]; % [0 2:10];
MS2.stand_data = [Haas17.stand_data; Marian17.stand_data; Haas19.stand_data];
MS2.IDs = [Haas17.IDs; Marian17.IDs; Haas19.IDs];
MS2.Sym = [Haas17.Sym Marian17.Sym Haas19.Sym];

% ---- cluster the pooled protein data -------------------------------------
data = MS2.stand_data;
MS2.cln = 96;  cln = MS2.cln;
MS2.ind = clusterAndPlotMS(data, MS2.stages, cln);

% ---- phospho data --------------------------------------------------------
% Ph = importdata('Leon_phospho_20181220.srt.csv');  % actual phospho data
Ph = importdata(strcat(pth.data, 'Haas2019/Leon_phosph_newDatabase_20190115.txt'));

Ph.IDs = Ph.textdata(2:end,1);   % first row of textdata is skipped (header)
Ph.Sym = symbolsForIDs(Defs, Ph.IDs);

if 0   % manual browsing of individual profiles; disabled by default
    figure;
    for ndx = 1:length(Ph.IDs)
        plot([0 2:9], Ph.data(ndx,1:9),'LineWidth', 3);title(Ph.Sym{ndx});
        pause;
    end
end

% From here on MS2 describes the phospho table (the proteome MS2 is dropped).
MS2 = Ph;
MS2.raw_data = MS2.data(:, 1:9);
MS2.numChannels = size(MS2.data, 2) - 2;
imprt.MS_detrend_method = 4;
switch imprt.MS_detrend_method
    case 1     %  pre-computed median for most populated flat cluster
        %  HSP90B1, RPL21 (most ribosomal, heat shock) are in this cluster
        detrend = [1.12 1.10 0.88 1.06 0.90 0.94];
    case 11  %  cluster raw MS into 16 clusters, two largest are flat
        %  one 1212 and 1147 proteins (total 2359/5960 or 40%)
        detrend = [1.09 1.06 0.86 1.06 0.89 1.04];
    case 2
        detrend = ones(1,6);   %  no detrending
    case 3        % by single protein HSP90B1
        detrend = MS2.raw_data(find(match(MS2.IDs, 'XeXenL6RMv10029501m')),:);
        %detrend = MS2.raw_data(find(match(MS.IDs, 'XeXenL6RMv10006708m')),:); %RPL21
    case 4
        detrend = sum(MS2.raw_data, 1);  %  so total protein is flat
    case 5
        detrend = [1.09 1.06 0.85 1.05 0.92 1.03]; % 1000 flat ribosomal "U" peptides
    case 55
        detrend = [1.10 1.05 0.86 1.03 0.94 1.02]; % 4000 flat ribosomal "U+R" peptides
    case 56
        detrend = [1.12 1.06 0.86 1.04 0.92 1.01]; % sum over 5000 Ribo U+R peptides
    case 57
        detrend = [1.12 1.07 0.87 1.03 0.92 0.99]; % sum over peptides of Ribo+Ptsm+Glyco
    otherwise    %  by median (explicit dim so a one-row table still gives a row vector)
        detrend = median(MS2.raw_data, 1);
end
% The fixed 6-element vectors above (and a missing/ambiguous match in case 3)
% do not fit the 9-column raw_data; fail with a clear message instead of a
% size error from the element-wise division below.
if ~isequal(size(detrend), [1, size(MS2.raw_data, 2)])
    error('explorePhos01:detrendSize', ...
        'Detrend vector is %dx%d but raw data has %d columns (detrend method %d).', ...
        size(detrend,1), size(detrend,2), size(MS2.raw_data,2), imprt.MS_detrend_method);
end
MS2.numGenes = size(MS2.raw_data, 1);
detrend = detrend/mean(detrend);  % scale to mean 1, i.e. it sums to its own length
MS2.detrend  = MS2.raw_data ./ repmat(detrend, MS2.numGenes, 1);
% Each row is rescaled so that it sums to numChannels.
% NOTE(review): numChannels is size(data,2)-2 while raw_data has 9 columns - confirm intended.
MS2.stand_data = MS2.numChannels * MS2.detrend ./ repmat(sum(MS2.detrend,2),1,size(MS2.detrend,2));
MS2.dynamism = zeros(MS2.numGenes, 1);
MS2.stages = [0 2:9];

MS2.linestyle = '-'; MS2.linewidth = 2; MS2.linemarker = 'o';
MS2.lbl = sprintf('Protein-C');  % "C" for Complete
MS2.col = sprintf('#00FF00'); MS2.color = plt.Green;  % [0. 1. 0.];  % Green

% ---- cluster the phospho data --------------------------------------------
data = MS2.stand_data;
MS2.cln = 96;  cln = MS2.cln;
MS2.ind = clusterAndPlotMS(data, MS2.stages, cln);

  % what is in this cluster (printed on purpose: no trailing semicolon)
MS2.Sym(sort(find(MS2.ind == 80)'))

  % plot by symbol
sym = 'CDK1';
nd = find(match(MS2.Sym, sym));
data = MS2.stand_data(nd,:);
if ~isempty(nd)
    figure; plot(data'); title(sym); set(gca,'Ylim',[0, max(max(data))]);
else
    disp('gene not found ');
end
%
%  try recluster without flat proteins
%  (left as a starting point; the subset is not used further in this function)
data = MS2.stand_data(ismember(MS2.ind ,[10:36]),:);

end


function ind = clusterAndPlotMS(data, stages, cln)
% Cluster the rows of DATA into CLN k-means clusters (cosine distance),
% relabel clusters so that label 1 is the most populated one, and draw the
% two plot_clusters views.  Returns the relabelled cluster index per row.
    [ind, ~] = kmeans(data, cln, 'replicates',3,'maxiter', 100000, 'dist', 'cosi', ...
        'start', 'cluster', 'emptyaction', 'singleton' ,'Display','iter');

    clusterSize = zeros(1, cln);
    for i = 1:cln
        clusterSize(i) = sum(ind == i);
    end
    [clusterSize, ord] = sort(clusterSize, 'descend');  % re-order according to the cluster size
    old = ind;
    for i = 1:cln
        ind(old == ord(i)) = i;
    end

    plot_clusters(data, stages, ind, cln, 1,  0,1,[], [])

    % Take per-cluster colours from the "lines" colormap of the current
    % figure, repeated so that there are at least cln rows.
    colormap lines
    gg = colormap;
    gg = repmat(gg, max(2, ceil(cln/size(gg,1))), 1);
    lineProps = cell(1, cln);
    for i = 1:cln
        lineProps{i}.Color = gg(i,:);
        lineProps{i}.LineStyle = '-';
        % line width grows with the fraction of rows in the cluster
        lineProps{i}.LineWidth = ceil(30*clusterSize(i)/size(ind,1));
    end
    plot_clusters(data, stages, ind, cln, 0, 0,1,lineProps,[])
end


function syms = symbolsForIDs(Defs, ids)
% Map each ID in IDS to its symbol in Defs (first match in Defs.IDs);
% IDs that are not listed get "".  Returns a 1-by-numel(ids) cell array.
    [found, loc] = ismember(ids, Defs.IDs);   % one pass instead of one ismember per ID
    syms = cell(1, numel(ids));
    for k = 1:numel(ids)
        if found(k)
            syms{k} = Defs.Sym{loc(k)};
        else
            syms{k} = "";
        end
    end
end


function MS = loadMSdata(pth, file, Defs, colID, detrend_method, cols)
% loadMSdata  Import one MS table, attach gene symbols, detrend and standardize.
%
%   pth            - directory prefix, concatenated with FILE by strcat
%   file           - file name (relative to PTH) read with importdata
%   Defs           - struct with cell arrays Defs.IDs and Defs.Sym (same order)
%   colID          - in which column of the text data the gene ID is found
%   detrend_method - selector for the per-column detrend vector (see switch
%                    below); any unlisted value uses the column-wise median
%   cols           - columns of the numeric data that hold the time course
%
%   Returns the importdata struct MS extended with:
%     raw_data    - MS.data(:, cols)
%     numGenes    - number of rows of raw_data
%     numChannels - number of columns of MS.data
%     IDs         - text column COLID without its first (header) row
%     Sym         - 1-by-numGenes cell, symbol from Defs or "" when unknown
%     detrend     - raw_data divided column-wise by the normalized detrend vector
%     stand_data  - detrend rescaled so that every row sums to numChannels
%
%   Raises loadMSdata:detrendSize when the detrend vector does not have one
%   entry per column of raw_data.

    % MS.stages = times;     % time  started then about 2 hours till the first point
    MS = importdata(strcat(pth, file));
    MS.raw_data = MS.data(:, cols);
    MS.numGenes = size(MS.raw_data, 1);
    % NOTE(review): counts all numeric columns, not numel(cols) - confirm intended.
    MS.numChannels = size(MS.data, 2);
    MS.IDs = MS.textdata(2:end, colID);

    % Symbol lookup: one vectorized ismember (first match wins) instead of
    % scanning Defs.IDs once per gene; also defines MS.Sym for empty tables.
    [found, loc] = ismember(MS.IDs, Defs.IDs);
    MS.Sym = cell(1, numel(MS.IDs));
    for ndx = 1:numel(MS.IDs)
        if found(ndx)
            MS.Sym{ndx} = Defs.Sym{loc(ndx)};
        else
            MS.Sym{ndx} = "";
        end
    end

    switch detrend_method
    case 1     %  pre-computed median for most populated flat cluster
        %  HSP90B1, RPL21 (most ribosomal, heat shock) are in this cluster
        detrend = [1.12 1.10 0.88 1.06 0.90 0.94];
    case 11  %  cluster raw MS into 16 clusters, two largest are flat
        %  one 1212 and 1147 proteins (total 2359/5960 or 40%)
        detrend = [1.09 1.06 0.86 1.06 0.89 1.04];
    case 2
        detrend = ones(1,6);   %  NONE - no detrending
    case 3        % by single protein HSP90B1
        detrend = MS.raw_data(find(match(MS.Sym, 'HSP90B1')),:);
        %detrend = MS.raw_data(find(match(MS.Sym, 'RPL21')),:);   % RPL21
    case 4
        detrend = sum(MS.raw_data, 1);  %  so total protein is flat
    case 5
        detrend = [1.09 1.06 0.85 1.05 0.92 1.03]; % 1000 flat ribosomal "U" peptides
    case 55
        detrend = [1.10 1.05 0.86 1.03 0.94 1.02]; % 4000 flat ribosomal "U+R" peptides
    case 56
        detrend = [1.12 1.06 0.86 1.04 0.92 1.01]; % sum over 5000 Ribo U+R peptides
    case 57
        detrend = [1.12 1.07 0.87 1.03 0.92 0.99]; % sum over peptides of Ribo+Ptsm+Glyco
    otherwise    %  by median of this table (explicit dim keeps a row vector for one-row input)
        detrend = median(MS.raw_data, 1);
    end

    % The fixed vectors have 6 entries and case 3 may match zero or several
    % rows; report that clearly rather than failing inside the division.
    if ~isequal(size(detrend), [1, size(MS.raw_data, 2)])
        error('loadMSdata:detrendSize', ...
            'Detrend vector is %dx%d but raw data has %d columns (detrend method %d).', ...
            size(detrend,1), size(detrend,2), size(MS.raw_data,2), detrend_method);
    end

    detrend = detrend/mean(detrend);  % scale to mean 1, i.e. it sums to its own length
    MS.detrend  = MS.raw_data ./ repmat(detrend, MS.numGenes, 1);
    MS.stand_data = MS.numChannels * MS.detrend ./ repmat(sum(MS.detrend,2),1,size(MS.detrend,2));
end
