% Genus-pair power-law analysis.
%
% For every file named <Genus1>_<Genus2> in <Directory>/SequencesGenera the
% script
%   1. reads two numeric columns, r (column 1) and m (column 2)
%      (presumably a length and a count per length -- TODO confirm),
%   2. pools them over all files seen so far, bins m logarithmically in r,
%      and plots the pooled histogram against a fitted A/r^3 line
%      (subplot 4),
%   3. bins the data of the current pair alone, estimates the prefactor A
%      of an A/r^3 law from the bins with r > 1e3 (subplots 1),
%   4. relates A to the tree distance between the two genera
%      (subplots 2 and 3), and waits for a key/mouse press.
%
% Requires phytreeread/pdist (phytree) and corr from the MATLAB toolboxes.

% Clear workspace variables only; "clear all" would also drop breakpoints
% and cached functions.
clearvars;

% Single data root used for the tree, the sequence files and the genus
% lengths, selected per platform so that all inputs come from the same
% directory tree.
if ispc
    Directory = 'C:\Users\Misha\workspace\DoMummer\Data\API\RefSeq\1e5\';
else
    Directory = '/home/sheinman/workspace/DoMummer/Data/RefSeq/1e5/';
end
SeqDir = fullfile(Directory, 'SequencesGenera');
LenDir = fullfile(Directory, 'Lengths');

% Phylogenetic tree of the genera and the leaf-to-leaf distance matrix.
tr = phytreeread(fullfile(Directory, 'Bacteria_genus.nwk'));
genus_names = get(tr, 'LeafNames');
distM = pdist(tr, 'nodes', 'leaves', 'squareform', true);

rmin = 300;            % lower edge of the first r-bin
Pref = [];             % accepted prefactors A, one per genus pair
Dist = [];             % matching tree distances, rounded to half decades
GenusInterest = '*';   % genus to analyse; '*' selects every pair
L1Tot = 0;
L2Tot = 0;

% Collect the files in which the genus of interest is the first or the
% second name. With the wildcard both patterns match the same files (and a
% pair G_G matches both patterns for a specific genus), so the names are
% de-duplicated to avoid pooling and plotting the same file twice.
files1 = dir(fullfile(SeqDir, [GenusInterest, '_*']));
files2 = dir(fullfile(SeqDir, ['*_', GenusInterest]));
allFiles = [files1(:); files2(:)];
if isempty(allFiles)
    fileNames = {};
    warning('No genus-pair files found in %s', SeqDir);
else
    allFiles = allFiles(~[allFiles.isdir]);          % ignore directories
    fileNames = unique({allFiles.name}, 'stable');   % keep listing order
end

mT = []; rT = [];      % m and r pooled over all processed files
for iFile = 1:numel(fileNames)
    Name = fileNames{iFile};
    fprintf('%s\n', Name);

    % File name convention: <Genus1>_<Genus2>
    C = strsplit(Name, '_');
    Genus1 = C{1};
    Genus2 = C{2};

    % Both genera must be leaves of the tree, otherwise no distance exists.
    if ~(ismember(Genus1, genus_names) && ismember(Genus2, genus_names))
        fprintf('Skipping %s: genus not found in the tree.\n', Name);
        continue;
    end

    data = importdata(fullfile(SeqDir, Name));
    r = data(:, 1);
    m = data(:, 2);

    L1 = importdata(fullfile(LenDir, Genus1));
    L2 = importdata(fullfile(LenDir, Genus2));
    Count = L1 * L2;   % normalisation of the per-pair histogram

    % Accumulate the length of the partner genus in L2Tot and remember the
    % length of the other genus in L1Tot.
    if strcmp(Genus1, GenusInterest)
        L2Tot = L2Tot + L2;
        L1Tot = L1;
    else
        L2Tot = L2Tot + L1;
        L1Tot = L2;
    end
    mT = [mT; m]; rT = [rT; r]; %#ok<AGROW>

    % ---- Pooled histogram over all files so far (log bins, 0.05 decade) ----
    rV = 10.^((log10(rmin) - 0.0001):0.05:(log10(max(rT)) + 0.0001));
    rL = sqrt(rV(2:end) .* rV(1:end-1));   % geometric bin centres
    mL = zeros(size(rL));
    for j = 1:numel(rL)
        inBin = (rT >= rV(j)) & (rT <= rV(j+1));
        mL(j) = sum(mT(inBin));
    end
    mL = mL ./ diff(rV) / (L1Tot * L2Tot);

    % Keep only the leading run of non-empty bins.
    firstEmpty = find(mL <= 0, 1);
    if isempty(firstEmpty)
        firstEmpty = numel(mL) + 1;
    end
    keep = 1:(firstEmpty - 1);
    rL = rL(keep); mL = mL(keep);

    subplot(2,2,4);
    AT = exp(mean(log(mL) + 3*log(rL)));   % prefactor of the pooled A/r^3 line
    plot(rL, mL, '.', 'DisplayName', Name); hold on;
    plot(rL, AT ./ rL.^3, 'r'); hold off;
    set(gca, 'xscale', 'log');
    set(gca, 'yscale', 'log');
    title([Name, ' ', num2str(AT)], 'Interpreter', 'none');
    drawnow;

    % ---- Per-pair analysis; needs some spread in r ----
    if ~((length(unique(r)) >= 3) && (max(r) >= 1e2))
        fprintf('Skipping fit for %s (%s, %s): fewer than 3 distinct r values or max(r) < 1e2.\n', ...
                Name, Genus1, Genus2);
        continue;
    end

    % Histogram of this pair alone (log bins, 0.1 decade).
    rV = 10.^((log10(rmin) - 0.0001):0.1:(log10(max(r)) + 0.0001));
    rL = sqrt(rV(2:end) .* rV(1:end-1));
    mL = zeros(size(rL));
    for j = 1:numel(rL)
        inBin = (r >= rV(j)) & (r <= rV(j+1));
        mL(j) = sum(m(inBin));
    end
    mL = mL ./ diff(rV) / Count;

    % Prefactor of A/r^alpha with alpha fixed to 3, estimated as the
    % geometric mean of mL*rL^3 over the bins with rL > 1e3.
    % NOTE(review): empty bins give log(0) = -Inf here; such pairs end up
    % with a non-finite R2 and are rejected by the isfinite test below.
    x = log(rL(:)); y = log(mL(:));
    tail = rL(:) > 1e3;
    A = exp(mean(y(tail) + 3*x(tail))); alpha = 3; dA = 0;
    R2 = 1 - sum((log(A ./ (exp(x)).^alpha) - y).^2) / sum((y - mean(y)).^2);

    % Tree distance between the two genera (halved leaf-to-leaf distance).
    Ind1 = find(strcmp(genus_names, Genus1));
    Ind2 = find(strcmp(genus_names, Genus2));
    dist = distM(Ind1, Ind2);
    dist = dist / 2;

    % Acceptance filter. The alpha and dist bounds are deliberately loose;
    % the effective criterion is a finite R2 (this is exactly what the
    % former "R2*0>=0*0.8" evaluated to).
    if ~((abs(alpha - 3) <= 10.5) && (dist > -10) && isfinite(R2))
        continue;
    end

    % Subplot 1: all accepted pairs, rescaled by A, against 1/r^alpha.
    subplot(2,2,1);
    plot(rL, mL / A, '.-', 'DisplayName', Name); hold on;
    plot(exp(x), 1 ./ exp(x).^alpha, 'r'); hold on;
    set(gca, 'xscale', 'log');
    set(gca, 'yscale', 'log');
    xlim([rmin/1.1, max(rL)*1.1]); ylim([min(mL)/1.1/A, max(mL)*1.1/A]);
    title([Name, ' ', num2str(R2), ' ', num2str(log10(A)), ' ', num2str(dist)], 'Interpreter', 'none');

    % Record the prefactor and the distance rounded to half a decade.
    Pref = [Pref; A]; Dist = [Dist; 10^(round(log10(dist)*2)/2)]; %#ok<AGROW>
    Cor = corr(log(Pref), log(Dist), 'type', 'Pearson');

    % Subplot 2: prefactor versus distance, one point per accepted pair.
    subplot(2,2,2);
    errorbar(dist, A, dA, '.', 'DisplayName', [Name, ' ', num2str(dist)]); hold on;
    set(gca, 'xscale', 'log'); set(gca, 'yscale', 'log'); drawnow;
    title(num2str(Cor));

    % Subplot 3: median prefactor per rounded distance.
    DistU = unique(Dist); PrefU = DistU * 0;
    for iDist = 1:length(DistU)
        PrefU(iDist) = median(Pref(DistU(iDist) == Dist));
    end
    subplot(2,2,3);
    loglog(DistU, PrefU, 'o'); hold off;

    % Pause until the user presses a key or mouse button in the figure.
    waitforbuttonpress;
end


