%{
This script simulates 1000 sessions according to the experimental settings described in the paper for figs. 1,2
The learning algorithm, according to which choices are made in each trial, is Q-learning (equations 1 and 2 in the paper)
For each session it simulates 20 random-walk neurons and 20 action-value
neurons according to the description in Materials and Methods
For each session it also estimates the action-values from the behavior (by
calling the function Estimate_Q)
Finally, it regresses the spike counts of the neurons on the estimated
action-values. Neurons are classified as action-value neurons if exactly
one of the two t-values from the regression (the t-value of the intercept is excluded) was larger in absolute value than 2
The analyses of these neurons appear in Figs. 1,2
%}

close all

% ---------------- simulation parameters ----------------
%number of random-walk neurons simulated in each session
N=20;
%magnitude of fluctuation in random-walk neurons: the firing rate of each
%neuron changes on every trial by a Gaussian step scaled by sigma
sigma=0.1;
%number of blocks in a session
numBlocks=4;
%length (in trials) of the window over which the block-ending criterion is
%evaluated; it is also the minimal number of trials in a block
numStat=20;
%a block ends once the number of choices of the higher-reward action within
%the last numStat trials exceeds ceil(numStat*prop)
prop=2/3;
%time of measurement - translates firing rates into spike counts
T=1;
%setup of possible blocks: each column is one block, row 1 is the reward
%probability of action 1 and row 2 the reward probability of action 2
setup=[0.5 0.5 0.1 0.9; 0.1 0.9 0.5 0.5];
%Mean firing rate below which we will not include the neuron in the
%calculations
Thres=1;

% ---------------- regression bookkeeping ----------------
%statistics requested from regstats
whichstats={'tstat','fstat'};
%regression results of the trial-by-trial regression on the estimated
%action-values, one entry per neuron:
% rows    - 1: t-value of the first regressor, 2: t-value of the second
%           regressor, 3: p-value of the F-test
% columns - 1: action-value neurons, 2: random-walk neurons
regTrial=cell(3,2);

%variables for estimated alpha and beta; they are passed to Estimate_Q and
%returned by it in every session (presumably accumulating the estimates
%across sessions - confirm against Estimate_Q)
estAlphaRem=[];
estBetaRem=[];
%here all the information about the sessions will be stored -
%action-values, choice, reward, etc. (one row per session)
AllQs={};

%load and define seed, so that the simulated neurons will remain the same
%between different runs
%NOTE: load without a variable list brings every variable stored in the
%file into the workspace; seedMem is the one used here
load('seedMem_for_figs_1_and_2.mat')
rng(seedMem);


    %alpha & beta are parameters in the Qlearning algorithm of value:
    %alpha is the learning rate of the value update and beta scales the
    %difference between the action-values in the logistic choice rule
    alpha=0.1;
    beta=2.5;

    %Main simulation loop: each iteration simulates one session (behavior,
    %random-walk neurons and 4 action-value neurons), estimates the
    %action-values from behavior, and regresses the spike counts of all
    %neurons on the estimated action-values.
    %NOTE: the order of the calls to the random number generator (randn,
    %randperm, poissrnd, rand) determines the results for a given seed and
    %must not be changed.
    for experiments=1:1000

        %initializing Qlearning: row 1 - value of action 1, row 2 - value
        %of action 2; a column is appended for every trial
        Q=[0.5 ; 0.5];
        %This line is only meant to keep a new version synchronized with
        %the seed in older versions (the value itself is not used)
        NeurInput=randn(1,N);
        %these are the firing rates of the random-walk neurons
        NeurActivityPopMean=ones(1,N)*2.5;

        % training sessions%%

        %NeurActivityForStats - arrays for the activities of
        %neurons for statistical tests (trial x neuron x block).
        %Entries that do not correspond to a trial hold the sentinel -1
        NeurActivityForStats=ones(500,N,numBlocks)*-1;

        % choosing order of blocks, under normal conditions this
        % will mean randomly permuting order of blocks based on setup previously chosen
        rPerm=randperm(size(setup,2));
        %these are the reward probabilities for the blocks in the current
        %session (column k - block k)
        RChoose = setup(:,rPerm);
        %spike count in 1s is distributed poisson around firing rate
        NeurActivity=poissrnd(NeurActivityPopMean*T);
        NeurActivityForStats(1,:,1)=NeurActivity;

        %choice is softmax on Q(1),Q(2), 1-choose 1 0-choose 2
        %and reward (R) for action is stochastic according to reward schedule
        %(both rand calls in R are always evaluated)
        choice=rand<(1/(1+exp(-beta*(Q(1)-Q(2)))));
        R=(rand<RChoose(1,1))*(choice(end)==1)+(rand<RChoose(2,1))*(choice(end)==0);

        for k=1:numBlocks

            %i marks trial number within each block - for first block we
            %already have the first trial, otherwise i=0
            if k==1
                i=1;
            else
                i=0;
            end

            %Current block continues until the action with the larger reward is chosen
            %some proportion of time. Number of trials must be
            %larger than is required to calculate proportion.
            %The count is taken over the last min(i,numStat) trials; when
            %the two reward probabilities are equal, choices of action 2
            %(choice==0) are the ones counted
            while (i<numStat) || sum(choice(max(length(choice)-i+1,length(choice)-numStat+1):end)==(RChoose(1,k)>RChoose(2,k)))<=ceil(numStat*prop)
                %updating value for Qlearning: only the value of the action
                %chosen in the last trial is updated, towards the reward
                %that was received
                if choice(end)
                    Q=[Q [(1-alpha)*Q(1,end)+alpha*R(end) ; Q(2,end)]];
                else
                    Q=[Q [Q(1,end) ; (1-alpha)*Q(2,end)+alpha*R(end)]];
                end

                %add step to random walk: firing rate in new trial, the new firing rate cannot be smaller than 0
                NeurActivityPopMean=max(NeurActivityPopMean+randn(1,N)*sigma,zeros(1,N));
                %the activity of the random-walk neurons is distributed
                %poisson around the firing rate
                NeurActivity=poissrnd(NeurActivityPopMean*T);
                %update choice, reward and trial number
                choice=[choice rand<(1/(1+exp(-beta*(Q(1,end)-Q(2,end)))))];
                R=[R (rand<RChoose(1,k))*(choice(end)==1)+(rand<RChoose(2,k))*(choice(end)==0)];
                i=i+1;
                %if the block outgrows the preallocated array, extend it
                %explicitly with the sentinel -1. Implicit growth would pad
                %the other blocks with zeros, which would later be mistaken
                %for real trials by the ">-1" tests
                if i>size(NeurActivityForStats,1)
                    NeurActivityForStats=cat(1,NeurActivityForStats,-ones(500,N,numBlocks));
                end
                %add activity to array
                NeurActivityForStats(i,:,k)=NeurActivity;
            end

        end

        %session is over - now analyses
        % stats

        %estimate Q with ML from behavior
        [estQ,estAlphaRem,estBetaRem]=Estimate_Q(choice,R,estAlphaRem,estBetaRem);
        %update AllQs array: {session,1} - estimated action-values, {session,2} - the
        %order of blocks given in reward probabilities, {session,3} -
        %number of trials at the end of each block (cumulative), {session, 4} - choice
        %in each trial, {session,5} - reward given at each trial
        S=size(AllQs,1);
        AllQs{S+1,1}= estQ';
        AllQs{S+1,2}=RChoose;
        %the last non-sentinel row of a block is its number of trials
        trialsPerBlock=find(NeurActivityForStats(:,1,1)>-1,1,'last');
        for j=2:numBlocks
            trialsPerBlock=[trialsPerBlock trialsPerBlock(end)+find(NeurActivityForStats(:,1,j)>-1,1,'last')];
        end
        AllQs{S+1,3}=trialsPerBlock;
        AllQs{S+1,4}=choice;
        AllQs{S+1,5}=R;

        %decide on modulation for Q-values (uniform between 0 and 2.35)
        modulation=rand*2.35;

        %initialize neurActivity for regressions
        NeurActivityForTrialReg=[];

        %organize data for statistical analysis: concatenate the trials of
        %all blocks into one trial x neuron matrix
        for k=1:numBlocks
            NeurActivityForTrialReg=[NeurActivityForTrialReg ; NeurActivityForStats(1:find(NeurActivityForStats(:,1,k)>-1,1,'last'),:,k)];
        end

        %deleting neurons with a below threshold mean firing rate in all
        %blocks
        NeurActivityForStats(NeurActivityForStats==-1)=NaN;
        BelowThres=nanmean(NeurActivityForStats,1);
        BelowThres=sum(BelowThres>Thres,3)<1;
        NeurActivityForTrialReg(:,BelowThres)=[];

        %create action-value neurons, by adding poisson noise to Q-values.
        %Rows 1-2: firing rate increases with the value of action 1/2,
        %rows 3-4: firing rate decreases with the value of action 1/2;
        %in all of them the firing rate is 2.5 when the value is 0.5
        QForTrialReg=[2.5-modulation/2+Q*modulation ; 2.5+modulation/2-Q*modulation ];
        QForTrialReg=poissrnd(QForTrialReg);

        %more for AllQs: {session,6} - are the actual action-values,
        %{session,7} - are the action-value neurons, {session,8} - is the modulation of the neurons in this session, {session,9} - are the random-walk neurons
        S=size(AllQs,1);
        AllQs{S,6}=Q';
        AllQs{S,7}=QForTrialReg';
        AllQs{S,8}=modulation;
        AllQs{S,9}=NeurActivityForTrialReg;

        %regression on estimated action-values
        %for action-value neurons
        %(tvalue(1) belongs to the intercept and is not stored)
        for i=1:size(QForTrialReg,1)
            rStats=regstats(QForTrialReg(i,:), estQ','linear',whichstats);
            t=rStats.tstat;
            f=rStats.fstat;
            tvalue=t.t;
            regTrial{1,1}=[regTrial{1,1} ;  tvalue(2)];
            regTrial{2,1}=[regTrial{2,1} ; tvalue(3)];
            regTrial{3,1}=[regTrial{3,1} ; f.pval];
        end
        %for random-walk
        for i=1:size(NeurActivityForTrialReg,2)
            rStats=regstats(NeurActivityForTrialReg(:,i), estQ','linear',whichstats);
            t=rStats.tstat;
            f=rStats.fstat;
            tvalue=t.t;
            regTrial{1,2}=[regTrial{1,2} ;  tvalue(2)];
            regTrial{2,2}=[regTrial{2,2} ; tvalue(3)];
            regTrial{3,2}=[regTrial{3,2} ; f.pval];
        end

        %end of experiments
    end
    %Only 4 action-value neurons were simulated in each session above.
    %Here 4 more batches of 4 neurons (16 neurons) are drawn for every
    %session from the stored action-values, so that the number of
    %action-value neurons can be compared with the random-walk number.
    %A separate stored seed is used for these additional neurons.
    load('seedRem16000Neurons_21_4.mat')
    rng(seedRem16000Neurons_21_4);
    for num=1:size(AllQs,1)
        %restore the data of session num (columns 1-6 and 8 of AllQs)
        [estQ,RChoose,trialsPerBlock,choice,R,Q]=AllQs{num,1:6};
        modulation=AllQs{num,8};

        %firing rates (trial x 4): columns 1-2 increase with the
        %action-values, columns 3-4 decrease with them
        QFiringRate=[2.5-modulation/2+Q*modulation  2.5+modulation/2-Q*modulation ];

        for repeatsAnalysis=1:4
            %spike counts are drawn for the transposed (4 x trial) matrix
            %and transposed back; this layout is kept as is, since it
            %presumably affects which random draw is assigned to which entry
            QForTrialReg=poissrnd(QFiringRate')';
            %append the new neurons to the action-value neurons of the session
            AllQs{num,7}=horzcat(AllQs{num,7},QForTrialReg);
            for i=1:4
                %regression on estimated Q
                rStats=regstats(QForTrialReg(:,i), estQ,'linear',whichstats);
                t=rStats.tstat;
                f=rStats.fstat;
                tvalue=t.t;
                %store the t-values of the two regressors and the p-value
                %of the F-test at the end of the action-value columns
                regTrial{1,1}(end+1,1)=tvalue(2);
                regTrial{2,1}(end+1,1)=tvalue(3);
                regTrial{3,1}(end+1,1)=f.pval;
            end
        end
    end

    %Classification of the neurons and stats table.
    %Group 1 - action-value neurons, Group 2 - random-walk neurons.
    %Rows of RegTrialStatsTable (fraction of neurons in the group):
    % 1 - |t| of the first regressor larger than 2
    % 2 - |t| of the second regressor larger than 2
    % 3 - only the first t-value is larger than 2
    % 4 - only the second t-value is larger than 2
    % 5 - both t-values are larger than 2
    %Only the first two columns of the table are filled
    RegTrialStatsTable=zeros(5,4);
    for Group=1:2

        %get only t-values that were larger than 2 (in absolute value)
        absRegTrial{1,Group}=abs(regTrial{1,Group})>2;
        absRegTrial{2,Group}=abs(regTrial{2,Group})>2;

        %1 - right action-value, 2 - left action-value, 3 -
        %state/preference
        absRegTrialValue{1,Group}=absRegTrial{1,Group} & ~absRegTrial{2,Group};
        absRegTrialValue{2,Group}=absRegTrial{2,Group} & ~absRegTrial{1,Group};
        absRegTrialValue{3,Group}=absRegTrial{1,Group} & absRegTrial{2,Group};

        %fractions of neurons in each category
        RegTrialStatsTable(:,Group)=[mean(absRegTrial{1,Group}); ...
                                     mean(absRegTrial{2,Group}); ...
                                     mean(absRegTrialValue{1,Group}); ...
                                     mean(absRegTrialValue{2,Group}); ...
                                     mean(absRegTrialValue{3,Group})];
    end

    %display the table
    RegTrialStatsTable
    










