function [l] = llbbdist2p_V2 (x, a, r, Z,doprior)
% LLBBDIST2P_V2  Negative log-likelihood of a two-action choice sequence
% under a softmax policy over Beta-style pseudo-count values, optionally
% penalised by a Gaussian prior on the parameters.
%
%   l = llbbdist2p_V2(x, a, r, Z, doprior)
%
% Inputs
%   x        4-element parameter vector (row or column), unconstrained:
%              x(1) -> beta = exp(x(1))        sensitivity to reward
%              x(2) -> w    = logistic(x(2))   learning rate (chosen action)
%              x(3) -> f    = logistic(x(3))   relaxation rate of the
%                                              unchosen action's counts
%              x(4) -> v                       weight on the variance term
%                                              added to the previously
%                                              chosen action's value
%   a        vector of choices, each entry 1 or 2
%   r        vector of outcomes, same length as a; entries equal to 1 or 0
%            update the chosen action's counts, any other value leaves the
%            chosen action's counts untouched for that trial
%   Z        struct with fields mu (prior mean) and nui (prior precision,
%            i.e. inverse covariance); only read when doprior is true
%   doprior  (optional, default false) if true, the Gaussian log-prior of
%            x is included in the objective
%
% Output
%   l        scalar: -(sum of log choice probabilities) - log prior

% Default to "no prior" when the flag is omitted or empty.
if nargin < 5 || isempty(doprior)
    doprior = false;
end

% Force a column vector so the quadratic form below is a scalar no matter
% how the caller oriented x (a row x would otherwise broadcast against a
% column Z.mu and silently produce a matrix-valued prior).
x = x(:);

beta = exp(x(1));            % sensitivity to reward
w = 1./(1+exp(-x(2)));       % learning rate
f = 1./(1+exp(-x(3)));       % relaxation rate for the unchosen action
v = x(4);                    % weight on the variance term

if doprior
    d  = x - Z.mu(:);
    % Multivariate normal log-density written in terms of the precision
    % matrix: det(2*pi*Sigma) == 1/det(nui/(2*pi)).
    lp = -1/2 * d'*Z.nui*d -  1/2*log(1/det(Z.nui/(2*pi)));
else
    lp = 0;
end

% initialize
l = 0;

% Q(k,1) and Q(k,2) are the two pseudo-counts of action k, both
% starting at 1.
Q = ones(2,2);
q = zeros(1,2);
V = zeros(1,2);

for t=1:length(a)

    % Mean and variance of a Beta(Q(k,1), Q(k,2)) distribution per action.
    for k = 1:2
        s    = Q(k,1) + Q(k,2);
        q(k) = Q(k,1)/s;
        V(k) = (Q(k,1)*Q(k,2))/(s^2*(s+1));
    end

    % From the second trial on, the action chosen on the previous trial
    % gets an extra v-weighted variance term (ca still holds last trial's
    % choice at this point).
    if t>1
        q(ca) = q(ca) + v*V(ca);
    end

    q = beta*q;

    % Log-softmax of the chosen action, shifted by the max for stability.
    l0 = max(q);
    la = q(a(t)) - l0 - log(sum(exp(q-l0)));
    l = l + la;

    % ca = chosen action, ua = unchosen action (a(t) is 1 or 2 here,
    % otherwise the indexing above would already have failed).
    ca = a(t);
    ua = 3 - ca;

    % Chosen action: shrink both counts towards 1 at rate w, then add one
    % to the count matching the outcome.
    if r(t)==1
        Q(ca,1)=(1-w)*Q(ca,1)+w+1;
        Q(ca,2)=(1-w)*Q(ca,2)+w;
    elseif r(t)==0
        Q(ca,1)=(1-w)*Q(ca,1)+w;
        Q(ca,2)=(1-w)*Q(ca,2)+w+1;
    end

    % Unchosen action: both counts relax towards 1 at rate f.
    Q(ua,:)=(1-f)*Q(ua,:)+f;

end

% Negative log-likelihood minus log-prior (lp is 0 when no prior is used).
l  = -l  - lp;
