function [l] = llba (x, a, r, Z,doprior)
% LLBA  Negative log-likelihood of a two-option Q-learning model with a
%       softmax choice rule, optionally penalised by a Gaussian prior.
%
%   l = llba(x, a, r)
%   l = llba(x, a, r, Z, doprior)
%
%   Inputs
%     x        2-element parameter vector (row or column) in unconstrained
%              space: beta = exp(x(1)), alfa = 1/(1+exp(-x(2))).
%     a        choice on each trial; used to index the 2-element value
%              vector Q, so each entry must be 1 or 2.
%     r        reward on each trial (same length as a); it is multiplied
%              by beta before entering the value update.
%     Z        struct with fields mu and nui, read only when doprior is
%              true. nui enters as the matrix of the quadratic form, i.e.
%              it is presumably the inverse covariance of the prior and mu
%              its mean -- confirm against the caller.
%     doprior  optional flag (default false). When true, the log prior
%              density of x is subtracted from the returned value.
%
%   Output
%     l        -(sum of log choice probabilities) - (log prior), a scalar.

% doprior used to be mandatory; default it so the plain-likelihood call
% works without passing Z/doprior at all.
if nargin < 5 || isempty(doprior)
	doprior = false;
end

beta = exp(x(1));            % sensitivity to reward
alfa = 1./(1+exp(-x(2)));     % learning rate

if doprior
	% Force column vectors so the quadratic form is always a scalar; a row
	% vector x would otherwise expand against a column mu into a matrix.
	d  = x(:) - Z.mu(:);
	lp = -1/2 * d'*Z.nui*d - 1/2*log(1/det(Z.nui/(2*pi)));
else
	lp = 0;
end

% initialize
l = 0;
Q = zeros(2,1)+0.5;           % both options start with value 0.5

for t=1:length(a)
	er = beta * r(t);         % reward scaled by sensitivity

	% log softmax probability of the chosen action; subtracting the max
	% before exponentiating keeps exp() from overflowing (log-sum-exp).
	l0 = max(Q);
	la = Q(a(t)) - l0 - log(sum(exp(Q-l0)));
	l  = l + la;

	% delta-rule update of the chosen option only
	Q(a(t)) = Q(a(t)) + alfa * (er - Q(a(t)));
end

% negate: return negative log-likelihood (minus log prior if requested)
l  = -l  - sum(lp);
