function [alpha, beta, gamma, loglik] = fwdback_YZ_skew(init_state_distrib, ...
   transmat, obslik, varargin)
% FWDBACK_YZ_SKEW Compute the posterior probs. in an HMM using the forwards backwards algo.
%
% [alpha, beta, gamma, loglik] = fwdback_YZ_skew(init_state_distrib, transmat, obslik, ...)
%
% Notation:
% Y(t) = observation, Q(t) = hidden state
%
% INPUT:
% init_state_distrib(i) = Pr(Q(1) = i)
% transmat(i,j) = Pr(Q(t) = j | Q(t-1)=i)
% obslik(i,t) = Pr(Y(t)| Q(t)=i)
%   (Compute obslik using skewnormpdf.m on your data sequence first.)
%
% Optional parameters may be passed as 'param_name', param_value pairs.
% Parameter names are shown below; default values in [].
% Optional arguments:
% 'fwd_only' - if 1, only do a forwards pass and set beta=[]  [0]
% Any other parameter name is accepted and silently ignored.
%
% OUTPUTS (this version always uses scaling):
% alpha(i,t) = p(Q(t)=i | y(1:t))
% beta(i,t) = p(y(t+1:T) | Q(t)=i) / prod(scale(t:T))   (Rabiner-style scaling)
% gamma(i,t) = p(Q(t)=i | y(1:T))
% loglik = log p(y(1:T))   (-inf if any scaling factor is zero)
%
% If fwd_only = 1, these become
% alpha(i,t) = p(Q(t)=i | y(1:t))
% beta = []
% gamma(i,t) = p(Q(t)=i | y(1:t))
%
% If obslik has zero columns (empty sequence), alpha, beta and gamma are
% returned as Q-by-0 arrays (beta = [] when fwd_only = 1) and loglik = 0.
%
% Unlike the generic fwdback, xi and gamma2 are NOT computed by this function.
%
% Examples:
%
% [alpha, beta, gamma, loglik] = fwdback_YZ_skew(pi, A, obslik);
% [alpha, ~, gamma, loglik] = fwdback_YZ_skew(pi, A, obslik, 'fwd_only', 1);

% Parse 'name', value pairs. Only 'fwd_only' is recognised; unknown names
% (and a trailing name without a value) are ignored so that existing callers
% that passed other options keep working.
fwd_only = 0;
for k = 1:2:numel(varargin)-1
    if strcmp(varargin{k}, 'fwd_only')
        fwd_only = varargin{k+1};
    end
end

[Q, T] = size(obslik);

% Empty observation sequence: nothing to filter or smooth, and the
% log-likelihood of "no data" is log(1) = 0.
if T == 0
    alpha = zeros(Q,0);
    gamma = zeros(Q,0);
    if fwd_only
        beta = [];
    else
        beta = zeros(Q,0);
    end
    loglik = 0;
    return
end

% scale(t) = Pr(O(t) | O(1:t-1)) = 1/c(t) as defined by Rabiner (1989).
% Hence prod_t scale(t) = Pr(O(1)) Pr(O(2)|O(1)) Pr(O(3) | O(1:2)) ... = Pr(O(1), ... ,O(T))
% or log P = sum_t log scale(t).
% Rabiner suggests multiplying beta(t) by 1/scale(t), which is what the
% backwards pass below does.
scale = ones(1,T);

alpha = zeros(Q,T);
gamma = zeros(Q,T);


%%%%%%%%% Forwards %%%%%%%%%%

% normalise is defined outside this file; it is used here as
% [normalised_vector, normalising_constant] = normalise(vector).
alpha(:,1) = init_state_distrib(:) .* obslik(:,1);
[alpha(:,1), scale(1)] = normalise(alpha(:,1));

for t=2:T
    % One-step prediction of the state distribution, then weight by the
    % likelihood of the current observation.
    m = transmat'*alpha(:,t-1);
    alpha(:,t) = m .* obslik(:,t);
    [alpha(:,t), scale(t)] = normalise(alpha(:,t));
end

if any(scale==0)
    % At least one observation had zero probability under every state.
    loglik = -inf;
else
    loglik = sum(log(scale));
end

if fwd_only
    % Filtering only: the "smoothed" posterior is just the filtered one.
    beta = [];
    gamma = alpha;
    return
end

%%%%%%%%% Backwards %%%%%%%%%%

% NOTE(review): if any scale(t) is 0 (loglik = -inf), the divisions below
% produce non-finite entries in beta; gamma then depends on how normalise
% treats such input.
beta = zeros(Q,T);
beta(:,T) = ones(Q,1)./scale(T);
gamma(:,T) = normalise(alpha(:,T) .* beta(:,T));
for t=T-1:-1:1
    b = beta(:,t+1) .* obslik(:,t+1);
    beta(:,t) = (transmat * b)./scale(t);
    gamma(:,t) = normalise(alpha(:,t) .* beta(:,t));
end

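% NOTE: gamma2 is not computed or returned by this function; the derivation
% below is kept for reference only.
% We now explain the equation for gamma2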
% Let zt=y(1:t-1,t+1:T) be all observations except y(t)
% gamma2(Q,M,t) = P(Qt,Mt|yt,zt) = P(yt|Qt,Mt,zt) P(Qt,Mt|zt) / P(yt|zt)
%                = P(yt|Qt,Mt) P(Mt|Qt) P(Qt|zt) / P(yt|zt)
% Now gamma(Q,t) = P(Qt|yt,zt) = P(yt|Qt) P(Qt|zt) / P(yt|zt)
% hence
% P(Qt,Mt|yt,zt) = P(yt|Qt,Mt) P(Mt|Qt) [P(Qt|yt,zt) P(yt|zt) / P(yt|Qt)] / P(yt|zt)
%                = P(yt|Qt,Mt) P(Mt|Qt) P(Qt|yt,zt) / P(yt|Qt)
