 function [LL, LLgrad] = hmmfun_grad(x,data,Qnst,ng,priorg,km,dt)
% HMMFUN_GRAD  Negative log-likelihood of a continuous-time hidden Markov
% model and its gradient with respect to the model parameters.
%
% Input:
%   x      : optimization parameter vector (row or column), laid out as
%              x(1:Qnst-1)        state energies (energy of the last state is fixed to 0)
%              x(Qnst:ng)         pairwise energies that fill the strictly lower
%                                 triangle of the energy matrix in column-major
%                                 order (mirrored into the upper triangle)
%              x(ng+1:ng+Qnst)    emission mean of each state
%              x(ng+Qnst+1:end)   emission variance of each state
%   data   : the observed time series (one-dimensional array)
%   Qnst   : number of HMM states
%   ng     : number of independent energy parameters
%   priorg : initial state probability (passed unchanged to fwdback_YZ)
%   km     : pre-constant of the rate equation, q(i,j) = km*exp(gs(i)-g(i,j))
%            NOTE(review): the spectral expansion below needs a symmetric Q,
%            which holds when km is a scalar (or symmetric matrix) - confirm.
%   dt     : sampling time interval (scalar)
%
% Output:
%   LL     : NEGATIVE log-likelihood (-loglik), suitable for a minimizer
%   LLgrad : gradient of LL with respect to x, same size as x
%
% Example layout for Qnst = 3, ng = 5:
%   x = [0 0  2 2 2  0 10 20  25 25 25];

% Work on row copies so that both row and column inputs are accepted.
xr   = x(:).';
data = data(:).';

T   = length(data);
nmu = ng + Qnst;            % index in x of the last emission mean

% ---------------- Energy matrix and rate matrix --------------------------
gs    = [xr(1:Qnst-1) 0].';         % state energies (column), last one fixed to 0
g     = diag(gs);
ld    = diag(true(Qnst,1));         % true on the diagonal
ltriu = triu(true(Qnst),1);         % true strictly above the diagonal
ltril = ltriu';                     % true strictly below the diagonal
g(ltril) = xr(Qnst:ng);             % pairwise energies, lower triangle
tmp      = g';
g(ltriu) = tmp(ltriu);              % mirror to make g symmetric

g1 = repmat(gs,[1 Qnst]);           % g1(i,j) = gs(i)
q  = km.*exp(g1-g);                 % off-diagonal rates q(i,j) = km*exp(gs(i)-g(i,j))
q(ld) = 0;
q(ld) = -sum(q,2);                  % each row of the rate matrix sums to zero

G = exp((g1-g1')./2);               % G(i,j) = exp((gs(i)-gs(j))/2)

% Symmetrized rate matrix. G'.*q is symmetric analytically but can differ in
% the last bits; averaging with its transpose makes it exactly symmetric so
% that eig takes the symmetric path and returns real eigenvalues with
% orthonormal eigenvectors, which the expansions below rely on.
Q = G'.*q;
Q = (Q + Q')./2;

% ---------------- Spectral decomposition and transition matrix -----------
[Ue,lambda] = eig(Q);
lambda      = lambda(ld);           % column vector of eigenvalues
lambda_exp  = exp(lambda.*dt);
Ut          = Ue';

% Transition probability matrix over one sampling interval:
% G .* sum_k exp(lambda_k*dt) * u_k*u_k'
transmat = G.*(Ue*diag(lambda_exp)*Ut);

% Gam(i,j) is the divided difference of exp(lambda*dt):
%   (exp(lambda_i*dt)-exp(lambda_j*dt)) / (lambda_i-lambda_j),
% with the limit dt*exp(lambda_i*dt) for coinciding eigenvalues. The expm1
% form avoids 0/0 (NaN) and cancellation when eigenvalues are (nearly) equal.
Gam = zeros(Qnst);
for i = 1:Qnst
    Gam(i,i) = dt*lambda_exp(i);
    for j = i+1:Qnst
        gap = abs(lambda(i)-lambda(j));
        if gap*dt == 0
            Gam(i,j) = dt*lambda_exp(i);
        else
            lamHi    = max(lambda(i),lambda(j));
            Gam(i,j) = exp(lamHi*dt)*(-expm1(-gap*dt))/gap;
        end
        Gam(j,i) = Gam(i,j);
    end
end

% ---------------- Emission likelihoods and log-likelihood ----------------
mu    = xr(ng+1:nmu);               % emission means
Sigma = xr(nmu+1:end);              % emission variances
sig   = sqrt(Sigma);                % emission standard deviations
obslik = zeros(Qnst,T);
for i = 1:Qnst
    obslik(i,:) = normpdf(data,mu(i),sig(i));
end

% NOTE(review): fwdback_YZ is defined elsewhere; presumably alpha/beta are the
% forward/backward variables and gamma the posterior state probabilities,
% each Qnst-by-T - confirm against its implementation.
[alpha, beta, gamma, loglik] = fwdback_YZ(priorg,transmat,obslik);
%LL=loglik-0.5*length(x)*log(T);  %BIC
LL = -loglik;                       % negated for minimization

% ---------------- Gradient (LLgrad) ---------------------------------------
LLgrad = zeros(size(x));
LLg    = zeros(Qnst,Qnst);          % gradient with respect to the energy matrix

% Gradient with respect to the transition matrix:
% LLa = sum_{t=1}^{T-1} alpha(:,t) * (beta(:,t+1).*obslik(:,t+1))'.
% Zeroing column 1 before the left shift makes the wrapped-around column
% (paired with alpha(:,T)) contribute nothing.
beta(:,1) = 0;
LLa = alpha*(circshift(beta.*obslik,[0 -1]))';

% Lam(i,j) = lambda(i)+lambda(j)
Lam = repmat(lambda,[1 Qnst]);
Lam = Lam + Lam';

% H{m,n} = Gam .* (Ut(:,m)*Ue(n,:)) .* (Ut*(LLa.*G)*Ue). Only its element sum
% h(m,n) and, for m==n, sum(sum(Lam.*H{m,m})) are needed, so the matrices
% themselves are not stored. The last factor does not depend on (m,n).
W    = Ut*(LLa.*G)*Ue;
h    = zeros(Qnst,Qnst);
hLam = zeros(Qnst,1);
for m = 1:Qnst
    for n = 1:Qnst
        Hmn    = Gam.*(Ut(:,m)*Ue(n,:)).*W;
        h(m,n) = sum(sum(Hmn));
        if m == n
            hLam(m) = sum(sum(Lam.*Hmn));
        end
    end
end

% Gradient of the log-likelihood with respect to the energies:
% diagonal -> state energies, upper triangle -> pairwise energies.
for m = 1:Qnst
    LLg(m,m) = sum(LLa(m,:).*transmat(m,:)) - sum(LLa(:,m).*transmat(:,m));
    LLg(m,m) = 0.5.*(LLg(m,m) + hLam(m));
    for n = m+1:Qnst
        LLg(m,n) = q(m,n)*h(m,m) + q(n,m)*h(n,n) - Q(m,n)*(h(m,n)+h(n,m));
    end
end

% Pack back into the layout of x. Transposing moves the pairwise terms to the
% lower triangle so they are read in the same column-major order in which
% x(Qnst:ng) was written into g.
LLg = LLg';
tmp = LLg(ld);
LLgrad(1:Qnst-1) = tmp(1:Qnst-1)';  % last state energy is fixed, so it has no entry
LLgrad(Qnst:ng)  = LLg(ltril);

% Gradient with respect to the emission means and variances
LLmu  = zeros(Qnst,1);
LLsig = zeros(Qnst,1);
for i = 1:Qnst
    tmp      = (data-mu(i))./Sigma(i);
    LLmu(i)  = sum(gamma(i,:).*tmp);
    LLsig(i) = 0.5*sum(gamma(i,:).*(tmp.*tmp-1./Sigma(i)));
end

LLgrad(ng+1:nmu)  = LLmu(:);
LLgrad(nmu+1:end) = LLsig(:);

LLgrad = -LLgrad;  % gradient of the negative log-likelihood (matches LL = -loglik)
% x
% LL




