import numpy as np
import sys
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

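# USAGE: *py [ Figure_4-Source_Data_File_2.txt ] [fig header name]
# NOTE: the script appends '.dat' to the first argument (input file) and
# '.png' to the second argument (output figure).
# Panels A, B, and C were generated separately and merged in Photoshop;
# this script provides a sample that generates one panel with 3 plots.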

# Load the tab-delimited input table; argv[1] is the path without its
# '.dat' extension, which is appended here.
data = np.loadtxt(
    fname=sys.argv[1] + '.dat',
    delimiter='\t',
)

# DEFINE FUNCTIONS, based on PyEMMA's scripts for these quantities:
#   - histogram the data
#   - compute probabilities, p1 & p2
#   - estimate relative contribution from each probability == -ln(p2/p1)

def get_histogram(
        xall, yall, nbins=100,
        weights=None, avoid_zero_count=False):
    """Compute a 2D histogram and return bin centers with the bin values.

    Parameters
    ----------
    xall, yall : array-like
        Sample coordinates along x and y, passed to ``np.histogram2d``.
    nbins : optional
        Bin specification forwarded as ``bins`` to ``np.histogram2d``
        (default 100).
    weights : array-like or None, optional
        Per-sample weights forwarded to ``np.histogram2d``.
    avoid_zero_count : bool, optional
        If True, every bin is raised to at least the smallest nonzero bin
        value.  When the histogram has no nonzero bin at all there is no
        such floor, and the histogram is returned unchanged.

    Returns
    -------
    x, y : ndarray
        Bin centers (midpoints of consecutive edges) along each axis.
    z : ndarray
        Histogram, transposed so rows follow y and columns follow x.
    """
    z, xedge, yedge = np.histogram2d(
        xall, yall, bins=nbins, weights=weights)
    x = 0.5 * (xedge[:-1] + xedge[1:])
    y = 0.5 * (yedge[:-1] + yedge[1:])
    if avoid_zero_count:
        populated = z[z.nonzero()]
        # np.min raises ValueError on an empty selection, so only apply the
        # floor when at least one bin is nonzero.
        if populated.size:
            z = np.maximum(z, populated.min())
    return x, y, z.T  # transpose to match x/y-directions

def to_density(z):
    """Return ``z`` divided by the sum of all its entries."""
    total = float(z.sum())
    return z / total

def rel_energy(z, minener_zero=False):
    """Return ``-ln(density)`` for every bin of the histogram ``z``.

    ``z`` is normalized with ``to_density``.  Bins whose density is zero
    stay at ``+inf``.  With ``minener_zero=True`` the values of the
    nonzero-density bins are shifted so that the smallest of them is zero.
    """
    density = to_density(z)
    populated = density.nonzero()
    # start from +inf everywhere, then fill in the bins that have density
    energy = np.full(z.shape, np.inf)
    energy[populated] = -np.log(density[populated])
    if not minener_zero:
        return energy
    energy[populated] -= energy[populated].min()
    return energy

# Build the 2D histogram of data column 1 vs data column 2
# (get_histogram's default nbins=100 is used).
x, y, hist = get_histogram(data[:, 1], data[:, 2])

# -ln(density) per bin, shifted so the smallest value is zero.
# NOTE(review): `f` is computed but never plotted -- the contour panel below
# draws `hist`, while its colorbar is labelled as a log ratio.  Confirm which
# quantity that panel is meant to show.
f = rel_energy(hist, minener_zero=True)

# centered 15-sample moving averages for the time-series panel
dt = pd.DataFrame({'time': data[:, 0], 'c2': data[:, 1], 'c14': data[:, 2]})
m1 = dt.c2.rolling(15, center=True).mean()
m2 = dt.c14.rolling(15, center=True).mean()
ma1 = np.array(m1)
ma2 = np.array(m2)

# font sizes (only `s` is applied below; `m` and `l` are not used here)
s = 8
m = 10
l = 12
plt.rc('font', size=s)
plt.rc('axes', titlesize=s)
plt.rc('axes', labelsize=s)
plt.rc('legend', fontsize=s)
plt.rc('xtick', labelsize=s)
plt.rc('ytick', labelsize=s)

# one figure with three stacked panels: time series, 1D overlay, 2D contour
fig, (axt, axs, axf) = plt.subplots(3, 1, figsize=(3, 6.5))

# time series: raw traces (faint) with the moving averages on top
axt.plot(data[:, 0], data[:, 1], 'k-', lw=1, alpha=0.3)
axt.plot(data[:, 0], data[:, 2], 'r-', lw=1, alpha=0.3)
axt.plot(dt.time, ma1, 'k-', lw=1, label='C2-center')
axt.plot(dt.time, ma2, 'r-', lw=1, label='C14-center')
axt.legend(loc="best", frameon=False)
axt.set_ylabel(r'z-distance ($\AA$)')
axt.set_xlabel('time (ns)')
axt.set_ylim(-1, 45)

# 1D overlay of the two distance distributions
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
# kept because switching to histplot/displot would change the rendered panel.
sns.distplot(data[:, 1], color="k", norm_hist=True, label='C2-center',
             ax=axs, kde_kws={"lw": 1})
sns.distplot(data[:, 2], color="r", norm_hist=True, label='C14-center',
             ax=axs, kde_kws={"lw": 1})
# Raw strings: '\A' is not a valid escape sequence in a normal string literal
# and triggers a warning on modern Python; the label text is unchanged.
axs.set_xlabel(r'distance from bilayer center ($\AA$)')
axs.set_ylabel('probability density')
axs.legend(loc="best", frameon=False)
axs.set_ylim(0, 0.20)
axs.set_xlim(20, 57)

# 2D filled contour of the histogram (20 levels)
c = axf.contourf(x, y, hist, 20, cmap='Reds')
axf.set_xlabel(r'C2-center ($\AA$)')
axf.set_ylabel(r'C14-center ($\AA$)')
cbar = fig.colorbar(c)
cbar.ax.set_ylabel('ln ( p_C2 / p_C14 )')
axf.set_xlim(25, 50)
axf.set_ylim(30, 50)

fig.tight_layout()
# argv[2] is the output name without extension; '.png' is appended here
fig.savefig(sys.argv[2] + '.png')
# Closing the figure releases it; clearing it first (clf/cla) is unnecessary.
plt.close(fig)

