# ANALYSIS OF THE REGULARITY OF HOLE SIZE IN A MOUSE RETINA

# __future__ imports have to precede every other statement in the module
from __future__ import division

import itertools
import os
import re
import warnings

import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed
from pylab import np, plt
from scipy import misc
from scipy.stats import ttest_ind
from skimage.measure import regionprops, label
from skimage.segmentation import clear_border

sns.set_style('whitegrid')


def genotype(retina_name):
    """Return the genotype label encoded in a retina (file) name.

    The name is searched for a fixed set of substrings; the first one that
    is found decides the label:

        'Yap Taz double' -> 'YapTaz'
        'Yap_'           -> 'Yap'
        'Taz_'           -> 'Taz'

    If none of them occurs, 'unclear' is returned.
    """
    # order matters: the double-knockout marker is tested before the
    # single-gene markers
    markers_in_priority_order = (
        ('Yap Taz double', 'YapTaz'),
        ('Yap_', 'Yap'),
        ('Taz_', 'Taz'),
    )
    for marker, label_ in markers_in_priority_order:
        if marker in retina_name:
            return label_
    return 'unclear'


def get_animal_number(retina_name):
    """Return the animal number that follows the last '#' in a retina name.

    All consecutive digits after the last '#' are read, so multi-digit
    numbers such as '#12' are returned as 12 (and not truncated to their
    first digit).

    Raises:
        ValueError: if the name contains no '#', or the last '#' is not
            directly followed by at least one digit.
    """
    marker_pos = retina_name.rfind('#')
    digits = None
    if marker_pos != -1:
        digits = re.match(r'[0-9]+', retina_name[marker_pos + 1:])
    if digits is None:
        raise ValueError("no animal number ('#<digits>') found in %r" % (retina_name,))
    return int(digits.group())


def get_hole_distribution(image_name):
    """Measure area and perimeter of every labelled region of one image.

    Args:
        image_name: path of the image file without its '.tif' extension.

    Returns:
        A pair ``(areas, perimeters)`` of two equally long lists holding the
        'area' and 'perimeter' values reported by ``regionprops`` for each
        region, in the same order.
    """
    # load the image and invert it
    pixels = np.invert(misc.imread(image_name + '.tif'))

    # names containing 'Yap_' are inverted a second time
    if 'Yap_' in image_name:
        pixels = np.invert(pixels)

    # three-dimensional input: continue with index 0 of the last axis only
    if np.ndim(pixels) == 3:
        pixels = pixels[:, :, 0]

    # clear the border, label connected regions (connectivity=1) and
    # measure the properties of every labelled region
    border_free = clear_border(pixels)
    labelled = label(border_free, connectivity=1)
    regions = regionprops(labelled)

    areas = [region['area'] for region in regions]
    perimeters = [region['perimeter'] for region in regions]
    return areas, perimeters


# collect filenames in a list
IMAGE_ROOT = '/Users/silvanus/Dropbox/Silvanus (1)/3. Regularity of plexus'


def find_image_names(root, extension='.tif'):
    """Return the paths, without extension, of all image files below *root*.

    Only files whose extension is exactly *extension* (case-sensitive) are
    kept, so 'x.tiff' or 'x.tif.bak' are ignored.  Only the final extension
    is stripped, which keeps names that contain further dots intact
    ('a.b.tif' -> 'a.b').

    Args:
        root: directory that is walked recursively.
        extension: file extension including the leading dot.

    Returns:
        A list of paths with the extension removed.  The list is empty if
        *root* does not exist or contains no matching file.
    """
    found = []
    for path, subdirs, files in os.walk(root):
        for name in files:
            stem, ext = os.path.splitext(name)
            if ext == extension:
                found.append(os.path.join(path, stem))
    return found


image_name_list = find_image_names(IMAGE_ROOT)

# MEASURE EVERY IMAGE, SPREAD OVER 8 PARALLEL JOBS
# each entry of allholes_list is the (areas, perimeters) pair of one image
measure_ = delayed(get_hole_distribution)
allholes_list = Parallel(n_jobs=8)(measure_(name_) for name_ in image_name_list)

# PER-IMAGE METADATA DERIVED FROM THE FILE NAME, PLUS THE MEASUREMENTS
genotype_list = []
control_list = []
animal_number_list = []
for name_ in image_name_list:
    genotype_list.append(genotype(name_))
    # images whose name contains 'WT#' are flagged as controls
    control_list.append('WT#' in name_)
    animal_number_list.append(get_animal_number(name_))

hole_area_list = [areas_ for areas_, _ in allholes_list]
hole_perimeter_list = [perimeters_ for _, perimeters_ in allholes_list]

# ONE ROW PER IMAGE
all_images_df = pd.DataFrame({
    'image_name': image_name_list,
    'genotype': genotype_list,
    'control': control_list,
    'animal_number': animal_number_list,
    'hole_areas': hole_area_list,
    'hole_perimeters': hole_perimeter_list,
})

# PLOT DISTRIBUTIONS OF ALL HOLES COMBINED, REGARDLESS OF THE ANIMAL NUMBER
# one entry per (genotype, control) group with the holes of all its images pooled
prop = []
for n, g in all_images_df.groupby(['genotype', 'control']):
    prop.append({'genotype': n[0], 'is_control': n[1],
                 'all_holes': list(itertools.chain.from_iterable(g['hole_areas'])),
                 'all_holes_perimeter': list(itertools.chain.from_iterable(g['hole_perimeters']))})

# subplot row per genotype and histogram colour per control flag
plot_no_gt = {'Yap': 0, 'Taz': 1, 'YapTaz': 2}
plot_c_ctr = {False: 'red', True: 'blue'}

circ_bin = np.linspace(0, 6, 20)
area_bin = np.linspace(0, 40000, 20)

# left column: area histograms, right column: circularity histograms
f, axarr_area = plt.subplots(3, 2)

# calculate means and stds
for p in prop:
    ar = np.array(p['all_holes'])
    per = np.array(p['all_holes_perimeter'])

    # only holes with an area above 500 are analysed; the mask is computed
    # once and applied to both arrays so they stay aligned
    keep = ar > 500
    per = per[keep]
    ar = ar[keep]
    # perimeter^2 / (4 * pi * area): equals 1 for a perfect circle
    circularity = per ** 2 / (4 * np.pi * ar)

    p['mean_area'] = np.mean(ar)
    p['std_area'] = np.std(ar)

    p['mean_circularity'] = np.mean(circularity)
    p['std_circularity'] = np.std(circularity)

    # genotypes without an assigned subplot row (e.g. 'unclear') cannot be
    # drawn; skip them with a warning instead of failing with a KeyError
    if p['genotype'] not in plot_no_gt:
        warnings.warn('no subplot row for genotype %r, distribution not plotted' % (p['genotype'],))
        continue

    row = plot_no_gt[p['genotype']]
    legend_label = 'genotype ' + p['genotype'] + ', ' + 'control:' + str(p['is_control'])

    for column, (values, bins) in enumerate(((ar, area_bin), (circularity, circ_bin))):
        # lw=0 makes the KDE line invisible, so only the histogram shows
        sns.distplot(values, label=legend_label, bins=bins,
                     ax=axarr_area[row, column], kde_kws={"color": "k", "lw": 0},
                     color=plot_c_ctr[p['is_control']])

for i in range(3):
    axarr_area[i, 0].set_xlim([0, 40000])
    axarr_area[i, 1].set_xlim([0, 6])
    axarr_area[i, 0].legend()
    axarr_area[i, 1].legend()

# NOTE(review): the titles are attached to the bottom row (index 2);
# confirm whether the top row (index 0) was intended
axarr_area[2, 0].set_title('area')
axarr_area[2, 1].set_title('circularity')

# savefig does not create missing directories
if not os.path.isdir('results'):
    os.makedirs('results')
plt.savefig('results/all_distributions.pdf')


# MAKE BAR PLOTS

# holes with an area at or below this value are excluded from the statistics
MIN_HOLE_AREA = 500
# length scale whose square converts measured areas to um^2
# (presumably the pixel edge length in um - TODO confirm against the microscope metadata)
PIXEL_SIZE = 0.4151329

# one entry per (genotype, control, animal) group with the holes of all its images pooled
prop = []
for n, g in all_images_df.groupby(['genotype', 'control', 'animal_number']):
    p = {'genotype': n[0], 'is_control': n[1], 'animal_number': n[2],
         'all_holes': list(itertools.chain.from_iterable(g['hole_areas'])),
         'all_holes_perimeter': list(itertools.chain.from_iterable(g['hole_perimeters']))}

    ar = np.array(p['all_holes'])
    per = np.array(p['all_holes_perimeter'])

    # the same mask is applied to both arrays so they stay aligned
    keep = ar > MIN_HOLE_AREA
    per = per[keep]
    ar = ar[keep]
    # perimeter^2 / (4 * pi * area): equals 1 for a perfect circle
    circularity = per ** 2 / (4 * np.pi * ar)

    p['mean_area'] = np.mean(ar)
    p['std_area'] = np.std(ar)

    p['mean_area [um^2]'] = p['mean_area'] * PIXEL_SIZE ** 2
    p['std_area [um^2]'] = p['std_area'] * PIXEL_SIZE ** 2

    p['mean_circularity'] = np.mean(circularity)
    p['std_circularity'] = np.std(circularity)

    prop.append(p)

# one row per animal
prop_df = pd.DataFrame(prop)

# one bar plot per metric, grouped by genotype and split by control flag
f, axarr_bp = plt.subplots(1, 4)
for ax_, metric_ in zip(axarr_bp, ['mean_circularity', 'mean_area', 'std_circularity', 'std_area']):
    sns.barplot(data=prop_df, y=metric_, x='genotype', hue='is_control', ax=ax_)

plt.savefig('results/barplots_distributions.pdf')

# TEST STATISTICS
# For every metric and genotype, compare the per-animal values of the control
# animals with those of the non-control animals using scipy.stats.ttest_ind
# (default settings) and print the result.  Output order: metric by metric in
# the order listed below, and within each metric Yap, YapTaz, Taz.
control_rows = np.array(prop_df['is_control'])
non_control_rows = np.array(~prop_df['is_control'])

for metric_ in ('mean_circularity', 'std_area', 'mean_area', 'std_circularity'):
    for genotype_ in ('Yap', 'YapTaz', 'Taz'):
        genotype_rows = np.array(prop_df['genotype'] == genotype_)
        # print(...) with a single argument behaves the same under Python 2 and 3
        print(ttest_ind(prop_df[genotype_rows & control_rows][metric_],
                        prop_df[genotype_rows & non_control_rows][metric_]))

# export the per-animal summary
prop_df[['genotype', 'is_control', 'animal_number', 'mean_area [um^2]', 'std_area [um^2]', 'mean_circularity', 'std_circularity']].to_csv('results/all_animals.csv')
