import os
import numpy as np
import pandas as pd

def file_found(path = '.', target = '.txt'):
    """Collect the names of files under ``path`` whose name contains ``target``.

    The directory tree is traversed with ``os.walk``, so files in
    subdirectories are included as well.

    Parameters
    ----------
    path : str
        Root directory of the search.
    target : str
        Substring that must occur somewhere in the file name (it is a plain
        ``in`` test, not a suffix match).

    Returns
    -------
    list of str
        Bare file names (the directory part is not included), in the order
        ``os.walk`` yields them.
    """
    return [
        name
        for _root, _subdirs, names in os.walk(path)
        for name in names
        if target in name
    ]

def txt2array(txt_file):
    """Parse tab-separated numeric text lines into a float32 array.

    Each line is stripped of newline characters and split on tabs. Empty
    fields are treated as ``0``. A line containing any field that cannot be
    parsed as a number (e.g. a header line) is skipped entirely.

    Parameters
    ----------
    txt_file : iterable of str
        The lines to parse, e.g. the result of ``file.readlines()``.

    Returns
    -------
    numpy.ndarray
        Array of dtype ``float32`` with one row per successfully parsed line.
        The rows must all have the same number of fields for the conversion
        to a 2-D array to succeed.
    """
    dataset = []
    for data in txt_file:
        fields = data.strip('\n').split('\t')
        try:
            # NOTE: float() deliberately replaces the former eval() call.
            # File contents must never be executed as code, and eval() also
            # crashed with NameError on single-word, non-numeric lines.
            row = [0.0 if x == '' else float(x) for x in fields]
        except ValueError:
            # Non-numeric line (header, comment, ...): ignore it.
            continue
        dataset.append(row)
    return np.array(dataset, dtype = 'float32')

def meta_combined_data_create(file_list):
    """Build the metadata and long-format activity tables from data files.

    For every file in ``file_list`` the genotype is taken from the third
    dash-separated field of the file name, and the file content is parsed
    with ``txt2array``. Each column of the parsed array is treated as one
    individual and receives the id ``<genotype>_<column index>``.

    Three files are written to the current working directory:

    * ``metadata.csv``      - one row per individual (``id``, ``genotype``).
    * ``combined_data.csv`` - one row per individual and time step
      (``id``, ``t``, ``activity``), where ``t`` is the row index in the
      source file.
    * ``DD_maxlen.txt``     - the largest number of rows found in any input
      file (``0`` if no file was processed).

    Parameters
    ----------
    file_list : iterable of str
        Paths of the input files, opened relative to the current working
        directory.

    Raises
    ------
    IndexError
        If a file name has fewer than three dash-separated fields, or a file
        yields no parsable rows.
    """
    meta_columns = ['id', 'genotype']
    combined_data_columns = ['id', 't', 'activity']
    maxlen_name = 'DD_maxlen.txt'

    # Frames are collected and concatenated once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied on every call.
    meta_frames = []
    combined_frames = []
    DD_len = []

    for filename in file_list:
        # Never ingest our own output from a previous run; it matches the
        # default '.txt' search but is not a data file.
        if os.path.basename(filename) == maxlen_name:
            continue

        geno = filename.split('-')[2]

        with open(filename) as handle:
            dataset = txt2array(handle.readlines())

        fly_num = dataset.shape[1]
        n_samples = dataset.shape[0]
        DD_len.append(n_samples)

        id_list = [geno + '_' + str(i) for i in range(fly_num)]
        geno_list = [geno] * fly_num

        ## metadata creation ##
        combine = np.array([id_list, geno_list]).T
        meta_frames.append(pd.DataFrame(combine, columns = meta_columns))

        ## combined data creation ##
        time = range(n_samples)
        for i, ids in enumerate(id_list):
            final_id = [ids] * n_samples
            combine = np.array([final_id, time, dataset[:, i]]).T
            combined_frames.append(
                pd.DataFrame(combine, columns = combined_data_columns))

    # Keep the per-file row indices (ignore_index = False), as before.
    if meta_frames:
        metadata_df = pd.concat(meta_frames, ignore_index = False)
    else:
        metadata_df = pd.DataFrame(columns = meta_columns)
    if combined_frames:
        combined_data = pd.concat(combined_frames, ignore_index = False)
    else:
        combined_data = pd.DataFrame(columns = combined_data_columns)

    metadata_df.to_csv('metadata.csv')
    combined_data.to_csv('combined_data.csv')
    with open('./' + maxlen_name, 'w') as out:
        # default = 0 avoids a ValueError from max() when no file was processed.
        out.write(str(max(DD_len, default = 0)))

if __name__ == '__main__':
    # Script entry point: search the current directory tree for '.txt' files
    # (the defaults of file_found) and build the output tables from them.
    meta_combined_data_create(file_found())
#    
    
          
         
    
    
    
    
    
    
    

