# Duct-tape Python CCM: a Python wrapper function that generates and runs the R (rEDM) version of CCM
# TODO: Comment all functions.
from os import system, path, makedirs, remove, rmdir
from pandas import DataFrame, read_csv
import numpy as np
import platform

def array2seq(array):
	"""Render an iterable as the text of an R vector literal.

	Each item is converted with ``str`` and the pieces are joined with
	commas inside ``c(...)``; e.g. ``[1, 2, 3]`` becomes ``"c(1,2,3)"``.
	"""
	pieces = map(str, array)
	return "c(" + ",".join(pieces) + ")"

def run_ccm(data = False, data_filename = "data.csv", temp_folder = "tmp",
			max_tau = 10, max_E = 10, min_lib_size = 25, max_lib_size = 150,
			lib_size_step = 25, num_samples = 300,
			lib=None, pred=None, min_tau = 1, min_E = 1,
			prediction_horizon = np.arange(-8,9,2), rand_libs=True, surr_test=False,
			n_surr_datasets = 100, n_surr_samples = 1, surr_type = "ebisuzaki",
			surr_period=1,
			script_filename = "script.R", remove_temp_files=False, replace=True,
			seed=None):
	"""
	Run convergent cross mapping (CCM) by generating an R script that uses the
	rEDM package, executing it with Rscript, and reading back the CSV output.

	The generated script first picks an embedding (tau, E) for series A by
	scanning min_tau..max_tau and min_E..max_E and keeping the pair with the
	highest rho when A is mapped onto itself. It then cross maps A onto B for
	every prediction horizon, and optionally repeats the cross map against
	surrogate versions of B.

	Requires an R installation with the rEDM package (the script is annotated
	for rEDM v0.7.3) and an `Rscript` executable.

	Args:
		data (array-like, False or None): Data as an nx2 matrix where each
			column is one of the two time series (saved with column names
			'A' and 'B'). If False (or None), the data are instead read from
			the existing file `temp_folder/data_filename`.
		data_filename (string): temporary file for storing the data, relative
			to temp_folder
		temp_folder (string): filepath to temporary folder for R script,
			R input, and R output; created if it does not exist
		max_tau (int): maximum value of the tau parameter for delay vector
			embedding
		max_E (int): maximum value of the E parameter for delay vector embedding
		min_lib_size (int): minimum library size for CCM. Set to -1 to let
			the R script set it to
			lib_end - lib_begin + 1 - tau * (E - 1) for the selected embedding.
		max_lib_size (int): maximum library size for CCM. Set to -1 to let
			the R script set it to
			lib_end - lib_begin + 1 - tau * (E - 1) for the selected embedding,
			reduced by one when sampling random libraries without replacement.
		lib_size_step (int): library size step for CCM. Set to -1 to let
			lib_size_step be max_lib_size - min_lib_size.
		num_samples (int): number of random samples to use for CCM
		lib (2-tuple or None): minimum and maximum time points for library,
			defaults to all time points (1 to the number of rows)
		pred (2-tuple or None): minimum and maximum time points for
			prediction, defaults to all time points (1 to the number of rows)
		min_tau (int): minimum value of the tau parameter for delay vector
			embedding
		min_E (int): minimum value of the E parameter for delay vector embedding
		prediction_horizon (iterable): prediction horizons to use for CCM
		rand_libs (bool): whether to use random libraries
		surr_test (bool): if True, run CCM with surrogate datasets
		n_surr_datasets (int): number of surrogate datasets to create
		n_surr_samples (int): number of random samples per surrogate to use for
			CCM (analogous to the `num_samples` parameter)
		surr_type (string): method for making surrogate data using the
			make_surrogate_data function in the rEDM package. Valid choices are
			"ebisuzaki" and "random_shuffle".
		surr_period (number): period for seasonal surrogate data. NOTE: the
			value is written into the script but the generated R code does not
			currently pass it to make_surrogate_data.
		script_filename (string): temporary file for storing the script,
			relative to temp_folder
		remove_temp_files (bool): if True, removes temporary files, and also
			removes the temporary folder if the folder is empty after removing
			temporary files. The data file is only removed when it was written
			by this call (i.e. `data` was passed in).
		replace (bool): If True, sample random vectors with replacement
		seed (int): seed for random number generator (R `NULL` if None)

	Returns:
		pandas DataFrame read from `AxB1.csv` (cross map of A onto B, one
		block of rows per prediction horizon, with a `tp` column). If
		surr_test is True, a tuple `(AxB1, AxB_surr)` where `AxB_surr` is
		the DataFrame read from `AxB_surr.csv` (surrogate results, with a
		`rep` column identifying the surrogate dataset).

	Raises:
		RuntimeError: if the Rscript command exits with a non-zero status.
	"""
	# handle folder stuff
	if temp_folder.endswith("/"):
		temp_folder = temp_folder[:-1]
	if not path.exists(temp_folder):
		makedirs(temp_folder)
	data_filename = temp_folder + "/" + data_filename
	script_filename = temp_folder + "/" + script_filename

	# if data are passed in, save the data as a pandas dataframe; otherwise
	# the data file is expected to exist already and is only read here to
	# learn the number of time points
	data_was_passed = not (data is False or data is None)
	if data_was_passed:
		df = DataFrame(data=data, columns=['A', 'B'])
		df.to_csv(data_filename, index=False)
	else:
		df = read_csv(data_filename)

	# prepare arguments for r programming
	data_filename_ = "\"" + data_filename + "\""
	# NOTE: rEDM v0.7.3 uses regular tau as input; do not take the negative
	surr_type_ = "\"" + surr_type + "\""
	surr_test_ = "TRUE" if surr_test else "FALSE"
	prediction_horizon = array2seq(prediction_horizon)
	rand_libs = "TRUE" if rand_libs else "FALSE"
	replace = "TRUE" if replace else "FALSE"
	# literal braces for the R code (kept as names so the f-string stays readable)
	open_brac = "{"
	close_brac = "}"
	n_pts = df.shape[0]
	if lib is None:
		lib_begin = 1
		lib_end = n_pts
	else:
		lib_begin, lib_end = lib
	if pred is None:
		pred_begin = 1
		pred_end = n_pts
	else:
		pred_begin, pred_end = pred
	if seed is None:
		seed = 'NULL'

	# R program
	program = f"""# parameters
max_tau = {max_tau}
max_E = {max_E}
min_tau = {min_tau}
min_E = {min_E}
min_lib_size = {min_lib_size}
max_lib_size = {max_lib_size}
lib_size_step = {lib_size_step}
num_samples = {num_samples}
lib_begin = {lib_begin}
lib_end = {lib_end}
pred_begin = {pred_begin}
pred_end = {pred_end}
rand_libs = {rand_libs}
prediction_horizon = {prediction_horizon}
input_filename = {data_filename_}
n_surr = {n_surr_datasets}
n_surr_samples = {n_surr_samples}
surr_type = {surr_type_}
surr_test = {surr_test_}
surr_period = {surr_period}
replace = {replace}
seed = {seed}

# import library
library(rEDM)

# import data
input_data = read.csv(input_filename)
vars <- colnames(input_data)

# determine optimal embedding for series A
ts <- input_data$A
lib <- c(lib_begin, lib_end)
pred <- c(pred_begin, pred_end)
simplex_output = data.frame()
for (tau in min_tau : max_tau) {open_brac}
  for (E in min_E : max_E){open_brac}
    sp_libsize = lib_end - lib_begin
    simplex_output <- rbind(simplex_output, ccm(input_data, lib, pred, tau=tau, E=E, random_libs=FALSE,
                        lib_sizes=sp_libsize, tp=1, lib_column='A', target_column='A'))
  {close_brac}
{close_brac}
optim_embed = which.max(unlist(simplex_output["rho"])) # for v0.7.3
tau_star_A = simplex_output$tau[optim_embed]
E_star_A = simplex_output$E[optim_embed]

# set min_lib_size = -1 to automatically choose min_lib_size
if (min_lib_size == -1){open_brac}
  min_lib_size = lib_end - lib_begin + 1 - tau_star_A * (E_star_A - 1) # for v0.7.3
{close_brac}

# set max_lib_size = -1 to automatically choose max_lib_size
if (max_lib_size == -1){open_brac}
  max_lib_size = lib_end - lib_begin + 1 - tau_star_A * (E_star_A - 1) # for v0.7.3
  if ((replace == FALSE) && rand_libs){open_brac}
    max_lib_size = max_lib_size - 1
  {close_brac}
{close_brac}

# set lib_size_step = -1 for single step
if (lib_size_step == -1) {open_brac}
  lib_size_step = max_lib_size - min_lib_size
{close_brac}

### run CCM with with optimal embedding of predictor

A_xmap_B <- data.frame() #NEW
for (tp in prediction_horizon){open_brac}
  ccm_tp <- ccm(input_data, E = E_star_A, tau = tau_star_A,
  								  lib=c(lib_begin, lib_end), pred=c(pred_begin, pred_end),
                                  random_libs = rand_libs, replace = replace, lib_column = "A",
                                  target_column = "B", lib_sizes = seq(min_lib_size, max_lib_size, by = lib_size_step),
                                  num_samples = num_samples, tp = tp,
                                  RNGseed = seed)
  ccm_tp['tp'] = tp
  A_xmap_B <- rbind(A_xmap_B, ccm_tp)
{close_brac} #NEW

write.csv(A_xmap_B,'{temp_folder}/AxB1.csv')

### redo with surrogate time series
# execution of surrogate series calculations
if (surr_test){open_brac}
  surr_B = make_surrogate_data(input_data$B, surr_type, n_surr)
  surr_AxB = data.frame() # initialize data frame in which to store result
  # begin the for loop here...
  for (sidx in 1:n_surr){open_brac}
    surr_data = data.frame(input_data$A, surr_B[,sidx])
    colnames(surr_data) <- c("A", "B")
    # run CCM on surrogate data

    A_xmap_B = data.frame() #NEW
    for (tp in prediction_horizon){open_brac}
	ccm_tp <- ccm(surr_data, E = E_star_A, tau = tau_star_A,
					lib=c(lib_begin, lib_end), pred=c(pred_begin, pred_end),
                    random_libs = rand_libs, replace = replace, lib_column = "A",
                    target_column = "B", lib_sizes = max_lib_size,
                    num_samples=n_surr_samples, tp = tp,
                    RNGseed = seed)
	ccm_tp['tp'] = tp
    A_xmap_B <- rbind(A_xmap_B, ccm_tp)
    {close_brac} #NEW
    A_xmap_B$rep <- rep(sidx, length(prediction_horizon)) #NEW

    # save result in the ledger data structures
    surr_AxB <- rbind(surr_AxB, A_xmap_B)
  {close_brac}
  write.csv(surr_AxB,'{temp_folder}/AxB_surr.csv')
{close_brac}   """

	# write to file (context manager guarantees the handle is closed before
	# Rscript reads the script)
	with open(script_filename, "w") as script_file:
		script_file.write(program)

	# run the program
	rscript = "Rscript"
	if platform.system() == 'Windows':
		# Prefer the original author's hard-coded install location when it
		# exists; otherwise fall back to whatever Rscript is on PATH.
		windows_rscript = r"C:\Users\alexe\R\R-3.6.1\bin\Rscript.exe"
		if path.exists(windows_rscript):
			rscript = windows_rscript
	exit_status = system(rscript + " " + script_filename)
	if exit_status != 0:
		# Fail loudly instead of reading missing or stale output files
		# left over from a previous run.
		raise RuntimeError(
			f"Rscript failed with exit status {exit_status} "
			f"while running {script_filename}")

	# retrieve the data
	AxB1 = read_csv(f'{temp_folder}/AxB1.csv')
	if surr_test:
		AxB_surr = read_csv(f'{temp_folder}/AxB_surr.csv')

	# remove temp files
	if remove_temp_files:
		remove(f"{temp_folder}/AxB1.csv")
		if surr_test:
			remove(f"{temp_folder}/AxB_surr.csv")
		remove(script_filename)
		# only delete the data file if this call created it; a file supplied
		# by the caller is left untouched
		if data_was_passed:
			remove(data_filename)
		try: # remove the temp folder if it is empty
			rmdir(temp_folder)
		except OSError as e:
			# folder not empty (or otherwise not removable): report and go on
			print(str(e))

	# return CCM result
	if surr_test:
		return (AxB1, AxB_surr)
	return AxB1

def EX_gt_EY(X, Y, n_boot=10**5, paired=False):
    """
    Test the null hypothesis that E[X] = E[Y] against the alternative hypothesis
        that E[X] > E[Y] by computing s = E[X] - E[Y] and comparing the observed
        value of s to the distribution of s from bootstrap resampling of X-E[X]
        and Y-E[Y] (i.e. both samples are mean-centered so that the null
        hypothesis holds in the resampled data).
    Args:
        * X (array-like): dataset; flattened to 1d
        * Y (array-like): dataset; flattened to 1d
        * n_boot (int): number of bootstraps
        * paired (bool): whether samples are paired or unpaired. If paired,
            the same resampled indices are used for both X and Y.
    Returns:
        p-value (fraction of bootstrap statistics >= the observed statistic);
        lower p-value indicates greater confidence in the statement
        "E[X] > E[Y]"
    Raises:
        ValueError: if paired is True and X and Y differ in size.
    """
    # flatten() returns a copy, so the caller's arrays are never modified
    X = np.asarray(X).flatten()
    Y = np.asarray(Y).flatten()
    if paired and X.size != Y.size:
        # explicit check rather than assert, which is stripped under -O
        raise ValueError(
            f"paired samples must have equal size, got {X.size} and {Y.size}")
    s_obs = np.mean(X) - np.mean(Y)
    # centering does not depend on the bootstrap draw; compute it once
    X_centered = X - np.mean(X)
    Y_centered = Y - np.mean(Y)
    s_boot = np.empty(n_boot)
    for trial in range(n_boot):
        xidx = np.random.choice(X.size, size=X.size, replace=True)
        if paired:
            yidx = xidx
        else:
            yidx = np.random.choice(Y.size, size=Y.size, replace=True)
        s_boot[trial] = np.mean(X_centered[xidx]) - np.mean(Y_centered[yidx])
    return np.sum(s_boot >= s_obs) / n_boot
