#' Simulate correlated (x, y) data with a chosen variance ratio
#'
#' @description This function is used to simulate data illustrative of the
#' relationship between cor_xy, cor_xd, and the variance ratio. This function
#' is used to generate datasets A-E (dataset E is subjected to a ceiling
#' effect prior to analysis).
#'
#' @details Observations are drawn from a bivariate normal distribution via
#' `MASS::mvrnorm`. The covariance matrix is built from the arguments as
#' `var_y = var_x * var_ratio` and
#' `cov_xy = cor_xy * sqrt(var_x) * sqrt(var_y)`.
#'
#' The function calls `set.seed(seed)` on entry, so the global random number
#' generator state is reset on every call.
#'
#' @param n sample size
#' @param seed seed for reproducibility (passed to `set.seed`)
#' @param mu_x mean of the `x` observations
#' @param mu_y mean of the `y` observations
#' @param var_x variance of the `x` observations
#' @param var_ratio variance ratio (`var_y` / `var_x`)
#' @param cor_xy correlation between `x` and `y`
#' @param empirical argument to `MASS::mvrnorm` -- should variances and
#' correlations be exactly as specified, or take these values on average?
#' @param ... additional arguments (currently unused by the function body)
#'
#' @return A tibble with one row per simulated observation and columns
#' `subj` (row index `1..n`), `x`, `y`, and `delta` (`y - x`).
#' @export
#'
#' @examples
#' simulate_data(n = 50, seed = 2, var_ratio = 2, cor_xy = 0.5)
simulate_data <- function(n = 30, seed = 1,
                          mu_x = 30, mu_y = 30,
                          var_x = 100, var_ratio = 1, cor_xy = 0,
                          empirical = TRUE, ...) {

  set.seed(seed)

  # Translate (var_x, var_ratio, cor_xy) into a 2 x 2 covariance matrix.
  var_y <- var_x * var_ratio
  cov_xy <- cor_xy * sqrt(var_x) * sqrt(var_y)

  Sigma <- matrix(c(var_x, cov_xy, cov_xy, var_y), 2, 2)

  obs_data <-
    MASS::mvrnorm(n, mu = c(mu_x, mu_y), Sigma = Sigma, empirical = empirical)

  # mvrnorm() returns a plain length-2 vector when n == 1, on which
  # `colnames<-` fails. Force an n x 2 matrix before naming the columns;
  # for n > 1 this leaves the values and their layout unchanged.
  obs_data <- matrix(obs_data, ncol = 2, dimnames = list(NULL, c("x", "y")))

  df <-
    as_tibble(obs_data) %>%
    mutate(
      subj = seq_len(n),
      delta = y - x
    ) %>%
    select(subj, x, y, delta)

  df
}



#' Simulate follow-up values under random recovery
#'
#' Given a starting dataset, this function extracts observed baseline values
#' and, given these, simulates follow-up values between the baseline and
#' `max_val` (the max allowed value, 66 by default) using a uniform
#' distribution. The mechanism is intended to mimic the random recovery
#' process implemented in various papers. This function is used in the
#' context of clustering analysis and simulations.
#'
#' The function calls `set.seed(seed)` on entry, so the global random number
#' generator state is reset on every call.
#'
#' @param seed seed for reproducibility (passed to `set.seed`)
#' @param start_df data set that is used as the basis for data simulated under
#' random recovery. Must contain a column `x` holding the baseline values.
#' @param max_val maximum allowed value; upper bound of the uniform draw for
#' each follow-up value. Defaults to 66. Baseline values equal to `max_val`
#' give `prop = NaN` (0 / 0); NOTE(review): baselines above `max_val` are
#' expected to make `runif` return `NaN` with a warning -- confirm inputs
#' are bounded by `max_val`.
#'
#' @return A tibble with one row per baseline value and columns
#' `x` (baseline), `ii` (`max_val - x`), `y` (simulated follow-up),
#' `delta` (`y - x`), `prop` (`delta / ii`), and `severe` (always `FALSE`).
#' @export
#'
#' @examples
#' random_recov_data(seed = 1, start_df = data.frame(x = c(10, 30, 50)))
random_recov_data <- function(seed, start_df, max_val = 66) {

  # Validate before touching the RNG so a bad call leaves its state alone.
  stopifnot(is.numeric(max_val), length(max_val) == 1)

  set.seed(seed)

  x_vals <-
    start_df %>%
    pull(x)

  n <- length(x_vals)

  # Columns are built sequentially, so later entries reuse earlier ones:
  # `y` is drawn uniformly on [x, max_val] and `prop` divides by `ii`.
  random_df <-
    tibble(
      x = x_vals,
      ii = max_val - x,
      y = runif(n, x, max_val),
      delta = y - x,
      prop = delta / ii,
      severe = FALSE
    )

  random_df
}
