# encoding: utf-8
from __future__ import print_function

'''
Script to run the benchmarks in the publication
Stimberg, Marcel, Romain Brette, and Dan Goodman (2019).
“Brian 2: An Intuitive and Efficient Neural Simulator.”

The packages required for running this script can be installed with the conda
package manager. After downloading the environment description provided in the
file "environment.yml", you can create an environment based on these packages
with
    conda env create -f environment.yml
See https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html
for details.

The benchmark results in the article were measured on Ubuntu 18.04, on a
3.70GHz Intel Xeon® CPU E5-1630 v3 – obviously, results will differ on other
machines.

The joblib library is used to store results to disk. Previously acquired results
will therefore be loaded directly from disk, so you can safely interrupt a
benchmarking run without losing the benchmark results acquired so far.
'''
import matplotlib
matplotlib.use('Agg')
import pandas as pd

from brian2 import *
from brian2.devices.device import reinit_devices

from benchmark_utils import evaluate_subset


# Global options for all benchmarks
# Number of times each benchmark configuration is run; every simulator loop
# below iterates over range(repetitions) and stores the index as 'trial'.
repetitions = 3

def simulated_runtime(n_neurons):
    '''
    Return the simulated duration to use for a network of ``n_neurons``.

    The value is in milliseconds (the progress messages divide it by 1000 to
    print seconds). Networks with fewer than 10000 neurons are simulated for
    10000 ms, bigger networks only for 1000 ms to keep the benchmarks short.
    '''
    return 10000 if n_neurons < 10000 else 1000


def benchmark_record(simulator, target, runtime, trial, n_neurons, result,
                     threads=None):
    '''
    Build one row of the benchmark table.

    Parameters
    ----------
    simulator : str
        Simulator label stored in the ``'simulator'`` column.
    target : str
        Code generation target (or ``'N/A'``) stored in ``'target'``.
    runtime : int
        Simulated duration that was requested for this run.
    trial : int
        Zero-based repetition index.
    n_neurons : int
        Network size.
    result : mapping
        Return value of a ``run_benchmark`` function; the keys
        ``'n_synapses'``, ``'time'`` and ``'rate'`` (indexable, entry 0 is
        stored as ``'rate_exc'`` and entry 1 as ``'rate_inh'``) are read.
    threads : int, optional
        Number of threads. The ``'threads'`` key is only added when this is
        not ``None`` (note that ``0`` is a valid value and is stored).

    Returns
    -------
    dict
        The record, ready to be appended to the list of results.
    '''
    record = {'simulator': simulator,
              'target': target,
              'runtime': runtime,
              'trial': trial,
              'n_neurons': n_neurons,
              'n_synapses': result['n_synapses'],
              'simtime': result['time'],
              'rate_exc': result['rate'][0],
              'rate_inh': result['rate'][1],
              }
    if threads is not None:
        record['threads'] = threads
    return record


if __name__ == '__main__':
    import os

    # All output files go to this directory. Create it *before* running the
    # (long) benchmarks, so that a missing directory cannot make the script
    # fail only at the very end when the results are written.
    if not os.path.isdir('results'):
        os.makedirs('results')

    # Run the benchmarks

    n_to_test = [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000]

    for heterogeneous in [False]:
        suffix = 'heterogeneous' if heterogeneous else 'homogeneous'
        print('Running benchmarks with {} neuron population'.format(suffix))
        benchmark_results = []

        # Brian1 benchmarks
        from CUBA_Brian1 import run_benchmark as run_benchmark_Brian1
        for target in ['numpy']:  # experimental code generation very slow
            for n in n_to_test:
                runtime = simulated_runtime(n)
                for rep in range(repetitions):
                    # Each repetition records a zero-length run in addition to
                    # the full run (presumably so that the fixed setup cost can
                    # be separated from the simulation time later -- TODO
                    # confirm against evaluate_subset).
                    for actual_runtime in [0, runtime]:
                        print('Brian1: {} neurons with target {} (repetition: {}) for {:.0f}s... '.format(n, target, rep + 1, actual_runtime/1000.), end='')
                        result = run_benchmark_Brian1(n_neurons=n, target=target,
                                                      runtime=actual_runtime,
                                                      heterogeneous=heterogeneous)
                        benchmark_results.append(
                            benchmark_record('Brian 1', target, actual_runtime,
                                             rep, n, result))
                        print(' took {:.2f}s'.format(result['time']))

        # Brian2 benchmarks
        from CUBA_Brian import run_benchmark as run_benchmark_Brian
        for target in ['numpy', 'weave', 'cpp_standalone']:
            reinit_devices()
            is_standalone = (target == 'cpp_standalone')
            for n in n_to_test:
                runtime = simulated_runtime(n)
                for rep in range(repetitions):
                    print('Brian2: {} neurons with target {} (repetition: {}) for {:.0f}s... '.format(n, target, rep + 1, runtime/1000.), end='')
                    result = run_benchmark_Brian(n_neurons=n, target=target,
                                                 runtime=runtime,
                                                 heterogeneous=heterogeneous)
                    # Single-threaded standalone runs are marked with
                    # threads=0; the runtime targets do not get a 'threads'
                    # entry at all.
                    benchmark_results.append(
                        benchmark_record('Brian 2', target, runtime, rep, n,
                                         result,
                                         threads=0 if is_standalone else None))
                    print(' took {:.2f}s'.format(result['time']))
                    if is_standalone:
                        # The standalone device has to be reset between runs
                        device.reinit()

        # Do multithreaded cpp
        # The target is spelled out here instead of relying on the loop
        # variable left over from the loop above.
        standalone_target = 'cpp_standalone'
        n_threads = 12
        for n in n_to_test:
            runtime = simulated_runtime(n)
            for rep in range(repetitions):
                print('Brian2: {} neurons with {} threads and target {} (repetition: {}) for {:.0f}s... '.format(n, n_threads, standalone_target, rep + 1, runtime/1000.), end='')
                result = run_benchmark_Brian(n_neurons=n, target=standalone_target,
                                             threads=n_threads, runtime=runtime,
                                             heterogeneous=heterogeneous)
                benchmark_results.append(
                    benchmark_record('Brian 2', standalone_target, runtime,
                                     rep, n, result, threads=n_threads))
                print(' took {:.2f}s'.format(result['time']))
                device.reinit()

        # NEST benchmarks
        from CUBA_NEST import run_benchmark as run_benchmark_NEST
        for threads in [1, 12]:
            for n in n_to_test:
                runtime = simulated_runtime(n)
                for rep in range(repetitions):
                    # As for Brian 1, a zero-length run is recorded as well
                    for actual_runtime in [0, runtime]:
                        print('NEST: {} neurons with {} threads (repetition: {}) for {:.0f}s... '.format(n, threads, rep + 1, actual_runtime/1000.), end='')
                        result = run_benchmark_NEST(n_neurons=n, threads=threads,
                                                    runtime=actual_runtime,
                                                    heterogeneous=heterogeneous)
                        benchmark_results.append(
                            benchmark_record('NEST', 'N/A', actual_runtime,
                                             rep, n, result, threads=threads))
                        print(' took {:.2f}s'.format(result['time']))

        # NEURON benchmarks
        from CUBA_NEURON import run_benchmark as run_benchmark_NEURON
        for n in n_to_test:
            runtime = simulated_runtime(n)
            for rep in range(repetitions):
                print('NEURON: {} neurons (repetition: {})... '.format(n, rep + 1), end='')
                result = run_benchmark_NEURON(n_neurons=n, runtime=runtime,
                                              heterogeneous=heterogeneous)
                benchmark_results.append(
                    benchmark_record('NEURON', 'N/A', runtime, rep, n, result))
                print(' took {:.2f}s'.format(result['time']))

        full_results = pd.DataFrame(benchmark_results)
        # Note that if you re-run the benchmarks, all results will be read from disk

        # Save all data to a CSV file
        full_results.to_csv('results/benchmark_results_{}.csv'.format(suffix))

        plt.style.use('seaborn-talk')
        # Stored plots are only for quick checks, we'll create the final plots
        # from the CSV files
        fig_simtime, ax_simtime = plt.subplots()
        fig_speedup, ax_speedup = plt.subplots()
        fig_validation, (ax_synapses, ax_rate_exc, ax_rate_inh) = plt.subplots(3, 1,
                                                                               sharex=True)

        # One line per (simulator, target, threads) configuration; threads is
        # None for configurations that were recorded without a 'threads' entry.
        for label, simulator, target, threads in [
            ('Brian 1', 'Brian 1', 'numpy', None),
            ('Brian 2: runtime (Python)', 'Brian 2', 'numpy', None),
            ('Brian 2: runtime (C++)', 'Brian 2', 'weave', None),
            ('Brian 2: standalone (C++)', 'Brian 2', 'cpp_standalone', 0),
            ('Brian 2: standalone (C++, 12 threads)', 'Brian 2', 'cpp_standalone', 12.0),
            ('NEST', 'NEST', 'N/A', 1),
            ('NEST (12 threads)', 'NEST', 'N/A', 12),
            ('NEURON', 'NEURON', 'N/A', None)
        ]:
            evaluated = evaluate_subset(full_results, simulator=simulator,
                                        target=target, threads=threads)
            ax_simtime.plot(evaluated.index, evaluated['rel_simtime'], label=label)
            ax_synapses.errorbar(evaluated.index, evaluated['n_synapses_mean'],
                                 yerr=evaluated['n_synapses_std'], label=label)
            ax_rate_exc.errorbar(evaluated.index, evaluated['rate_exc_mean'],
                                 yerr=evaluated['rate_exc_std'], label=label)
            ax_rate_inh.errorbar(evaluated.index, evaluated['rate_inh_mean'],
                                 yerr=evaluated['rate_inh_std'], label=label)

        # Slowdown of the other simulators relative to single-threaded
        # Brian 2 C++ standalone mode
        reference = evaluate_subset(full_results, simulator='Brian 2',
                                    target='cpp_standalone', threads=0)
        for label, simulator, target, threads in [
            ('Brian 1', 'Brian 1', 'numpy', None),
            ('NEST (12 threads)', 'NEST', 'N/A', 12),
            ('NEURON', 'NEURON', 'N/A', None)
        ]:
            evaluated = evaluate_subset(full_results, simulator=simulator,
                                        target=target, threads=threads)
            ax_speedup.plot(evaluated.index, evaluated['rel_simtime'] / reference['rel_simtime'],
                            label=label)

        ax_simtime.set(xlabel='Number of neurons', ylabel='Runtime vs. realtime',
                       xscale='log', yscale='log')
        # Mark the point where the simulation runs exactly in real time
        ax_simtime.axhline(1, color='gray', linestyle=':', lw=1)
        ax_simtime.legend()
        fig_simtime.tight_layout()
        fig_simtime.savefig('results/benchmarks_{}.pdf'.format(suffix))

        ax_speedup.set(xlabel='Number of neurons', ylabel='Slowdown vs. C++ standalone',
                       xscale='log')
        ax_speedup.set_ylim(1, ax_speedup.get_ylim()[1])
        ax_speedup.legend()
        fig_speedup.tight_layout()
        fig_speedup.savefig('results/speedup_{}.pdf'.format(suffix))

        ax_synapses.set(xlabel='Number of neurons', ylabel='# synapses',
                        xscale='log', yscale='log')
        ax_rate_exc.set(xlabel='Number of neurons', ylabel='rate (Hz)',
                        xscale='log', title='excitatory neurons')
        ax_rate_inh.set(xlabel='Number of neurons', ylabel='rate (Hz)',
                        xscale='log', title='inhibitory neurons')
        fig_validation.tight_layout()
        fig_validation.savefig('results/validation_{}.pdf'.format(suffix))

        # The figures have been written to disk; release them so that they do
        # not accumulate if more than one population type is benchmarked.
        for fig in (fig_simtime, fig_speedup, fig_validation):
            plt.close(fig)
