summaryrefslogtreecommitdiffstats
path: root/src/boost/libs/numeric/odeint/performance
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/boost/libs/numeric/odeint/performance
parentInitial commit. (diff)
downloadceph-upstream/16.2.11+ds.tar.xz
ceph-upstream/16.2.11+ds.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/boost/libs/numeric/odeint/performance')
-rw-r--r--src/boost/libs/numeric/odeint/performance/Jamfile.v232
-rw-r--r--src/boost/libs/numeric/odeint/performance/Makefile43
-rw-r--r--src/boost/libs/numeric/odeint/performance/SIMD/Makefile33
-rwxr-xr-xsrc/boost/libs/numeric/odeint/performance/SIMD/perf_roessler.sh22
-rw-r--r--src/boost/libs/numeric/odeint/performance/SIMD/roessler.cpp125
-rw-r--r--src/boost/libs/numeric/odeint/performance/SIMD/roessler_simd.cpp149
-rw-r--r--src/boost/libs/numeric/odeint/performance/c_lorenz.c57
-rw-r--r--src/boost/libs/numeric/odeint/performance/fortran_lorenz.f9060
-rw-r--r--src/boost/libs/numeric/odeint/performance/lorenz.hpp33
-rw-r--r--src/boost/libs/numeric/odeint/performance/odeint_rk4_array.cpp63
-rw-r--r--src/boost/libs/numeric/odeint/performance/plot_result.py64
11 files changed, 681 insertions, 0 deletions
diff --git a/src/boost/libs/numeric/odeint/performance/Jamfile.v2 b/src/boost/libs/numeric/odeint/performance/Jamfile.v2
new file mode 100644
index 000000000..e60e4ea12
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/Jamfile.v2
@@ -0,0 +1,32 @@
+# Copyright 2012 Karsten Ahnert
+# Copyright 2012 Mario Mulansky
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+import os ;
+import modules ;
+import path ;
+
+# Project-wide requirements for the odeint performance tests: C++11,
+# BOOST_ALL_NO_LIB defined, toolset-specific optimisation flags
+# (gcc: -ffast-math, intel: -fast -inline-forceinline) and a release
+# build by default.
+project
+ : requirements
+ <define>BOOST_ALL_NO_LIB=1
+ <include>../../../..
+ <cxxflags>-std=c++11
+ <toolset>gcc:<cxxflags>-ffast-math
+ <toolset>intel:<cxxflags>"-fast -inline-forceinline"
+ : default-build release
+ ;
+
+
+# Prebuilt GSL libraries, searched for by name.
+# NOTE(review): no target visible in this Jamfile uses them - confirm
+# whether they are still needed.
+lib libgsl : : <name>gsl ;
+lib libgslcblas : : <name>gslcblas ;
+
+# Prebuilt MKL, OpenMP runtime and pthread libraries, searched for by name.
+# NOTE(review): likewise not referenced by the exe target below.
+lib libmkl : : <name>mkl_intel_lp64 <link>shared ;
+lib libmkl_core : : <name>mkl_core <link>shared ;
+lib libmkl_intel_thread : : <name>mkl_intel_thread ;
+lib libiomp5 : : <name>iomp5 ;
+lib libpthread : : <name>pthread ;
+
+# Benchmark executable built from the single source odeint_rk4_array.cpp.
+exe odeint_rk4_array
+ : odeint_rk4_array.cpp
+ ;
diff --git a/src/boost/libs/numeric/odeint/performance/Makefile b/src/boost/libs/numeric/odeint/performance/Makefile
new file mode 100644
index 000000000..641cb0313
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/Makefile
@@ -0,0 +1,43 @@
+# Copyright 2011-2014 Mario Mulansky
+# Copyright 2011-2014 Karsten Ahnert
+#
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or
+# copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# make sure BOOST_ROOT is pointing to your boost directory
+# otherwise, set it here:
+# BOOST_ROOT = /path/to/boost
+
+INCLUDES += -I../../include/ -I$(BOOST_ROOT)
+GCCFLAGS = -O3 -ffast-math -DNDEBUG
+# disabling -ffast-math might give slightly better performance
+ICCFLAGS = -Ofast -xHost -ip -inline-forceinline -DNDEBUG
+# Possible options: -fp-model source -no-fma
+GFORTFLAGS = -Ofast
+
+# `all` names a command, not a file. It is not the first target in this
+# file, so it has to be requested explicitly with `make all`.
+.PHONY: all
+
+# Output directories, one per compiler.
+bin/gcc:
+	mkdir -p bin/gcc
+
+bin/intel:
+	mkdir -p bin/intel
+
+bin/gfort:
+	mkdir -p bin/gfort
+
+# In the rules below the output directory is an order-only prerequisite
+# (listed after `|`): it has to exist, but its timestamp, which changes
+# whenever a file is created inside it, must not trigger a rebuild.
+# lorenz.hpp is listed because odeint_rk4_array.cpp includes it.
+bin/gcc/odeint_rk4_array: odeint_rk4_array.cpp lorenz.hpp | bin/gcc
+	g++ ${GCCFLAGS} ${INCLUDES} -o bin/gcc/odeint_rk4_array odeint_rk4_array.cpp
+
+bin/gcc/c_lorenz: c_lorenz.c | bin/gcc
+	gcc -std=c99 -Ofast -mtune=corei7-avx c_lorenz.c -o bin/gcc/c_lorenz
+
+bin/intel/odeint_rk4_array: odeint_rk4_array.cpp lorenz.hpp | bin/intel
+	icpc ${ICCFLAGS} ${INCLUDES} -o bin/intel/odeint_rk4_array odeint_rk4_array.cpp
+
+bin/intel/c_lorenz: c_lorenz.c | bin/intel
+	icc -std=c99 -Ofast -xHost -ansi-alias -o bin/intel/c_lorenz c_lorenz.c
+
+bin/gfort/fortran_lorenz: fortran_lorenz.f90 | bin/gfort
+	gfortran ${GFORTFLAGS} fortran_lorenz.f90 -o bin/gfort/fortran_lorenz
+
+all: bin/gcc/odeint_rk4_array bin/intel/odeint_rk4_array bin/gcc/c_lorenz bin/intel/c_lorenz bin/gfort/fortran_lorenz
diff --git a/src/boost/libs/numeric/odeint/performance/SIMD/Makefile b/src/boost/libs/numeric/odeint/performance/SIMD/Makefile
new file mode 100644
index 000000000..811acd988
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/SIMD/Makefile
@@ -0,0 +1,33 @@
+# Copyright 2014 Mario Mulansky
+#
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or
+# copy at http://www.boost.org/LICENSE_1_0.txt)
+
+# make sure BOOST_ROOT is pointing to your boost directory
+# otherwise, set it here:
+# BOOST_ROOT = /path/to/boost
+# you also need NT2's SIMD library available; set the include path here:
+# SIMD_INCLUDE = /path/to/simd/include
+
+# Header search paths: the Boost tree and the SIMD library headers.
+INCLUDES = -I$(BOOST_ROOT) -I${SIMD_INCLUDE}
+
+# INTEL COMPILER
+# change this if you want to cross-compile
+# ARCH is substituted into the -x${ARCH} option of CXXFLAGS below.
+ARCH = Host
+# ARCH = AVX
+# ARCH = SSE4.2
+
+# This file defines variables only, no targets.
+# NOTE(review): presumably the programs are built through make's implicit
+# rules (e.g. `make roessler`), which would also explain why CC is set to
+# the C++ driver - confirm.
+CXX = icpc
+CC = icpc
+CXXFLAGS = -O3 -x${ARCH} -std=c++0x -fno-alias -inline-forceinline -DNDEBUG ${INCLUDES}
+# -ip
+
+# GCC COMPILER
+# Alternative configuration: uncomment this section and comment out the
+# Intel section above. Here ARCH feeds -mtune and -march instead of -x.
+# change this if you want to cross-compile
+# ARCH = native
+# # ARCH = core-avx-i
+
+# CXX = g++
+# CC = g++
+# CXXFLAGS = -O3 -ffast-math -mtune=${ARCH} -march=${ARCH} -std=c++0x -DNDEBUG ${INCLUDES}
diff --git a/src/boost/libs/numeric/odeint/performance/SIMD/perf_roessler.sh b/src/boost/libs/numeric/odeint/performance/SIMD/perf_roessler.sh
new file mode 100755
index 000000000..a1094f63a
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/SIMD/perf_roessler.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Runs the scalar and SIMD Roessler benchmarks under likwid for a range of
+# ensemble sizes and stores timings and performance counter reports in
+# perf_${HOSTNAME}/.
+echo "Running on ${HOSTNAME}"
+
+out_dir="perf_${HOSTNAME}"
+mkdir -p "${out_dir}"
+
+for N in 256 1024 4096 16384 65536 262144 1048576 4194304 16777216 67108864
+do
+    # Keep the product N * steps constant so every size does the same
+    # amount of work.
+    steps=$(( 4 * 67108864 / N ))
+    for exe in "roessler" "roessler_simd"
+    do
+        times_file="${out_dir}/${exe}_N${N}.times"
+        # Start from an empty file, the runs below append to it.
+        rm -f "${times_file}"
+        for i in {0..4}
+        do
+            likwid-pin -cS0:0 "./${exe}" "${N}" "${steps}" >> "${times_file}"
+        done
+        # One additional run per performance counter group.
+        for perf_ctr in "FLOPS_DP" "FLOPS_AVX" "L2" "L3" "MEM"
+        do
+            likwid-perfctr -CS0:0 -g "${perf_ctr}" "./${exe}" "${N}" "${steps}" > "${out_dir}/${exe}_N${N}_${perf_ctr}.perf"
+        done
+    done
+done
diff --git a/src/boost/libs/numeric/odeint/performance/SIMD/roessler.cpp b/src/boost/libs/numeric/odeint/performance/SIMD/roessler.cpp
new file mode 100644
index 000000000..4e6cc4229
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/SIMD/roessler.cpp
@@ -0,0 +1,125 @@
+/*
+ * Simulation of an ensemble of Roessler attractors
+ *
+ * Copyright 2014 Mario Mulansky
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+
+#include <iostream>
+#include <vector>
+#include <random>
+
+#include <boost/timer.hpp>
+#include <boost/array.hpp>
+
+#include <boost/numeric/odeint.hpp>
+
+namespace odeint = boost::numeric::odeint;
+
+// Timer used to measure the integration loop.
+typedef boost::timer timer_type;
+
+// Floating point type of the whole simulation; switch the two typedefs to
+// run in single precision.
+typedef double fp_type;
+//typedef float fp_type;
+
+// State of one Roessler system (three components) and the ensemble of all
+// systems.
+typedef boost::array<fp_type, 3> state_type;
+typedef std::vector<state_type> state_vec;
+
+//---------------------------------------------------------------------------
+// Right-hand side of the Roessler system for a single state, with the
+// three parameters a, b, c fixed at construction.
+struct roessler_system {
+ const fp_type m_a, m_b, m_c;
+
+ roessler_system(const fp_type a, const fp_type b, const fp_type c)
+ : m_a(a), m_b(b), m_c(c)
+ {}
+
+ // Writes the derivative of state x to dxdt. The time argument t is not
+ // used by the body.
+ void operator()(const state_type &x, state_type &dxdt, const fp_type t) const
+ {
+ dxdt[0] = -x[1] - x[2];
+ dxdt[1] = x[0] + m_a * x[1];
+ dxdt[2] = m_b + x[2] * (x[0] - m_c);
+ }
+};
+
+//---------------------------------------------------------------------------
+/*
+ * Integrate n independent Roessler systems for `steps` classic RK4 steps
+ * and print the elapsed time of the integration loop.
+ *
+ * Usage: roessler <n> <steps>
+ *   n      number of systems, must be positive
+ *   steps  number of integration steps, must not be negative
+ */
+int main(int argc, char *argv[]) {
+    if(argc<3)
+    {
+        std::cerr << "Expected size and steps as parameter" << std::endl;
+        exit(1);
+    }
+    // Validate before converting to size_t: atoi returns 0 for non-numeric
+    // text and can return a negative value, which would wrap around to a
+    // huge unsigned count, and n == 0 would make x[0] / state[0] below
+    // an out-of-bounds access.
+    const int n_arg = atoi(argv[1]);
+    const int steps_arg = atoi(argv[2]);
+    if(n_arg <= 0 || steps_arg < 0)
+    {
+        std::cerr << "Size must be positive and steps must not be negative" << std::endl;
+        exit(1);
+    }
+    const size_t n = n_arg;
+    const size_t steps = steps_arg;
+
+    const fp_type dt = 0.01;
+
+    const fp_type a = 0.2;
+    const fp_type b = 1.0;
+    const fp_type c = 9.0;
+
+    // Random initial conditions. The engine is default-constructed, i.e.
+    // it is not seeded from a random source.
+    std::vector<fp_type> x(n), y(n), z(n);
+    std::default_random_engine generator;
+    std::uniform_real_distribution<fp_type> distribution_xy(-8.0, 8.0);
+    std::uniform_real_distribution<fp_type> distribution_z(0.0, 20.0);
+    auto rand_xy = std::bind(distribution_xy, std::ref(generator));
+    auto rand_z = std::bind(distribution_z, std::ref(generator));
+    std::generate(x.begin(), x.end(), rand_xy);
+    std::generate(y.begin(), y.end(), rand_xy);
+    std::generate(z.begin(), z.end(), rand_z);
+
+    // Gather the three coordinate vectors into one state per system.
+    state_vec state(n);
+    for(size_t i=0; i<n; ++i)
+    {
+        state[i][0] = x[i];
+        state[i][1] = y[i];
+        state[i][2] = z[i];
+    }
+
+    std::cout.precision(16);
+
+    std::cout << "# n: " << n << std::endl;
+
+    std::cout << x[0] << std::endl;
+
+
+    // Stepper type - use never_resizer for slight performance improvement
+    odeint::runge_kutta4_classic<state_type, fp_type, state_type, fp_type,
+                                 odeint::array_algebra,
+                                 odeint::default_operations,
+                                 odeint::never_resizer> stepper;
+
+    roessler_system sys(a, b, c);
+
+    // The timer starts here; only the integration loop is measured.
+    timer_type timer;
+
+    fp_type t = 0.0;
+
+    // size_t index: steps is unsigned, an int index would be a
+    // signed/unsigned comparison.
+    for (size_t step = 0; step < steps; step++)
+    {
+        for(size_t i=0; i<n; ++i)
+        {
+            stepper.do_step(sys, state[i], t, dt);
+        }
+        t += dt;
+    }
+
+    std::cout << "Integration finished, runtime for " << steps << " steps: ";
+    std::cout << timer.elapsed() << " s" << std::endl;
+
+    // compute some accumulation to make sure all results have been computed
+    fp_type s = 0.0;
+    for(size_t i = 0; i < n; ++i)
+    {
+        s += state[i][0];
+    }
+
+    std::cout << state[0][0] << std::endl;
+    std::cout << s/n << std::endl;
+
+}
diff --git a/src/boost/libs/numeric/odeint/performance/SIMD/roessler_simd.cpp b/src/boost/libs/numeric/odeint/performance/SIMD/roessler_simd.cpp
new file mode 100644
index 000000000..d79af4d8b
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/SIMD/roessler_simd.cpp
@@ -0,0 +1,149 @@
+/*
+ * Simulation of an ensemble of Roessler attractors using NT2 SIMD library
+ * This requires the SIMD library headers.
+ *
+ * Copyright 2014 Mario Mulansky
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+
+#include <iostream>
+#include <vector>
+#include <random>
+
+#include <boost/timer.hpp>
+#include <boost/array.hpp>
+
+#include <boost/numeric/odeint.hpp>
+#include <boost/simd/sdk/simd/pack.hpp>
+#include <boost/simd/sdk/simd/io.hpp>
+#include <boost/simd/memory/allocator.hpp>
+#include <boost/simd/include/functions/splat.hpp>
+#include <boost/simd/include/functions/plus.hpp>
+#include <boost/simd/include/functions/multiplies.hpp>
+
+
+namespace odeint = boost::numeric::odeint;
+namespace simd = boost::simd;
+
+// Timer used to measure the integration loop.
+typedef boost::timer timer_type;
+
+static const size_t dim = 3; // roessler is 3D
+
+// Scalar floating point type; switch the two typedefs to run in single
+// precision.
+typedef double fp_type;
+//typedef float fp_type;
+
+// One state holds dim SIMD packs, so a single state_type carries
+// pack_size independent systems at once.
+typedef simd::pack<fp_type> simd_pack;
+typedef boost::array<simd_pack, dim> state_type;
+// use the simd allocator to get properly aligned memory
+typedef std::vector< state_type, simd::allocator< state_type > > state_vec;
+
+// Number of scalar lanes in one pack.
+static const size_t pack_size = simd_pack::static_size;
+
+//---------------------------------------------------------------------------
+// Right-hand side of the Roessler system evaluated on SIMD packs, i.e. for
+// pack_size systems at once, with the parameters a, b, c fixed at
+// construction.
+struct roessler_system {
+ const fp_type m_a, m_b, m_c;
+
+ roessler_system(const fp_type a, const fp_type b, const fp_type c)
+ : m_a(a), m_b(b), m_c(c)
+ {}
+
+ // Writes the derivative of state x to dxdt. The time argument t is not
+ // used by the body.
+ void operator()(const state_type &x, state_type &dxdt, const fp_type t) const
+ {
+ // NOTE(review): the negation is spelled -1.0*x[1] rather than -x[1];
+ // presumably tied to the operators available for simd packs - confirm.
+ dxdt[0] = -1.0*x[1] - x[2];
+ dxdt[1] = x[0] + m_a * x[1];
+ dxdt[2] = m_b + x[2] * (x[0] - m_c);
+ }
+};
+
+//---------------------------------------------------------------------------
+/*
+ * Integrate an ensemble of Roessler systems with SIMD packs for `steps`
+ * classic RK4 steps and print the elapsed time of the integration loop.
+ *
+ * Usage: roessler_simd <n> <steps>
+ *   n      number of systems, must be at least the SIMD pack size;
+ *          only full packs are integrated
+ *   steps  number of integration steps, must not be negative
+ */
+int main(int argc, char *argv[]) {
+    if(argc<3)
+    {
+        std::cerr << "Expected size and steps as parameter" << std::endl;
+        exit(1);
+    }
+    // Validate before converting to size_t: atoi returns 0 for non-numeric
+    // text and can return a negative value, which would wrap around to a
+    // huge unsigned count.
+    const int n_arg = atoi(argv[1]);
+    const int steps_arg = atoi(argv[2]);
+    if(n_arg <= 0 || steps_arg < 0)
+    {
+        std::cerr << "Size must be positive and steps must not be negative" << std::endl;
+        exit(1);
+    }
+    const size_t n = n_arg;
+    const size_t steps = steps_arg;
+
+    // Systems are processed in groups of pack_size. Without a single full
+    // pack the state vector would be empty and state[0] below would be an
+    // out-of-bounds access.
+    const size_t n_packs = n / pack_size;
+    const size_t n_used = n_packs * pack_size;
+    if(n_packs == 0)
+    {
+        std::cerr << "Size must be at least the SIMD pack size (" << pack_size << ")" << std::endl;
+        exit(1);
+    }
+    if(n_used != n)
+    {
+        std::cerr << "Warning: size is not a multiple of the SIMD pack size, only "
+                  << n_used << " systems are integrated" << std::endl;
+    }
+
+    const fp_type dt = 0.01;
+
+    const fp_type a = 0.2;
+    const fp_type b = 1.0;
+    const fp_type c = 9.0;
+
+    // Random initial conditions. The engine is default-constructed, i.e.
+    // it is not seeded from a random source.
+    std::vector<fp_type> x(n), y(n), z(n);
+    std::default_random_engine generator;
+    std::uniform_real_distribution<fp_type> distribution_xy(-8.0, 8.0);
+    std::uniform_real_distribution<fp_type> distribution_z(0.0, 20.0);
+    auto rand_xy = std::bind(distribution_xy, std::ref(generator));
+    auto rand_z = std::bind(distribution_z, std::ref(generator));
+    std::generate(x.begin(), x.end(), rand_xy);
+    std::generate(y.begin(), y.end(), rand_xy);
+    std::generate(z.begin(), z.end(), rand_z);
+
+    // Lane p of pack i holds system i*pack_size+p.
+    state_vec state(n_packs);
+    for(size_t i=0; i<n_packs; ++i)
+    {
+        for(size_t p=0; p<pack_size; ++p)
+        {
+            state[i][0][p] = x[i*pack_size+p];
+            state[i][1][p] = y[i*pack_size+p];
+            state[i][2][p] = z[i*pack_size+p];
+        }
+    }
+
+    std::cout << "Systems: " << n << std::endl;
+    std::cout << "Steps: " << steps << std::endl;
+    std::cout << "SIMD pack size: " << pack_size << std::endl;
+
+    std::cout << state[0][0] << std::endl;
+
+    // Stepper type
+    odeint::runge_kutta4_classic<state_type, fp_type, state_type, fp_type,
+                                 odeint::array_algebra, odeint::default_operations,
+                                 odeint::never_resizer> stepper;
+
+    roessler_system sys(a, b, c);
+
+    // The timer starts here; only the integration loop is measured.
+    timer_type timer;
+
+    fp_type t = 0.0;
+
+    // size_t index: steps is unsigned, an int index would be a
+    // signed/unsigned comparison.
+    for(size_t step = 0; step < steps; step++)
+    {
+        for(size_t i = 0; i < n_packs; ++i)
+        {
+            stepper.do_step(sys, state[i], t, dt);
+        }
+        t += dt;
+    }
+
+    std::cout.precision(16);
+
+    std::cout << "Integration finished, runtime for " << steps << " steps: ";
+    std::cout << timer.elapsed() << " s" << std::endl;
+
+    // compute some accumulation to make sure all results have been computed
+    simd_pack s_pack = 0.0;
+    for(size_t i = 0; i < n_packs; ++i)
+    {
+        s_pack += state[i][0];
+    }
+
+    // Horizontal sum over the lanes of the accumulated pack.
+    fp_type s = 0.0;
+    for(size_t p=0; p<pack_size; ++p)
+    {
+        s += s_pack[p];
+    }
+
+
+    std::cout << state[0][0] << std::endl;
+    // Average over the systems that were actually integrated.
+    std::cout << s/n_used << std::endl;
+
+}
diff --git a/src/boost/libs/numeric/odeint/performance/c_lorenz.c b/src/boost/libs/numeric/odeint/performance/c_lorenz.c
new file mode 100644
index 000000000..85aba7fde
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/c_lorenz.c
@@ -0,0 +1,57 @@
+#include <stdio.h>
+#include <time.h>
+#include <math.h>
+
+/*
+ * Right-hand side of the Lorenz system with the constants 10, 28 and 8/3:
+ * reads the state x[0..2] and writes the derivative to y[0..2].
+ * y is restrict-qualified, so the output must not overlap x.
+ */
+void lorenz(const double *x, double *restrict y) {
+ y[0] = 10.0 * (x[1] - x[0]);
+ y[1] = 28.0 * x[0] - x[1] - x[0] * x[2];
+ y[2] = x[0] * x[1] - (8.0 / 3.0) * x[2];
+}
+
+/*
+ * Benchmark driver: integrates the Lorenz system with a hand-written
+ * classic RK4 scheme, nb_steps steps per run, repeats the run nb_loops
+ * times and prints each runtime as well as the minimum over all runs.
+ */
+int main(int argc, const char *argv[])
+{
+    const int nb_steps = 20000000;
+    const double h = 1.0e-10;
+    const double h2 = 0.5 * h;
+    /* A loop count is an integer quantity. */
+    const int nb_loops = 21;
+    double x[3];
+    double y[3];
+    double f1[3];
+    double f2[3];
+    double f3[3];
+    double f4[3];
+    double min_time = 1E6;
+    clock_t begin, end;
+    double time_spent;
+
+    for (int j = 0; j < nb_loops; j++) {
+        /* Every run starts from the same initial condition. */
+        x[0] = 8.5;
+        x[1] = 3.1;
+        x[2] = 1.2;
+        begin = clock();
+        for (int k = 0; k < nb_steps; k++) {
+            /* Four stages of the classic Runge-Kutta scheme. */
+            lorenz(x, f1);
+            for (int i = 0; i < 3; i++) {
+                y[i] = x[i] + h2 * f1[i];
+            }
+            lorenz(y, f2);
+            for (int i = 0; i < 3; i++) {
+                y[i] = x[i] + h2 * f2[i];
+            }
+            lorenz(y, f3);
+            for (int i = 0; i < 3; i++) {
+                y[i] = x[i] + h * f3[i];
+            }
+            lorenz(y, f4);
+            for (int i = 0; i < 3; i++) {
+                x[i] = x[i] + h * (f1[i] + 2 * (f2[i] + f3[i]) + f4[i]) / 6.0;
+            }
+        }
+        end = clock();
+        /* Compute the elapsed time once and use it for both outputs. */
+        time_spent = (double)(end-begin)/CLOCKS_PER_SEC;
+        min_time = fmin(min_time, time_spent);
+        printf("Result: %f\t runtime: %f\n", x[0], time_spent);
+    }
+    printf("Minimal Runtime: %f\n", min_time);
+
+    return 0;
+}
diff --git a/src/boost/libs/numeric/odeint/performance/fortran_lorenz.f90 b/src/boost/libs/numeric/odeint/performance/fortran_lorenz.f90
new file mode 100644
index 000000000..26869973c
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/fortran_lorenz.f90
@@ -0,0 +1,60 @@
+! Benchmark: integrates the Lorenz system with a hand-written classic RK4
+! scheme, repeats the run nb_loops times and prints every runtime as well
+! as the minimal one.
+program main
+  implicit none
+
+  ! Kind of double precision reals. kind(1.0d0) is portable, whereas a
+  ! literal kind number such as 8 is compiler specific.
+  integer, parameter :: dp = kind(1.0d0)
+  real(dp), dimension(1:3) :: x
+  integer, parameter :: nstep = 20000000
+  real(dp) :: t = 0.0_dp
+  real(dp) :: h = 1.0e-10_dp
+  integer, parameter :: nb_loops = 21
+  integer, parameter :: n = 3
+  integer :: k
+  integer :: time_begin
+  integer :: time_end
+  integer :: count_rate
+  real(dp) :: time
+  ! Start from the largest representable value so that the first measured
+  ! time always becomes the minimum, however long a run takes.
+  real(dp) :: min_time = huge(1.0_dp)
+
+  do k = 1, nb_loops
+    ! Every run starts from the same initial condition.
+    x = [ 8.5_dp, 3.1_dp, 1.2_dp ]
+    call system_clock(time_begin, count_rate)
+    call rk4sys(n, t, x, h, nstep)
+    call system_clock(time_end, count_rate)
+    time = real(time_end - time_begin, dp) / real(count_rate, dp)
+    min_time = min(time, min_time)
+    write (*,*) time, x(1)
+  end do
+  write (*,*) "Minimal Runtime:", min_time
+contains
+  ! Right-hand side of the Lorenz system: writes the derivative of the
+  ! state x to f.
+  subroutine xpsys(x,f)
+    real(dp), dimension(1:3), intent(in) :: x
+    real(dp), dimension(1:3), intent(out) :: f
+    f(1) = 10.0_dp * ( x(2) - x(1) )
+    f(2) = 28.0_dp * x(1) - x(2) - x(1) * x(3)
+    f(3) = x(1) * x(2) - (8.0_dp / 3.0_dp) * x(3)
+  end subroutine xpsys
+
+  ! Advances x in place by nstep classic RK4 steps of size h.
+  ! The argument t is accepted but not used by the body.
+  subroutine rk4sys(n, t, x, h, nstep)
+    integer, intent(in) :: n
+    real(dp), intent(in) :: t
+    real(dp), dimension(1:n), intent(inout) :: x
+    real(dp), intent(in) :: h
+    integer, intent(in) :: nstep
+    ! Local variables
+    real(dp) :: h2
+    real(dp), dimension(1:n) :: y, f1, f2, f3, f4
+    integer :: k
+
+    h2 = 0.5_dp * h
+    do k = 1, nstep
+      call xpsys(x, f1)
+      y = x + h2 * f1
+      call xpsys(y, f2)
+      y = x + h2 * f2
+      call xpsys(y, f3)
+      y = x + h * f3
+      call xpsys(y, f4)
+      x = x + h * (f1 + 2.0_dp * (f2 + f3) + f4) / 6.0_dp
+    end do
+  end subroutine rk4sys
+end program main
diff --git a/src/boost/libs/numeric/odeint/performance/lorenz.hpp b/src/boost/libs/numeric/odeint/performance/lorenz.hpp
new file mode 100644
index 000000000..c1ea37c9e
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/lorenz.hpp
@@ -0,0 +1,33 @@
+/*
+ * lorenz.hpp
+ *
+ * Copyright 2011 Mario Mulansky
+ * Copyright 2012 Karsten Ahnert
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+
+#ifndef LORENZ_HPP_
+#define LORENZ_HPP_
+
+#include <boost/array.hpp>
+
+// Right-hand side of the Lorenz system with the fixed parameters
+// sigma = 10, R = 28 and b = 8/3, written as a functor whose call operator
+// works for any state type indexable with [].
+struct lorenz
+{
+ // Writes the derivative of state x to dxdt. The time argument t is not
+ // used by the body.
+ template< class state_type >
+ void inline operator()( const state_type &x , state_type &dxdt , const double t ) const
+ {
+ const double sigma = 10.0;
+ const double R = 28.0;
+ const double b = 8.0 / 3.0;
+ dxdt[0] = sigma * ( x[1] - x[0] );
+ dxdt[1] = R * x[0] - x[1] - x[0] * x[2];
+ dxdt[2] = x[0]*x[1] - b * x[2];
+ }
+};
+
+
+#endif /* LORENZ_HPP_ */
diff --git a/src/boost/libs/numeric/odeint/performance/odeint_rk4_array.cpp b/src/boost/libs/numeric/odeint/performance/odeint_rk4_array.cpp
new file mode 100644
index 000000000..6d60296f2
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/odeint_rk4_array.cpp
@@ -0,0 +1,63 @@
+/*
+ * odeint_rk4_array
+ *
+ * Copyright 2011 Mario Mulansky
+ * Copyright 2012 Karsten Ahnert
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See accompanying file LICENSE_1_0.txt or
+ * copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+
+#include <iostream>
+
+#include <boost/timer.hpp>
+#include <boost/array.hpp>
+
+#include <boost/numeric/odeint/stepper/runge_kutta4_classic.hpp>
+#include <boost/numeric/odeint/stepper/runge_kutta4.hpp>
+#include <boost/numeric/odeint/algebra/array_algebra.hpp>
+
+#include "lorenz.hpp"
+
+// Timer used to measure each benchmark run.
+typedef boost::timer timer_type;
+
+// State of the Lorenz system: three doubles.
+typedef boost::array< double , 3 > state_type;
+
+using namespace boost::numeric::odeint;
+
+//typedef boost::numeric::odeint::runge_kutta4_classic< state_type > rk4_odeint_type;
+
+// use the never resizer explicitly for optimal performance with gcc,
+// for the intel compiler this doesn't matter and the above definition
+// gives the same performance
+typedef runge_kutta4_classic< state_type , double , state_type , double ,
+ array_algebra, default_operations, never_resizer > rk4_odeint_type;
+
+
+// Benchmark size: number of repeated runs, RK4 steps per run, step size.
+const int loops = 21;
+const int num_of_steps = 20000000;
+const double dt = 1E-10;
+
+
+/*
+ * Benchmark driver: integrates the Lorenz system with the odeint RK4
+ * stepper, num_of_steps steps per run, repeats the run `loops` times,
+ * logs each runtime to std::clog and prints the minimum to std::cout.
+ */
+int main()
+{
+    double min_time = 1E6; // something big
+    rk4_odeint_type stepper;
+    std::clog.precision(16);
+    std::cout.precision(16);
+    for( int n=0; n<loops; n++ )
+    {
+        // Every run starts from the same initial condition.
+        state_type x = {{ 8.5, 3.1, 1.2 }};
+        double t = 0.0;
+        timer_type timer;
+        // int index: num_of_steps is an int, an unsigned index would be a
+        // signed/unsigned comparison.
+        for( int i = 0 ; i < num_of_steps ; ++i )
+        {
+            stepper.do_step( lorenz(), x, t, dt );
+            t += dt;
+        }
+        // Read the timer once so the logged time is exactly the value
+        // that enters the minimum.
+        const double elapsed = timer.elapsed();
+        min_time = std::min( elapsed , min_time );
+        std::clog << elapsed << '\t' << x[0] << std::endl;
+    }
+    std::cout << "Minimal Runtime: " << min_time << std::endl;
+}
diff --git a/src/boost/libs/numeric/odeint/performance/plot_result.py b/src/boost/libs/numeric/odeint/performance/plot_result.py
new file mode 100644
index 000000000..f39e49fce
--- /dev/null
+++ b/src/boost/libs/numeric/odeint/performance/plot_result.py
@@ -0,0 +1,64 @@
+"""
+ Copyright 2011-2014 Mario Mulansky
+ Copyright 2011-2014 Karsten Ahnert
+
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or
+ copy at http://www.boost.org/LICENSE_1_0.txt)
+"""
+
+import numpy as np
+from matplotlib import pyplot as plt
+
+plt.rc("font", size=16)
+
+
+def get_runtime_from_file(filename):
+ gcc_perf_file = open(filename, 'r')
+ for line in gcc_perf_file:
+ if "Minimal Runtime:" in line:
+ return float(line.split(":")[-1])
+
+
+t_gcc = [get_runtime_from_file("perf_workbook/odeint_rk4_array_gcc.perf"),
+ get_runtime_from_file("perf_ariel/odeint_rk4_array_gcc.perf"),
+ get_runtime_from_file("perf_lyra/odeint_rk4_array_gcc.perf")]
+
+t_intel = [get_runtime_from_file("perf_workbook/odeint_rk4_array_intel.perf"),
+ get_runtime_from_file("perf_ariel/odeint_rk4_array_intel.perf"),
+ get_runtime_from_file("perf_lyra/odeint_rk4_array_intel.perf")]
+
+t_gfort = [get_runtime_from_file("perf_workbook/rk4_gfort.perf"),
+ get_runtime_from_file("perf_ariel/rk4_gfort.perf"),
+ get_runtime_from_file("perf_lyra/rk4_gfort.perf")]
+
+t_c_intel = [get_runtime_from_file("perf_workbook/rk4_c_intel.perf"),
+ get_runtime_from_file("perf_ariel/rk4_c_intel.perf"),
+ get_runtime_from_file("perf_lyra/rk4_c_intel.perf")]
+
+print t_c_intel
+
+
+ind = np.arange(3) # the x locations for the groups
+width = 0.15 # the width of the bars
+
+fig = plt.figure()
+ax = fig.add_subplot(111)
+rects1 = ax.bar(ind, t_gcc, width, color='b', label="odeint gcc")
+rects2 = ax.bar(ind+width, t_intel, width, color='g', label="odeint intel")
+rects3 = ax.bar(ind+2*width, t_c_intel, width, color='y', label="C intel")
+rects4 = ax.bar(ind+3*width, t_gfort, width, color='c', label="gfort")
+
+ax.axis([-width, 2.0+5*width, 0.0, 0.85])
+ax.set_ylabel('Runtime (s)')
+ax.set_title('Performance for integrating the Lorenz system')
+ax.set_xticks(ind + 1.5*width)
+ax.set_xticklabels(('Core i5-3210M\n3.1 GHz',
+ 'Xeon E5-2690\n3.8 GHz',
+ 'Opteron 8431\n 2.4 GHz'))
+ax.legend(loc='upper left', prop={'size': 16})
+
+plt.savefig("perf.pdf")
+plt.savefig("perf.png", dpi=50)
+
+plt.show()