From 4cdd1d271c23e10e22c77f274f9e57f65b11a9c7 Mon Sep 17 00:00:00 2001
From: Simon-Christian Klein <simoniusklein@gmail.com>
Date: Fri, 5 Oct 2018 11:12:58 +0200
Subject: [PATCH 1/2] WIP: Python backend. First commit. Only the SPRK33 TI is
 available, NO DC, NO ARTIFICIAL DISSIPATION

---
 python/InductionEq.py               | 435 ++++++++++++++++++++++++++++
 python/cl.py                        |  57 ++++
 python/compute_error_rotation_2d.py | 165 +++++++++++
 3 files changed, 657 insertions(+)
 create mode 100644 python/InductionEq.py
 create mode 100644 python/cl.py
 create mode 100644 python/compute_error_rotation_2d.py

diff --git a/python/InductionEq.py b/python/InductionEq.py
new file mode 100644
index 0000000..f471f38
--- /dev/null
+++ b/python/InductionEq.py
@@ -0,0 +1,435 @@
+import math
+import numpy as np
+#import pyopencl as cl
+import cl
+
+# from +InductionEq/preparse_vars.m
+def preparse_vars():
+	global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps, I_Results
+	I_Mesh = {'NODES_X':0, 'NODES_Y':0, 'NODES_Z':0, 'DX':0, 'DY':0, 'DZ':0, 'XMIN':0, 'XMAX':0, 'YMAX':0, 'YMIN':0, 'ZMIN':0, 'ZMAX':0}
+	I_TI = {'cfl':0, 'final_time':0, 'time_integrator':'\0', 'DT':0, 'num_steps':0}
+	I_IEq = {'form_uibj':0, 'form_source':0, 'form_ujbi':0, 'dissipation':'NONE', 'dissipation_form':'\0', 'HO_DISSIPATION_FACTOR':1, 'hall_term':'NONE', 'g_range':0, 'l_range':0, 'MP2MIN':0, 'MP2MAX':0, 'CMIN':0, 'CMAX':0}
+	I_DC = {'use':0, 'form':'\0', 'BNODES':0, 'error_threshold':0, 'g_range':0, 'l_range':0,'max_iterations':0}
+	I_Tech = {'device':0, 'REAL':'\0','num_nodes_pad':0, 'num_groups':0,'W_SIZE':0, 'g_range':0, 'l_range':0, 'optimizations':'\0', 'context' : None}
+	I_RunOps = {'order':0, 'operator_form':'classical', 'testcase':'\0', 'variable_u':False, 'periodic':'NONE', 'plot_energy':0, 'plot_numerical_solution':0,'plot_analytical_solution':0, 'plot_difference':0, 'plot_divergence':0, 'save_fields':False, 'save_integrals_over_time':False}
+	I_Results = {'abs_err':0, 'rel_err':0, 'field_b':0, 'field_b_ana':0, 'energy':0, 'divergence_norm':0, 'runtime':0, 'kernel_runtime':0, 'energy_over_time':0, 'L2error_B_over_time':0, 'L2error_divB_over_time':0, 'time':0 }
+
+# from generate_settings.m
+
+def generate_settings(map, keys):
+    # Atomatically generates a formatted settings string that contains all
+    # compiler optimizations and OpenCL defines of map with the given keys where 
+    # keys is a cell array, e.g. settings_mesh = generate_settings(I_Mesh, {'DX'; 'DY'; 'DZ'}
+
+	settings = '';
+	for key in keys:
+		if key in map:
+			if not(type(map[key]) is float or type(map[key]) is int):
+				if key==  'optimizations':
+					settings = settings + map[key]
+				elif 'USE' in map[key]:
+					settings = settings + ' -D' +  map[key] + '=1'
+				else:
+					settings = settings + ' -D' + key + '=' + str(map[key])
+			else:
+				settings = settings + ' -D' + key + "=" + str(map[key])
+		else:
+			print('Unknown identifier ' +  keys[i] + '\n')
+	return settings
+
+
+
+
+# from +InductionEq/initialize.m
+def initialize():
+	global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps
+
+
+# *NEW* generate opencl context with PyOpenCl
+	cl.create_ctx()
+
+# a python float is a 64bit float => float means double
+	if I_RunOps['periodic'] == 'USE_PERIODIC':
+		DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / float(I_Mesh['NODES_X']);
+		DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / float(I_Mesh['NODES_Y']);
+		DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / float(I_Mesh['NODES_Z']);
+	else:
+		DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / (float(I_Mesh['NODES_X'])-1);
+		DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / (float(I_Mesh['NODES_Y'])-1);
+		DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / (float(I_Mesh['NODES_Z'])-1);
+
+
+	I_Mesh['DX'] = DX; I_Mesh['DY'] = DY; I_Mesh['DZ'] = DZ;
+	num_nodes = I_Mesh['NODES_X']*I_Mesh['NODES_Y']*I_Mesh['NODES_Z'];
+
+# later ...
+#    type = dev_type(I_Tech('device'));
+#    if (strcmp(type{1},'CPU'))
+#        group_size = 16;
+#    else
+#        if (num_nodes > lw_size(I_Tech('device')))
+#            group_size = lw_size(I_Tech('device'));
+#        else
+#          group_size = 2^floor(log(num_nodes) / log(2));
+#        end
+#    end
+
+# FOR MY GPU
+	group_size = 1024
+
+	group_size = float(group_size);
+	num_nodes_pad = math.ceil(float(num_nodes)/group_size)*group_size
+	num_groups = math.ceil(num_nodes_pad/group_size)
+
+	I_Tech['num_nodes_pad'] = int(num_nodes_pad)
+
+	I_Tech['num_groups'] = int(num_groups);
+	I_Tech['W_SIZE'] = int(group_size);
+
+
+# Initialise fields: magnetic, veclocity, and charge density
+	if I_Tech['REAL'] == 'float':
+		field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+		field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+		field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)+1
+	else:
+		field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
+		field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
+		field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64)+1
+
+
+# Generate settings needed for computation of the initial state for the
+# specified testcase.
+	settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'optimizations']);
+	settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ', 'NODES_X', 'NODES_Y', 'NODES_Z','XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX'])
+	settings = settings_tech + settings_mesh
+
+
+	kernel_path_list =  []
+	kernel_path_list.append('../include/'+I_RunOps['testcase']+'.h')
+	kernel_path_list.append('../include/utils.h')
+	kernel_path_list.append('../kernel/kernel_init.cl')
+
+	cl.compile_kernels(kernel_path_list, settings)
+
+    # Set global and local range for calculation of the inital state
+    # The global and local range can either be a vector of type uint32 or
+    # double. Double is supported due to the fact that by default Matlab
+    # stores all variables as double.
+	I_IEq['g_range'] = np.array([I_Mesh['NODES_X'], I_Mesh ['NODES_Y'], I_Mesh['NODES_Z']], dtype=np.int32)
+	I_IEq['l_range'] = np.array([0], dtype=np.int32)
+
+    # Compute initial state for magnetic field, velocity field and density
+    # field
+	cl.run_kernel('init_b', I_IEq['g_range'], I_IEq['l_range'], field_b_init)
+	cl.run_kernel('init_u', I_IEq['g_range'], I_IEq['l_range'], field_u_init)
+	cl.run_kernel('init_rho', I_IEq['g_range'], I_IEq['l_range'], field_rho_init)
+
+    # Compute time step from CFL number, min step size and maximal velocity
+	u_max = max(np.sqrt(np.power(field_u_init[1,:], 2) + np.power(field_u_init[2,:],2) + np.power(field_u_init[3,:],2)))
+	dt = I_TI['cfl'] * min([DX, DY, DZ]) / float(u_max)
+	num_steps = math.ceil(I_TI['final_time']/dt)
+	dt = I_TI['final_time'] / num_steps
+
+	I_TI['DT'] = dt
+	I_TI['num_steps'] = num_steps
+
+    # Global and local range for dot product and norm
+	I_Tech['g_range'] = np.array([I_Tech['num_nodes_pad'], 1, 1], dtype=np.uint32)
+	I_Tech['l_range'] = np.array([I_Tech['W_SIZE'], 1, 1], dtype=np.uint32)
+
+    # Depending on the discretization of the laplace operator for
+    # divergence cleaning set the number of boundary nodes
+	if I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_LNS':
+        	I_DC['BNODES'] = 0;
+	elif (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_DIRICHLET') or (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_NARROW_STENCIL_DIRICHLET'):
+		I_DC['BNODES'] = 1;
+	else:
+		print('ERROR: Option I_DC(''divergence_cleaning_form'') = '+str(I_DC['divergence_cleaning_form']) + ' unknown.')
+
+    # Set global and local range for divergence cleaning
+	I_DC['g_range'] = np.array([I_Mesh['NODES_X']-2*I_DC['BNODES'], I_Mesh['NODES_Y']-2*I_DC['BNODES'], I_Mesh['NODES_Z']-2*I_DC['BNODES']], dtype=np.int32)
+	I_DC['l_range'] = np.array([0], dtype=np.int32)
+
+	return field_b_init, field_u_init, field_rho_init
+
+def compute_numerical_solution_setup(field_b, field_u, field_rho):
+	global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps
+
+	# Memory allocation for fields
+	if I_Tech['REAL'] == 'float':
+	# Initialize auxillary fields needed for time stepping
+		field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+		field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+		field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
+		field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+		current_time = np.zeros(2, dtype=np.float32) # contains two elements: time at the beginning of a step and of a stage
+		norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float32)
+	# Allocate memory for field used for divergence cleaning
+		if I_DC['divergence_cleaning']:
+        #Field for divergence of b-field and phi
+			field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
+			field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
+        #Fields needed for CG
+			field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float32);
+			field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32);
+			field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype = np.float32);
+    
+		energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32);
+		L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32)
+		L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32);
+	else:
+       # Initialize auxillary fields needed for time stepping
+                field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
+                field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
+                field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64)
+                field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
+                current_time = np.zeros(2, dtype=np.float64) # contains two elements: time at the beginning of a step and of a stage
+                norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float64)
+          # Allocate memory for field used for divergence cleaning
+                if I_DC['divergence_cleaning']:
+        #Field for divergence of b-field and phi
+                        field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64)
+                        field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64)
+        #Fields needed for CG
+                        field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float64)
+                        field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64);
+                        field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype =np.float64)
+    
+                energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64);
+                L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64)
+                L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64);
+
+	kernel_path_list = []
+
+# Include header file containing the coefficients of the respective order
+	if I_RunOps['order'] == 2:
+		if I_RunOps['operator_form'] == 'extended':
+			kernel_path_list.append('../include/2ndOrderExtended.h')
+		else: # classical operators
+			kernel_path_list.append('../include/2ndOrder.h')
+
+	elif I_RunOps['order'] == 4:
+		if I_RunOps['operator_form'] ==  'extended':
+			kernel_path_list.append('../include/4thOrderExtended.h')
+		else: # classical operators
+			kernel_path_list.append('../include/4thOrder.h')
+
+	elif I_RunOps['order'] == 6:
+		if I_RunOps['operator_form'] == 'extended':
+			kernel_path_list.append('../include/6thOrderExtended.h')
+		else: # classical operators
+			kernel_path_list.append('../include/6thOrder.h')
+
+	else:
+    		print('Specify order \n')
+
+	kernel_path_list.append('../include/utils.h')
+	kernel_path_list.append('../include/' + I_RunOps['testcase'] + '.h')
+	kernel_path_list.append('../kernel/kernel_init.cl')
+
+	kernel_path_list.append('../kernel/SBP_operator.cl')
+
+	kernel_path_list.append('../kernel/kernel_operator.cl')
+
+	# Include files for divergence cleaning if desired
+	if I_DC['divergence_cleaning']:
+		pass
+    #Prepare temporary fields for divergence cleaning
+#		DC_fields = struct('divB',field_divB,'phi',field_phi,'laplace', field_laplace,'r', field_r,'u', field_u_div_cleaning,'output', norm2_output);
+#    kernel_path_list = [kernel_path_list, {'../kernel/kernel_div_cleaning.cl'}];
+#    kernel_path_list = [kernel_path_list, {'../kernel/kernel_CG.cl'}];
+#    settings_dc = generate_settings(I_DC, {'BNODES', 'divergence_cleaning_form'});
+	else:
+		DC_fields = 0
+		settings_dc =''
+
+# Include files for artificial dissipation if desired
+	if I_IEq['dissipation'] ==  'USE_ARTIFICIAL_DISSIPATION':
+		pass
+#		kernel_path_list = [kernel_path_list, {'../include/artificial_dissipation.h'}];
+#		kernel_path_list = [kernel_path_list, {'../kernel/artificial_dissipation.cl'}];
+#		settings_dissipation = generate_settings(I_IEq, {'dissipation', 'dissipation_form', 'HO_DISSIPATION_FACTOR'});
+
+	if I_IEq['dissipation_form'] == 'USE_ADAPTIVE_DISSIPATION':
+		pass
+#		settings_dissipation = strcat(settings_dissipation, generate_settings(I_IEq, {'MP2MIN'; 'MP2MAX'; 'CMIN'; 'CMAX'}));
+	else:
+    		settings_dissipation = ''
+
+	kernel_path_list.append('../kernel/induction_Eq_volume.cl')
+	kernel_path_list.append('../kernel/induction_Eq_surface.cl')
+	kernel_path_list.append('../kernel/kernel_dot_product.cl')
+	kernel_path_list.append('../kernel/kernel_time_integrator.cl')
+
+# Specify compile settings
+	settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'W_SIZE', 'optimizations'])
+
+	settings_mesh = generate_settings(I_Mesh,  ['DX', 'DY', 'DZ','NODES_X', 'NODES_Y', 'NODES_Z', 'XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX'])
+
+	settings_induction_eq = generate_settings(I_IEq, ['form_uibj', 'form_source', 'form_ujbi', 'hall_term'])
+
+	settings_time_integration = generate_settings(I_TI, ['DT'])
+
+	settings_runops = generate_settings(I_RunOps, ['periodic'])
+
+
+	settings = settings_induction_eq + settings_time_integration + settings_tech + settings_mesh + settings_dissipation + settings_dc + settings_runops
+
+
+	if I_TI['time_integrator'] ==  'SSPRK33':
+        # 'SSPRK33_1', 'SSPRK33_2a', 'SSPRK33_2b', 'SSPRK33_3a', 'SSPRK33_3b'};
+        # calc_curlB_rho = {'calc_curlB_rho_1_3args', 'calc_curlB_rho_2_3args', 'calc_curlB_rho_3_3args'};
+		RK_Step = [];
+		if I_IEq['hall_term'] ==  'USE_HALL':
+			RK_Step.append('calc_curlB_rho_1_3args')
+		if I_RunOps['variable_u']:
+			RK_Step.append('calc_u_3_args')
+		RK_Step.append('SSPRK33_1')
+
+		RK_Step.append('SSPRK33_2a')
+		if I_IEq['hall_term']== 'USE_HALL':
+			RK_Step.append('calc_curlB_rho_2_3args')
+		if I_RunOps['variable_u']:
+			RK_Step.append('calc_u_3_args')
+		RK_Step.append('SSPRK33_2b')
+
+		RK_Step.append('SSPRK33_3a')
+		if I_IEq['hall_term'] ==  'USE_HALL':
+			RK_Step.append('calc_curlB_rho_3_3args')
+		if I_RunOps['variable_u']:
+			RK_Step.append('calc_u_3_args')
+		RK_Step.append('SSPRK33_3b')
+
+		RK_Step.append('calc_time_3_args')
+
+		time_integrator_num_fields = 3;
+
+# switch time_integrator_num_fields
+	if time_integrator_num_fields == 2:
+		IEq_fields = {'field_b':field_b, \
+		'field_b2': field_b2, \
+		'field_curlB_rho': field_curlB_rho, \
+		'field_u': field_u, \
+		'field_rho':field_rho, \
+		'current_time':current_time, \
+		'field_divB': field_divB, \
+		'norm2_output':norm2_output, \
+		'energy': energy, \
+		'L2error_B': L2error_B, \
+		'L2error_divB': L2error_divB}
+	elif time_integrator_num_fields == 3:
+		IEq_fields = {'field_b': field_b, \
+		'field_b2': field_b2, \
+		'field_b3': field_b3, \
+		'field_curlB_rho': field_curlB_rho, \
+		'field_u': field_u, \
+		'field_rho': field_rho, \
+		'current_time': current_time, \
+		'field_divB': field_divB, \
+                'norm2_output': norm2_output,\
+                'energy': energy,\
+                'L2error_B': L2error_B, \
+                'L2error_divB': L2error_divB}
+	else:
+        	print('Wrong value of time_integrator_num_fields =' + str( time_integrator_num_fields))
+
+
+	return kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step
+
+def compute_numerical_solution(field_b, field_u, field_rho):
+	global I_mesh, I_TI, I_IEq, I_Tech, I_RunOps, I_Results
+
+# setup fields, settings etc.
+	(kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step) = compute_numerical_solution_setup(field_b, field_u, field_rho)
+
+# Initialise logging variables
+	kernel_runtime = 0;
+	RK_block_size = 15000;
+	if I_RunOps['save_integrals_over_time']:
+		energy = IEq_fields['energy']
+		L2error_B = IEq_fields['L2error_B']
+		L2error_divB = IEq_fields['L2error_divB']
+
+	norm2_output = IEq_fields['norm2_output']
+
+# Compile kernel
+	cl.compile_kernels(kernel_path_list, settings);
+
+# Switch case for time integrator
+
+	if time_integrator_num_fields == 2:
+		print("time_integrator_num_fields = 2 not ready yet \n")
+	elif time_integrator_num_fields == 3:
+		field_b = IEq_fields['field_b']
+		field_b2 = IEq_fields['field_b2']
+		field_b3 = IEq_fields['field_b3']
+		field_curlB_rho = IEq_fields['field_curlB_rho']
+		field_u = IEq_fields['field_u']
+		field_rho = IEq_fields['field_rho']
+		current_time = IEq_fields['current_time']
+		field_divB = IEq_fields['field_divB']
+
+		if False: #other versions
+			pass
+		else:
+			num_steps_run = I_TI['num_steps'];
+			while num_steps_run > RK_block_size:
+				kernel_list = RK_Step * RK_block_size
+				t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\
+				field_b, field_b2, field_b3, field_curlB_rho,\
+				field_u, field_rho, current_time)
+				kernel_runtime =  kernel_runtime + t;
+				num_steps_run = num_steps_run - RK_block_size;
+
+			if num_steps_run > 0:
+				kernel_list = RK_Step *num_steps_run
+				t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\
+				field_b, field_b2, field_b3, field_curlB_rho,\
+				field_u, field_rho, current_time);
+				kernel_runtime = kernel_runtime + t;
+
+	else:
+		print('Wrong value of time_integrator_num_fields = ' + str(time_integrator_num_fields) + ' \n')
+	#runtime = toc;
+	runtime = 0
+	print('Elapsed time is' + str(runtime) + 'seconds.\n')
+
+	# Save total runtime and kernel runtime
+	I_Results['runtime'] = runtime;
+	I_Results['kernel_runtime'] = kernel_runtime;
+
+	# save fields
+	if I_RunOps['save_fields']:
+		I_Results['field_b'] = field_b;
+
+	field_b_ana = IEq_fields['field_b2']
+
+	#Calculate analytical solution and error
+	current_time[1] = I_TI['final_time']
+	cl.run_kernel('analytical_b', I_IEq['g_range'], I_IEq['l_range'], field_b_ana, current_time);
+
+	norm2_output[:] = 0;
+
+	cl.run_kernel('norm2_diff', I_Tech['g_range'], I_Tech['l_range'], field_b, field_b_ana, norm2_output);
+	I_Results['abs_err'] = math.sqrt(sum(sum(norm2_output)));
+
+
+	norm2_output[:] = 0;
+	cl.run_kernel( 'norm2', I_Tech['g_range'], I_Tech['l_range'], field_b_ana, norm2_output);
+	I_Results['rel_err'] = I_Results['abs_err'] / math.sqrt(norm2_output.sum());
+
+
+	#Calculate divergence
+	cl.run_kernel('calc_div', I_IEq['g_range'], I_IEq['l_range'], field_b, field_divB);
+	norm2_output[:] = 0;
+	cl.run_kernel('norm2_S', I_Tech['g_range'], I_Tech['l_range'], field_divB, norm2_output);
+	I_Results['divergence_norm'] = math.sqrt(norm2_output.sum());
+
+
+	#Calculate energy
+	norm2_output[:] = 0;
+	cl.run_kernel('norm2', I_Tech['g_range'], I_Tech['l_range'], field_b, norm2_output);
+	I_Results['energy'] = norm2_output.sum();
+
+
diff --git a/python/cl.py b/python/cl.py
new file mode 100644
index 0000000..cc177f5
--- /dev/null
+++ b/python/cl.py
@@ -0,0 +1,57 @@
+import pyopencl as cl
+
+def compile_kernels(src_path, cmd_line, *args):
+	global program, ctx
+	src = ' '
+	for entry in src_path:
+		file = open(entry, 'r')
+		src = src + file.read()
+		file.close()
+	prg = cl.Program(ctx, src).build(options=cmd_line)
+	program = prg
+	return prg
+
+def create_ctx():
+	global ctx, queue, PYOPENCL_COMPILER_OUTPUT
+	PYOPENCL_COMPILER_OUTPUT=1
+	ctx = cl.create_some_context()
+	queue = cl.CommandQueue(ctx)
+	return ctx, queue
+
+def run_kernel(names, g_range, l_range, *arguments):
+	global queue, program
+	inputbuffers = []
+	for arg in arguments:
+		inputbuffers.append(cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = arg))
+	callstrings = []
+	if type(names) is list:
+		for name in names:
+			callstrings.append('program.' + name + '(queue, tuple(g_range) , None, *inputbuffers)')
+	else:
+		callstrings.append('program.' + names + '(queue, tuple(g_range), None, *inputbuffers)')
+
+	for callstring in callstrings:
+		event = eval(callstring)
+#		event.wait() 
+	i = 0
+	for arg in arguments:
+		cl.enqueue_copy(queue, arg, inputbuffers[i])
+#		inputbuffers[i].release()
+		i = i +1
+	return 0
+
+def run_kernel_ref(name, g_range, l_range, *arg_refs):
+	global queue, program
+	callstring = 'program.' + name + '(queue, None, None, *inputbuffers)'
+	eval(callstring)
+	return
+
+def allocate_buf(array):
+	return cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = array)
+
+def malloc(size):
+	return cl.Buffer(ctx, size=size)
+
+
+
+
diff --git a/python/compute_error_rotation_2d.py b/python/compute_error_rotation_2d.py
new file mode 100644
index 0000000..e5cd9ed
--- /dev/null
+++ b/python/compute_error_rotation_2d.py
@@ -0,0 +1,165 @@
+# -*- coding: <utf8> -*-
+
+#This project is licensed under the terms of the Creative Commons CC BY-NC-ND 3.0 license.
+
+#Use this script to specify the general conditions and the testcase you
+#want to investigate. It calls functions to initializes all variables and fields relevant for
+#the testcase and advance the numerical solution in time. Depending on the
+#properties that shall be investigated additional functionalities can be
+#enabled, e.g. divergence cleaning.
+import InductionEq as induction_eq
+from math import pi
+#prepare_vars() initializes containers (maps) in which all variables (label, value) are
+#stored. Variables are grouped together by purpose:
+#I_Mesh: Contains all variables concerning the discrete mesh, e.g.
+#stepsize, box size
+#I_TI: Contains all variables concerning time integration, e.g. time
+#integrator, timestep
+#I_IEq: Contains all variables relevant for the induction equation, e.g.
+#discretization form, switch for additional artificial dissipation
+#I_DC: Contains all variables concerning divergence cleaning, e.g.
+#discretization, error threshold
+#I_Tech: Contains all variables of technical nature, e.g. the device,
+#computation precision
+#I_RunOps: Specifies the parameters of a computation, e.g. which variables
+#will be saved, the testcase
+
+#Variables in capital letters are program specific defines which are set by
+#openCL compile settings. If the value is a string in captial letters
+#beginning with USE it acts as a switch, e.g. to enable artificial dissipation
+#or to switch between different discretizations. In case the key is written
+#in capital letters the program define will be set to the corresponding
+#value of the key.
+induction_eq.preparse_vars();
+
+
+#global I_Mesh I_TI I_IEq I_DC I_Tech I_RunOps I_Results
+
+I_Mesh = induction_eq.I_Mesh
+I_TI = induction_eq.I_TI
+I_IEq = induction_eq.I_IEq
+I_DC = induction_eq.I_DC
+I_Tech = induction_eq.I_Tech
+I_RunOps = induction_eq.I_RunOps
+I_Results = induction_eq.I_Results
+
+# Mesh related parameters. The minimum number of nodes per direction is
+# constrained by the number of boundary nodes, e.g. the 4th order method
+# has 2*4 boundary nodes which means the minimum number of nodes amounts to 8.
+
+N = 40
+I_Mesh['NODES_X'],I_Mesh['NODES_Y'],I_Mesh['NODES_Z'] = (N, N, 12)
+I_Mesh['XMIN'], I_Mesh['XMAX'] = (-1.0, 1.0)
+I_Mesh['YMIN'], I_Mesh['YMAX'] = (-1.0, 1.0)
+I_Mesh['ZMIN'], I_Mesh['ZMAX'] = (-1.0, 1.0)
+
+# Time integration related variables
+I_TI['cfl'] = 0.95  #Define the Courant–Friedrichs–Lewy condition
+I_TI['final_time'] = 2*pi
+#Chose the time integrator. Below is a list of up to date available
+#options:
+# SSPRK33, SSPRK104,
+# KennedyCarpenterLewis2R54C, CalvoFrancoRandez2R64,
+# CarpenterKennedy2N54, ToulorgeDesmet2N84F
+I_TI['time_integrator'] = 'SSPRK33' #'KennedyCarpenterLewis2R54C';
+
+
+# Induction equation related variables
+#Specify how the three part of the linear induction equation shall be
+#computed.
+I_IEq['form_uibj'] = 'USE_UIBJ_PRODUCT'; # PRODUCT, SPLIT, CENTRAL
+I_IEq['form_source'] = 'USE_SOURCE_CENTRAL'; # CENTRAL, SPLIT, ZERO
+I_IEq['form_ujbi'] = 'USE_UJBI_CENTRAL'; # SPLIT, CENTRAL, PRODUCT
+#Enable or disable Hall-Term
+I_IEq['hall_term'] = 'NONE'; # NONE, USE_HALL
+#Enable or disable artificial dissipation
+I_IEq['dissipation'] = 'NONE'; # NONE, USE_ARTIFICIAL_DISSIPATION
+#Specify what kind of artifical dissipation shall be used
+    #USE_ADAPTIVE_DISSIPATION, USE_FIRST_ORDER_DISSIPATION, USE_HIGH_ORDER_DISSIPATION
+I_IEq['dissipation_form'] = 'USE_HIGH_ORDER_DISSIPATION';
+I_IEq['HO_DISSIPATION_FACTOR'] = 1; # a constant (non-negative) factor adapting the influence of the high order dissipation
+#Additional parameters needed for adaptive dissipation. For typical values
+#see Svärd et al. (2009).
+I_IEq['MP2MIN'] = 1
+I_IEq['MP2MAX'] = 10
+I_IEq['CMIN'] = 1
+I_IEq['CMAX'] = 10
+
+
+# Divergence cleaning related variables
+#Enable divergence cleaning (True) or disable divergence cleaning (False)
+I_DC['divergence_cleaning'] = False
+#Choose how the laplace operator will be discretized
+I_DC['divergence_cleaning_form'] = 'USE_LAPLACE_WIDE_STENCIL_LNS'
+#USE_LAPLACE_WIDE_STENCIL_LNS USE_LAPLACE_WIDE_STENCIL_DIRICHLET USE_LAPLACE_NARROW_STENCIL_DIRICHLET
+#Specify a threshold for the divergence norm
+I_DC['absolute_error_threshold'] = 1e-3;
+#The divergence cleaner will exit after max_iterations even if the error
+#threshold is not reached
+I_DC['max_iterations'] = 50;
+
+
+# Technical details
+# Specify the OpenCL device that will be used for computation
+I_Tech['device'] = 1;
+
+# Switch between single floating point and double floating point precision
+I_Tech['REAL'] = 'double' # float
+I_Tech['REAL4'] = I_Tech['REAL'] + '4'; #Vector datatype
+
+#Compiler based optimizations
+if I_Tech['REAL'] == 'float':
+    I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only -cl-single-precision-constant';
+else:
+    I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only';
+
+
+# Options relevant for a run
+# Defines the order
+I_RunOps['order'] = 4; # 2, 4, 6
+I_RunOps['operator_form'] = 'classical'; # 'classical' or 'extended' operators
+# Specify the testcase. The name of the testcase has to be equal to the
+# name of a header file which contains a function describing the initial state.
+# Example testcases are:
+# rotation_2D, rotation_3D, alfven_periodic_2D, hall_travelling_wave, hall_periodic
+induction_eq.I_RunOps['testcase'] = 'rotation_2D';
+I_RunOps['variable_u'] = False; # must be set to True if a variable velocity is used
+I_RunOps['periodic'] = 'NONE'; # 'NONE', 'USE_PERIODIC'; must be set to 'USE_PERIODIC'
+                                       # if periodic boundary conditions should be used
+# Optional plotting parameters (2D plots).
+# Choose the cross section with
+# 'x', 'y', 'z'
+# If you want to plot multiple cross sections use
+# 'xy', 'xz', 'yz', 'xyz'
+I_RunOps['plot_numerical_solution'] = '';
+I_RunOps['plot_analytical_solution'] = '';
+I_RunOps['plot_difference'] = '';
+I_RunOps['plot_divergence'] = '';
+#If set to 1 the magnetic field will be saved to I_Results('field_b')
+I_RunOps['save_fields'] = False;
+#If set to true the magnetic energy (L^2 norm) will be saved to I_Results('energy_over_time'), the
+#L2 errors (if available) to I_Results('L2error_B_over_time') & I_Results('L2error_divB_over_time')
+#and the time to I_Results('time')
+I_RunOps['save_integrals_over_time'] = False;
+
+#Initialize the magnetic field, the velocity field and the density field
+#according to the specified testcase. Also calculates and sets additional
+#variables, e.g. stepsize, timestep, local work group size, etc...
+(field_b_init, field_u_init, field_rho_init) = induction_eq.initialize();
+
+print('Testcase: ' + I_RunOps['testcase'] +'\n')
+print('Order:' +str(I_RunOps['order'])+ '\n')
+print('Time integrator:' + I_TI['time_integrator'] + ' \n')
+print('DT: '+ str(I_TI['DT']) + '   N_STEPS: ' + str(I_TI['num_steps']) + '   FINAL_TIME: '+str(I_TI['final_time'])+'\n')
+print('DX: '+ str(I_Mesh['DX']) + '   NODES_X: '+str(I_Mesh['NODES_X']) +'\n')
+print('DY: '+ str(I_Mesh['DY']) + '   NODES_Y: '+str(I_Mesh['NODES_Y'])+'\n')
+print('DZ: '+ str(I_Mesh['DZ']) + '   NODES_Z: '+str(I_Mesh['NODES_Z'])+'\n')
+print('Dissipation: '+I_IEq['dissipation']+' \n')
+print('Divergence cleaning: '+str(I_DC['divergence_cleaning'])+' \n')
+print('REAL:'+ I_Tech['REAL']+'\n')
+
+# Takes the initialized fields and advances the solution in time
+induction_eq.compute_numerical_solution(field_b_init, field_u_init, field_rho_init);
+
+print('Divergence Norm: ' + str(I_Results['divergence_norm']) + ' Total Energy: '+str(I_Results['energy']) + '\n')
+print('Relative Error:' + str(100*I_Results['rel_err']) + '%\n\n')

From 2bf98977bee4cfaa99d111eaa8e6cda9318b6bac Mon Sep 17 00:00:00 2001
From: Simon-Christian Klein <simoniusklein@gmail.com>
Date: Thu, 11 Oct 2018 11:17:49 +0200
Subject: [PATCH 2/2] changed the codingstyle and some lines as requested in PR
 #33. Added the timer and OpenCl device querry from the matlab version. found
 a bug from Fortran vs C array indexing

---
 .gitignore                          |   6 +
 python/InductionEq.py               | 613 ++++++++++++++--------------
 python/cl.py                        |  86 ++--
 python/compute_error_rotation_2d.py |  76 ++--
 4 files changed, 400 insertions(+), 381 deletions(-)
 mode change 100644 => 100755 python/compute_error_rotation_2d.py

diff --git a/.gitignore b/.gitignore
index 6bfc794..4750550 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,9 @@ examples/*.m~
 
 # temporary files for jupyter/ipython notebooks
 docs/.ipynb_checkpoints/
+
+
+# Python files
+__pycache__/
+*.py[cod]
+*$py.class
diff --git a/python/InductionEq.py b/python/InductionEq.py
index f471f38..0af9d02 100644
--- a/python/InductionEq.py
+++ b/python/InductionEq.py
@@ -1,18 +1,19 @@
 import math
 import numpy as np
-#import pyopencl as cl
 import cl
+import sys
+import time
 
 # from +InductionEq/preparse_vars.m
 def preparse_vars():
-	global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps, I_Results
-	I_Mesh = {'NODES_X':0, 'NODES_Y':0, 'NODES_Z':0, 'DX':0, 'DY':0, 'DZ':0, 'XMIN':0, 'XMAX':0, 'YMAX':0, 'YMIN':0, 'ZMIN':0, 'ZMAX':0}
-	I_TI = {'cfl':0, 'final_time':0, 'time_integrator':'\0', 'DT':0, 'num_steps':0}
-	I_IEq = {'form_uibj':0, 'form_source':0, 'form_ujbi':0, 'dissipation':'NONE', 'dissipation_form':'\0', 'HO_DISSIPATION_FACTOR':1, 'hall_term':'NONE', 'g_range':0, 'l_range':0, 'MP2MIN':0, 'MP2MAX':0, 'CMIN':0, 'CMAX':0}
-	I_DC = {'use':0, 'form':'\0', 'BNODES':0, 'error_threshold':0, 'g_range':0, 'l_range':0,'max_iterations':0}
-	I_Tech = {'device':0, 'REAL':'\0','num_nodes_pad':0, 'num_groups':0,'W_SIZE':0, 'g_range':0, 'l_range':0, 'optimizations':'\0', 'context' : None}
-	I_RunOps = {'order':0, 'operator_form':'classical', 'testcase':'\0', 'variable_u':False, 'periodic':'NONE', 'plot_energy':0, 'plot_numerical_solution':0,'plot_analytical_solution':0, 'plot_difference':0, 'plot_divergence':0, 'save_fields':False, 'save_integrals_over_time':False}
-	I_Results = {'abs_err':0, 'rel_err':0, 'field_b':0, 'field_b_ana':0, 'energy':0, 'divergence_norm':0, 'runtime':0, 'kernel_runtime':0, 'energy_over_time':0, 'L2error_B_over_time':0, 'L2error_divB_over_time':0, 'time':0 }
+    global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps, I_Results
+    I_Mesh = {'NODES_X':0, 'NODES_Y':0, 'NODES_Z':0, 'DX':0, 'DY':0, 'DZ':0, 'XMIN':0, 'XMAX':0, 'YMAX':0, 'YMIN':0, 'ZMIN':0, 'ZMAX':0}
+    I_TI = {'cfl':0, 'final_time':0, 'time_integrator':'\0', 'DT':0, 'num_steps':0}
+    I_IEq = {'form_uibj':0, 'form_source':0, 'form_ujbi':0, 'dissipation':'NONE', 'dissipation_form':'\0', 'HO_DISSIPATION_FACTOR':1, 'hall_term':'NONE', 'g_range':0, 'l_range':0, 'MP2MIN':0, 'MP2MAX':0, 'CMIN':0, 'CMAX':0}
+    I_DC = {'use':0, 'form':'\0', 'BNODES':0, 'error_threshold':0, 'g_range':0, 'l_range':0,'max_iterations':0}
+    I_Tech = {'device':0, 'REAL':'\0','num_nodes_pad':0, 'num_groups':0,'W_SIZE':0, 'g_range':0, 'l_range':0, 'optimizations':'\0', 'context' : None}
+    I_RunOps = {'order':0, 'operator_form':'classical', 'testcase':'\0', 'variable_u':False, 'periodic':'NONE', 'plot_energy':0, 'plot_numerical_solution':0,'plot_analytical_solution':0, 'plot_difference':0, 'plot_divergence':0, 'save_fields':False, 'save_integrals_over_time':False}
+    I_Results = {'abs_err':0, 'rel_err':0, 'field_b':0, 'field_b_ana':0, 'energy':0, 'divergence_norm':0, 'runtime':0, 'kernel_runtime':0, 'energy_over_time':0, 'L2error_B_over_time':0, 'L2error_divB_over_time':0, 'time':0 }
 
 # from generate_settings.m
 
@@ -21,164 +22,160 @@ def generate_settings(map, keys):
     # compiler optimizations and OpenCL defines of map with the given keys where 
     # keys is a cell array, e.g. settings_mesh = generate_settings(I_Mesh, {'DX'; 'DY'; 'DZ'}
 
-	settings = '';
-	for key in keys:
-		if key in map:
-			if not(type(map[key]) is float or type(map[key]) is int):
-				if key==  'optimizations':
-					settings = settings + map[key]
-				elif 'USE' in map[key]:
-					settings = settings + ' -D' +  map[key] + '=1'
-				else:
-					settings = settings + ' -D' + key + '=' + str(map[key])
-			else:
-				settings = settings + ' -D' + key + "=" + str(map[key])
-		else:
-			print('Unknown identifier ' +  keys[i] + '\n')
-	return settings
+    settings = '';
+    for key in keys:
+        if key in map:
+            if not(type(map[key]) is float or type(map[key]) is int):
+                if key==  'optimizations':
+                    settings = settings + map[key]
+                elif 'USE' in map[key]:
+                    settings = settings + ' -D' +  map[key] + '=1'
+                else:
+                    settings = settings + ' -D' + key + '=' + str(map[key])
+            else:
+                settings = settings + ' -D' + key + "=" + str(map[key])
+        else:
+            print('Unknown identifier ' +  keys[i])
+    return settings
 
 
 
 
 # from +InductionEq/initialize.m
 def initialize():
-	global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps
+    global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps
 
 
 # *NEW* generate opencl context with PyOpenCl
-	cl.create_ctx()
-
+    cl.create_ctx()
+    I_Tech['device'] = cl.devices[0]
 # a python float is a 64bit float => float means double
-	if I_RunOps['periodic'] == 'USE_PERIODIC':
-		DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / float(I_Mesh['NODES_X']);
-		DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / float(I_Mesh['NODES_Y']);
-		DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / float(I_Mesh['NODES_Z']);
-	else:
-		DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / (float(I_Mesh['NODES_X'])-1);
-		DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / (float(I_Mesh['NODES_Y'])-1);
-		DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / (float(I_Mesh['NODES_Z'])-1);
+    if I_RunOps['periodic'] == 'USE_PERIODIC':
+        DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / float(I_Mesh['NODES_X'])
+        DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / float(I_Mesh['NODES_Y'])
+        DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / float(I_Mesh['NODES_Z'])
+    else:
+        DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / (float(I_Mesh['NODES_X'])-1)
+        DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / (float(I_Mesh['NODES_Y'])-1)
+        DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / (float(I_Mesh['NODES_Z'])-1)
+
 
+    I_Mesh['DX'] = DX; I_Mesh['DY'] = DY; I_Mesh['DZ'] = DZ
+    num_nodes = I_Mesh['NODES_X']*I_Mesh['NODES_Y']*I_Mesh['NODES_Z']
 
-	I_Mesh['DX'] = DX; I_Mesh['DY'] = DY; I_Mesh['DZ'] = DZ;
-	num_nodes = I_Mesh['NODES_X']*I_Mesh['NODES_Y']*I_Mesh['NODES_Z'];
 
-# later ...
 #    type = dev_type(I_Tech('device'));
 #    if (strcmp(type{1},'CPU'))
 #        group_size = 16;
 #    else
-#        if (num_nodes > lw_size(I_Tech('device')))
-#            group_size = lw_size(I_Tech('device'));
-#        else
-#          group_size = 2^floor(log(num_nodes) / log(2));
-#        end
-#    end
 
-# FOR MY GPU
-	group_size = 1024
+    if (num_nodes > cl.group_size):
+        group_size = cl.group_size
+    else:
+        group_size = 2^floor(log(num_nodes) / log(2))
 
-	group_size = float(group_size);
-	num_nodes_pad = math.ceil(float(num_nodes)/group_size)*group_size
-	num_groups = math.ceil(num_nodes_pad/group_size)
+    group_size = float(group_size);
+    num_nodes_pad = math.ceil(float(num_nodes)/group_size)*group_size
+    num_groups = math.ceil(num_nodes_pad/group_size)
 
-	I_Tech['num_nodes_pad'] = int(num_nodes_pad)
+    I_Tech['num_nodes_pad'] = int(num_nodes_pad)
 
-	I_Tech['num_groups'] = int(num_groups);
-	I_Tech['W_SIZE'] = int(group_size);
+    I_Tech['num_groups'] = int(num_groups)
+    I_Tech['W_SIZE'] = int(group_size)
 
 
 # Initialise fields: magnetic, veclocity, and charge density
-	if I_Tech['REAL'] == 'float':
-		field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
-		field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
-		field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)+1
-	else:
-		field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
-		field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
-		field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64)+1
-
+    if I_Tech['REAL'] == 'float':
+        field_u_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype=np.float32)
+        field_b_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype=np.float32)
+        field_rho_init = np.ones((I_Tech['num_nodes_pad'], 1), dtype=np.float32)
+    else:
+        field_u_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype = np.float64)
+        field_b_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype = np.float64)
+        field_rho_init = np.ones((I_Tech['num_nodes_pad'], 1), dtype = np.float64)
 
 # Generate settings needed for computation of the initial state for the
 # specified testcase.
-	settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'optimizations']);
-	settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ', 'NODES_X', 'NODES_Y', 'NODES_Z','XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX'])
-	settings = settings_tech + settings_mesh
+    settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'optimizations']);
+    settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ', 'NODES_X', 'NODES_Y', 'NODES_Z','XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX'])
+    settings = settings_tech + settings_mesh
 
 
-	kernel_path_list =  []
-	kernel_path_list.append('../include/'+I_RunOps['testcase']+'.h')
-	kernel_path_list.append('../include/utils.h')
-	kernel_path_list.append('../kernel/kernel_init.cl')
+    kernel_path_list =  []
+    kernel_path_list.append('../include/'+I_RunOps['testcase']+'.h')
+    kernel_path_list.append('../include/utils.h')
+    kernel_path_list.append('../kernel/kernel_init.cl')
 
-	cl.compile_kernels(kernel_path_list, settings)
+    cl.compile_kernels(kernel_path_list, settings)
 
     # Set global and local range for calculation of the inital state
     # The global and local range can either be a vector of type uint32 or
     # double. Double is supported due to the fact that by default Matlab
     # stores all variables as double.
-	I_IEq['g_range'] = np.array([I_Mesh['NODES_X'], I_Mesh ['NODES_Y'], I_Mesh['NODES_Z']], dtype=np.int32)
-	I_IEq['l_range'] = np.array([0], dtype=np.int32)
+    I_IEq['g_range'] = np.array([I_Mesh['NODES_X'], I_Mesh ['NODES_Y'], I_Mesh['NODES_Z']], dtype=np.int32)
+    I_IEq['l_range'] = np.array([0], dtype=np.int32)
 
     # Compute initial state for magnetic field, velocity field and density
     # field
-	cl.run_kernel('init_b', I_IEq['g_range'], I_IEq['l_range'], field_b_init)
-	cl.run_kernel('init_u', I_IEq['g_range'], I_IEq['l_range'], field_u_init)
-	cl.run_kernel('init_rho', I_IEq['g_range'], I_IEq['l_range'], field_rho_init)
+    cl.run_kernel('init_b', I_IEq['g_range'], I_IEq['l_range'], field_b_init)
+    cl.run_kernel('init_u', I_IEq['g_range'], I_IEq['l_range'], field_u_init)
+    cl.run_kernel('init_rho', I_IEq['g_range'], I_IEq['l_range'], field_rho_init)
+
 
     # Compute time step from CFL number, min step size and maximal velocity
-	u_max = max(np.sqrt(np.power(field_u_init[1,:], 2) + np.power(field_u_init[2,:],2) + np.power(field_u_init[3,:],2)))
-	dt = I_TI['cfl'] * min([DX, DY, DZ]) / float(u_max)
-	num_steps = math.ceil(I_TI['final_time']/dt)
-	dt = I_TI['final_time'] / num_steps
+    u_max = max(np.sqrt(np.power(field_u_init[:, 0], 2) + np.power(field_u_init[:, 1],2) + np.power(field_u_init[:, 2],2)))
+    dt = I_TI['cfl'] * min([DX, DY, DZ]) / float(u_max)
+    num_steps = math.ceil(I_TI['final_time']/dt)
+    dt = I_TI['final_time'] / num_steps
 
-	I_TI['DT'] = dt
-	I_TI['num_steps'] = num_steps
+    I_TI['DT'] = dt
+    I_TI['num_steps'] = num_steps
 
     # Global and local range for dot product and norm
-	I_Tech['g_range'] = np.array([I_Tech['num_nodes_pad'], 1, 1], dtype=np.uint32)
-	I_Tech['l_range'] = np.array([I_Tech['W_SIZE'], 1, 1], dtype=np.uint32)
+    I_Tech['g_range'] = np.array([I_Tech['num_nodes_pad'], 1, 1], dtype=np.uint32)
+    I_Tech['l_range'] = np.array([I_Tech['W_SIZE'], 1, 1], dtype=np.uint32)
 
     # Depending on the discretization of the laplace operator for
     # divergence cleaning set the number of boundary nodes
-	if I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_LNS':
-        	I_DC['BNODES'] = 0;
-	elif (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_DIRICHLET') or (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_NARROW_STENCIL_DIRICHLET'):
-		I_DC['BNODES'] = 1;
-	else:
-		print('ERROR: Option I_DC(''divergence_cleaning_form'') = '+str(I_DC['divergence_cleaning_form']) + ' unknown.')
+    if I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_LNS':
+            I_DC['BNODES'] = 0
+    elif (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_DIRICHLET') or (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_NARROW_STENCIL_DIRICHLET'):
+        I_DC['BNODES'] = 1
+    else:
+        print('ERROR: Option I_DC(''divergence_cleaning_form'') = '+str(I_DC['divergence_cleaning_form']) + ' unknown.')
 
     # Set global and local range for divergence cleaning
-	I_DC['g_range'] = np.array([I_Mesh['NODES_X']-2*I_DC['BNODES'], I_Mesh['NODES_Y']-2*I_DC['BNODES'], I_Mesh['NODES_Z']-2*I_DC['BNODES']], dtype=np.int32)
-	I_DC['l_range'] = np.array([0], dtype=np.int32)
+    I_DC['g_range'] = np.array([I_Mesh['NODES_X']-2*I_DC['BNODES'], I_Mesh['NODES_Y']-2*I_DC['BNODES'], I_Mesh['NODES_Z']-2*I_DC['BNODES']], dtype=np.int32)
+    I_DC['l_range'] = np.array([0], dtype=np.int32)
 
-	return field_b_init, field_u_init, field_rho_init
+    return field_b_init, field_u_init, field_rho_init
 
 def compute_numerical_solution_setup(field_b, field_u, field_rho):
-	global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps
-
-	# Memory allocation for fields
-	if I_Tech['REAL'] == 'float':
-	# Initialize auxillary fields needed for time stepping
-		field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
-		field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
-		field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
-		field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
-		current_time = np.zeros(2, dtype=np.float32) # contains two elements: time at the beginning of a step and of a stage
-		norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float32)
-	# Allocate memory for field used for divergence cleaning
-		if I_DC['divergence_cleaning']:
+    global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps
+
+    # Memory allocation for fields
+    if I_Tech['REAL'] == 'float':
+    # Initialize auxillary fields needed for time stepping
+        field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+        field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+        field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
+        field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32)
+        current_time = np.zeros(2, dtype=np.float32) # contains two elements: time at the beginning of a step and of a stage
+        norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float32)
+    # Allocate memory for field used for divergence cleaning
+        if I_DC['divergence_cleaning']:
         #Field for divergence of b-field and phi
-			field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
-			field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
+            field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
+            field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
         #Fields needed for CG
-			field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float32);
-			field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32);
-			field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype = np.float32);
+            field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float32)
+            field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)
+            field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype = np.float32)
     
-		energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32);
-		L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32)
-		L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32);
-	else:
+        energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32)
+        L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32)
+        L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32)
+    else:
        # Initialize auxillary fields needed for time stepping
                 field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
                 field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64)
@@ -196,240 +193,244 @@ def compute_numerical_solution_setup(field_b, field_u, field_rho):
                         field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64);
                         field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype =np.float64)
     
-                energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64);
+                energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64)
                 L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64)
-                L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64);
+                L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64)
 
-	kernel_path_list = []
+    kernel_path_list = []
 
 # Include header file containing the coefficients of the respective order
-	if I_RunOps['order'] == 2:
-		if I_RunOps['operator_form'] == 'extended':
-			kernel_path_list.append('../include/2ndOrderExtended.h')
-		else: # classical operators
-			kernel_path_list.append('../include/2ndOrder.h')
-
-	elif I_RunOps['order'] == 4:
-		if I_RunOps['operator_form'] ==  'extended':
-			kernel_path_list.append('../include/4thOrderExtended.h')
-		else: # classical operators
-			kernel_path_list.append('../include/4thOrder.h')
-
-	elif I_RunOps['order'] == 6:
-		if I_RunOps['operator_form'] == 'extended':
-			kernel_path_list.append('../include/6thOrderExtended.h')
-		else: # classical operators
-			kernel_path_list.append('../include/6thOrder.h')
-
-	else:
-    		print('Specify order \n')
-
-	kernel_path_list.append('../include/utils.h')
-	kernel_path_list.append('../include/' + I_RunOps['testcase'] + '.h')
-	kernel_path_list.append('../kernel/kernel_init.cl')
-
-	kernel_path_list.append('../kernel/SBP_operator.cl')
-
-	kernel_path_list.append('../kernel/kernel_operator.cl')
-
-	# Include files for divergence cleaning if desired
-	if I_DC['divergence_cleaning']:
-		pass
+    if I_RunOps['order'] == 2:
+        if I_RunOps['operator_form'] == 'extended':
+            kernel_path_list.append('../include/2ndOrderExtended.h')
+        else: # classical operators
+            kernel_path_list.append('../include/2ndOrder.h')
+
+    elif I_RunOps['order'] == 4:
+        if I_RunOps['operator_form'] ==  'extended':
+            kernel_path_list.append('../include/4thOrderExtended.h')
+        else: # classical operators
+            kernel_path_list.append('../include/4thOrder.h')
+
+    elif I_RunOps['order'] == 6:
+        if I_RunOps['operator_form'] == 'extended':
+            kernel_path_list.append('../include/6thOrderExtended.h')
+        else: # classical operators
+            kernel_path_list.append('../include/6thOrder.h')
+
+    else:
+            print('Specify order \n')
+
+    kernel_path_list.append('../include/utils.h')
+    kernel_path_list.append('../include/' + I_RunOps['testcase'] + '.h')
+    kernel_path_list.append('../kernel/kernel_init.cl')
+
+    kernel_path_list.append('../kernel/SBP_operator.cl')
+
+    kernel_path_list.append('../kernel/kernel_operator.cl')
+
+    # Include files for divergence cleaning if desired
+    if I_DC['divergence_cleaning']:
+        pass
     #Prepare temporary fields for divergence cleaning
-#		DC_fields = struct('divB',field_divB,'phi',field_phi,'laplace', field_laplace,'r', field_r,'u', field_u_div_cleaning,'output', norm2_output);
+#       DC_fields = struct('divB',field_divB,'phi',field_phi,'laplace', field_laplace,'r', field_r,'u', field_u_div_cleaning,'output', norm2_output);
 #    kernel_path_list = [kernel_path_list, {'../kernel/kernel_div_cleaning.cl'}];
 #    kernel_path_list = [kernel_path_list, {'../kernel/kernel_CG.cl'}];
 #    settings_dc = generate_settings(I_DC, {'BNODES', 'divergence_cleaning_form'});
-	else:
-		DC_fields = 0
-		settings_dc =''
+    else:
+        DC_fields = 0
+        settings_dc =''
 
 # Include files for artificial dissipation if desired
-	if I_IEq['dissipation'] ==  'USE_ARTIFICIAL_DISSIPATION':
-		pass
-#		kernel_path_list = [kernel_path_list, {'../include/artificial_dissipation.h'}];
-#		kernel_path_list = [kernel_path_list, {'../kernel/artificial_dissipation.cl'}];
-#		settings_dissipation = generate_settings(I_IEq, {'dissipation', 'dissipation_form', 'HO_DISSIPATION_FACTOR'});
-
-	if I_IEq['dissipation_form'] == 'USE_ADAPTIVE_DISSIPATION':
-		pass
-#		settings_dissipation = strcat(settings_dissipation, generate_settings(I_IEq, {'MP2MIN'; 'MP2MAX'; 'CMIN'; 'CMAX'}));
-	else:
-    		settings_dissipation = ''
-
-	kernel_path_list.append('../kernel/induction_Eq_volume.cl')
-	kernel_path_list.append('../kernel/induction_Eq_surface.cl')
-	kernel_path_list.append('../kernel/kernel_dot_product.cl')
-	kernel_path_list.append('../kernel/kernel_time_integrator.cl')
+    if I_IEq['dissipation'] ==  'USE_ARTIFICIAL_DISSIPATION':
+        pass
+#       kernel_path_list = [kernel_path_list, {'../include/artificial_dissipation.h'}];
+#       kernel_path_list = [kernel_path_list, {'../kernel/artificial_dissipation.cl'}];
+#       settings_dissipation = generate_settings(I_IEq, {'dissipation', 'dissipation_form', 'HO_DISSIPATION_FACTOR'});
+
+    if I_IEq['dissipation_form'] == 'USE_ADAPTIVE_DISSIPATION':
+        pass
+#       settings_dissipation = strcat(settings_dissipation, generate_settings(I_IEq, {'MP2MIN'; 'MP2MAX'; 'CMIN'; 'CMAX'}));
+    else:
+            settings_dissipation = ''
+
+    kernel_path_list.append('../kernel/induction_Eq_volume.cl')
+    kernel_path_list.append('../kernel/induction_Eq_surface.cl')
+    kernel_path_list.append('../kernel/kernel_dot_product.cl')
+    kernel_path_list.append('../kernel/kernel_time_integrator.cl')
 
 # Specify compile settings
-	settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'W_SIZE', 'optimizations'])
+    settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'W_SIZE', 'optimizations'])
 
-	settings_mesh = generate_settings(I_Mesh,  ['DX', 'DY', 'DZ','NODES_X', 'NODES_Y', 'NODES_Z', 'XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX'])
+    settings_mesh = generate_settings(I_Mesh,  ['DX', 'DY', 'DZ','NODES_X', 'NODES_Y', 'NODES_Z', 'XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX'])
 
-	settings_induction_eq = generate_settings(I_IEq, ['form_uibj', 'form_source', 'form_ujbi', 'hall_term'])
+    settings_induction_eq = generate_settings(I_IEq, ['form_uibj', 'form_source', 'form_ujbi', 'hall_term'])
 
-	settings_time_integration = generate_settings(I_TI, ['DT'])
+    settings_time_integration = generate_settings(I_TI, ['DT'])
 
-	settings_runops = generate_settings(I_RunOps, ['periodic'])
+    settings_runops = generate_settings(I_RunOps, ['periodic'])
 
 
-	settings = settings_induction_eq + settings_time_integration + settings_tech + settings_mesh + settings_dissipation + settings_dc + settings_runops
+    settings = settings_induction_eq + settings_time_integration + settings_tech + settings_mesh + settings_dissipation + settings_dc + settings_runops
 
 
-	if I_TI['time_integrator'] ==  'SSPRK33':
+    if I_TI['time_integrator'] ==  'SSPRK33':
         # 'SSPRK33_1', 'SSPRK33_2a', 'SSPRK33_2b', 'SSPRK33_3a', 'SSPRK33_3b'};
         # calc_curlB_rho = {'calc_curlB_rho_1_3args', 'calc_curlB_rho_2_3args', 'calc_curlB_rho_3_3args'};
-		RK_Step = [];
-		if I_IEq['hall_term'] ==  'USE_HALL':
-			RK_Step.append('calc_curlB_rho_1_3args')
-		if I_RunOps['variable_u']:
-			RK_Step.append('calc_u_3_args')
-		RK_Step.append('SSPRK33_1')
-
-		RK_Step.append('SSPRK33_2a')
-		if I_IEq['hall_term']== 'USE_HALL':
-			RK_Step.append('calc_curlB_rho_2_3args')
-		if I_RunOps['variable_u']:
-			RK_Step.append('calc_u_3_args')
-		RK_Step.append('SSPRK33_2b')
-
-		RK_Step.append('SSPRK33_3a')
-		if I_IEq['hall_term'] ==  'USE_HALL':
-			RK_Step.append('calc_curlB_rho_3_3args')
-		if I_RunOps['variable_u']:
-			RK_Step.append('calc_u_3_args')
-		RK_Step.append('SSPRK33_3b')
-
-		RK_Step.append('calc_time_3_args')
-
-		time_integrator_num_fields = 3;
+        RK_Step = [];
+        if I_IEq['hall_term'] ==  'USE_HALL':
+            RK_Step.append('calc_curlB_rho_1_3args')
+        if I_RunOps['variable_u']:
+            RK_Step.append('calc_u_3_args')
+        RK_Step.append('SSPRK33_1')
+
+        RK_Step.append('SSPRK33_2a')
+        if I_IEq['hall_term']== 'USE_HALL':
+            RK_Step.append('calc_curlB_rho_2_3args')
+        if I_RunOps['variable_u']:
+            RK_Step.append('calc_u_3_args')
+        RK_Step.append('SSPRK33_2b')
+
+        RK_Step.append('SSPRK33_3a')
+        if I_IEq['hall_term'] ==  'USE_HALL':
+            RK_Step.append('calc_curlB_rho_3_3args')
+        if I_RunOps['variable_u']:
+            RK_Step.append('calc_u_3_args')
+        RK_Step.append('SSPRK33_3b')
+
+        RK_Step.append('calc_time_3_args')
+
+        time_integrator_num_fields = 3
 
 # switch time_integrator_num_fields
-	if time_integrator_num_fields == 2:
-		IEq_fields = {'field_b':field_b, \
-		'field_b2': field_b2, \
-		'field_curlB_rho': field_curlB_rho, \
-		'field_u': field_u, \
-		'field_rho':field_rho, \
-		'current_time':current_time, \
-		'field_divB': field_divB, \
-		'norm2_output':norm2_output, \
-		'energy': energy, \
-		'L2error_B': L2error_B, \
-		'L2error_divB': L2error_divB}
-	elif time_integrator_num_fields == 3:
-		IEq_fields = {'field_b': field_b, \
-		'field_b2': field_b2, \
-		'field_b3': field_b3, \
-		'field_curlB_rho': field_curlB_rho, \
-		'field_u': field_u, \
-		'field_rho': field_rho, \
-		'current_time': current_time, \
-		'field_divB': field_divB, \
+    if time_integrator_num_fields == 2:
+        IEq_fields = {'field_b':field_b, \
+        'field_b2': field_b2, \
+        'field_curlB_rho': field_curlB_rho, \
+        'field_u': field_u, \
+        'field_rho':field_rho, \
+        'current_time':current_time, \
+        'field_divB': field_divB, \
+        'norm2_output':norm2_output, \
+        'energy': energy, \
+        'L2error_B': L2error_B, \
+        'L2error_divB': L2error_divB}
+    elif time_integrator_num_fields == 3:
+        IEq_fields = {'field_b': field_b, \
+        'field_b2': field_b2, \
+        'field_b3': field_b3, \
+        'field_curlB_rho': field_curlB_rho, \
+        'field_u': field_u, \
+        'field_rho': field_rho, \
+        'current_time': current_time, \
+        'field_divB': field_divB, \
                 'norm2_output': norm2_output,\
                 'energy': energy,\
                 'L2error_B': L2error_B, \
                 'L2error_divB': L2error_divB}
-	else:
-        	print('Wrong value of time_integrator_num_fields =' + str( time_integrator_num_fields))
+    else:
+            raise Exception('Wrong value of time_integrator_num_fields =' + str( time_integrator_num_fields))
 
 
-	return kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step
+    return kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step
 
 def compute_numerical_solution(field_b, field_u, field_rho):
-	global I_mesh, I_TI, I_IEq, I_Tech, I_RunOps, I_Results
+    global I_mesh, I_TI, I_IEq, I_Tech, I_RunOps, I_Results
 
 # setup fields, settings etc.
-	(kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step) = compute_numerical_solution_setup(field_b, field_u, field_rho)
+    (kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step) = compute_numerical_solution_setup(field_b, field_u, field_rho)
 
 # Initialise logging variables
-	kernel_runtime = 0;
-	RK_block_size = 15000;
-	if I_RunOps['save_integrals_over_time']:
-		energy = IEq_fields['energy']
-		L2error_B = IEq_fields['L2error_B']
-		L2error_divB = IEq_fields['L2error_divB']
+    kernel_runtime = 0;
+    RK_block_size = 15000;
+    if I_RunOps['save_integrals_over_time']:
+        energy = IEq_fields['energy']
+        L2error_B = IEq_fields['L2error_B']
+        L2error_divB = IEq_fields['L2error_divB']
 
-	norm2_output = IEq_fields['norm2_output']
+    norm2_output = IEq_fields['norm2_output']
 
 # Compile kernel
-	cl.compile_kernels(kernel_path_list, settings);
+    cl.compile_kernels(kernel_path_list, settings)
+
+    t1 = time.clock()
 
 # Switch case for time integrator
 
-	if time_integrator_num_fields == 2:
-		print("time_integrator_num_fields = 2 not ready yet \n")
-	elif time_integrator_num_fields == 3:
-		field_b = IEq_fields['field_b']
-		field_b2 = IEq_fields['field_b2']
-		field_b3 = IEq_fields['field_b3']
-		field_curlB_rho = IEq_fields['field_curlB_rho']
-		field_u = IEq_fields['field_u']
-		field_rho = IEq_fields['field_rho']
-		current_time = IEq_fields['current_time']
-		field_divB = IEq_fields['field_divB']
-
-		if False: #other versions
-			pass
-		else:
-			num_steps_run = I_TI['num_steps'];
-			while num_steps_run > RK_block_size:
-				kernel_list = RK_Step * RK_block_size
-				t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\
-				field_b, field_b2, field_b3, field_curlB_rho,\
-				field_u, field_rho, current_time)
-				kernel_runtime =  kernel_runtime + t;
-				num_steps_run = num_steps_run - RK_block_size;
-
-			if num_steps_run > 0:
-				kernel_list = RK_Step *num_steps_run
-				t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\
-				field_b, field_b2, field_b3, field_curlB_rho,\
-				field_u, field_rho, current_time);
-				kernel_runtime = kernel_runtime + t;
-
-	else:
-		print('Wrong value of time_integrator_num_fields = ' + str(time_integrator_num_fields) + ' \n')
-	#runtime = toc;
-	runtime = 0
-	print('Elapsed time is' + str(runtime) + 'seconds.\n')
-
-	# Save total runtime and kernel runtime
-	I_Results['runtime'] = runtime;
-	I_Results['kernel_runtime'] = kernel_runtime;
-
-	# save fields
-	if I_RunOps['save_fields']:
-		I_Results['field_b'] = field_b;
-
-	field_b_ana = IEq_fields['field_b2']
-
-	#Calculate analytical solution and error
-	current_time[1] = I_TI['final_time']
-	cl.run_kernel('analytical_b', I_IEq['g_range'], I_IEq['l_range'], field_b_ana, current_time);
-
-	norm2_output[:] = 0;
-
-	cl.run_kernel('norm2_diff', I_Tech['g_range'], I_Tech['l_range'], field_b, field_b_ana, norm2_output);
-	I_Results['abs_err'] = math.sqrt(sum(sum(norm2_output)));
-
-
-	norm2_output[:] = 0;
-	cl.run_kernel( 'norm2', I_Tech['g_range'], I_Tech['l_range'], field_b_ana, norm2_output);
-	I_Results['rel_err'] = I_Results['abs_err'] / math.sqrt(norm2_output.sum());
-
-
-	#Calculate divergence
-	cl.run_kernel('calc_div', I_IEq['g_range'], I_IEq['l_range'], field_b, field_divB);
-	norm2_output[:] = 0;
-	cl.run_kernel('norm2_S', I_Tech['g_range'], I_Tech['l_range'], field_divB, norm2_output);
-	I_Results['divergence_norm'] = math.sqrt(norm2_output.sum());
-
-
-	#Calculate energy
-	norm2_output[:] = 0;
-	cl.run_kernel('norm2', I_Tech['g_range'], I_Tech['l_range'], field_b, norm2_output);
-	I_Results['energy'] = norm2_output.sum();
+    if time_integrator_num_fields == 2:
+        print("time_integrator_num_fields = 2 not ready yet \n")
+    elif time_integrator_num_fields == 3:
+        field_b = IEq_fields['field_b']
+        field_b2 = IEq_fields['field_b2']
+        field_b3 = IEq_fields['field_b3']
+        field_curlB_rho = IEq_fields['field_curlB_rho']
+        field_u = IEq_fields['field_u']
+        field_rho = IEq_fields['field_rho']
+        current_time = IEq_fields['current_time']
+        field_divB = IEq_fields['field_divB']
+
+        if False: #other versions
+            pass
+        else:
+            num_steps_run = I_TI['num_steps'];
+            while num_steps_run > RK_block_size:
+                kernel_list = RK_Step * RK_block_size
+                t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\
+                field_b, field_b2, field_b3, field_curlB_rho,\
+                field_u, field_rho, current_time)
+                kernel_runtime =  kernel_runtime + t
+                num_steps_run = num_steps_run - RK_block_size
+
+            if num_steps_run > 0:
+                kernel_list = RK_Step *num_steps_run
+                t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\
+                field_b, field_b2, field_b3, field_curlB_rho,\
+                field_u, field_rho, current_time)
+                kernel_runtime = kernel_runtime + t
+
+    else:
+        raise Exception('Wrong value of time_integrator_num_fields = ' + str(time_integrator_num_fields) + ' \n')
+    runtime =  time.clock() - t1
+    print('Elapsed time is ' + str(runtime) + ' seconds.\n')
+
+    # Save total runtime and kernel runtime
+    I_Results['runtime'] = runtime
+    I_Results['kernel_runtime'] = kernel_runtime
+
+    # save fields
+    if I_RunOps['save_fields']:
+        I_Results['field_b'] = field_b
+
+    field_b_ana = IEq_fields['field_b2']
+
+    #Calculate analytical solution and error
+    current_time[1] = I_TI['final_time']
+    cl.run_kernel('analytical_b', I_IEq['g_range'], I_IEq['l_range'], field_b_ana, current_time)
+
+    norm2_output[:] = 0
+
+    cl.run_kernel('norm2_diff', I_Tech['g_range'], I_Tech['l_range'], field_b, field_b_ana, norm2_output)
+    I_Results['abs_err'] = math.sqrt(sum(sum(norm2_output)))
+
+
+    norm2_output[:] = 0
+    cl.run_kernel( 'norm2', I_Tech['g_range'], I_Tech['l_range'], field_b_ana, norm2_output)
+    I_Results['rel_err'] = I_Results['abs_err'] / math.sqrt(norm2_output.sum())
+
+
+    #Calculate divergence
+    cl.run_kernel('calc_div', I_IEq['g_range'], I_IEq['l_range'], field_b, field_divB)
+    norm2_output[:] = 0
+    cl.run_kernel('norm2_S', I_Tech['g_range'], I_Tech['l_range'], field_divB, norm2_output)
+    I_Results['divergence_norm'] = math.sqrt(norm2_output.sum())
+
+
+    #Calculate energy
+    norm2_output[:] = 0;
+    cl.run_kernel('norm2', I_Tech['g_range'], I_Tech['l_range'], field_b, norm2_output)
+    I_Results['energy'] = norm2_output.sum()
+
 
 
+if sys.version_info[0] < 3:
+    raise Exception("Must be using Python 3")
diff --git a/python/cl.py b/python/cl.py
index cc177f5..96de6c4 100644
--- a/python/cl.py
+++ b/python/cl.py
@@ -1,56 +1,62 @@
 import pyopencl as cl
 
 def compile_kernels(src_path, cmd_line, *args):
-	global program, ctx
-	src = ' '
-	for entry in src_path:
-		file = open(entry, 'r')
-		src = src + file.read()
-		file.close()
-	prg = cl.Program(ctx, src).build(options=cmd_line)
-	program = prg
-	return prg
+    global program, ctx
+    src = ' '
+    for entry in src_path:
+        with open(entry, 'r') as io:
+            src = src + io.read()
+    prg = cl.Program(ctx, src).build(options=cmd_line)
+    program = prg
+    return prg
 
 def create_ctx():
-	global ctx, queue, PYOPENCL_COMPILER_OUTPUT
-	PYOPENCL_COMPILER_OUTPUT=1
-	ctx = cl.create_some_context()
-	queue = cl.CommandQueue(ctx)
-	return ctx, queue
+    global ctx, queue, PYOPENCL_COMPILER_OUTPUT, devices, group_size
+    PYOPENCL_COMPILER_OUTPUT=1
+    ctx = cl.create_some_context()
+    queue = cl.CommandQueue(ctx)
+    devices = ctx.devices
+    group_size = devices[0].max_work_group_size
+    return ctx, queue
 
 def run_kernel(names, g_range, l_range, *arguments):
-	global queue, program
-	inputbuffers = []
-	for arg in arguments:
-		inputbuffers.append(cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = arg))
-	callstrings = []
-	if type(names) is list:
-		for name in names:
-			callstrings.append('program.' + name + '(queue, tuple(g_range) , None, *inputbuffers)')
-	else:
-		callstrings.append('program.' + names + '(queue, tuple(g_range), None, *inputbuffers)')
-
-	for callstring in callstrings:
-		event = eval(callstring)
-#		event.wait() 
-	i = 0
-	for arg in arguments:
-		cl.enqueue_copy(queue, arg, inputbuffers[i])
-#		inputbuffers[i].release()
-		i = i +1
-	return 0
+    global queue, program
+    inputbuffers = []
+    for arg in arguments:
+        inputbuffers.append(cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = arg))
+    callstrings = []
+    if l_range[0] == 0 and len(l_range) == 1:
+        l_range = None
+    else:
+        l_range = tuple(l_range)
+
+    if type(names) is list:
+        for name in names:
+            callstrings.append('program.' + name + '(queue, tuple(g_range) , l_range, *inputbuffers)')
+    else:
+        callstrings.append('program.' + names + '(queue, tuple(g_range), l_range, *inputbuffers)')
+
+    for callstring in callstrings:
+        event = eval(callstring)
+        event.wait()
+    i = 0
+    for arg in arguments:
+        cl.enqueue_copy(queue, arg, inputbuffers[i]).wait()
+        inputbuffers[i].release()
+        i = i +1
+    return 0
 
 def run_kernel_ref(name, g_range, l_range, *arg_refs):
-	global queue, program
-	callstring = 'program.' + name + '(queue, None, None, *inputbuffers)'
-	eval(callstring)
-	return
+    global queue, program
+    callstring = 'program.' + name + '(queue, None, None, *inputbuffers)'
+    eval(callstring)
+    return
 
 def allocate_buf(array):
-	return cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = array)
+    return cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = array)
 
 def malloc(size):
-	return cl.Buffer(ctx, size=size)
+    return cl.Buffer(ctx, size=size)
 
 
 
diff --git a/python/compute_error_rotation_2d.py b/python/compute_error_rotation_2d.py
old mode 100644
new mode 100755
index e5cd9ed..67f7bb2
--- a/python/compute_error_rotation_2d.py
+++ b/python/compute_error_rotation_2d.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 # -*- coding: <utf8> -*-
 
 #This project is licensed under the terms of the Creative Commons CC BY-NC-ND 3.0 license.
@@ -9,6 +10,7 @@
 #enabled, e.g. divergence cleaning.
 import InductionEq as induction_eq
 from math import pi
+import sys
 #prepare_vars() initializes containers (maps) in which all variables (label, value) are
 #stored. Variables are grouped together by purpose:
 #I_Mesh: Contains all variables concerning the discrete mesh, e.g.
@@ -30,7 +32,11 @@
 #or to switch between different discretizations. In case the key is written
 #in capital letters the program define will be set to the corresponding
 #value of the key.
-induction_eq.preparse_vars();
+if sys.version_info[0] < 3:
+    raise Exception("Must be using Python 3")
+
+
+induction_eq.preparse_vars()
 
 
 #global I_Mesh I_TI I_IEq I_DC I_Tech I_RunOps I_Results
@@ -67,17 +73,17 @@
 # Induction equation related variables
 #Specify how the three part of the linear induction equation shall be
 #computed.
-I_IEq['form_uibj'] = 'USE_UIBJ_PRODUCT'; # PRODUCT, SPLIT, CENTRAL
-I_IEq['form_source'] = 'USE_SOURCE_CENTRAL'; # CENTRAL, SPLIT, ZERO
-I_IEq['form_ujbi'] = 'USE_UJBI_CENTRAL'; # SPLIT, CENTRAL, PRODUCT
+I_IEq['form_uibj'] = 'USE_UIBJ_PRODUCT' # PRODUCT, SPLIT, CENTRAL
+I_IEq['form_source'] = 'USE_SOURCE_CENTRAL' # CENTRAL, SPLIT, ZERO
+I_IEq['form_ujbi'] = 'USE_UJBI_CENTRAL' # SPLIT, CENTRAL, PRODUCT
 #Enable or disable Hall-Term
-I_IEq['hall_term'] = 'NONE'; # NONE, USE_HALL
+I_IEq['hall_term'] = 'NONE' # NONE, USE_HALL
 #Enable or disable artificial dissipation
-I_IEq['dissipation'] = 'NONE'; # NONE, USE_ARTIFICIAL_DISSIPATION
+I_IEq['dissipation'] = 'NONE' # NONE, USE_ARTIFICIAL_DISSIPATION
 #Specify what kind of artifical dissipation shall be used
     #USE_ADAPTIVE_DISSIPATION, USE_FIRST_ORDER_DISSIPATION, USE_HIGH_ORDER_DISSIPATION
-I_IEq['dissipation_form'] = 'USE_HIGH_ORDER_DISSIPATION';
-I_IEq['HO_DISSIPATION_FACTOR'] = 1; # a constant (non-negative) factor adapting the influence of the high order dissipation
+I_IEq['dissipation_form'] = 'USE_HIGH_ORDER_DISSIPATION'
+I_IEq['HO_DISSIPATION_FACTOR'] = 1 # a constant (non-negative) factor adapting the influence of the high order dissipation
 #Additional parameters needed for adaptive dissipation. For typical values
 #see Svärd et al. (2009).
 I_IEq['MP2MIN'] = 1
@@ -93,31 +99,31 @@
 I_DC['divergence_cleaning_form'] = 'USE_LAPLACE_WIDE_STENCIL_LNS'
 #USE_LAPLACE_WIDE_STENCIL_LNS USE_LAPLACE_WIDE_STENCIL_DIRICHLET USE_LAPLACE_NARROW_STENCIL_DIRICHLET
 #Specify a threshold for the divergence norm
-I_DC['absolute_error_threshold'] = 1e-3;
+I_DC['absolute_error_threshold'] = 1e-3
 #The divergence cleaner will exit after max_iterations even if the error
 #threshold is not reached
-I_DC['max_iterations'] = 50;
+I_DC['max_iterations'] = 50
 
 
 # Technical details
 # Specify the OpenCL device that will be used for computation
-I_Tech['device'] = 1;
+I_Tech['device'] = 1
 
 # Switch between single floating point and double floating point precision
 I_Tech['REAL'] = 'double' # float
-I_Tech['REAL4'] = I_Tech['REAL'] + '4'; #Vector datatype
+I_Tech['REAL4'] = I_Tech['REAL'] + '4' #Vector datatype
 
 #Compiler based optimizations
 if I_Tech['REAL'] == 'float':
-    I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only -cl-single-precision-constant';
+    I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only -cl-single-precision-constant'
 else:
-    I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only';
+    I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only'
 
 
 # Options relevant for a run
 # Defines the order
-I_RunOps['order'] = 4; # 2, 4, 6
-I_RunOps['operator_form'] = 'classical'; # 'classical' or 'extended' operators
+I_RunOps['order'] = 4 # 2, 4, 6
+I_RunOps['operator_form'] = 'classical' # 'classical' or 'extended' operators
 # Specify the testcase. The name of the testcase has to be equal to the
 # name of a header file which contains a function describing the initial state.
 # Example testcases are:
@@ -131,35 +137,35 @@
 # 'x', 'y', 'z'
 # If you want to plot multiple cross sections use
 # 'xy', 'xz', 'yz', 'xyz'
-I_RunOps['plot_numerical_solution'] = '';
-I_RunOps['plot_analytical_solution'] = '';
-I_RunOps['plot_difference'] = '';
-I_RunOps['plot_divergence'] = '';
+I_RunOps['plot_numerical_solution'] = ''
+I_RunOps['plot_analytical_solution'] = ''
+I_RunOps['plot_difference'] = ''
+I_RunOps['plot_divergence'] = ''
 #If set to 1 the magnetic field will be saved to I_Results('field_b')
-I_RunOps['save_fields'] = False;
+I_RunOps['save_fields'] = False
 #If set to true the magnetic energy (L^2 norm) will be saved to I_Results('energy_over_time'), the
 #L2 errors (if available) to I_Results('L2error_B_over_time') & I_Results('L2error_divB_over_time')
 #and the time to I_Results('time')
-I_RunOps['save_integrals_over_time'] = False;
+I_RunOps['save_integrals_over_time'] = False
 
 #Initialize the magnetic field, the velocity field and the density field
 #according to the specified testcase. Also calculates and sets additional
 #variables, e.g. stepsize, timestep, local work group size, etc...
-(field_b_init, field_u_init, field_rho_init) = induction_eq.initialize();
-
-print('Testcase: ' + I_RunOps['testcase'] +'\n')
-print('Order:' +str(I_RunOps['order'])+ '\n')
-print('Time integrator:' + I_TI['time_integrator'] + ' \n')
-print('DT: '+ str(I_TI['DT']) + '   N_STEPS: ' + str(I_TI['num_steps']) + '   FINAL_TIME: '+str(I_TI['final_time'])+'\n')
-print('DX: '+ str(I_Mesh['DX']) + '   NODES_X: '+str(I_Mesh['NODES_X']) +'\n')
-print('DY: '+ str(I_Mesh['DY']) + '   NODES_Y: '+str(I_Mesh['NODES_Y'])+'\n')
-print('DZ: '+ str(I_Mesh['DZ']) + '   NODES_Z: '+str(I_Mesh['NODES_Z'])+'\n')
-print('Dissipation: '+I_IEq['dissipation']+' \n')
-print('Divergence cleaning: '+str(I_DC['divergence_cleaning'])+' \n')
-print('REAL:'+ I_Tech['REAL']+'\n')
+(field_b_init, field_u_init, field_rho_init) = induction_eq.initialize()
+
+print('Testcase: ' + I_RunOps['testcase'])
+print('Order:' + str(I_RunOps['order']))
+print('Time integrator:' + I_TI['time_integrator'])
+print('DT: '+ str(I_TI['DT']) + '   N_STEPS: ' + str(I_TI['num_steps']) + '   FINAL_TIME: '+str(I_TI['final_time']))
+print('DX: '+ str(I_Mesh['DX']) + '   NODES_X: '+str(I_Mesh['NODES_X']))
+print('DY: '+ str(I_Mesh['DY']) + '   NODES_Y: '+str(I_Mesh['NODES_Y']))
+print('DZ: '+ str(I_Mesh['DZ']) + '   NODES_Z: '+str(I_Mesh['NODES_Z']))
+print('Dissipation: '+I_IEq['dissipation'])
+print('Divergence cleaning: '+str(I_DC['divergence_cleaning']))
+print('REAL:'+ I_Tech['REAL'])
 
 # Takes the initialized fields and advances the solution in time
 induction_eq.compute_numerical_solution(field_b_init, field_u_init, field_rho_init);
 
-print('Divergence Norm: ' + str(I_Results['divergence_norm']) + ' Total Energy: '+str(I_Results['energy']) + '\n')
+print('Divergence Norm: ' + str(I_Results['divergence_norm']) + ' Total Energy: '+str(I_Results['energy']))
 print('Relative Error:' + str(100*I_Results['rel_err']) + '%\n\n')